In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error
from sklearn.preprocessing import StandardScaler
import math
import numpy as np
import torch
from torch import nn, optim
from torch.utils.data import TensorDataset, DataLoader
from torch.nn import TransformerEncoder, TransformerEncoderLayer
import torch.nn.functional as F
from torch.optim.lr_scheduler import StepLR
from matplotlib import pyplot as plt
import random
# Use the GPU when one is visible to PyTorch, otherwise fall back to the CPU so
# the notebook still runs end-to-end on machines without CUDA.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

## Data pre-processing ##

In [None]:
# Load the four raw CSV tables of the contest data set.
# BSinfo.csv is read as-is; the other three tables have their 'Time' column
# parsed into datetimes at load time.
time_parsing = {'parse_dates': ['Time']}

base_info = pd.read_csv('EnergyContest/BSinfo.csv')
cell_data = pd.read_csv('EnergyContest/CLdata.csv', **time_parsing)
energy_data = pd.read_csv('EnergyContest/ECdata.csv', **time_parsing)
power_consumption_prediction = pd.read_csv(
    'EnergyContest/power_consumption_prediction.csv', **time_parsing
)

In [None]:
# --- Join per-cell measurements with the base-station description -----------
# Inner join: only (BS, CellName) pairs present in both tables survive.
Total = base_info.merge(cell_data, on=['BS', 'CellName'], how='inner')

# One-hot encode the categorical hardware descriptors.
categorical_columns = ['Mode', 'RUType', 'Antennas', 'Bandwidth', 'Frequency']
Total_dummies = pd.get_dummies(Total, columns=categorical_columns)

# Go from one row per (Time, BS, CellName) to one row per (Time, BS), with a
# separate column per (feature, CellName) pair; missing combinations become 0.
pivot_values = [col for col in Total_dummies.columns if col not in ('BS', 'Time')]
Total_pivot = pd.pivot_table(
    Total_dummies,
    values=pivot_values,
    index=['Time', 'BS'],
    columns='CellName',
    fill_value=0,
)
Total_pivot = Total_pivot.reset_index()
# Flatten the two-level column index to '<feature>_<cell>'; the index columns
# restored by reset_index have an empty second level and keep their plain name.
Total_pivot.columns = [
    f'{feature}_{cell}' if cell else feature
    for feature, cell in Total_pivot.columns
]

# --- Row-wise aggregates over all cells of a base station -------------------
# Both column lists are collected before any aggregate is appended.
load_columns = [name for name in Total_pivot.columns if name.startswith('load')]
ESMode_columns = [name for name in Total_pivot.columns if name.startswith('ESMode')]

load_block = Total_pivot[load_columns]
Total_pivot['load_avg'] = load_block.mean(axis=1)
Total_pivot['load_sum'] = load_block.sum(axis=1)

esmode_block = Total_pivot[ESMode_columns]
Total_pivot['ESMode_avg'] = esmode_block.mean(axis=1)
Total_pivot['ESMode_sum'] = esmode_block.sum(axis=1)

# --- Attach features to the labelled rows and to the rows to be predicted ---
# Left joins keep every row (and the row order) of the left-hand table.
join_keys = ['Time', 'BS']
train_data = energy_data.merge(Total_pivot, on=join_keys, how='left')
test_data = power_consumption_prediction.merge(Total_pivot, on=join_keys, how='left')

# Stack train on top of test so both receive identical dummy columns below.
# The original row labels are kept (ignore_index=False); the split further
# down is purely positional, so duplicate labels do not matter there.
n_train_rows = len(train_data)
combined_data = pd.concat([train_data, test_data], ignore_index=False)

# Collapse the timestamp to an hour-of-year integer, then one-hot encode it
# together with the base-station identifier.
timestamps = pd.to_datetime(combined_data['Time'])
combined_data['Time'] = timestamps.dt.dayofyear * 24 + timestamps.dt.hour
combined_data = pd.get_dummies(combined_data, columns=['Time', 'BS'])

# Divide every column whose name starts with 'BS' by 100 (per the original
# author's note this is meant to improve generalization).
bs_columns = [name for name in combined_data.columns if name.startswith('BS')]
for name in bs_columns:
    combined_data[name] = combined_data[name].div(100)

# Positional split back into the training rows and the rows to be predicted.
train_data = combined_data.iloc[:n_train_rows]
test_data = combined_data.iloc[n_train_rows:]

In [None]:
# Delete features with a standard deviation of 0
def preprocess_data(df, columns_with_std_zero):
    """Split a frame into a feature matrix and the 'Energy' target.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain the columns 'Energy' and 'w' plus every name listed in
        ``columns_with_std_zero``.
    columns_with_std_zero : list
        Extra column names to remove from the features.

    Returns
    -------
    (X, y) : tuple
        ``X`` is ``df`` without 'Energy', 'w' and the listed columns;
        ``y`` is the 'Energy' column. ``df`` itself is not modified.
    """
    excluded = ['Energy', 'w'] + columns_with_std_zero
    return df.drop(columns=excluded), df['Energy']

# Find columns that are constant (standard deviation exactly 0) in either the
# training rows or the test rows.
# The labels are taken from each std() result's own index, so every boolean
# mask is applied to the labels it was computed for. The previous version
# indexed train_data.columns with a mask derived from test_data.
train_std = train_data.std()
test_std = test_data.std()
columns_with_std_zero1 = train_std.index[train_std == 0].tolist()
columns_with_std_zero2 = test_std.index[test_std == 0].tolist()
# Union of both lists; sorted so the result is deterministic between runs.
columns_with_std_zero = sorted(set(columns_with_std_zero1 + columns_with_std_zero2))

# divide training set and test set into features and labels
X, y = preprocess_data(train_data, columns_with_std_zero)
X_test, y_test = preprocess_data(test_data, columns_with_std_zero)
print(X.columns)
print(train_data.columns)
print(y)

In [None]:
# Define loss function
def MAPELoss(output, target):
    """Mean absolute percentage error as a fraction (not multiplied by 100).

    Computes mean(|target - output| / |target|) over all elements. The result
    is a tensor, so it can be back-propagated through. Entries where
    ``target`` is 0 produce inf/nan, exactly as the plain division does.
    """
    relative_error = (target - output) / target
    return relative_error.abs().mean()

def calculate_mape(y_true, y_pred):
    """Mean absolute percentage error, in percent, computed with NumPy.

    Both arguments are torch tensors; they are detached from the autograd
    graph and moved to host memory before the computation, so the result is a
    plain NumPy scalar and carries no gradient.
    """
    truth = y_true.detach().cpu().numpy()
    estimate = y_pred.detach().cpu().numpy()
    abs_pct_error = np.abs((truth - estimate) / truth)
    return np.mean(abs_pct_error) * 100

# Define Model
class SELayer(nn.Module):
    """Feature-wise gating block.

    A two-layer bottleneck MLP (channel -> channel // reduction -> channel)
    followed by a sigmoid produces one gate value in (0, 1) per input feature;
    the input is multiplied element-wise by those gates.

    Args:
        channel: size of the last dimension of the input.
        reduction: divisor for the bottleneck width (integer division).
    """

    def __init__(self, channel, reduction=16):
        super().__init__()
        bottleneck = channel // reduction
        self.fc = nn.Sequential(
            nn.Linear(channel, bottleneck),
            nn.ReLU(inplace=True),
            nn.Linear(bottleneck, channel),
            nn.Sigmoid(),
        )

    def forward(self, x):
        """Return ``x`` scaled element-wise by its learned gates."""
        gates = self.fc(x)
        return x * gates

# class MyModel(nn.Module):
#     def __init__(self, input_dim, num_layers=7):
#         super(MyModel, self).__init__()
        
#         self.layers = nn.ModuleList()
        
#         self.layers.append(nn.Linear(input_dim, 1024))
#         self.layers.append(SELayer(1024))
        
#         for i in range(num_layers - 1):
#             input_dim = 1024 // (2 ** i)
#             output_dim = 1024 // (2 ** (i + 1))
#             self.layers.append(nn.Linear(input_dim, output_dim))
#             self.layers.append(SELayer(output_dim))
        
#         input_dim = 1024 // (2 ** (num_layers - 1))
#         self.layers.append(nn.Linear(input_dim, 1))
        
#     def forward(self, x):
#         for layer in self.layers:
#             if isinstance(layer, nn.Linear):
#                 x = F.leaky_relu(layer(x))
#             elif isinstance(layer, SELayer):
#                 x = layer(x)
#         return x

class MyModel(nn.Module):
    """Funnel-shaped MLP regressor with one output value per sample.

    The module list is made of ``num_layers`` stages, each
    ``Linear -> BatchNorm1d -> ReLU -> Dropout(0.5) -> SELayer``. The first
    stage maps ``input_dim`` to 1024 and every later stage halves the width.
    A final ``Linear`` maps the last width to a single output.

    NOTE(review): ``forward`` only executes the ``nn.Linear`` and ``SELayer``
    entries. The ``BatchNorm1d``, ``ReLU`` and ``Dropout`` modules are
    registered (so they appear in ``parameters()`` and in saved checkpoints)
    but are never applied. Every ``Linear`` output, including the final
    one-unit output layer, goes through ``F.leaky_relu``. This is kept exactly
    as-is because checkpoints pickled by this notebook depend on this forward
    pass; changing it requires retraining.

    Args:
        input_dim: number of input features.
        num_layers: number of stages before the output layer.
    """

    def __init__(self, input_dim, num_layers=7):
        super().__init__()

        def make_stage(fan_in, fan_out):
            # Module construction order is significant: it fixes both the
            # indices inside ``self.layers`` (state_dict keys) and the order
            # in which random initial weights are drawn.
            return [
                nn.Linear(fan_in, fan_out),
                nn.BatchNorm1d(fan_out),
                nn.ReLU(),
                nn.Dropout(0.5),
                SELayer(fan_out),
            ]

        modules = make_stage(input_dim, 1024)
        for depth in range(1, num_layers):
            modules.extend(make_stage(1024 // (2 ** (depth - 1)), 1024 // (2 ** depth)))

        # Output head: last stage width -> 1.
        modules.append(nn.Linear(1024 // (2 ** (num_layers - 1)), 1))
        # modules.append(SELayer(1))

        self.layers = nn.ModuleList(modules)

    def forward(self, x):
        """Run ``x`` through the Linear (+ leaky ReLU) and SELayer entries only."""
        for layer in self.layers:
            if isinstance(layer, SELayer):
                x = layer(x)
            elif isinstance(layer, nn.Linear):
                x = F.leaky_relu(layer(x))
            # any other module type in self.layers is intentionally skipped

        return x

def weight_init(m):
    """Kaiming-uniform (ReLU gain) initialiser, for use with ``model.apply``.

    For ``Conv1d`` and ``Linear`` modules the weight is re-drawn in place and
    the bias, when present, is zeroed. Every other module type is left
    untouched.
    """
    if not isinstance(m, (torch.nn.Conv1d, torch.nn.Linear)):
        return
    torch.nn.init.kaiming_uniform_(m.weight, nonlinearity='relu')
    if m.bias is not None:
        torch.nn.init.zeros_(m.bias)

## Train ##

In [None]:
# Seed every RNG used by this notebook's imports so a fresh run is repeatable
# (DataLoader shuffling and weight initialisation draw from torch's RNG).
seed = 42
random.seed(seed)
np.random.seed(seed)
torch.manual_seed(seed)
torch.cuda.manual_seed(seed)

continue_train = 0  # 1: resume from ./model_best_w5.pth, anything else: train from scratch

# Cast to float32 explicitly. With boolean dummy columns next to float columns
# `X.values` is an object array, which torch cannot convert.
X_tensor = torch.tensor(X.to_numpy(dtype=np.float32), device=device)
y_tensor = torch.tensor(y.to_numpy(dtype=np.float32), device=device).unsqueeze(1)
X_data = TensorDataset(X_tensor, y_tensor)
X_loader = DataLoader(dataset=X_data, batch_size=2**11, shuffle=True)

if continue_train == 1:
    # NOTE: the checkpoint is a fully pickled model object, so only load files
    # you trust. map_location makes the resume work on the current device.
    # NOTE(review): recent PyTorch releases may need weights_only=False to load
    # a whole pickled module - confirm against the installed version.
    model = torch.load('./model_best_w5.pth', map_location=device)

else:
    model = MyModel(X.shape[1]).to(device)
    # model.apply(weight_init)

# Define loss function and optimizer
criterion = MAPELoss
optimizer = optim.AdamW(model.parameters(), lr=0.0001, weight_decay=0.001)
# step_size equals num_epochs below, so the single x0.1 decay would only take
# effect after the last epoch: the learning rate is effectively constant.
scheduler = StepLR(optimizer, step_size=35000, gamma=0.1)

# Train the model
num_epochs = 35000
early_stop_mape = 1.2  # stop once the mean training MAPE (in %) reaches this value

# Start from +inf so the first epoch always produces a "best" checkpoint, even
# if its MAPE is above 100 %.
best_mape = float('inf')
best_epoch = 0

try:
    for epoch in range(num_epochs):
        running_loss = 0.0
        model.train()  # Set the model to training mode
        for inputs, labels in X_loader:
            optimizer.zero_grad()

            # forward + backward + optimize
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()

            # accumulate the batch MAPE, expressed in percent
            running_loss += loss.item()*100

        # Mean of the per-batch training MAPE for this epoch, in percent.
        epoch_loss = running_loss / len(X_loader)

        # get_last_lr() is the supported accessor; get_lr() is only meant to be
        # called from inside scheduler.step() and warns otherwise.
        current_lr = scheduler.get_last_lr()
        print('Epoch %d, loss: %.3f, learning rate: %.6f' % (epoch + 1, epoch_loss, current_lr[0]))
        scheduler.step()

        if best_mape > epoch_loss:
            torch.save(model, "./model_best_w5.pth")
            best_mape = epoch_loss
            best_epoch = epoch + 1
        if epoch_loss <= early_stop_mape:
            break
    torch.save(model, "./model_last_w5.pth")
    print('Finished Training')
    print('best_epoch %d, best_mape %.3f' % (best_epoch, best_mape))
except KeyboardInterrupt:
    # Manual interruption: keep the current weights so the run is not lost.
    torch.save(model, "./model_last_w5.pth")
    print(f"Model saved to {'./model_last_w5.pth'}")

## Evaluate ##

In [None]:
# Cast to float32 explicitly. With boolean dummy columns next to float columns
# `X_test.values` is an object array, which torch cannot convert.
X_test_tensor = torch.tensor(X_test.to_numpy(dtype=np.float32), device=device)
y_test_tensor = torch.tensor(y_test.to_numpy(dtype=np.float32), device=device).unsqueeze(1)
# Deliberately not named `test_data`: that name is the feature DataFrame built
# by the pre-processing cell, and overwriting it breaks re-running earlier cells.
test_dataset = TensorDataset(X_test_tensor, y_test_tensor)
# No shuffling, so predictions come out in the row order of X_test. The model
# runs in eval mode under no_grad, so rows are scored independently and a large
# batch only saves time compared with scoring one row per step.
test_loader = DataLoader(dataset=test_dataset, batch_size=2**11, shuffle=False)

evaluate_flag = 1  # 1: shipped checkpoint in BestModel/, anything else: best checkpoint from the training cell
# NOTE: both files are fully pickled model objects - only load checkpoints you trust.
if evaluate_flag == 1:
    model = torch.load("BestModel/best_w5.pth", map_location=device)
else:
    model = torch.load("./model_best_w5.pth", map_location=device)

model.eval()  # Set the model to evaluation mode
predictions = []
with torch.no_grad():  # We don't need gradients for prediction
    for inputs, _ in test_loader:  # the second element is y_test_tensor; it is not used here
        outputs = model(inputs)
        predictions.extend(outputs.cpu().numpy())

predictions_numpy = np.array(predictions)
predictions_flatten = predictions_numpy.flatten()
predictions_series = pd.Series(predictions_flatten, name="PredictedEnergy")

# Re-read the prediction template without date parsing so the 'Time' text used
# in the ID is exactly what the file contains.
test_data_new = pd.read_csv('EnergyContest/power_consumption_prediction.csv')
if len(predictions_series) != len(test_data_new):
    # A label-aligned assignment would silently leave NaN rows in the output.
    raise ValueError(
        f"Got {len(predictions_series)} predictions for {len(test_data_new)} rows to predict"
    )
test_data_new['Energy'] = predictions_series.to_numpy()
test_data_new['ID'] = test_data_new['Time'].astype(str) + "_" + test_data_new['BS'].astype(str)
test_data_new = test_data_new[['ID', 'Energy']]
test_data_new.to_csv('./power_consumption_prediction_w5.csv', index=False)