In [74]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.preprocessing import StandardScaler, PolynomialFeatures
from sklearn.linear_model import LinearRegression
from sklearn.tree import DecisionTreeRegressor
from sklearn.ensemble import RandomForestRegressor
from xgboost import XGBRegressor
from sklearn.metrics import r2_score, mean_squared_error, mean_absolute_error
from sklearn.pipeline import Pipeline
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.optimizers import Adam

In [75]:
base=pd.read_csv('BSinfo.csv')
cell=pd.read_csv('CLdata.csv')
energy=pd.read_csv('ECdata.csv')
submit=pd.read_csv("PCprediction.csv")

In [76]:
base['BS'] = base['BS'].str.replace('B_', '')
base['CellName'] = base['CellName'].str.replace('Cell', '')
base['RUType'] = base['RUType'].str.replace('Type', '')
base['Mode'] = base['Mode'].str.replace('Mode', '')

In [77]:
cell['BS'] = cell['BS'].str.replace('B_', '')
cell['CellName'] = cell['CellName'].str.replace('Cell', '')
cell['Time'] = pd.to_datetime(cell['Time'])

In [78]:
energy['BS'] = energy['BS'].str.replace('B_', '')
energy['Time'] = pd.to_datetime(energy['Time'])

In [79]:
merged_df = pd.merge(energy, cell, on=['Time', 'BS'], how='left')
final = pd.merge(merged_df, base,  on=['BS', 'CellName'], how='left')
final['Time']=final['Time'].values.astype(float).reshape(-1, 1)  # Convert datetime to float and reshape

In [80]:
x=final
y=x.pop(item='Energy')

In [81]:
# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(x, y, test_size=0.2, random_state=42)

In [82]:
# Create a StandardScaler instance
scaler = StandardScaler()

In [83]:
# Fit the scaler on the training data and transform both training and testing data
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

In [84]:
# Linear Regression
linear_reg_pipe = Pipeline([
    ('model', LinearRegression())
])

In [85]:
# Polynomial Regression
poly_reg_pipe = Pipeline([
    ('poly', PolynomialFeatures(degree=2)),
    ('model', LinearRegression())
])

In [86]:
# Decision Tree Regressor
decision_tree_pipe = Pipeline([
    ('model', DecisionTreeRegressor(random_state=42))
])

In [87]:
# Random Forest Regressor
random_forest_pipe = Pipeline([
    ('model', RandomForestRegressor(random_state=42))
])

In [88]:
# XGBoost Regressor
xgb_pipe = Pipeline([
    ('model', XGBRegressor(random_state=42))
])


In [89]:
def create_deep_learning_model(input_dim):
    model = Sequential()
    model.add(Dense(64, input_dim=input_dim, activation='relu'))
    model.add(Dense(32, activation='relu'))
    model.add(Dense(1))  # Regression task, so no activation function
    
    optimizer = Adam(learning_rate=0.001)
    model.compile(loss='mean_squared_error', optimizer=optimizer)
    return model

In [90]:
nn_pipe = Pipeline([
    ('model', create_deep_learning_model(X_train_scaled.shape[1]))
])

In [91]:
# List of pipelines
pipelines = [
    ('Linear Regression', linear_reg_pipe),
    ('Polynomial Regression', poly_reg_pipe),
    ('Decision Tree', decision_tree_pipe),
    ('Random Forest', random_forest_pipe),
    ('XGBoost', xgb_pipe),
    ('Neural Network', nn_pipe)
]

In [92]:
# Define the WMAPE function
def wmape(y_true, y_pred):
    weights = np.abs(y_true)
    wmape = np.sum(np.abs(y_true - y_pred) / weights) * 100.0 / np.sum(weights)
    return wmape

In [94]:
# Train and evaluate each model in the pipeline
for model_name, pipe in pipelines:
    pipe.fit(X_train_scaled, y_train)
    y_pred = pipe.predict(X_test_scaled)

    if model_name == 'Neural Network':
        y_pred = y_pred.flatten()
    
    wmape_score = wmape(y_test, y_pred)
    r2 = r2_score(y_test, y_pred)
    mse = mean_squared_error(y_test, y_pred)
    mae = mean_absolute_error(y_test, y_pred)
    
    print(f"Model: {model_name}")
    print(f"WMAPE: {wmape_score}")
    print(f"R-squared: {r2}")
    print(f"Mean Squared Error: {mse}")
    print(f"Mean Absolute Error: {mae}")

    scores = cross_val_score(pipe, X_train_scaled, y_train, cv=5, scoring='neg_mean_squared_error')
    mse_scores = -scores  # Convert negative MSE back to positive
    print(f"Mean MSE: {mse_scores.mean()}")
    print()

Model: Linear Regression
WMAPE: 0.6861249619111771
R-squared: 0.7623472360057131
Mean Squared Error: 48.83985266814597
Mean Absolute Error: 5.010667001298484
Mean MSE: 48.09380150122859

Model: Polynomial Regression
WMAPE: 0.45167802636942483
R-squared: 0.8965102784998095
Mean Squared Error: 21.26810000349195
Mean Absolute Error: 3.3551680111133253
Mean MSE: 51792048266975.9

Model: Decision Tree
WMAPE: 0.2781228871798739
R-squared: 0.9329750836864842
Mean Squared Error: 13.774243492180128
Mean Absolute Error: 2.2994606809181324
Mean MSE: 14.52273151562468

Model: Random Forest
WMAPE: 0.21345549508203698
R-squared: 0.9635767145985283
Mean Squared Error: 7.485323809406546
Mean Absolute Error: 1.7359519247792952
Mean MSE: 8.03956394677178

Model: XGBoost
WMAPE: 0.21723970619796168
R-squared: 0.9708134271770196
Mean Squared Error: 5.9981120884280745
Mean Absolute Error: 1.682067036189562
Mean MSE: 5.967338338363651

Model: Neural Network
WMAPE: 0.4560367822503056
R-squared: 0.882979491038

In [93]:
# Perform cross-validation for each model in the pipeline
for model_name, pipe in pipelines:
    print(f"Model: {model_name}")
    
    if model_name == 'Neural Network':
        # ANN requires numpy array as input
        scores = cross_val_score(pipe, X_train_scaled, y_train, cv=5, scoring='neg_mean_squared_error')
        mse_scores = -scores  # Convert negative MSE back to positive
        print(f"Mean MSE: {mse_scores.mean()}")
    else:
        scores = cross_val_score(pipe, X_train_scaled, y_train, cv=5, scoring='neg_mean_absolute_error')
        mae_scores = -scores  # Convert negative MAE back to positive
        print(f"Mean MAE: {mae_scores.mean()}")
    
    print()

Model: Linear Regression
Mean MAE: 4.975656780348674

Model: Polynomial Regression
Mean MAE: 35540.090583458004

Model: Decision Tree
Mean MAE: 2.350670310107303

Model: Random Forest
Mean MAE: 1.7896239771705296

Model: XGBoost
Mean MAE: 1.6682504536880167

Model: Neural Network




















Mean MSE: 26.985120318079243



In [95]:
best_model = None
best_score = float('inf')  # Initialize with a high value

for model_name, pipe in pipelines:
    if model_name == 'Neural Network':
        scores = cross_val_score(pipe, X_train_scaled, y_train, cv=5, scoring='neg_mean_squared_error')
        score = -scores.mean()  # Convert negative MSE back to positive
    else:
        scores = cross_val_score(pipe, X_train_scaled, y_train, cv=5, scoring='neg_mean_absolute_error')
        score = -scores.mean()  # Convert negative MAE back to positive
        
    print(f"Model: {model_name}, Mean Score: {score}")
    
    if score < best_score:
        best_score = score
        best_model = model_name

print(f"Best Model: {best_model}, Best Score: {best_score}")


Model: Linear Regression, Mean Score: 4.975656780348674
Model: Polynomial Regression, Mean Score: 35540.090583458004
Model: Decision Tree, Mean Score: 2.350670310107303
Model: Random Forest, Mean Score: 1.7896239771705296
Model: XGBoost, Mean Score: 1.6682504536880167
Model: Neural Network, Mean Score: 21.6491864597907
Best Model: XGBoost, Best Score: 1.6682504536880167


In [96]:
from sklearn.model_selection import cross_val_score
import numpy as np

# Define a function to calculate WMAPE
def wmape(y_true, y_pred):
    weights = np.abs(y_true)
    wmape = np.sum(np.abs(y_true - y_pred) / weights) * 100.0 / np.sum(weights)
    return wmape

# Initialize best model and best score
best_model = None
best_score = float('inf')  # Initialize with a high value

for model_name, pipe in pipelines:
    if model_name == 'Neural Network':
        scores = cross_val_score(pipe, X_train_scaled, y_train, cv=5, scoring='neg_mean_squared_error')
        mse_scores = -scores  # Convert negative MSE back to positive
        wmape_scores = [wmape(y_train, y_pred) for y_pred in np.sqrt(mse_scores)]
        score = np.mean(wmape_scores)
    else:
        scores = cross_val_score(pipe, X_train_scaled, y_train, cv=5, scoring='neg_mean_absolute_error')
        mae_scores = -scores  # Convert negative MAE back to positive
        score = np.mean([wmape(y_train, y_pred) for y_pred in mae_scores])
    
    print(f"Model: {model_name}, Mean WMAPE Score: {score}")
    
    if score < best_score:
        best_score = score
        best_model = model_name

print(f"Best Model: {best_model}, Best Mean WMAPE Score: {best_score}")


Model: Linear Regression, Mean WMAPE Score: 2.697488028823642
Model: Polynomial Regression, Mean WMAPE Score: 5391.986880562631
Model: Decision Tree, Mean WMAPE Score: 3.094961973689344
Model: Random Forest, Mean WMAPE Score: 3.1799287955615307
Model: XGBoost, Mean WMAPE Score: 3.1983100271003897
Model: Neural Network, Mean WMAPE Score: 2.741592748773994
Best Model: Linear Regression, Best Mean WMAPE Score: 2.697488028823642
