In [1]:
import pandas as pd
import numpy as np

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score

base=pd.read_csv('BSinfo.csv')
cell=pd.read_csv('CLdata.csv')
energy=pd.read_csv('ECdata.csv')
submit=pd.read_csv("PCprediction.csv")

base['BS'] = base['BS'].str.replace('B_', '')
base['CellName'] = base['CellName'].str.replace('Cell', '')
base['RUType'] = base['RUType'].str.replace('Type', '')
base['Mode'] = base['Mode'].str.replace('Mode', '')

cell['BS'] = cell['BS'].str.replace('B_', '')
cell['CellName'] = cell['CellName'].str.replace('Cell', '')
cell['Time'] = pd.to_datetime(cell['Time'])

energy['BS'] = energy['BS'].str.replace('B_', '')
energy['Time'] = pd.to_datetime(energy['Time'])

merged_df = pd.merge(energy, cell, on=['Time', 'BS'], how='left')
final = pd.merge(merged_df, base,  on=['BS', 'CellName'], how='left')
final['Time']=final['Time'].values.astype(float).reshape(-1, 1)  # Convert datetime to float and reshape

x=final
y=x.pop(item='Energy')

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(x, y, test_size=0.2, random_state=42)

# Create a StandardScaler instance
scaler = StandardScaler()

# Fit the scaler on the training data and transform both training and testing data
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Now X_train_scaled and X_test_scaled are scaled versions of the training and testing data


# Create a Linear Regression model
model = LinearRegression()

# Fit the model on the scaled training data
model.fit(X_train_scaled, y_train)

# Predict the target variable for the scaled testing data
y_pred = model.predict(X_test_scaled)

# Calculate the Weighted Mean Absolute Percentage Error (WMAPE)
def wmape(y_true, y_pred):
    weights = np.abs(y_true)
    wmape = np.sum(np.abs(y_true - y_pred) / weights) * 100.0 / np.sum(weights)
    return wmape

wmape_score = wmape(y_test, y_pred)
print("WMAPE:", wmape_score)

# Calculate the R-squared (R2) score
r2 = r2_score(y_test, y_pred)
print("R-squared:", r2)

# Calculate the Mean Squared Error (MSE)
mse = mean_squared_error(y_test, y_pred)
print("Mean Squared Error:", mse)

# Calculate the Mean Absolute Error (MAE)
mae = mean_absolute_error(y_test, y_pred)
print("Mean Absolute Error:", mae)

WMAPE: 0.6861249619111771
R-squared: 0.7623472360057131
Mean Squared Error: 48.83985266814597
Mean Absolute Error: 5.010667001298484


In [2]:
import pandas as pd
import numpy as np

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.tree import DecisionTreeRegressor
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score

base=pd.read_csv('BSinfo.csv')
cell=pd.read_csv('CLdata.csv')
energy=pd.read_csv('ECdata.csv')
submit=pd.read_csv("PCprediction.csv")

base['BS'] = base['BS'].str.replace('B_', '')
base['CellName'] = base['CellName'].str.replace('Cell', '')
base['RUType'] = base['RUType'].str.replace('Type', '')
base['Mode'] = base['Mode'].str.replace('Mode', '')

cell['BS'] = cell['BS'].str.replace('B_', '')
cell['CellName'] = cell['CellName'].str.replace('Cell', '')
cell['Time'] = pd.to_datetime(cell['Time'])

energy['BS'] = energy['BS'].str.replace('B_', '')
energy['Time'] = pd.to_datetime(energy['Time'])

merged_df = pd.merge(energy, cell, on=['Time', 'BS'], how='left')
final = pd.merge(merged_df, base,  on=['BS', 'CellName'], how='left')
final['Time']=final['Time'].values.astype(float).reshape(-1, 1)  # Convert datetime to float and reshape

x=final
y=x.pop(item='Energy')

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(x, y, test_size=0.2, random_state=42)

# Create a StandardScaler instance
scaler = StandardScaler()

# Fit the scaler on the training data and transform both training and testing data
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Now X_train_scaled and X_test_scaled are scaled versions of the training and testing data


# Create a Decision Tree Regressor model
model = DecisionTreeRegressor(random_state=42)

# Fit the model on the scaled training data
model.fit(X_train_scaled, y_train)

# Predict the target variable for the scaled testing data
y_pred = model.predict(X_test_scaled)

# Calculate the Weighted Mean Absolute Percentage Error (WMAPE)
def wmape(y_true, y_pred):
    weights = np.abs(y_true)
    wmape = np.sum(np.abs(y_true - y_pred) / weights) * 100.0 / np.sum(weights)
    return wmape

wmape_score = wmape(y_test, y_pred)
print("WMAPE:", wmape_score)

# Calculate the R-squared (R2) score
r2 = r2_score(y_test, y_pred)
print("R-squared:", r2)

# Calculate the Mean Squared Error (MSE)
mse = mean_squared_error(y_test, y_pred)
print("Mean Squared Error:", mse)

# Calculate the Mean Absolute Error (MAE)
mae = mean_absolute_error(y_test, y_pred)
print("Mean Absolute Error:", mae)

WMAPE: 0.2781228871798739
R-squared: 0.9329750836864842
Mean Squared Error: 13.774243492180128
Mean Absolute Error: 2.2994606809181324


In [None]:
import pandas as pd
import numpy as np
from scipy import stats

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.tree import DecisionTreeRegressor

base = pd.read_csv('BSinfo.csv')
cell = pd.read_csv('CLdata.csv')
energy = pd.read_csv('ECdata.csv')
submit = pd.read_csv("PCprediction.csv")

base['BS'] = base['BS'].str.replace('B_', '')
base['CellName'] = base['CellName'].str.replace('Cell', '')
base['RUType'] = base['RUType'].str.replace('Type', '')
base['Mode'] = base['Mode'].str.replace('Mode', '')

cell['BS'] = cell['BS'].str.replace('B_', '')
cell['CellName'] = cell['CellName'].str.replace('Cell', '')
cell['Time'] = pd.to_datetime(cell['Time'])

energy['BS'] = energy['BS'].str.replace('B_', '')
energy['Time'] = pd.to_datetime(energy['Time'])

merged_df = pd.merge(energy, cell, on=['Time', 'BS'], how='left')
final = pd.merge(merged_df, base, on=['BS', 'CellName'], how='left')
final['Time'] = final['Time'].values.astype(float).reshape(-1, 1)  # Convert datetime to float and reshape
print(final.dtypes)
# Convert specified columns from object to int
int_columns = ['BS', 'CellName', 'RUType', 'Mode']
final[int_columns] = final[int_columns].astype(int)


submit['Time'] = pd.to_datetime(submit['Time'])
print(submit.shape)
submit.drop(['w'], axis=1, inplace=True)
# Create the 'ID' column by joining 'Time' and 'BS' columns
submit['ID'] = submit['Time'].astype(str) + '_' + submit['BS']
submit['BS'] = submit['BS'].str.replace('B_', '')
submit['Time'] = submit['Time'].values.astype(float).reshape(-1, 1)  # Convert datetime to float and reshape
submit['BS'] = submit['BS'].astype(int)
submit = submit[['ID', 'Time', 'BS', 'Energy']]
test = submit[['ID', 'Time', 'BS', 'Energy']]

numeric_cols = ['CellName', 'load', 'ESMode1', 'ESMode2', 'ESMode3',
                'ESMode4', 'ESMode5', 'ESMode6', 'RUType', 'Mode', 'Frequency',
                'Bandwidth', 'Antennas', 'TXpower']
# grouped_df = final.groupby(["BS"])[numeric_cols].median().reset_index()
def calculate_median(group):
    numeric_group = group[numeric_cols]
    return pd.Series(np.median(numeric_group), index=numeric_cols)

grouped_df = final.groupby("BS")[numeric_cols].apply(calculate_median).reset_index()

print(grouped_df.shape)
grouped_df

# Merge 'test' with 'final' (excluding 'Time' column) on the 'BS' column using a left join
merged_df = test.merge(grouped_df, on='BS', how='left')

# Find median values for each column (excluding 'Time') in 'final' DataFrame
median_values = grouped_df.median()

# Fill missing values in merged DataFrame with median values
merged_df.fillna(median_values, inplace=True)
print(merged_df.shape)
merged_df

X_train = final
y_train = X_train.pop(item='Energy')

X_test = merged_df
X_test.pop(item="ID")
y_test = X_test.pop(item='Energy')

# Create a Decision Tree Regressor model
model = DecisionTreeRegressor(random_state=42)

# Fit the model on the training data
model.fit(X_train, y_train)

# Predict the target variable for the testing data
y_pred = model.predict(X_test)

submit['Energy'] = y_pred
submit = submit[['ID', 'Energy']]
submit.to_csv('SampleSubmission__23_.csv', index=False)