In [1]:
import pandas as pd
import pickle
from prophet import Prophet
import os

# Load and preprocess your data
base = pd.read_csv('../../Dataset/BSinfo.csv')
cell = pd.read_csv('../../Dataset/CLdata.csv')
energy = pd.read_csv('../../Dataset/ECdata.csv')

CLdata_grouped = cell.groupby(['BS', 'Time']).sum().reset_index()
base_cell_energy = pd.merge(CLdata_grouped, energy,
                            on=('Time', 'BS'), how='inner')
base_cell_energy['BS'] = base_cell_energy['BS'].str.replace('B_', '')
base_cell_energy['Time'] = pd.to_datetime(base_cell_energy['Time'])

# Get unique base station values from the dataset
unique_base_stations = base_cell_energy['BS'].unique()

# Create a directory to save models
save_dir = 'saved_models_load_ES_sum'
os.makedirs(save_dir, exist_ok=True)


data = base_cell_energy.copy()
non_numeric_columns = ['CellName']
# Resample the data and fill missing values
data.set_index('Time', inplace=True)
data_resampled = data.drop(columns=non_numeric_columns).resample('H').mean()
data_resampled['Energy'].fillna(method='ffill', inplace=True)

# Rename columns for Prophet
data_resampled.rename(columns={'Energy': 'y'}, inplace=True)
data_resampled.reset_index(inplace=True)
data_resampled.rename(columns={'Time': 'ds'}, inplace=True)

# Create and fit the general Prophet model
general_model = Prophet()
general_model.fit(data_resampled)

# Save the general model to a file in the 'saved_models' folder
general_model_filename = os.path.join(save_dir, 'general_model.pkl')
with open(general_model_filename, 'wb') as f:
    pickle.dump(general_model, f)


# Train and save separate models for each base station
for bs in unique_base_stations:
    # Filter data for a specific base station
    bs_data = base_cell_energy[base_cell_energy['BS'] == bs]

    # Skip base stations with insufficient data
    if bs_data.shape[0] < 2:
        print(f"Skipping Base Station {bs} due to insufficient data")
        continue
    # Resample the data and fill missing values (rest of the preprocessing steps)
    bs_data.set_index('Time', inplace=True)
    bs_data_resampled = bs_data.drop(columns=non_numeric_columns).resample('H').mean()
    bs_data_resampled['Energy'].fillna(method='ffill', inplace=True)
    bs_data_resampled.rename(columns={'Energy': 'y'}, inplace=True)
    bs_data_resampled.reset_index(inplace=True)
    bs_data_resampled.rename(columns={'Time': 'ds'}, inplace=True)

    # Fill missing values in the 'load' column with the mode
    # Get the first mode if there are multiple
    mode_load = bs_data_resampled['load'].mode()[0]
    bs_data_resampled['load'].fillna(mode_load, inplace=True)

    # Fill missing values in the 'ESMode1' column with the mean
    mean_ES1 = bs_data_resampled['ESMode1'].mean()
    bs_data_resampled['ESMode1'].fillna(mean_ES1, inplace=True)

    # Fill missing values in the 'ESMode2' column with the mean
    mean_ES2 = bs_data_resampled['ESMode2'].mean()
    bs_data_resampled['ESMode2'].fillna(mean_ES2, inplace=True)

    # Fill missing values in the 'ESMode6' column with the mean
    mean_ES6 = bs_data_resampled['ESMode6'].mean()
    bs_data_resampled['ESMode6'].fillna(mean_ES6, inplace=True)

    # Create and fit the Prophet model
    model = Prophet()
    model.add_regressor('load')
    model.add_regressor('ESMode1')
    model.add_regressor('ESMode2')
    model.add_regressor('ESMode6')
    model.fit(bs_data_resampled)

    # Save the model to a file in the 'saved_models' folder
    model_filename = os.path.join(save_dir, f'model_B_{bs}.pkl')
    with open(model_filename, 'wb') as f:
        pickle.dump(model, f)

  from .autonotebook import tqdm as notebook_tqdm
Importing plotly failed. Interactive plots will not work.
09:19:08 - cmdstanpy - INFO - Chain [1] start processing
09:19:08 - cmdstanpy - INFO - Chain [1] done processing
09:19:09 - cmdstanpy - INFO - Chain [1] start processing
09:19:09 - cmdstanpy - INFO - Chain [1] done processing
09:19:09 - cmdstanpy - INFO - Chain [1] start processing
09:19:09 - cmdstanpy - INFO - Chain [1] done processing
09:19:10 - cmdstanpy - INFO - Chain [1] start processing
09:19:10 - cmdstanpy - INFO - Chain [1] done processing
09:19:10 - cmdstanpy - INFO - Chain [1] start processing
09:19:10 - cmdstanpy - INFO - Chain [1] done processing
09:19:11 - cmdstanpy - INFO - Chain [1] start processing
09:19:11 - cmdstanpy - INFO - Chain [1] done processing
09:19:11 - cmdstanpy - INFO - Chain [1] start processing
09:19:11 - cmdstanpy - INFO - Chain [1] done processing
09:19:11 - cmdstanpy - INFO - Chain [1] start processing
09:19:12 - cmdstanpy - INFO - Chain [1] done

Skipping Base Station 835 due to insufficient data


09:25:48 - cmdstanpy - INFO - Chain [1] done processing
09:25:48 - cmdstanpy - INFO - Chain [1] start processing
09:25:48 - cmdstanpy - INFO - Chain [1] done processing
09:25:49 - cmdstanpy - INFO - Chain [1] start processing
09:25:49 - cmdstanpy - INFO - Chain [1] done processing
09:25:49 - cmdstanpy - INFO - Chain [1] start processing
09:25:49 - cmdstanpy - INFO - Chain [1] done processing
09:25:50 - cmdstanpy - INFO - Chain [1] start processing
09:25:50 - cmdstanpy - INFO - Chain [1] done processing
09:25:50 - cmdstanpy - INFO - Chain [1] start processing
09:25:50 - cmdstanpy - INFO - Chain [1] done processing
09:25:51 - cmdstanpy - INFO - Chain [1] start processing
09:25:51 - cmdstanpy - INFO - Chain [1] done processing
09:25:51 - cmdstanpy - INFO - Chain [1] start processing
09:25:51 - cmdstanpy - INFO - Chain [1] done processing
09:25:52 - cmdstanpy - INFO - Chain [1] start processing
09:25:52 - cmdstanpy - INFO - Chain [1] done processing
09:25:52 - cmdstanpy - INFO - Chain [1] 

Skipping Base Station 854 due to insufficient data


09:25:53 - cmdstanpy - INFO - Chain [1] start processing
09:25:54 - cmdstanpy - INFO - Chain [1] done processing
09:25:54 - cmdstanpy - INFO - Chain [1] start processing
09:25:54 - cmdstanpy - INFO - Chain [1] done processing
09:25:54 - cmdstanpy - INFO - Chain [1] start processing
09:25:55 - cmdstanpy - INFO - Chain [1] done processing
09:25:55 - cmdstanpy - INFO - Chain [1] start processing
09:25:55 - cmdstanpy - INFO - Chain [1] done processing
09:25:56 - cmdstanpy - INFO - Chain [1] start processing
09:25:56 - cmdstanpy - INFO - Chain [1] done processing
09:25:56 - cmdstanpy - INFO - Chain [1] start processing
09:25:56 - cmdstanpy - INFO - Chain [1] done processing
09:25:57 - cmdstanpy - INFO - Chain [1] start processing
09:25:57 - cmdstanpy - INFO - Chain [1] done processing
09:25:57 - cmdstanpy - INFO - Chain [1] start processing
09:25:57 - cmdstanpy - INFO - Chain [1] done processing
09:25:58 - cmdstanpy - INFO - Chain [1] start processing
09:25:58 - cmdstanpy - INFO - Chain [1]

In [27]:
import pandas as pd
import pickle
import os

# Load the new dataset
base = pd.read_csv('../../Dataset/BSinfo.csv')
cell = pd.read_csv('../../Dataset/CLdata.csv')
submit = pd.read_csv("../../Dataset/SampleSubmission.csv")
# print("cell:",cell)
# print("base:",base)
# print("submit:",submit)



CLdata_grouped = cell.groupby([ 'Time','BS']).sum().reset_index()
cell_submit = pd.merge(CLdata_grouped, submit, on=('Time', 'BS'), how='inner')
CLdata_grouped.head()

Unnamed: 0,Time,BS,CellName,load,ESMode1,ESMode2,ESMode3,ESMode4,ESMode5,ESMode6
0,1/1/2023 10:00,B_0,Cell0,0.624745,0.0,0.0,0.0,0,0.0,0.0
1,1/1/2023 10:00,B_1,Cell0,0.02809,0.0,0.0,0.0,0,0.0,0.0
2,1/1/2023 10:00,B_10,Cell0,0.020947,0.0,0.0,0.0,0,0.0,0.0
3,1/1/2023 10:00,B_100,Cell0,0.07193,0.0,0.0,0.0,0,0.0,0.0
4,1/1/2023 10:00,B_101,Cell0,0.01697,0.0,0.0,0.0,0,0.0,0.0


In [24]:
cell.head()

Unnamed: 0,Time,BS,CellName,load,ESMode1,ESMode2,ESMode3,ESMode4,ESMode5,ESMode6
0,1/1/2023 1:00,B_0,Cell0,0.487936,0.0,0.0,0.0,0,0.0,0.0
1,1/1/2023 2:00,B_0,Cell0,0.344468,0.0,0.0,0.0,0,0.0,0.0
2,1/1/2023 3:00,B_0,Cell0,0.193766,0.0,0.0,0.0,0,0.0,0.0
3,1/1/2023 4:00,B_0,Cell0,0.222383,0.0,0.0,0.0,0,0.0,0.0
4,1/1/2023 5:00,B_0,Cell0,0.175436,0.0,0.0,0.0,0,0.0,0.0


In [25]:
CLdata_grouped.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 118768 entries, 0 to 118767
Data columns (total 10 columns):
 #   Column    Non-Null Count   Dtype  
---  ------    --------------   -----  
 0   BS        118768 non-null  object 
 1   Time      118768 non-null  object 
 2   CellName  118768 non-null  object 
 3   load      118768 non-null  float64
 4   ESMode1   118768 non-null  float64
 5   ESMode2   118768 non-null  float64
 6   ESMode3   118768 non-null  float64
 7   ESMode4   118768 non-null  int64  
 8   ESMode5   118768 non-null  float64
 9   ESMode6   118768 non-null  float64
dtypes: float64(6), int64(1), object(3)
memory usage: 9.1+ MB


In [28]:
cell_submit.head()

Unnamed: 0,Time,BS,CellName,load,ESMode1,ESMode2,ESMode3,ESMode4,ESMode5,ESMode6,Energy,w


In [20]:
import pandas as pd
import pickle
import os

# Load the new dataset
base = pd.read_csv('../../Dataset/BSinfo.csv')
cell = pd.read_csv('../../Dataset/CLdata.csv')
submit = pd.read_csv("../../Dataset/SampleSubmission.csv")
# print("cell:",cell)
# print("base:",base)
# print("submit:",submit)



CLdata_grouped = cell.groupby(['BS', 'Time']).sum().reset_index()
cell_submit = pd.merge(CLdata_grouped, submit, on=('Time', 'BS'), how='inner')

print('cell',CLdata_grouped)

cell_submit['Energy'] = 0
# cell_submit = cell_submit.drop_duplicates(subset=['Time', 'BS'], keep='first')
cell_submit = cell_submit[['Time', 'BS', 'Energy','load', 'ESMode1', 'ESMode2', 'ESMode6']]
cell_submit['Time'] = pd.to_datetime(cell_submit['Time'])
unique_base_stations_submit = cell_submit['BS'].unique()
print(unique_base_stations_submit)

# print(cell_submit)

# Load the general model
save_dir = 'saved_models_load_ES_sum'
general_model_filename = os.path.join(save_dir, 'general_model.pkl')
with open(general_model_filename, 'rb') as f:
    general_model = pickle.load(f)


for bs in unique_base_stations_submit:
    # Load the corresponding Prophet model if available, otherwise use the general model
    model_filename = os.path.join(save_dir, f'model_{bs}.pkl')

    if os.path.exists(model_filename):
        with open(model_filename, "rb") as f:
            loaded_model = pickle.load(f)
    else:
        print(bs)
        print(f'model_{bs}.pkl')
        # Use the general model if the specific model is not available
        loaded_model = general_model

    # Filter new data for the specific base station
    bs_data_submit = cell_submit[cell_submit['BS'] == bs]

    # Create the 'future' DataFrame for prediction
    future = pd.DataFrame({'ds': bs_data_submit['Time']})

    # Add columns to the 'future' DataFrame
    future['load'] = bs_data_submit['load']
    future['ESMode1'] = bs_data_submit['ESMode1']
    future['ESMode2'] = bs_data_submit['ESMode2']
    future['ESMode6'] = bs_data_submit['ESMode6']

    # Use the loaded model to predict values for the new data
    forecast = loaded_model.predict(future)
    # print(forecast,future)
    # Update the 'Energy' column in the submit DataFrame with predictions
    submit.loc[submit['BS'] == bs, 'Energy'] = forecast['yhat'].values

submit['Time'] = pd.to_datetime(submit['Time'])

# Create the 'ID' column by joining 'Time' and 'BS' columns
submit['ID'] = submit['Time'].astype(str) + '_' + submit['BS']

# Remove unnecessary columns and reorder columns
submit = submit[['ID', 'Energy']]

# Save the submission file
submit.to_csv('SampleSubmission__38_.csv', index=False)
print(submit.shape)

submit.head()

cell            BS            Time CellName      load  ESMode1  ESMode2  ESMode3  \
0         B_0  1/1/2023 10:00    Cell0  0.624745      0.0      0.0      0.0   
1         B_0  1/1/2023 11:00    Cell0  0.522096      0.0      0.0      0.0   
2         B_0  1/1/2023 12:00    Cell0  0.439968      0.0      0.0      0.0   
3         B_0  1/1/2023 13:00    Cell0  0.482723      0.0      0.0      0.0   
4         B_0  1/1/2023 14:00    Cell0  0.437926      0.0      0.0      0.0   
...       ...             ...      ...       ...      ...      ...      ...   
118763  B_999   1/2/2023 5:00    Cell0  0.049673      0.0      0.0      0.0   
118764  B_999   1/2/2023 6:00    Cell0  0.054962      0.0      0.0      0.0   
118765  B_999   1/2/2023 7:00    Cell0  0.060288      0.0      0.0      0.0   
118766  B_999   1/2/2023 8:00    Cell0  0.066192      0.0      0.0      0.0   
118767  B_999   1/2/2023 9:00    Cell0  0.077212      0.0      0.0      0.0   

        ESMode4  ESMode5  ESMode6  
0         

Unnamed: 0,ID,Energy
0,2023-01-01 06:00:00_B_0,
1,2023-01-01 11:00:00_B_0,
2,2023-01-01 12:00:00_B_0,
3,2023-01-01 13:00:00_B_0,
4,2023-01-01 23:00:00_B_0,
