In [1]:
import pandas as pd
import pickle
from prophet import Prophet
import os

# Load the raw inputs: base-station info, per-cell data and measured energy.
base = pd.read_csv('BSinfo.csv')
cell = pd.read_csv('CLdata.csv')
energy = pd.read_csv('ECdata.csv')

# Collapse the per-cell rows to one row per (BS, Time) by summing, then keep
# only the (Time, BS) pairs that also appear in the energy table.
CLdata_grouped = cell.groupby(['BS', 'Time']).sum().reset_index()
base_cell_energy = pd.merge(CLdata_grouped, energy, on=['Time', 'BS'], how='inner')

# Strip the literal 'B_' prefix (plain substring, not a regex) and parse timestamps.
base_cell_energy['BS'] = base_cell_energy['BS'].str.replace('B_', '', regex=False)
base_cell_energy['Time'] = pd.to_datetime(base_cell_energy['Time'])

# Get unique base station values from the dataset
unique_base_stations = base_cell_energy['BS'].unique()

# Create a directory to save models
save_dir = 'saved_models_load_ES_sum'
os.makedirs(save_dir, exist_ok=True)

# Hourly mean over all base stations. Only numeric/bool columns are averaged:
# the frame still carries the string 'BS' column, and taking the mean of a
# string column raises TypeError on pandas >= 2.0.
data = base_cell_energy.set_index('Time')
data_resampled = (
    data.select_dtypes(include=['number', 'bool'])
    .resample('H')
    .mean()
)

# Forward-fill hours without an energy value. Assign the result back instead of
# calling fillna(..., inplace=True) on a column selection, which is a chained
# assignment and does not reliably update the parent frame.
data_resampled['Energy'] = data_resampled['Energy'].ffill()

# Rename columns to the names Prophet expects ('ds' timestamp, 'y' target).
data_resampled = (
    data_resampled
    .rename(columns={'Energy': 'y'})
    .reset_index()
    .rename(columns={'Time': 'ds'})
)

# Create and fit the general Prophet model (no extra regressors).
general_model = Prophet()
general_model.fit(data_resampled)

# Save the general model inside save_dir.
# NOTE(review): Prophet's documentation recommends its JSON serialization
# helpers over pickle; switching would also require changing the loading code.
general_model_filename = os.path.join(save_dir, 'general_model.pkl')
with open(general_model_filename, 'wb') as f:
    pickle.dump(general_model, f)


# Train and save separate models for each base station.
# Energy-saving-mode regressors; gaps in these are filled with the column mean,
# while gaps in 'load' are filled with its mode.
es_mode_columns = ['ESMode1', 'ESMode2', 'ESMode6']

for bs in unique_base_stations:
    # Filter data for a specific base station
    bs_data = base_cell_energy[base_cell_energy['BS'] == bs]

    # Skip base stations with insufficient data
    if bs_data.shape[0] < 2:
        print(f"Skipping Base Station {bs} due to insufficient data")
        continue

    # Hourly mean of the numeric columns. set_index returns a new frame, so the
    # filtered slice of base_cell_energy is never mutated in place, and the
    # string 'BS' column is excluded because its mean raises TypeError on
    # pandas >= 2.0.
    bs_data_resampled = (
        bs_data.set_index('Time')
        .select_dtypes(include=['number', 'bool'])
        .resample('H')
        .mean()
    )

    # Forward-fill missing energy values; assign back rather than relying on a
    # chained in-place fillna, which may not update the parent frame.
    bs_data_resampled['Energy'] = bs_data_resampled['Energy'].ffill()
    bs_data_resampled = (
        bs_data_resampled
        .rename(columns={'Energy': 'y'})
        .reset_index()
        .rename(columns={'Time': 'ds'})
    )

    # Fill missing values in the 'load' column with the mode (first one if
    # there are several). mode() is empty when every value is NaN, in which
    # case no usable regressor exists and the station is skipped.
    load_modes = bs_data_resampled['load'].mode()
    if load_modes.empty:
        print(f"Skipping Base Station {bs} due to missing load data")
        continue
    bs_data_resampled['load'] = bs_data_resampled['load'].fillna(load_modes[0])

    # Fill missing values in each energy-saving-mode column with its mean.
    for column in es_mode_columns:
        column_mean = bs_data_resampled[column].mean()
        bs_data_resampled[column] = bs_data_resampled[column].fillna(column_mean)

    # Create and fit the Prophet model with 'load' and the ES modes as regressors.
    model = Prophet()
    for regressor in ['load'] + es_mode_columns:
        model.add_regressor(regressor)
    model.fit(bs_data_resampled)

    # Save the model inside save_dir; the 'B_' prefix stripped earlier is
    # restored in the file name.
    model_filename = os.path.join(save_dir, f'model_B_{bs}.pkl')
    with open(model_filename, 'wb') as f:
        pickle.dump(model, f)


22:58:56 - cmdstanpy - INFO - Chain [1] start processing
22:58:57 - cmdstanpy - INFO - Chain [1] done processing
22:58:57 - cmdstanpy - INFO - Chain [1] start processing
22:58:57 - cmdstanpy - INFO - Chain [1] done processing
22:58:57 - cmdstanpy - INFO - Chain [1] start processing
22:58:57 - cmdstanpy - INFO - Chain [1] done processing
22:58:57 - cmdstanpy - INFO - Chain [1] start processing
22:58:57 - cmdstanpy - INFO - Chain [1] done processing
22:58:57 - cmdstanpy - INFO - Chain [1] start processing
22:58:57 - cmdstanpy - INFO - Chain [1] done processing
22:58:58 - cmdstanpy - INFO - Chain [1] start processing
22:58:58 - cmdstanpy - INFO - Chain [1] done processing
22:58:58 - cmdstanpy - INFO - Chain [1] start processing
22:58:58 - cmdstanpy - INFO - Chain [1] done processing
22:58:58 - cmdstanpy - INFO - Chain [1] start processing
22:58:58 - cmdstanpy - INFO - Chain [1] done processing
22:58:58 - cmdstanpy - INFO - Chain [1] start processing
22:58:59 - cmdstanpy - INFO - Chain [1]

Skipping Base Station 835 due to insufficient data


23:01:35 - cmdstanpy - INFO - Chain [1] done processing
23:01:35 - cmdstanpy - INFO - Chain [1] start processing
23:01:35 - cmdstanpy - INFO - Chain [1] done processing
23:01:35 - cmdstanpy - INFO - Chain [1] start processing
23:01:35 - cmdstanpy - INFO - Chain [1] done processing
23:01:36 - cmdstanpy - INFO - Chain [1] start processing
23:01:36 - cmdstanpy - INFO - Chain [1] done processing
23:01:36 - cmdstanpy - INFO - Chain [1] start processing
23:01:36 - cmdstanpy - INFO - Chain [1] done processing
23:01:36 - cmdstanpy - INFO - Chain [1] start processing
23:01:37 - cmdstanpy - INFO - Chain [1] done processing
23:01:37 - cmdstanpy - INFO - Chain [1] start processing
23:01:37 - cmdstanpy - INFO - Chain [1] done processing
23:01:37 - cmdstanpy - INFO - Chain [1] start processing
23:01:37 - cmdstanpy - INFO - Chain [1] done processing
23:01:37 - cmdstanpy - INFO - Chain [1] start processing
23:01:38 - cmdstanpy - INFO - Chain [1] done processing
23:01:38 - cmdstanpy - INFO - Chain [1] 

Skipping Base Station 854 due to insufficient data


23:01:39 - cmdstanpy - INFO - Chain [1] done processing
23:01:39 - cmdstanpy - INFO - Chain [1] start processing
23:01:39 - cmdstanpy - INFO - Chain [1] done processing
23:01:39 - cmdstanpy - INFO - Chain [1] start processing
23:01:40 - cmdstanpy - INFO - Chain [1] done processing
23:01:40 - cmdstanpy - INFO - Chain [1] start processing
23:01:40 - cmdstanpy - INFO - Chain [1] done processing
23:01:40 - cmdstanpy - INFO - Chain [1] start processing
23:01:40 - cmdstanpy - INFO - Chain [1] done processing
23:01:41 - cmdstanpy - INFO - Chain [1] start processing
23:01:41 - cmdstanpy - INFO - Chain [1] done processing
23:01:41 - cmdstanpy - INFO - Chain [1] start processing
23:01:41 - cmdstanpy - INFO - Chain [1] done processing
23:01:41 - cmdstanpy - INFO - Chain [1] start processing
23:01:42 - cmdstanpy - INFO - Chain [1] done processing
23:01:42 - cmdstanpy - INFO - Chain [1] start processing
23:01:42 - cmdstanpy - INFO - Chain [1] done processing
23:01:42 - cmdstanpy - INFO - Chain [1] 

In [2]:
import pandas as pd
import pickle
import os

# Read the inputs for the prediction run and report the submission template size.
base = pd.read_csv('BSinfo.csv')
cell = pd.read_csv('CLdata.csv')
submit = pd.read_csv("PCprediction.csv")
print(submit.shape)

# One row per (BS, Time): sum the per-cell rows, then keep only the pairs that
# are requested in the submission template.
CLdata_grouped = cell.groupby(['BS', 'Time']).sum().reset_index()
feature_columns = ['Time', 'BS', 'Energy', 'load', 'ESMode1', 'ESMode2', 'ESMode6']
cell_submit = (
    pd.merge(CLdata_grouped, submit, on=['Time', 'BS'], how='inner')
    .assign(Energy=0)  # placeholder value
    .loc[:, feature_columns]
)
# Note: duplicate (Time, BS) rows are not removed here.
cell_submit['Time'] = pd.to_datetime(cell_submit['Time'])

unique_base_stations_submit = cell_submit['BS'].unique()
print(unique_base_stations_submit)

# Load the general (fallback) model written by the training cell.
save_dir = 'saved_models_load_ES_sum'
general_model_filename = os.path.join(save_dir, 'general_model.pkl')
with open(general_model_filename, 'rb') as model_file:
    general_model = pickle.load(model_file)


for bs in unique_base_stations_submit:
    # Load the corresponding Prophet model if available, otherwise use the general model
    model_filename = os.path.join(save_dir, f'model_{bs}.pkl')

    if os.path.exists(model_filename):
        with open(model_filename, "rb") as f:
            loaded_model = pickle.load(f)
    else:
        print(bs)
        print(f'model_{bs}.pkl')
        # Use the general model if the specific model is not available
        loaded_model = general_model

    # Filter new data for the specific base station
    bs_data_submit = cell_submit[cell_submit['BS'] == bs]

    # Build the 'future' frame: timestamps as 'ds' plus the regressor columns
    # the per-station models were trained with.
    future = bs_data_submit[['Time', 'load', 'ESMode1', 'ESMode2', 'ESMode6']].rename(
        columns={'Time': 'ds'}
    )

    # Use the loaded model to predict values for the new data
    forecast = loaded_model.predict(future)

    # Write predictions back by timestamp rather than by position: the forecast
    # rows are not guaranteed to be in the same order (or number) as this
    # station's rows in submit, so a positional assignment could misalign values
    # or fail on a length mismatch. Rows of submit without a matching forecast
    # timestamp receive NaN.
    yhat_by_time = forecast.drop_duplicates(subset='ds').set_index('ds')['yhat']
    bs_rows = submit['BS'] == bs
    submit.loc[bs_rows, 'Energy'] = (
        pd.to_datetime(submit.loc[bs_rows, 'Time']).map(yhat_by_time).to_numpy()
    )

# Build the final two-column submission: 'ID' is the parsed timestamp rendered
# as text, an underscore, and the base-station name; 'Energy' is the prediction.
submit = (
    submit
    .assign(Time=lambda frame: pd.to_datetime(frame['Time']))
    .assign(ID=lambda frame: frame['Time'].astype(str) + '_' + frame['BS'])
    .loc[:, ['ID', 'Energy']]
)

# Write the submission file (without the index) and report its size.
submit.to_csv('SampleSubmission__38_.csv', index=False)
print(submit.shape)

submit.head()

(26139, 4)
['B_0' 'B_1' 'B_10' ... 'B_997' 'B_998' 'B_999']
B_1000
model_B_1000.pkl
B_1001
model_B_1001.pkl
B_1002
model_B_1002.pkl
B_1008
model_B_1008.pkl
B_1009
model_B_1009.pkl
B_1010
model_B_1010.pkl
B_1011
model_B_1011.pkl
B_1012
model_B_1012.pkl
B_1013
model_B_1013.pkl
B_1014
model_B_1014.pkl
B_1015
model_B_1015.pkl
B_1016
model_B_1016.pkl
B_1017
model_B_1017.pkl
B_1019
model_B_1019.pkl
B_13
model_B_13.pkl
B_274
model_B_274.pkl
B_315
model_B_315.pkl
B_363
model_B_363.pkl
B_651
model_B_651.pkl
B_827
model_B_827.pkl
B_828
model_B_828.pkl
B_835
model_B_835.pkl
B_837
model_B_837.pkl
B_838
model_B_838.pkl
B_840
model_B_840.pkl
B_841
model_B_841.pkl
B_842
model_B_842.pkl
B_843
model_B_843.pkl
B_848
model_B_848.pkl
B_849
model_B_849.pkl
B_850
model_B_850.pkl
B_854
model_B_854.pkl
B_855
model_B_855.pkl
B_861
model_B_861.pkl
B_862
model_B_862.pkl
B_864
model_B_864.pkl
B_867
model_B_867.pkl
B_869
model_B_869.pkl
B_875
model_B_875.pkl
B_876
model_B_876.pkl
B_877
model_B_877.pkl
B_878
model_

Unnamed: 0,ID,Energy
0,2023-01-01 06:00:00_B_0,62.535943
1,2023-01-01 11:00:00_B_0,74.933075
2,2023-01-01 12:00:00_B_0,73.765103
3,2023-01-01 13:00:00_B_0,72.990438
4,2023-01-01 23:00:00_B_0,79.506291
