In [1]:
import pandas as pd
import pickle
from prophet import Prophet
import os

# Read the three raw tables: base-station info, per-cell data and energy readings.
base = pd.read_csv('BSinfo.csv')
cell = pd.read_csv('CLdata.csv')
energy = pd.read_csv('ECdata.csv')

# Attach base-station attributes to every cell row, then keep only the
# (Time, BS) pairs that also have an energy reading.
base_cell = cell.merge(base, on=['BS', 'CellName'], how='left')
base_cell_energy = base_cell.merge(energy, on=['Time', 'BS'], how='inner')

# Strip the textual prefix from each identifier column and store it as an integer.
for column, prefix in (
    ('BS', 'B_'),
    ('CellName', 'Cell'),
    ('RUType', 'Type'),
    ('Mode', 'Mode'),
):
    base_cell_energy[column] = base_cell_energy[column].str.replace(prefix, '').astype(int)

# Parse timestamps so the frame can be resampled on a time index later.
base_cell_energy['Time'] = pd.to_datetime(base_cell_energy['Time'])

# Base stations present in the training data (integer ids after prefix stripping).
unique_base_stations = base_cell_energy['BS'].unique()

# Directory that receives every pickled model.
save_dir = 'saved_models'
os.makedirs(save_dir, exist_ok=True)

# Time-indexed copy of the training frame; set_index returns a new object,
# so base_cell_energy itself keeps 'Time' as a regular column.
data = base_cell_energy.set_index('Time')

# Hourly mean over all rows (all base stations pooled together).
data_resampled = data.resample('H').mean()

# Forward-fill hours with no reading. Assign the result back explicitly:
# `df[col].fillna(..., inplace=True)` acts on a temporary selection and is a
# no-op under pandas Copy-on-Write, and `fillna(method=...)` is deprecated.
data_resampled['Energy'] = data_resampled['Energy'].ffill()

# Prophet expects the timestamp column to be named 'ds' and the target 'y'.
data_resampled = (
    data_resampled
    .rename(columns={'Energy': 'y'})
    .reset_index()
    .rename(columns={'Time': 'ds'})
)

# Fit the general (fallback) model. Only 'ds' and 'y' are passed because
# those are the only columns a plain Prophet() model uses.
general_model = Prophet()
general_model.fit(data_resampled[['ds', 'y']])

# Save the general model to a file in the 'saved_models' folder
general_model_filename = os.path.join(save_dir, 'general_model.pkl')
with open(general_model_filename, 'wb') as f:
    pickle.dump(general_model, f)


# Train and save a separate Prophet model for each base station.
for bs in unique_base_stations:
    # Rows belonging to this base station only.
    bs_data = base_cell_energy[base_cell_energy['BS'] == bs]

    # Hourly mean on a time index. set_index (not inplace) builds a new frame,
    # so the filtered slice of base_cell_energy is never mutated.
    bs_data_resampled = bs_data.set_index('Time').resample('H').mean()

    # Forward-fill missing hours via explicit assignment; the chained
    # `fillna(..., inplace=True)` form does nothing under Copy-on-Write.
    bs_data_resampled['Energy'] = bs_data_resampled['Energy'].ffill()

    # Prophet expects the columns 'ds' (timestamp) and 'y' (target).
    bs_data_resampled = (
        bs_data_resampled
        .rename(columns={'Energy': 'y'})
        .reset_index()
        .rename(columns={'Time': 'ds'})
    )

    # Prophet refuses to fit on fewer than 2 non-NaN observations. Check the
    # resampled series rather than the raw row count: several cell rows can
    # share one timestamp and collapse into a single hourly point.
    if bs_data_resampled['y'].notna().sum() < 2:
        print(f"Skipping Base Station {bs} due to insufficient data")
        continue

    # Create and fit the Prophet model on the two columns it actually uses.
    model = Prophet()
    model.fit(bs_data_resampled[['ds', 'y']])

    # Save the model to a file in the 'saved_models' folder
    model_filename = os.path.join(save_dir, f'model_B_{bs}.pkl')
    with open(model_filename, 'wb') as f:
        pickle.dump(model, f)


15:18:57 - cmdstanpy - INFO - Chain [1] start processing
15:18:58 - cmdstanpy - INFO - Chain [1] done processing
15:18:58 - cmdstanpy - INFO - Chain [1] start processing
15:18:58 - cmdstanpy - INFO - Chain [1] done processing
15:18:58 - cmdstanpy - INFO - Chain [1] start processing
15:18:58 - cmdstanpy - INFO - Chain [1] done processing
15:18:58 - cmdstanpy - INFO - Chain [1] start processing
15:18:58 - cmdstanpy - INFO - Chain [1] done processing
15:18:58 - cmdstanpy - INFO - Chain [1] start processing
15:18:58 - cmdstanpy - INFO - Chain [1] done processing
15:18:58 - cmdstanpy - INFO - Chain [1] start processing
15:18:58 - cmdstanpy - INFO - Chain [1] done processing
15:18:59 - cmdstanpy - INFO - Chain [1] start processing
15:18:59 - cmdstanpy - INFO - Chain [1] done processing
15:18:59 - cmdstanpy - INFO - Chain [1] start processing
15:18:59 - cmdstanpy - INFO - Chain [1] done processing
15:18:59 - cmdstanpy - INFO - Chain [1] start processing
15:18:59 - cmdstanpy - INFO - Chain [1]

Skipping Base Station 835 due to insufficient data


15:21:37 - cmdstanpy - INFO - Chain [1] done processing
15:21:37 - cmdstanpy - INFO - Chain [1] start processing
15:21:37 - cmdstanpy - INFO - Chain [1] done processing
15:21:37 - cmdstanpy - INFO - Chain [1] start processing
15:21:37 - cmdstanpy - INFO - Chain [1] done processing
15:21:38 - cmdstanpy - INFO - Chain [1] start processing
15:21:38 - cmdstanpy - INFO - Chain [1] done processing
15:21:38 - cmdstanpy - INFO - Chain [1] start processing
15:21:38 - cmdstanpy - INFO - Chain [1] done processing
15:21:38 - cmdstanpy - INFO - Chain [1] start processing
15:21:38 - cmdstanpy - INFO - Chain [1] done processing
15:21:38 - cmdstanpy - INFO - Chain [1] start processing
15:21:39 - cmdstanpy - INFO - Chain [1] done processing
15:21:39 - cmdstanpy - INFO - Chain [1] start processing
15:21:39 - cmdstanpy - INFO - Chain [1] done processing
15:21:39 - cmdstanpy - INFO - Chain [1] start processing
15:21:39 - cmdstanpy - INFO - Chain [1] done processing
15:21:39 - cmdstanpy - INFO - Chain [1] 

Skipping Base Station 854 due to insufficient data


15:21:40 - cmdstanpy - INFO - Chain [1] done processing
15:21:40 - cmdstanpy - INFO - Chain [1] start processing
15:21:40 - cmdstanpy - INFO - Chain [1] done processing
15:21:40 - cmdstanpy - INFO - Chain [1] start processing
15:21:40 - cmdstanpy - INFO - Chain [1] done processing
15:21:40 - cmdstanpy - INFO - Chain [1] start processing
15:21:40 - cmdstanpy - INFO - Chain [1] done processing
15:21:41 - cmdstanpy - INFO - Chain [1] start processing
15:21:41 - cmdstanpy - INFO - Chain [1] done processing
15:21:41 - cmdstanpy - INFO - Chain [1] start processing
15:21:41 - cmdstanpy - INFO - Chain [1] done processing
15:21:41 - cmdstanpy - INFO - Chain [1] start processing
15:21:41 - cmdstanpy - INFO - Chain [1] done processing
15:21:41 - cmdstanpy - INFO - Chain [1] start processing
15:21:42 - cmdstanpy - INFO - Chain [1] done processing
15:21:42 - cmdstanpy - INFO - Chain [1] start processing
15:21:42 - cmdstanpy - INFO - Chain [1] done processing
15:21:42 - cmdstanpy - INFO - Chain [1] 

In [2]:
import pandas as pd
import pickle
from prophet import Prophet
import os

# Read the static tables again together with the prediction template.
base = pd.read_csv('BSinfo.csv')
cell = pd.read_csv('CLdata.csv')
submit = pd.read_csv("PCprediction.csv")
print(submit.shape)

# The 'w' column is not needed for prediction.
submit = submit.drop(columns=['w'])

base_cell = cell.merge(base, on=['BS', 'CellName'], how='left')

# One feature row per (Time, BS) pair requested in the template.
# NOTE(review): nothing in the visible remainder of this cell reads
# base_cell_submit - confirm whether a later cell depends on it.
base_cell_submit = (
    submit
    .merge(base_cell, on=['Time', 'BS'], how='left')
    .drop_duplicates(subset=['Time', 'BS'], keep='first')
)

# Strip the textual prefix from each identifier column and store it as an integer.
for column, prefix in (
    ('BS', 'B_'),
    ('CellName', 'Cell'),
    ('RUType', 'Type'),
    ('Mode', 'Mode'),
):
    base_cell_submit[column] = base_cell_submit[column].str.replace(prefix, '').astype(int)
base_cell_submit['Time'] = pd.to_datetime(base_cell_submit['Time'])

base_cell_submit = base_cell_submit.drop(columns=['Energy'])

# Base-station ids are kept in their original 'B_<n>' string form here,
# which is the form the model file names are built from.
unique_base_stations_submit = submit['BS'].unique()
print(unique_base_stations_submit)


# Load the general model, used as a fallback for base stations that have no
# model of their own.
# NOTE: pickle.load can execute arbitrary code embedded in the file. Only load
# model files written by the training cell, never pickles from an untrusted source.
save_dir = 'saved_models'
general_model_filename = os.path.join(save_dir, 'general_model.pkl')
with open(general_model_filename, 'rb') as f:
    general_model = pickle.load(f)

# Predict energy consumption for each case in the submit dataset.
# Initialise as float: the forecasts written below are floats, and writing
# them into an integer column forces an implicit dtype upcast.
submit['Energy'] = 0.0

for bs in unique_base_stations_submit:
    # bs is of the form 'B_<n>', so this resolves to 'model_B_<n>.pkl'.
    model_filename = os.path.join(save_dir, f'model_{bs}.pkl')

    if os.path.exists(model_filename):
        with open(model_filename, "rb") as f:
            loaded_model = pickle.load(f)
    else:
        print(bs)
        print(f'model_{bs}.pkl')
        # Use the general model if the specific model is not available
        loaded_model = general_model

    # Rows of the submission template that belong to this base station.
    bs_data_submit = submit[submit['BS'] == bs]

    # Prophet.predict sorts its input by 'ds' and resets the index, so the
    # forecast rows come back in time order, not in the caller's row order.
    # Sort here (stable) and keep the original index labels so every
    # prediction can be written back to exactly the row it was made for.
    future = (
        pd.DataFrame({'ds': pd.to_datetime(bs_data_submit['Time'])})
        .sort_values('ds', kind='mergesort')
    )

    # Use the loaded model to predict values for the new data
    forecast = loaded_model.predict(future)

    # Write predictions back by index label instead of relying on position.
    submit.loc[future.index, 'Energy'] = forecast['yhat'].to_numpy()

submit['Time'] = pd.to_datetime(submit['Time'])

# Create the 'ID' column by joining 'Time' and 'BS' columns
submit['ID'] = submit['Time'].astype(str) + '_' + submit['BS']

# Remove unnecessary columns and reorder columns
submit = submit[['ID', 'Energy']]

# Save the submission file
submit.to_csv('SampleSubmission__37_.csv', index=False)
print(submit.shape)

submit.head()

(26139, 4)
['B_0' 'B_1' 'B_2' ... 'B_1017' 'B_1018' 'B_1019']
B_13
model_B_13.pkl
B_274
model_B_274.pkl
B_315
model_B_315.pkl
B_363
model_B_363.pkl
B_651
model_B_651.pkl
B_827
model_B_827.pkl
B_828
model_B_828.pkl
B_835
model_B_835.pkl
B_837
model_B_837.pkl
B_838
model_B_838.pkl
B_840
model_B_840.pkl
B_841
model_B_841.pkl
B_842
model_B_842.pkl
B_843
model_B_843.pkl
B_848
model_B_848.pkl
B_849
model_B_849.pkl
B_850
model_B_850.pkl
B_854
model_B_854.pkl
B_855
model_B_855.pkl
B_861
model_B_861.pkl
B_862
model_B_862.pkl
B_864
model_B_864.pkl
B_867
model_B_867.pkl
B_869
model_B_869.pkl
B_875
model_B_875.pkl
B_876
model_B_876.pkl
B_877
model_B_877.pkl
B_878
model_B_878.pkl
B_879
model_B_879.pkl
B_880
model_B_880.pkl
B_881
model_B_881.pkl
B_882
model_B_882.pkl
B_893
model_B_893.pkl
B_894
model_B_894.pkl
B_895
model_B_895.pkl
B_897
model_B_897.pkl
B_898
model_B_898.pkl
B_899
model_B_899.pkl
B_902
model_B_902.pkl
B_903
model_B_903.pkl
B_904
model_B_904.pkl
B_908
model_B_908.pkl
B_909
model_B_90

Unnamed: 0,ID,Energy
0,2023-01-01 06:00:00_B_0,63.279875
1,2023-01-01 11:00:00_B_0,75.453925
2,2023-01-01 12:00:00_B_0,74.104447
3,2023-01-01 13:00:00_B_0,72.529464
4,2023-01-01 23:00:00_B_0,79.085237
