In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

In [6]:
base=pd.read_csv('BSinfo.csv')
cell=pd.read_csv('CLdata.csv')
energy=pd.read_csv('ECdata.csv')
submit=pd.read_csv("PCprediction.csv")

In [7]:
# Calculate average energy values by BS from the energy dataset
average_energy_by_bs = energy.groupby('BS')['Energy'].mean()

# Create a new column 'AverageEnergy' in the base dataset using the average values
base['AverageEnergy'] = base['BS'].map(average_energy_by_bs)

base


Unnamed: 0,BS,CellName,RUType,Mode,Frequency,Bandwidth,Antennas,TXpower,AverageEnergy
0,B_0,Cell0,Type1,Mode2,365.000,20,4,6.875934,73.091525
1,B_1,Cell0,Type2,Mode2,532.000,20,4,6.875934,17.743187
2,B_2,Cell0,Type1,Mode2,365.000,20,4,6.875934,56.843105
3,B_3,Cell0,Type2,Mode2,532.000,20,4,6.875934,35.814045
4,B_4,Cell0,Type2,Mode2,532.000,20,4,6.875934,22.374467
...,...,...,...,...,...,...,...,...,...
1212,B_925,Cell1,Type8,Mode1,697.002,10,4,7.877728,12.301943
1213,B_105,Cell2,Type1,Mode2,426.980,2,2,6.877429,59.198630
1214,B_745,Cell2,Type1,Mode2,426.980,2,2,6.877429,58.336003
1215,B_105,Cell3,Type1,Mode2,426.980,2,2,6.877429,59.198630


In [8]:
submit.shape

(26139, 4)

In [9]:
# Convert 'Time' column to datetime format
energy['Time'] = pd.to_datetime(energy['Time'])
submit['Time'] = pd.to_datetime(submit['Time'])

In [10]:
submit['Energy']=submit['BS'].map(average_energy_by_bs)
submit['Energy'].fillna(average_energy_by_bs.mean(), inplace=True)
# Create the 'ID' column by joining 'Time' and 'BS' columns
submit['ID'] = submit['Time'].astype(str) + '_' + submit['BS']

# Remove unnecessary columns and reorder columns
submit = submit[['ID', 'Energy']]

# Save the submission file
submit.to_csv('SampleSubmission__18_.csv', index=False)

submit.shape


(26139, 2)

In [11]:
import pandas as pd
from sklearn.tree import DecisionTreeRegressor
from sklearn.preprocessing import LabelEncoder

# Load the data
energy=pd.read_csv('ECdata.csv')
submit=pd.read_csv("PCprediction.csv")

# Convert 'Time' column to datetime format
energy['Time'] = pd.to_datetime(energy['Time'])
submit['Time'] = pd.to_datetime(submit['Time'])

# Initialize the label encoder
encoder = LabelEncoder()

# Combine unique values from both 'energy' and 'submit' datasets and fit the encoder
combined_bs = pd.concat([energy['BS'], submit['BS']])
encoder.fit(combined_bs)

# Transform the 'BS' column in both energy and submit datasets
energy['BS_encoded'] = encoder.transform(energy['BS'])
submit['BS_encoded'] = encoder.transform(submit['BS'])

# Fit a Decision Tree regressor model on the energy data
model = DecisionTreeRegressor()
X = energy[['BS_encoded']]  # Use only the encoded 'BS' column as a feature
y = energy['Energy']
model.fit(X, y)

# Use the trained model to predict 'Energy' for the 'submit' data
X_pred = submit[['BS_encoded']]  # Use only the encoded 'BS' column as a feature
submit['Energy'] = model.predict(X_pred)

# Create the 'ID' column by joining 'Time' and 'BS' columns
submit['ID'] = submit['Time'].astype(str) + '_' + submit['BS']

# Remove unnecessary columns and reorder columns
submit = submit[['ID', 'Energy']]

# Save the submission file
submit.to_csv('SampleSubmission.csv', index=False)

submit.head()


Unnamed: 0,ID,Energy
0,2023-01-01 06:00:00_B_0,73.091525
1,2023-01-01 11:00:00_B_0,73.091525
2,2023-01-01 12:00:00_B_0,73.091525
3,2023-01-01 13:00:00_B_0,73.091525
4,2023-01-01 23:00:00_B_0,73.091525


In [12]:
base.head()

Unnamed: 0,BS,CellName,RUType,Mode,Frequency,Bandwidth,Antennas,TXpower,AverageEnergy
0,B_0,Cell0,Type1,Mode2,365.0,20,4,6.875934,73.091525
1,B_1,Cell0,Type2,Mode2,532.0,20,4,6.875934,17.743187
2,B_2,Cell0,Type1,Mode2,365.0,20,4,6.875934,56.843105
3,B_3,Cell0,Type2,Mode2,532.0,20,4,6.875934,35.814045
4,B_4,Cell0,Type2,Mode2,532.0,20,4,6.875934,22.374467


In [13]:
base.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1217 entries, 0 to 1216
Data columns (total 9 columns):
 #   Column         Non-Null Count  Dtype  
---  ------         --------------  -----  
 0   BS             1217 non-null   object 
 1   CellName       1217 non-null   object 
 2   RUType         1217 non-null   object 
 3   Mode           1217 non-null   object 
 4   Frequency      1217 non-null   float64
 5   Bandwidth      1217 non-null   int64  
 6   Antennas       1217 non-null   int64  
 7   TXpower        1217 non-null   float64
 8   AverageEnergy  1118 non-null   float64
dtypes: float64(3), int64(2), object(4)
memory usage: 85.7+ KB


In [14]:
base.RUType.value_counts()

Type1     401
Type4     231
Type6     199
Type7     116
Type5      67
Type3      49
Type11     36
Type2      35
Type9      27
Type12     22
Type10     21
Type8      13
Name: RUType, dtype: int64

In [15]:
base.Mode.value_counts()

Mode2    1125
Mode1      92
Name: Mode, dtype: int64

In [16]:
base.Bandwidth.value_counts()

20    568
10    329
2     286
8      24
5      10
Name: Bandwidth, dtype: int64

In [17]:
base.Antennas.value_counts()

2     949
1      99
4      82
8      44
64     22
32     21
Name: Antennas, dtype: int64

In [18]:
base.Frequency.value_counts()

365.000    302
426.980    293
532.000    266
189.000    255
697.002     68
979.998     22
155.600      8
715.998      2
364.000      1
Name: Frequency, dtype: int64

In [19]:
base.CellName.value_counts()

Cell0    1020
Cell1     193
Cell2       2
Cell3       2
Name: CellName, dtype: int64

In [20]:
base.TXpower.value_counts()

6.875934    551
6.427504    258
7.325859    212
8.046487     31
7.327504     24
8.034529     18
6.877429     15
6.128550     14
8.036173     14
8.375336     11
5.979073     10
5.680120      6
7.916592      5
6.576981      5
5.381166      5
6.726457      4
7.598057      3
6.532138      2
6.412556      2
7.323617      2
5.949178      2
7.319432      1
7.086398      1
7.344245      1
6.761584      1
7.318386      1
7.340209      1
7.336173      1
7.550075      1
7.325710      1
7.101345      1
8.013303      1
6.372795      1
6.980568      1
7.882063      1
6.691928      1
8.032735      1
7.891629      1
7.887444      1
7.316293      1
7.321525      1
6.397608      1
6.681614      1
7.877728      1
Name: TXpower, dtype: int64

In [21]:
cell.head()

Unnamed: 0,Time,BS,CellName,load,ESMode1,ESMode2,ESMode3,ESMode4,ESMode5,ESMode6
0,1/1/2023 1:00,B_0,Cell0,0.487936,0.0,0.0,0.0,0,0.0,0.0
1,1/1/2023 2:00,B_0,Cell0,0.344468,0.0,0.0,0.0,0,0.0,0.0
2,1/1/2023 3:00,B_0,Cell0,0.193766,0.0,0.0,0.0,0,0.0,0.0
3,1/1/2023 4:00,B_0,Cell0,0.222383,0.0,0.0,0.0,0,0.0,0.0
4,1/1/2023 5:00,B_0,Cell0,0.175436,0.0,0.0,0.0,0,0.0,0.0


In [22]:
cell.ESMode1.value_counts()

0.000000    119334
1.000000      2182
0.001389       576
0.983333        86
0.979167        76
             ...  
0.150000         1
0.344444         1
0.550000         1
0.075000         1
0.519444         1
Name: ESMode1, Length: 565, dtype: int64

In [23]:
cell.ESMode2.value_counts()

0.000000    119522
1.000000      1616
0.999722       346
0.000556       220
0.001111       196
             ...  
0.261667         1
0.649722         1
0.348056         1
0.873611         1
0.725000         1
Name: ESMode2, Length: 1234, dtype: int64

In [24]:
cell.ESMode3.value_counts()

0.000000    125353
0.002533         1
0.099807         1
0.120849         1
0.053396         1
             ...  
0.031455         1
0.007751         1
0.049877         1
0.061596         1
0.026360         1
Name: ESMode3, Length: 223, dtype: int64

In [25]:
cell.ESMode4.value_counts()

0    125575
Name: ESMode4, dtype: int64

In [26]:
cell.ESMode5.value_counts()

0.000000    125572
0.000751         1
0.629762         1
0.768070         1
Name: ESMode5, dtype: int64

In [27]:
cell.ESMode6.value_counts()

0.000000    118245
0.924341         6
0.924342         5
0.924337         3
0.924339         3
             ...  
0.701364         1
0.686719         1
0.708874         1
0.774581         1
0.537125         1
Name: ESMode6, Length: 7303, dtype: int64

In [28]:
energy.head()

Unnamed: 0,Time,BS,Energy,BS_encoded
0,2023-01-01 01:00:00,B_0,64.275037,0
1,2023-01-01 02:00:00,B_0,55.904335,0
2,2023-01-01 03:00:00,B_0,57.698057,0
3,2023-01-01 04:00:00,B_0,55.156951,0
4,2023-01-01 05:00:00,B_0,56.053812,0


In [29]:
cell.CellName.value_counts()

Cell0    118768
Cell1      6711
Cell2        48
Cell3        48
Name: CellName, dtype: int64

In [30]:
cell.BS.value_counts()

B_105     330
B_745     326
B_584     286
B_585     286
B_506     286
         ... 
B_890      24
B_891      24
B_892      24
B_893      24
B_1019     24
Name: BS, Length: 1020, dtype: int64

In [31]:
cell.load.value_counts()

0.045923    104
0.008240     93
0.008260     85
0.040008     84
0.000000     82
           ... 
0.643340      1
0.743020      1
0.615360      1
0.698260      1
0.081865      1
Name: load, Length: 70505, dtype: int64

In [32]:
import pandas as pd
from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import LabelEncoder

# Load the data
energy=pd.read_csv('ECdata.csv')
submit=pd.read_csv("PCprediction.csv")

# Convert 'Time' column to datetime format
energy['Time'] = pd.to_datetime(energy['Time'])
submit['Time'] = pd.to_datetime(submit['Time'])

# Initialize the label encoder
encoder = LabelEncoder()

# Combine unique values from both 'energy' and 'submit' datasets and fit the encoder
combined_bs = pd.concat([energy['BS'], submit['BS']])
encoder.fit(combined_bs)

# Transform the 'BS' column in both energy and submit datasets
energy['BS_encoded'] = encoder.transform(energy['BS'])
submit['BS_encoded'] = encoder.transform(submit['BS'])

# Fit a Linear Regression model on the energy data
model = LinearRegression()
X = energy[['BS_encoded']]  # Use only the encoded 'BS' column as a feature
y = energy['Energy']
model.fit(X, y)

# Use the trained model to predict 'Energy' for the 'submit' data
X_pred = submit[['BS_encoded']]  # Use only the encoded 'BS' column as a feature
submit['Energy'] = model.predict(X_pred)

# Create the 'ID' column by joining 'Time' and 'BS' columns
submit['ID'] = submit['Time'].astype(str) + '_' + submit['BS']

# Remove unnecessary columns and reorder columns
submit = submit[['ID', 'Energy']]

# Save the submission file
submit.to_csv('SampleSubmission.csv', index=False)

submit.head()


Unnamed: 0,ID,Energy
0,2023-01-01 06:00:00_B_0,28.827889
1,2023-01-01 11:00:00_B_0,28.827889
2,2023-01-01 12:00:00_B_0,28.827889
3,2023-01-01 13:00:00_B_0,28.827889
4,2023-01-01 23:00:00_B_0,28.827889


In [33]:
import pandas as pd
from sklearn.ensemble import RandomForestRegressor
from sklearn.preprocessing import LabelEncoder

# Load the data
energy = pd.read_csv('ECdata.csv')
submit = pd.read_csv("PCprediction.csv")

# Convert 'Time' column to datetime format
energy['Time'] = pd.to_datetime(energy['Time'])
submit['Time'] = pd.to_datetime(submit['Time'])

# Initialize the label encoder
encoder = LabelEncoder()

# Combine unique values from both 'energy' and 'submit' datasets and fit the encoder
combined_bs = pd.concat([energy['BS'], submit['BS']])
encoder.fit(combined_bs)

# Transform the 'BS' column in both energy and submit datasets
energy['BS_encoded'] = encoder.transform(energy['BS'])
submit['BS_encoded'] = encoder.transform(submit['BS'])

# Fit a Random Forest regressor model on the energy data
model = RandomForestRegressor(n_estimators=100, random_state=42)  # You can adjust the number of estimators
X = energy[['BS_encoded']]  # Use only the encoded 'BS' column as a feature
y = energy['Energy']
model.fit(X, y)

# Use the trained model to predict 'Energy' for the 'submit' data
X_pred = submit[['BS_encoded']]  # Use only the encoded 'BS' column as a feature
submit['Energy'] = model.predict(X_pred)

# Create the 'ID' column by joining 'Time' and 'BS' columns
submit['ID'] = submit['Time'].astype(str) + '_' + submit['BS']

# Remove unnecessary columns and reorder columns
submit = submit[['ID', 'Energy']]

# Save the submission file
submit.to_csv('SampleSubmission.csv', index=False)

submit.head()


Unnamed: 0,ID,Energy
0,2023-01-01 06:00:00_B_0,73.131131
1,2023-01-01 11:00:00_B_0,73.131131
2,2023-01-01 12:00:00_B_0,73.131131
3,2023-01-01 13:00:00_B_0,73.131131
4,2023-01-01 23:00:00_B_0,73.131131


In [34]:
import pandas as pd
from sklearn.ensemble import RandomForestRegressor
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import GridSearchCV

# Load the data
energy = pd.read_csv('ECdata.csv')
submit = pd.read_csv("PCprediction.csv")

# Convert 'Time' column to datetime format
energy['Time'] = pd.to_datetime(energy['Time'])
submit['Time'] = pd.to_datetime(submit['Time'])

# Initialize the label encoder
encoder = LabelEncoder()

# Combine unique values from both 'energy' and 'submit' datasets and fit the encoder
combined_bs = pd.concat([energy['BS'], submit['BS']])
encoder.fit(combined_bs)

# Transform the 'BS' column in both energy and submit datasets
energy['BS_encoded'] = encoder.transform(energy['BS'])
submit['BS_encoded'] = encoder.transform(submit['BS'])

# Set up parameter grid for GridSearchCV
param_grid = {
    'n_estimators': [50, 100, 200],  # You can add more values to try
    'max_depth': [None, 10, 20],  # You can add more values to try
    'min_samples_split': [2, 5, 10],  # You can add more values to try
    'min_samples_leaf': [1, 2, 4],  # You can add more values to try
}

# Create a Random Forest regressor
rf = RandomForestRegressor(random_state=42)

# Perform GridSearchCV
grid_search = GridSearchCV(estimator=rf, param_grid=param_grid, cv=5, n_jobs=-1)
X = energy[['BS_encoded']]
y = energy['Energy']
grid_search.fit(X, y)

# Get the best parameters from the grid search
best_params = grid_search.best_params_

# Use the best parameters to fit the model
best_model = RandomForestRegressor(random_state=42, **best_params)
best_model.fit(X, y)

# Use the trained model to predict 'Energy' for the 'submit' data
X_pred = submit[['BS_encoded']]
submit['Energy'] = best_model.predict(X_pred)

# Create the 'ID' column by joining 'Time' and 'BS' columns
submit['ID'] = submit['Time'].astype(str) + '_' + submit['BS']

# Remove unnecessary columns and reorder columns
submit = submit[['ID', 'Energy']]

# Save the submission file
submit.to_csv('SampleSubmission.csv', index=False)

submit.head()


In [None]:
import pandas as pd
from sklearn.tree import DecisionTreeRegressor
from sklearn.preprocessing import OrdinalEncoder

# Load the data
energy = pd.read_csv('ECdata.csv')
submit = pd.read_csv("PCprediction.csv")

# Convert 'Time' column to datetime format
energy['Time'] = pd.to_datetime(energy['Time'])
submit['Time'] = pd.to_datetime(submit['Time'])

# Initialize the ordinal encoder
encoder = OrdinalEncoder()

# Fit the encoder on the combined 'BS' values
combined_bs = pd.concat([energy['BS'], submit['BS']])
encoder.fit(combined_bs.values.reshape(-1, 1))

# Transform the 'BS' column in both energy and submit datasets
energy['BS_encoded'] = encoder.transform(energy['BS'].values.reshape(-1, 1))
submit['BS_encoded'] = encoder.transform(submit['BS'].values.reshape(-1, 1))

# Fit a Decision Tree regressor model on the energy data
model = DecisionTreeRegressor()
X = energy[['BS_encoded']]  # Use only the encoded 'BS' column as a feature
y = energy['Energy']
model.fit(X, y)

# Use the trained model to predict 'Energy' for the 'submit' data
X_pred = submit[['BS_encoded']]  # Use only the encoded 'BS' column as a feature
submit['Energy'] = model.predict(X_pred)

# Create the 'ID' column by joining 'Time' and 'BS' columns
submit['ID'] = submit['Time'].astype(str) + '_' + submit['BS']

# Remove unnecessary columns and reorder columns
submit = submit[['ID', 'Energy']]

# Save the submission file
submit.to_csv('SampleSubmission.csv', index=False)

submit.head()


Unnamed: 0,ID,Energy
0,2023-01-01 06:00:00_B_0,73.091525
1,2023-01-01 11:00:00_B_0,73.091525
2,2023-01-01 12:00:00_B_0,73.091525
3,2023-01-01 13:00:00_B_0,73.091525
4,2023-01-01 23:00:00_B_0,73.091525


In [None]:
import pandas as pd
from sklearn.tree import DecisionTreeRegressor
from sklearn.model_selection import RandomizedSearchCV
from sklearn.preprocessing import LabelEncoder
import numpy as np

# Load the data
energy = pd.read_csv('ECdata.csv')
submit = pd.read_csv("PCprediction.csv")

# Convert 'Time' column to datetime format
energy['Time'] = pd.to_datetime(energy['Time'])
submit['Time'] = pd.to_datetime(submit['Time'])

# Initialize the label encoder
encoder = LabelEncoder()

# Combine unique values from both 'energy' and 'submit' datasets and fit the encoder
combined_bs = pd.concat([energy['BS'], submit['BS']])
encoder.fit(combined_bs)

# Transform the 'BS' column in both energy and submit datasets
energy['BS_encoded'] = encoder.transform(energy['BS'])
submit['BS_encoded'] = encoder.transform(submit['BS'])

# Define parameter distributions for tuning
param_dist = {
    'max_depth': [None, 10, 20, 30],
    'min_samples_split': np.arange(2, 21),
    'min_samples_leaf': np.arange(1, 21),
    'max_features': ['auto', 'sqrt', 'log2', None]
}

# Initialize the Decision Tree regressor
model = DecisionTreeRegressor()

# Initialize RandomizedSearchCV for parameter tuning
random_search = RandomizedSearchCV(model, param_distributions=param_dist, n_iter=100, cv=5, random_state=42)

# Fit the RandomizedSearchCV on the training data
X = energy[['BS_encoded']]
y = energy['Energy']
random_search.fit(X, y)

# Use the best model to predict 'Energy' for the 'submit' data
best_model = random_search.best_estimator_
X_pred = submit[['BS_encoded']]
submit['Energy'] = best_model.predict(X_pred)

# Create the 'ID' column by joining 'Time' and 'BS' columns
submit['ID'] = submit['Time'].astype(str) + '_' + submit['BS']

# Remove unnecessary columns and reorder columns
submit = submit[['ID', 'Energy']]

# Save the submission file
submit.to_csv('SampleSubmission.csv', index=False)

submit.head()


Unnamed: 0,ID,Energy
0,2023-01-01 06:00:00_B_0,73.091525
1,2023-01-01 11:00:00_B_0,73.091525
2,2023-01-01 12:00:00_B_0,73.091525
3,2023-01-01 13:00:00_B_0,73.091525
4,2023-01-01 23:00:00_B_0,73.091525


In [None]:
import pandas as pd
from xgboost import XGBRegressor
from sklearn.preprocessing import LabelEncoder

# Load the data
energy = pd.read_csv('ECdata.csv')
submit = pd.read_csv("PCprediction.csv")

# Convert 'Time' column to datetime format
energy['Time'] = pd.to_datetime(energy['Time'])
submit['Time'] = pd.to_datetime(submit['Time'])

# Initialize the label encoder
encoder = LabelEncoder()

# Combine unique values from both 'energy' and 'submit' datasets and fit the encoder
combined_bs = pd.concat([energy['BS'], submit['BS']])
encoder.fit(combined_bs)

# Transform the 'BS' column in both energy and submit datasets
energy['BS_encoded'] = encoder.transform(energy['BS'])
submit['BS_encoded'] = encoder.transform(submit['BS'])

# Fit an XGBoost regressor model on the energy data
model = XGBRegressor()
X = energy[['BS_encoded']]  # Use only the encoded 'BS' column as a feature
y = energy['Energy']
model.fit(X, y)

# Use the trained model to predict 'Energy' for the 'submit' data
X_pred = submit[['BS_encoded']]  # Use only the encoded 'BS' column as a feature
submit['Energy'] = model.predict(X_pred)

# Create the 'ID' column by joining 'Time' and 'BS' columns
submit['ID'] = submit['Time'].astype(str) + '_' + submit['BS']

# Remove unnecessary columns and reorder columns
submit = submit[['ID', 'Energy']]

# Save the submission file
submit.to_csv('SampleSubmission.csv', index=False)

submit.head()


Unnamed: 0,ID,Energy
0,2023-01-01 06:00:00_B_0,72.533684
1,2023-01-01 11:00:00_B_0,72.533684
2,2023-01-01 12:00:00_B_0,72.533684
3,2023-01-01 13:00:00_B_0,72.533684
4,2023-01-01 23:00:00_B_0,72.533684


In [None]:
import pandas as pd
from sklearn.svm import SVR
from sklearn.preprocessing import LabelEncoder

# Load the data
energy = pd.read_csv('ECdata.csv')
submit = pd.read_csv("PCprediction.csv")

# Convert 'Time' column to datetime format
energy['Time'] = pd.to_datetime(energy['Time'])
submit['Time'] = pd.to_datetime(submit['Time'])

# Initialize the label encoder
encoder = LabelEncoder()

# Combine unique values from both 'energy' and 'submit' datasets and fit the encoder
combined_bs = pd.concat([energy['BS'], submit['BS']])
encoder.fit(combined_bs)

# Transform the 'BS' column in both energy and submit datasets
energy['BS_encoded'] = encoder.transform(energy['BS'])
submit['BS_encoded'] = encoder.transform(submit['BS'])

# Fit an SVM regressor model on the energy data
model = SVR()
X = energy[['BS_encoded']]  # Use only the encoded 'BS' column as a feature
y = energy['Energy']
model.fit(X, y)

# Use the trained model to predict 'Energy' for the 'submit' data
X_pred = submit[['BS_encoded']]  # Use only the encoded 'BS' column as a feature
submit['Energy'] = model.predict(X_pred)

# Create the 'ID' column by joining 'Time' and 'BS' columns
submit['ID'] = submit['Time'].astype(str) + '_' + submit['BS']

# Remove unnecessary columns and reorder columns
submit = submit[['ID', 'Energy']]

# Save the submission file
submit.to_csv('SampleSubmission.csv', index=False)

submit.head()


Unnamed: 0,ID,Energy
0,2023-01-01 06:00:00_B_0,22.276198
1,2023-01-01 11:00:00_B_0,22.276198
2,2023-01-01 12:00:00_B_0,22.276198
3,2023-01-01 13:00:00_B_0,22.276198
4,2023-01-01 23:00:00_B_0,22.276198


In [None]:
import pandas as pd
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.preprocessing import LabelEncoder

# Load the data
energy = pd.read_csv('ECdata.csv')
submit = pd.read_csv("PCprediction.csv")

# Convert 'Time' column to datetime format
energy['Time'] = pd.to_datetime(energy['Time'])
submit['Time'] = pd.to_datetime(submit['Time'])

# Initialize the label encoder
encoder = LabelEncoder()

# Combine unique values from both 'energy' and 'submit' datasets and fit the encoder
combined_bs = pd.concat([energy['BS'], submit['BS']])
encoder.fit(combined_bs)

# Transform the 'BS' column in both energy and submit datasets
energy['BS_encoded'] = encoder.transform(energy['BS'])
submit['BS_encoded'] = encoder.transform(submit['BS'])

# Fit a Gradient Boosting regressor model on the energy data
model = GradientBoostingRegressor()
X = energy[['BS_encoded']]  # Use only the encoded 'BS' column as a feature
y = energy['Energy']
model.fit(X, y)

# Use the trained model to predict 'Energy' for the 'submit' data
X_pred = submit[['BS_encoded']]  # Use only the encoded 'BS' column as a feature
submit['Energy'] = model.predict(X_pred)

# Create the 'ID' column by joining 'Time' and 'BS' columns
submit['ID'] = submit['Time'].astype(str) + '_' + submit['BS']

# Remove unnecessary columns and reorder columns
submit = submit[['ID', 'Energy']]

# Save the submission file
submit.to_csv('SampleSubmission.csv', index=False)

submit.head()


Unnamed: 0,ID,Energy
0,2023-01-01 06:00:00_B_0,68.09552
1,2023-01-01 11:00:00_B_0,68.09552
2,2023-01-01 12:00:00_B_0,68.09552
3,2023-01-01 13:00:00_B_0,68.09552
4,2023-01-01 23:00:00_B_0,68.09552


In [None]:
import pandas as pd
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.preprocessing import OneHotEncoder

# Load the data
energy = pd.read_csv('ECdata.csv')
submit = pd.read_csv("PCprediction.csv")

# Convert 'Time' column to datetime format
energy['Time'] = pd.to_datetime(energy['Time'])
submit['Time'] = pd.to_datetime(submit['Time'])

# Perform One-Hot Encoding on 'BS' column
encoder = OneHotEncoder(sparse=False, handle_unknown='ignore')
energy_encoded = pd.DataFrame(encoder.fit_transform(energy[['BS']]), columns=encoder.get_feature_names(['BS']))
submit_encoded = pd.DataFrame(encoder.transform(submit[['BS']]), columns=encoder.get_feature_names(['BS']))

# Concatenate encoded dataframes with original dataframes
energy = pd.concat([energy, energy_encoded], axis=1)
submit = pd.concat([submit, submit_encoded], axis=1)

# Fit a Gradient Boosting regressor model on the energy data
model = GradientBoostingRegressor()
X = energy[energy_encoded.columns]  # Use only the encoded columns as features
y = energy['Energy']
model.fit(X, y)

# Use the trained model to predict 'Energy' for the 'submit' data
X_pred = submit[submit_encoded.columns]  # Use only the encoded columns as features
submit['Energy'] = model.predict(X_pred)

# Create the 'ID' column by joining 'Time' and 'BS' columns
submit['ID'] = submit['Time'].astype(str) + '_' + submit['BS']

# Remove unnecessary columns and reorder columns
submit = submit[['ID', 'Energy']]

# Save the submission file
submit.to_csv('SampleSubmission.csv', index=False)

submit.head()




Unnamed: 0,ID,Energy
0,2023-01-01 06:00:00_B_0,53.046987
1,2023-01-01 11:00:00_B_0,53.046987
2,2023-01-01 12:00:00_B_0,53.046987
3,2023-01-01 13:00:00_B_0,53.046987
4,2023-01-01 23:00:00_B_0,53.046987


In [3]:
import pandas as pd
import numpy as np
import tensorflow as tf
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

# Load the data
energy = pd.read_csv('ECdata.csv')
submit = pd.read_csv("PCprediction.csv")

# Convert 'Time' column to datetime format
energy['Time'] = pd.to_datetime(energy['Time'])
submit['Time'] = pd.to_datetime(submit['Time'])

# Initialize the label encoder
encoder = LabelEncoder()

# Combine unique values from both 'energy' and 'submit' datasets and fit the encoder
combined_bs = pd.concat([energy['BS'], submit['BS']])
encoder.fit(combined_bs)

# Transform the 'BS' column in both energy and submit datasets
energy['BS_encoded'] = encoder.transform(energy['BS'])
submit['BS_encoded'] = encoder.transform(submit['BS'])

# Prepare data for neural network
X = energy[['BS_encoded']].values  # Use only the encoded 'BS' column as a feature
y = energy['Energy'].values

# Split the data into training and validation sets
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)

# Standardize features
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_val_scaled = scaler.transform(X_val)

# Build the neural network model
model = tf.keras.Sequential([
    tf.keras.layers.Dense(64, activation='relu', input_shape=(X_train_scaled.shape[1],)),
    tf.keras.layers.Dense(32, activation='relu'),
    tf.keras.layers.Dense(1)
])

# Compile the model
model.compile(optimizer='adam', loss='mean_squared_error')

# Train the model
model.fit(X_train_scaled, y_train, epochs=50, batch_size=32, validation_data=(X_val_scaled, y_val))

# Prepare data for prediction
X_pred = submit[['BS_encoded']].values  # Use only the encoded 'BS' column as a feature
X_pred_scaled = scaler.transform(X_pred)

# Use the trained model to predict 'Energy' for the 'submit' data
predicted_energy = model.predict(X_pred_scaled).flatten()

# Create the 'ID' column by joining 'Time' and 'BS' columns
submit['ID'] = submit['Time'].astype(str) + '_' + submit['BS']

# Assign predicted energies to the submit data
submit['Energy'] = predicted_energy

# Remove unnecessary columns and reorder columns
submit = submit[['ID', 'Energy']]

# Save the submission file
submit.to_csv('SampleSubmission.csv', index=False)

submit.head()


ModuleNotFoundError: No module named 'tensorflow'