In [8]:
# pip install sagemaker

In [12]:
import pandas as pd
import numpy as np
from catboost import CatBoostRegressor, Pool
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, r2_score, mean_absolute_error

# Load the dataset
df = pd.read_csv('modified_electricity_consumption_data5.csv')

# Check the columns to verify if 'datetime' is present
print("Columns in the dataset:", df.columns)

# Convert 'datetime' column to datetime data type if present
if 'datetime' in df.columns:
    df['datetime'] = pd.to_datetime(df['datetime'])
else:
    print("Column 'datetime' not found in DataFrame.")

# Aggregate daily data to monthly data
df['year_month'] = df['datetime'].dt.to_period('M')
monthly_df = df.groupby(['year_month', 'guri_num', 'degmada'])['total_KW'].sum().reset_index()

# Extract month and year from the year_month column
monthly_df['year'] = monthly_df['year_month'].dt.year
monthly_df['month'] = monthly_df['year_month'].dt.month

# Select the features and target variable
features = ['guri_num', 'degmada', 'month', 'year']
target = 'total_KW'

X = monthly_df[features]
y = monthly_df[target]

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Specify the categorical features
categorical_features = ['degmada']

# Create the Pool object for CatBoost
train_pool = Pool(data=X_train, label=y_train, cat_features=categorical_features)
test_pool = Pool(data=X_test, label=y_test, cat_features=categorical_features)

# Train a CatBoost Regressor
model = CatBoostRegressor(iterations=1000, depth=6, learning_rate=0.1, loss_function='RMSE', random_seed=42)
model.fit(train_pool, verbose=100)

# Make predictions on the test set
y_pred = model.predict(test_pool)

# Evaluate the model
mse = mean_squared_error(y_test, y_pred)
rmse = np.sqrt(mse)
r2 = r2_score(y_test, y_pred)
mae = mean_absolute_error(y_test, y_pred)

print(f"Root Mean Squared Error: {rmse}")
print(f"R² Score: {r2}")
print(f"Mean Absolute Error: {mae}")


Columns in the dataset: Index(['datetime', 'guri_num', 'degmada', 'total_KW'], dtype='object')
0:	learn: 39.9781308	total: 41.9ms	remaining: 41.9s
100:	learn: 23.2117998	total: 4.96s	remaining: 44.1s
200:	learn: 20.7557630	total: 9.63s	remaining: 38.3s
300:	learn: 19.2349156	total: 14.4s	remaining: 33.4s
400:	learn: 18.1441464	total: 19.3s	remaining: 28.8s
500:	learn: 17.4554544	total: 23.9s	remaining: 23.8s
600:	learn: 16.9524639	total: 28.5s	remaining: 18.9s
700:	learn: 16.5224416	total: 33.2s	remaining: 14.2s
800:	learn: 16.1596499	total: 37.8s	remaining: 9.38s
900:	learn: 15.8641785	total: 42.3s	remaining: 4.65s
999:	learn: 15.5842698	total: 46.9s	remaining: 0us
Root Mean Squared Error: 15.720821600311991
R² Score: 0.8624713036473084
Mean Absolute Error: 12.110870270426457


In [14]:

# Prepare for future predictions
# Extract unique house numbers (guri_num) from your dataset
valid_guri_nums = monthly_df['guri_num'].unique()

# Generate future months for prediction
future_dates = pd.date_range(start='2023-01-01', end='2025-12-31', freq='M')

# Create a DataFrame for future predictions
future_df = pd.DataFrame({
    'year_month': np.tile(future_dates, len(valid_guri_nums)),
    'guri_num': np.repeat(valid_guri_nums, len(future_dates))
})

# Add district column by merging with unique guri_num to district mapping
guri_district_mapping = monthly_df[['guri_num', 'degmada']].drop_duplicates()
future_df = future_df.merge(guri_district_mapping, on='guri_num', how='left')

# Extract month and year for future dates
future_df['year'] = future_df['year_month'].dt.year
future_df['month'] = future_df['year_month'].dt.month

# Select the features for future prediction
X_future = future_df[['guri_num', 'degmada', 'month', 'year']]

# Create the Pool object for future data
future_pool = Pool(data=X_future, cat_features=categorical_features)

# Predict future electricity consumption using the trained CatBoost model
future_df['predicted_total_KW'] = model.predict(future_pool)

# Display the future predictions
print(future_df.head(24))

# Save the future predictions to CSV
# future_df.to_csv('future_predictions.csv', index=False)


   year_month  guri_num degmada  year  month  predicted_total_KW
0  2023-01-31     10349  Shibis  2023      1           91.726891
1  2023-02-28     10349  Shibis  2023      2          128.784529
2  2023-03-31     10349  Shibis  2023      3          151.429874
3  2023-04-30     10349  Shibis  2023      4          140.947065
4  2023-05-31     10349  Shibis  2023      5          139.446784
5  2023-06-30     10349  Shibis  2023      6           87.387613
6  2023-07-31     10349  Shibis  2023      7           79.387856
7  2023-08-31     10349  Shibis  2023      8           70.044841
8  2023-09-30     10349  Shibis  2023      9           61.711399
9  2023-10-31     10349  Shibis  2023     10           63.362899
10 2023-11-30     10349  Shibis  2023     11           67.248467
11 2023-12-31     10349  Shibis  2023     12           79.290036
12 2024-01-31     10349  Shibis  2024      1           91.726891
13 2024-02-29     10349  Shibis  2024      2          128.784529
14 2024-03-31     10349  

In [23]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_squared_error, r2_score, mean_absolute_error
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout
from tensorflow.keras.callbacks import EarlyStopping

# Load the dataset
df = pd.read_csv('modified_electricity_consumption_data.csv')

# Convert 'datetime' to datetime data type
df['datetime'] = pd.to_datetime(df['datetime'])

# Aggregate daily data to monthly data
df['year_month'] = df['datetime'].dt.to_period('M')
monthly_df = df.groupby(['year_month', 'guri_num'])['total_KW'].sum().reset_index()

# Extract month and year from the year_month column
monthly_df['year'] = monthly_df['year_month'].dt.year
monthly_df['month'] = monthly_df['year_month'].dt.month

# Select the features and target variable
features = ['guri_num', 'month', 'year']
target = 'total_KW'

# Normalize the target variable
scaler = MinMaxScaler()
monthly_df[target] = scaler.fit_transform(monthly_df[[target]])

# Pivot data to have time series for each house number (guri_num)
pivot_df = monthly_df.pivot(index='year_month', columns='guri_num', values='total_KW').fillna(0)

# Prepare the dataset for LSTM
def create_sequences(data, seq_length):
    xs, ys = [], []
    for i in range(len(data) - seq_length):
        x = data[i:i+seq_length]
        y = data[i+seq_length]
        xs.append(x)
        ys.append(y)
    return np.array(xs), np.array(ys)

SEQ_LENGTH = 12  # Using 12 months (1 year) as the sequence length
data = pivot_df.values

X, y = create_sequences(data, SEQ_LENGTH)

# Split into train and test sets
train_size = int(len(X) * 0.8)
X_train, X_test = X[:train_size], X[train_size:]
y_train, y_test = y[:train_size], y[train_size:]

# Build the LSTM model
model = Sequential([
    LSTM(50, return_sequences=True, input_shape=(SEQ_LENGTH, X_train.shape[2])),
    LSTM(50, return_sequences=False),
    Dropout(0.2),
    Dense(25),
    Dense(X_train.shape[2])
])

model.compile(optimizer='adam', loss='mean_squared_error')

# Early stopping to prevent overfitting
early_stopping = EarlyStopping(monitor='val_loss', patience=10, restore_best_weights=True)

# Train the model
history = model.fit(X_train, y_train, epochs=50, batch_size=32, validation_split=0.2, callbacks=[early_stopping])

# Make predictions on the test set
y_pred = model.predict(X_test)

# Inverse transform the predictions
y_test_inverse = scaler.inverse_transform(y_test.reshape(-1, 1)).reshape(y_test.shape)
y_pred_inverse = scaler.inverse_transform(y_pred.reshape(-1, 1)).reshape(y_pred.shape)

# Evaluate the model
mse = mean_squared_error(y_test_inverse, y_pred_inverse)
rmse = np.sqrt(mse)
r2 = r2_score(y_test_inverse, y_pred_inverse)
mae = mean_absolute_error(y_test_inverse, y_pred_inverse)

print(f"Root Mean Squared Error: {rmse}")
print(f"R² Score: {r2}")
print(f"Mean Absolute Error: {mae}")


Epoch 1/50


  super().__init__(**kwargs)


[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 991ms/step - loss: 0.1730 - val_loss: 0.1728
Epoch 2/50
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 98ms/step - loss: 0.1647 - val_loss: 0.1655
Epoch 3/50
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 89ms/step - loss: 0.1606 - val_loss: 0.1554
Epoch 4/50
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 105ms/step - loss: 0.1430 - val_loss: 0.1426
Epoch 5/50
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 101ms/step - loss: 0.1239 - val_loss: 0.1281
Epoch 6/50
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 88ms/step - loss: 0.1230 - val_loss: 0.1131
Epoch 7/50
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 90ms/step - loss: 0.1073 - val_loss: 0.0990
Epoch 8/50
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 88ms/step - loss: 0.0970 - val_loss: 0.0863
Epoch 9/50
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m

In [34]:

# Prepare for future predictions
# Extract unique house numbers (guri_num) from your dataset
valid_guri_nums = monthly_df['guri_num'].unique()

# Generate future months for prediction
future_dates = pd.date_range(start='2025-01-01', end='2027-12-31', freq='M').to_period('M')

# Create a DataFrame for future predictions
future_df = pd.DataFrame(index=future_dates, columns=valid_guri_nums).fillna(0)

# Combine historical and future data
combined_df = pd.concat([pivot_df, future_df])

# Prepare data for future prediction
X_future, _ = create_sequences(combined_df.values, SEQ_LENGTH)

# Make future predictions
future_predictions = model.predict(X_future[-len(future_dates):])

# Inverse transform the future predictions
future_predictions_inverse = scaler.inverse_transform(future_predictions.reshape(-1, 1)).reshape(future_predictions.shape)

# Create a DataFrame for future predictions
future_predictions_df = pd.DataFrame(future_predictions_inverse, index=future_dates, columns=valid_guri_nums)

# Save the future predictions to CSV
# future_predictions_df.to_csv('future_predictions.csv')

print(future_predictions_df.head(12))


[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 8ms/step 
           BOO13096   BOO14883   BOO16425   BOO21552    BOO24958   BOO25971  \
2025-01  120.686211  85.631027  59.418957  79.329590  132.092148  66.229462   
2025-02   94.743477  65.351471  44.633678  58.082558  105.970062  56.541008   
2025-03   95.012154  62.812672  44.372078  56.743256  105.431168  56.377117   
2025-04   97.244232  63.049942  44.793835  58.556602  107.129845  56.791367   
2025-05  100.033386  64.974960  45.489082  61.704323  108.850449  57.601845   
2025-06   95.451950  63.046989  42.721516  60.903755  102.713905  54.663330   
2025-07   90.616829  60.122913  39.111603  60.209080   95.885681  51.958797   
2025-08   80.693085  53.904797  33.408154  56.951576   84.052330  46.708103   
2025-09   68.687141  47.427540  28.423187  51.504040   70.833412  40.985981   
2025-10   56.123119  40.924061  24.981037  44.628078   57.771664  35.538578   
2025-11   44.352085  34.960121  22.867279  37.519806   45.7

In [35]:
print(future_predictions_df.tail(24))


         BOO13096   BOO14883   BOO16425   BOO21552   BOO24958   BOO25971  \
2026-01  27.38097  26.573942  19.100641  27.311792  30.702206  24.089138   
2026-02  27.38097  26.573942  19.100641  27.311792  30.702206  24.089138   
2026-03  27.38097  26.573942  19.100641  27.311792  30.702206  24.089138   
2026-04  27.38097  26.573942  19.100641  27.311792  30.702206  24.089138   
2026-05  27.38097  26.573942  19.100641  27.311792  30.702206  24.089138   
2026-06  27.38097  26.573942  19.100641  27.311792  30.702206  24.089138   
2026-07  27.38097  26.573942  19.100641  27.311792  30.702206  24.089138   
2026-08  27.38097  26.573942  19.100641  27.311792  30.702206  24.089138   
2026-09  27.38097  26.573942  19.100641  27.311792  30.702206  24.089138   
2026-10  27.38097  26.573942  19.100641  27.311792  30.702206  24.089138   
2026-11  27.38097  26.573942  19.100641  27.311792  30.702206  24.089138   
2026-12  27.38097  26.573942  19.100641  27.311792  30.702206  24.089138   
2027-01  27.

In [1]:
#  pip install pytorch-lightning

In [4]:
import pandas as pd
import numpy as np
import pytorch_lightning as pl
from sklearn.preprocessing import MinMaxScaler
from pytorch_forecasting import TimeSeriesDataSet, TemporalFusionTransformer
from pytorch_forecasting.data import NaNLabelEncoder
from pytorch_forecasting.metrics import QuantileLoss
from pytorch_lightning.callbacks import EarlyStopping

# Load the dataset
df = pd.read_csv('modified_electricity_consumption_data2.csv')

# Convert 'datetime' to datetime data type
df['datetime'] = pd.to_datetime(df['datetime'])

# Aggregate daily data to monthly data
df['year_month'] = df['datetime'].dt.to_period('M')
monthly_df = df.groupby(['year_month', 'guri_num'])['total_KW'].sum().reset_index()

# Convert 'year_month' to datetime for proper ordering
monthly_df['year_month'] = monthly_df['year_month'].dt.to_timestamp()

# Normalize the target variable
scaler = MinMaxScaler()
monthly_df['total_KW'] = scaler.fit_transform(monthly_df[['total_KW']])

# Create index column for TimeSeriesDataSet
monthly_df['time_idx'] = (monthly_df['year_month'] - monthly_df['year_month'].min()).dt.days // 30

# Prepare dataset
max_encoder_length = 12  # 12 months of history
max_prediction_length = 12  # 12 months to predict

training_cutoff = monthly_df['time_idx'].max() - max_prediction_length
training = TimeSeriesDataSet(
    monthly_df[lambda x: x.time_idx <= training_cutoff],
    time_idx="time_idx",
    target="total_KW",
    group_ids=["guri_num"],
    min_encoder_length=max_encoder_length,
    max_encoder_length=max_encoder_length,
    min_prediction_length=1,
    max_prediction_length=max_prediction_length,
    static_categoricals=["guri_num"],
    time_varying_known_reals=["time_idx"],
    time_varying_unknown_reals=["total_KW"],
    target_normalizer=NaNLabelEncoder(add_nan=True),
    add_relative_time_idx=True,
    add_target_scales=True,
    add_encoder_length=True,
)

validation = TimeSeriesDataSet.from_dataset(training, monthly_df, predict=True, stop_randomization=True)

batch_size = 64  # set this according to your system's capabilities
train_dataloader = training.to_dataloader(train=True, batch_size=batch_size, num_workers=0)
val_dataloader = validation.to_dataloader(train=False, batch_size=batch_size * 10, num_workers=0)

# Define TFT model
tft = TemporalFusionTransformer.from_dataset(
    training,
    learning_rate=0.03,
    hidden_size=16,
    attention_head_size=1,
    dropout=0.1,
    hidden_continuous_size=8,
    output_size=7,  # 7 quantiles by default
    loss=QuantileLoss(),
    log_interval=10,
    reduce_on_plateau_patience=4,
)

# Training the model
trainer = pl.Trainer(
    max_epochs=30,
    gpus=0,  # set to >0 to use GPU
    gradient_clip_val=0.1,
    limit_train_batches=30,  # for faster training, remove/comment for full training
)

trainer.fit(tft, train_dataloaders=train_dataloader, val_dataloaders=val_dataloader)

# Make predictions
best_model_path = trainer.checkpoint_callback.best_model_path
best_tft = TemporalFusionTransformer.load_from_checkpoint(best_model_path)

# Select the last max_prediction_length months
predictions, index = best_tft.predict(val_dataloader, return_index=True)
actuals = validation.df[validation.df.time_idx > training_cutoff]

# Inverse transform predictions
predicted_values = scaler.inverse_transform(predictions.numpy().reshape(-1, 1)).reshape(predictions.shape)

# Create a DataFrame for actual and predicted values
predicted_df = pd.DataFrame(predicted_values.flatten(), columns=['Predicted'], index=index)
actual_df = actuals[['time_idx', 'guri_num', 'total_KW']].set_index(index)

# Concatenate the actual and predicted DataFrames
comparison_df = pd.concat([actual_df, predicted_df], axis=1)

print(comparison_df.head(12))

# Save the future predictions to CSV
# comparison_df.to_csv('future_predictions_tft.csv')


SyntaxError: invalid syntax (3679791091.py, line 3)