In [13]:
import pandas as pd
# Load the raw consumption records
dataset = pd.read_csv('electricity_consumption_data.csv')

# Parse the datetime column so the .dt accessors below are available
dataset['datetime'] = pd.to_datetime(dataset['datetime'])

# Derive the calendar year and month that serve as grouping keys
dataset['year'] = dataset['datetime'].dt.year
dataset['month'] = dataset['datetime'].dt.month

# Keep only historical rows: years up to and including 2022.
# The cutoff used to be written as 2028, which contradicted the stated intent
# and would let rows dated 2023 or later (the years this notebook forecasts)
# be counted as historical data.
LAST_HISTORICAL_YEAR = 2022
historical_data = dataset[dataset['year'] <= LAST_HISTORICAL_YEAR]

# Sum total_KW per (guri_num, deg_num, year, month) to get monthly consumption
monthly_data = (
    historical_data
    .groupby(['guri_num', 'deg_num', 'year', 'month'])['total_KW']
    .sum()
    .reset_index()
)

# Persist the aggregate without the positional index so it reloads cleanly
monthly_data.to_csv('monthly_consumption.csv', index=False)

In [14]:
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, r2_score, mean_absolute_error
import joblib
import xgboost as xgb
from sklearn.neighbors import KNeighborsRegressor
from sklearn.metrics import mean_squared_error
import numpy as np

# Read back the monthly aggregate from disk
monthly_data = pd.read_csv('monthly_consumption.csv')

# Predictor columns on one side, the consumption target on the other
features = monthly_data.loc[:, ['guri_num', 'deg_num', 'year', 'month']]
target = monthly_data.loc[:, 'total_KW']

# Hold out 20% of the rows for evaluation; the fixed seed keeps the split repeatable
X_train, X_test, y_train, y_test = train_test_split(
    features,
    target,
    test_size=0.2,
    random_state=42,
)

# Fit a k-nearest-neighbours regressor (k=5); fit() returns the estimator itself
visualization_model = KNeighborsRegressor(n_neighbors=5).fit(X_train, y_train)

# Score the held-out rows
y_pred = visualization_model.predict(X_test)

# Error metrics for the fitted model on the test split
mse = mean_squared_error(y_test, y_pred)
rmse = np.sqrt(mse)
r2 = r2_score(y_test, y_pred)
mae = mean_absolute_error(y_test, y_pred)

print(f"Root Mean Squared Error: {rmse}")
print(f"R² Score: {r2}")
print(f"Mean Absolute Error: {mae}")

Root Mean Squared Error: 28.361275452766613
R² Score: 0.5535383802475784
Mean Absolute Error: 22.396844073189786


In [16]:
# Build the feature frame used to predict future consumption
def create_future_dataframe(guri_nums, deg_nums, start_year, end_year):
    """Return one row per (guri_num, deg_num, month) combination.

    Months run from January of ``start_year`` through December of
    ``end_year`` inclusive. Rows are ordered by ``guri_num``, then
    ``deg_num``, then chronologically.

    Parameters
    ----------
    guri_nums : iterable
        Values to place in the ``guri_num`` column.
    deg_nums : iterable
        Values to place in the ``deg_num`` column.
    start_year, end_year : int
        First and last calendar year of the period (both inclusive).

    Returns
    -------
    pandas.DataFrame
        Columns ``guri_num``, ``deg_num``, ``year``, ``month``. The columns
        are present even when there are no rows (any input empty, or
        ``end_year`` earlier than ``start_year``).
    """
    columns = ['guri_num', 'deg_num', 'year', 'month']

    # 'MS' = month start, so each generated timestamp stands for one month
    future_periods = pd.date_range(
        start=f'{start_year}-01-01', end=f'{end_year}-12-01', freq='MS'
    )

    records = [
        (guri_num, deg_num, period.year, period.month)
        for guri_num in guri_nums
        for deg_num in deg_nums
        for period in future_periods
    ]

    # Naming the columns explicitly keeps the schema stable for empty input;
    # a bare pd.DataFrame([]) would come back with no columns at all.
    return pd.DataFrame(records, columns=columns)

# Forecast horizon (both years inclusive)
start_year, end_year = 2023, 2025

# Distinct guri_num / deg_num values present in the monthly aggregate
guri_nums = monthly_data['guri_num'].unique()
deg_nums = monthly_data['deg_num'].unique()

# One row per guri_num / deg_num / month combination over the horizon
future_df = create_future_dataframe(guri_nums, deg_nums, start_year, end_year)

# Run the fitted model over the future rows
future_predictions = visualization_model.predict(future_df)

# Attach the predictions as a new column next to the inputs
future_df = future_df.assign(predicted_total_KW=future_predictions)

# Preview the first rows of the forecast
print(future_df.head())

   guri_num  deg_num  year  month  predicted_total_KW
0      1034        7  2023      1           68.105450
1      1034        7  2023      2           73.281992
2      1034        7  2023      3           64.417250
3      1034        7  2023      4           57.868865
4      1034        7  2023      5           52.853080


In [None]:
# Persist the fitted regressor to disk so it can be reloaded without retraining
joblib.dump(
    value=visualization_model,
    filename='visualization_model.pkl',
)