In [None]:
pip install meteostat

In [None]:
import joblib
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from meteostat import Point, Daily
from sklearn.linear_model import SGDRegressor
from sklearn.model_selection import train_test_split

from sklearn.metrics import accuracy_score
from sklearn.metrics import r2_score
from sklearn.metrics import mean_squared_error
from sklearn.metrics import mean_absolute_error

from sklearn.impute import SimpleImputer
from sklearn.model_selection import GridSearchCV
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler
from datetime import datetime

In [None]:
# NOTE(security): joblib.load unpickles the file, which can execute arbitrary
# code -- only load artifacts that come from a trusted source.
scaler = joblib.load('short_term_scaler.pkl')
# NOTE(review): `scaler` is not referenced again in the cells visible here;
# confirm whether the CSV below is already scaled or a transform is missing.
print("Scaler loaded \n\n")

# Read the dataset and discard every row that contains a missing value.
df = pd.read_csv('short_term_consumption.csv').dropna()

# Preview the first rows as the cell's rich output.
df.head()

In [None]:
# Columns fed to the model.
feature_columns = [
    'temp', 'dwpt', 'rhum', 'prcp', 'wdir', 'wspd', 'pres',
    'Month', 'Day', 'Hour', 'Day_of_week',
    'previous1hr', 'previous2hr', 'previous3hr',
]
# Subset of the features without the calendar fields -- presumably the columns
# handled by the saved scaler; not used again in the visible cells (TODO confirm).
scaler_columns = [
    'temp', 'dwpt', 'rhum', 'prcp', 'wdir', 'wspd', 'pres',
    'previous1hr', 'previous2hr', 'previous3hr',
]

# Design matrix and regression target.
X = df[feature_columns]
y = df['Global_active_power']

# Hold out 20% of the rows for evaluation; the fixed seed keeps the split
# repeatable.
# NOTE(review): the split is shuffled; if rows are time-ordered (lag-style
# column names suggest so), confirm a random split is intended.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

sgd_model = SGDRegressor(random_state=42, warm_start=True)

# Each hyperparameter has exactly one candidate, so the search evaluates a
# single configuration with 5-fold cross-validation.
param_grid = dict(
    alpha=[0.001],
    eta0=[0.001],
    penalty=['l2'],
    max_iter=[5000],
    tol=[1e-5],
    power_t=[0.25],
)

grid_search = GridSearchCV(
    estimator=sgd_model,
    param_grid=param_grid,
    cv=5,
    scoring='r2',
    n_jobs=-1,
)
grid_search.fit(X_train, y_train)

# Estimator refit with the selected parameters; later cells keep using it.
best_model = grid_search.best_estimator_

print("Best Parameters from GridSearchCV:", grid_search.best_params_)


In [None]:
# Incrementally update the estimator selected by the grid search by feeding it
# the training set in fixed-size mini-batches, then score it on the hold-out set.
batch_size = 2000
# Ceiling division: the trailing partial batch is counted and trained on.
# Floor division here would silently skip the last len(X_train) % batch_size
# rows (and every row when the training set is smaller than one batch).
n_batches = -(-len(X_train) // batch_size)

for i in range(n_batches):
    start_idx = i * batch_size
    end_idx = start_idx + batch_size  # .iloc clamps a slice that runs past the end
    X_batch = X_train.iloc[start_idx:end_idx][feature_columns]
    y_batch = y_train.iloc[start_idx:end_idx]

    best_model.partial_fit(X_batch, y_batch)
    print(f"Batch {i+1}/{n_batches} fitted")


# Evaluate on the held-out test split.
predictions = best_model.predict(X_test)

r2 = r2_score(y_test, predictions)
mse = mean_squared_error(y_test, predictions)
mae = mean_absolute_error(y_test, predictions)

print("R² Score:", r2)
print("MSE:", mse)
print("MAE:", mae)

print()

In [None]:
import matplotlib.pyplot as plt

# Overlay the first test targets and the matching predictions.
# The window is capped at the amount of data actually available: plotting a
# fixed range(200) against fewer than 200 points raises a shape-mismatch error.
n_shown = min(200, len(y_test), len(predictions))
x_axis = range(n_shown)

plt.figure(figsize=(20, 6))
# .iloc makes the positional slice explicit, independent of the index labels
# left over from the shuffled train/test split.
plt.plot(x_axis, y_test.iloc[:n_shown], label='Real Values', marker='o')
plt.plot(x_axis, predictions[:n_shown], label='Predictions', marker='x')
plt.xlabel('Example')
plt.ylabel('Value')
plt.title(f'Prediction vs Real Value (First {n_shown} Examples)')
plt.legend()
plt.grid(True)
plt.show()


In [None]:
import joblib

# Serialize the trained estimator to disk so it can be reloaded later
# with joblib.load.
joblib.dump(value=best_model, filename='short_term_consumption_model.pkl')
print("Model saved")