In [1]:
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LinearRegression, Ridge, Lasso
from sklearn.neural_network import MLPRegressor
from sklearn.svm import SVR
from sklearn.metrics import mean_squared_error, r2_score
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from ucimlrepo import fetch_ucirepo 

# Load dataset

In [None]:
# `fetch_ucirepo` is already imported in the notebook's first cell, so it is not
# re-imported here (keeps all imports in one place).
individual_household_electric_power_consumption = fetch_ucirepo(id=235)

# Keep a reference to the fetched feature table; the preprocessing cell copies from it.
X_original = individual_household_electric_power_consumption.data.features

# Print the metadata and the variable descriptions that come with the dataset.
print(individual_household_electric_power_consumption.metadata)
print(individual_household_electric_power_consumption.variables)


# Analyze dataset

In [None]:
# - perform a statistical analysis of the dataset
# compute the Pearson and Spearman rank correlations of the target variable with the explanatory variables (features)
# compute the covariance matrix between the features


# Focus on 'Date', 'Time' and 'Global_intensity' features

# Preprocessing

In [None]:
# Work on a copy of the three columns used here so X_original is never modified
# and this cell can be re-run safely.
X = X_original[['Date', 'Time', 'Global_intensity']].copy()

# Combine 'Date' and 'Time' into a single 'Datetime' column
# (could be deleted if done in analysis).
X['Datetime'] = pd.to_datetime(X['Date'] + ' ' + X['Time'], format='%d/%m/%Y %H:%M:%S')
# Convert 'Datetime' to a Unix timestamp (whole seconds since 1970-01-01).
# Dividing the offset from the epoch by a one-second Timedelta is independent of
# the column's internal datetime resolution; casting to int64 and dividing by
# 10**9 is only correct when the values are stored in nanoseconds.
X['Datetime'] = (X['Datetime'] - pd.Timestamp('1970-01-01')) // pd.Timedelta(seconds=1)

# Drop the original 'Date' and 'Time' columns as they are now redundant.
X = X.drop(columns=['Date', 'Time'])

# Convert 'Global_intensity' to numeric; anything non-numeric becomes NaN and is
# then replaced with the mean of the column (the original author notes that the
# data contains '?' placeholders).
X['Global_intensity'] = pd.to_numeric(X['Global_intensity'], errors='coerce')
# Assign the filled column back instead of calling fillna(inplace=True) on the
# selected column: the in-place form acts on a temporary object and does not
# update X under pandas Copy-on-Write.
# NOTE(review): the mean is computed over the whole column, i.e. before the
# train/test split done later in the notebook, so test rows influence the
# imputed value - confirm this is acceptable.
X['Global_intensity'] = X['Global_intensity'].fillna(X['Global_intensity'].mean())

# 'y' is the target variable (Global_intensity); after removing it, 'X' holds the
# single feature 'Datetime' in Unix timestamp format.
y = X['Global_intensity'].copy()
X = X.drop(columns=['Global_intensity'])


# Standardize data

In [4]:
# Standardize the single feature 'Datetime': fit the scaler, then transform the same column.
datetime_feature = X[['Datetime']]
scaler = StandardScaler().fit(datetime_feature)
X_scaled = scaler.transform(datetime_feature)

# Split the data into training and test sets

In [5]:
# Hold out 20% of the samples for testing; the fixed random_state makes the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X_scaled,
    y,
    test_size=0.2,
    random_state=42,
)


In [None]:
# Show the shape and the first few rows instead of printing the whole array.
print(X_train.shape)
print(X_train[:5])

In [None]:
# Show the shape and the first few rows instead of printing the whole array.
print(X_test.shape)
print(X_test[:5])

In [None]:
# Show the shape and the first few values instead of printing the whole Series.
print(y_train.shape)
print(y_train.head())

In [None]:
# Show the shape and the first few values instead of printing the whole Series.
print(y_test.shape)
print(y_test.head())

# Traditional models

### Linear Regression

In [6]:
# Linear regression baseline: fit on the training split, predict on the test split.
linear = LinearRegression().fit(X_train, y_train)
y_pred_linear = linear.predict(X_test)


### Ridge Regression

In [7]:
# Ridge regression: 5-fold grid search over alpha, scored by R².
ridge = Ridge()
ridge_params = {'alpha': [0.1, 1, 10, 100]}
ridge_cv = GridSearchCV(
    estimator=ridge,
    param_grid=ridge_params,
    scoring='r2',
    cv=5,
).fit(X_train, y_train)

# Predict on the test split with the fitted search object.
y_pred_ridge = ridge_cv.predict(X_test)


In [None]:
# Report the selected alpha, then the mean cross-validated R² of every candidate.
print("Best Parameters for Ridge Regression:", ridge_cv.best_params_)

print("\nResults for each alpha:")
ridge_results = ridge_cv.cv_results_
for idx, candidate in enumerate(ridge_results['params']):
    score = ridge_results['mean_test_score'][idx]
    print(f"Alpha: {candidate['alpha']} | Mean R²: {score}")

### Lasso Regression

In [9]:
# Lasso regression: 5-fold grid search over alpha, scored by R².
lasso = Lasso()
lasso_params = {'alpha': [0.01, 0.1, 1, 10]}
lasso_cv = GridSearchCV(
    estimator=lasso,
    param_grid=lasso_params,
    scoring='r2',
    cv=5,
).fit(X_train, y_train)

# Predict on the test split with the fitted search object.
y_pred_lasso = lasso_cv.predict(X_test)


In [None]:
# Report the selected alpha, then the mean cross-validated R² of every candidate.
print("Best Parameters for Lasso Regression:", lasso_cv.best_params_)

print("\nResults for each alpha:")
lasso_results = lasso_cv.cv_results_
for idx, candidate in enumerate(lasso_results['params']):
    score = lasso_results['mean_test_score'][idx]
    print(f"Alpha: {candidate['alpha']} | Mean R²: {score}")

# Advanced models

### Regression Tree

In [None]:
# Tomek

### Random Forest

In [None]:
# Tomek

### KNN

In [None]:
# Tomek

### Multilayer Perceptron (MLP)

In [None]:
# Multilayer perceptron: 5-fold grid search over layer layout and alpha, scored by R².
mlp = MLPRegressor(max_iter=500, random_state=42)
mlp_params = {
    'hidden_layer_sizes': [(50,), (100,), (50, 50)],
    'alpha': [0.0001, 0.001, 0.01],
}
mlp_cv = GridSearchCV(
    estimator=mlp,
    param_grid=mlp_params,
    scoring='r2',
    cv=5,
).fit(X_train, y_train)

# Predict on the test split with the fitted search object.
y_pred_mlp = mlp_cv.predict(X_test)


In [None]:

# Report the selected hyper-parameters, then the mean cross-validated R² of every candidate.
print("Best Parameters for MLPRegressor:", mlp_cv.best_params_)

print("\nResults for each parameter combination:")
mlp_results = mlp_cv.cv_results_
for idx, candidate in enumerate(mlp_results['params']):
    score = mlp_results['mean_test_score'][idx]
    print(f"Hidden Layer Sizes: {candidate['hidden_layer_sizes']}, Alpha: {candidate['alpha']} | Mean R²: {score}")


### Support Vector Regression (SVR)

In [None]:
# Support vector regression: 5-fold grid search over C, gamma and kernel, scored by R².
# NOTE(review): per the scikit-learn SVR docs, gamma is a coefficient for the
# 'rbf', 'poly' and 'sigmoid' kernels, so the 'linear' candidates are presumably
# fitted once per gamma value with the same outcome - confirm and consider
# splitting the grid per kernel.
svr = SVR()
svr_params = {
    'C': [0.1, 1, 10],
    'gamma': ['scale', 'auto'],
    'kernel': ['rbf', 'linear'],
}
svr_cv = GridSearchCV(
    estimator=svr,
    param_grid=svr_params,
    scoring='r2',
    cv=5,
).fit(X_train, y_train)

# Predict on the test split with the fitted search object.
y_pred_svr = svr_cv.predict(X_test)

In [None]:
# Report the selected hyper-parameters, then the mean cross-validated R² of every candidate.
print("Best Parameters for SVR:", svr_cv.best_params_)

print("\nResults for each parameter combination:")
svr_results = svr_cv.cv_results_
for idx, candidate in enumerate(svr_results['params']):
    score = svr_results['mean_test_score'][idx]
    print(f"C: {candidate['C']}, Gamma: {candidate['gamma']}, Kernel: {candidate['kernel']} | Mean R²: {score}")

# Results

In [11]:
def evaluate_model(name, y_test, y_pred):
    """Print the MSE, RMSE and R² of a set of predictions.

    Args:
        name: Label printed as the heading of the report.
        y_test: True target values.
        y_pred: Predicted target values, compared against ``y_test``.

    Returns:
        None. The report is written to standard output.
    """
    mse_value = mean_squared_error(y_test, y_pred)
    rmse_value = np.sqrt(mse_value)  # RMSE is the square root of the MSE
    r2_value = r2_score(y_test, y_pred)
    print(
        f"{name}:\n"
        f"\tMSE: {mse_value:.4f}\n"
        f"\tRMSE: {rmse_value:.4f}\n"
        f"\tR2: {r2_value:.4f}\n"
    )

In [None]:
# Evaluate the three linear models on the test split, in a fixed order.
print("Model Performance Regression:")
for model_name, predictions in (
    ("Linear Regression", y_pred_linear),
    ("Ridge Regression", y_pred_ridge),
    ("Lasso Regression", y_pred_lasso),
):
    evaluate_model(model_name, y_test, predictions)


In [None]:
# Evaluate the MLP and SVR models on the test split, in a fixed order.
print("Model Performance Advanced:")
for model_name, predictions in (
    ("MLP Regressor", y_pred_mlp),
    ("Support Vector Regression", y_pred_svr),
):
    evaluate_model(model_name, y_test, predictions)

# White noise

### Regression

In [13]:
# Add zero-mean Gaussian noise (standard deviation `noise_scale`) to the training
# targets and refit the linear models on the noisy targets.
noise_scale = 5.0
# Seeded generator so the injected noise, and every result derived from it, is
# reproducible across kernel restarts.
noise_rng = np.random.default_rng(42)
noise = noise_rng.normal(0, noise_scale, size=y_train.shape)
y_train_noisy = y_train + noise

# NOTE: these calls refit the existing `linear`, `ridge_cv` and `lasso_cv`
# objects, so after this cell they no longer hold the models trained on the
# clean targets (the clean predictions were stored earlier in y_pred_*).
linear.fit(X_train, y_train_noisy)
y_pred_linear_noisy = linear.predict(X_test)

ridge_cv.fit(X_train, y_train_noisy)
y_pred_ridge_noisy = ridge_cv.predict(X_test)

lasso_cv.fit(X_train, y_train_noisy)
y_pred_lasso_noisy = lasso_cv.predict(X_test)


### Advanced

In [None]:
# Refit the MLP and SVR grid searches on the noisy training targets and predict
# on the test split; fit() returns the search object, so the calls are chained.
y_pred_mlp_noisy = mlp_cv.fit(X_train, y_train_noisy).predict(X_test)
y_pred_svr_noisy = svr_cv.fit(X_train, y_train_noisy).predict(X_test)

# Stability analysis

In [None]:
# Stability analysis: refit the linear models on training targets perturbed with
# Gaussian noise of increasing standard deviation and score them on the test targets.
noise_scales = [0.01, 0.1, 0.5, 1.0]
results = []  # one record per (noise level, model) holding its test MSE and R²

# Seeded generator so the noise draws, and therefore the reported metrics, are
# reproducible across kernel restarts.
stability_rng = np.random.default_rng(42)

for level in noise_scales:
    noise = stability_rng.normal(0, level, size=y_train.shape)
    # NOTE: this overwrites y_train_noisy and the y_pred_*_noisy variables set by
    # the earlier white-noise regression cell.
    y_train_noisy = y_train + noise

    linear.fit(X_train, y_train_noisy)
    y_pred_linear_noisy = linear.predict(X_test)

    ridge_cv.fit(X_train, y_train_noisy)
    y_pred_ridge_noisy = ridge_cv.predict(X_test)

    lasso_cv.fit(X_train, y_train_noisy)
    y_pred_lasso_noisy = lasso_cv.predict(X_test)

    print(f"Noise Level: {level}")
    for model_name, y_pred_noisy in (
        ("Linear Regression", y_pred_linear_noisy),
        ("Ridge Regression", y_pred_ridge_noisy),
        ("Lasso Regression", y_pred_lasso_noisy),
    ):
        evaluate_model(model_name, y_test, y_pred_noisy)
        # Keep the numbers as well as printing them, so the levels can be
        # compared after the loop (e.g. with pd.DataFrame(results)).
        results.append({
            "noise_level": level,
            "model": model_name,
            "mse": mean_squared_error(y_test, y_pred_noisy),
            "r2": r2_score(y_test, y_pred_noisy),
        })


In [None]:
# Evaluate the MLP and SVR models that were refit on the noisy training targets.
print("Advanced Model Performance with Noise:")
for model_name, predictions in (
    ("MLP Regressor (Noisy)", y_pred_mlp_noisy),
    ("Support Vector Regression (Noisy)", y_pred_svr_noisy),
):
    evaluate_model(model_name, y_test, predictions)

# Plots

### Regressions

In [None]:
# Plot the first 100 test targets against the predictions of the linear models.
fig, ax = plt.subplots(figsize=(12, 8))
ax.plot(y_test.values[:100], label="True Values", marker='o')
ax.plot(y_pred_linear[:100], label="Linear", marker='*')
ax.plot(y_pred_ridge[:100], label="Ridge", marker='x')
ax.plot(y_pred_lasso[:100], label="Lasso", marker='v')
ax.set_title("Model Predictions vs True Values (First 100) for regression")
ax.set_xlabel("Samples")
# The target modelled in this notebook is 'Global_intensity', not global active
# power, so the axis is labelled accordingly.
ax.set_ylabel("Global Intensity")
ax.legend()
plt.show()


### Advanced

In [None]:
# Plot the first 100 test targets against the predictions of the MLP and SVR models.
fig, ax = plt.subplots(figsize=(12, 8))
ax.plot(y_test.values[:100], label="True Values", marker='o')
ax.plot(y_pred_mlp[:100], label="MLP", marker='s')
ax.plot(y_pred_svr[:100], label="SVR", marker='d')
ax.set_title("Model Predictions vs True Values (First 100) for advanced models")
ax.set_xlabel("Samples")
# The target modelled in this notebook is 'Global_intensity', not global active
# power, so the axis is labelled accordingly.
ax.set_ylabel("Global Intensity")
ax.legend()
plt.show()