In [None]:
# Import necessary libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.linear_model import LinearRegression, Ridge, Lasso
from sklearn.tree import DecisionTreeRegressor
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error


# Load the UCI "Individual household electric power consumption" dataset.
url = "https://archive.ics.uci.edu/ml/machine-learning-databases/00235/household_power_consumption.txt"
energy_data = pd.read_csv(
    url,
    sep=';',
    # The raw file marks missing measurements with '?'. Without this the
    # numeric columns load as strings and dropna() below removes nothing.
    na_values=['?'],
    low_memory=False,  # infer each column's dtype from the whole file
)

# Build a single timestamp column with an explicit day-first format rather
# than relying on format inference (deprecated in recent pandas releases and
# ambiguous for dd/mm dates). Date and Time are removed and Datetime is placed
# first, which is the layout read_csv's combined date parsing used to produce.
date_part = energy_data.pop('Date')
time_part = energy_data.pop('Time')
energy_data.insert(
    0,
    'Datetime',
    pd.to_datetime(date_part + ' ' + time_part, format='%d/%m/%Y %H:%M:%S'),
)

# Discard rows with any missing measurement.
energy_data.dropna(inplace=True)


# Derive calendar features from the timestamp column.
timestamps = energy_data['Datetime'].dt
energy_data['Month'] = timestamps.month
energy_data['Year'] = timestamps.year

# Feature matrix (month and year only) and regression target.
feature_columns = ['Month', 'Year']
X = energy_data[feature_columns]
y = energy_data['Global_active_power']

# Hold out 20% of the rows for the final evaluation; the fixed seed keeps the
# split identical between runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)


# Candidate regressors. The tree-based estimators are seeded so that
# repeated runs produce the same scores.
models = {
    'Linear Regression': LinearRegression(),
    'Ridge Regression': Ridge(),
    'Lasso Regression': Lasso(),
    'Decision Tree': DecisionTreeRegressor(random_state=42),
    'Random Forest': RandomForestRegressor(random_state=42),
}

# 5-fold cross-validation on the training split. scikit-learn reports the
# negated MSE for this scorer, so flip the sign before taking the root.
cv_rmse = {}
for name, model in models.items():
    scores = cross_val_score(model, X_train, y_train, cv=5, scoring='neg_mean_squared_error')
    rmse_scores = np.sqrt(-scores)
    cv_rmse[name] = np.mean(rmse_scores)
    print(f"{name}: Mean RMSE: {cv_rmse[name]}, Standard Deviation RMSE: {np.std(rmse_scores)}")

# Pick the model with the lowest mean cross-validated RMSE instead of
# hard-coding one. cross_val_score fits clones, so the estimator stored in
# `models` is still unfitted and can be trained on the full training split.
best_name = min(cv_rmse, key=cv_rmse.get)
best_model = models[best_name]
best_model.fit(X_train, y_train)

# Final evaluation on the held-out test split. RMSE is printed alongside MSE
# so the result is directly comparable with the cross-validation output.
y_pred = best_model.predict(X_test)
mse = mean_squared_error(y_test, y_pred)
print('Best Model:', best_name)
print('Best Model Mean Squared Error:', mse)
print('Best Model Root Mean Squared Error:', np.sqrt(mse))
