In [3]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
from sklearn.preprocessing import OneHotEncoder, StandardScaler
from sklearn.linear_model import Ridge

In [4]:
# Read the CSV (path is relative to the notebook's working directory).
data = pd.read_csv('Electricity BILL.csv')

# 'Electricity_Bill' is the regression target; every other column is a predictor.
y = data['Electricity_Bill']
X = data.drop(columns='Electricity_Bill')

In [5]:
# Partition the predictor columns by dtype: object-typed columns are treated
# as categorical, all remaining columns as numerical.
categorical_features = X.select_dtypes(include=['object']).columns
numerical_features = X.select_dtypes(exclude=['object']).columns

# Numerical block, re-indexed 0..n-1 so it lines up row-for-row with the
# freshly built (default-indexed) encoded frame below.
X_numerical = X[numerical_features].reset_index(drop=True)

# One-hot encode the categorical block as a dense array, dropping the first
# level of each feature.
one_hot_encoder = OneHotEncoder(sparse_output=False, drop='first')
X_categorical_encoded = one_hot_encoder.fit_transform(X[categorical_features])
X_categorical_encoded_df = pd.DataFrame(
    data=X_categorical_encoded,
    columns=one_hot_encoder.get_feature_names_out(categorical_features),
)

# Final design matrix: numerical columns first, then the one-hot columns.
X_encoded = pd.concat([X_numerical, X_categorical_encoded_df], axis=1)

# Hold out 20% of the rows for testing; the fixed seed keeps the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X_encoded,
    y,
    test_size=0.2,
    random_state=42,
)

# Learn the scaling statistics from the training rows only, then apply the
# same fitted transform to both splits.
scaler = StandardScaler().fit(X_train)
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)

# Fit a ridge regression (alpha=1.0) on the training split.
ridge_model = Ridge(alpha=1.0).fit(X_train, y_train)

# Predictions for both splits, used by the metrics that follow.
y_train_pred, y_test_pred = (
    ridge_model.predict(X_train),
    ridge_model.predict(X_test),
)

# Training-split metrics.
train_mse = mean_squared_error(y_true=y_train, y_pred=y_train_pred)
train_rmse = np.sqrt(train_mse)
train_mae = mean_absolute_error(y_true=y_train, y_pred=y_train_pred)
train_r2 = r2_score(y_true=y_train, y_pred=y_train_pred)

# Test-split metrics.
test_mse = mean_squared_error(y_true=y_test, y_pred=y_test_pred)
test_rmse = np.sqrt(test_mse)
test_mae = mean_absolute_error(y_true=y_test, y_pred=y_test_pred)
test_r2 = r2_score(y_true=y_test, y_pred=y_test_pred)

def adjusted_r2(r2, X, y):
    """Return R² adjusted for the number of predictors.

    Computes ``1 - (1 - r2) * (n - 1) / (n - p - 1)`` where ``n`` is the
    number of rows and ``p`` the number of columns of ``X``.

    Parameters
    ----------
    r2 : float
        Ordinary coefficient of determination.
    X : object with a 2-D ``shape``
        Feature matrix; only ``X.shape[0]`` and ``X.shape[1]`` are read.
    y : any
        Not used by the computation; retained so existing calls keep working.

    Returns
    -------
    float
        The adjusted R², or NaN when ``n - p - 1 <= 0`` (the statistic is
        undefined when there are no residual degrees of freedom).
    """
    n_samples = X.shape[0]
    n_features = X.shape[1]

    # Residual degrees of freedom. Zero would divide by zero and a negative
    # value would flip the sign of the penalty, so both are reported as NaN.
    residual_dof = n_samples - n_features - 1
    if residual_dof <= 0:
        return float("nan")

    return 1 - (1 - r2) * (n_samples - 1) / residual_dof

# Adjusted R² for each split, computed from the matching feature matrix.
train_adjusted_r2 = adjusted_r2(train_r2, X_train, y_train)
test_adjusted_r2 = adjusted_r2(test_r2, X_test, y_test)

# Print every metric as "<label> <value>", train first and then test.
for label, value in (
    ("Train MSE:", train_mse),
    ("Test MSE:", test_mse),
    ("Train RMSE:", train_rmse),
    ("Test RMSE:", test_rmse),
    ("Train MAE:", train_mae),
    ("Test MAE:", test_mae),
    ("Train R2:", train_r2),
    ("Test R2:", test_r2),
    ("Train Adjusted R2:", train_adjusted_r2),
    ("Test Adjusted R2:", test_adjusted_r2),
):
    print(label, value)


Train MSE: 24188925.913294327
Test MSE: 24129688.93419977
Train RMSE: 4918.223857582565
Test RMSE: 4912.197973840201
Train MAE: 3976.6837323582586
Test MAE: 3797.4628337330823
Train R2: 0.025448733209262442
Test R2: 0.006146644659019929
Train Adjusted R2: 0.00655437191433994
Test Adjusted R2: -0.07595428469523502
