In [3]:
import numpy as np
import pandas as pd
from sklearn.feature_selection import RFE
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score

In [4]:
# Read the electricity-bill dataset; the path is relative to the working directory.
data = pd.read_csv('Electricity BILL.csv')

# Separate the regression target from the remaining columns.
y = data['Electricity_Bill']
X = data.drop(columns=['Electricity_Bill'])

In [5]:
# Encode categorical variables using LabelEncoder.
# X is rebuilt from `data` first so that this cell is idempotent: the loop
# below overwrites object columns with integer codes, so without the rebuild a
# second run would find no object columns and leave `label_encoders` empty.
X = data.drop('Electricity_Bill', axis=1)
label_encoders = {}
for column in X.select_dtypes(include=['object']).columns:
    # Keep one fitted encoder per column so the integer codes can be mapped back.
    label_encoders[column] = LabelEncoder()
    X[column] = label_encoders[column].fit_transform(X[column])

# Hold out 20% of the rows for evaluation; the fixed seed keeps the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Fit the scaler on the training rows only, then apply the same transform to the test rows.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

model = LinearRegression()

# Recursive feature elimination with the linear model, keeping three features.
rfe = RFE(model, n_features_to_select=3)
rfe = rfe.fit(X_train, y_train)

# rfe.support_ is used as a mask over the columns of X to recover the kept names.
selected_features = np.array(X.columns)[rfe.support_]
print(f"Selected Features: {selected_features}")

# Apply the same mask to the scaled arrays.
X_train_selected = X_train[:, rfe.support_]
X_test_selected = X_test[:, rfe.support_]

# Fit the regression on the selected columns only.
model.fit(X_train_selected, y_train)

y_train_pred = model.predict(X_train_selected)
y_test_pred = model.predict(X_test_selected)

# Error metrics on both splits; RMSE is the square root of the matching MSE.
train_mse = mean_squared_error(y_train, y_train_pred)
test_mse = mean_squared_error(y_test, y_test_pred)
train_rmse = np.sqrt(train_mse)
test_rmse = np.sqrt(test_mse)
train_mae = mean_absolute_error(y_train, y_train_pred)
test_mae = mean_absolute_error(y_test, y_test_pred)
train_r2 = r2_score(y_train, y_train_pred)
test_r2 = r2_score(y_test, y_test_pred)

def adjusted_r2(r2, X, y=None):
    """Return the adjusted R-squared for a score computed on ``X``.

    Evaluates ``1 - (1 - r2) * (n - 1) / (n - p - 1)`` where ``n`` is
    ``X.shape[0]`` and ``p`` is ``X.shape[1]``.

    Parameters
    ----------
    r2 : float
        Plain R-squared score to adjust.
    X : object exposing ``shape``
        Only ``shape[0]`` and ``shape[1]`` are read.
    y : optional
        Not used by the computation; accepted so that existing
        three-argument calls keep working.

    Returns
    -------
    float
        The adjusted score.

    Raises
    ------
    ValueError
        If ``n - p - 1 <= 0``. The correction is undefined there; the
        unguarded formula would divide by zero or flip sign.
    """
    n = X.shape[0]
    p = X.shape[1]
    residual_dof = n - p - 1
    if residual_dof <= 0:
        raise ValueError(
            f"adjusted R2 is undefined for n={n} samples and p={p} features "
            "(requires n > p + 1)"
        )
    return 1 - (1 - r2) * (n - 1) / residual_dof

# Adjust each split's R2 for the number of selected features.
train_adjusted_r2 = adjusted_r2(train_r2, X_train_selected, y_train)
test_adjusted_r2 = adjusted_r2(test_r2, X_test_selected, y_test)

# Report every metric as a train/test pair, in a fixed order.
metric_report = (
    ("Train MSE:", train_mse),
    ("Test MSE:", test_mse),
    ("Train RMSE:", train_rmse),
    ("Test RMSE:", test_rmse),
    ("Train MAE:", train_mae),
    ("Test MAE:", test_mae),
    ("Train R2:", train_r2),
    ("Test R2:", test_r2),
    ("Train Adjusted R2:", train_adjusted_r2),
    ("Test Adjusted R2:", test_adjusted_r2),
)
for metric_label, metric_value in metric_report:
    print(metric_label, metric_value)


Selected Features: ['Building_Type' 'Green_Certified' 'Number_of_Residents']
Train MSE: 24569032.90689799
Test MSE: 23941409.06299838
Train RMSE: 4956.715939702212
Test RMSE: 4892.995918964002
Train MAE: 4006.473377514736
Test MAE: 3813.948128176773
Train R2: 0.010134545491283897
Test R2: 0.013901513867940918
Train Adjusted R2: 0.007153023037944406
Test Adjusted R2: 0.0018759225736475482
