In [13]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
import pandas as pd

In [14]:

# Read the raw table from disk.
data = pd.read_csv('Electricity BILL.csv')

# The regression target is the 'Electricity_Bill' column; every other column
# is kept as a candidate feature.
y = data['Electricity_Bill']
X = data.drop(columns=['Electricity_Bill'])

In [15]:

# Quick look at the feature frame: per-column dtypes, then the first rows.
print(X.dtypes, X.head(), sep='\n')


Building_Type                   object
Construction_Year                int64
Number_of_Floors                 int64
Energy_Consumption_Per_SqM     float64
Water_Usage_Per_Building       float64
Waste_Recycled_Percentage      float64
Occupancy_Rate                 float64
Indoor_Air_Quality             float64
Smart_Devices_Count              int64
Green_Certified                  int64
Maintenance_Resolution_Time    float64
Building_Status                 object
Maintenance_Priority            object
Energy_Per_SqM                 float64
Number_of_Residents              int64
dtype: object
   Building_Type  Construction_Year  Number_of_Floors  \
0    Residential               1989                12   
1  Institutional               1980                 6   
2     Industrial               2006                10   
3     Commercial               1985                 1   
4     Industrial               2006                12   

   Energy_Consumption_Per_SqM  Water_Usage_Per_Building  \

In [16]:
#for numerical columns
# Select every numeric dtype rather than only int64/float64, so narrower
# numeric columns (e.g. int32/float32) are not silently left unscaled.
numerical_columns = X.select_dtypes(include='number').columns

# Learn the imputation means and the scaling statistics from the training rows
# only, so that no information about the held-out rows leaks into the
# preprocessing. The positional split below uses the same test_size and
# random_state as the train_test_split(X, y, ...) call made later in the
# notebook; for a fixed row count and seed the shuffle is the same, so these
# are the rows that end up in X_train. Keep the two calls in sync.
train_rows = train_test_split(np.arange(len(X)), test_size=0.2, random_state=42)[0]

# Fill missing numeric values (in all rows) with the training-row column means.
train_means = X.iloc[train_rows][numerical_columns].mean()
X[numerical_columns] = X[numerical_columns].fillna(train_means)

# Fit the scaler on the (already imputed) training rows, then standardise
# every row with those training statistics.
scaler = StandardScaler()
scaler.fit(X.iloc[train_rows][numerical_columns])
X[numerical_columns] = scaler.transform(X[numerical_columns])


In [17]:
#for categorical columns
categorical_columns = X.select_dtypes(include=['object']).columns

# Choose the train/test row positions once, up front. They are used twice:
# to compute imputation values from training rows only (no test-set leakage)
# and to build the final train/test frames at the bottom of this cell.
row_positions = np.arange(len(X))
train_rows, test_rows = train_test_split(row_positions, test_size=0.2, random_state=42)

# One fitted encoder per column, keyed by column name, so each
# category -> integer mapping stays available (classes_ / inverse_transform).
label_encoders = {}

# Kept so that `le` is defined even when there are no categorical columns;
# after the loop it refers to the encoder of the last column processed.
le = LabelEncoder()

# NOTE(review): LabelEncoder assigns integer codes in sorted label order, which
# gives a linear model an artificial ordering for nominal categories such as
# Building_Type. One-hot encoding may be more appropriate; it is not applied
# here because it would change the columns of X.
for col in categorical_columns:
    # Most frequent training-row value; fall back to a placeholder when the
    # training rows of this column contain no values at all.
    train_modes = X[col].iloc[train_rows].mode()
    fill_value = train_modes.iloc[0] if not train_modes.empty else 'missing'
    X[col] = X[col].fillna(fill_value)
    # Convert to string before encoding
    X[col] = X[col].astype(str)
    le = LabelEncoder()
    X[col] = le.fit_transform(X[col])
    label_encoders[col] = le

# Materialise the split from the row positions chosen above.
X_train, X_test = X.iloc[train_rows], X.iloc[test_rows]
y_train, y_test = y.iloc[train_rows], y.iloc[test_rows]


In [18]:

# Print the dtype of every column in X to check the result of the
# numeric scaling and categorical encoding steps.
print(X.dtypes)

Building_Type                    int32
Construction_Year              float64
Number_of_Floors               float64
Energy_Consumption_Per_SqM     float64
Water_Usage_Per_Building       float64
Waste_Recycled_Percentage      float64
Occupancy_Rate                 float64
Indoor_Air_Quality             float64
Smart_Devices_Count            float64
Green_Certified                float64
Maintenance_Resolution_Time    float64
Building_Status                  int32
Maintenance_Priority             int32
Energy_Per_SqM                 float64
Number_of_Residents            float64
dtype: object


In [19]:
# Fit ordinary least squares on the training split and predict both splits.
model = LinearRegression().fit(X_train, y_train)
y_train_pred, y_test_pred = model.predict(X_train), model.predict(X_test)


def _score_both(metric):
    """Return (train_score, test_score) for one metric(y_true, y_pred) function."""
    return metric(y_train, y_train_pred), metric(y_test, y_test_pred)


def _adjust_r2(r2, n_rows, n_features):
    """Adjusted R2: 1 - (1 - R2) * (n - 1) / (n - p - 1)."""
    return 1 - (1 - r2) * (n_rows - 1) / (n_rows - n_features - 1)


mse_train, mse_test = _score_both(mean_squared_error)
rmse_train, rmse_test = np.sqrt(mse_train), np.sqrt(mse_test)
mae_train, mae_test = _score_both(mean_absolute_error)
r2_train, r2_test = _score_both(r2_score)

# Row count (n) and feature count (p) of each split.
n_train, p_train = X_train.shape
n_test, p_test = X_test.shape

adjusted_r2_train = _adjust_r2(r2_train, n_train, p_train)
adjusted_r2_test = _adjust_r2(r2_test, n_test, p_test)

# Report each metric for the train split first, then the test split.
print(f"Train MSE: {mse_train}")
print(f"Test MSE: {mse_test}")
print(f"Train RMSE: {rmse_train}")
print(f"Test RMSE: {rmse_test}")
print(f"Train MAE: {mae_train}")
print(f"Test MAE: {mae_test}")
print(f"Train R2: {r2_train}")
print(f"Test R2: {r2_test}")
print(f"Train Adjusted R2: {adjusted_r2_train}")
print(f"Test Adjusted R2: {adjusted_r2_test}")

Train MSE: 24475013.16847547
Test MSE: 24278016.155742623
Train RMSE: 4947.222773281538
Test RMSE: 4927.272689403604
Train MAE: 4006.32846932936
Test MAE: 3842.409312558516
Train R2: 0.013922520844610209
Test R2: 3.7344733075372893e-05
Train Adjusted R2: -0.0011091480449536562
Test Adjusted R2: -0.0640628254763429
