In [5]:
import pandas as pd
from sklearn.model_selection import train_test_split
import seaborn as sns
import matplotlib.pyplot as plt
import numpy as np
from sklearn.preprocessing import StandardScaler, LabelEncoder , OneHotEncoder
from sklearn.linear_model import LinearRegression ,Ridge , ElasticNet
from sklearn.metrics import mean_squared_error, r2_score, mean_absolute_error
from sklearn.feature_selection import RFE
from sklearn.impute import SimpleImputer
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.decomposition import FastICA
from sklearn.ensemble import GradientBoostingRegressor

In [6]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score, mean_absolute_error
from sklearn.impute import SimpleImputer

# Load data
data = pd.read_csv('Electricity BILL.csv')

# Handle missing values: Drop rows with missing target values or fill missing values
data.dropna(subset=['Electricity_Bill'], inplace=True)  # Drop rows where target is missing
data.fillna(data.median(numeric_only=True), inplace=True)  # Fill missing values for numerical features

# Define features and target
X = data.drop('Electricity_Bill', axis=1)
y = data['Electricity_Bill']

# Define categorical and numerical columns
categorical_features = ['Building_Type', 'Green_Certified', 'Building_Status', 'Maintenance_Priority']
numerical_features = ['Construction_Year', 'Number_of_Floors', 'Energy_Consumption_Per_SqM', 
                      'Water_Usage_Per_Building', 'Waste_Recycled_Percentage', 'Occupancy_Rate', 
                      'Indoor_Air_Quality', 'Smart_Devices_Count', 'Maintenance_Resolution_Time', 
                      'Energy_Per_SqM', 'Number_of_Residents']

# Create preprocessing pipelines
preprocessor = ColumnTransformer(
    transformers=[
        ('num', Pipeline(steps=[
            ('imputer', SimpleImputer(strategy='median')),
            ('scaler', StandardScaler())
        ]), numerical_features),
        ('cat', Pipeline(steps=[
            ('encoder', OneHotEncoder(handle_unknown='ignore'))
        ]), categorical_features)
    ])

# Preprocess data
X_preprocessed = preprocessor.fit_transform(X)

# Split data
X_train, X_test, y_train, y_test = train_test_split(X_preprocessed, y, test_size=0.2, random_state=42)

# Train Linear Regression model
model = LinearRegression()
model.fit(X_train, y_train)

# Predict and evaluate
y_train_pred = model.predict(X_train)
y_test_pred = model.predict(X_test)

mse_train = mean_squared_error(y_train, y_train_pred)
mse_test = mean_squared_error(y_test, y_test_pred)
rmse_train = np.sqrt(mse_train)
rmse_test = np.sqrt(mse_test)
r2_train = r2_score(y_train, y_train_pred)
r2_test = r2_score(y_test, y_test_pred)
mae_train = mean_absolute_error(y_train, y_train_pred)
mae_test = mean_absolute_error(y_test, y_test_pred)

print(f'Train MSE: {mse_train}, Test MSE: {mse_test}')
print(f'Train RMSE: {rmse_train}, Test RMSE: {rmse_test}')
print(f'Train R2: {r2_train}, Test R2: {r2_test}')
print(f'Train MAE: {mae_train}, Test MAE: {mae_test}')


Train MSE: 24188925.25027927, Test MSE: 24130184.64205347
Train RMSE: 4918.223790178652, Test RMSE: 4912.248430408775
Train R2: 0.02544875992157558, Test R2: 0.006126227449539279
Train MAE: 3976.698247757928, Test MAE: 3797.4850467106708


In [7]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score, mean_absolute_error
from sklearn.feature_selection import RFE
from sklearn.impute import SimpleImputer

# Load data
data = pd.read_csv('Electricity BILL.csv')

# Handle missing values
data.dropna(subset=['Electricity_Bill'], inplace=True)  # Drop rows where target is missing
data.fillna(data.median(numeric_only=True), inplace=True)  # Fill missing values for numerical features

# Define features and target
X = data.drop('Electricity_Bill', axis=1)
y = data['Electricity_Bill']

# Define categorical and numerical columns
categorical_features = ['Building_Type', 'Green_Certified', 'Building_Status', 'Maintenance_Priority']

numerical_features = ['Construction_Year', 'Number_of_Floors', 'Energy_Consumption_Per_SqM', 
                      'Water_Usage_Per_Building', 'Waste_Recycled_Percentage', 'Occupancy_Rate', 
                      'Indoor_Air_Quality', 'Smart_Devices_Count', 'Maintenance_Resolution_Time', 
                      'Energy_Per_SqM', 'Number_of_Residents']

# Preprocess the features
preprocessor = ColumnTransformer(
    transformers=[
        ('num', Pipeline(steps=[
            ('imputer', SimpleImputer(strategy='median')),
            ('scaler', StandardScaler())
        ]), numerical_features),
        ('cat', Pipeline(steps=[
            ('encoder', OneHotEncoder(handle_unknown='ignore'))
        ]), categorical_features)
    ])

# Apply preprocessing
X_preprocessed = preprocessor.fit_transform(X)

# Split data
X_train, X_test, y_train, y_test = train_test_split(X_preprocessed, y, test_size=0.2, random_state=42)

# Perform RFE to select the top 3 features
model = LinearRegression()
rfe = RFE(estimator=model, n_features_to_select=3)
rfe.fit(X_train, y_train)

# Get the indices of the selected features
selected_features_indices = np.where(rfe.support_)[0]

# Train model using only the selected features
X_train_selected = X_train[:, selected_features_indices]
X_test_selected = X_test[:, selected_features_indices]

model_selected = LinearRegression()
model_selected.fit(X_train_selected, y_train)

# Predict and evaluate the model with selected features
y_train_pred_selected = model_selected.predict(X_train_selected)
y_test_pred_selected = model_selected.predict(X_test_selected)

mse_train_selected = mean_squared_error(y_train, y_train_pred_selected)
mse_test_selected = mean_squared_error(y_test, y_test_pred_selected)
rmse_train_selected = np.sqrt(mse_train_selected)
rmse_test_selected = np.sqrt(mse_test_selected)
r2_train_selected = r2_score(y_train, y_train_pred_selected)
r2_test_selected = r2_score(y_test, y_test_pred_selected)
mae_train_selected = mean_absolute_error(y_train, y_train_pred_selected)
mae_test_selected = mean_absolute_error(y_test, y_test_pred_selected)

# Calculate Adjusted R²
def adjusted_r2(r2, n, k):
    return 1 - (1 - r2) * (n - 1) / (n - k - 1)

n_train = X_train_selected.shape[0]
k_train = X_train_selected.shape[1]
adj_r2_train_selected = adjusted_r2(r2_train_selected, n_train, k_train)

n_test = X_test_selected.shape[0]
k_test = X_test_selected.shape[1]
adj_r2_test_selected = adjusted_r2(r2_test_selected, n_test, k_test)

# Print the results
print(f'Selected Features: {selected_features_indices}')
print(f'Train MSE (Selected Features): {mse_train_selected}')
print(f'Test MSE (Selected Features): {mse_test_selected}')
print(f'Train RMSE (Selected Features): {rmse_train_selected}')
print(f'Test RMSE (Selected Features): {rmse_test_selected}')
print(f'Train R² (Selected Features): {r2_train_selected}')
print(f'Test R² (Selected Features): {r2_test_selected}')
print(f'Train Adjusted R² (Selected Features): {adj_r2_train_selected}')
print(f'Test Adjusted R² (Selected Features): {adj_r2_test_selected}')
print(f'Train MAE (Selected Features): {mae_train_selected}')
print(f'Test MAE (Selected Features): {mae_test_selected}')


Selected Features: [12 14 16]
Train MSE (Selected Features): 24498058.620372634
Test MSE (Selected Features): 23999875.271465864
Train RMSE (Selected Features): 4949.551355463708
Test RMSE (Selected Features): 4898.966755497108
Train R² (Selected Features): 0.012994039174086702
Test R² (Selected Features): 0.011493408333802968
Train Adjusted R² (Selected Features): 0.01002112965352675
Test Adjusted R² (Selected Features): -0.0005615501011506652
Train MAE (Selected Features): 3980.743794181992
Test MAE (Selected Features): 3779.2164679776


In [8]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.linear_model import Ridge
from sklearn.metrics import mean_squared_error, r2_score, mean_absolute_error
from sklearn.impute import SimpleImputer

# Load data
data = pd.read_csv('Electricity BILL.csv')

# Handle missing values
data.dropna(subset=['Electricity_Bill'], inplace=True)  # Drop rows where target is missing
data.fillna(data.median(numeric_only=True), inplace=True)  # Fill missing values for numerical features

# Define features and target
X = data.drop('Electricity_Bill', axis=1)
y = data['Electricity_Bill']

# Define categorical and numerical columns
categorical_features = ['Building_Type', 'Green_Certified', 'Building_Status', 'Maintenance_Priority']
numerical_features = ['Construction_Year', 'Number_of_Floors', 'Energy_Consumption_Per_SqM', 
                      'Water_Usage_Per_Building', 'Waste_Recycled_Percentage', 'Occupancy_Rate', 
                      'Indoor_Air_Quality', 'Smart_Devices_Count', 'Maintenance_Resolution_Time', 
                      'Energy_Per_SqM', 'Number_of_Residents']

# Create preprocessing pipeline
preprocessor = ColumnTransformer(
    transformers=[
        ('num', Pipeline(steps=[
            ('imputer', SimpleImputer(strategy='median')),
            ('scaler', StandardScaler())
        ]), numerical_features),
        ('cat', Pipeline(steps=[
            ('encoder', OneHotEncoder(handle_unknown='ignore'))
        ]), categorical_features)
    ])

# Apply preprocessing
X_preprocessed = preprocessor.fit_transform(X)

# Split data
X_train, X_test, y_train, y_test = train_test_split(X_preprocessed, y, test_size=0.2, random_state=42)

# Train Ridge Regression model
ridge_model = Ridge(alpha=1.0)  # Adjust alpha as needed
ridge_model.fit(X_train, y_train)

# Predict and evaluate
y_train_pred = ridge_model.predict(X_train)
y_test_pred = ridge_model.predict(X_test)

mse_train = mean_squared_error(y_train, y_train_pred)
mse_test = mean_squared_error(y_test, y_test_pred)
rmse_train = np.sqrt(mse_train)
rmse_test = np.sqrt(mse_test)
r2_train = r2_score(y_train, y_train_pred)
r2_test = r2_score(y_test, y_test_pred)
mae_train = mean_absolute_error(y_train, y_train_pred)
mae_test = mean_absolute_error(y_test, y_test_pred)

# Calculate Adjusted R²
def adjusted_r2(r2, n, k):
    return 1 - (1 - r2) * (n - 1) / (n - k - 1)

n_train = X_train.shape[0]
k_train = X_train.shape[1]
adj_r2_train = adjusted_r2(r2_train, n_train, k_train)

n_test = X_test.shape[0]
k_test = X_test.shape[1]
adj_r2_test = adjusted_r2(r2_test, n_test, k_test)

# Print the results
print(f'Train MSE (Ridge Regression): {mse_train}')
print(f'Test MSE (Ridge Regression): {mse_test}')
print(f'Train RMSE (Ridge Regression): {rmse_train}')
print(f'Test RMSE (Ridge Regression): {rmse_test}')
print(f'Train R² (Ridge Regression): {r2_train}')
print(f'Test R² (Ridge Regression): {r2_test}')
print(f'Train Adjusted R² (Ridge Regression): {adj_r2_train}')
print(f'Test Adjusted R² (Ridge Regression): {adj_r2_test}')
print(f'Train MAE (Ridge Regression): {mae_train}')
print(f'Test MAE (Ridge Regression): {mae_test}')


Train MSE (Ridge Regression): 24188931.723814905
Test MSE (Ridge Regression): 24129382.065545976
Train RMSE (Ridge Regression): 4918.224448295839
Test RMSE (Ridge Regression): 4912.166738369737
Train R² (Ridge Regression): 0.025448499108326805
Test R² (Ridge Regression): 0.006159283961667095
Train Adjusted R² (Ridge Regression): 0.002482633820920621
Test Adjusted R² (Ridge Regression): -0.0949837977590482
Train MAE (Ridge Regression): 3976.696642583069
Test MAE (Ridge Regression): 3797.5306314992536


In [9]:
from sklearn.decomposition import FastICA

def evaluate_ica(n_components, max_iter=1000, tol=0.0001):
    ica = FastICA(n_components=n_components, max_iter=max_iter, tol=tol, random_state=42)
    X_ica = ica.fit_transform(X_preprocessed)
    
    # Split data
    X_train, X_test, y_train, y_test = train_test_split(X_ica, y, test_size=0.2, random_state=42)

    # Train Ridge Regression model
    ridge_model = Ridge(alpha=1.0)
    ridge_model.fit(X_train, y_train)

    # Predict and evaluate
    y_train_pred = ridge_model.predict(X_train)
    y_test_pred = ridge_model.predict(X_test)

    mse_train = mean_squared_error(y_train, y_train_pred)
    mse_test = mean_squared_error(y_test, y_test_pred)
    rmse_train = np.sqrt(mse_train)
    rmse_test = np.sqrt(mse_test)
    r2_train = r2_score(y_train, y_train_pred)
    r2_test = r2_score(y_test, y_test_pred)
    mae_train = mean_absolute_error(y_train, y_train_pred)
    mae_test = mean_absolute_error(y_test, y_test_pred)

    # Calculate Adjusted R²
    def adjusted_r2(r2, n, k):
        return 1 - (1 - r2) * (n - 1) / (n - k - 1)

    n_train = X_train.shape[0]
    k_train = X_train.shape[1]
    adj_r2_train = adjusted_r2(r2_train, n_train, k_train)

    n_test = X_test.shape[0]
    k_test = X_test.shape[1]
    adj_r2_test = adjusted_r2(r2_test, n_test, k_test)

    return {
        'n_components': n_components,
        'Train MSE': mse_train,
        'Test MSE': mse_test,
        'Train RMSE': rmse_train,
        'Test RMSE': rmse_test,
        'Train R²': r2_train,
        'Test R²': r2_test,
        'Train Adjusted R²': adj_r2_train,
        'Test Adjusted R²': adj_r2_test,
        'Train MAE': mae_train,
        'Test MAE': mae_test
    }

# Evaluate ICA with different numbers of components with adjusted parameters
results = []
for n_components in [4, 6, 8, 10]:
    result = evaluate_ica(n_components)
    results.append(result)

# Print the results
for result in results:
    print(f"\nNumber of Components: {result['n_components']}")
    print(f"Train MSE: {result['Train MSE']}")
    print(f"Test MSE: {result['Test MSE']}")
    print(f"Train RMSE: {result['Train RMSE']}")
    print(f"Test RMSE: {result['Test RMSE']}")
    print(f"Train R²: {result['Train R²']}")
    print(f"Test R²: {result['Test R²']}")
    print(f"Train Adjusted R²: {result['Train Adjusted R²']}")
    print(f"Test Adjusted R²: {result['Test Adjusted R²']}")
    print(f"Train MAE: {result['Train MAE']}")
    print(f"Test MAE: {result['Test MAE']}")



Number of Components: 4
Train MSE: 24706967.72211012
Test MSE: 24639735.47677926
Train RMSE: 4970.610397336541
Test RMSE: 4963.842813464107
Train R²: 0.004577269017686958
Test R²: -0.01486114657712112
Train Adjusted R²: 0.0005755695966525032
Test Adjusted R²: -0.031430308153890474
Train MAE: 4012.4431053152707
Test MAE: 3857.7395585956506

Number of Components: 6
Train MSE: 24701889.716886185
Test MSE: 24693852.49681609
Train RMSE: 4970.099568105873
Test RMSE: 4969.290945076178
Train R²: 0.004781857532344169
Test R²: -0.01709011778728997
Train Adjusted R²: -0.0012315451411764045
Test Adjusted R²: -0.04220345402895154
Train MAE: 4012.1967724982806
Test MAE: 3866.497630628624

Number of Components: 8
Train MSE: 24653861.40560694
Test MSE: 24554779.20925727
Train RMSE: 4965.265491955786
Test RMSE: 4955.277914431972
Train R²: 0.006716877374348895
Test R²: -0.01136196878978124
Train Adjusted R²: -0.0013015534843847654
Test Adjusted R²: -0.04493415032637138
Train MAE: 4017.6966540640697
Tes

In [10]:
from sklearn.linear_model import ElasticNet

# Define different alpha values to test
alpha_values = [0.1, 0.5, 1.0, 1.5, 2.0]

def evaluate_elasticnet(alpha, X_train, X_test, y_train, y_test):
    # Initialize the ElasticNet model
    elastic_net = ElasticNet(alpha=alpha, l1_ratio=0.5, random_state=42)  # l1_ratio=0.5 for equal mix of L1 and L2
    elastic_net.fit(X_train, y_train)

    # Predict and evaluate
    y_train_pred = elastic_net.predict(X_train)
    y_test_pred = elastic_net.predict(X_test)

    mse_train = mean_squared_error(y_train, y_train_pred)
    mse_test = mean_squared_error(y_test, y_test_pred)
    rmse_train = np.sqrt(mse_train)
    rmse_test = np.sqrt(mse_test)
    r2_train = r2_score(y_train, y_train_pred)
    r2_test = r2_score(y_test, y_test_pred)
    mae_train = mean_absolute_error(y_train, y_train_pred)
    mae_test = mean_absolute_error(y_test, y_test_pred)

    # Calculate Adjusted R²
    def adjusted_r2(r2, n, k):
        return 1 - (1 - r2) * (n - 1) / (n - k - 1)

    n_train = X_train.shape[0]
    k_train = X_train.shape[1]
    adj_r2_train = adjusted_r2(r2_train, n_train, k_train)

    n_test = X_test.shape[0]
    k_test = X_test.shape[1]
    adj_r2_test = adjusted_r2(r2_test, n_test, k_test)

    return {
        'alpha': alpha,
        'Train MSE': mse_train,
        'Test MSE': mse_test,
        'Train RMSE': rmse_train,
        'Test RMSE': rmse_test,
        'Train R²': r2_train,
        'Test R²': r2_test,
        'Train Adjusted R²': adj_r2_train,
        'Test Adjusted R²': adj_r2_test,
        'Train MAE': mae_train,
        'Test MAE': mae_test
    }

# Preprocessing data (Assuming data is preprocessed as in part (c))
data = pd.read_csv('Electricity BILL.csv')

# Handle missing values
data.dropna(inplace=True)

# Normalize numerical features
numerical_features = ['Construction_Year', 'Number_of_Floors', 'Energy_Consumption_Per_SqM', 'Water_Usage_Per_Building', 'Waste_Recycled_Percentage', 'Occupancy_Rate', 'Indoor_Air_Quality', 'Smart_Devices_Count', 'Maintenance_Resolution_Time', 'Energy_Per_SqM', 'Number_of_Residents']
scaler = StandardScaler()
data[numerical_features] = scaler.fit_transform(data[numerical_features])

# Encode categorical features
categorical_features = ['Building_Type', 'Green_Certified', 'Building_Status', 'Maintenance_Priority']
data = pd.get_dummies(data, columns=categorical_features, drop_first=True)

# Split data
X = data.drop('Electricity_Bill', axis=1)
y = data['Electricity_Bill']
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Evaluate ElasticNet with different alpha values
results = []
for alpha in alpha_values:
    result = evaluate_elasticnet(alpha, X_train, X_test, y_train, y_test)
    results.append(result)

# Print the results
for result in results:
    print(f"\nAlpha: {result['alpha']}")
    print(f"Train MSE: {result['Train MSE']}")
    print(f"Test MSE: {result['Test MSE']}")
    print(f"Train RMSE: {result['Train RMSE']}")
    print(f"Test RMSE: {result['Test RMSE']}")
    print(f"Train R²: {result['Train R²']}")
    print(f"Test R²: {result['Test R²']}")
    print(f"Train Adjusted R²: {result['Train Adjusted R²']}")
    print(f"Test Adjusted R²: {result['Test Adjusted R²']}")
    print(f"Train MAE: {result['Train MAE']}")
    print(f"Test MAE: {result['Test MAE']}")



Alpha: 0.1
Train MSE: 24206208.555634238
Test MSE: 24104870.920835983
Train RMSE: 4919.980544233304
Test RMSE: 4909.67116218958
Train R²: 0.024752430237967804
Test R²: 0.007168848713193787
Train Adjusted R²: 0.0058445691915610665
Test Adjusted R²: -0.0748476376974554
Train MAE: 3977.138226810615
Test MAE: 3800.1160615834174

Alpha: 0.5
Train MSE: 24321096.573701005
Test MSE: 24142616.047054958
Train RMSE: 4931.642380962046
Test RMSE: 4913.513615230445
Train R²: 0.02012368963380129
Test R²: 0.005614202880717789
Train Adjusted R²: 0.0011260876981300516
Test Adjusted R²: -0.07653071079435336
Train MAE: 3984.1312440973893
Test MAE: 3808.2256551714895

Alpha: 1.0
Train MSE: 24419264.08122563
Test MSE: 24189405.839793712
Train RMSE: 4941.585178991214
Test RMSE: 4918.272647972427
Train R²: 0.016168604188554814
Test R²: 0.003687025425760604
Train Adjusted R²: -0.0029056779751364115
Test Adjusted R²: -0.07861708986515481
Train MAE: 3989.3934225850508
Test MAE: 3815.635998406996

Alpha: 1.5
Tra

In [11]:
from sklearn.ensemble import GradientBoostingRegressor

# Define and train the Gradient Boosting Regressor model
def evaluate_gradient_boosting(X_train, X_test, y_train, y_test):
    # Initialize the Gradient Boosting Regressor
    gbr = GradientBoostingRegressor(n_estimators=100, learning_rate=0.1, max_depth=3, random_state=42)
    gbr.fit(X_train, y_train)

    # Predict and evaluate
    y_train_pred = gbr.predict(X_train)
    y_test_pred = gbr.predict(X_test)

    mse_train = mean_squared_error(y_train, y_train_pred)
    mse_test = mean_squared_error(y_test, y_test_pred)
    rmse_train = np.sqrt(mse_train)
    rmse_test = np.sqrt(mse_test)
    r2_train = r2_score(y_train, y_train_pred)
    r2_test = r2_score(y_test, y_test_pred)
    mae_train = mean_absolute_error(y_train, y_train_pred)
    mae_test = mean_absolute_error(y_test, y_test_pred)

    # Calculate Adjusted R²
    def adjusted_r2(r2, n, k):
        return 1 - (1 - r2) * (n - 1) / (n - k - 1)

    n_train = X_train.shape[0]
    k_train = X_train.shape[1]
    adj_r2_train = adjusted_r2(r2_train, n_train, k_train)

    n_test = X_test.shape[0]
    k_test = X_test.shape[1]
    adj_r2_test = adjusted_r2(r2_test, n_test, k_test)

    return {
        'Train MSE': mse_train,
        'Test MSE': mse_test,
        'Train RMSE': rmse_train,
        'Test RMSE': rmse_test,
        'Train R²': r2_train,
        'Test R²': r2_test,
        'Train Adjusted R²': adj_r2_train,
        'Test Adjusted R²': adj_r2_test,
        'Train MAE': mae_train,
        'Test MAE': mae_test
    }

# Preprocessing data (Assuming data is preprocessed as in part (c))
data = pd.read_csv('Electricity BILL.csv')

# Handle missing values
data.dropna(inplace=True)

# Normalize numerical features
numerical_features = ['Construction_Year', 'Number_of_Floors', 'Energy_Consumption_Per_SqM', 'Water_Usage_Per_Building', 'Waste_Recycled_Percentage', 'Occupancy_Rate', 'Indoor_Air_Quality', 'Smart_Devices_Count', 'Maintenance_Resolution_Time', 'Energy_Per_SqM', 'Number_of_Residents']
scaler = StandardScaler()
data[numerical_features] = scaler.fit_transform(data[numerical_features])

# Encode categorical features using One-Hot Encoding
categorical_features = ['Building_Type', 'Green_Certified', 'Building_Status', 'Maintenance_Priority']
data = pd.get_dummies(data, columns=categorical_features, drop_first=True)

# Split data
X = data.drop('Electricity_Bill', axis=1)
y = data['Electricity_Bill']
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Evaluate Gradient Boosting Regressor
results_gbr = evaluate_gradient_boosting(X_train, X_test, y_train, y_test)

# Print the results for Gradient Boosting Regressor
print(f"\nGradient Boosting Regressor Results:")
print(f"Train MSE: {results_gbr['Train MSE']}")
print(f"Test MSE: {results_gbr['Test MSE']}")
print(f"Train RMSE: {results_gbr['Train RMSE']}")
print(f"Test RMSE: {results_gbr['Test RMSE']}")
print(f"Train R²: {results_gbr['Train R²']}")
print(f"Test R²: {results_gbr['Test R²']}")
print(f"Train Adjusted R²: {results_gbr['Train Adjusted R²']}")
print(f"Test Adjusted R²: {results_gbr['Test Adjusted R²']}")
print(f"Train MAE: {results_gbr['Train MAE']}")
print(f"Test MAE: {results_gbr['Test MAE']}")



Gradient Boosting Regressor Results:
Train MSE: 15548098.780395458
Test MSE: 24811287.905512877
Train RMSE: 3943.1077566299728
Test RMSE: 4981.093043249933
Train R²: 0.37358031452342877
Test R²: -0.021927046074571743
Train Adjusted R²: 0.3614354430703116
Test Adjusted R²: -0.10634710640247103
Train MAE: 3155.777526146695
Test MAE: 3841.472244635082
