In [4]:
import numpy as np
import pandas as pd
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.linear_model import ElasticNet, Ridge
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, OneHotEncoder, StandardScaler

In [5]:
# Read the raw dataset; 'Electricity_Bill' is the regression target and every
# remaining column is used as a predictor.
data = pd.read_csv('Electricity BILL.csv')
y = data['Electricity_Bill']
X = data.drop(columns='Electricity_Bill')

# Partition the predictor columns by dtype: object columns are handled as
# categorical, everything else as numerical.
numerical_features = X.select_dtypes(exclude=['object']).columns
categorical_features = X.select_dtypes(include=['object']).columns

# One-hot encode the categorical columns (drop='first', dense output) and wrap
# the resulting array in a DataFrame labelled with the encoder's feature names.
one_hot_encoder = OneHotEncoder(drop='first', sparse_output=False)
X_categorical_encoded = one_hot_encoder.fit_transform(X[categorical_features])
X_categorical_encoded_df = pd.DataFrame(
    X_categorical_encoded,
    columns=one_hot_encoder.get_feature_names_out(categorical_features),
)

# The encoded frame carries a default 0..n-1 index, so the numerical frame's
# index is reset to the same before the two are joined column-wise.
X_numerical = X[numerical_features].reset_index(drop=True)
X_encoded = pd.concat([X_numerical, X_categorical_encoded_df], axis=1)

# Hold out 20% of the rows for testing; the seed is fixed for reproducibility.
X_train, X_test, y_train, y_test = train_test_split(
    X_encoded, y, test_size=0.2, random_state=42
)

# Fit the scaler on the training rows only, then apply that same scaling to
# both splits.
scaler = StandardScaler().fit(X_train)
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)


In [6]:

def _adjusted_r2(r2, n_samples, n_features):
    """Return adjusted R2, or NaN when it is undefined.

    Adjusted R2 is ``1 - (1 - r2) * (n - 1) / (n - p - 1)``. When
    ``n - p - 1 <= 0`` the formula either divides by zero or yields a
    meaningless value, so NaN is returned instead.
    """
    degrees_of_freedom = n_samples - n_features - 1
    if degrees_of_freedom <= 0:
        return float("nan")
    return 1 - (1 - r2) * (n_samples - 1) / degrees_of_freedom


def _regression_metrics(y_true, y_pred, n_features):
    """Compute MSE, RMSE, MAE, R2 and adjusted R2 for one data split."""
    mse = mean_squared_error(y_true, y_pred)
    rmse = np.sqrt(mse)
    mae = mean_absolute_error(y_true, y_pred)
    r2 = r2_score(y_true, y_pred)
    return {
        'mse': mse,
        'rmse': rmse,
        'mae': mae,
        'r2': r2,
        'adjusted_r2': _adjusted_r2(r2, len(y_true), n_features),
    }


def evaluate_gradient_boosting(X_train, X_test, y_train, y_test):
    """Fit a gradient boosting regressor and report train/test metrics.

    A ``GradientBoostingRegressor(random_state=42)`` is fitted on the training
    split, both splits are predicted, and MSE, RMSE, MAE, R2 and adjusted R2
    are computed for each. The metrics are printed and also returned.

    Parameters
    ----------
    X_train, X_test : 2-D array-like exposing ``.shape``
        Feature matrices; ``shape[1]`` is used as the number of predictors in
        the adjusted R2 formula.
    y_train, y_test : array-like supporting ``len()``
        Target values for the corresponding feature matrices.

    Returns
    -------
    dict
        Keys ``'{train,test}_{mse,rmse,mae,r2,adjusted_r2}'``. An adjusted R2
        entry is NaN when that split has no more samples than
        ``n_features + 1`` (previously this raised ``ZeroDivisionError`` or
        produced a meaningless number).
    """
    gbr = GradientBoostingRegressor(random_state=42)
    gbr.fit(X_train, y_train)

    train_metrics = _regression_metrics(y_train, gbr.predict(X_train), X_train.shape[1])
    test_metrics = _regression_metrics(y_test, gbr.predict(X_test), X_test.shape[1])

    # (metric key, printed label) pairs, in the order they are reported.
    report_layout = (
        ('mse', 'MSE'),
        ('rmse', 'RMSE'),
        ('mae', 'MAE'),
        ('r2', 'R2'),
        ('adjusted_r2', 'Adjusted R2'),
    )

    print("=== Gradient Boosting Regressor ===")
    results = {}
    for key, label in report_layout:
        print(f"Train {label}: {train_metrics[key]}")
        print(f"Test {label}: {test_metrics[key]}")
        results[f'train_{key}'] = train_metrics[key]
        results[f'test_{key}'] = test_metrics[key]

    return results


In [7]:
# Fit and score the gradient boosting model on the prepared split; the
# returned metrics dict is kept for later use.
gbr_results = evaluate_gradient_boosting(
    X_train=X_train,
    X_test=X_test,
    y_train=y_train,
    y_test=y_test,
)


=== Gradient Boosting Regressor ===
Train MSE: 15548098.780395458
Test MSE: 24763212.870917924
Train RMSE: 3943.1077566299728
Test RMSE: 4976.26495184068
Train MAE: 3155.777526146695
Test MAE: 3837.5378752506076
Train R2: 0.37358031452342877
Test R2: -0.019946932092554936
Train Adjusted R2: 0.3614354430703116
Test Adjusted R2: -0.10420341778715736
