In [3]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import OneHotEncoder,StandardScaler, LabelEncoder
from sklearn.linear_model import LinearRegression
from sklearn.linear_model import Ridge
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
from sklearn.decomposition import FastICA

In [4]:
# Load the raw table; 'Electricity_Bill' is the regression target and every
# other column is a candidate feature.
data = pd.read_csv('Electricity BILL.csv')

y = data['Electricity_Bill']
X = data.drop(columns='Electricity_Bill')

# Partition the feature columns by dtype: object-typed columns are routed to
# the one-hot encoder, all remaining columns are passed through unchanged.
categorical_features = X.select_dtypes(include=['object']).columns
numerical_features = X.select_dtypes(exclude=['object']).columns

# Dense one-hot encoding with the first level of each feature dropped.
one_hot_encoder = OneHotEncoder(sparse_output=False, drop='first')
X_categorical_encoded = one_hot_encoder.fit_transform(X[categorical_features])

# Wrap the encoded array in a frame with readable column names, then glue it
# next to the numerical columns. Both frames carry a fresh 0..n-1 index, so
# the column-wise concat lines rows up positionally.
X_categorical_encoded_df = pd.DataFrame(
    X_categorical_encoded,
    columns=one_hot_encoder.get_feature_names_out(categorical_features),
)
X_numerical = X[numerical_features].reset_index(drop=True)
X_encoded = pd.concat([X_numerical, X_categorical_encoded_df], axis='columns')

# Hold out 20% of the rows for testing (fixed seed for repeatable splits).
X_train, X_test, y_train, y_test = train_test_split(
    X_encoded,
    y,
    test_size=0.2,
    random_state=42,
)

# Standardise using statistics learned from the training split only, then
# apply the same transformation to both splits.
scaler = StandardScaler().fit(X_train)
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)



In [5]:

def evaluate_model(X_train, X_test, y_train, y_test, model=None):
    """Fit a regressor on the training split and score it on both splits.

    Parameters
    ----------
    X_train, X_test : 2-D array-like exposing ``.shape``
        Feature matrices for the training and test splits. ``shape[0]`` is
        read as the sample count and ``shape[1]`` as the number of
        predictors when computing adjusted R2.
    y_train, y_test : array-like
        Targets matching ``X_train`` and ``X_test`` respectively.
    model : object with ``fit(X, y)`` and ``predict(X)``, optional
        Estimator to evaluate. It is (re)fitted on ``X_train``/``y_train``
        inside this function. When omitted, a fresh ``LinearRegression()``
        is used, which matches the previous behaviour.

    Returns
    -------
    dict
        Train/Test values for MSE, RMSE, MAE, R2 and adjusted R2, keyed
        ``"Train MSE"``, ``"Test MSE"``, ... ``"Test Adjusted R2"`` in that
        insertion order. An adjusted R2 entry is NaN when the split has too
        few samples for the statistic to be defined (n <= p + 1).
    """
    if model is None:
        model = LinearRegression()

    model.fit(X_train, y_train)

    y_train_pred = model.predict(X_train)
    y_test_pred = model.predict(X_test)

    train_mse = mean_squared_error(y_train, y_train_pred)
    test_mse = mean_squared_error(y_test, y_test_pred)
    train_rmse = np.sqrt(train_mse)
    test_rmse = np.sqrt(test_mse)
    train_mae = mean_absolute_error(y_train, y_train_pred)
    test_mae = mean_absolute_error(y_test, y_test_pred)
    train_r2 = r2_score(y_train, y_train_pred)
    test_r2 = r2_score(y_test, y_test_pred)

    def adjusted_r2(r2, X):
        # Adjusted R2 = 1 - (1 - R2) * (n - 1) / (n - p - 1).
        n = X.shape[0]
        p = X.shape[1]
        dof = n - p - 1
        if dof <= 0:
            # With n <= p + 1 the correction has no residual degrees of
            # freedom: the formula would divide by zero (or flip sign), so
            # report the value as undefined instead of crashing.
            return np.nan
        return 1 - (1 - r2) * (n - 1) / dof

    train_adjusted_r2 = adjusted_r2(train_r2, X_train)
    test_adjusted_r2 = adjusted_r2(test_r2, X_test)

    return {
        "Train MSE": train_mse,
        "Test MSE": test_mse,
        "Train RMSE": train_rmse,
        "Test RMSE": test_rmse,
        "Train MAE": train_mae,
        "Test MAE": test_mae,
        "Train R2": train_r2,
        "Test R2": test_r2,
        "Train Adjusted R2": train_adjusted_r2,
        "Test Adjusted R2": test_adjusted_r2
    }

# Sweep over several ICA output dimensionalities. For each one, learn the
# unmixing on the training split, project both splits into the component
# space, and report the regression metrics obtained there.
for n_components in (4, 5, 6, 8):
    print(f"\n=== ICA with {n_components} components ===")

    ica = FastICA(random_state=42, n_components=n_components)

    # Fit on the training data only; the test split is merely transformed.
    X_train_ica = ica.fit_transform(X_train)
    X_test_ica = ica.transform(X_test)

    metrics = evaluate_model(
        X_train=X_train_ica,
        X_test=X_test_ica,
        y_train=y_train,
        y_test=y_test,
    )

    # One "name: value" line per metric, in the order the dict provides.
    for metric, value in metrics.items():
        print(f"{metric}: {value}")



=== ICA with 4 components ===
Train MSE: 24589773.930624887
Test MSE: 24232749.731079873
Train RMSE: 4958.807712608434
Test RMSE: 4922.67708986481
Train MAE: 3978.097434702609
Test MAE: 3802.1953103700625
Train R2: 0.009298907273198487
Test R2: 0.0019017777209204834
Train Adjusted R2: 0.005316189312487674
Test Adjusted R2: -0.014393703459146145

=== ICA with 5 components ===
Train MSE: 24588480.246258456
Test MSE: 24254297.990001556
Train RMSE: 4958.67726780625
Test RMSE: 4924.86527633006
Train MAE: 3978.746224466576
Test MAE: 3804.2047264950625
Train R2: 0.009351028716814813
Test R2: 0.0010142482757846683
Train Adjusted R2: 0.004367884998086491
Test Adjusted R2: -0.019456771226760816

=== ICA with 6 components ===
Train MSE: 24587094.91133023
Test MSE: 24236046.1667434
Train RMSE: 4958.537577888286
Test RMSE: 4923.011899918931
Train MAE: 3978.878636668621
Test MAE: 3802.738812113786
Train R2: 0.009406842683671868
Test R2: 0.001766004166035673
Train Adjusted R2: 0.003421385539766564
T