In [2]:
import pandas as pd
import seaborn as sns
import numpy as np
import matplotlib.pyplot as plt
from sklearn.linear_model import LinearRegression, Ridge, Lasso, ElasticNet, HuberRegressor, TheilSenRegressor, RANSACRegressor
from sklearn.metrics import mean_squared_error as mse
from sklearn.metrics import root_mean_squared_error as rmse
from sklearn.metrics import mean_absolute_error as mae
from sklearn.metrics import median_absolute_error as mdn
from sklearn.metrics import r2_score as r2
from sklearn.metrics import mean_absolute_percentage_error as mape
from sklearn.model_selection import train_test_split, cross_val_score, KFold, GridSearchCV, RandomizedSearchCV
from sklearn.datasets import fetch_california_housing
import warnings
warnings.filterwarnings('ignore')
from sklearn.metrics import r2_score

In [3]:
df = pd.read_csv('ensemble_methods_data.csv')
df.head()

Unnamed: 0,x1,x2,x3,x4,x5,x6,x7,x8,x9,x10,...,x12,x13,x14,x15,x16,x17,x18,x19,x20,y
0,0.895355,-0.804316,-0.241497,-0.491154,0.454741,-0.061764,0.253165,-0.25618,-0.746578,-0.205859,...,-0.51643,-0.033613,0.479442,-0.196467,0.62953,-0.631578,-0.284875,-1.548618,-0.176774,-40.982233
1,0.885577,0.238499,-1.078315,0.917768,0.43375,0.358918,-1.364676,-0.656835,-1.116451,-0.440902,...,0.215669,-0.276335,-1.039793,-0.352546,0.139821,1.781685,-0.310396,0.459204,0.987862,155.53502
2,1.112047,-0.089336,0.268167,0.193799,0.629558,0.273311,-1.506922,-0.215568,0.169945,-2.021002,...,-0.885326,-0.604719,-1.20433,-0.772637,-1.94734,-0.551516,1.02707,-1.188385,-0.160905,-83.416703
3,0.736763,0.223995,1.030439,-0.334604,-1.083188,1.291469,-0.629234,0.272518,-0.265828,-0.836195,...,0.233595,-1.277085,0.307003,2.116803,-1.419384,0.61886,0.565633,-0.017862,1.077294,82.954891
4,-0.037887,-2.226762,-0.002112,1.104175,0.604025,-0.748072,-0.035758,0.206135,-1.335016,2.435985,...,-1.659906,0.527568,-0.074252,0.088823,-0.138796,0.820967,-0.513397,1.941746,-0.085419,-154.028441


In [15]:
X = df.drop('y', axis = 1)
y = df[['y']]

# Split the dataset into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

linear = LinearRegression()
linear.fit(X_train, y_train)
y_pred_linear = linear.predict(X_test)
print("Linear Regression")
print("RMSE:", rmse(y_pred_linear, y_test))
print("MSE:", mse(y_pred_linear, y_test))
print("MAE:", mae(y_pred_linear, y_test))
print("Median Absolute Error:", mdn(y_pred_linear, y_test))
print("R2:", r2(y_pred_linear, y_test))
print("MAPE:", mape(y_pred_linear, y_test))

Linear Regression
RMSE: 0.39766681767827167
MSE: 0.15813889788236377
MAE: 0.32082997909218014
Median Absolute Error: 0.27348586363366323
R2: 0.9999941355653444
MAPE: 0.029254770436260305


array([-0.00259188])

In [5]:
ridge = Ridge()
ridge.fit(X_train, y_train)
y_pred_ridge = ridge.predict(X_test)

print("Ridge Regression")
print("RMSE:", rmse(y_pred_ridge, y_test))
print("MSE:", mse(y_pred_ridge, y_test))
print("MAE:", mae(y_pred_ridge, y_test))
print("Median Absolute Error:", mdn(y_pred_ridge, y_test))
print("R2:", r2(y_pred_ridge, y_test))
print("MAPE:", mape(y_pred_ridge, y_test))

Ridge Regression
RMSE: 0.4435228152292265
MSE: 0.19671248762885857
MAE: 0.3618750976033255
Median Absolute Error: 0.3060487279321151
R2: 0.9999926860093509
MAPE: 0.028729019185981954


In [6]:
lasso = Lasso(alpha=0.1)
lasso.fit(X_train, y_train)
y_pred_lasso = lasso.predict(X_test)
print("\nLasso Regression")
print("RMSE:", rmse(y_pred_lasso, y_test))
print("MSE:", mse(y_pred_lasso, y_test))
print("MAE:", mae(y_pred_lasso, y_test))
print("Median Absolute Error:", mdn(y_pred_lasso, y_test))
print("R2:", r2(y_pred_lasso, y_test))
print("MAPE:", mape(y_pred_lasso, y_test))


Lasso Regression
RMSE: 0.44371324243298826
MSE: 0.1968814415103958
MAE: 0.35894017847870197
Median Absolute Error: 0.3152326408379622
R2: 0.9999926807049739
MAPE: 0.011619515133225997


In [7]:
elasticnet = ElasticNet()
elasticnet.fit(X_train, y_train)
y_pred_elasticnet = elasticnet.predict(X_test)
print("\nElasticNet Regression")
print("RMSE:", rmse(y_pred_elasticnet, y_test))
print("MSE:", mse(y_pred_elasticnet, y_test))
print("MAE:", mae(y_pred_elasticnet, y_test))
print("Median Absolute Error:", mdn(y_pred_elasticnet, y_test))
print("R2:", r2(y_pred_elasticnet, y_test))
print("MAPE:", mape(y_pred_elasticnet, y_test))


ElasticNet Regression
RMSE: 57.13445910448213
MSE: 3264.3464171617416
MAE: 46.002052629321206
Median Absolute Error: 39.50884025321672
R2: 0.7178802996979605
MAPE: 0.6315988314965858


In [8]:
huber = HuberRegressor()
huber.fit(X_train, y_train)
y_pred_huber = huber.predict(X_test)
print("\nHuber Regressor")
print("RMSE:", rmse(y_pred_huber, y_test))
print("MSE:", mse(y_pred_huber, y_test))
print("MAE:", mae(y_pred_huber, y_test))
print("Median Absolute Error:", mdn(y_pred_huber, y_test))
print("R2:", r2(y_pred_huber, y_test))
print("MAPE:", mape(y_pred_huber, y_test))


Huber Regressor
RMSE: 0.398378501799774
MSE: 0.15870543069623255
MAE: 0.3220280435735696
Median Absolute Error: 0.2740368444095296
R2: 0.9999941133444101
MAPE: 0.034415619466543465


In [9]:
theil_sen = TheilSenRegressor()
theil_sen.fit(X_train, y_train)
y_pred_theil_sen = theil_sen.predict(X_test)
print("\nTheil-Sen Regressor")
print("RMSE:", rmse(y_pred_theil_sen, y_test))
print("MSE:", mse(y_pred_theil_sen, y_test))
print("MAE:", mae(y_pred_theil_sen, y_test))
print("Median Absolute Error:", mdn(y_pred_theil_sen, y_test))
print("R2:", r2(y_pred_theil_sen, y_test))
print("MAPE:", mape(y_pred_theil_sen, y_test))


Theil-Sen Regressor
RMSE: 0.39849219799232144
MSE: 0.1587960318607515
MAE: 0.31927038375844236
Median Absolute Error: 0.2746559119995666
R2: 0.9999941112707568
MAPE: 0.03788794240223607


In [10]:
ransac = RANSACRegressor()
ransac.fit(X_train, y_train)
y_pred_ransac = ransac.predict(X_test)
print("\nRANSAC Regressor")
print("RMSE:", rmse(y_pred_ransac, y_test))
print("MSE:", mse(y_pred_ransac, y_test))
print("MAE:", mae(y_pred_ransac, y_test))
print("Median Absolute Error:", mdn(y_pred_ransac, y_test))
print("R2:", r2(y_pred_ransac, y_test))
print("MAPE:", mape(y_pred_ransac, y_test))


RANSAC Regressor
RMSE: 0.39766681767828455
MSE: 0.158138897882374
MAE: 0.32082997909219996
Median Absolute Error: 0.27348586363331506
R2: 0.9999941355653444
MAPE: 0.029254770436521804


In [11]:
predictions = [y_pred_linear, y_pred_ridge, y_pred_lasso, y_pred_huber, y_pred_theil_sen, y_pred_ransac]

predictions_df = pd.DataFrame(np.column_stack(predictions), columns=['Linear', 'Ridge', 'Lasso', 'Huber', 'Theil-Sen', 'RANSAC'])

predictions_df.head()

Unnamed: 0,Linear,Ridge,Lasso,Huber,Theil-Sen,RANSAC
0,45.27319,45.186793,45.198632,45.296469,45.253509,45.27319
1,53.541258,53.462357,53.399477,53.557928,53.522911,53.541258
2,-67.290441,-67.185825,-67.060019,-67.263225,-67.23385,-67.290441
3,-169.849909,-169.640109,-169.735428,-169.837365,-169.900177,-169.849909
4,-1.288842,-1.317155,-1.09642,-1.318591,-1.302651,-1.288842


In [12]:
predictions_df['Mean_pred'] = predictions_df.mean(axis = 1)

In [13]:
predictions_df.head(7)

Unnamed: 0,Linear,Ridge,Lasso,Huber,Theil-Sen,RANSAC,Mean_pred
0,45.27319,45.186793,45.198632,45.296469,45.253509,45.27319,45.246964
1,53.541258,53.462357,53.399477,53.557928,53.522911,53.541258,53.504198
2,-67.290441,-67.185825,-67.060019,-67.263225,-67.23385,-67.290441,-67.220634
3,-169.849909,-169.640109,-169.735428,-169.837365,-169.900177,-169.849909,-169.80215
4,-1.288842,-1.317155,-1.09642,-1.318591,-1.302651,-1.288842,-1.26875
5,69.977215,69.89314,69.773766,69.994789,69.989695,69.977215,69.934303
6,-154.543588,-154.354585,-154.274873,-154.538274,-154.578898,-154.543588,-154.472301


In [14]:
y_mean = predictions_df['Mean_pred']
print("RMSE:", rmse(y_test, y_mean))
print("MSE:", mse(y_mean, y_test))
print("MAE:", mae(y_mean, y_test))
print("Median Absolute Error:", mdn(y_mean, y_test))
print("R2:", r2(y_mean, y_test))
print("MAPE:", mape(y_mean, y_test))

RMSE: 0.39927940811773777
MSE: 0.15942404574685098
MAE: 0.32609366383632926
Median Absolute Error: 0.2741625123644873
R2: 0.9999940826969268
MAPE: 0.022930764078224274
