In [2]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split,GridSearchCV 
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LinearRegression,Lasso,Ridge
from sklearn.ensemble import GradientBoostingRegressor,RandomForestRegressor,AdaBoostRegressor
from xgboost import XGBRegressor
from sklearn.tree import DecisionTreeRegressor
from sklearn.svm import SVR
from sklearn.neighbors import KNeighborsRegressor
from sklearn.metrics import mean_absolute_error,mean_squared_error,r2_score
# Read the modelling dataset from a CSV file; the relative path is resolved
# against the kernel's current working directory.
DATA_FILE = "cleaned_data.csv"
df = pd.read_csv(DATA_FILE)

In [3]:
# Seed shared by every estimator that draws random numbers while fitting
# (bootstrapping, feature sub-sampling, tie-breaking between splits). Without
# it, a fresh "Restart & Run All" yields different fitted models and scores.
# The value matches the seed used for the train/test split.
SEED = 42

# Candidate regressors, keyed by display name. The same keys are used to look
# up each model's hyper-parameter grid in `param_grids` and to label the rows
# of the final results table, so they must stay in sync with those.
models = {
    # Deterministic estimators: no seed required with their default settings.
    "LinearRegression": LinearRegression(),
    "Lasso": Lasso(),
    "Ridge": Ridge(),
    # Stochastic estimators: seeded for reproducibility.
    "RandomForestRegressor": RandomForestRegressor(random_state=SEED),
    "DecisionTreeRegressor": DecisionTreeRegressor(random_state=SEED),
    # Deterministic estimators.
    "KneighborsRegressor": KNeighborsRegressor(),
    "SVM Regressor": SVR(),
    # Stochastic boosting ensembles: seeded for reproducibility.
    "Adaboost": AdaBoostRegressor(random_state=SEED),
    "gradientboost": GradientBoostingRegressor(random_state=SEED),
    "xgboost": XGBRegressor(random_state=SEED),
}

In [4]:

# Candidate values shared by several grids. Each grid receives its own copy
# (via list(...)) so that editing one model's grid never affects another's.
_ALPHAS = (0.01, 0.1, 1)
_ENSEMBLE_SIZES = (200, 300)
_TREE_DEPTHS = (None, 10)
_BOOSTING_RATES = (0.05, 0.1)

# Hyper-parameter search space per model, keyed by the same display names
# used in the `models` dictionary.
param_grids = {
    "LinearRegression": {
        "fit_intercept": [True, False],
    },
    "Lasso": {
        "alpha": list(_ALPHAS),
    },
    "Ridge": {
        "alpha": list(_ALPHAS),
    },
    "RandomForestRegressor": {
        "n_estimators": list(_ENSEMBLE_SIZES),
        "max_depth": list(_TREE_DEPTHS),
    },
    "DecisionTreeRegressor": {
        "max_depth": list(_TREE_DEPTHS),
        "min_samples_split": [3, 6],
    },
    "KneighborsRegressor": {
        "n_neighbors": [3, 5, 7],
    },
    "SVM Regressor": {
        "C": [0.1, 1, 5],
        "epsilon": [0.1, 0.2],
    },
    "Adaboost": {
        "n_estimators": list(_ENSEMBLE_SIZES),
        "learning_rate": [0.1, 0.5],
    },
    "gradientboost": {
        "n_estimators": list(_ENSEMBLE_SIZES),
        "learning_rate": list(_BOOSTING_RATES),
    },
    "xgboost": {
        "n_estimators": list(_ENSEMBLE_SIZES),
        "learning_rate": list(_BOOSTING_RATES),
    },
}

In [5]:
# Separate the regression target from the remaining (predictor) columns.
TARGET = "LandAverageTemperature"
y = df[TARGET]
x = df.drop(columns=[TARGET])

# Hold out 20% of the rows for the final evaluation; the fixed seed keeps the
# partition identical between runs.
x_train, x_test, y_train, y_test = train_test_split(
    x,
    y,
    test_size=0.2,
    random_state=42,
)

# Learn the standardisation statistics from the training rows only, then
# apply that same transformation to both partitions.
scaler = StandardScaler()
scaler.fit(x_train)
x_train = scaler.transform(x_train)
x_test = scaler.transform(x_test)

In [6]:
def _adjusted_r2(r2, n_samples, n_features):
    """Return R² adjusted for the number of predictors.

    Uses 1 - (1 - r2) * (n_samples - 1) / (n_samples - n_features - 1).
    When there are not enough samples for the correction to be defined
    (n_samples - n_features - 1 <= 0) NaN is returned instead of raising
    ZeroDivisionError or reporting a meaningless value.
    """
    dof = n_samples - n_features - 1
    if dof <= 0:
        return float("nan")
    return 1 - (1 - r2) * (n_samples - 1) / dof


# Per-model metrics, collected in the iteration order of `models`. The lists
# are re-created on every run of this cell, so re-running never duplicates rows.
mse = []
mae = []
r2s = []
adj_r2 = []
bestp = []

# R² is measured on the held-out rows, so the adjustment uses the size of the
# test partition together with the number of feature columns.
n = x_test.shape[0]
p = x_train.shape[1]

for name, estimator in models.items():
    # 5-fold cross-validated grid search on the training partition; the
    # combination with the highest mean R² is selected.
    param_grid = param_grids[name]
    grid = GridSearchCV(estimator, param_grid=param_grid, cv=5, scoring="r2")
    grid.fit(x_train, y_train)
    best_param = grid.best_params_

    # Score the selected estimator on the held-out test partition.
    model = grid.best_estimator_
    y_pred = model.predict(x_test)
    r2 = r2_score(y_test, y_pred)
    adjusted_r2 = _adjusted_r2(r2, n, p)

    mse.append(mean_squared_error(y_test, y_pred))
    mae.append(mean_absolute_error(y_test, y_pred))
    r2s.append(r2)
    adj_r2.append(adjusted_r2)
    bestp.append(best_param)

In [7]:
# One row per model (labelled by its display name), one column per metric.
# RMSE is derived from the MSE column and appended as the last column.
model_names = [*models]
result = pd.DataFrame(
    data={
        "MSE": mse,
        "MAE": mae,
        "R2": r2s,
        "Adjusted_R2": adj_r2,
        "Best Param": bestp,
    },
    index=model_names,
).assign(RMSE=lambda table: np.sqrt(table["MSE"]))

In [8]:
# Display the first ten rows of the model-comparison table.
result.iloc[:10]

Unnamed: 0,MSE,MAE,R2,Adjusted_R2,Best Param,RMSE
LinearRegression,0.044746,0.15298,0.997561,0.997505,{'fit_intercept': True},0.211533
Lasso,0.043565,0.148142,0.997625,0.99757,{'alpha': 0.01},0.208723
Ridge,0.044742,0.152962,0.997561,0.997505,{'alpha': 0.01},0.211522
RandomForestRegressor,0.03167,0.112845,0.998274,0.998234,"{'max_depth': None, 'n_estimators': 200}",0.17796
DecisionTreeRegressor,0.087259,0.171675,0.995244,0.995134,"{'max_depth': None, 'min_samples_split': 6}",0.295396
KneighborsRegressor,0.117223,0.219547,0.99361,0.993463,{'n_neighbors': 3},0.342379
SVM Regressor,0.027854,0.101319,0.998482,0.998447,"{'C': 5, 'epsilon': 0.1}",0.166897
Adaboost,0.124129,0.278906,0.993234,0.993078,"{'learning_rate': 0.5, 'n_estimators': 300}",0.352319
gradientboost,0.035529,0.126177,0.998063,0.998019,"{'learning_rate': 0.1, 'n_estimators': 300}",0.188492
xgboost,0.03019,0.114834,0.998354,0.998316,"{'learning_rate': 0.05, 'n_estimators': 300}",0.173752
