In [1]:
import pandas as pd
dataset = pd.read_csv(r'dataset.csv')

# For feelslike_c and heatindex_c using temp_c and wind_kph

In [2]:
X = dataset[['temp_c' , 'wind_kph']]
y = dataset[['feelslike_c' , 'heatindex_c']]

In [3]:
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split( X , y , test_size=0.3 , random_state=45)

In [4]:
from sklearn.metrics import r2_score , mean_absolute_error , mean_squared_error

In [5]:
def model_selection(models):
    model = []
    score = []
    mae = []
    mse = []
    for mod in models:
        mod.fit(X_train , y_train)
        pred = mod.predict(X_test)
        s = r2_score(y_test , pred)
        ma = mean_absolute_error(y_test , pred)
        ms = mean_squared_error(y_test , pred)
        model.append(mod)
        score.append(s)
        mae.append(ma)
        mse.append(ms)
    model = { 'model' : model , 'r2_score' : score , 'mean_absolute_error' : mae , 'mean_squared_error' : mse }
    model = pd.DataFrame(model)
    return model

In [6]:
from sklearn.linear_model import LinearRegression
from sklearn.linear_model import Lasso
from sklearn.linear_model import Ridge
from sklearn.linear_model import MultiTaskElasticNet
from sklearn.neighbors import KNeighborsRegressor
models = [LinearRegression() , Lasso() , Ridge() , MultiTaskElasticNet() , KNeighborsRegressor()]

In [7]:
result = model_selection(models)
result

Unnamed: 0,model,r2_score,mean_absolute_error,mean_squared_error
0,LinearRegression(),0.880333,1.205468,2.285113
1,Lasso(),0.876312,1.157756,2.361996
2,Ridge(),0.880334,1.205401,2.285089
3,MultiTaskElasticNet(),0.878576,1.166964,2.318907
4,KNeighborsRegressor(),0.871673,1.067312,2.44704


In [8]:
from sklearn.linear_model import Lasso
from sklearn.linear_model import Ridge
from sklearn.linear_model import MultiTaskElasticNet
from sklearn.neighbors import KNeighborsRegressor
models = [Lasso() , Ridge() , MultiTaskElasticNet() , KNeighborsRegressor()]
params = {
    0 : dict(alpha = [1,2,3,4,5,10,20] ,
    selection = ['cyclic', 'random'] ) ,

    1 : dict(alpha = [1,2,3,4,5,10,20] ,
    solver = ['auto', 'svd', 'cholesky', 'lsqr', 'sparse_cg', 'sag', 'saga', 'lbfgs'] ) ,

    2 : dict(l1_ratio = [.5, .7, .9, .95, .99, 1] ,
    selection = ['cyclic', 'random']) ,
    
    3 : dict(n_neighbors = [5,10,20, 50] ,
    weights = ['uniform', 'distance'] ,
    algorithm = ['auto', 'ball_tree', 'kd_tree', 'brute']  ) }

In [9]:
from sklearn.model_selection import GridSearchCV
def model_selection(models , params ):
    result = {'model' : [] , 'r2_score' : [] , 'mean_absolute_error' : [] ,
              'mean_squared_error' : [] , 'best_params_' : [] , 'best_score_' : [] }
    for i in range(len(models)):
        Grid = GridSearchCV(estimator=models[i] , param_grid=params[i] , scoring='r2')
        Grid.fit(X_train , y_train)
        result['model'].append(models[i])
        result['best_params_'].append(Grid.best_params_)
        result['best_score_'].append(Grid.best_score_)
        pred = Grid.predict(X_test)
        result['r2_score'].append(r2_score(y_test , pred))
        result['mean_absolute_error'].append(mean_absolute_error(y_test , pred))
        result['mean_squared_error'].append(mean_squared_error(y_test , pred))
    result = pd.DataFrame(result)
    return result

In [10]:
result2 = model_selection(models , params)
result2

35 fits failed out of a total of 280.
The score on these train-test partitions for these parameters will be set to nan.
If these failures are not expected, you can try to debug them by setting error_score='raise'.

Below are more details about the failures:
--------------------------------------------------------------------------------
35 fits failed with the following error:
Traceback (most recent call last):
  File "C:\Users\ayush\AppData\Local\Programs\Python\Python312\Lib\site-packages\sklearn\model_selection\_validation.py", line 888, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "C:\Users\ayush\AppData\Local\Programs\Python\Python312\Lib\site-packages\sklearn\base.py", line 1473, in wrapper
    return fit_method(estimator, *args, **kwargs)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "C:\Users\ayush\AppData\Local\Programs\Python\Python312\Lib\site-packages\sklearn\linear_model\_ridge.py", line 1250, in fit
    return super().fit(X, y, sam

Unnamed: 0,model,r2_score,mean_absolute_error,mean_squared_error,best_params_,best_score_
0,Lasso(),0.876312,1.157756,2.361996,"{'alpha': 1, 'selection': 'cyclic'}",0.844611
1,Ridge(),0.880356,1.204135,2.284678,"{'alpha': 20, 'solver': 'auto'}",0.850178
2,MultiTaskElasticNet(),0.878576,1.166964,2.318907,"{'l1_ratio': 0.5, 'selection': 'cyclic'}",0.847803
3,KNeighborsRegressor(),0.881444,1.071218,2.261483,"{'algorithm': 'brute', 'n_neighbors': 20, 'wei...",0.864087


In [11]:
final_result = pd.concat([result , result2] , ignore_index=True)

In [12]:
final_result

Unnamed: 0,model,r2_score,mean_absolute_error,mean_squared_error,best_params_,best_score_
0,LinearRegression(),0.880333,1.205468,2.285113,,
1,Lasso(),0.876312,1.157756,2.361996,,
2,Ridge(),0.880334,1.205401,2.285089,,
3,MultiTaskElasticNet(),0.878576,1.166964,2.318907,,
4,KNeighborsRegressor(),0.871673,1.067312,2.44704,,
5,Lasso(),0.876312,1.157756,2.361996,"{'alpha': 1, 'selection': 'cyclic'}",0.844611
6,Ridge(),0.880356,1.204135,2.284678,"{'alpha': 20, 'solver': 'auto'}",0.850178
7,MultiTaskElasticNet(),0.878576,1.166964,2.318907,"{'l1_ratio': 0.5, 'selection': 'cyclic'}",0.847803
8,KNeighborsRegressor(),0.881444,1.071218,2.261483,"{'algorithm': 'brute', 'n_neighbors': 20, 'wei...",0.864087


#### For Predicting 'feelslike_c' , 'heatindex_c' we use kNeighborsRegressor by using best Parameters

# for humidity using wind_kph and pressure_mb

In [13]:
X = dataset[['pressure_mb' , 'wind_kph']]
y = dataset['humidity']

from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split( X , y , test_size=0.3 , random_state=45)

In [14]:
from sklearn.metrics import r2_score , mean_absolute_error , mean_squared_error
def model_selection(models):
    model = []
    score = []
    mae = []
    mse = []
    for mod in models:
        mod.fit(X_train , y_train)
        pred = mod.predict(X_test)
        s = r2_score(y_test , pred)
        ma = mean_absolute_error(y_test , pred)
        ms = mean_squared_error(y_test , pred)
        model.append(mod)
        score.append(s)
        mae.append(ma)
        mse.append(ms)
    model = { 'model' : model , 'r2_score' : score , 'mean_absolute_error' : mae , 'mean_squared_error' : mse }
    model = pd.DataFrame(model)
    return model

In [15]:
from sklearn.linear_model import LinearRegression
from sklearn.linear_model import Lasso
from sklearn.linear_model import Ridge
from sklearn.linear_model import ElasticNet
from sklearn.neighbors import KNeighborsRegressor
models = [LinearRegression() , Lasso() , Ridge() , ElasticNet() , KNeighborsRegressor()]
result = model_selection(models)
result

Unnamed: 0,model,r2_score,mean_absolute_error,mean_squared_error
0,LinearRegression(),0.119205,19.394464,510.401815
1,Lasso(),0.114737,19.445099,512.990813
2,Ridge(),0.119198,19.394548,510.405643
3,ElasticNet(),0.11449,19.450641,513.133706
4,KNeighborsRegressor(),0.389049,14.749711,354.032948


In [16]:
from sklearn.model_selection import GridSearchCV
def model_selection(models , params ):
    result = {'model' : [] , 'r2_score' : [] , 'mean_absolute_error' : [] ,
              'mean_squared_error' : [] , 'best_params_' : [] , 'best_score_' : [] }
    for i in range(len(models)):
        Grid = GridSearchCV(estimator=models[i] , param_grid=params[i] , scoring='r2')
        Grid.fit(X_train , y_train)
        result['model'].append(models[i])
        result['best_params_'].append(Grid.best_params_)
        result['best_score_'].append(Grid.best_score_)
        pred = Grid.predict(X_test)
        result['r2_score'].append(r2_score(y_test , pred))
        result['mean_absolute_error'].append(mean_absolute_error(y_test , pred))
        result['mean_squared_error'].append(mean_squared_error(y_test , pred))
    result = pd.DataFrame(result)
    return result

In [17]:
from sklearn.linear_model import Lasso
from sklearn.linear_model import Ridge
from sklearn.linear_model import ElasticNet
from sklearn.neighbors import KNeighborsRegressor
models = [Lasso() , Ridge() , ElasticNet() , KNeighborsRegressor()]
params = {
    0 : dict(alpha = [1,2,3,4,5,10,20] ,
    selection = ['cyclic', 'random'] ) ,

    1 : dict(alpha = [1,2,3,4,5,10,20] ,
    solver = ['auto', 'svd', 'cholesky', 'lsqr', 'sparse_cg', 'sag', 'saga', 'lbfgs'] ) ,

    2 : dict(l1_ratio = [.5, .7, .9, .95, .99, 1] ,
    selection = ['cyclic', 'random']) ,
    
    3 : dict(n_neighbors = [5,10,20, 50] ,
    weights = ['uniform', 'distance'] ,
    algorithm = ['auto', 'ball_tree', 'kd_tree', 'brute']  ) }
result2 = model_selection(models , params)
result2

35 fits failed out of a total of 280.
The score on these train-test partitions for these parameters will be set to nan.
If these failures are not expected, you can try to debug them by setting error_score='raise'.

Below are more details about the failures:
--------------------------------------------------------------------------------
35 fits failed with the following error:
Traceback (most recent call last):
  File "C:\Users\ayush\AppData\Local\Programs\Python\Python312\Lib\site-packages\sklearn\model_selection\_validation.py", line 888, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "C:\Users\ayush\AppData\Local\Programs\Python\Python312\Lib\site-packages\sklearn\base.py", line 1473, in wrapper
    return fit_method(estimator, *args, **kwargs)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "C:\Users\ayush\AppData\Local\Programs\Python\Python312\Lib\site-packages\sklearn\linear_model\_ridge.py", line 1250, in fit
    return super().fit(X, y, sam

Unnamed: 0,model,r2_score,mean_absolute_error,mean_squared_error,best_params_,best_score_
0,Lasso(),0.114737,19.445099,512.990784,"{'alpha': 1, 'selection': 'random'}",0.096134
1,Ridge(),0.119074,19.396158,510.477685,"{'alpha': 20, 'solver': 'sag'}",0.096469
2,ElasticNet(),0.11449,19.450641,513.133705,"{'l1_ratio': 0.5, 'selection': 'random'}",0.096464
3,KNeighborsRegressor(),0.40174,14.885405,346.678504,"{'algorithm': 'ball_tree', 'n_neighbors': 20, ...",0.378366


In [18]:
final_result2 = pd.concat([result , result2] , ignore_index=True)
final_result2

Unnamed: 0,model,r2_score,mean_absolute_error,mean_squared_error,best_params_,best_score_
0,LinearRegression(),0.119205,19.394464,510.401815,,
1,Lasso(),0.114737,19.445099,512.990813,,
2,Ridge(),0.119198,19.394548,510.405643,,
3,ElasticNet(),0.11449,19.450641,513.133706,,
4,KNeighborsRegressor(),0.389049,14.749711,354.032948,,
5,Lasso(),0.114737,19.445099,512.990784,"{'alpha': 1, 'selection': 'random'}",0.096134
6,Ridge(),0.119074,19.396158,510.477685,"{'alpha': 20, 'solver': 'sag'}",0.096469
7,ElasticNet(),0.11449,19.450641,513.133705,"{'l1_ratio': 0.5, 'selection': 'random'}",0.096464
8,KNeighborsRegressor(),0.40174,14.885405,346.678504,"{'algorithm': 'ball_tree', 'n_neighbors': 20, ...",0.378366


#### For Predicting humidity we use kNeighborsRegressor by using best Parameters

# Best Parameters

In [20]:
# best parameters of first test result
final_result.iloc[8]['best_params_']

{'algorithm': 'brute', 'n_neighbors': 20, 'weights': 'distance'}

In [21]:
# best parameters of second test result
final_result2.iloc[8]['best_params_']

{'algorithm': 'ball_tree', 'n_neighbors': 20, 'weights': 'uniform'}