In [1]:
import pandas as pd
import numpy as np

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import r2_score
from sklearn.feature_selection import SelectKBest,f_regression

from sklearn.feature_selection import chi2
from sklearn.feature_selection import RFE

from sklearn.linear_model import LinearRegression
from sklearn.svm import SVR
from sklearn.tree import DecisionTreeRegressor
from sklearn.ensemble import RandomForestRegressor

import pickle
import matplotlib.pyplot as plt
import warnings
warnings.filterwarnings('ignore')




In [2]:
def selectkbest(indep_X, dep_Y, n, score_func=f_regression):
    test = SelectKBest(score_func=score_func, k=n)
    fit1 = test.fit(indep_X, dep_Y)
    selectk_features = fit1.transform(indep_X)
    return selectk_features


In [3]:
def split_scalar(indep_X,dep_Y):
        X_train, X_test, y_train, y_test = train_test_split(indep_X, dep_Y, test_size = 0.25, random_state = 0)
        sc = StandardScaler()
        X_train = sc.fit_transform(X_train)
        X_test = sc.transform(X_test)    
        return X_train, X_test, y_train, y_test
    

In [4]:
def r2_prediction(regressor,X_test,y_test):
     y_pred = regressor.predict(X_test)
     r2=r2_score(y_test,y_pred)
     return r2

In [5]:
def Linear(X_train,y_train,X_test,y_test):       
        regressor = LinearRegression()
        regressor.fit(X_train, y_train)
        r2=r2_prediction(regressor,X_test,y_test)
        return  r2   

In [6]:
def svm_linear(X_train,y_train,X_test,y_test):
        
        
        param_grid = {'kernel':['rbf','poly','sigmoid','linear'],
                    'C':[10,100,1000,2000,3000],'gamma':['auto','scale']}
        
        grid = GridSearchCV(SVR(), param_grid, refit = True, verbose = 3,n_jobs=-1)
                
        grid.fit(X_train, y_train)
        r2=r2_prediction(grid,X_test,y_test)
        return  r2  

In [7]:
def svm_NL(X_train,y_train,X_test,y_test):
        regressor = SVR(kernel = 'rbf')
        regressor.fit(X_train, y_train)
        r2=r2_prediction(regressor,X_test,y_test)
        return  r2  
     

In [8]:
def Decision(X_train,y_train,X_test,y_test):
        
        
        param_grid = {'criterion':['mse','mae','friedman_mse'],'max_features': ['auto','sqrt','log2'],'splitter':['best','random']}
        regressor = GridSearchCV(DecisionTreeRegressor(), param_grid, refit = True, verbose= 3,n_jobs=-1)
        regressor.fit(X_train, y_train)
        r2=r2_prediction(regressor,X_test,y_test)
        return  r2  
     

In [9]:
def random(X_train,y_train,X_test,y_test):  

        param_grid = {'n_estimators': [50, 100, 200], 'max_depth': [None, 10, 20, 30], 'min_samples_split': [2, 5, 10], 'min_samples_leaf': [1, 2, 4]}
        regressor = GridSearchCV(RandomForestRegressor(), param_grid=param_grid, cv=3, n_jobs=-1, scoring='r2')
        regressor.fit(X_train, y_train)
        #param_grid = {'criterion':['friedman_mse','squared_error','absolute_error','poisson'],'max_features': ['auto','sqrt','log2'],'n_estimators':[10,100]}
        #regressor = GridSearchCV(RandomForestRegressor(),param_grid, refit = True, verbose= 3,n_jobs=-1)
        #regressor.fit(X_train, y_train)
        r2=r2_prediction(regressor,X_test,y_test)
        return  r2 

In [10]:
def selectk_regression(acclin,accsvml,accsvmnl,accdes,accrf): 
    
    dataframe=pd.DataFrame(index=['ChiSquare'],columns=['Linear','SVMl','SVMnl','Decision','Random'])
    for number,idex in enumerate(dataframe.index):
        
        dataframe['Linear'][idex]=acclin[number]       
        dataframe['SVMl'][idex]=accsvml[number]
        dataframe['SVMnl'][idex]=accsvmnl[number]
        dataframe['Decision'][idex]=accdes[number]
        dataframe['Random'][idex]=accrf[number]
    return dataframe

### Load dataset

In [11]:
dataset=pd.read_csv("data/climate_change_impact_on_agriculture_2024.csv",index_col=None)
df=dataset

# Convert categorical features to dummy variables
df = pd.get_dummies(df, drop_first=True)

# Feature and Target selection
indep_X=df.iloc[:,[1,14]].values
dep_Y=df['Economic_Impact_Million_USD']

# Make Input X non Negative
indep_X += abs(indep_X.min())  

# select top features using selectKBest
kbest = selectkbest(indep_X, dep_Y,5,score_func=f_regression)
    

# initialize empty lists to store results
acclin=[]
accsvml=[]
accsvmnl=[]
accdes=[]
accrf=[]

# Split data into train and test
X_train,X_test,y_train,y_test = split_scalar(kbest,dep_Y)


In [12]:
# Train and predict with different models  

for _ in range(kbest.shape[1]):   
    
    r2_lin=Linear(X_train,y_train,X_test,y_test)
    acclin.append(r2_lin)
    
    r2_sl=svm_linear(X_train,y_train,X_test,y_test)    
    accsvml.append(r2_sl)
    
    r2_NL=svm_NL(X_train,y_train,X_test,y_test)
    accsvmnl.append(r2_NL)
    
    r2_d=Decision(X_train,y_train,X_test,y_test)
    accdes.append(r2_d)
    
    r2_r=random(X_train,y_train,X_test,y_test)
    accrf.append(r2_r)
    
    
result=selectk_regression(acclin,accsvml,accsvmnl,accdes,accrf)


Fitting 5 folds for each of 40 candidates, totalling 200 fits
Fitting 5 folds for each of 18 candidates, totalling 90 fits
Fitting 5 folds for each of 40 candidates, totalling 200 fits
Fitting 5 folds for each of 18 candidates, totalling 90 fits


In [18]:
result # K= 8 

Unnamed: 0,Linear,SVMl,SVMnl,Decision,Random
ChiSquare,0.044867,0.094371,0.03147,-0.357086,0.152098


In [13]:
result #K=5

Unnamed: 0,Linear,SVMl,SVMnl,Decision,Random
ChiSquare,0.044867,0.094371,0.03147,-0.357938,0.151849


## Conclusion:

#### The regression model did not provide good accuracy, so we can consider using a boosting algorithm.