In [45]:
import pandas as pd
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.preprocessing import StandardScaler
import matplotlib.pyplot as plt

from sklearn.ensemble import GradientBoostingRegressor
from xgboost import XGBRegressor
from catboost import CatBoostRegressor
from sklearn.metrics import r2_score
from sklearn.feature_selection import SelectKBest,f_regression
from sklearn.feature_selection import chi2

import warnings
warnings.filterwarnings('ignore')


In [5]:
data=pd.read_csv("data/climate_change_impact_on_agriculture_2024.csv")

In [6]:
data.head()

Unnamed: 0,Year,Country,Region,Crop_Type,Average_Temperature_C,Total_Precipitation_mm,CO2_Emissions_MT,Crop_Yield_MT_per_HA,Extreme_Weather_Events,Irrigation_Access_%,Pesticide_Use_KG_per_HA,Fertilizer_Use_KG_per_HA,Soil_Health_Index,Adaptation_Strategies,Economic_Impact_Million_USD
0,2001,India,West Bengal,Corn,1.55,447.06,15.22,1.737,8,14.54,10.08,14.78,83.25,Water Management,808.13
1,2024,China,North,Corn,3.23,2913.57,29.82,1.737,8,11.05,33.06,23.25,54.02,Crop Rotation,616.22
2,2001,France,Ile-de-France,Wheat,21.11,1301.74,25.75,1.719,5,84.42,27.41,65.53,67.78,Water Management,796.96
3,2001,Canada,Prairies,Coffee,27.85,1154.36,13.91,3.89,5,94.06,14.38,87.58,91.39,No Adaptation,790.32
4,1998,India,Tamil Nadu,Sugarcane,2.19,1627.48,11.81,1.08,9,95.75,44.35,88.08,49.61,Crop Rotation,401.72


In [7]:
df=data

In [27]:
def selectkbest(indep_X,dep_Y,n):
        test = SelectKBest(score_func=f_regression, k=n)
        fit1= test.fit(indep_X,dep_Y)
        selectk_features = fit1.transform(indep_X)
        return selectk_features

In [9]:
def split_scalar(indep_x,dep_y):
    X_train,X_test,y_train,y_test=train_test_split(indep_x,dep_y,test_size=0.25,random_state=0)
    sc=StandardScaler()
    X_train = sc.fit_transform(X_train)
    X_test=sc.transform(X_test)
    return X_train,X_test,y_train,y_test

In [10]:
def r2_prediction(regressor,X_test,y_test):
     y_pred = regressor.predict(X_test)
     r2=r2_score(y_test,y_pred)
     return r2

In [46]:
def gradientboost(X_train,y_train,X_test,y_test):
    gb = GradientBoostingRegressor(random_state=42)
    param_grid = {
            'n_estimators': [100, 200, 300],
            'learning_rate': [0.01, 0.1, 0.2],
            'max_depth': [3, 5, 7],
            'min_samples_split': [2, 5, 10],
            'min_samples_leaf': [1, 2, 4]
        }
    grid = GridSearchCV(estimator=gb, 
                               param_grid=param_grid , 
                               cv=3, scoring='neg_mean_squared_error', verbose=2, n_jobs=-1)
    grid.fit(X_train, y_train)
    r2=r2_prediction(grid,X_test,y_test)
    return  r2  

In [47]:
def xgboost(X_train,y_train,X_test,y_test):
    xgb = XGBRegressor(random_state=42)
    param_grid = {
            'n_estimators': [100, 200, 300],
            'learning_rate': [0.01, 0.1, 0.2],
            'max_depth': [3, 5, 7],
            'subsample': [0.8, 1.0],
            'colsample_bytree': [0.8, 1.0]
        }
    grid = GridSearchCV(estimator=xgb, 
                               param_grid= param_grid, 
                               cv=3, scoring='neg_mean_squared_error', verbose=2, n_jobs=-1)
    # Fit the model to the training data
    grid.fit(X_train, y_train)
    r2=r2_prediction(grid,X_test,y_test)
    return  r2  
    

In [56]:
def catboost(X_train,y_train,X_test,y_test):
    catboost = CatBoostRegressor(random_state=42, silent=True)
    param_grid = {
            'iterations': [100, 200, 300],
            'learning_rate': [0.01, 0.1, 0.2],
            'depth': [3, 5, 7],
            'l2_leaf_reg': [1, 3, 5]
        }
    grid = GridSearchCV(estimator=catboost, 
                               param_grid=param_grid , 
                               cv=3, scoring='neg_mean_squared_error', verbose=2, n_jobs=-1)
    grid.fit(X_train, y_train)
    r2=r2_prediction(grid,X_test,y_test)
    return  r2  
    

In [57]:
def selectk_regression(accgradient,accxgboost,acccatboost): 
    
    dataframe=pd.DataFrame(index=['ChiSquare'],columns=['GradientBoosting','XGBoost','CatBoost'])
    for number,idex in enumerate(dataframe.index):      
        dataframe['GradientBoosting'][idex]=accgradient[number]       
        dataframe['XGBoost'][idex]=accxgboost[number]
        dataframe['CatBoost'][idex]=acccatboost[number]
        
    return dataframe

In [58]:
# Convert categorical features to dummy variables
df = pd.get_dummies(df, drop_first=True)

In [59]:
indep_X=df.iloc[:,[1,14]].values
dep_Y=df['Economic_Impact_Million_USD']

In [60]:
# Make Input X non Negative
indep_X += abs(indep_X.min())  

In [61]:
kbest=selectkbest(indep_X,dep_Y,5)

accgradient=[]
accxgboost=[]
acccatboost=[]


In [62]:
X_train,X_test,y_train,y_test = train_test_split(indep_X,dep_Y,test_size=0.30,random_state=0)

In [63]:
for _ in range(kbest.shape[1]):   
    
    r2_gradient=gradientboost(X_train,y_train,X_test,y_test)
    accgradient.append(r2_gradient)
    
    r2_xgboost= xgboost(X_train,y_train,X_test,y_test)  
    accxgboost.append(r2_xgboost)
    
    r2_catboost=catboost(X_train,y_train,X_test,y_test)
    acccatboost.append(r2_catboost)
    
   
result=selectk_regression(accgradient,accxgboost,acccatboost)


Fitting 3 folds for each of 243 candidates, totalling 729 fits
Fitting 3 folds for each of 108 candidates, totalling 324 fits
Fitting 3 folds for each of 81 candidates, totalling 243 fits
Fitting 3 folds for each of 243 candidates, totalling 729 fits
Fitting 3 folds for each of 108 candidates, totalling 324 fits
Fitting 3 folds for each of 81 candidates, totalling 243 fits


In [64]:
result

Unnamed: 0,GradientBoosting,XGBoost,CatBoost
ChiSquare,0.161827,0.166566,0.163621
