In [5]:
import numpy as np
import pandas as pd
from sklearn.metrics import *;
import warnings
warnings.filterwarnings('ignore')
import pickle
from sklearn.ensemble import StackingRegressor,RandomForestRegressor
from sklearn.tree import DecisionTreeRegressor
from xgboost import XGBRegressor

### Loading required models

In [210]:
with open("Models/RandomForest_CV_model.pkl",'rb') as rf_cv_file:
    rf_cv=pickle.load(rf_cv_file)

In [3]:
with open("Models/XGBoost_CV_model.pkl",'rb') as xgb_cv_file:
    xgb_cv=pickle.load(xgb_cv_file)

In [6]:
with open("Models/DecisionTreeRegressor_model.pkl",'rb') as dt_file:
    dt=pickle.load(dt_file)

In [4]:
X_train=pd.read_csv("encoded_data/X_train.csv")
y_train=pd.read_csv("encoded_data/y_train.csv")
X_test=pd.read_csv("encoded_data/X_test.csv")
y_test=pd.read_csv("encoded_data/y_test.csv")

In [12]:
rf=RandomForestRegressor(n_estimators=70,random_state=42)
xgb=XGBRegressor()

### Stacking rf,xgb,dt with rf

In [13]:
rf_xgb_dt_RF=StackingRegressor(estimators=[('xgb', xgb_cv), ('rf', rf_cv),('dt',dt)], final_estimator=rf)

In [14]:
rf_xgb_dt_RF.fit(X_train,y_train)

In [15]:
y_pred=rf_xgb_dt_RF.predict(X_test)

In [131]:
y_pred_r2=r2_score(y_test,y_pred)

In [132]:
y_pred_mae=mean_absolute_error(y_test,y_pred)

In [133]:
y_pred_mse=mean_squared_error(y_test,y_pred)

In [179]:
y_pred_rmse=np.sqrt(y_pred_mse)

In [159]:
with open("Models/Ensembled_Models/rf_xgb_dt_with_rf.pkl",'wb') as stack1:
    pickle.dump(rf_xgb_dt_RF,stack1)

### Stacking rf,xgb,dt with xgb

In [17]:
rf_xgb_dt_XGB=StackingRegressor(estimators=[('xgb',xgb_cv),('rf',rf_cv),('dt',dt)],final_estimator=xgb)

In [19]:
rf_xgb_dt_XGB.fit(X_train,y_train)

In [20]:
y_pred1=rf_xgb_dt_XGB.predict(X_test)

In [134]:
y_pred1_r2=r2_score(y_test,y_pred1)

In [136]:
y_pred1_mae=mean_absolute_error(y_test,y_pred1)

In [137]:
y_pred1_mse=mean_squared_error(y_test,y_pred1)

In [180]:
y_pred1_rmse=np.sqrt(y_pred1_mse)

In [160]:
with open("Models/Ensembled_Models/rf_xgb_dt_with_xgb.pkl",'wb') as stack2:
    pickle.dump(rf_xgb_dt_XGB,stack2)

### Stacking rf,xgb with rf

In [22]:
RF=RandomForestRegressor(n_estimators=60,random_state=42)

In [23]:
rf_xgb_RF=StackingRegressor(estimators=[('xgb', xgb_cv), ('rf', rf_cv)], final_estimator=RF)

In [24]:
rf_xgb_RF.fit(X_train,y_train)

In [25]:
y_pred2=rf_xgb_RF.predict(X_test)

In [138]:
y_pred2_r2=r2_score(y_test,y_pred2)

In [139]:
y_pred2_mae=mean_absolute_error(y_test,y_pred2)

In [140]:
y_pred2_mse=mean_squared_error(y_test,y_pred2)

In [181]:
y_pred2_rmse=np.sqrt(y_pred2_mse)

In [161]:
with open("Models/Ensembled_Models/rf_xgb_with_RF.pkl",'wb') as stack3:
    pickle.dump(rf_xgb_RF,stack3)

### Stacking rf,xgb with xgb

In [29]:
XGB=XGBRegressor()

In [30]:
rf_xgb_XGB=StackingRegressor(estimators=[('xgb', xgb_cv), ('rf', rf_cv)], final_estimator=XGB)

In [31]:
rf_xgb_XGB.fit(X_train,y_train)

In [32]:
y_pred3=rf_xgb_XGB.predict(X_test)

In [141]:
y_pred3_r2=r2_score(y_test,y_pred3)

In [142]:
y_pred3_mae=mean_absolute_error(y_test,y_pred3)

In [143]:
y_pred3_mse=mean_squared_error(y_test,y_pred3)

In [182]:
y_pred3_rmse=np.sqrt(y_pred3_mse)

In [162]:
with open("Models/Ensembled_Models/rf_xgb_with_XGB.pkl",'wb') as stack4:
    pickle.dump(rf_xgb_XGB,stack4)

### Stacking rf,xgb with rf with RandomizedSearch_CV

In [151]:
from sklearn.model_selection import RandomizedSearchCV

In [154]:
rf1=RandomForestRegressor()

In [152]:
### HyperParameter Tuning ###
n_estimators=[int(i) for i in np.linspace(start=40,stop=110,num=8)]

max_features=['auto','sqrt']

max_depth=[int(i) for i in np.linspace(5,30,num=6)]

min_samples_split = [2,5,10,15,100]

min_samples_leaf = [1,2,5,10]

In [153]:
random_grid={'n_estimators':n_estimators,
              'max_features':max_features,
              'max_depth':max_depth,
              'min_samples_split':min_samples_split,
              'min_samples_leaf':min_samples_leaf}

In [155]:
RF_cv=RandomizedSearchCV(estimator=rf1,param_distributions=random_grid,scoring="neg_mean_squared_error",n_iter=10,cv=5,verbose=2,random_state=42,n_jobs=1)

In [156]:
rf_xgb_RF_CV=StackingRegressor(estimators=[('xgb', xgb_cv), ('rf', rf_cv)], final_estimator=RF_cv)

In [157]:
rf_xgb_RF_CV.fit(X_train,y_train)

Fitting 5 folds for each of 10 candidates, totalling 50 fits
[CV] END max_depth=20, max_features=sqrt, min_samples_leaf=1, min_samples_split=2, n_estimators=100; total time=  32.9s
[CV] END max_depth=20, max_features=sqrt, min_samples_leaf=1, min_samples_split=2, n_estimators=100; total time=  32.7s
[CV] END max_depth=20, max_features=sqrt, min_samples_leaf=1, min_samples_split=2, n_estimators=100; total time=  32.9s
[CV] END max_depth=20, max_features=sqrt, min_samples_leaf=1, min_samples_split=2, n_estimators=100; total time=  32.8s
[CV] END max_depth=20, max_features=sqrt, min_samples_leaf=1, min_samples_split=2, n_estimators=100; total time=  33.0s
[CV] END max_depth=25, max_features=sqrt, min_samples_leaf=1, min_samples_split=10, n_estimators=70; total time=  22.4s
[CV] END max_depth=25, max_features=sqrt, min_samples_leaf=1, min_samples_split=10, n_estimators=70; total time=  22.5s
[CV] END max_depth=25, max_features=sqrt, min_samples_leaf=1, min_samples_split=10, n_estimators=70

In [171]:
y_pred4=rf_xgb_RF_CV.predict(X_test)

In [173]:
y_pred4_r2=r2_score(y_test,y_pred4)

In [174]:
y_pred4_mae=mean_absolute_error(y_test,y_pred4)

In [175]:
y_pred4_mse=mean_squared_error(y_test,y_pred)

In [184]:
y_pred4_rmse=np.sqrt(y_pred4_mse)

In [170]:
with open("Models/Ensembled_Models/rf_xgb_with_rf_cv.pkl",'wb')as stack5:
    pickle.dump(rf_xgb_RF_CV,stack5)

#### R2_scores

In [185]:
print(f'rf_dt_xgb_with_rf={y_pred_r2}')
print(f'rf_dt_xgb_with_xgb={y_pred1_r2}')
print(f'rf_xgb_with_rf={y_pred2_r2}')
print(f'rf_xgb_with_xgb={y_pred3_r2}')
print(f'rf_xgb_with_rf_cv={y_pred4_r2}')

rf_dt_xgb_with_rf=0.9245558613962133
rf_dt_xgb_with_xgb=0.919178650699349
rf_xgb_with_rf=0.9180679426485602
rf_xgb_with_xgb=0.9176496977710005
rf_xgb_with_rf_cv=0.9329887794428191


#### Mean Absolute Error

In [186]:
print(f'rf_dt_xgb_with_rf={y_pred_mae}')
print(f'rf_dt_xgb_with_xgb={y_pred1_mae}')
print(f'rf_xgb_with_rf={y_pred2_mae}')
print(f'rf_xgb_with_xgb={y_pred3_mae}')
print(f'rf_xgb_with_rf_cv={y_pred4_mae}')

rf_dt_xgb_with_rf=8768.969774376224
rf_dt_xgb_with_xgb=8680.716631337225
rf_xgb_with_rf=9226.681563209102
rf_xgb_with_xgb=8704.064472574504
rf_xgb_with_rf_cv=8423.664865396033


#### Mean Squared Error

In [187]:
print(f'rf_dt_xgb_with_rf={y_pred_mse}')
print(f'rf_dt_xgb_with_xgb={y_pred1_mse}')
print(f'rf_xgb_with_rf={y_pred2_mse}')
print(f'rf_xgb_with_xgb={y_pred3_mse}')
print(f'rf_xgb_with_rf_cv={y_pred4_mse}')

rf_dt_xgb_with_rf=1136410384.628233
rf_dt_xgb_with_xgb=1217406976.1373827
rf_xgb_with_rf=1234137502.6774957
rf_xgb_with_xgb=1240437499.350166
rf_xgb_with_rf_cv=1136410384.628233


### Root Mean Squared Error

In [188]:
print(f'rf_dt_xgb_with_rf={y_pred_rmse}')
print(f'rf_dt_xgb_with_xgb={y_pred1_rmse}')
print(f'rf_xgb_with_rf={y_pred2_rmse}')
print(f'rf_xgb_with_xgb={y_pred3_rmse}')
print(f'rf_xgb_with_rf_cv={y_pred4_rmse}')

rf_dt_xgb_with_rf=33710.686504849364
rf_dt_xgb_with_xgb=34891.35962007475
rf_xgb_with_rf=35130.293233582546
rf_xgb_with_xgb=35219.84524881059
rf_xgb_with_rf_cv=33710.686504849364


In [34]:
def initialize(state,crop,season,area,n,p,k,ph,rain,temp):
    input_data={
        'State_Name_':state.lower().replace(" ","_"),
        'Crop_':crop.lower(),
        'Crop_Type_':season.lower(),
        'N':n,
        'P':p,
        'K':k,
        'pH':ph,
        'rainfall':rain,
        'temperature':temp,
        'Area_in_hectares':area
    }
    return input_data

In [115]:
temp_df=pd.DataFrame(columns=[X_train.columns])

In [202]:
df_values=initialize('uttarakhand','potato','kharif',2234,180,60,90,4.94,1110.78,25.67)

In [203]:
temp_df.loc[0]=[0]*len(temp_df.columns)

In [204]:
temp_df

Unnamed: 0,N,P,K,pH,rainfall,temperature,Area_in_hectares,State_Name_andaman_and_nicobar_islands,State_Name_andhra_pradesh,State_Name_arunachal_pradesh,...,Crop_sweetpotato,Crop_tapioca,Crop_tomato,Crop_turmeric,Crop_watermelon,Crop_wheat,Crop_Type_kharif,Crop_Type_rabi,Crop_Type_summer,Crop_Type_whole year
0,0,0,0,0.0,0.0,0.0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [205]:
for key,value in df_values.items():
    col_name=key
    if(type(value)==str):
        col_name+=value
        temp_df.at[0,col_name]=1
        continue
    temp_df.at[0,col_name]=value
    

In [206]:
temp_df

Unnamed: 0,N,P,K,pH,rainfall,temperature,Area_in_hectares,State_Name_andaman_and_nicobar_islands,State_Name_andhra_pradesh,State_Name_arunachal_pradesh,...,Crop_sweetpotato,Crop_tapioca,Crop_tomato,Crop_turmeric,Crop_watermelon,Crop_wheat,Crop_Type_kharif,Crop_Type_rabi,Crop_Type_summer,Crop_Type_whole year
0,180,60,90,4.94,1110.78,25.67,2234,0,0,0,...,0,0,0,0,0,0,1,0,0,0


#### model testing

In [207]:
rf_xgb_dt_RF_pred=rf_xgb_dt_RF.predict(temp_df)
rf_xgb_dt_XGB_pred=rf_xgb_dt_XGB.predict(temp_df)
rf_xgb_RF_pred=rf_xgb_RF.predict(temp_df)
rf_xgb_XGB_pred=rf_xgb_XGB.predict(temp_df)
rf_cv_pred=rf_cv.predict(temp_df)
xgb_cv_pred=xgb_cv.predict(temp_df)
rf_xgb_rf_cv=rf_xgb_RF_CV.predict(temp_df)

In [208]:
print(rf_xgb_dt_RF_pred)
print(rf_xgb_dt_XGB_pred)
print(rf_xgb_RF_pred)
print(rf_xgb_XGB_pred)
print(rf_cv_pred)
print(xgb_cv_pred)
print(rf_xgb_rf_cv)

[23845.422]
[22264.896]
[24273.23333333]
[22284.34]
[20413.57079126]
[21738.465]
[20382.06525615]
