In [7]:
import numpy as np 
import pandas as pd

In [8]:
# Read the full crop table, then keep only the rows whose crop type is POTATO.
crops = pd.read_csv("selected_crops_temp.csv")
is_potato = crops["CROP TYPE"] == "POTATO"
potato_crop = crops.loc[is_potato]
potato_crop.head()

Unnamed: 0,CROP TYPE,SOIL TYPE,REGION,WEATHER CONDITION,TEMP_MIN,TEMP_MAX,WATER REQUIREMENT
0,POTATO,DRY,DESERT,NORMAL,10,20,8.5
1,POTATO,DRY,DESERT,SUNNY,10,20,10.0
2,POTATO,DRY,DESERT,WINDY,10,20,9.5
3,POTATO,DRY,DESERT,RAINY,10,20,0.5
4,POTATO,DRY,DESERT,NORMAL,20,30,9.5


In [9]:
# Summarise column dtypes and non-null counts for the potato subset.
potato_crop.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 192 entries, 0 to 191
Data columns (total 7 columns):
 #   Column             Non-Null Count  Dtype  
---  ------             --------------  -----  
 0   CROP TYPE          192 non-null    object 
 1   SOIL TYPE          192 non-null    object 
 2   REGION             192 non-null    object 
 3   WEATHER CONDITION  192 non-null    object 
 4   TEMP_MIN           192 non-null    int64  
 5   TEMP_MAX           192 non-null    int64  
 6   WATER REQUIREMENT  192 non-null    float64
dtypes: float64(1), int64(2), object(4)
memory usage: 12.0+ KB


In [10]:
# Count missing values per column.
potato_crop.isnull().sum()

CROP TYPE            0
SOIL TYPE            0
REGION               0
WEATHER CONDITION    0
TEMP_MIN             0
TEMP_MAX             0
WATER REQUIREMENT    0
dtype: int64

In [11]:
# CROP TYPE is constant after filtering to POTATO, so it is dropped.
# errors="ignore" keeps this cell idempotent: re-running it once the column is
# already gone no longer raises KeyError.
potato_crop = potato_crop.drop(columns="CROP TYPE", errors="ignore")

In [12]:
# Preview the frame after removing the CROP TYPE column.
potato_crop.head()

Unnamed: 0,SOIL TYPE,REGION,WEATHER CONDITION,TEMP_MIN,TEMP_MAX,WATER REQUIREMENT
0,DRY,DESERT,NORMAL,10,20,8.5
1,DRY,DESERT,SUNNY,10,20,10.0
2,DRY,DESERT,WINDY,10,20,9.5
3,DRY,DESERT,RAINY,10,20,0.5
4,DRY,DESERT,NORMAL,20,30,9.5


In [13]:
# Separate the regression target from the predictor columns.
target_column = "WATER REQUIREMENT"
y = potato_crop[target_column]
X = potato_crop.drop(columns=[target_column])

In [14]:
from sklearn.model_selection import train_test_split

# Hold out 20% of the rows for evaluation. A fixed random_state makes the split
# (and every metric computed from it) reproducible across kernel restarts.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

In [15]:
from sklearn.preprocessing import OneHotEncoder
from sklearn.compose import ColumnTransformer

# One-hot encode the three categorical columns; every other column is passed
# through unchanged (remainder="passthrough").
# `sparse_output` replaces the `sparse` keyword, which was deprecated in
# scikit-learn 1.2 and removed in 1.4.
one = OneHotEncoder(handle_unknown="ignore", sparse_output=False)
categorical_features = ["SOIL TYPE", "REGION", "WEATHER CONDITION"]
transformer = ColumnTransformer(
    [("one", one, categorical_features)],
    remainder="passthrough",
)

# Fit on the training split only; the test split is transformed later with
# the already-fitted transformer.
transformed_X_train = transformer.fit_transform(X_train)
transformed_X_train



array([[ 0.,  0.,  1., ...,  1., 20., 30.],
       [ 1.,  0.,  0., ...,  0., 40., 50.],
       [ 0.,  1.,  0., ...,  0., 10., 20.],
       ...,
       [ 0.,  1.,  0., ...,  0., 20., 30.],
       [ 0.,  1.,  0., ...,  0., 30., 40.],
       [ 1.,  0.,  0., ...,  0., 20., 30.]])

In [16]:
# (rows, encoded feature columns) of the transformed training matrix.
transformed_X_train.shape

(153, 13)

In [17]:
# Frequency of each WEATHER CONDITION category in the full feature frame.
X["WEATHER CONDITION"].value_counts()

NORMAL    48
SUNNY     48
WINDY     48
RAINY     48
Name: WEATHER CONDITION, dtype: int64

In [18]:
# Frequency of each SOIL TYPE category in the full feature frame.
X["SOIL TYPE"].value_counts()

DRY      64
HUMID    64
WET      64
Name: SOIL TYPE, dtype: int64

In [19]:
# Frequency of each REGION category in the full feature frame.
X["REGION"].value_counts()

DESERT        48
SEMI ARID     48
SEMI HUMID    48
HUMID         48
Name: REGION, dtype: int64

In [20]:
# Encode the held-out rows with the transformer fitted on the training split
# (transform only, no refit).
transformed_X_test = transformer.transform(X_test)
transformed_X_test

array([[ 0.,  1.,  0.,  1.,  0.,  0.,  0.,  0.,  0.,  1.,  0., 10., 20.],
       [ 1.,  0.,  0.,  0.,  0.,  0.,  1.,  0.,  0.,  0.,  1., 20., 30.],
       [ 0.,  0.,  1.,  0.,  0.,  1.,  0.,  0.,  0.,  0.,  1., 20., 30.],
       [ 0.,  0.,  1.,  0.,  1.,  0.,  0.,  1.,  0.,  0.,  0., 30., 40.],
       [ 0.,  1.,  0.,  1.,  0.,  0.,  0.,  1.,  0.,  0.,  0., 40., 50.],
       [ 1.,  0.,  0.,  1.,  0.,  0.,  0.,  0.,  1.,  0.,  0., 20., 30.],
       [ 0.,  0.,  1.,  1.,  0.,  0.,  0.,  0.,  0.,  1.,  0., 40., 50.],
       [ 1.,  0.,  0.,  0.,  0.,  1.,  0.,  0.,  0.,  1.,  0., 30., 40.],
       [ 0.,  0.,  1.,  0.,  0.,  1.,  0.,  0.,  0.,  1.,  0., 10., 20.],
       [ 1.,  0.,  0.,  0.,  0.,  1.,  0.,  0.,  1.,  0.,  0., 10., 20.],
       [ 0.,  1.,  0.,  0.,  0.,  0.,  1.,  1.,  0.,  0.,  0., 10., 20.],
       [ 0.,  1.,  0.,  1.,  0.,  0.,  0.,  1.,  0.,  0.,  0., 20., 30.],
       [ 1.,  0.,  0.,  0.,  0.,  0.,  1.,  1.,  0.,  0.,  0., 30., 40.],
       [ 0.,  0.,  1.,  0.,  1.,  0., 

In [21]:
# (rows, encoded feature columns) of the transformed test matrix.
transformed_X_test.shape

(39, 13)

In [22]:
from sklearn.ensemble import RandomForestRegressor

# Seed the forest so that training is repeatable; an unseeded forest gives a
# slightly different score on every run.
model = RandomForestRegressor(random_state=42)
model.fit(transformed_X_train, y_train)
# score() is R^2 on the held-out split; multiplied by 100 to read as a percentage.
model.score(transformed_X_test, y_test)*100

95.58201417137803

In [23]:
# Predict the water requirement for every held-out row.
y_preds = model.predict(transformed_X_test)
y_preds

array([ 7.6749 ,  8.677  ,  6.51455,  5.0328 ,  8.754  ,  1.0205 ,
        8.79   , 11.061  ,  5.97815,  0.4685 ,  5.2804 ,  7.0705 ,
        9.6645 ,  6.172  ,  0.8865 ,  2.5155 ,  7.9225 ,  8.0309 ,
        3.9801 ,  6.82655, 10.1202 ,  9.5635 ,  0.4005 ,  5.44675,
        0.211  ,  7.45945,  1.117  ,  8.077  ,  0.9395 ,  0.7625 ,
        9.1945 ,  9.464  ,  0.299  ,  7.5629 ,  8.989  ,  7.1081 ,
        7.2815 ,  7.4885 ,  5.5738 ])

In [24]:
from sklearn.metrics import mean_squared_error

# Mean squared error of the random-forest predictions on the test split.
mse = mean_squared_error(y_true=y_test, y_pred=y_preds)
mse

0.48433404416666587

In [25]:
from sklearn.metrics import mean_absolute_error

# Mean absolute error of the random-forest predictions on the test split.
mae = mean_absolute_error(y_true=y_test, y_pred=y_preds)
mae

0.5487192307692303

In [26]:
from sklearn.metrics import r2_score

# Coefficient of determination (R^2) of the predictions on the test split.
score = r2_score(y_true=y_test, y_pred=y_preds)
score

0.9558201417137804

In [27]:
# Adjusted R^2 = 1 - (1 - R^2) * (n - 1) / (n - p - 1), with n the number of
# test rows and p the number of encoded feature columns.
n_samples = len(y_test)
n_features = transformed_X_test.shape[1]
adjusted_r2 = 1 - (1 - score) * (n_samples - 1) / (n_samples - n_features - 1)
adjusted_r2

0.9328466154049462

In [28]:
## with ridge regression (the estimator used here is Ridge, not plain LinearRegression)
from sklearn.linear_model import Ridge

# fit() returns the estimator itself, so construction and training are chained.
ridgeModel = Ridge().fit(transformed_X_train, y_train)
# R^2 on the held-out split (as a fraction, not a percentage).
ridgeModel.score(transformed_X_test, y_test)

0.934096491355056

In [29]:
## with SVM
from sklearn.svm import SVR

# Default-parameter support vector regressor, trained on the same encoded
# matrix as the other models.
# NOTE(review): the encoded matrix is not scaled anywhere in this notebook,
# which may explain the low score here -- confirm before ruling SVR out.
svmModel = SVR().fit(transformed_X_train, y_train)
# R^2 on the held-out split, multiplied by 100 to read as a percentage.
svmModel.score(transformed_X_test, y_test)*100

3.263092845749571

In [30]:
## with nuSVM
from sklearn.svm import NuSVR

# Default-parameter NuSVR, trained on the same encoded matrix.
nuSVMModel = NuSVR().fit(transformed_X_train, y_train)
# R^2 on the held-out split (as a fraction, not a percentage).
nuSVMModel.score(transformed_X_test, y_test)

0.10070019264453867

In [31]:
# Preview the raw (un-encoded) feature frame.
X.head()

Unnamed: 0,SOIL TYPE,REGION,WEATHER CONDITION,TEMP_MIN,TEMP_MAX
0,DRY,DESERT,NORMAL,10,20
1,DRY,DESERT,SUNNY,10,20
2,DRY,DESERT,WINDY,10,20
3,DRY,DESERT,RAINY,10,20
4,DRY,DESERT,NORMAL,20,30


In [32]:
# Count how often each distinct combination of feature values occurs.
X.value_counts()

SOIL TYPE  REGION      WEATHER CONDITION  TEMP_MIN  TEMP_MAX
DRY        DESERT      NORMAL             10        20          1
                                          20        30          1
HUMID      SEMI HUMID  SUNNY              30        40          1
                                          40        50          1
                       WINDY              10        20          1
                                                               ..
           DESERT      NORMAL             30        40          1
                                          40        50          1
                       RAINY              10        20          1
                                          20        30          1
WET        SEMI HUMID  WINDY              40        50          1
Length: 192, dtype: int64

In [33]:
# Hand-built sample to predict, listed in X's column order:
# SOIL TYPE, REGION, WEATHER CONDITION, TEMP_MIN, TEMP_MAX.
temp = [
    "DRY",
    "DESERT",
    "WINDY",
    35,
    55,
]

In [34]:
# Convert the sample list to a NumPy array.
# NOTE: the list mixes strings and ints, so NumPy stores every element as a
# string (the array shown below has a '<U..' dtype and 35/55 become '35'/'55').
temp = np.array(temp)
temp

array(['DRY', 'DESERT', 'WINDY', '35', '55'], dtype='<U11')

In [35]:
# Reshape to a single row (1, n_features) so it can become a one-row DataFrame.
temp = temp.reshape(1, -1)
temp.shape

(1, 5)

In [36]:
# Build a one-row frame with the training column names, then cast each column
# to the dtype it has in X. `temp` is a string array at this point (NumPy
# coerces a mixed str/int list to text), so without the cast TEMP_MIN and
# TEMP_MAX would reach the transformer and the model as strings.
temp_df = pd.DataFrame(temp, columns=X.columns).astype(X.dtypes.to_dict())
temp_df

Unnamed: 0,SOIL TYPE,REGION,WEATHER CONDITION,TEMP_MIN,TEMP_MAX
0,DRY,DESERT,WINDY,35,55


In [37]:
# Encode the single sample with the fitted transformer.
transformed_temp = transformer.transform(temp_df)
transformed_temp.shape

(1, 13)

In [38]:
# Column names (and order) of the feature frame.
X.columns

Index(['SOIL TYPE', 'REGION', 'WEATHER CONDITION', 'TEMP_MIN', 'TEMP_MAX'], dtype='object')

In [39]:
# Predict the water requirement for the hand-built sample.
temp_preds = model.predict(transformed_temp)
temp_preds

array([12.059])

In [40]:
## pickling 
import pickle

In [41]:
# Persist the fitted model. The context manager closes the file handle as soon
# as the dump finishes, instead of leaving an open handle behind.
with open("potato_model.pkl", "wb") as model_file:
    pickle.dump(model, model_file)

In [42]:
# Persist the fitted transformer, closing the file handle when done.
# NOTE(review): the file name is spelled "tranformer" (sic). It is kept
# unchanged in case a consumer loads the file by this exact name -- confirm
# before renaming.
with open("potato_tranformer.pkl", "wb") as transformer_file:
    pickle.dump(transformer, transformer_file)

In [43]:
# Persist the feature column index so that new samples can be rebuilt with the
# same column names and order; the file handle is closed when done.
with open("crop_columns.pkl", "wb") as columns_file:
    pickle.dump(X.columns, columns_file)

In [44]:
from joblib import dump, load

# Also save the fitted transformer in joblib format. dump() returns the list of
# file names it wrote, which the notebook echoes as the cell output.
dump(transformer, "potato_transformer.joblib")

['potato_transformer.joblib']

In [45]:
# Read the joblib file back to check that the saved transformer can be loaded.
tranformer_test = load("potato_transformer.joblib")


In [46]:
## old ways
# Stand-alone encoder used directly, without a ColumnTransformer.
# NOTE: this rebinds the name `one` that was used when `transformer` was built.
# `sparse_output` replaces the `sparse` keyword, which was deprecated in
# scikit-learn 1.2 and removed in 1.4.
from sklearn.preprocessing import OneHotEncoder
one = OneHotEncoder(handle_unknown="ignore", sparse_output=False)

In [47]:
# Keep only the three categorical columns for the stand-alone encoder.
encoded_columns = ["SOIL TYPE", "REGION", "WEATHER CONDITION"]
X_new = X[encoded_columns]
X_new

Unnamed: 0,SOIL TYPE,REGION,WEATHER CONDITION
0,DRY,DESERT,NORMAL
1,DRY,DESERT,SUNNY
2,DRY,DESERT,WINDY
3,DRY,DESERT,RAINY
4,DRY,DESERT,NORMAL
...,...,...,...
187,WET,HUMID,RAINY
188,WET,HUMID,NORMAL
189,WET,HUMID,SUNNY
190,WET,HUMID,WINDY


In [48]:
# Fit the stand-alone encoder on the categorical columns.
one.fit(X_new)



In [49]:
# One-hot encode the categorical columns with the fitted encoder.
X_new_trans = one.transform(X_new)
X_new_trans

array([[1., 0., 0., ..., 0., 0., 0.],
       [1., 0., 0., ..., 0., 1., 0.],
       [1., 0., 0., ..., 0., 0., 1.],
       ...,
       [0., 0., 1., ..., 0., 1., 0.],
       [0., 0., 1., ..., 0., 0., 1.],
       [0., 0., 1., ..., 1., 0., 0.]])

In [50]:
# (rows, one-hot columns) produced by the stand-alone encoder.
X_new_trans.shape

(192, 11)

In [51]:
# Record the scikit-learn version used to fit and serialise the artifacts.
import sklearn
sklearn.__version__


'1.2.2'

In [52]:
# Record the pandas version in use.
pd.__version__

'1.5.3'

In [53]:
# Record the NumPy version in use.
np.__version__

'1.24.2'

In [54]:
# Record the joblib version used to write the .joblib file.
import joblib
joblib.__version__

'1.2.0'