### Import Dependencies

In [231]:
import pandas as pd
from sklearn.pipeline import make_pipeline
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
import xgboost as xgb
from sklearn.metrics import accuracy_score
import numpy as np
import joblib

# **Data Exploration and Preprocessing:**

• Explore the dataset to understand its structure, features, and distributions.

• Perform any necessary preprocessing steps such as handling missing values, encoding categorical variables, and scaling numerical features.

### Loading dataset

In [232]:
# Read the crop dataset from its CSV file into a DataFrame.
dataset_path = '/content/datasets/Crop_Dataset.csv'
crop_data = pd.read_csv(dataset_path)

### Preliminary investigation

In [233]:
# Preview the first rows to see the column layout and a few sample values.
crop_data.head()

Unnamed: 0,N,P,K,temperature,humidity,ph,rainfall,Total_Nutrients,Temperature_Humidity,Log_Rainfall,Label,Label_Encoded
0,90,42,43,20.879744,82.002744,6.502985,202.935536,175,1712.196283,5.317804,wheat,0
1,85,58,41,21.770462,80.319644,7.038096,226.655537,184,1748.595734,5.427834,wheat,0
2,60,55,44,23.004459,82.320763,7.840207,263.964248,159,1893.744627,5.579595,wheat,0
3,74,35,40,26.491096,80.158363,6.980401,242.864034,149,2123.482908,5.496611,wheat,0
4,78,42,42,20.130175,81.604873,7.628473,262.71734,162,1642.720357,5.574878,wheat,0


In [234]:
# Summary statistics (count, mean, std, quartiles, min/max) for the numeric columns.
crop_data.describe()

Unnamed: 0,N,P,K,temperature,humidity,ph,rainfall,Total_Nutrients,Temperature_Humidity,Log_Rainfall,Label_Encoded
count,2200.0,2200.0,2200.0,2200.0,2200.0,2200.0,2200.0,2200.0,2200.0,2200.0,2200.0
mean,50.551818,53.362727,48.149091,25.616244,71.481779,6.46948,103.463655,152.063636,1854.231566,4.50873,10.5
std,36.917334,32.985883,50.647931,5.063749,22.263812,0.773938,54.958389,79.918669,699.366334,0.5454,6.345731
min,0.0,5.0,5.0,8.825675,14.25804,3.504752,20.211267,17.0,247.613182,3.054533,0.0
25%,21.0,28.0,20.0,22.769375,60.261953,5.971693,64.551686,94.0,1479.558114,4.182839,5.0
50%,37.0,51.0,32.0,25.598693,80.473146,6.425045,94.867624,146.0,1927.88334,4.562968,10.5
75%,84.25,68.0,49.0,28.561654,89.948771,6.923643,124.267508,179.0,2255.752423,4.830451,16.0
max,140.0,145.0,205.0,43.675493,99.981876,9.935091,298.560117,385.0,4073.159566,5.702315,21.0


Checking for missing data

In [235]:
# Count the missing values in each column.
crop_data.isnull().sum()

N                       0
P                       0
K                       0
temperature             0
humidity                0
ph                      0
rainfall                0
Total_Nutrients         0
Temperature_Humidity    0
Log_Rainfall            0
Label                   0
Label_Encoded           0
dtype: int64

Searching for categorical variables

In [236]:
# Boolean mask over the columns: True where the column dtype is 'object'.
categorical_variables = crop_data.dtypes.eq('object')
# Names of the columns selected by that mask.
categorical_columns = categorical_variables.index[categorical_variables].tolist()
print(categorical_columns)

['Label']


So there are no missing values in the dataset. We can now preprocess data.

### Preprocessing Dataset

Choosing dependent and independent variables

In [237]:
# Two views of the same target: the crop name as a categorical series (fitted by the
# random-forest pipeline) and its integer encoding (fitted by XGBoost).
# The features are every column that is not one of the two target columns.
target_columns = ['Label', 'Label_Encoded']
X = crop_data.drop(columns=target_columns)
y = crop_data['Label'].astype('category')
y_cl = crop_data['Label_Encoded']

Splitting dataset to train and test

In [238]:
# Hold out a validation set for each target encoding (string labels and encoded labels).
# A fixed random_state makes both splits, and every metric computed from them,
# reproducible under Restart Kernel -> Run All; without it each run shuffles differently.
SPLIT_SEED = 1
X_train, X_valid, y_train, y_valid = train_test_split(X, y, random_state=SPLIT_SEED)
X_train_cl, X_valid_cl, y_train_cl, y_valid_cl = train_test_split(X, y_cl, random_state=SPLIT_SEED)

Create XGBoost Matrices

In [239]:
# Wrap the encoded-label split in XGBoost's DMatrix container for the native API
# (dtrain is consumed by xgb.cv below).
dtrain = xgb.DMatrix(data=X_train_cl, label=y_train_cl)
dtest = xgb.DMatrix(data=X_valid_cl, label=y_valid_cl)

# **Model Training:**

• Choose an appropriate machine learning algorithm (e.g., Decision Trees, Random Forests, Support Vector Machines) for building the predictive model.

• Split the dataset into training and testing sets.

• Train the model using the training data.

In [240]:
# Native-API settings: multi-class soft-probability objective over 22 classes.
params = dict(objective="multi:softprob", num_class=22)

# 5-fold cross-validation on the training matrix, capped at 1000 boosting rounds and
# stopped early after 5 rounds without improvement.
results = xgb.cv(
    params=params,
    dtrain=dtrain,
    num_boost_round=1000,
    nfold=5,
    early_stopping_rounds=5,
)

# scikit-learn style classifier fitted on the encoded-label training split.
# NOTE(review): early stopping monitors (X_valid_cl, y_valid_cl), the same rows that
# are later used to report accuracy -- confirm this is intended.
model_xgb = xgb.XGBClassifier(n_estimators=1000, early_stopping_rounds=5)
holdout_pairs = [(X_valid_cl, y_valid_cl)]
model_xgb.fit(X_train_cl, y_train_cl, eval_set=holdout_pairs, verbose=False)

In [241]:
# First rows of the cross-validation log returned by xgb.cv (one row per boosting round).
results.head()

Unnamed: 0,train-mlogloss-mean,train-mlogloss-std,test-mlogloss-mean,test-mlogloss-std
0,0.96582,0.008688,1.070182,0.025884
1,0.679316,0.00576,0.781162,0.018078
2,0.495229,0.003804,0.593015,0.016301
3,0.367904,0.002717,0.462278,0.017414
4,0.276434,0.002069,0.366995,0.017916


In [242]:
# Single-step pipeline around a seeded random forest, fitted on the string-label split.
forest = RandomForestClassifier(random_state=1)
pipeline = make_pipeline(forest)
pipeline.fit(X_train, y_train)

# **Model Evaluation:**

• Evaluate the trained model's accuracy in predicting the crop labels using the testing dataset.

• Provide insights into how well the model performs in suggesting appropriate crops based on
the given environmental conditions.

In [243]:
# Hard predictions for the accuracy check in the next cell.
y_pred = model_xgb.predict(X_valid_cl)

# Per-row class probabilities -> column positions of the three largest, most probable first.
class_probabilities = model_xgb.predict_proba(X_valid_cl)
top3_positions = np.argsort(class_probabilities)[:, :-4:-1]
predictions = model_xgb.classes_[top3_positions]

# Map each encoded label back to its crop name and show the suggestions for the first row.
classes = dict(zip(crop_data['Label_Encoded'], crop_data['Label']))
list(map(classes.get, predictions[0]))

['barley', 'rapeseed', 'cauliflower']

In [244]:
# Accuracy of the XGBoost predictions against the encoded validation labels.
accuracy = accuracy_score(y_valid_cl, y_pred)
print(accuracy)

0.9945454545454545


In [245]:
# Random-forest predictions on its own validation split, plus the three most probable
# class labels per row. This rebinds `y_pred` and `predictions` from the XGBoost cell.
y_pred = pipeline.predict(X_valid)
forest_probabilities = pipeline.predict_proba(X_valid)
forest_top3 = np.argsort(forest_probabilities)[:, :-4:-1]
predictions = pipeline.classes_[forest_top3]

In [246]:
# Accuracy of the random-forest pipeline; `y_pred` was rebound to its predictions in
# the previous cell, and `accuracy` is overwritten here.
accuracy = accuracy_score(y_valid, y_pred)
print(accuracy)

0.9963636363636363


# **Joblib Model Creation and Prediction:**

• Create a joblib model (.joblib) from the trained model.

• Use the created joblib model to make predictions on new environmental conditions.

• Calculate the accuracy of the predictions.

In [261]:
class model:
  """Stateless wrapper that suggests the most probable crops with the XGBoost classifier.

  Unless an estimator or label mapping is passed explicitly, ``predict_crops``
  reads the module-level ``model_xgb`` and ``crop_data``.
  """

  @staticmethod
  def predict_crops(X_valid_cl, estimator=None, label_lookup=None, top_k=3):
    """Return the ``top_k`` most probable crop names for every row of ``X_valid_cl``.

    Args:
      X_valid_cl: Feature rows accepted by ``estimator.predict_proba``.
      estimator: Fitted classifier exposing ``predict_proba`` and ``classes_``;
        defaults to the global ``model_xgb``.
      label_lookup: Mapping from encoded class to crop name; defaults to the
        ``Label_Encoded`` -> ``Label`` pairs found in the global ``crop_data``.
      top_k: Number of suggestions per row.

    Returns:
      A list with one entry per input row; each entry is a list of crop names
      ordered from most to least probable. The suggestions for the first row
      are also printed.
    """
    if estimator is None:
      estimator = model_xgb
    if label_lookup is None:
      label_lookup = dict(zip(crop_data['Label_Encoded'], crop_data['Label']))

    probabilities = estimator.predict_proba(X_valid_cl)
    # argsort is ascending, so reverse each row and keep the leading top_k columns.
    ranked = np.argsort(probabilities, axis=1)[:, ::-1][:, :top_k]
    top_codes = estimator.classes_[ranked]
    top_labels = [[label_lookup.get(code) for code in row] for row in top_codes]

    # Guard against empty input instead of failing on top_labels[0].
    if top_labels:
      print(top_labels[0])
    return top_labels

In [262]:
class model_rf:
  """Stateless wrapper that suggests the most probable crops with the random-forest pipeline.

  Unless an estimator is passed explicitly, ``predict_crops`` reads the
  module-level ``pipeline``.
  """

  @staticmethod
  def predict_crops(X_valid_cl, estimator=None, top_k=3):
    """Return the ``top_k`` most probable class labels for every row of ``X_valid_cl``.

    Args:
      X_valid_cl: Feature rows accepted by ``estimator.predict_proba``. The rows
        passed in are the ones scored (not the global ``X_valid``).
      estimator: Fitted classifier exposing ``predict_proba`` and ``classes_``;
        defaults to the global ``pipeline``.
      top_k: Number of suggestions per row.

    Returns:
      A 2-D array with one row per input row, holding class labels ordered from
      most to least probable. The suggestions for the first row are also printed.
    """
    if estimator is None:
      estimator = pipeline

    probabilities = estimator.predict_proba(X_valid_cl)
    # argsort is ascending, so reverse each row and keep the leading top_k columns.
    ranked = np.argsort(probabilities, axis=1)[:, ::-1][:, :top_k]
    predictions = estimator.classes_[ranked]

    # Guard against empty input instead of failing on predictions[0].
    if len(predictions):
      print(predictions[0])
    return predictions

In [268]:
# Serialize `model` (the XGBoost-backed wrapper class defined above) to disk with joblib.
# NOTE(review): the file name says "rfc", but the object dumped is `model`, not
# `model_rf` -- confirm which one is intended.
# NOTE(review): `model` is a class, and pickle-based serializers store classes by
# reference (module + qualified name) rather than by value, so this file presumably
# does not contain the fitted estimator; loading it in a fresh kernel would need
# `model`, `model_xgb` and `crop_data` to be defined again. Consider dumping the
# fitted estimator itself -- verify before relying on this artifact.
filename = 'model_rfc.sav'
joblib.dump(model, filename)

['model_rfc.sav']

In [269]:
# Reload the object written to `filename` and ask it for crop suggestions.
# NOTE(review): X_valid comes from the split paired with `y` (the one used for the
# random forest), whereas `model` wraps `model_xgb`, which was fitted on the `_cl`
# split -- confirm this is the intended hold-out set.
# NOTE(review): no accuracy is computed here, although the section text asks for one.
loaded_model = joblib.load(filename)
result = loaded_model.predict_crops(X_valid)

['strawberries', 'wheat', 'rapeseed']
