In [1]:
import numpy as np
import pandas as pd
import warnings

warnings.filterwarnings("ignore")

# Reading dataset
df = pd.read_csv("Fertilizer Prediction.csv")

df.head()
df.describe()
df['Soil Type'].unique()

# Preprocessing using One-Hot Encoder
y = df['Fertilizer Name'].copy()
X = df.drop('Fertilizer Name', axis=1).copy()

from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import OneHotEncoder

ct = ColumnTransformer(transformers=[('encoder', OneHotEncoder(), [3,4])], remainder='passthrough')
X = np.array(ct.fit_transform(X))

X[0]

# Train-test split
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y, train_size=0.7, shuffle=True, random_state=42)

# Feature Scaling
from sklearn.preprocessing import StandardScaler
sc = StandardScaler()
X_train = sc.fit_transform(X_train)
X_test = sc.transform(X_test)

X_train[0]

# Random Forest Classifier
from sklearn.ensemble import RandomForestClassifier
classifier = RandomForestClassifier(n_estimators=100, criterion='gini', random_state=42)
classifier.fit(X_train, y_train)

y_pred = classifier.predict(X_test)

# Creating confusion matrix
from sklearn.metrics import confusion_matrix, accuracy_score
cm = confusion_matrix(y_test, y_pred)
print(cm)
accuracy_score(y_test, y_pred)

classifier.score(X_test, y_test)

# Preprocessing using Label Encoder
from sklearn.preprocessing import LabelEncoder

encode_soil = LabelEncoder()
df['Soil Type'] = encode_soil.fit_transform(df['Soil Type'])
Soil_Type = pd.DataFrame(zip(encode_soil.classes_, encode_soil.transform(encode_soil.classes_)), columns=['Original', 'Encoded']).set_index('Original')

encode_crop = LabelEncoder()
df['Crop Type'] = encode_crop.fit_transform(df['Crop Type'])
Crop_Type = pd.DataFrame(zip(encode_crop.classes_, encode_crop.transform(encode_crop.classes_)), columns=['Original', 'Encoded']).set_index('Original')

encode_ferti = LabelEncoder()
df['Fertilizer Name'] = encode_ferti.fit_transform(df['Fertilizer Name'])
Fertilizer = pd.DataFrame(zip(encode_ferti.classes_, encode_ferti.transform(encode_ferti.classes_)), columns=['Original', 'Encoded']).set_index('Original')

# Splitting the data into train and test
x_train, x_test, y_train, y_test = train_test_split(df.drop('Fertilizer Name', axis=1), df['Fertilizer Name'], test_size=0.2, random_state=1)
print('Shape of Splitting :')
print(f'x_train = {x_train.shape}, y_train = {y_train.shape}, x_test = {x_test.shape}, y_test = {y_test.shape}')

x_train.info()

# Random Forest Classifier
rand = RandomForestClassifier(random_state=42)
rand.fit(x_train, y_train)

pred_rand = rand.predict(x_test)

# Hyperparameter tuning with GridSearchCV
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import accuracy_score, classification_report

params = {
    'n_estimators': [300, 400, 500],
    'max_depth': [5, 10, 15],
    'min_samples_split': [2, 5, 8]
}

grid_rand = GridSearchCV(rand, params, cv=3, verbose=3, n_jobs=-1)
grid_rand.fit(x_train, y_train)

pred_rand = grid_rand.predict(x_test)
print(classification_report(y_test, pred_rand))
print('Best score:', grid_rand.best_score_)
print('Best params:', grid_rand.best_params_)

# Pickling the file
import pickle
pickle_out = open('classifier.pkl', 'wb')
pickle.dump(grid_rand, pickle_out)
pickle_out.close()

df.head()

# Predictive system
model = pickle.load(open('classifier.pkl', 'rb'))
ans = model.predict([[2, 62, 34, 3, 9, 22, 0, 20]])

fertilizer_dict = {0: "10-26-26", 1: "14-35-14", 2: "17-17-17", 3: "20-20", 4: "28-28", 5: "DAP", 6: "Urea"}
print(fertilizer_dict.get(ans[0], "Unknown Fertilizer"))


[[2 0 1 0 0 0 0]
 [0 4 0 0 0 0 0]
 [0 0 2 0 0 0 0]
 [0 0 0 1 0 0 0]
 [0 0 0 0 6 0 0]
 [0 0 0 0 0 7 0]
 [0 0 0 0 0 0 7]]
Shape of Splitting :
x_train = (79, 8), y_train = (79,), x_test = (20, 8), y_test = (20,)
<class 'pandas.core.frame.DataFrame'>
Index: 79 entries, 2 to 37
Data columns (total 8 columns):
 #   Column       Non-Null Count  Dtype
---  ------       --------------  -----
 0   Temparature  79 non-null     int64
 1   Humidity     79 non-null     int64
 2   Moisture     79 non-null     int64
 3   Soil Type    79 non-null     int64
 4   Crop Type    79 non-null     int64
 5   Nitrogen     79 non-null     int64
 6   Potassium    79 non-null     int64
 7   Phosphorous  79 non-null     int64
dtypes: int64(8)
memory usage: 5.6 KB
Fitting 3 folds for each of 27 candidates, totalling 81 fits
              precision    recall  f1-score   support

           0       1.00      0.33      0.50         3
           1       0.75      1.00      0.86         3
           2       0.67      1.