In [None]:
# import the necessary libraries
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
import warnings
# Silence every warning for the rest of the session so cell output stays clean.
# NOTE(review): this is a blanket filter — it also hides deprecation and
# input-validation warnings raised by the libraries used below; consider narrowing it.
warnings.filterwarnings('ignore')

In [None]:
# read the fertilizer CSV into a DataFrame, then preview its first five rows
fake_fertilizer = pd.read_csv(filepath_or_buffer="fake_dataset/Fertilizer Prediction.csv")
fake_fertilizer.head(n=5)

DAP - Diammonium Phosphate; 14-35-14 - NPK; 28-28 - NP; and so on.

In [None]:
# check the shape of the fake_dataset: a (number of rows, number of columns) tuple
fake_fertilizer.shape

In [None]:
# check the basic info of the fake_dataset: prints a per-column summary
# (column names, non-null counts and dtypes)
fake_fertilizer.info()

In [None]:
# count the missing entries in every column of the fake_dataset
fake_fertilizer.isna().sum()

In [None]:
# check the duplicated values in the fake_dataset: number of rows that are
# exact repeats of an earlier row
fake_fertilizer.duplicated().sum()

In [None]:
# check the basic statistics of the fake_dataset (summary statistics such as
# count, mean, std and quartiles for the numeric columns)
fake_fertilizer.describe()

In [None]:
# keep just the numeric columns and compute their pairwise Pearson correlations
fake_fertilizer_numeric = fake_fertilizer.select_dtypes(include='number')
corr = fake_fertilizer_numeric.corr(method='pearson')
corr

In [None]:
# annotated heatmap of the correlation matrix computed above
sns.heatmap(
    data=corr,
    annot=True,
    cbar=True,
    cmap='coolwarm',
)
plt.show()

In [None]:
# number of rows per fertilizer name, i.e. how balanced the target classes are
fake_fertilizer['Fertilizer Name'].value_counts()

In [None]:
# histogram with a KDE overlay for the temperature column
# (the column is accessed under the spelling 'Temparature')
sns.histplot(data=fake_fertilizer['Temparature'], kde=True)
plt.show()

In [None]:
# collect the names of the numeric columns into a plain Python list
features = list(fake_fertilizer.select_dtypes(include='number').columns)
print(features)

In [None]:
# one histogram (with KDE overlay) per numeric feature, laid out on a 2x3 grid;
# zip stops at the shorter of the feature list and the six available axes
fig, ax = plt.subplots(nrows=2, ncols=3, figsize=(15, 10))
for column, panel in zip(features, ax.flat):
    sns.histplot(data=fake_fertilizer[column], kde=True, ax=panel)
    panel.set_title(column)
plt.tight_layout()
plt.show()

In [None]:
# plot scatter plot of each numeric feature against the target on a 2x3 grid.
# x and y are given as column names, so the frame they come from has to be
# passed through seaborn's `data` argument for the names to be resolved.
fig, ax = plt.subplots(2, 3, figsize=(15, 10))
for i, subplot in zip(features, ax.flatten()):
    sns.scatterplot(x=i, y='Fertilizer Name', data=fake_fertilizer, ax=subplot)
plt.tight_layout()
plt.show()

#### Encoding the target column

In [None]:
# map every fertilizer name to an integer class label, numbered from 1 in list order
fert_dict = {
    name: label
    for label, name in enumerate(
        ['Urea', 'DAP', '14-35-14', '28-28', '17-17-17', '20-20', '10-26-26'],
        start=1,
    )
}

In [None]:
# encode the fertilizer names as integer class labels in a new 'fert_no' column
fake_fertilizer['fert_no'] = fake_fertilizer['Fertilizer Name'].map(fert_dict)

# Series.map leaves NaN for any name that is not a key of fert_dict; fail fast
# here instead of letting NaN targets travel on to the split / training cells
unmapped_names = fake_fertilizer.loc[fake_fertilizer['fert_no'].isna(), 'Fertilizer Name'].unique()
if len(unmapped_names) > 0:
    raise ValueError(f"Fertilizer names missing from fert_dict: {list(unmapped_names)}")

In [None]:
# number of rows per encoded class label in the new 'fert_no' column
fake_fertilizer['fert_no'].value_counts()

In [None]:
# drop the target column with name and keep the target column with numbers.
# Rebinding (instead of inplace=True) together with errors='ignore' keeps this
# cell safe to re-run: a second execution no longer raises KeyError because the
# column is already gone.
fake_fertilizer = fake_fertilizer.drop(columns='Fertilizer Name', errors='ignore')
fake_fertilizer.head()

In [None]:
# list the columns that are still stored with the object dtype
fake_fertilizer.select_dtypes(include='object').columns

In [None]:
# convert the categorical columns to numerical columns, one LabelEncoder per column.
# A single shared encoder is re-fitted by the second fit_transform call and
# forgets the 'Soil Type' classes, so soil codes could no longer be decoded
# (or new soil names encoded) afterwards.
soil_type_encoder = LabelEncoder()
crop_type_encoder = LabelEncoder()
fake_fertilizer['Soil Type'] = soil_type_encoder.fit_transform(fake_fertilizer['Soil Type'])
fake_fertilizer['Crop Type'] = crop_type_encoder.fit_transform(fake_fertilizer['Crop Type'])
# keep the previous name available: as before, `lb` ends up fitted on 'Crop Type'
lb = crop_type_encoder

In [None]:
# preview the first five rows after encoding
fake_fertilizer.head(n=5)

In [None]:
# separate the encoded target column (y) from the remaining predictor columns (x)
y = fake_fertilizer['fert_no']
x = fake_fertilizer.drop(columns=['fert_no'])
# report the dimensions of both pieces, one per line
print(
    f"The shape of features is: {x.shape}",
    f"The shape of target is: {y.shape}",
    sep="\n",
)

In [None]:
# hold out 20% of the rows for testing; the fixed seed makes the split repeatable
x_train, x_test, y_train, y_test = train_test_split(
    x,
    y,
    test_size=0.2,
    random_state=42,
)

# report the dimensions of the four resulting pieces, one per line
print(
    f"The shape of x_train is: {x_train.shape}",
    f"The shape of x_test is: {x_test.shape}",
    f"The shape of y_train is: {y_train.shape}",
    f"The shape of y_test is: {y_test.shape}",
    sep="\n",
)

### Scaling

In [None]:
# learn the scaling statistics from the training features only,
# then apply that same fitted scaler to both splits
sc = StandardScaler().fit(x_train)

x_train, x_test = sc.transform(x_train), sc.transform(x_test)

### Training Models

In [None]:
# initialize the DecisionTreeClassifier with a fixed seed: the estimator
# randomly permutes features while searching for splits, so without
# random_state the fitted tree (and the accuracies reported below) can change
# from run to run
model = DecisionTreeClassifier(random_state=42)

In [None]:
# fit the classifier on the scaled training features and their labels
model.fit(X=x_train, y=y_train)

In [None]:
# mean accuracy on the held-out test split, reported as a percentage
accuracy = model.score(X=x_test, y=y_test)
print(f"The accuracy of the model is: {accuracy*100:.2f}%")

In [None]:
# mean accuracy on the data the model was fitted on, reported as a percentage
# (note: this rebinds `accuracy`, replacing the test-set value)
accuracy = model.score(X=x_train, y=y_train)
print(f"The accuracy of the model on the training set is: {accuracy*100:.2f}%")

## Predictive System

In [None]:
def recommend_fake_fertilizer(Temparature, Humidity, Moisture, Soil_Type, Crop_Type, Nitrogen, Potassium, Phosphorous):
    """Recommend a fertilizer for one set of field conditions.

    The eight arguments are assembled, in signature order, into a single-row
    feature array, scaled with the module-level scaler ``sc`` and classified
    with the module-level ``model``. The predicted integer label is translated
    back to a fertilizer name.

    Parameters
    ----------
    Temparature, Humidity, Moisture : number
        Environmental readings.
    Soil_Type, Crop_Type : number
        Numeric category codes; the values are fed to the scaler as-is, so they
        must already be encoded the same way as the training data.
    Nitrogen, Potassium, Phosphorous : number
        Nutrient readings.

    Returns
    -------
    str
        A sentence naming the recommended fertilizer.

    Raises
    ------
    KeyError
        If the model predicts a label outside 1-7.
    """
    # NOTE(review): this order must match the column order the scaler and model
    # were fitted on — confirm against the training frame.
    features = np.array([[Temparature, Humidity, Moisture, Soil_Type, Crop_Type, Nitrogen, Potassium, Phosphorous]])
    transformed_features = sc.transform(features)
    # predict returns one label per input row; exactly one row was passed in
    predicted_label = model.predict(transformed_features)[0]
    label_to_name = {1: 'Urea', 2: 'DAP', 3: '14-35-14', 4: '28-28', 5: '17-17-17', 6: '20-20', 7: '10-26-26'}
    # use the bare name: interpolating a list would render as "['Urea'] is a best ..."
    fertilizer_name = label_to_name[predicted_label]

    return f"{fertilizer_name} is a best fake_fertilizer for the given conditions"

In [None]:
# Example field conditions, grouped by kind.
# NOTE(review): Humidity and Moisture are given as fractions here — confirm they
# use the same units as the training data.
Temparature, Humidity, Moisture = 26, 0.5, 0.6
Soil_Type, Crop_Type = 2, 3
Nitrogen, Potassium, Phosphorous = 10, 15, 6

# Ask the recommendation function for a prediction on these inputs
recommend_fake_fertilizer(
    Temparature=Temparature,
    Humidity=Humidity,
    Moisture=Moisture,
    Soil_Type=Soil_Type,
    Crop_Type=Crop_Type,
    Nitrogen=Nitrogen,
    Potassium=Potassium,
    Phosphorous=Phosphorous,
)