In [None]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, classification_report
from sklearn.tree import DecisionTreeClassifier
import pickle

In [3]:
# Read the crop dataset from the notebook's working directory.
df = pd.read_csv("Crop_recommendation.csv")

# Swap the textual crop names in `label` for integer class ids.
# `le` is kept around because later cells use it to decode ids back to names.
le = LabelEncoder()
df = df.assign(label=le.fit_transform(df['label']))

# Report how many rows each crop has, using the readable crop names.
label_counts = df['label'].value_counts()
label_names = le.inverse_transform(label_counts.index)
for name, count in zip(label_names, label_counts.tolist()):
    print(f"There are  {count} {name} samples")

There are  100 rice samples
There are  100 maize samples
There are  100 jute samples
There are  100 cotton samples
There are  100 coconut samples
There are  100 papaya samples
There are  100 orange samples
There are  100 apple samples
There are  100 muskmelon samples
There are  100 watermelon samples
There are  100 grapes samples
There are  100 mango samples
There are  100 banana samples
There are  100 pomegranate samples
There are  100 lentil samples
There are  100 blackgram samples
There are  100 mungbean samples
There are  100 mothbeans samples
There are  100 pigeonpeas samples
There are  100 kidneybeans samples
There are  100 chickpea samples
There are  100 coffee samples


In [4]:
def _decoded_class_counts(labels):
    """Return the number of rows per class in `labels`, indexed by crop name.

    `labels` holds encoded class ids; counts are ordered by id and the ids
    are then mapped back to their names through the fitted encoder `le`.
    """
    counts = labels.value_counts().sort_index()
    counts.index = le.inverse_transform(counts.index)
    return counts


# Target is the encoded crop id; every other column is a feature.
y = df['label']
X = df.drop(columns='label')

# Hold out 20% of the rows for evaluation; the fixed seed makes the split repeatable.
# NOTE(review): the split is not stratified, so the per-class counts printed
# below differ between crops -- consider passing stratify=y if an even
# class balance in the test set matters.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

train_counts = _decoded_class_counts(y_train)
test_counts = _decoded_class_counts(y_test)

print("Class distribution in Training Dataset:")
print(train_counts)

print("\nClass distribution in Testing Dataset:")
print(test_counts)

# Learn the scaling statistics from the training rows only, then apply the
# same fitted transform to both splits so no test information leaks in.
scaler = StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

Class distribution in Training Dataset:
apple          77
banana         79
blackgram      80
chickpea       74
coconut        73
coffee         83
cotton         83
grapes         86
jute           77
kidneybeans    80
lentil         89
maize          79
mango          81
mothbeans      76
mungbean       81
muskmelon      83
orange         86
papaya         77
pigeonpeas     77
pomegranate    77
rice           81
watermelon     81
Name: count, dtype: int64

Class distribution in Testing Dataset:
apple          23
banana         21
blackgram      20
chickpea       26
coconut        27
coffee         17
cotton         17
grapes         14
jute           23
kidneybeans    20
lentil         11
maize          21
mango          19
mothbeans      24
mungbean       19
muskmelon      17
orange         14
papaya         23
pigeonpeas     23
pomegranate    23
rice           19
watermelon     19
Name: count, dtype: int64


In [5]:
# Train a decision tree on the standardised training features
# (seeded so repeated runs build the same tree).
dt = DecisionTreeClassifier(random_state=42).fit(X_train_scaled, y_train)

# Class ids predicted for the held-out rows.
y_pred = dt.predict(X_test_scaled)

# Summary metrics. Precision/recall/F1 share the same options: 'macro'
# averages the per-class scores, and zero_division=0 is the value used
# when a score's denominator is zero.
macro_opts = {'average': 'macro', 'zero_division': 0}
accuracy = accuracy_score(y_test, y_pred)
precision = precision_score(y_test, y_pred, **macro_opts)
recall = recall_score(y_test, y_pred, **macro_opts)
f1 = f1_score(y_test, y_pred, **macro_opts)

# Per-crop breakdown, labelled with the original crop names.
print("\n\tDecision Tree Classification Report\n")
print(classification_report(y_test, y_pred, target_names=le.classes_, zero_division=0))

# Headline numbers, four decimals each.
print("\nDecision Tree Model Evaluation")
print(f"Accuracy : {accuracy:.4f}")
print(f"Precision: {precision:.4f}")
print(f"Recall   : {recall:.4f}")
print(f"F1 Score : {f1:.4f}")



	Decision Tree Classification Report

              precision    recall  f1-score   support

       apple       1.00      1.00      1.00        23
      banana       1.00      1.00      1.00        21
   blackgram       0.95      1.00      0.98        20
    chickpea       1.00      1.00      1.00        26
     coconut       0.96      1.00      0.98        27
      coffee       1.00      1.00      1.00        17
      cotton       1.00      1.00      1.00        17
      grapes       1.00      1.00      1.00        14
        jute       0.92      0.96      0.94        23
 kidneybeans       1.00      1.00      1.00        20
      lentil       0.92      1.00      0.96        11
       maize       1.00      0.95      0.98        21
       mango       1.00      1.00      1.00        19
   mothbeans       1.00      0.92      0.96        24
    mungbean       1.00      1.00      1.00        19
   muskmelon       1.00      1.00      1.00        17
      orange       1.00      1.00      1.0

In [6]:
def _read_finite_float(prompt):
    """Ask on stdin until the reply parses as a finite float, then return it.

    A reply that is not a number (or is NaN/inf) triggers a short message and
    a new prompt instead of an exception, so one typo does not abort the cell
    and throw away the values that were already entered.
    """
    while True:
        reply = input(prompt)
        try:
            number = float(reply)
        except ValueError:
            print(f"  '{reply}' is not a number, please try again.")
            continue
        if np.isfinite(number):
            return number
        print("  Value must be a finite number, please try again.")


print("\nEnter the following inputs:")

# Human-readable prompts, one per feature column, in the same order as X.columns.
features = ["Nitrogen (N)", "Phosphorus (P)", "Potassium (K)", "Temperature (°C)",
            "Humidity (%)", "pH", "Rainfall (mm)"]

# Fail before prompting if the prompts no longer line up with the training
# columns; otherwise the mismatch would only surface after everything was typed.
if len(features) != len(X.columns):
    raise ValueError(
        f"Expected {len(X.columns)} input prompts to match the training "
        f"columns, but {len(features)} are defined"
    )

user_input = [_read_finite_float(f"{feature}: ") for feature in features]

# Single-row frame carrying the training column names.
input_df = pd.DataFrame([user_input], columns=X.columns)

# Scale user input with the same scaler that was fitted on the training data
input_scaled = scaler.transform(input_df)

# Predict the class id and decode it back to the crop name
prediction = dt.predict(input_scaled)
predicted_crop = le.inverse_transform(prediction)[0]

print("Predicted Crop:", predicted_crop)


Enter the following inputs:


Nitrogen (N):  2
Phosphorus (P):  32
Potassium (K):  45
Temperature (°C):  32
Humidity (%):  82
pH:  5.03
Rainfall (mm):  203.5


Predicted Crop: coconut
