In [1]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, classification_report
import joblib
import warnings
warnings.filterwarnings('ignore')

print("All libraries imported successfully!")

All libraries imported successfully!


In [2]:
df = pd.read_csv('Crop_recommendation.csv')
print("Dataset Shape:", df.shape)
print("\nFirst 5 rows:")
print(df.head())
print("\nDataset Info:")
print(df.info())
print("\nUnique Crops:")
print(df['label'].unique())
print("\nTotal Crops:", df['label'].nunique())

Dataset Shape: (2200, 8)

First 5 rows:
    N   P   K  temperature   humidity        ph    rainfall label
0  90  42  43    20.879744  82.002744  6.502985  202.935536  rice
1  85  58  41    21.770462  80.319644  7.038096  226.655537  rice
2  60  55  44    23.004459  82.320763  7.840207  263.964248  rice
3  74  35  40    26.491096  80.158363  6.980401  242.864034  rice
4  78  42  42    20.130175  81.604873  7.628473  262.717340  rice

Dataset Info:
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 2200 entries, 0 to 2199
Data columns (total 8 columns):
 #   Column       Non-Null Count  Dtype  
---  ------       --------------  -----  
 0   N            2200 non-null   int64  
 1   P            2200 non-null   int64  
 2   K            2200 non-null   int64  
 3   temperature  2200 non-null   float64
 4   humidity     2200 non-null   float64
 5   ph           2200 non-null   float64
 6   rainfall     2200 non-null   float64
 7   label        2200 non-null   object 
dtypes: float64(4), int

In [3]:
# Separate features and target
X = df[['N', 'P', 'K', 'temperature', 'humidity', 'ph', 'rainfall']]
y = df['label']

print("Features Shape:", X.shape)
print("Target Shape:", y.shape)

print("\nCrop Distribution:")
print(y.value_counts())

# Encode target labels
label_encoder = LabelEncoder()
y_encoded = label_encoder.fit_transform(y)

print("\nLabel Encoding Mapping:")
for i, crop in enumerate(label_encoder.classes_):
    print(f"{i}: {crop}")

Features Shape: (2200, 7)
Target Shape: (2200,)

Crop Distribution:
label
rice           100
maize          100
chickpea       100
kidneybeans    100
pigeonpeas     100
mothbeans      100
mungbean       100
blackgram      100
lentil         100
pomegranate    100
banana         100
mango          100
grapes         100
watermelon     100
muskmelon      100
apple          100
orange         100
papaya         100
coconut        100
cotton         100
jute           100
coffee         100
Name: count, dtype: int64

Label Encoding Mapping:
0: apple
1: banana
2: blackgram
3: chickpea
4: coconut
5: coffee
6: cotton
7: grapes
8: jute
9: kidneybeans
10: lentil
11: maize
12: mango
13: mothbeans
14: mungbean
15: muskmelon
16: orange
17: papaya
18: pigeonpeas
19: pomegranate
20: rice
21: watermelon


In [4]:
# Split data: 80% training, 20% testing
X_train, X_test, y_train, y_test = train_test_split(
    X, y_encoded, 
    test_size=0.2, 
    random_state=42
)

# Scale features
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

print("Training Set Size:", X_train_scaled.shape)
print("Testing Set Size:", X_test_scaled.shape)

Training Set Size: (1760, 7)
Testing Set Size: (440, 7)


In [5]:
# Create and train Random Forest model
model = RandomForestClassifier(
    n_estimators=100,
    random_state=42,
    n_jobs=-1
)

print("Training model...")
model.fit(X_train_scaled, y_train)
print("Model trained successfully!")

# predictions
y_pred = model.predict(X_test_scaled)

# Calculate accuracy
accuracy = accuracy_score(y_test, y_pred)
print(f"\nModel Accuracy: {accuracy * 100:.2f}%")

print("\nClassification Report:")
print(classification_report(y_test, y_pred, target_names=label_encoder.classes_))

Training model...
Model trained successfully!

Model Accuracy: 99.32%

Classification Report:
              precision    recall  f1-score   support

       apple       1.00      1.00      1.00        23
      banana       1.00      1.00      1.00        21
   blackgram       1.00      1.00      1.00        20
    chickpea       1.00      1.00      1.00        26
     coconut       1.00      1.00      1.00        27
      coffee       1.00      1.00      1.00        17
      cotton       1.00      1.00      1.00        17
      grapes       1.00      1.00      1.00        14
        jute       0.92      1.00      0.96        23
 kidneybeans       1.00      1.00      1.00        20
      lentil       0.92      1.00      0.96        11
       maize       1.00      1.00      1.00        21
       mango       1.00      1.00      1.00        19
   mothbeans       1.00      0.96      0.98        24
    mungbean       1.00      1.00      1.00        19
   muskmelon       1.00      1.00      1.

In [6]:
# Check which features are most important
feature_importance = pd.DataFrame({
    'Feature': X.columns,
    'Importance': model.feature_importances_
}).sort_values('Importance', ascending=False)

print("Feature Importance:")
print(feature_importance)

Feature Importance:
       Feature  Importance
6     rainfall    0.227036
4     humidity    0.211279
2            K    0.181222
1            P    0.143622
0            N    0.108859
3  temperature    0.075682
5           ph    0.052301


In [7]:
# trained model
joblib.dump(model, 'crop_recommendation_model.pkl')
print("✓ Saved: crop_recommendation_model.pkl")

# scaler
joblib.dump(scaler, 'scaler.pkl')
print("✓ Saved: scaler.pkl")

# Save the label encoder
joblib.dump(label_encoder, 'label_encoder.pkl')
print("✓ Saved: label_encoder.pkl")

# Save feature names
feature_names = ['N', 'P', 'K', 'temperature', 'humidity', 'ph', 'rainfall']
joblib.dump(feature_names, 'feature_names.pkl')
print("✓ Saved: feature_names.pkl")

# Save crops list
crops_list = list(label_encoder.classes_)
joblib.dump(crops_list, 'crops_list.pkl')
print("✓ Saved: crops_list.pkl")

print(f"\n✅ All files saved successfully!")
print(f"Available crops: {crops_list}")

Saving model files...
✓ Saved: crop_recommendation_model.pkl
✓ Saved: scaler.pkl
✓ Saved: label_encoder.pkl
✓ Saved: feature_names.pkl
✓ Saved: crops_list.pkl

✅ All files saved successfully!
Available crops: ['apple', 'banana', 'blackgram', 'chickpea', 'coconut', 'coffee', 'cotton', 'grapes', 'jute', 'kidneybeans', 'lentil', 'maize', 'mango', 'mothbeans', 'mungbean', 'muskmelon', 'orange', 'papaya', 'pigeonpeas', 'pomegranate', 'rice', 'watermelon']


In [8]:
# Test with sample data
test_input = np.array([[90, 42, 43, 20.88, 82.00, 6.50, 202.93]])

# Scale the input
test_input_scaled = scaler.transform(test_input)

# Predict
prediction = model.predict(test_input_scaled)[0]
probabilities = model.predict_proba(test_input_scaled)[0]

# Get crop name
predicted_crop = label_encoder.classes_[prediction]
confidence = np.max(probabilities) * 100

print("Test Input:")
print(f"N=90, P=42, K=43, Temp=20.88°C, Humidity=82%, pH=6.50, Rainfall=202.93mm")
print(f"\nPredicted Crop: {predicted_crop.upper()}")
print(f"Confidence: {confidence:.2f}%")

Test Input:
N=90, P=42, K=43, Temp=20.88°C, Humidity=82%, pH=6.50, Rainfall=202.93mm

Predicted Crop: RICE
Confidence: 99.00%
