In [1]:
# Install required packages (run this first in Colab)
!pip install scikit-learn pandas numpy

import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
import pickle

# Read the crop dataset from the CSV file in the working directory
df = pd.read_csv('crop_dataset2.csv')

# Quick overview: dimensions, a preview of the first rows, and how many
# samples each crop label has
print(f"Dataset shape: {df.shape}")
print("\nFirst few rows:", df.head(), sep="\n")
print("\nCrop distribution:", df['label'].value_counts(), sep="\n")

# Input columns used by the model, and the target column to predict
features = ['N', 'P', 'K', 'temperature', 'humidity', 'ph', 'rainfall']
X, y = df[features], df['label']

# Hold out 20% of the rows for evaluation; the split is stratified on the
# label and seeded so it is reproducible
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
    stratify=y,
)

# Learn the scaling statistics from the training split only, then apply the
# same transformation to both splits
scaler = StandardScaler().fit(X_train)
X_train_scaled, X_test_scaled = scaler.transform(X_train), scaler.transform(X_test)

# Fit a 100-tree random forest on the scaled training data
rf_model = RandomForestClassifier(n_estimators=100, random_state=42).fit(X_train_scaled, y_train)

# Predict labels for the held-out split
y_pred = rf_model.predict(X_test_scaled)

# Performance Metrics: banner followed by accuracy, per-class report and
# confusion matrix, all computed on the held-out split
print("", "=" * 50, sep="\n")
print("MODEL PERFORMANCE METRICS", "=" * 50, sep="\n")

accuracy = accuracy_score(y_test, y_pred)
print("Accuracy: {:.4f}".format(accuracy))

print("\nClassification Report:", classification_report(y_test, y_pred), sep="\n")

print("\nConfusion Matrix:", confusion_matrix(y_test, y_pred), sep="\n")

# Feature importance: pair each input column with the forest's importance
# score, then list the most important first
feature_importance = pd.DataFrame({'feature': features, 'importance': rf_model.feature_importances_})
feature_importance = feature_importance.sort_values(by='importance', ascending=False)

print("\nFeature Importance:", feature_importance, sep="\n")

# Bundle everything needed at inference time (classifier, fitted scaler and
# the ordered feature list) into a single dictionary
model_data = dict(model=rf_model, scaler=scaler, features=features)

# Serialise the bundle to disk with pickle
with open('crop_recommendation_model.pkl', 'wb') as model_file:
    pickle.dump(model_data, model_file)

print("", "=" * 50, sep="\n")
print("Model saved as 'crop_recommendation_model.pkl'", "=" * 50, sep="\n")

# Example prediction function
def predict_crop(N, P, K, temperature, humidity, ph, rainfall):
    """
    Predict crop recommendation based on input parameters.

    The model bundle is read from 'crop_recommendation_model.pkl' on every
    call, the single sample is scaled with the stored scaler, and the stored
    classifier is asked for its prediction.

    Parameters
    ----------
    N, P, K, temperature, humidity, ph, rainfall
        One value per model input feature; each is matched to the column of
        the same name.

    Returns
    -------
    tuple
        ``(prediction, probability)`` where ``prediction`` is the predicted
        label for the sample and ``probability`` is the largest class
        probability reported by ``predict_proba`` for that sample.

    Raises
    ------
    FileNotFoundError
        If the pickle file does not exist in the working directory.
    KeyError
        If the stored feature list names a feature this function does not
        accept.
    """
    # NOTE(security): pickle.load can execute arbitrary code embedded in the
    # file. Only load a model file that this notebook (or another trusted
    # source) produced.
    with open('crop_recommendation_model.pkl', 'rb') as f:
        model_data = pickle.load(f)

    model = model_data['model']
    scaler = model_data['scaler']

    # Column order the scaler was fitted with; fall back to the signature
    # order for bundles saved without a 'features' entry.
    default_order = ['N', 'P', 'K', 'temperature', 'humidity', 'ph', 'rainfall']
    feature_names = list(model_data.get('features', default_order))

    values = {
        'N': N,
        'P': P,
        'K': K,
        'temperature': temperature,
        'humidity': humidity,
        'ph': ph,
        'rainfall': rainfall,
    }

    # Build a one-row DataFrame with named columns. The scaler was fitted on
    # a DataFrame, so passing a bare ndarray triggers scikit-learn's
    # "X does not have valid feature names" warning, and a positional array
    # would silently break if the stored column order ever differed.
    input_data = pd.DataFrame(
        [[values[name] for name in feature_names]],
        columns=feature_names,
    )
    input_scaled = scaler.transform(input_data)

    # Make prediction
    prediction = model.predict(input_scaled)[0]
    probability = model.predict_proba(input_scaled)[0].max()

    return prediction, probability

# Try the prediction helper on a sample close to the first dataset row
print("\nExample Prediction:")
sample_prediction, confidence = predict_crop(
    N=90,
    P=42,
    K=43,
    temperature=20.88,
    humidity=82.00,
    ph=6.50,
    rainfall=202.94,
)
print(f"Predicted Crop: {sample_prediction}\nConfidence: {confidence:.4f}")

Dataset shape: (2200, 8)

First few rows:
    N   P   K  temperature   humidity        ph    rainfall label
0  90  42  43    20.879744  82.002744  6.502985  202.935536  rice
1  85  58  41    21.770462  80.319644  7.038096  226.655537  rice
2  60  55  44    23.004459  82.320763  7.840207  263.964248  rice
3  74  35  40    26.491096  80.158363  6.980401  242.864034  rice
4  78  42  42    20.130175  81.604873  7.628473  262.717340  rice

Crop distribution:
label
rice           100
maize          100
chickpea       100
kidneybeans    100
pigeonpeas     100
mothbeans      100
mungbean       100
blackgram      100
lentil         100
pomegranate    100
banana         100
mango          100
grapes         100
watermelon     100
muskmelon      100
apple          100
orange         100
papaya         100
coconut        100
cotton         100
jute           100
coffee         100
Name: count, dtype: int64

MODEL PERFORMANCE METRICS
Accuracy: 0.9955

Classification Report:
              precision 



In [2]:
# Try the prediction helper again with much lower N, P and K values
print("\nExample Prediction:")
sample_prediction, confidence = predict_crop(
    N=14,
    P=15,
    K=12,
    temperature=20.88,
    humidity=82.00,
    ph=6.50,
    rainfall=202.94,
)
print(f"Predicted Crop: {sample_prediction}\nConfidence: {confidence:.4f}")


Example Prediction:
Predicted Crop: orange
Confidence: 0.3800


