# Step 1: Import libraries

In [43]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, classification_report
import joblib
from sklearn.metrics import classification_report
import os


# Step 2: Load dataset

In [44]:
# Resolve the dataset location instead of hard-coding a user-specific absolute path.
# By default the CSV is expected in the notebook's working directory; set the
# CROP_DATA_PATH environment variable to load it from somewhere else.
DATA_PATH = os.environ.get("CROP_DATA_PATH", "Crop_recommendation.csv")

df = pd.read_csv(DATA_PATH)
print("Dataset loaded successfully!")
print("Shape:", df.shape)
print(df.head())

Dataset loaded successfully!
Shape: (2200, 8)
    N   P   K  temperature   humidity        ph    rainfall label
0  90  42  43    20.879744  82.002744  6.502985  202.935536  rice
1  85  58  41    21.770462  80.319644  7.038096  226.655537  rice
2  60  55  44    23.004459  82.320763  7.840207  263.964248  rice
3  74  35  40    26.491096  80.158363  6.980401  242.864034  rice
4  78  42  42    20.130175  81.604873  7.628473  262.717340  rice


# Step 3: Define features and target

In [45]:
# Column order matters: the same list is reused when building inference inputs.
feature_cols = [
    "N",
    "P",
    "K",
    "temperature",
    "humidity",
    "ph",
    "rainfall",
]
X = df.loc[:, feature_cols]  # model inputs
y = df.loc[:, "label"]       # crop name to predict

# Step 4: Train-test split

In [46]:
# Hold out 20% of the rows for evaluation; stratifying on the label keeps the
# class proportions the same in both splits, and the fixed seed makes the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    stratify=y,
    random_state=42,
)

# Step 5: Build pipeline (scaler + random forest)

In [47]:
# Standardise every feature column; any column not listed is dropped.
preprocess = ColumnTransformer(
    transformers=[("scale", StandardScaler(), feature_cols)],
    remainder="drop",
)

# Chain preprocessing and the classifier so both are fitted and persisted together.
pipe = Pipeline(
    steps=[
        ("prep", preprocess),
        ("rf", RandomForestClassifier(n_estimators=300, random_state=42)),
    ]
)

# Step 6: Train and evaluate model

In [48]:
# Fit the scaler and the random forest on the training split only, so the
# held-out test rows never influence the learned parameters.
pipe.fit(X_train, y_train)

In [49]:
# Score the fitted pipeline on the held-out split.
y_pred = pipe.predict(X_test)

test_accuracy = accuracy_score(y_test, y_pred)
test_report = classification_report(y_test, y_pred)

print("Accuracy:", test_accuracy)
print("\nClassification Report:\n", test_report)

Accuracy: 0.9954545454545455

Classification Report:
               precision    recall  f1-score   support

       apple       1.00      1.00      1.00        20
      banana       1.00      1.00      1.00        20
   blackgram       1.00      0.95      0.97        20
    chickpea       1.00      1.00      1.00        20
     coconut       1.00      1.00      1.00        20
      coffee       1.00      1.00      1.00        20
      cotton       1.00      1.00      1.00        20
      grapes       1.00      1.00      1.00        20
        jute       0.95      1.00      0.98        20
 kidneybeans       1.00      1.00      1.00        20
      lentil       1.00      1.00      1.00        20
       maize       0.95      1.00      0.98        20
       mango       1.00      1.00      1.00        20
   mothbeans       1.00      1.00      1.00        20
    mungbean       1.00      1.00      1.00        20
   muskmelon       1.00      1.00      1.00        20
      orange       1.00    

# Step 7: Save trained model

In [50]:
# Persist the fitted pipeline together with the column order it was trained on,
# so a consumer of the file can rebuild inputs in the right order.
model_bundle = dict(pipeline=pipe, feature_order=feature_cols)
joblib.dump(model_bundle, "model.pkl")

print(" Model trained and saved as model.pkl")
print(" Model size:", os.stat("model.pkl").st_size, "bytes")

 Model trained and saved as model.pkl
 Model size: 11031845 bytes


# Step 8: Verify saved model

In [51]:
# NOTE: joblib.load unpickles arbitrary Python objects; only load files you
# created yourself or otherwise trust.
bundle = joblib.load("model.pkl")
print(" Loaded model keys:", bundle.keys())

# Verify the round trip instead of only listing the keys: the reloaded bundle
# must carry the same column order and reproduce the in-memory model's
# predictions on the held-out split.
assert bundle["feature_order"] == feature_cols, "saved feature order differs from feature_cols"
assert (bundle["pipeline"].predict(X_test) == y_pred).all(), (
    "reloaded pipeline does not reproduce the original test-set predictions"
)

 Loaded model keys: dict_keys(['pipeline', 'feature_order'])


# Step 10: Define recommend_crop function

In [52]:
def recommend_crop(N, P, K, temperature, humidity, ph, rainfall,
                   model=None, feature_order=None):
    """Recommend the most probable crop for a single set of readings.

    Parameters
    ----------
    N, P, K, temperature, humidity, ph, rainfall : numeric
        One value per training feature, given in this order.
    model : optional
        Fitted classifier exposing ``predict_proba`` and ``classes_``.
        Defaults to the notebook-level ``pipe``.
    feature_order : optional list of str
        Column names for the one-row input frame, in the same order as the
        positional values above. Defaults to the notebook-level ``feature_cols``.

    Returns
    -------
    tuple
        ``(label, confidence)`` where ``label`` is the class with the highest
        predicted probability and ``confidence`` is that probability as a
        plain ``float``.
    """
    # Fall back to the notebook globals so existing seven-argument calls keep working.
    if model is None:
        model = pipe
    if feature_order is None:
        feature_order = feature_cols

    row = [[N, P, K, temperature, humidity, ph, rainfall]]
    input_df = pd.DataFrame(row, columns=feature_order)

    # One inference pass: the label is read from the same probability vector
    # as the confidence instead of calling predict() and predict_proba() separately.
    probabilities = model.predict_proba(input_df)[0]
    best = probabilities.argmax()
    return model.classes_[best], float(probabilities[best])


 # Step 11: Test sample prediction

In [53]:
# Smoke-test the helper with one hand-picked set of readings.
crop, confidence = recommend_crop(
    N=90,
    P=40,
    K=40,
    temperature=25,
    humidity=80,
    ph=6.5,
    rainfall=200,
)
print(f"Recommended Crop: {crop} (Confidence: {confidence*100:.1f}%)")

 Recommended Crop: rice (Confidence: 53.7%)


# Step 12: Interactive sliders for testing

In [54]:
import ipywidgets as widgets
from ipywidgets import interact, FloatSlider, IntSlider

def interactive_recommend(N, P, K, temperature, humidity, ph, rainfall):
    """Widget callback: print the recommended crop and its confidence.

    Forwards the seven readings to ``recommend_crop`` and prints the result
    on two lines; nothing is returned.
    """
    crop, confidence = recommend_crop(
        N=N,
        P=P,
        K=K,
        temperature=temperature,
        humidity=humidity,
        ph=ph,
        rainfall=rainfall,
    )
    print(f"Recommended Crop: {crop}")
    print(f"Confidence: {confidence*100:.1f}%")

# One slider per model input; the dict keys must match the parameter names of
# interactive_recommend, and insertion order sets the on-screen order.
slider_controls = {
    "N": IntSlider(value=90, min=0, max=150, step=1, description="Nitrogen"),
    "P": IntSlider(value=40, min=0, max=150, step=1, description="Phosphorus"),
    "K": IntSlider(value=40, min=0, max=150, step=1, description="Potassium"),
    "temperature": FloatSlider(value=25, min=0, max=50, step=0.5, description="Temp (°C)"),
    "humidity": FloatSlider(value=80, min=0, max=100, step=0.5, description="Humidity %"),
    "ph": FloatSlider(value=6.5, min=0, max=14, step=0.1, description="pH"),
    "rainfall": FloatSlider(value=200, min=0, max=300, step=1, description="Rainfall (mm)"),
}

interact(interactive_recommend, **slider_controls)

interactive(children=(IntSlider(value=90, description='Nitrogen', max=150), IntSlider(value=40, description='P…

<function __main__.interactive_recommend(N, P, K, temperature, humidity, ph, rainfall)>