In [2]:
import pandas as pd
import numpy as np
import joblib
import os

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, classification_report

print("=" * 70)
print("CROP RECOMMENDATION MODEL TRAINING")
print("=" * 70)

CROP RECOMMENDATION MODEL TRAINING


In [4]:
# ----------------------------------------------------------------------------
# PATH SETUP (RELATIVE & CONSISTENT)
# ----------------------------------------------------------------------------

if "_file_" in globals():
    CURRENT_DIR = os.path.dirname(os.path.abspath(_file_))
else:
    CURRENT_DIR = os.getcwd()

# Project root
BASE_DIR = os.path.dirname(CURRENT_DIR)

# CSV path (consistent everywhere)
DATA_PATH = os.path.join(
    BASE_DIR,
    "data",
    "processed",
    "Crop_recommendation.csv"
)

# Model paths
MODELS_DIR = os.path.join(BASE_DIR, "models")
MODEL_PATH = os.path.join(MODELS_DIR, "crop_recommendation_model.pkl")
SCALER_PATH = os.path.join(MODELS_DIR, "scaler2.pkl")

os.makedirs(MODELS_DIR, exist_ok=True)

print(f"Dataset path : {DATA_PATH}")
print(f"Model path   : {MODEL_PATH}")
print(f"Scaler path  : {SCALER_PATH}")
print()


Dataset path : C:\Users\KIIT\Downloads\AgriShield\data\processed\Crop_recommendation.csv
Model path   : C:\Users\KIIT\Downloads\AgriShield\models\crop_recommendation_model.pkl
Scaler path  : C:\Users\KIIT\Downloads\AgriShield\models\scaler2.pkl



In [8]:
# ----------------------------------------------------------------------------
# LOAD DATA
# ----------------------------------------------------------------------------

print("Loading dataset...")

try:
    df = pd.read_csv(DATA_PATH)
    print("Dataset loaded successfully")
    print(f"Shape: {df.shape}")
    print()

except FileNotFoundError:
    print("ERROR: Crop_recommendation.csv not found")
    print(f"Expected location: {DATA_PATH}")
    print("Ensure the file exists inside data/processed/")
    exit(1)

Loading dataset...
Dataset loaded successfully
Shape: (2200, 8)



In [12]:
# ----------------------------------------------------------------------------
# DATA PREPROCESSING
# ----------------------------------------------------------------------------

X = df.drop(columns=["label"])
y = df["label"]

X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)


In [14]:
# ----------------------------------------------------------------------------
# MODEL TRAINING
# ----------------------------------------------------------------------------

model = RandomForestClassifier(
    n_estimators=200,
    random_state=42,
    n_jobs=-1
)

model.fit(X_train_scaled, y_train)


In [16]:
# ----------------------------------------------------------------------------
# EVALUATION
# ----------------------------------------------------------------------------

y_pred = model.predict(X_test_scaled)
accuracy = accuracy_score(y_test, y_pred)

print(f"Model Accuracy: {accuracy:.4f}")
print()
print("Classification Report:")
print(classification_report(y_test, y_pred))
print()


Model Accuracy: 0.9932

Classification Report:
              precision    recall  f1-score   support

       apple       1.00      1.00      1.00        23
      banana       1.00      1.00      1.00        21
   blackgram       1.00      1.00      1.00        20
    chickpea       1.00      1.00      1.00        26
     coconut       1.00      1.00      1.00        27
      coffee       1.00      1.00      1.00        17
      cotton       1.00      1.00      1.00        17
      grapes       1.00      1.00      1.00        14
        jute       0.92      1.00      0.96        23
 kidneybeans       1.00      1.00      1.00        20
      lentil       0.92      1.00      0.96        11
       maize       1.00      1.00      1.00        21
       mango       1.00      1.00      1.00        19
   mothbeans       1.00      0.96      0.98        24
    mungbean       1.00      1.00      1.00        19
   muskmelon       1.00      1.00      1.00        17
      orange       1.00      1.00 

In [18]:
# ----------------------------------------------------------------------------
# SAVE MODEL & SCALER
# ----------------------------------------------------------------------------

try:
    joblib.dump(model, MODEL_PATH)
    joblib.dump(scaler, SCALER_PATH)
    print("Model and scaler saved successfully")

except Exception as e:
    print("Error saving model/scaler")
    print(e)

print("=" * 70)
print("TRAINING COMPLETED SUCCESSFULLY")
print("=" * 70)

Model and scaler saved successfully
TRAINING COMPLETED SUCCESSFULLY


In [17]:
# ============================================================================
# EVALUATE MODEL
# ============================================================================

print("üìä Evaluating model performance...")
print()

# Training accuracy
train_preds = model.predict(X_train_scaled)
train_accuracy = accuracy_score(y_train, train_preds)

# Testing accuracy
test_preds = model.predict(X_test_scaled)
test_accuracy = accuracy_score(y_test, test_preds)

print(f"üéØ Training Accuracy: {train_accuracy:.4f} ({train_accuracy*100:.2f}%)")
print(f"üéØ Testing Accuracy:  {test_accuracy:.4f} ({test_accuracy*100:.2f}%)")
print()

# Detailed classification report
print("üìã Detailed Classification Report:")
print("=" * 80)
print(classification_report(y_test, test_preds))
print()

# Feature importance
feature_importance = pd.DataFrame({
    'feature': X.columns,
    'importance': model.feature_importances_
}).sort_values('importance', ascending=False)

print("üåü Feature Importance:")
print(feature_importance.to_string(index=False))
print()

üìä Evaluating model performance...



[Parallel(n_jobs=8)]: Using backend ThreadingBackend with 8 concurrent workers.
[Parallel(n_jobs=8)]: Done  34 tasks      | elapsed:    0.0s
[Parallel(n_jobs=8)]: Done 100 out of 100 | elapsed:    0.0s finished
[Parallel(n_jobs=8)]: Using backend ThreadingBackend with 8 concurrent workers.
[Parallel(n_jobs=8)]: Done  34 tasks      | elapsed:    0.0s


üéØ Training Accuracy: 1.0000 (100.00%)
üéØ Testing Accuracy:  0.9955 (99.55%)

üìã Detailed Classification Report:
              precision    recall  f1-score   support

       apple       1.00      1.00      1.00        20
      banana       1.00      1.00      1.00        20
   blackgram       1.00      0.95      0.97        20
    chickpea       1.00      1.00      1.00        20
     coconut       1.00      1.00      1.00        20
      coffee       1.00      1.00      1.00        20
      cotton       1.00      1.00      1.00        20
      grapes       1.00      1.00      1.00        20
        jute       0.95      1.00      0.98        20
 kidneybeans       1.00      1.00      1.00        20
      lentil       1.00      1.00      1.00        20
       maize       0.95      1.00      0.98        20
       mango       1.00      1.00      1.00        20
   mothbeans       1.00      1.00      1.00        20
    mungbean       1.00      1.00      1.00        20
   muskmelon    

[Parallel(n_jobs=8)]: Done 100 out of 100 | elapsed:    0.0s finished


In [12]:
# ============================================================================
# SAVE MODELS
# ============================================================================

print("üíæ Saving trained model and scaler...")

try:
    # Save model
    joblib.dump(model, MODEL_SAVE_PATH)
    print(f"   ‚úÖ Model saved: {MODEL_SAVE_PATH}")
    
    # Save scaler
    joblib.dump(scaler, SCALER_SAVE_PATH)
    print(f"   ‚úÖ Scaler saved: {SCALER_SAVE_PATH}")
    
    # Verify files were created
    if os.path.exists(MODEL_SAVE_PATH) and os.path.exists(SCALER_SAVE_PATH):
        model_size = os.path.getsize(MODEL_SAVE_PATH) / 1024  # KB
        scaler_size = os.path.getsize(SCALER_SAVE_PATH) / 1024  # KB
        print(f"   üì¶ Model size: {model_size:.2f} KB")
        print(f"   üì¶ Scaler size: {scaler_size:.2f} KB")
    
except Exception as e:
    print(f"‚ùå Error saving models: {e}")
    exit(1)

print()
print("=" * 80)
print("‚úÖ TRAINING COMPLETE!")
print("=" * 80)
print()
print("Next steps:")
print("  1. Start the FastAPI server: uvicorn main:app --reload")
print("  2. Test the API at: http://localhost:8000/docs")
print("  3. Use the /api/recommend-crop endpoint")
print()

üíæ Saving trained model and scaler...
   ‚úÖ Model saved: C:\Users\KIIT\Downloads\AgriShield\backend\models\crop_recommendation_model.pkl
‚ùå Error saving models: name 'scaler' is not defined

‚úÖ TRAINING COMPLETE!

Next steps:
  1. Start the FastAPI server: uvicorn main:app --reload
  2. Test the API at: http://localhost:8000/docs
  3. Use the /api/recommend-crop endpoint

