In [2]:
from pathlib import Path

import joblib
import numpy as np
import pandas as pd
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

# Load dataset
df = pd.read_csv("data/crop_recommendation.csv")

# Define crops grown in Nigeria
nigerian_crops = [
    "maize", "rice", "sorghum", "millet", "cowpea", "groundnut",
    "cassava", "yam", "sweet potato", "tomato", "pepper", "okra",
    "banana", "pineapple", "palm oil", "soybean", "cocoa", "rubber"
]

# Filter dataset for Nigerian crops
df = df[df["label"].isin(nigerian_crops)]

# Select only weather-related features
X = df[["temperature", "humidity", "rainfall"]]  # Ensure dataset has these columns
y = df["label"]

# Split dataset
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Scale the features
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Train model
model = RandomForestClassifier(n_estimators=100, random_state=42)
model.fit(X_train_scaled, y_train)

# Save the trained model & scaler
joblib.dump(model, "models/crop_recommendation_model.pkl")
joblib.dump(scaler, "models/scaler.pkl")

print("✅ Model retrained using only weather features & saved successfully!")


✅ Model retrained using only weather features & saved successfully!
