In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report, confusion_matrix
from sklearn.preprocessing import LabelEncoder

# Load the merged crop dataset; the path is resolved relative to the notebook's working directory.
df = pd.read_csv("final_merged_crop_dataset.csv")

# Features are the four soil/environment measurement columns; the target is the crop name.
X = df[["Soil Moisture", "Environment Temperature", "Environment Humidity", "Environment Light Intensity"]]
y = df["Crop"]

# Encode crop names as integer class ids (0..n_classes-1); le.classes_ maps ids back to names.
le = LabelEncoder()
y_encoded = le.fit_transform(y)

# Hold out 20% of the rows for evaluation; the fixed seed keeps the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(X, y_encoded, test_size=0.2, random_state=42)

# 100-tree random forest, seeded so repeated runs give the same model.
model = RandomForestClassifier(n_estimators=100, random_state=42)
model.fit(X_train, y_train)

y_pred = model.predict(X_test)

# Pin the evaluated label set to every encoded class. Without `labels=`, a class
# that is absent from both y_test and y_pred would shrink the confusion matrix and
# leave classification_report unable to line the labels up with the full list of
# target_names. When all classes are present the output is unchanged.
class_ids = list(range(len(le.classes_)))

print("Confusion Matrix:")
print(confusion_matrix(y_test, y_pred, labels=class_ids))
print("\nClassification Report:")
print(classification_report(y_test, y_pred, labels=class_ids, target_names=le.classes_))


Confusion Matrix:
[[328   0  10   0   0  12]
 [  0 336   7   0   0   0]
 [  3  33 306   0   0   0]
 [  0   0   0 329  17   0]
 [  0   0   0  13 282   0]
 [  3   0   0   0   0 322]]

Classification Report:
              precision    recall  f1-score   support

      Barley       0.98      0.94      0.96       350
      Cotton       0.91      0.98      0.94       343
       Maize       0.95      0.89      0.92       342
        Rice       0.96      0.95      0.96       346
   Sugarcane       0.94      0.96      0.95       295
       Wheat       0.96      0.99      0.98       325

    accuracy                           0.95      2001
   macro avg       0.95      0.95      0.95      2001
weighted avg       0.95      0.95      0.95      2001



In [3]:
import joblib

# Persist the fitted classifier so it can be reloaded with joblib.load.
joblib.dump(model, "crop_recommendation_model.pkl")

# The classifier was fitted on integer-encoded labels, so its predictions are
# class ids rather than crop names. Save the fitted LabelEncoder next to the
# model so a consumer can recover names via le.inverse_transform(predictions).
joblib.dump(le, "crop_label_encoder.pkl")

print("Model saved successfully!")


Model saved successfully!
