In [1]:
import pandas as pd
import joblib
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder

# Load the dataset
df = pd.read_csv("/content/new_crop_recommendation.csv")

# Encode Soilcolor and label
soil_encoder = LabelEncoder()
df['Soilcolor'] = soil_encoder.fit_transform(df['Soilcolor'])

label_encoder = LabelEncoder()
df['label'] = label_encoder.fit_transform(df['label'])

# Separate features and target
X = df.drop("label", axis=1)
y = df["label"]

# Train/test split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Train model
model = RandomForestClassifier(n_estimators=100, random_state=42)
model.fit(X_train, y_train)

# Save model and encoders
joblib.dump(model, "crop_model.pkl")
joblib.dump(soil_encoder, "soil_encoder.pkl")
joblib.dump(label_encoder, "label_encoder.pkl")

# Optional: print accuracy
print("Model trained and saved successfully!")
print(f"Training Accuracy: {model.score(X_train, y_train)*100:.2f}%")
print(f"Test Accuracy: {model.score(X_test, y_test)*100:.2f}%")


Model trained and saved successfully!
Training Accuracy: 100.00%
Test Accuracy: 71.30%
