In [None]:
import pandas as pd

# Location of the crop-recommendation CSV.
# NOTE(review): '/content/...' looks like a Colab runtime path - adjust when running elsewhere.
DATA_PATH = '/content/Crop_recommendation (1).csv'

# Load the dataset into `df`; the following cells read the target from its 'label' column
# and treat every other column as a feature.
df = pd.read_csv(DATA_PATH)

In [None]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import GridSearchCV, train_test_split
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import PowerTransformer

# Assuming your dataset is in a pandas DataFrame named 'df'
# Columns n, p, k, humidity, temperature, rainfall, ph are your features
# and the label column is named 'label'

# Separate the feature columns from the target so both are available by name
X = df.drop('label', axis=1)
y = df['label']

# Split the dataset into training (80%) and testing (20%) sets; fixed seed for reproducibility
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Create a pipeline with the Yeo-Johnson transformation and the random forest model.
# Keeping the transformer inside the pipeline means it is re-fitted on the training
# part of every cross-validation fold rather than on the whole training set.
pipeline = Pipeline([
    ('yeo-johnson', PowerTransformer(method='yeo-johnson')), # Apply Yeo-Johnson transformation
    ('rf', RandomForestClassifier(random_state=42)) # Random forest model
])

# Define the hyperparameters to be tuned using grid search.
# The 'rf__' prefix routes each parameter to the pipeline step named 'rf'.
# 3 * 4 * 3 * 3 * 2 = 216 candidate combinations.
param_grid = {
    'rf__n_estimators': [50, 100, 200],
    'rf__max_depth': [None, 5, 10, 20],
    'rf__min_samples_split': [2, 5, 10],
    'rf__min_samples_leaf': [1, 2, 4],
    'rf__max_features': ['sqrt', 'log2']
}

# Perform grid search with 5-fold cross-validation, using all available CPU cores
grid_search = GridSearchCV(pipeline, param_grid, cv=5, n_jobs=-1)

# Fit the grid search to the training data only; the test split stays unseen
grid_search.fit(X_train, y_train)
best_model = grid_search.best_estimator_

# Print the best hyperparameters and the corresponding score
# (best_score_ is the mean cross-validated score of the winning combination)
print("Best parameters: ", grid_search.best_params_)
print("Best score: ", grid_search.best_score_)

# Score the tuned pipeline on the held-out split, which took no part in the search
print("Test accuracy: ", best_model.score(X_test, y_test))


Best parameters:  {'rf__max_depth': None, 'rf__max_features': 'sqrt', 'rf__min_samples_leaf': 1, 'rf__min_samples_split': 2, 'rf__n_estimators': 200}
Best score:  0.9954545454545455


In [None]:
from sklearn.model_selection import cross_val_score,cross_validate
from sklearn.metrics import precision_score, recall_score, f1_score, accuracy_score
# Metrics to collect for every fold (macro-averaged over the classes, plus accuracy)
scoring = ['precision_macro', 'recall_macro', 'f1_macro', 'accuracy']

# Build the full feature matrix and target from `df` inside this cell so it does not
# depend on `X` / `y` left over from an earlier kernel session.
# NOTE(review): assumes the original X / y were all non-label columns and 'label' - confirm.
X = df.drop('label', axis=1)
y = df['label']

# Perform 5-fold cross-validation with the best model.
# NOTE: these folds cover the whole dataset, including the rows the grid search used
# to pick the hyperparameters, so the scores are not a fully independent estimate.
scores = cross_validate(best_model, X, y, cv=5, scoring=scoring)

# One value per fold for each metric
print("Precision scores:", scores['test_precision_macro'])
print("Recall scores:", scores['test_recall_macro'])
print("F1 scores:", scores['test_f1_macro'])
print("Accuracy scores:", scores['test_accuracy'])

Precision scores: [0.9978355  0.99350649 0.99586777 0.99586777 0.99153876]
Recall scores: [0.99772727 0.99318182 0.99545455 0.99545455 0.99090909]
F1 scores: [0.99772585 0.99317755 0.99544315 0.99544315 0.99089486]
Accuracy scores: [0.99772727 0.99318182 0.99545455 0.99545455 0.99090909]


In [None]:
import joblib

# Serialize the best pipeline found by the grid search to 'best_model.pkl'
# (relative path, so it lands in the current working directory).
# The call is kept as the cell's last expression so the notebook echoes the
# list of written file names.
joblib.dump(value=best_model, filename='best_model.pkl')

['best_model.pkl']