In [61]:
## Implementation of Bayesian Optimization and Random Forests (bagging) 

In [8]:
!python -m pip uninstall bayesian-optimization --yes

Found existing installation: bayesian-optimization 1.4.3
Uninstalling bayesian-optimization-1.4.3:
  Successfully uninstalled bayesian-optimization-1.4.3


In [9]:
!pip cache purge

Files removed: 6


In [10]:
%pip install bayesian-optimization==1.4.1

Collecting bayesian-optimization==1.4.1
  Downloading bayesian_optimization-1.4.1-py3-none-any.whl.metadata (508 bytes)
Downloading bayesian_optimization-1.4.1-py3-none-any.whl (18 kB)
Installing collected packages: bayesian-optimization
Successfully installed bayesian-optimization-1.4.1
Note: you may need to restart the kernel to use updated packages.


Reason for being yanked: https://github.com/fmfn/BayesianOptimization/pull/388


In [25]:
pip show bayesian-optimization

Name: bayesian-optimization
Version: 1.4.1
Summary: Bayesian Optimization package
Home-page: https://github.com/fmfn/BayesianOptimization
Author: Fernando Nogueira
Author-email: fmfnogueira@gmail.com
License: 
Location: C:\Users\ADMIN\AppData\Local\Programs\Python\Python312\Lib\site-packages
Requires: colorama, numpy, scikit-learn, scipy
Required-by: 
Note: you may need to restart the kernel to use updated packages.


In [160]:
import pandas as pd
from sklearn.ensemble import RandomForestClassifier
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, recall_score, precision_score, f1_score
from bayes_opt import BayesianOptimization
from sklearn.preprocessing import LabelEncoder

In [161]:
df = pd.read_csv('Crop_recommendation.csv')

In [162]:
crop_data = pd.read_csv('Crop_recommendation.csv')

In [163]:
X = crop_data.drop(columns = ['crop_label'])

In [164]:
y = crop_data['crop_label']

In [165]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [166]:
# Define the objective function for Bayesian Optimization
def rf_bo(n_estimators, max_depth, min_samples_split, min_samples_leaf, max_features):
    # Convert hyperparameters to integers where necessary
    n_estimators = int(n_estimators)
    max_depth = int(max_depth)
    min_samples_split = int(min_samples_split)
    min_samples_leaf = int(min_samples_leaf)
    
    # Initialize Random Forest classifier with current hyperparameters
    rf_model = RandomForestClassifier(n_estimators=n_estimators,
                                      max_depth=max_depth,
                                      min_samples_split=min_samples_split,
                                      min_samples_leaf=min_samples_leaf,
                                      random_state=42)
    
    # Fit the model
    rf_model.fit(X_train, y_train)
    
    # Make predictions
    y_pred = rf_model.predict(X_test)
    
    # Calculate evaluation metrics (F1-score weighted)
    f1 = f1_score(y_test, y_pred, average='weighted', zero_division=1)
    
    return f1

In [167]:
pbounds = {
    'n_estimators': (50, 200),
    'max_depth': (5, 20),
    'min_samples_split': (2, 10),
    'min_samples_leaf': (1, 5),
    'max_features': (0.1, 0.999)
}

In [168]:
# Perform Bayesian Optimization
optimizer = BayesianOptimization(f=rf_bo, pbounds=pbounds, random_state=42, verbose=2)

In [169]:
optimizer.maximize(init_points=5, n_iter=10)

|   iter    |  target   | max_depth | max_fe... | min_sa... | min_sa... | n_esti... |
-------------------------------------------------------------------------------------
| [0m1        [0m | [0m0.9151   [0m | [0m10.62    [0m | [0m0.9547   [0m | [0m3.928    [0m | [0m6.789    [0m | [0m73.4     [0m |
| [95m2        [0m | [95m0.9175   [0m | [95m7.34     [0m | [95m0.1522   [0m | [95m4.465    [0m | [95m6.809    [0m | [95m156.2    [0m |
| [0m3        [0m | [0m0.8586   [0m | [0m5.309    [0m | [0m0.9719   [0m | [0m4.33     [0m | [0m3.699    [0m | [0m77.27    [0m |
| [0m4        [0m | [0m0.912    [0m | [0m7.751    [0m | [0m0.3735   [0m | [0m3.099    [0m | [0m5.456    [0m | [0m93.68    [0m |
| [0m5        [0m | [0m0.91     [0m | [0m14.18    [0m | [0m0.2254   [0m | [0m2.169    [0m | [0m4.931    [0m | [0m118.4    [0m |
| [0m6        [0m | [0m0.913    [0m | [0m7.245    [0m | [0m0.3035   [0m | [0m2.874    [0m | [0m5.399

In [170]:
# Get the best hyperparameters found by Bayesian Optimization
best_params = optimizer.max['params']
best_n_estimators = int(best_params['n_estimators'])
best_max_depth = int(best_params['max_depth'])
best_min_samples_split = int(best_params['min_samples_split'])
best_min_samples_leaf = int(best_params['min_samples_leaf'])

In [171]:
# Initialize Random Forest classifier with the best hyperparameters
best_rf_model = RandomForestClassifier(n_estimators=best_n_estimators,
                                       max_depth=best_max_depth,
                                       min_samples_split=best_min_samples_split,
                                       min_samples_leaf=best_min_samples_leaf,
                                       random_state=42)

In [172]:
# Fit the model with the best hyperparameters
best_rf_model.fit(X_train, y_train)

In [173]:
# Make predictions with the best model
y_pred = best_rf_model.predict(X_test)

In [174]:
accuracy = accuracy_score(y_test, y_pred)
recall = recall_score(y_test, y_pred, average='weighted', zero_division=1)
precision = precision_score(y_test, y_pred, average='weighted', zero_division=1)
f1 = f1_score(y_test, y_pred, average='weighted', zero_division=1)

In [175]:
metrics = {
    'accuracy': accuracy,
    'recall': recall,
    'precision': precision,
    'f1_score': f1
}

In [176]:
print(y_pred)

['rice' 'groundnuts' 'mothbeans' ... 'pomegranate' 'jute' 'kidneybeans']


In [177]:
print("Metrics after Bayesian Optimization and Random Forests (bagging):")
metrics

Metrics after Bayesian Optimization and Random Forests (bagging):


{'accuracy': 0.9289772727272727,
 'recall': 0.9289772727272727,
 'precision': 0.9235001276227764,
 'f1_score': 0.922974487092662}

In [178]:
X = crop_data.drop(columns = ['crop_label'])

In [179]:
X.head(3)

Unnamed: 0,urea,phosphorous,potassium,temperature,humidity,ph,rainfall
0,90,42,43,20.879744,82.002744,6.502985,202.935536
1,85,58,41,21.770462,80.319644,7.038096,226.655537
2,60,55,44,23.004459,82.320763,7.840207,263.964248


In [180]:
y = crop_data['crop_label']

In [181]:
y.head(3)

0    rice
1    rice
2    rice
Name: crop_label, dtype: object

In [182]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [183]:
label_encoder = LabelEncoder()

In [184]:
y_test_encoded = label_encoder.fit_transform(y_test)

In [185]:
y_train_encoded = label_encoder.fit_transform(y_train)

In [186]:
best_params = optimizer.max['params']

In [187]:
best_params

{'max_depth': 8.751161812201282,
 'max_features': 0.23140996221024668,
 'min_samples_leaf': 2.14698193779771,
 'min_samples_split': 9.87666077087524,
 'n_estimators': 68.81976074974969}

In [188]:
final_model = RandomForestRegressor(n_estimators=int(best_params['n_estimators']),
                                   max_depth=int(best_params['max_depth']),
                                   min_samples_split=int(best_params['min_samples_split']),
                                   max_features=best_params['max_features'],
                                   random_state=42)

In [189]:
final_model.fit(X_train, y_train_encoded)

In [190]:
score = final_model.score(X_test, y_test_encoded)

In [192]:
print(f"Test R^2 Score: {score}")

Test R^2 Score: 0.8095534413346017


In [196]:
best_params_formatted = {
    'n_estimators': int(best_params['n_estimators']),
    'max_depth': int(best_params['max_depth']),
    'min_samples_split': int(best_params['min_samples_split']),
    'max_features': best_params['max_features']
}

In [197]:
optimized_rf = RandomForestRegressor(**best_params_formatted, random_state=42)

In [198]:
optimized_rf.fit(X_train, y_train_encoded)

In [199]:
score = optimized_rf.score(X_test, y_test_encoded)

In [200]:
print(f"Test R^2 Score with Optimized Hyperparameters: {score}")

Test R^2 Score with Optimized Hyperparameters: 0.8095534413346017
