<a href="https://colab.research.google.com/github/J-B-Mugundh/plant-disease-detection-api/blob/main/Crop_Rotation_Recommendation_Synthesized.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report
import joblib

# Define mappings for crops and soil types
crop_mapping = {
    'Corn': 0,
    'Rice': 1,
    'Maize': 2,
    'Peach': 3,
    'Bell Pepper': 4,
    'Potato': 5,
    'Soybean': 6,
    'Tomato': 7,
    'Chilli': 8
}

soil_type_mapping = {
    'Loamy': 0,
    'Clayey': 1,
    'Sandy': 2,
    'Saline': 3
}

In [2]:
# Load dataset
data = pd.read_csv('crop_rotation_dataset_syn.csv')

# Check for unmapped crops
unmapped_crops = set(data['Recommended Crop']) - set(crop_mapping.keys())
if unmapped_crops:
    print("Unmapped crops found:", unmapped_crops)

# Map the recommended crops to integers
data['Recommended Crop'] = data['Recommended Crop'].map(crop_mapping)

# Drop rows with NaN values in 'Recommended Crop'
data = data.dropna(subset=['Recommended Crop'])

# Map the previous crops and soil types
data['Previous Crop'] = data['Previous Crop'].map(crop_mapping)
data['Soil Type'] = data['Soil Type'].map(soil_type_mapping)

# Drop rows with NaN values after mapping
data = data.dropna()

# Define features and target variable
X = data.drop('Recommended Crop', axis=1)
y = data['Recommended Crop']

# Split the dataset into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Initialize and train the model
model = RandomForestClassifier(n_estimators=100, random_state=42)
model.fit(X_train, y_train)

# Make predictions
y_pred = model.predict(X_test)


In [3]:
# Function to evaluate the model
def model_details():
    accuracy = accuracy_score(y_test, y_pred)
    confusion = confusion_matrix(y_test, y_pred)
    report = classification_report(y_test, y_pred)

    print(f"Accuracy: {accuracy * 100:.2f}%")
    print("Confusion Matrix:")
    print(confusion)
    print("Classification Report:")
    print(report)

In [4]:
# Evaluate the model
model_details()

# Save the model and mappings for future use
joblib.dump(model, 'crop_recommendation_model.pkl')
joblib.dump(crop_mapping, 'crop_mapping.pkl')
joblib.dump(soil_type_mapping, 'soil_type_mapping.pkl')

Accuracy: 82.72%
Confusion Matrix:
[[ 513   16   27   27   28   21   16   24   12]
 [  32 1493   17   17   20   42   25   26   13]
 [  31   13 1097   28   29   20   19   29   22]
 [  38   23   23   25   33   22   21   18   25]
 [  33   21   23   29  270   25   23   22   13]
 [  25   27   23   27   16 1413   23   29   23]
 [  24   22   27   25   25   24 1441   25   17]
 [  14   23   25   30   26   24   28  997   28]
 [  26   19   23   27   35   23   30   19 1023]]
Classification Report:
              precision    recall  f1-score   support

           0       0.70      0.75      0.72       684
           1       0.90      0.89      0.89      1685
           2       0.85      0.85      0.85      1288
           3       0.11      0.11      0.11       228
           4       0.56      0.59      0.57       459
           5       0.88      0.88      0.88      1606
           6       0.89      0.88      0.89      1630
           7       0.84      0.83      0.84      1195
           8       0.8

['soil_type_mapping.pkl']

In [5]:
import lightgbm as lgb
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report
import joblib

Dask dataframe query planning is disabled because dask-expr is not installed.

You can install it with `pip install dask[dataframe]` or `conda install dask`.
This will raise in a future version.



In [6]:
# Train using LightGBM
model = lgb.LGBMClassifier(n_estimators=100, random_state=42)
model.fit(X_train, y_train)

# Make predictions
y_pred = model.predict(X_test)

# Function to evaluate the model
def model_details():
    accuracy = accuracy_score(y_test, y_pred)
    confusion = confusion_matrix(y_test, y_pred)
    report = classification_report(y_test, y_pred)

    print(f"Accuracy: {accuracy * 100:.2f}%")
    print("Confusion Matrix:")
    print(confusion)
    print("Classification Report:")
    print(report)

# Evaluate the model
model_details()

# Save the model and mappings for future use
joblib.dump(model, 'crop_recommendation_lightgbm_model.pkl')
joblib.dump(crop_mapping, 'crop_mapping_lightgbm_model.pkl')
joblib.dump(soil_type_mapping, 'soil_type_mapping_lightgbm_model.pkl')


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.002603 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1033
[LightGBM] [Info] Number of data points in the train set: 40000, number of used features: 6
[LightGBM] [Info] Start training from score -2.670393
[LightGBM] [Info] Start training from score -1.758228
[LightGBM] [Info] Start training from score -2.064159
[LightGBM] [Info] Start training from score -3.832750
[LightGBM] [Info] Start training from score -3.056013
[LightGBM] [Info] Start training from score -1.823405
[LightGBM] [Info] Start training from score -1.828527
[LightGBM] [Info] Start training from score -2.108253
[LightGBM] [Info] Start training from score -2.126953
Accuracy: 82.66%
Confusion Matrix:
[[ 504   22   34   19   26   20   16   23   20]
 [  26 1489   25   20   17   35   32   28   13]
 [  29   16 1103   22   17   32

['soil_type_mapping_lightgbm_model.pkl']

In [7]:
import xgboost as xgb
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report
import joblib

In [8]:
# Train using XGBoost
model = xgb.XGBClassifier(n_estimators=100, random_state=42)
model.fit(X_train, y_train)

# Make predictions
y_pred = model.predict(X_test)

# Function to evaluate the model
def model_details():
    accuracy = accuracy_score(y_test, y_pred)
    confusion = confusion_matrix(y_test, y_pred)
    report = classification_report(y_test, y_pred)

    print(f"Accuracy: {accuracy * 100:.2f}%")
    print("Confusion Matrix:")
    print(confusion)
    print("Classification Report:")
    print(report)

# Evaluate the model
model_details()

# Save the model and mappings for future use
joblib.dump(model, 'crop_recommendation_xgboost_model.pkl')
joblib.dump(crop_mapping, 'crop_mapping_xgboost_model.pkl')
joblib.dump(soil_type_mapping, 'soil_type_mapping_xgboost_model.pkl')


Accuracy: 82.69%
Confusion Matrix:
[[ 497   24   31   15   33   28   21   22   13]
 [  22 1496   37   25   15   37   29   17    7]
 [  33   15 1102   23   21   21   28   24   21]
 [  18   23   31   28   31   25   23   28   21]
 [  21   14   29   33  275   24   19   27   17]
 [  25   21   32   15   26 1410   23   30   24]
 [  19   13   34   32   23   20 1437   31   21]
 [  19   20   25   20   29   29   31 1006   16]
 [  19   22   26   30   23   34   27   26 1018]]
Classification Report:
              precision    recall  f1-score   support

           0       0.74      0.73      0.73       684
           1       0.91      0.89      0.90      1685
           2       0.82      0.86      0.84      1288
           3       0.13      0.12      0.12       228
           4       0.58      0.60      0.59       459
           5       0.87      0.88      0.87      1606
           6       0.88      0.88      0.88      1630
           7       0.83      0.84      0.84      1195
           8       0.8

['soil_type_mapping_xgboost_model.pkl']

In [9]:
import pandas as pd
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report
import joblib

In [10]:
# Standardize the feature set for neural network training
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

# Split the dataset into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X_scaled, y, test_size=0.2, random_state=42)

# Build a more complex neural network model with embedding for categorical features
input_layer = layers.Input(shape=(X_train.shape[1],))

# Input layers for categorical features (e.g., 'Previous Crop' and 'Soil Type')
previous_crop_input = layers.Input(shape=(1,), dtype=tf.int32)
soil_type_input = layers.Input(shape=(1,), dtype=tf.int32)

# Embedding layers for categorical variables
previous_crop_embedding = layers.Embedding(input_dim=len(crop_mapping), output_dim=10)(previous_crop_input)
soil_type_embedding = layers.Embedding(input_dim=len(soil_type_mapping), output_dim=5)(soil_type_input)

# Flatten the embedding layers
previous_crop_flat = layers.Flatten()(previous_crop_embedding)
soil_type_flat = layers.Flatten()(soil_type_embedding)

# Concatenate the embeddings with the numeric features
concatenated = layers.concatenate([input_layer, previous_crop_flat, soil_type_flat])

# Fully connected layers
dense1 = layers.Dense(128, activation='relu')(concatenated)
dense2 = layers.Dense(256, activation='relu')(dense1)
dense3 = layers.Dense(128, activation='relu')(dense2)

# Output layer with softmax activation for classification
output_layer = layers.Dense(9, activation='softmax')(dense3)

# Build and compile the model
model = keras.Model(inputs=[input_layer, previous_crop_input, soil_type_input], outputs=output_layer)
model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])

# Train the model
model.fit([X_train, X_train[:, 0], X_train[:, 1]], y_train, epochs=50, batch_size=32, validation_data=([X_test, X_test[:, 0], X_test[:, 1]], y_test))

# Make predictions
y_pred = model.predict([X_test, X_test[:, 0], X_test[:, 1]])
y_pred = y_pred.argmax(axis=1)  # Convert probabilities to class labels

# Function to evaluate the model
def model_details():
    accuracy = accuracy_score(y_test, y_pred)
    confusion = confusion_matrix(y_test, y_pred)
    report = classification_report(y_test, y_pred)

    print(f"Accuracy: {accuracy * 100:.2f}%")
    print("Confusion Matrix:")
    print(confusion)
    print("Classification Report:")
    print(report)

# Evaluate the model
model_details()

# Save the model and mappings for future use
model.save('crop_recommendation_complex_nn_model.h5')
joblib.dump(scaler, 'scaler.pkl')
joblib.dump(crop_mapping, 'crop_mapping_complex_nn_model.pkl')
joblib.dump(soil_type_mapping, 'soil_type_mapping_complex_nn_model.pkl')

Epoch 1/50
[1m1250/1250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 3ms/step - accuracy: 0.7270 - loss: 0.7905 - val_accuracy: 0.8109 - val_loss: 0.4792
Epoch 2/50
[1m1250/1250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 3ms/step - accuracy: 0.8137 - loss: 0.4789 - val_accuracy: 0.8152 - val_loss: 0.4678
Epoch 3/50
[1m1250/1250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 3ms/step - accuracy: 0.8219 - loss: 0.4574 - val_accuracy: 0.8149 - val_loss: 0.4675
Epoch 4/50
[1m1250/1250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 3ms/step - accuracy: 0.8197 - loss: 0.4576 - val_accuracy: 0.8118 - val_loss: 0.4815
Epoch 5/50
[1m1250/1250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 3ms/step - accuracy: 0.8224 - loss: 0.4538 - val_accuracy: 0.8202 - val_loss: 0.4551
Epoch 6/50
[1m1250/1250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 3ms/step - accuracy: 0.8278 - loss: 0.4389 - val_accuracy: 0.8204 - val_loss: 0.4538
Epoch 7/50
[1m1



Accuracy: 82.50%
Confusion Matrix:
[[ 510    3   80   14   18   21    3   31    4]
 [  24 1472   80   19   14   14    7   46    9]
 [  34    2 1146   22    6    8   14   48    8]
 [  24    3   92   25   14   12    7   42    9]
 [  29    2   92   17  256   13    6   40    4]
 [  25    2   88   17   14 1401    9   44    6]
 [  20    2   81   30    9    7 1426   50    5]
 [  22    2   82   23   20   17    6 1018    5]
 [  23   21   92   19   20    8    6   40  996]]
Classification Report:
              precision    recall  f1-score   support

           0       0.72      0.75      0.73       684
           1       0.98      0.87      0.92      1685
           2       0.63      0.89      0.73      1288
           3       0.13      0.11      0.12       228
           4       0.69      0.56      0.62       459
           5       0.93      0.87      0.90      1606
           6       0.96      0.87      0.92      1630
           7       0.75      0.85      0.80      1195
           8       0.9

['soil_type_mapping_complex_nn_model.pkl']