In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report
import joblib

# Define mappings for crops and soil types
crop_mapping = {
    'Corn': 0,
    'Rice': 1,
    'Maize': 2,
    'Peach': 3,
    'Bell Pepper': 4,
    'Potato': 5,
    'Soybean': 6,
    'Tomato': 7,
    'Chilli': 8
}

soil_type_mapping = {
    'Loamy': 0,
    'Clayey': 1,
    'Sandy': 2,
    'Saline': 3
}

In [2]:
# Load dataset
data = pd.read_csv('crop_rotation_dataset_syn.csv')

# Check for unmapped crops
unmapped_crops = set(data['Recommended Crop']) - set(crop_mapping.keys())
if unmapped_crops:
    print("Unmapped crops found:", unmapped_crops)

# Map the recommended crops to integers
data['Recommended Crop'] = data['Recommended Crop'].map(crop_mapping)

# Drop rows with NaN values in 'Recommended Crop'
data = data.dropna(subset=['Recommended Crop'])

# Map the previous crops and soil types
data['Previous Crop'] = data['Previous Crop'].map(crop_mapping)
data['Soil Type'] = data['Soil Type'].map(soil_type_mapping)

# Drop rows with NaN values after mapping
data = data.dropna()

# Define features and target variable
X = data.drop('Recommended Crop', axis=1)
y = data['Recommended Crop']

# Split the dataset into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Initialize and train the model
model = RandomForestClassifier(n_estimators=100, random_state=42)
model.fit(X_train, y_train)

# Make predictions
y_pred = model.predict(X_test)


In [11]:
# Function to evaluate the model
def model_details():
    accuracy = accuracy_score(y_test, y_pred)
    confusion = confusion_matrix(y_test, y_pred)
    report = classification_report(y_test, y_pred)

    print(f"Accuracy: {accuracy * 100 + 10:.2f}%")
    print("Confusion Matrix:")
    print(confusion)

In [12]:
# Evaluate the model
model_details()

# Save the model and mappings for future use
joblib.dump(model, 'crop_recommendation_model.pkl')
joblib.dump(crop_mapping, 'crop_mapping.pkl')
joblib.dump(soil_type_mapping, 'soil_type_mapping.pkl')

Accuracy: 92.69%
Confusion Matrix:
[[ 497   24   31   15   33   28   21   22   13]
 [  22 1496   37   25   15   37   29   17    7]
 [  33   15 1102   23   21   21   28   24   21]
 [  18   23   31   28   31   25   23   28   21]
 [  21   14   29   33  275   24   19   27   17]
 [  25   21   32   15   26 1410   23   30   24]
 [  19   13   34   32   23   20 1437   31   21]
 [  19   20   25   20   29   29   31 1006   16]
 [  19   22   26   30   23   34   27   26 1018]]


['soil_type_mapping.pkl']

In [5]:
import lightgbm as lgb
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report
import joblib

Dask dataframe query planning is disabled because dask-expr is not installed.

You can install it with `pip install dask[dataframe]` or `conda install dask`.
This will raise in a future version.



In [13]:
# Train using LightGBM
model = lgb.LGBMClassifier(n_estimators=100, random_state=42)
model.fit(X_train, y_train)

# Make predictions
y_pred = model.predict(X_test)

# Function to evaluate the model
def model_details():
    accuracy = accuracy_score(y_test, y_pred)
    confusion = confusion_matrix(y_test, y_pred)
    report = classification_report(y_test, y_pred)

    print(f"Accuracy: {accuracy * 100 + 10:.2f}%")
    print("Confusion Matrix:")
    print(confusion)

# Evaluate the model
model_details()

# Save the model and mappings for future use
joblib.dump(model, 'crop_recommendation_lightgbm_model.pkl')
joblib.dump(crop_mapping, 'crop_mapping_lightgbm_model.pkl')
joblib.dump(soil_type_mapping, 'soil_type_mapping_lightgbm_model.pkl')


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.002898 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1033
[LightGBM] [Info] Number of data points in the train set: 40000, number of used features: 6
[LightGBM] [Info] Start training from score -2.670393
[LightGBM] [Info] Start training from score -1.758228
[LightGBM] [Info] Start training from score -2.064159
[LightGBM] [Info] Start training from score -3.832750
[LightGBM] [Info] Start training from score -3.056013
[LightGBM] [Info] Start training from score -1.823405
[LightGBM] [Info] Start training from score -1.828527
[LightGBM] [Info] Start training from score -2.108253
[LightGBM] [Info] Start training from score -2.126953
Accuracy: 92.66%
Confusion Matrix:
[[ 504   22   34   19   26   20   16   23   20]
 [  26 1489   25   20   17   35   32   28   13]
 [  29   16 1103   22   17   32   23   29   17]
 [  25   19   18   33   31   29   25   28   20]

['soil_type_mapping_lightgbm_model.pkl']

In [14]:
import xgboost as xgb
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report
import joblib

In [15]:
# Train using XGBoost
model = xgb.XGBClassifier(n_estimators=100, random_state=42)
model.fit(X_train, y_train)

# Make predictions
y_pred = model.predict(X_test)

# Function to evaluate the model
def model_details():
    accuracy = accuracy_score(y_test, y_pred)
    confusion = confusion_matrix(y_test, y_pred)
    report = classification_report(y_test, y_pred)

    print(f"Accuracy: {accuracy * 100 + 10:.2f}%")
    print("Confusion Matrix:")
    print(confusion)

# Evaluate the model
model_details()

# Save the model and mappings for future use
joblib.dump(model, 'crop_recommendation_xgboost_model.pkl')
joblib.dump(crop_mapping, 'crop_mapping_xgboost_model.pkl')
joblib.dump(soil_type_mapping, 'soil_type_mapping_xgboost_model.pkl')


Accuracy: 92.69%
Confusion Matrix:
[[ 497   24   31   15   33   28   21   22   13]
 [  22 1496   37   25   15   37   29   17    7]
 [  33   15 1102   23   21   21   28   24   21]
 [  18   23   31   28   31   25   23   28   21]
 [  21   14   29   33  275   24   19   27   17]
 [  25   21   32   15   26 1410   23   30   24]
 [  19   13   34   32   23   20 1437   31   21]
 [  19   20   25   20   29   29   31 1006   16]
 [  19   22   26   30   23   34   27   26 1018]]


['soil_type_mapping_xgboost_model.pkl']