In [28]:
# Importing necessary libraries
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier, VotingClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score
from imblearn.over_sampling import SMOTE
from sklearn.decomposition import PCA
from collections import Counter
import warnings

In [51]:
# Read the crop-recommendation CSV and expose its 'label' column under the name 'Crop'.
# NOTE(review): absolute path that looks like a Colab Google Drive mount - confirm it is
# available before running.
# NOTE(review): the frame displayed below carries an 'Unnamed: 0' column, presumably a
# saved index; it is not selected as a feature later on.
crop_data = pd.read_csv(
    "/content/drive/MyDrive/Agro-Genesis/Crop_recommendation_data.csv"
).rename(columns={'label': 'Crop'})
crop_data

Unnamed: 0,N,P,K,temperature,humidity,ph,rainfall,Crop
0,90,42,43,20.879744,82.002744,6.502985,202.935536,rice
1,85,58,41,21.770462,80.319644,7.038096,226.655537,rice
2,60,55,44,23.004459,82.320763,7.840207,263.964248,rice
3,74,35,40,26.491096,80.158363,6.980401,242.864034,rice
4,78,42,42,20.130175,81.604873,7.628473,262.717340,rice
...,...,...,...,...,...,...,...,...
2195,107,34,32,26.774637,66.413269,6.780064,177.774507,coffee
2196,99,15,27,27.417112,56.636362,6.086922,127.924610,coffee
2197,118,33,30,24.131797,67.225123,6.362608,173.322839,coffee
2198,117,32,34,26.272418,52.127394,6.758793,127.175293,coffee


In [30]:
# Report the number of null entries found in each column
null_counts = crop_data.isnull().sum()
print("Missing values in each column:\n", null_counts)

Missing values in each column:
 N              0
P              0
K              0
temperature    0
humidity       0
ph             0
rainfall       0
Crop           0
dtype: int64


In [31]:
# Per-column null count.
# NOTE(review): this repeats the preceding cell's check verbatim; one of the two can go.
print("Missing values in each column:\n", crop_data.isna().sum())

Missing values in each column:
 N              0
P              0
K              0
temperature    0
humidity       0
ph             0
rainfall       0
Crop           0
dtype: int64


In [32]:
# Shuffle the rows with a fixed seed (reproducible order) so that any ordering in the
# source file does not carry into later steps; the rows displayed above appear to be
# grouped by crop. The shuffled copy is bound to `df`; `crop_data` itself is not reassigned.
from sklearn.utils import shuffle
df = shuffle(crop_data, random_state=5)

In [33]:
# Separate the seven predictor columns (x) from the crop label column (target)
feature_columns = ['N', 'P', 'K', 'temperature', 'humidity', 'ph', 'rainfall']
x = df[feature_columns]
target = df['Crop']


In [34]:
# Turn crop names into integer class ids. The fitted `encoder` is kept because
# predict_crop uses it to map predicted ids back to crop names.
encoder = LabelEncoder()
y = encoder.fit_transform(y=target)

In [35]:
# Hold out 25% of the rows as a test set (seeded for a repeatable split).
# NOTE(review): the split is not stratified by class - consider stratify=y.
x_train, x_test, y_train, y_test = train_test_split(
    x,
    y,
    test_size=0.25,
    random_state=0,
)


In [36]:
# Standardise the features: statistics are learned from the training split only and
# then applied to both splits, so nothing from the test split influences the scaler.
# x_train / x_test are overwritten here, so re-running this cell on its own would fit
# the scaler on already-scaled data.
scaler = StandardScaler().fit(x_train)
x_train = scaler.transform(x_train)
x_test = scaler.transform(x_test)

In [37]:
# Balance the classes by oversampling with SMOTE (seeded). Only the training split is
# resampled; the test split is left as it is. x_train / y_train are replaced by their
# resampled versions.
smote = SMOTE(random_state=0)
x_train, y_train = smote.fit_resample(X=x_train, y=y_train)

In [38]:

# Reduce the 7 scaled features to 5 principal components. The projection is learned
# from the (resampled) training data and merely applied to the test data.
pca = PCA(n_components=5)
x_train_pca = pca.fit_transform(X=x_train)
x_test_pca = pca.transform(X=x_test)

In [39]:
# Search space for tuning the random forest: 2 values for each of 4 hyperparameters,
# i.e. 16 combinations in total.
param_grid = {
    'n_estimators': [100, 200], 'max_depth': [10, 20],
    'min_samples_split': [2, 5], 'min_samples_leaf': [1, 2],
}
# Base estimator handed to the grid search (seeded so its fits are repeatable)
forest = RandomForestClassifier(random_state=1)

In [40]:
# Try every combination in param_grid with cross-validation on the PCA-projected
# training data, then keep the best-scoring forest.
grid_search = GridSearchCV(
    estimator=forest,
    param_grid=param_grid,
    cv=3,        # reduced number of folds
    n_jobs=-1,
    verbose=2,
)
grid_search.fit(X=x_train_pca, y=y_train)
best_forest = grid_search.best_estimator_

Fitting 3 folds for each of 16 candidates, totalling 48 fits


In [41]:
# Training with GradientBoosting
# The model is seeded like every other stochastic step in this notebook (shuffle,
# split, SMOTE, random forest). scikit-learn's gradient boosting uses random numbers
# while growing its trees, so without a seed the fitted model - and the accuracy
# reported for it - can differ from one run to the next.
gbc = GradientBoostingClassifier(random_state=0)
gbc.fit(x_train_pca, y_train)

In [42]:
# Fit a k-nearest-neighbours classifier (library defaults) on the PCA-projected training data
knn_clf = KNeighborsClassifier()
knn_clf.fit(X=x_train_pca, y=y_train)

In [43]:
# Combine the three models into a hard-voting ensemble: the class chosen by the
# majority of the members wins. The ensemble is fitted on the same PCA-projected
# training data as its members.
ensemble_members = [('rf', best_forest), ('knn', knn_clf), ('gb', gbc)]
voting_clf = VotingClassifier(estimators=ensemble_members, voting='hard')
voting_clf.fit(x_train_pca, y_train)

In [44]:
# Class predictions of every fitted model for the PCA-projected test features
forest_pred, gbc_pred, knn_pred, voting_pred = (
    model.predict(x_test_pca)
    for model in (best_forest, gbc, knn_clf, voting_clf)
)

In [45]:
# Print the test-set accuracy of each model, one line per model
accuracy_report = (
    ('Random Forest Accuracy score:', forest_pred),
    ('Gradient Boosting Accuracy score:', gbc_pred),
    ('KNN Accuracy score:', knn_pred),
    ('Voting Classifier Accuracy score:', voting_pred),
)
for heading, predicted in accuracy_report:
    print(heading, accuracy_score(y_test, predicted))


Random Forest Accuracy score: 0.9327272727272727
Gradient Boosting Accuracy score: 0.9054545454545454
KNN Accuracy score: 0.9327272727272727
Voting Classifier Accuracy score: 0.94


In [46]:
# Function to predict the most suitable crop
def predict_crop(n, p, k, temperature, humidity, ph, rainfall):
    """Predict a crop for one sample with each of the four trained models.

    The sample goes through the same preprocessing as the training data
    (the fitted `scaler`, then the fitted `pca`) before it is classified.

    Args:
        n, p, k, temperature, humidity, ph, rainfall: values for the 'N', 'P',
            'K', 'temperature', 'humidity', 'ph' and 'rainfall' feature columns.

    Returns:
        dict mapping each model's display name ('Random Forest', 'KNN',
        'Gradient Boosting', 'Voting Classifier') to the crop name it predicts.

    Relies on the notebook-level objects `scaler`, `pca`, `encoder`,
    `best_forest`, `knn_clf`, `gbc` and `voting_clf` having been fitted already.
    """
    # Single-row frame with the training column names, as the scaler was fitted on a DataFrame
    feature_names = ['N', 'P', 'K', 'temperature', 'humidity', 'ph', 'rainfall']
    sample = pd.DataFrame(
        [[n, p, k, temperature, humidity, ph, rainfall]],
        columns=feature_names,
    )

    # Standardise first, then project onto the principal components
    projected = pca.transform(scaler.transform(sample))

    # Display name -> fitted model, in the order the results are reported
    models = (
        ('Random Forest', best_forest),
        ('KNN', knn_clf),
        ('Gradient Boosting', gbc),
        ('Voting Classifier', voting_clf),
    )

    # Each model yields an encoded class id; decode it back to the crop name
    return {
        label: encoder.inverse_transform(model.predict(projected))[0]
        for label, model in models
    }

In [47]:
# Function to get the most frequent crop prediction
def get_most_frequent_crop(predictions):
    """Return the crop that occurs most often among the values of `predictions`.

    Args:
        predictions: mapping whose values are predicted crop names; the keys
            are ignored.

    Returns:
        The most frequent value. When several values share the top count, the
        one encountered first wins.

    Raises:
        IndexError: if `predictions` is empty.
    """
    tally = Counter(predictions.values())
    # most_common(1) gives a one-element list of (crop, count) pairs
    ranked = tally.most_common(1)
    top_crop, _votes = ranked[0]
    return top_crop

# Example usage
# Scenario 1: run one hand-picked input through all models and report the consensus
n, p, k, temperature, humidity, ph, rainfall = 120, 80, 100, 20, 65, 6.0, 450
predictions = predict_crop(
    n=n, p=p, k=k,
    temperature=temperature, humidity=humidity, ph=ph, rainfall=rainfall,
)
most_frequent_crop = get_most_frequent_crop(predictions)
print(f"Scenario 1 - Predictions: {predictions}")
print(f"Scenario 1 - Most frequent crop predicted: {most_frequent_crop}")

Scenario 1 - Predictions: {'Random Forest': 'rice', 'KNN': 'rice', 'Gradient Boosting': 'rice', 'Voting Classifier': 'rice'}
Scenario 1 - Most frequent crop predicted: rice


In [None]:
# # Scenario 2
# n, p, k = 100, 70, 90
# temperature, humidity, ph, rainfall = 30, 80, 6.5, 300
# predictions = predict_crop(n, p, k, temperature, humidity, ph, rainfall)
# most_frequent_crop = get_most_frequent_crop(predictions)
# print(f"Scenario 2 - Predictions: {predictions}")
# print(f"Scenario 2 - Most frequent crop predicted: {most_frequent_crop}")

# # Scenario 3
# n, p, k = 150, 60, 120
# temperature, humidity, ph, rainfall = 25, 50, 6.2, 150
# predictions = predict_crop(n, p, k, temperature, humidity, ph, rainfall)
# most_frequent_crop = get_most_frequent_crop(predictions)
# print(f"Scenario 3 - Predictions: {predictions}")
# print(f"Scenario 3 - Most frequent crop predicted: {most_frequent_crop}")

# # Scenario 4
# n, p, k = 80, 60, 70
# temperature, humidity, ph, rainfall = 22, 60, 6.5, 180
# predictions = predict_crop(n, p, k, temperature, humidity, ph, rainfall)
# most_frequent_crop = get_most_frequent_crop(predictions)
# print(f"Scenario 4 - Predictions: {predictions}")
# print(f"Scenario 4 - Most frequent crop predicted: {most_frequent_crop}")

# # Scenario 5
# n, p, k = 90, 40, 80
# temperature, humidity, ph, rainfall = 28, 50, 6.8, 120
# predictions = predict_crop(n, p, k, temperature, humidity, ph, rainfall)
# most_frequent_crop = get_most_frequent_crop(predictions)
# print(f"Scenario 5 - Predictions: {predictions}")
# print(f"Scenario 5 - Most frequent crop predicted: {most_frequent_crop}")


In [52]:
import joblib

# Persist the tuned random forest to disk. It was trained on scaled, PCA-projected
# features, so it needs inputs prepared the same way.
joblib.dump(value=best_forest, filename='crop_recommendation_model.pkl')


['crop_recommendation_model.pkl']

In [53]:
# Persist the fitted preprocessing objects, one file each: the scaler, the PCA
# projection and the label encoder (which maps class ids back to crop names).
joblib.dump(value=scaler, filename='crop_recommendation_scaler.pkl')
joblib.dump(value=pca, filename='crop_recommendation_pca.pkl')
joblib.dump(value=encoder, filename='crop_recommendation_label_encoder.pkl')


['crop_recommendation_label_encoder.pkl']