In [4]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns
import os

In [5]:
crop_df=pd.read_csv('data_core.csv')

In [6]:
crop_df.head()

Unnamed: 0,Temparature,Humidity,Moisture,Soil Type,Crop Type,Nitrogen,Potassium,Phosphorous,Fertilizer Name
0,26.0,52.0,38.0,Sandy,Maize,37,0,0,Urea
1,29.0,52.0,45.0,Loamy,Sugarcane,12,0,36,DAP
2,34.0,65.0,62.0,Black,Cotton,7,9,30,14-35-14
3,32.0,62.0,34.0,Red,Tobacco,22,0,20,28-28
4,28.0,54.0,46.0,Clayey,Paddy,35,0,0,Urea


In [7]:
crop_df['Soil Type'].unique()

array(['Sandy', 'Loamy', 'Black', 'Red', 'Clayey'], dtype=object)

In [8]:
crop_df.isnull().sum()

Temparature        0
Humidity           0
Moisture           0
Soil Type          0
Crop Type          0
Nitrogen           0
Potassium          0
Phosphorous        0
Fertilizer Name    0
dtype: int64

In [9]:
from sklearn.preprocessing import LabelEncoder, MinMaxScaler

# Encode categorical variables and store mappings
label_encoder = LabelEncoder()

# Soil Type mapping
crop_df['Soil Type'] = label_encoder.fit_transform(crop_df['Soil Type'])
soil_type_mapping = {index: label for index, label in enumerate(label_encoder.classes_)}

# Crop Type mapping
crop_df['Crop Type'] = label_encoder.fit_transform(crop_df['Crop Type'])
crop_type_mapping = {index: label for index, label in enumerate(label_encoder.classes_)}

# Fertilizer Name mapping
crop_df['Fertilizer Name'] = label_encoder.fit_transform(crop_df['Fertilizer Name'])
fertilizer_name_mapping = {index: label for index, label in enumerate(label_encoder.classes_)}

# Normalize numerical features
scaler = MinMaxScaler()
numerical_columns = ['Temparature', 'Humidity', 'Moisture', 'Nitrogen', 'Potassium', 'Phosphorous']
crop_df[numerical_columns] = scaler.fit_transform(crop_df[numerical_columns])

# Display the first few rows of the preprocessed data
crop_df.head()

Unnamed: 0,Temparature,Humidity,Moisture,Soil Type,Crop Type,Nitrogen,Potassium,Phosphorous,Fertilizer Name
0,0.3,0.29965,0.36,4,3,0.804348,0.0,0.0,6
1,0.45,0.29965,0.5,2,8,0.26087,0.0,0.782609,5
2,0.7,0.624812,0.84,0,1,0.152174,0.391304,0.652174,1
3,0.6,0.549775,0.28,3,9,0.478261,0.0,0.434783,4
4,0.4,0.349675,0.52,1,6,0.76087,0.0,0.0,6


In [10]:
soil_type_mapping

{0: 'Black', 1: 'Clayey', 2: 'Loamy', 3: 'Red', 4: 'Sandy'}

In [11]:
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report, accuracy_score

# Define features (X) and target variables (y)
X = crop_df[numerical_columns + ['Soil Type']]
y_crop = crop_df['Crop Type']
y_fertilizer = crop_df['Fertilizer Name']

# Split the data into training and testing sets
X_train, X_test, y_crop_train, y_crop_test = train_test_split(X, y_crop, test_size=0.2, random_state=42)
_, _, y_fertilizer_train, y_fertilizer_test = train_test_split(X, y_fertilizer, test_size=0.2, random_state=42)

# Train a Random Forest model for Crop Type prediction
crop_model = RandomForestClassifier(random_state=42)
crop_model.fit(X_train, y_crop_train)

# Train a Random Forest model for Fertilizer Name prediction
fertilizer_model = RandomForestClassifier(random_state=42)
fertilizer_model.fit(X_train, y_fertilizer_train)

# Evaluate the Crop Type model
y_crop_pred = crop_model.predict(X_test)
print("Crop Type Prediction:")
print(classification_report(y_crop_test, y_crop_pred))
print("Accuracy:", accuracy_score(y_crop_test, y_crop_pred))

# Evaluate the Fertilizer Name model
y_fertilizer_pred = fertilizer_model.predict(X_test)
print("Fertilizer Name Prediction:")
print(classification_report(y_fertilizer_test, y_fertilizer_pred))
print("Accuracy:", accuracy_score(y_fertilizer_test, y_fertilizer_pred))

Crop Type Prediction:
              precision    recall  f1-score   support

           0       0.07      0.08      0.08       144
           1       0.07      0.08      0.07       141
           2       0.09      0.10      0.10       145
           3       0.10      0.13      0.11       147
           4       0.10      0.09      0.10       149
           5       0.11      0.08      0.10       154
           6       0.03      0.03      0.03       146
           7       0.08      0.10      0.08       125
           8       0.08      0.06      0.07       174
           9       0.04      0.05      0.05       130
          10       0.07      0.06      0.07       145

    accuracy                           0.08      1600
   macro avg       0.08      0.08      0.08      1600
weighted avg       0.08      0.08      0.08      1600

Accuracy: 0.078125
Fertilizer Name Prediction:
              precision    recall  f1-score   support

           0       0.13      0.13      0.13       241
         

In [31]:
# Save the model
import joblib
joblib.dump(crop_model, 'crop_model.pkl')

['crop_model.pkl']

In [13]:
X_train_final, X_val, y_crop_train_final, y_crop_val = train_test_split(X_train, y_crop_train, test_size=0.2, random_state=42)


In [None]:
from sklearn.model_selection import GridSearchCV

# Split the training data into training and validation sets

# Define the parameter grid for Random Forest
param_grid = {
    'n_estimators': [50, 100, 200],
    'max_depth': [None, 10, 20, 30],
    'min_samples_split': [2, 5, 10],
    'min_samples_leaf': [1, 2, 4]
}

# Perform Grid Search with cross-validation
grid_search = GridSearchCV(estimator=RandomForestClassifier(random_state=42),
                           param_grid=param_grid,
                           cv=3,
                           scoring='accuracy',
                           verbose=2,
                           n_jobs=-1)

grid_search.fit(X_train_final, y_crop_train_final)

# Get the best parameters and train the final model
best_params = grid_search.best_params_
print("Best Parameters:", best_params)

# Train the final model with the best parameters
final_crop_model = RandomForestClassifier(random_state=42, **best_params)
final_crop_model.fit(X_train_final, y_crop_train_final)

# Evaluate the model on the validation set
y_crop_val_pred = final_crop_model.predict(X_val)
print("Validation Set Performance:")
print(classification_report(y_crop_val, y_crop_val_pred))
print("Accuracy:", accuracy_score(y_crop_val, y_crop_val_pred))

Fitting 3 folds for each of 108 candidates, totalling 324 fits
Best Parameters: {'max_depth': None, 'min_samples_leaf': 1, 'min_samples_split': 2, 'n_estimators': 200}
Validation Set Performance:
              precision    recall  f1-score   support

           0       0.09      0.10      0.10       108
           1       0.06      0.05      0.05       127
           2       0.08      0.07      0.07       132
           3       0.15      0.17      0.16       122
           4       0.07      0.07      0.07       109
           5       0.09      0.08      0.08       115
           6       0.05      0.06      0.06        99
           7       0.12      0.12      0.12       128
           8       0.11      0.09      0.10       116
           9       0.07      0.08      0.08       106
          10       0.07      0.07      0.07       118

    accuracy                           0.09      1280
   macro avg       0.09      0.09      0.09      1280
weighted avg       0.09      0.09      0.09   

In [20]:
# Save the model
import joblib
joblib.dump(fertilizer_model, 'fertilizer_model.pkl')

['fertilizer_model.pkl']

In [14]:

from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.utils import to_categorical

# Convert target variable to categorical (one-hot encoding)
y_crop_train_final_categorical = to_categorical(y_crop_train_final)
y_crop_val_categorical = to_categorical(y_crop_val)

# Define the neural network model
model = Sequential([
    Dense(128, input_dim=X_train_final.shape[1], activation='relu'),
    Dropout(0.3),
    Dense(64, activation='relu'),
    Dropout(0.3),
    Dense(y_crop_train_final_categorical.shape[1], activation='softmax')
])

# Compile the model
model.compile(optimizer=Adam(learning_rate=0.001),
              loss='categorical_crossentropy',
              metrics=['accuracy'])

# Train the model
history = model.fit(X_train_final, y_crop_train_final_categorical,
                    validation_data=(X_val, y_crop_val_categorical),
                    epochs=50,
                    batch_size=32,
                    verbose=2)

# Evaluate the model on the validation set
val_loss, val_accuracy = model.evaluate(X_val, y_crop_val_categorical, verbose=0)
print(f"Validation Accuracy: {val_accuracy:.4f}")


Epoch 1/50


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


160/160 - 1s - 9ms/step - accuracy: 0.0871 - loss: 2.4134 - val_accuracy: 0.0867 - val_loss: 2.4002
Epoch 2/50
160/160 - 0s - 2ms/step - accuracy: 0.0941 - loss: 2.4017 - val_accuracy: 0.0953 - val_loss: 2.3977
Epoch 3/50
160/160 - 0s - 2ms/step - accuracy: 0.0959 - loss: 2.3981 - val_accuracy: 0.0992 - val_loss: 2.3983
Epoch 4/50
160/160 - 0s - 2ms/step - accuracy: 0.0998 - loss: 2.3978 - val_accuracy: 0.0898 - val_loss: 2.3987
Epoch 5/50
160/160 - 0s - 2ms/step - accuracy: 0.0971 - loss: 2.3981 - val_accuracy: 0.0875 - val_loss: 2.3986
Epoch 6/50
160/160 - 0s - 2ms/step - accuracy: 0.0971 - loss: 2.3974 - val_accuracy: 0.0883 - val_loss: 2.3994
Epoch 7/50
160/160 - 0s - 2ms/step - accuracy: 0.1012 - loss: 2.3964 - val_accuracy: 0.0883 - val_loss: 2.4004
Epoch 8/50
160/160 - 0s - 2ms/step - accuracy: 0.1082 - loss: 2.3942 - val_accuracy: 0.0867 - val_loss: 2.4001
Epoch 9/50
160/160 - 0s - 2ms/step - accuracy: 0.0975 - loss: 2.3949 - val_accuracy: 0.0914 - val_loss: 2.4005
Epoch 10/50


In [22]:
from xgboost import XGBClassifier
from sklearn.model_selection import RandomizedSearchCV

# Define the parameter grid for XGBoost
param_dist = {
    'n_estimators': [100, 200, 300],
    'learning_rate': [0.01, 0.05, 0.1, 0.2],
    'max_depth': [3, 5, 7, 10],
    'subsample': [0.6, 0.8, 1.0],
    'colsample_bytree': [0.6, 0.8, 1.0]
}

# Initialize the XGBoost classifier
xgb_model = XGBClassifier(random_state=42, use_label_encoder=False, eval_metric='mlogloss')

# Perform Randomized Search with cross-validation
random_search = RandomizedSearchCV(estimator=xgb_model,
                                   param_distributions=param_dist,
                                   n_iter=50,
                                   scoring='accuracy',
                                   cv=3,
                                   verbose=2,
                                   n_jobs=-1,
                                   random_state=42)

random_search.fit(X_train, y_crop_train)

# Get the best parameters and train the final model
best_xgb_params = random_search.best_params_
print("Best Parameters:", best_xgb_params)

final_xgb_model = XGBClassifier(random_state=42, use_label_encoder=False, eval_metric='mlogloss', **best_xgb_params)
final_xgb_model.fit(X_train, y_crop_train)

# Evaluate the model on the test set
y_crop_pred_xgb = final_xgb_model.predict(X_test)
print("XGBoost Model Performance:")
print(classification_report(y_crop_test, y_crop_pred_xgb))
print("Accuracy:", accuracy_score(y_crop_test, y_crop_pred_xgb))


Fitting 3 folds for each of 50 candidates, totalling 150 fits


Parameters: { "use_label_encoder" } are not used.



Best Parameters: {'subsample': 0.8, 'n_estimators': 100, 'max_depth': 7, 'learning_rate': 0.1, 'colsample_bytree': 1.0}


Parameters: { "use_label_encoder" } are not used.



XGBoost Model Performance:
              precision    recall  f1-score   support

           0       0.08      0.09      0.09       144
           1       0.11      0.11      0.11       141
           2       0.08      0.08      0.08       145
           3       0.11      0.13      0.12       147
           4       0.10      0.11      0.10       149
           5       0.14      0.12      0.13       154
           6       0.06      0.05      0.05       146
           7       0.06      0.07      0.07       125
           8       0.07      0.06      0.07       174
           9       0.10      0.10      0.10       130
          10       0.08      0.08      0.08       145

    accuracy                           0.09      1600
   macro avg       0.09      0.09      0.09      1600
weighted avg       0.09      0.09      0.09      1600

Accuracy: 0.09125


In [23]:
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.metrics import classification_report, accuracy_score

# Initialize the Gradient Boosting Classifier
gb_model = GradientBoostingClassifier(random_state=42)

# Train the Gradient Boosting model on the training data
gb_model.fit(X_train, y_crop_train)

# Evaluate the Gradient Boosting model on the test set
y_crop_pred_gb = gb_model.predict(X_test)
print("Gradient Boosting Model Performance:")
print(classification_report(y_crop_test, y_crop_pred_gb))
print("Accuracy:", accuracy_score(y_crop_test, y_crop_pred_gb))

Gradient Boosting Model Performance:
              precision    recall  f1-score   support

           0       0.07      0.08      0.08       144
           1       0.07      0.06      0.06       141
           2       0.09      0.10      0.09       145
           3       0.11      0.14      0.13       147
           4       0.12      0.11      0.11       149
           5       0.10      0.08      0.09       154
           6       0.06      0.05      0.06       146
           7       0.05      0.06      0.05       125
           8       0.07      0.06      0.07       174
           9       0.12      0.10      0.11       130
          10       0.04      0.03      0.04       145

    accuracy                           0.08      1600
   macro avg       0.08      0.08      0.08      1600
weighted avg       0.08      0.08      0.08      1600

Accuracy: 0.080625


In [29]:
from sklearn.ensemble import VotingClassifier

ensemble_model = VotingClassifier(
    estimators=[
        ('random_forest', final_crop_model),
        ('xgboost', final_xgb_model),
        ('gradient_boosting', gb_model)
    ],
    voting='soft'  # Use 'soft' voting for probabilities
)

# Train the updated ensemble model on the training data
ensemble_model.fit(X_train, y_crop_train)

# Evaluate the updated ensemble model on the test set
y_ensemble_pred = ensemble_model.predict(X_test)
print("Updated Ensemble Model Performance:")
print(classification_report(y_crop_test, y_ensemble_pred))
print("Accuracy:", accuracy_score(y_crop_test, y_ensemble_pred))

Parameters: { "use_label_encoder" } are not used.



Updated Ensemble Model Performance:
              precision    recall  f1-score   support

           0       0.07      0.08      0.08       144
           1       0.07      0.06      0.07       141
           2       0.09      0.10      0.09       145
           3       0.12      0.14      0.13       147
           4       0.11      0.11      0.11       149
           5       0.13      0.12      0.12       154
           6       0.06      0.05      0.05       146
           7       0.04      0.05      0.04       125
           8       0.08      0.07      0.08       174
           9       0.09      0.09      0.09       130
          10       0.07      0.06      0.06       145

    accuracy                           0.09      1600
   macro avg       0.08      0.09      0.08      1600
weighted avg       0.09      0.09      0.09      1600

Accuracy: 0.085625


In [30]:
import joblib
# Save the models and mappings to files
joblib.dump(ensemble_model, 'ensemble_model.pkl')

['ensemble_model.pkl']

In [28]:
joblib.dump(scaler, 'scaler.pkl')

['scaler.pkl']

In [25]:
import joblib

def predict_fertilizer_and_crop(temp, potassium, phosphorus, nitrogen, humidity, moisture, soil_type):
    """
    Predicts the fertilizer and crop type based on input parameters.

    Parameters:
        temp (float): Temperature value (raw, not normalized).
        potassium (float): Potassium value (raw, not normalized).
        phosphorus (float): Phosphorus value (raw, not normalized).
        nitrogen (float): Nitrogen value (raw, not normalized).
        humidity (float): Humidity value (raw, not normalized).
        moisture (float): Moisture value (raw, not normalized).
        soil_type (int): Soil type value (numeric).

    Returns:
        dict: A dictionary containing the predicted crop name and fertilizer name.
    """
    # Load the ensemble model from the file
    ensemble_model = joblib.load('ensemble_model.pkl')

    # Convert soil type from string to numeric using the label encoder
    soil_type_numeric = soil_type

    # Create a single input sample
    input_data = pd.DataFrame([{
        'Temparature': temp,
        'Humidity': humidity,
        'Moisture': moisture,
        'Nitrogen': nitrogen,
        'Potassium': potassium,
        'Phosphorous': phosphorus,
        'Soil Type': soil_type_numeric
    }])

    # Normalize the numerical features using the scaler
    input_data[numerical_columns] = scaler.transform(input_data[numerical_columns])

    # Predict crop type using the ensemble model
    crop_prediction = ensemble_model.predict(input_data)[0]
    crop_name = crop_type_mapping[crop_prediction]

    # Predict fertilizer name
    fertilizer_prediction = fertilizer_model.predict(input_data)[0]
    fertilizer_name = fertilizer_name_mapping[fertilizer_prediction]

    return {
        'Predicted Crop Name': crop_name,
        'Predicted Fertilizer Name': fertilizer_name
    }


In [26]:
# Example usage
temp = 30.0  # Example temperature value (raw, not normalized)
potassium = 200.0  # Example potassium value (raw, not normalized) 
phosphorus = 50.0  # Example phosphorus value (raw, not normalized)
nitrogen = 100.0  # Example nitrogen value (raw, not normalized)
humidity = 70.0  # Example humidity value (raw, not normalized)
moisture = 30.0  # Example moisture value (raw, not normalized)
soil_type = 1  # Example soil type value (string)
# Call the prediction function
predictions = predict_fertilizer_and_crop(temp, potassium, phosphorus, nitrogen, humidity, moisture, soil_type)
print(predictions)

{'Predicted Crop Name': 'Barley', 'Predicted Fertilizer Name': '17-17-17'}
