In [7]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
# Train test splitting and hyperparameter tunning. 
from sklearn.model_selection import train_test_split , GridSearchCV 
# For Preprocessing 
from sklearn.preprocessing import  StandardScaler , OneHotEncoder 
# Model building 
from sklearn.linear_model import LogisticRegression 
from sklearn.tree import DecisionTreeClassifier 
from sklearn.ensemble import RandomForestClassifier , GradientBoostingClassifier , AdaBoostClassifier 
from sklearn.neighbors import KNeighborsClassifier 

# For Dimensionality Reduction i,e PCA. 
from sklearn.decomposition import PCA 

# Pipelines. 
from sklearn.pipeline import Pipeline 

# Metrics 
from sklearn.metrics import classification_report

In [3]:
df = pd.read_csv('./Crop_recommendation.csv')
df.head()

Unnamed: 0,Nitrogen,phosphorus,potassium,temperature,humidity,ph,rainfall,label
0,90,42,43,20.879744,82.002744,6.502985,202.935536,rice
1,85,58,41,21.770462,80.319644,7.038096,226.655537,rice
2,60,55,44,23.004459,82.320763,7.840207,263.964248,rice
3,74,35,40,26.491096,80.158363,6.980401,242.864034,rice
4,78,42,42,20.130175,81.604873,7.628473,262.71734,rice


In [16]:
df.describe()

Unnamed: 0,Nitrogen,phosphorus,potassium,temperature,humidity,ph,rainfall
count,2200.0,2200.0,2200.0,2200.0,2200.0,2200.0,2200.0
mean,50.551818,53.362727,48.149091,25.616244,71.481779,6.46948,103.463655
std,36.917334,32.985883,50.647931,5.063749,22.263812,0.773938,54.958389
min,0.0,5.0,5.0,8.825675,14.25804,3.504752,20.211267
25%,21.0,28.0,20.0,22.769375,60.261953,5.971693,64.551686
50%,37.0,51.0,32.0,25.598693,80.473146,6.425045,94.867624
75%,84.25,68.0,49.0,28.561654,89.948771,6.923643,124.267508
max,140.0,145.0,205.0,43.675493,99.981876,9.935091,298.560117


In [17]:
df['label'].unique()

array(['rice', 'maize', 'chickpea', 'kidneybeans', 'pigeonpeas',
       'mothbeans', 'mungbean', 'blackgram', 'lentil', 'pomegranate',
       'banana', 'mango', 'grapes', 'watermelon', 'muskmelon', 'apple',
       'orange', 'papaya', 'coconut', 'cotton', 'jute', 'coffee'],
      dtype=object)

In [19]:
df.columns

Index(['Nitrogen', 'phosphorus', 'potassium', 'temperature', 'humidity', 'ph',
       'rainfall', 'label'],
      dtype='object')

In [4]:
features = df.drop('label' , axis = 1)
label = df['label']

In [6]:
x_train , x_test , y_train , y_test = train_test_split(features , label , test_size = 0.2 , random_state = 42)

print(f"x train shape : {x_train.shape}\n y train shape : {y_train.shape} \n x test shape : {x_test.shape} \n y test shape : {y_test.shape}")

x train shape : (1760, 7)
 y train shape : (1760,) 
 x test shape : (440, 7) 
 y test shape : (440,)


In [8]:
lr_pipe = Pipeline(steps = [
    ('scaler' , StandardScaler()),
    ('lr_model' , LogisticRegression())
])

rf_pipe = Pipeline(steps = [
    ('scaler' , StandardScaler()) , 
    ('rf_model' , RandomForestClassifier())
])

gb_pipe = Pipeline(steps = [
    ('scaler' , StandardScaler()) , 
    ('gb_model' , GradientBoostingClassifier())
])

ab_pipe = Pipeline(steps = [
    ('scaler' , StandardScaler()) , 
    ('ab_model' , AdaBoostClassifier())
])
dt_pipe = Pipeline(steps = [
    ('scaler' , StandardScaler()) , 
    ('dt_model' , DecisionTreeClassifier())
])

knn_pipe = Pipeline(steps = [
    ('scaler' , StandardScaler()) , 
    ('knn_model', KNeighborsClassifier())
])

In [9]:
pipelines = [lr_pipe , rf_pipe , gb_pipe , ab_pipe , dt_pipe , knn_pipe]
model_names = ['Logistic Regression Classifier' , 'Random Forest Classifier' , 'Gradient Boosting Classifier','Adaboosting Classifier' , 'Decision Tree Clasifier' , 'KNearest Neighbors Classifier']


for pipe in pipelines : 
    pipe.fit(x_train , y_train)
    
# Comparing accuracies 
for index,val in enumerate(pipelines) : 
        print(f"{pipelines[index]} , test score : {val.score(x_test,y_test)}")



Pipeline(steps=[('scaler', StandardScaler()),
                ('lr_model', LogisticRegression())]) , test score : 0.9636363636363636
Pipeline(steps=[('scaler', StandardScaler()),
                ('rf_model', RandomForestClassifier())]) , test score : 0.9931818181818182
Pipeline(steps=[('scaler', StandardScaler()),
                ('gb_model', GradientBoostingClassifier())]) , test score : 0.9818181818181818
Pipeline(steps=[('scaler', StandardScaler()),
                ('ab_model', AdaBoostClassifier())]) , test score : 0.09545454545454546
Pipeline(steps=[('scaler', StandardScaler()),
                ('dt_model', DecisionTreeClassifier())]) , test score : 0.9818181818181818
Pipeline(steps=[('scaler', StandardScaler()),
                ('knn_model', KNeighborsClassifier())]) , test score : 0.9568181818181818


In [23]:
type(x_test)

pandas.core.frame.DataFrame

In [10]:
y_pred_rf = rf_pipe.predict(x_test)

In [12]:
# Comparing accuracies 
for index, val in enumerate(pipelines): 
    print(f"{model_names[index]} , test score : {val.score(x_test, y_test)}")

# Making predictions with Random Forest and printing classification report
y_pred_rf = rf_pipe.predict(x_test)
print(f"Classification report: {classification_report(y_test, y_pred_rf)}")


Logistic Regression Classifier , test score : 0.9636363636363636
Random Forest Classifier , test score : 0.9931818181818182
Gradient Boosting Classifier , test score : 0.9818181818181818
Adaboosting Classifier , test score : 0.09545454545454546
Decision Tree Clasifier , test score : 0.9818181818181818
KNearest Neighbors Classifier , test score : 0.9568181818181818
Classification report:               precision    recall  f1-score   support

       apple       1.00      1.00      1.00        23
      banana       1.00      1.00      1.00        21
   blackgram       1.00      1.00      1.00        20
    chickpea       1.00      1.00      1.00        26
     coconut       1.00      1.00      1.00        27
      coffee       1.00      1.00      1.00        17
      cotton       1.00      1.00      1.00        17
      grapes       1.00      1.00      1.00        14
        jute       0.92      1.00      0.96        23
 kidneybeans       1.00      1.00      1.00        20
      lentil   

In [61]:
from sklearn.metrics import accuracy_score
import joblib

# Evaluate accuracies
accuracies = []
for index, val in enumerate(pipelines): 
    accuracy = val.score(x_test, y_test)
    accuracies.append(accuracy)
    print(f"{model_names[index]} , test score : {accuracy}")

# Choose the best model
best_model_index = accuracies.index(max(accuracies))
best_model = pipelines[best_model_index]
best_model_name = model_names[best_model_index]

print(f"The best model is {best_model_name} with accuracy {max(accuracies)}")

# Save the best model
joblib.dump(best_model, 'best_model.joblib')


Logistic Regression Classifier , test score : 0.9636363636363636
Random Forest Classifier , test score : 0.9931818181818182
Gradient Boosting Classifier , test score : 0.9818181818181818
Adaboosting Classifier , test score : 0.09545454545454546
Decision Tree Clasifier , test score : 0.9818181818181818
KNearest Neighbors Classifier , test score : 0.9568181818181818
The best model is Random Forest Classifier with accuracy 0.9931818181818182


['best_model.joblib']

In [56]:
import pandas as pd

data = {
    'Nitrogen': [98],
    'phosphorus': [8],
    'potassium': [51],
    'temperature': [26],
    'humidity': [86],
    'ph': [6.2],
    'rainfall': [49.4]
}

df2 = pd.DataFrame(data)
print(df2)


   Nitrogen  phosphorus  potassium  temperature  humidity   ph  rainfall
0        98           8         51           26        86  6.2      49.4


In [57]:
x_test[1:2]

Unnamed: 0,Nitrogen,phosphorus,potassium,temperature,humidity,ph,rainfall
1334,98,8,51,26.179346,86.522581,6.259336,49.43051


In [58]:
predictions = best_model.predict(df2)
predictions

array(['watermelon'], dtype=object)

In [59]:
import pandas as pd

def predict_nutrient_growth(nitrogen, phosphorus, potassium, temperature, humidity, ph, rainfall, model):
    # Create a DataFrame from the provided parameters
    data = {
        'Nitrogen': [nitrogen],
        'phosphorus': [phosphorus],
        'potassium': [potassium],
        'temperature': [temperature],
        'humidity': [humidity],
        'ph': [ph],
        'rainfall': [rainfall]
    }

    df = pd.DataFrame(data)

    # Use the provided model to make predictions
    prediction = model.predict(df)
    
    return prediction

# Example usage
predicted_growth = predict_nutrient_growth(98, 8, 51, 26, 86, 6.2, 49.4, best_model)
print(predicted_growth)


['watermelon']


In [60]:
import streamlit as st
import pandas as pd
from tensorflow.keras.models import load_model

# Function to predict nutrient growth
def predict_nutrient_growth(nitrogen, phosphorus, potassium, temperature, humidity, ph, rainfall, model):
    # Create a DataFrame from the provided parameters
    data = {
        'Nitrogen': [nitrogen],
        'Phosphorus': [phosphorus],
        'Potassium': [potassium],
        'Temperature': [temperature],
        'Humidity': [humidity],
        'pH': [ph],
        'Rainfall': [rainfall]
    }
    df = pd.DataFrame(data)
    
    # Use the provided model to make predictions
    prediction = model.predict(df)[0][0]
    return prediction

def main():
    # Set the page configuration
    st.set_page_config(page_title="Nutrient Growth Predictor")

    # Load the model
    model_path = 'best_model.h5'  # Provide the path to your model file
    best_model = load_model(model_path)

    # Title
    st.title("Nutrient Growth Predictor")

    # User input fields
    nitrogen = st.number_input("Nitrogen Level", min_value=0.0, step=0.1, format="%.1f")
    phosphorus = st.number_input("Phosphorus Level", min_value=0.0, step=0.1, format="%.1f")
    potassium = st.number_input("Potassium Level", min_value=0.0, step=0.1, format="%.1f")
    temperature = st.number_input("Temperature (°C)", min_value=-20.0, max_value=50.0, step=0.1, format="%.1f")
    humidity = st.number_input("Humidity (%)", min_value=0.0, max_value=100.0, step=1.0, format="%d")
    ph = st.number_input("pH Level", min_value=0.0, max_value=14.0, step=0.1, format="%.1f")
    rainfall = st.number_input("Rainfall (mm)", min_value=0.0, step=0.1, format="%.1f")

    # Button to trigger prediction
    if st.button("Predict Growth"):
        predicted_growth = predict_nutrient_growth(nitrogen, phosphorus, potassium, temperature, humidity, ph, rainfall, best_model)
        st.success(f"Predicted Nutrient Growth: {predicted_growth:.2f}")

if __name__ == "__main__":
    main()

OSError: Unable to synchronously open file (file signature not found)