In [1]:
#Import Python Libraries
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import OneHotEncoder
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score

In [2]:
# Load the dataset containing the range information and crop labels
dataset = pd.read_csv('Crop and fertilizer dataset.csv')

In [3]:
dataset

Unnamed: 0,District_Name,Soil_color,Nitrogen,Phosphorus,Potassium,pH,Rainfall,Temperature,Crop,Fertilizer
0,Kolhapur,Black,75,50,100,6.5,1000,20,Sugarcane,Urea
1,Kolhapur,Black,80,50,100,6.5,1000,20,Sugarcane,Urea
2,Kolhapur,Black,85,50,100,6.5,1000,20,Sugarcane,Urea
3,Kolhapur,Black,90,50,100,6.5,1000,20,Sugarcane,Urea
4,Kolhapur,Black,95,50,100,6.5,1000,20,Sugarcane,Urea
...,...,...,...,...,...,...,...,...,...,...
4508,Pune,Black,130,80,150,7.0,1400,30,Sugarcane,MOP
4509,Pune,Black,135,80,150,7.0,1400,30,Sugarcane,MOP
4510,Pune,Black,140,80,150,7.0,1400,30,Sugarcane,MOP
4511,Pune,Black,145,80,150,7.0,1400,30,Sugarcane,MOP


In [4]:
#Count Null values
dataset.isnull().sum()

District_Name    0
Soil_color       0
Nitrogen         0
Phosphorus       0
Potassium        0
pH               0
Rainfall         0
Temperature      0
Crop             0
Fertilizer       0
dtype: int64

In [5]:
# Information
dataset.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 4513 entries, 0 to 4512
Data columns (total 10 columns):
 #   Column         Non-Null Count  Dtype  
---  ------         --------------  -----  
 0   District_Name  4513 non-null   object 
 1   Soil_color     4513 non-null   object 
 2   Nitrogen       4513 non-null   int64  
 3   Phosphorus     4513 non-null   int64  
 4   Potassium      4513 non-null   int64  
 5   pH             4513 non-null   float64
 6   Rainfall       4513 non-null   int64  
 7   Temperature    4513 non-null   int64  
 8   Crop           4513 non-null   object 
 9   Fertilizer     4513 non-null   object 
dtypes: float64(1), int64(5), object(4)
memory usage: 352.7+ KB


In [6]:
#No. of Duplicates
dataset.duplicated().sum()

0

In [7]:
#Describe the data
dataset.describe()

Unnamed: 0,Nitrogen,Phosphorus,Potassium,pH,Rainfall,Temperature
count,4513.0,4513.0,4513.0,4513.0,4513.0,4513.0
mean,95.409927,54.341901,63.59517,6.715267,819.18901,25.915134
std,38.060648,16.551991,35.691911,0.625198,251.730813,5.897328
min,20.0,10.0,5.0,5.5,300.0,10.0
25%,60.0,40.0,40.0,6.0,600.0,20.0
50%,105.0,55.0,55.0,6.5,800.0,25.0
75%,125.0,65.0,75.0,7.0,1000.0,30.0
max,150.0,90.0,150.0,8.5,1700.0,40.0


In [8]:
corr = dataset.corr()
corr

  corr = dataset.corr()


Unnamed: 0,Nitrogen,Phosphorus,Potassium,pH,Rainfall,Temperature
Nitrogen,1.0,0.709539,0.584315,0.18285,0.269364,-0.010213
Phosphorus,0.709539,1.0,0.57397,0.244945,0.225453,-0.055303
Potassium,0.584315,0.57397,1.0,0.07511,0.445671,0.053413
pH,0.18285,0.244945,0.07511,1.0,0.097884,-0.002949
Rainfall,0.269364,0.225453,0.445671,0.097884,1.0,0.315045
Temperature,-0.010213,-0.055303,0.053413,-0.002949,0.315045,1.0


In [9]:
import ipywidgets as widgets
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import OneHotEncoder
from sklearn.ensemble import RandomForestClassifier


In [10]:
# Create combo box for District_name
district_widget = widgets.Combobox(
    options=tuple(dataset['District_Name'].unique()),
    description='District:',
    placeholder='Select district',
    ensure_option=True
)

# Create an empty soil color widget initially
soil_color_widget = widgets.Combobox(
    description='Soil Color:',
    placeholder='Select soil color',
    ensure_option=True
)

nitrogen_widget = widgets.Combobox(
    description='Nitrogen:',
    placeholder='Select nitrogen value',
    ensure_option=True
)

phosphorus_widget = widgets.Combobox(
    description='Phosphorus:',
    placeholder='Select phosphorus value',
    ensure_option=True
)

potassium_widget = widgets.Combobox(
    description='Potassium:',
    placeholder='Select potassium value',
    ensure_option=True
)

ph_widget = widgets.Combobox(
    description='pH:',
    placeholder='Select pH value',
    ensure_option=True
)

rainfall_widget = widgets.Combobox(
    description='Rainfall:',
    placeholder='Select rainfall value',
    ensure_option=True
)

temperature_widget = widgets.Combobox(
    description='Temperature:',
    placeholder='Select temperature value',
    ensure_option=True
)

recommend_widget = widgets.Output()


# Define the observer function to update soil color options
def update_soil_color_options(change):
    district = change.new
    if district:
        soil_colors = dataset[dataset['District_Name'] == district]['Soil_color'].unique()
        soil_color_widget.options = tuple(soil_colors)
    else:
        soil_color_widget.options = ()

# Define the observer function to update nitrogen options
def update_nitrogen_options(change):
    district = district_widget.value
    soil_color = soil_color_widget.value
    if district and soil_color:
        nitrogen_values = dataset[(dataset['District_Name'] == district) & (dataset['Soil_color'] == soil_color)]['Nitrogen'].unique()
        nitrogen_values = [str(value) for value in nitrogen_values]  # Convert to Unicode strings
        nitrogen_widget.options = tuple(nitrogen_values)
    else:
        nitrogen_widget.options = ()
        
def update_phosphorus_options(change):
    district = district_widget.value
    soil_color = soil_color_widget.value
    if district and soil_color:
        phosphorus_values = dataset[(dataset['District_Name'] == district) & (dataset['Soil_color'] == soil_color)]['Phosphorus'].unique()
        phosphorus_values = [str(value) for value in phosphorus_values]  # Convert to Unicode strings
        phosphorus_widget.options = tuple(phosphorus_values)
    else:
        phosphorus_widget.options = ()
        
def update_potassium_options(change):
    district = district_widget.value
    soil_color = soil_color_widget.value
    if district and soil_color:
        potassium_values = dataset[(dataset['District_Name'] == district) & (dataset['Soil_color'] == soil_color)]['Potassium'].unique()
        potassium_values = [str(value) for value in potassium_values]  # Convert to Unicode strings
        potassium_widget.options = tuple(potassium_values)
    else:
        potassium_widget.options = ()
        
def update_ph_options(change):
    district = district_widget.value
    soil_color = soil_color_widget.value
    if district and soil_color:
        ph_values = dataset[(dataset['District_Name'] == district) & (dataset['Soil_color'] == soil_color)]['pH'].unique()
        ph_values = [str(value) for value in ph_values]  # Convert to Unicode strings
        ph_widget.options = tuple(ph_values)
    else:
        ph_widget.options = ()
        
def update_rainfall_options(change):
    district = district_widget.value
    soil_color = soil_color_widget.value
    if district and soil_color:
        rainfall_values = dataset[(dataset['District_Name'] == district) & (dataset['Soil_color'] == soil_color)]['Rainfall'].unique()
        rainfall_values = [str(value) for value in rainfall_values]  # Convert to Unicode strings
        rainfall_widget.options = tuple(rainfall_values)
    else:
        rainfall_widget.options = ()
        
def update_temperature_options(change):
    district = district_widget.value
    soil_color = soil_color_widget.value
    if district and soil_color:
        temperature_values = dataset[(dataset['District_Name'] == district) & (dataset['Soil_color'] == soil_color)]['Temperature'].unique()
        temperature_values = [str(value) for value in temperature_values]  # Convert to Unicode strings
        temperature_widget.options = tuple(temperature_values)
    else:
        temperature_widget.options = ()
        

# Train the model
def train_model(change):
    # Get the selected values from the combo boxes
    district = district_widget.value
    soil_color = soil_color_widget.value
    nitrogen = float(nitrogen_widget.value)
    phosphorus = float(phosphorus_widget.value)
    potassium = float(potassium_widget.value)
    pH = float(ph_widget.value)
    rainfall = float(rainfall_widget.value)
    temperature = float(temperature_widget.value)

   
    input_data = pd.DataFrame(
        [[nitrogen, phosphorus, potassium, pH, rainfall, temperature, district, soil_color]],
        columns=['Nitrogen', 'Phosphorus', 'Potassium', 'pH', 'Rainfall', 'Temperature', 'District_Name', 'Soil_color']
    )
    
    # Perform one-hot encoding for District_Name and Soil_color columns
    encoder = OneHotEncoder(handle_unknown='ignore')
    X_encoded = encoder.fit_transform(dataset[['District_Name', 'Soil_color']])
    input_data_encoded = encoder.transform(input_data[['District_Name', 'Soil_color']])

    # Split the dataset into training and testing sets
    X_train, X_test, y_train, y_test = train_test_split(X_encoded, dataset['Crop'], test_size=0.2, random_state=42)

    # Train the random forest model
    model_crop = RandomForestClassifier(n_estimators=100, random_state=42)
    model_crop.fit(X_train, y_train)
    #model_crop.score(X_test, y_test)
    # Make predictions on the testing set
    y_pred = model_crop.predict(X_test)

    # Calculate the accuracy
    accuracy = accuracy_score(y_test, y_pred)

    # Print the accuracy
    print("Accuracy:", accuracy)
    
    
    
    

    # Make predictions
    predicted_crop = model_crop.predict(input_data_encoded)

    # Find the fertilizer associated with the recommended crop
    recommended_fertilizer = dataset[dataset['Crop'] == predicted_crop[0]]['Fertilizer'].values[0]
    # Find the corresponding link for the predicted crop and fertilizer
    #link = dataset[(dataset['Crop'] == predicted_crop.iloc[0]) & (dataset['Fertilizer'] == recommended_fertilizer)]['Link'].values[0]
  #  link = dataset[(dataset['Crop'] == pd.Series(predicted_crop)[0]) & (dataset['Fertilizer'] == recommended_fertilizer)]['Link'].values[0]
    with recommend_widget:
        recommend_widget.clear_output()
        print("Recommended Crop:", predicted_crop[0])
        print("Recommended Fertilizer:", recommended_fertilizer)
      #  print("Link:", link)

#Define the observer function to update soil color options
district_widget.observe(update_soil_color_options, names='value')
district_widget.observe(update_nitrogen_options, names='value')
soil_color_widget.observe(update_nitrogen_options, names='value')


district_widget.observe(update_phosphorus_options, names='value')
soil_color_widget.observe(update_phosphorus_options, names='value')

district_widget.observe(update_potassium_options, names='value')
soil_color_widget.observe(update_potassium_options, names='value')

district_widget.observe(update_ph_options, names='value')
soil_color_widget.observe(update_ph_options, names='value')

district_widget.observe(update_rainfall_options, names='value')
soil_color_widget.observe(update_rainfall_options, names='value')

district_widget.observe(update_temperature_options, names='value')
soil_color_widget.observe(update_temperature_options, names='value')



# Create the button widget
button = widgets.Button(description='Train Model')
# Add the train_model function as an observer to the 'on_click' event of the button
button.on_click(train_model)

# Display the widgets
widgets.VBox([district_widget, soil_color_widget, nitrogen_widget,phosphorus_widget,potassium_widget,ph_widget,rainfall_widget,temperature_widget,button,recommend_widget])

VBox(children=(Combobox(value='', description='District:', ensure_option=True, options=('Kolhapur', 'Solapur',…

Accuracy: 0.6267995570321152
