## Data Manipulation

In [1]:
import os

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

#for interactivity
from ipywidgets import interact

## Read The Dataset

In [5]:
# Location of the crop dataset. The original path stays the default, but the
# CROP_DATA_PATH environment variable can point the notebook at a copy stored
# elsewhere, so this cell does not need editing on another machine.
DATA_PATH = os.environ.get('CROP_DATA_PATH', 'D:/Project series/Crop Prediction/data.csv')
data = pd.read_csv(DATA_PATH)

## Shape Of The Dataset

In [6]:
# Report the dataset dimensions as a (rows, columns) pair.
print("Shape of the dataset :", (len(data.index), len(data.columns)))

Shape of the dataset : (2200, 8)


## Head Of The Dataset

In [7]:
# Preview the first five rows (the default row count, spelled out explicitly).
data.head(n=5)

Unnamed: 0,N,P,K,temperature,humidity,ph,rainfall,label
0,90,42,43,20.879744,82.002744,6.502985,202.935536,rice
1,85,58,41,21.770462,80.319644,7.038096,226.655537,rice
2,60,55,44,23.004459,82.320763,7.840207,263.964248,rice
3,74,35,40,26.491096,80.158363,6.980401,242.864034,rice
4,78,42,42,20.130175,81.604873,7.628473,262.71734,rice


## Data Description

In [8]:
# Summary statistics with the default quartiles written out explicitly.
data.describe(percentiles=[0.25, 0.5, 0.75])

Unnamed: 0,N,P,K,temperature,humidity,ph,rainfall
count,2200.0,2200.0,2200.0,2200.0,2200.0,2200.0,2200.0
mean,50.551818,53.362727,48.149091,25.616244,71.481779,6.46948,103.463655
std,36.917334,32.985883,50.647931,5.063749,22.263812,0.773938,54.958389
min,0.0,5.0,5.0,8.825675,14.25804,3.504752,20.211267
25%,21.0,28.0,20.0,22.769375,60.261953,5.971693,64.551686
50%,37.0,51.0,32.0,25.598693,80.473146,6.425045,94.867624
75%,84.25,68.0,49.0,28.561654,89.948771,6.923643,124.267508
max,140.0,145.0,205.0,43.675493,99.981876,9.935091,298.560117


## Find Missing Values

In [9]:
# Count missing entries per column (isna is the same check as isnull).
data.isna().sum()

N              0
P              0
K              0
temperature    0
humidity       0
ph             0
rainfall       0
label          0
dtype: int64

## Find Unique Values

In [10]:
# Number of rows recorded for each distinct crop label.
data.loc[:, 'label'].value_counts()

rice           100
orange         100
blackgram      100
grapes         100
mango          100
coffee         100
muskmelon      100
jute           100
papaya         100
maize          100
mungbean       100
cotton         100
banana         100
chickpea       100
pigeonpeas     100
watermelon     100
lentil         100
kidneybeans    100
mothbeans      100
apple          100
pomegranate    100
coconut        100
Name: label, dtype: int64

## Summary

In [14]:
# Print the dataset-wide mean of every numeric condition, one line per column.
# Each entry pairs the output template with the column whose mean fills it.
print("\n".join(
    template.format(data[column].mean())
    for template, column in (
        ("Average Ratio of Nitrogen in the soil : {0: .2f}", 'N'),
        ("Average Ratio of Phosphorous in the soil : {0: .2f}", 'P'),
        ("Average Ratio of Potassium in the soil : {0: .2f}", 'K'),
        ("Average Temperature in Celsius : {0: .2f}", 'temperature'),
        ("Average Relative humidity in % : {0: .2f}", 'humidity'),
        ("Average PH Value of the soil : {0: .2f}", 'ph'),
        ("Average Rainfall in mm : {0: .2f}", 'rainfall'),
    )
))

Average Ratio of Nitrogen in the soil :  50.55
Average Ratio of Phosphorous in the soil :  53.36
Average Ratio of Potassium in the soil :  48.15
Average Temperature in Celsius :  25.62
Average Relative humidity in % :  71.48
Average PH Value of the soil :  6.47
Average Rainfall in mm :  103.46


## Summary Statistics For Each Crop

In [15]:
@interact
def summary(crops = list(data['label'].value_counts().index)):
    """Print minimum, average and maximum of every condition for one crop.

    Parameters
    ----------
    crops : str
        Crop label to summarise. ``interact`` renders the default list as a
        dropdown and calls this function with the selected entry.

    All statistics are computed on the rows whose ``label`` equals ``crops``.
    Nitrogen, Phosphorous and Potassium values are printed unformatted; the
    remaining conditions are printed with two decimals.
    """
    x = data[data['label'] == crops]

    # (heading, column, value format). The " {0}" format reproduces the extra
    # separator space that print() inserted between label and raw value.
    sections = (
        ("Nitrogen", 'N', " {0}"),
        ("Phosphorous", 'P', " {0}"),
        ("Potassium", 'K', " {0}"),
        ("Temperature", 'temperature', "{0: .2f}"),
        ("Humidity", 'humidity', "{0: .2f}"),
        ("PH", 'ph', "{0: .2f}"),
        ("Rainfall", 'rainfall', "{0: .2f}"),
    )

    for heading, column, value_format in sections:
        # Always read from the crop-filtered frame so every section reflects
        # the selected crop rather than the whole dataset.
        values = x[column]
        print("----------------------------------------------")
        print("Statistics for", heading)
        print("Minimum {0} required : ".format(heading) + value_format.format(values.min()))
        print("Average {0} required : ".format(heading) + value_format.format(values.mean()))
        print("Maximum {0} required : ".format(heading) + value_format.format(values.max()))
    

interactive(children=(Dropdown(description='crops', options=('rice', 'orange', 'blackgram', 'grapes', 'mango',…

## Average Requirement For Each Crop Compared With The Overall Average

In [21]:
@interact
def compare(conditions = ['N', 'P', 'K', 'temperature', 'ph', 'humidity', 'rainfall']):
    """Compare the overall mean of one condition with its mean for each crop.

    Parameters
    ----------
    conditions : str
        Column to average. ``interact`` renders the default list as a
        dropdown and calls this function with the selected entry.

    Prints the dataset-wide mean first, then one line per crop label found in
    ``data['label']``, written exactly as the label appears in the dataset.
    """
    print("Average Value for", conditions,"is {0:.2f}".format(data[conditions].mean()))
    print("----------------------------------------------")

    # A single grouped aggregation yields the per-crop means, so the crop list
    # comes from the data instead of being maintained by hand.
    crop_means = data.groupby('label')[conditions].mean()

    # List crops in value_counts order, the same ordering the summary
    # dropdown is built from.
    for crop in data['label'].value_counts().index:
        print("{0} : {1:.2f}".format(crop, crop_means[crop]))

interactive(children=(Dropdown(description='conditions', options=('N', 'P', 'K', 'temperature', 'ph', 'humidit…