# IMPORT MODULES

In [None]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
%matplotlib inline

# READ THE DATASET

In [None]:
# Load the crop recommendation table from the Kaggle input directory.
csv_path = '../input/crop-recommendation-dataset/Crop_recommendation.csv'
data = pd.read_csv(csv_path)

In [None]:
# Preview the first five rows of the raw table.
data.head(n=5)

*The dataset above allows users to build a predictive model that recommends the most suitable crops to grow on a particular farm based on various parameters.*

*Data fields:*

* N - ratio of Nitrogen content in soil
* P - ratio of Phosphorus content in soil
* K - ratio of Potassium content in soil
* temperature - temperature in degrees Celsius
* humidity - relative humidity in %
* ph - pH value of the soil
* rainfall - rainfall in mm


In [None]:
# Show the (rows, columns) dimensions of the loaded table.
data.shape

In [None]:
# Summary statistics (count, mean, std, min, quartiles, max) of the table's columns.
data.describe()

In [None]:
# Print each column's name, non-null count and dtype.
data.info()

In [None]:
# Count the missing values in every column.
data.isna().sum()

*There are no "NaN" values in the data.*

In [None]:
# Number of distinct values in each column.
# dropna=False counts NaN as its own value instead of silently ignoring it.
data.nunique(dropna=False)

In [None]:
# Number of rows per distinct value of the `label` column.
data['label'].value_counts()

# EXPLORATORY DATA ANALYSIS

In [None]:
# Correlation heatmap of the numeric features.
# The `label` column is still un-encoded text in this cell, and DataFrame.corr()
# raises on non-numeric columns in pandas >= 2.0, so select the numeric columns first.
f, ax = plt.subplots(figsize=(12, 6))
corr = data.select_dtypes(include='number').corr()
sns.heatmap(corr, ax=ax, cmap="rocket_r")

In [None]:
# List the column names of the table.
data.columns

In [None]:
# Distributions of the N and P columns, side by side.
# sns.distplot is deprecated and scheduled for removal; histplot with kde=True and
# stat='density' draws the same histogram + density-curve combination.
f = plt.figure(figsize=(20, 5))
ax = f.add_subplot(121)
sns.histplot(x=data['N'], color='red', kde=True, stat='density', ax=ax)

ax = f.add_subplot(122)
sns.histplot(x=data['P'], color='green', kde=True, stat='density', ax=ax)
plt.tight_layout()

In [None]:
# Distributions of the K and temperature columns, side by side.
# sns.distplot is deprecated and scheduled for removal; histplot with kde=True and
# stat='density' draws the same histogram + density-curve combination.
f = plt.figure(figsize=(20, 5))
ax = f.add_subplot(121)
sns.histplot(x=data['K'], color='red', kde=True, stat='density', ax=ax)

ax = f.add_subplot(122)
sns.histplot(x=data['temperature'], color='green', kde=True, stat='density', ax=ax)
plt.tight_layout()

In [None]:
# Distributions of the humidity and ph columns, side by side.
# sns.distplot is deprecated and scheduled for removal; histplot with kde=True and
# stat='density' draws the same histogram + density-curve combination.
f = plt.figure(figsize=(20, 5))
ax = f.add_subplot(121)
sns.histplot(x=data['humidity'], color='red', kde=True, stat='density', ax=ax)

ax = f.add_subplot(122)
sns.histplot(x=data['ph'], color='green', kde=True, stat='density', ax=ax)
plt.tight_layout()

In [None]:
# Distribution of the rainfall column.
# sns.distplot is deprecated and scheduled for removal; histplot with kde=True and
# stat='density' draws the same histogram + density-curve combination.
sns.histplot(x=data['rainfall'], color='red', kde=True, stat='density')

In [None]:
# Number of samples per value of the `label` column.
# Pass the column through x=: newer seaborn releases treat the first positional
# argument as `data`, not as the x vector, so the positional form no longer
# produces a per-label count.
f = plt.figure(figsize=(15, 5))
sns.countplot(x=data['label'], palette='Spectral')
plt.xticks(rotation=90)  # label names overlap when drawn horizontally
plt.show()

# SEPARATING FEATURES AND TARGET LABEL

In [None]:
# Feature matrix: every column except the target `label`.
X = data.drop(columns='label')

In [None]:
# Preview the first five rows of the feature matrix.
X.head(n=5)

In [None]:
from sklearn.preprocessing import LabelEncoder

In [None]:
# Encoder used below to turn the `label` column into integer class ids.
le = LabelEncoder()

In [None]:
# Encode the `label` column as integer class ids, in place.
# Guard on dtype so that re-running this cell does not re-fit the encoder on the
# already-encoded integers, which would replace the original label values stored
# in `le.classes_` with the integers themselves.
if not pd.api.types.is_numeric_dtype(data['label']):
    data['label'] = le.fit_transform(data['label'])

In [None]:
# Preview the first five rows again, now that `label` has been re-assigned.
data.head(n=5)

In [None]:
# Target vector: the `label` column of `data`.
y = data['label']

In [None]:
# Preview the first five target values.
y.head(n=5)

# TRAIN TEST SPLIT

In [None]:
# Parallel lists filled in by the model sections below:
# `model` collects display names, `accuracy` the matching test-set scores.
model, accuracy = [], []

In [None]:
from sklearn.model_selection import train_test_split

In [None]:
# Hold out 20% of the rows for testing; the fixed seed keeps the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# DECISION TREE

In [None]:
from sklearn.tree import DecisionTreeClassifier

In [None]:
# Seed the tree so the fitted model, and the accuracy reported below, are the
# same on every Restart & Run All. 42 matches the seed of the train/test split.
DT = DecisionTreeClassifier(random_state=42)

In [None]:
# Fit the decision tree on the training split.
DT.fit(X_train , y_train)

In [None]:
# Predicted class ids for the test split.
# NOTE(review): `predict` is not referenced by any later cell visible here.
predict = DT.predict(X_test)

In [None]:
# Mean accuracy of the decision tree on the held-out test split.
DT_accuracy = DT.score(X_test,y_test)

In [None]:
# Display the decision tree's test accuracy.
DT_accuracy

In [None]:
# Record the decision tree's score for the final comparison.
# NOTE: re-running this cell appends a duplicate; re-run the cell that resets
# `accuracy` to [] first.
accuracy.append(DT_accuracy)

In [None]:
# Record the display name at the same position as its score in `accuracy`.
model.append('Decision Tree')

# LOGISTIC REGRESSION

In [None]:
from sklearn.linear_model import LogisticRegression

In [None]:
# The default cap of 100 solver iterations is often too low for unscaled features
# and can end in a ConvergenceWarning with a partially fitted model; raise the cap
# so the solver has room to converge.
LG = LogisticRegression(max_iter=2000)

In [None]:
# Fit the logistic-regression model on the training split.
LG.fit(X_train , y_train)

In [None]:
# Mean accuracy of the logistic-regression model on the held-out test split.
LG_accuracy = LG.score(X_test,y_test)

In [None]:
# Display the logistic-regression test accuracy.
LG_accuracy

In [None]:
# Record the logistic-regression score for the final comparison.
# NOTE: re-running this cell appends a duplicate; re-run the cell that resets
# `accuracy` to [] first.
accuracy.append(LG_accuracy)

In [None]:
# Record the display name at the same position as its score in `accuracy`.
model.append('Logistic Regression')

# RANDOM FOREST

In [None]:
from sklearn.ensemble import RandomForestClassifier

In [None]:
# Seed the forest so bootstrap sampling and feature selection, and therefore the
# accuracy reported below, are the same on every Restart & Run All.
# 42 matches the seed of the train/test split.
RF = RandomForestClassifier(random_state=42)

In [None]:
# Fit the random forest on the training split.
RF.fit(X_train , y_train)

In [None]:
# Mean accuracy of the random forest on the held-out test split.
RF_accuracy = RF.score(X_test,y_test)

In [None]:
# Display the random forest's test accuracy.
RF_accuracy

In [None]:
# Register the random forest in the comparison lists; name and score are added
# together so the two lists stay aligned by position.
model.append('Random Forest')
accuracy.append(RF_accuracy)

In [None]:
# Print the collected model names, then show the matching accuracy list as the
# cell's output.
print(model)
accuracy

In [None]:
# Display the collected accuracy scores.
accuracy

In [None]:
# Compare the test accuracy of the fitted models side by side.
fig, ax = plt.subplots()
sns.barplot(x=model, y=accuracy, palette='Spectral', ax=ax)
# Title and axis labels let the figure be read on its own when the notebook is skimmed.
ax.set(title='Test accuracy by model', xlabel='Model', ylabel='Accuracy')
plt.show()