# Prediction using the Decision Tree Algorithm

## Imports

In [None]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt

%matplotlib inline

## Reading the dataset

In [None]:
# Load Iris.csv (path is relative to the notebook's working directory) into a DataFrame.
df = pd.read_csv('Iris.csv')

In [None]:
# Preview the first rows to sanity-check the load.
df.head()

## Dropping the Id column

In [None]:
# Discard the 'Id' column; rebinding df keeps the drop out-of-place.
df = df.drop(columns='Id')

In [None]:
# Preview again to confirm the 'Id' column is gone.
df.head()

## Shape of dataset

In [None]:
# (rows, columns) of the DataFrame.
df.shape

## Exploratory Data Analysis (EDA)

In [None]:
# Summary statistics for the columns pandas treats as describable by default.
df.describe()

In [None]:
# Column dtypes and non-null counts.
df.info()

## Checking null values

In [None]:
# Draw the boolean missing-value mask as a heatmap; any missing cell shows up
# as a contrasting colour.
sns.heatmap(
    df.isna(),
    cmap='viridis',
    cbar=False,
    yticklabels=False,
)

## Visualizing data

In [None]:
# Pairwise scatter plots of every numeric column, coloured by species.
sns.pairplot(data=df, hue='Species')

In [None]:
# Distribution of sepal length within each species.
sns.boxplot(data=df, x='Species', y='SepalLengthCm')

In [None]:
# Distribution of petal width within each species.
sns.boxplot(data=df, x='Species', y='PetalWidthCm')

In [None]:
# Box plot of petal length per species, with the individual observations
# overlaid as jittered points on the same axes.
sns.boxplot(data=df, x='Species', y='PetalLengthCm')
# NOTE(review): edgecolor may have no visible effect unless linewidth is also
# set to a positive value - confirm against the installed seaborn version.
sns.stripplot(
    data=df,
    x='Species',
    y='PetalLengthCm',
    jitter=True,
    edgecolor='gray',
)

In [None]:
# Hex-binned joint distribution of sepal length against sepal width.
sns.jointplot(data=df, x='SepalLengthCm', y='SepalWidthCm', kind='hex')

In [None]:
from sklearn.preprocessing import LabelEncoder
# Encoder used below to turn the species labels into integer class codes.
le = LabelEncoder()

In [None]:
# Overwrite the species labels with integer codes in place. LabelEncoder sorts
# its classes, so the codes follow the sorted order of the original labels
# (available afterwards as le.classes_).
df['Species'] = le.fit_transform(df['Species'])
# Preview to confirm the column now holds the encoded values.
df.head()

## Splitting data

In [None]:
from sklearn.model_selection import train_test_split

# Features are every column except the encoded target.
X = df.drop('Species', axis=1)
y = df['Species']

# Hold out 30% of the rows for evaluation; the fixed seed keeps the split
# identical across runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    random_state=100,
)

## Building Decision tree model

In [None]:
from sklearn.tree import DecisionTreeClassifier

In [None]:
# Seed the tree so results are reproducible: scikit-learn permutes the
# candidate features at every split, so an unseeded tree can pick different
# splits (and report different metrics) on each run even with identical data.
# The value matches the seed already used for the train/test split.
model = DecisionTreeClassifier(random_state=100)

In [None]:
# Train the classifier on the training split only.
model.fit(X_train, y_train)

In [None]:
# Predicted (integer-encoded) species for the held-out test features.
prediction = model.predict(X_test)

## Checking the accuracy

In [None]:
from sklearn import metrics

In [None]:
# Fraction of test rows whose predicted class matches the true class.
metrics.accuracy_score(y_true=y_test, y_pred=prediction)

## Classification report and confusion matrix

In [None]:
from sklearn.metrics import confusion_matrix, classification_report

In [None]:
# Confusion matrix of true vs. predicted classes on the test split.
print(confusion_matrix(y_true=y_test, y_pred=prediction))
# Per-class precision, recall, F1 and support on the same split.
print(classification_report(y_true=y_test, y_pred=prediction))

## Plotting

In [None]:
from sklearn import tree

In [None]:
# Separate classifier used for the tree visualisation below. It is seeded so
# the plotted tree is the same on every run: scikit-learn permutes the
# candidate features at each split, so an unseeded tree may differ between runs.
clf = tree.DecisionTreeClassifier(random_state=100)

In [None]:
# Fit on the full dataset (X, y), not just the training split; fit() returns
# the estimator itself, so clf still refers to the same fitted object.
clf = clf.fit(X, y)

In [None]:
# Names of the four input features, in the column order of X. The target
# column 'Species' is deliberately excluded: the classifier is fit on X, which
# has 'Species' dropped, so listing it here would mislabel the feature set.
fname = [
    'SepalLengthCm',
    'SepalWidthCm',
    'PetalLengthCm',
    'PetalWidthCm',
]
# Display names for the encoded classes, in ascending class-code order.
cname = ['Setosa', 'Versicolor', 'Virginica']

In [None]:
# Use a large canvas so the node labels stay readable.
plt.figure(figsize=(20, 15))
# Render the fitted tree; filled=True colours each node by its majority class.
tree.plot_tree(
    decision_tree=clf,
    feature_names=fname,
    class_names=cname,
    filled=True,
)