**IMPORTING DEPENDENCIES**

In [443]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import LabelEncoder
from sklearn.tree import DecisionTreeClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import confusion_matrix, accuracy_score
from sklearn.preprocessing import normalize

**LOADING THE DATA**

In [444]:
df = pd.read_csv('iris.data')

**CHECKING OUT OUR DATA**

In [445]:
df.head()

Unnamed: 0,5.1,3.5,1.4,0.2,Iris-setosa
0,4.9,3.0,1.4,0.2,Iris-setosa
1,4.7,3.2,1.3,0.2,Iris-setosa
2,4.6,3.1,1.5,0.2,Iris-setosa
3,5.0,3.6,1.4,0.2,Iris-setosa
4,5.4,3.9,1.7,0.4,Iris-setosa


**CHECKING FOR NULL VALUES**

In [446]:
df.isnull().any().any()

False

**ADDING COLUMN NAMES**

In [447]:
df.columns = ['field1' , 'field2' , 'field3' , 'field4' , 'type']

In [448]:
df.head()

Unnamed: 0,field1,field2,field3,field4,type
0,4.9,3.0,1.4,0.2,Iris-setosa
1,4.7,3.2,1.3,0.2,Iris-setosa
2,4.6,3.1,1.5,0.2,Iris-setosa
3,5.0,3.6,1.4,0.2,Iris-setosa
4,5.4,3.9,1.7,0.4,Iris-setosa


SO WE HAVE 4 INDEPENDENT VARIABLES IN OUR DATASET AND 1 INDEPENDENT VARIABLE. SO WE HAVE TO STANDARDISE OUR DATA BUT BEFORE THAT LET'S CHECK IF OUR DATA HAS ANY UNWANTED VALUES

In [449]:
df.dtypes

field1    float64
field2    float64
field3    float64
field4    float64
type       object
dtype: object

In [450]:
df['type'].unique()

array(['Iris-setosa', 'Iris-versicolor', 'Iris-virginica'], dtype=object)

**DEFINING DEPENDENT AND INDEPENDENT VARIABLES**

In [451]:
x = df.iloc[: , :-1].values
y = df.iloc[: , -1].values

**STANDARDISING OUR DATAFRAME**

In [452]:
#x = normalize(x , axis = 0)
sc = StandardScaler()
x = sc.fit_transform(x)

**LABEL ENCODING THE DEPENDENT VARIABLES**

In [453]:
lb = LabelEncoder()
y = lb.fit_transform(y)

**SPLIT TO TRAIN AND TESTSET**

In [454]:
x_train , x_test , y_train , y_test = train_test_split(x, y, test_size = 0.1, random_state = 0)

**TIME TO TRAIN ON DIFFERENT MODELS AND FIND THE BEST ACCURACY**

**MODEL 1 : DECISION TREES**

In [455]:
model_1 = DecisionTreeClassifier(max_leaf_nodes = 4 , random_state = 0)
model_1.fit(x_train, y_train)

DecisionTreeClassifier(ccp_alpha=0.0, class_weight=None, criterion='gini',
                       max_depth=None, max_features=None, max_leaf_nodes=4,
                       min_impurity_decrease=0.0, min_impurity_split=None,
                       min_samples_leaf=1, min_samples_split=2,
                       min_weight_fraction_leaf=0.0, presort='deprecated',
                       random_state=0, splitter='best')

In [456]:
y_pred = model_1.predict(x_test)

In [457]:
cm = confusion_matrix(y_test, y_pred)
print(cm)
accuracy_score(y_test, y_pred)

[[5 0 0]
 [0 5 0]
 [0 1 4]]


0.9333333333333333

**MODEL 2 : K NEAREST NEIGHBOURS**

In [458]:
model_2 = KNeighborsClassifier(n_neighbors = 7, metric = 'minkowski', p = 2)
model_2.fit(x_train, y_train)

KNeighborsClassifier(algorithm='auto', leaf_size=30, metric='minkowski',
                     metric_params=None, n_jobs=None, n_neighbors=7, p=2,
                     weights='uniform')

In [459]:
y_pred = model_2.predict(x_test)

In [460]:
cm = confusion_matrix(y_test, y_pred)
print(cm)
accuracy_score(y_test, y_pred)

[[5 0 0]
 [0 5 0]
 [0 1 4]]


0.9333333333333333

**MODEL 3 : RANDOM FOREST CLASSIFIER**

In [461]:
model_3 = RandomForestClassifier(n_estimators = 10, criterion = 'entropy', random_state = 0)
model_3.fit(x_train, y_train)

RandomForestClassifier(bootstrap=True, ccp_alpha=0.0, class_weight=None,
                       criterion='entropy', max_depth=None, max_features='auto',
                       max_leaf_nodes=None, max_samples=None,
                       min_impurity_decrease=0.0, min_impurity_split=None,
                       min_samples_leaf=1, min_samples_split=2,
                       min_weight_fraction_leaf=0.0, n_estimators=10,
                       n_jobs=None, oob_score=False, random_state=0, verbose=0,
                       warm_start=False)

In [462]:
y_pred = model_3.predict(x_test)

In [463]:
cm = confusion_matrix(y_test, y_pred)
print(cm)
accuracy_score(y_test, y_pred)

[[5 0 0]
 [0 5 0]
 [0 1 4]]


0.9333333333333333

**MODEL 4 : NAIVE BAYES CLASSIFIER**

In [464]:
model_4 = GaussianNB()
model_4.fit(x_train, y_train)

GaussianNB(priors=None, var_smoothing=1e-09)

In [465]:
y_pred = model_4.predict(x_test)

In [466]:
cm = confusion_matrix(y_test, y_pred)
print(cm)
accuracy_score(y_test, y_pred)

[[5 0 0]
 [0 5 0]
 [0 2 3]]


0.8666666666666667

**MODEL 5 : SUPPORT VECTOR MACHINE(LINEAR)**

In [467]:
model_5 = SVC(kernel = 'linear', random_state = 0)
model_5.fit(x_train, y_train)

SVC(C=1.0, break_ties=False, cache_size=200, class_weight=None, coef0=0.0,
    decision_function_shape='ovr', degree=3, gamma='scale', kernel='linear',
    max_iter=-1, probability=False, random_state=0, shrinking=True, tol=0.001,
    verbose=False)

In [468]:
y_pred = model_5.predict(x_test)

In [469]:
cm = confusion_matrix(y_test, y_pred)
print(cm)
accuracy_score(y_test, y_pred)

[[5 0 0]
 [0 5 0]
 [0 1 4]]


0.9333333333333333

**MODEL 6 : SUPPORT VECTOR MACHINE(NON LINEAR)**

In [470]:
model_6 =  SVC(kernel = 'rbf', random_state = 0)
model_6.fit(x_train, y_train)

SVC(C=1.0, break_ties=False, cache_size=200, class_weight=None, coef0=0.0,
    decision_function_shape='ovr', degree=3, gamma='scale', kernel='rbf',
    max_iter=-1, probability=False, random_state=0, shrinking=True, tol=0.001,
    verbose=False)

In [471]:
y_pred = model_6.predict(x_test)

In [472]:
cm = confusion_matrix(y_test, y_pred)
print(cm)
accuracy_score(y_test, y_pred)

[[5 0 0]
 [0 5 0]
 [0 1 4]]


0.9333333333333333

**MODEL 7 : LOGISTIC REGRESSION**

In [473]:
model_7 = LogisticRegression(random_state= 0)
model_7.fit(x_train, y_train)

LogisticRegression(C=1.0, class_weight=None, dual=False, fit_intercept=True,
                   intercept_scaling=1, l1_ratio=None, max_iter=100,
                   multi_class='auto', n_jobs=None, penalty='l2',
                   random_state=0, solver='lbfgs', tol=0.0001, verbose=0,
                   warm_start=False)

In [474]:
y_pred = model_7.predict(x_test)

In [475]:
cm = confusion_matrix(y_test, y_pred)
print(cm)
accuracy_score(y_test, y_pred)

[[5 0 0]
 [0 5 0]
 [0 2 3]]


0.8666666666666667

**TRYING NEURAL NETWORKS**

In [479]:
import tensorflow as tf
from keras.models import Sequential 
from keras.layers import Dense,Activation,Dropout 
from keras.utils import np_utils

In [482]:
df=df.iloc[np.random.permutation(len(df))]

In [483]:
df.loc[df["type"]=="Iris-setosa","type"]=0
df.loc[df["type"]=="Iris-versicolor","type"]=1
df.loc[df["type"]=="Iris-virginica","type"]=2

In [487]:
X=df.iloc[:,:-1].values
y=df.iloc[:,-1].values

In [490]:
X_normalized = normalize(X , axis = 0)

In [491]:
total_length=len(df)
train_length=int(0.8*total_length)
test_length=int(0.2*total_length)

X_train=X_normalized[:train_length]
X_test=X_normalized[train_length:]
y_train=y[:train_length]
y_test=y[train_length:]

In [492]:
y_train=np_utils.to_categorical(y_train,num_classes=3)
y_test=np_utils.to_categorical(y_test,num_classes=3)

In [493]:
model=Sequential()
model.add(Dense(1000,input_dim=4,activation='relu'))
model.add(Dense(500,activation='relu'))
model.add(Dense(300,activation='relu'))
model.add(Dropout(0.2))
model.add(Dense(3,activation='softmax'))
model.compile(loss='categorical_crossentropy',optimizer='adam',metrics=['accuracy'])
model.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense (Dense)                (None, 1000)              5000      
_________________________________________________________________
dense_1 (Dense)              (None, 500)               500500    
_________________________________________________________________
dense_2 (Dense)              (None, 300)               150300    
_________________________________________________________________
dropout (Dropout)            (None, 300)               0         
_________________________________________________________________
dense_3 (Dense)              (None, 3)                 903       
Total params: 656,703
Trainable params: 656,703
Non-trainable params: 0
_________________________________________________________________


In [494]:
model.fit(X_train,y_train,validation_data=(X_test,y_test),batch_size=20,epochs=10,verbose=1)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<keras.callbacks.History at 0x7f2d07b72490>

In [495]:
prediction=model.predict(X_test)
length=len(prediction)
y_label=np.argmax(y_test,axis=1)
predict_label=np.argmax(prediction,axis=1)

accuracy=np.sum(y_label==predict_label)/length * 100 
print("Accuracy of the dataset",accuracy )

Accuracy of the dataset 96.66666666666667
