## Link to report 
### https://docs.google.com/document/d/1Amz4F5-KuK2CwyCt2EFSzCBJc6Gsqb1B2uor64YhlA8/edit?usp=sharing

In [1]:
%matplotlib inline

import seaborn as sns
import pandas as pd
import numpy as np

#sklearn
import sklearn
#models
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier

#metrics
from sklearn.metrics import confusion_matrix
from sklearn.metrics import accuracy_score
from sklearn.manifold import TSNE



In [2]:
df = pd.read_csv("iris.csv").drop(columns = ['Id'])
df

Unnamed: 0,SepalLengthCm,SepalWidthCm,PetalLengthCm,PetalWidthCm,Species
0,5.1,3.5,1.4,0.2,Iris-setosa
1,4.9,3.0,1.4,0.2,Iris-setosa
2,4.7,3.2,1.3,0.2,Iris-setosa
3,4.6,3.1,1.5,0.2,Iris-setosa
4,5.0,3.6,1.4,0.2,Iris-setosa
5,5.4,3.9,1.7,0.4,Iris-setosa
6,4.6,3.4,1.4,0.3,Iris-setosa
7,5.0,3.4,1.5,0.2,Iris-setosa
8,4.4,2.9,1.4,0.2,Iris-setosa
9,4.9,3.1,1.5,0.1,Iris-setosa


# The classes appear well seperated
### TSNE "compacts" demensions shows multidemensional relationsips in a 2d plot

In [4]:
print(df.columns)
print('features:', len(df.columns)-1)
dfnorm = (df.iloc[ : , :-1] - df.iloc[:,:-1].mean())/df.iloc[:,:-1].std()
dfnorm['Species'] = df['Species']
method = sklearn.manifold.TSNE(n_components=2, perplexity=40, n_iter=3000)
pl = method.fit_transform(dfnorm.drop(columns =['Species']).values, df.Species)


dfnorm['x-tsne'] = pl[:,0]
dfnorm['y-tsne'] = pl[:,1]

sns.lmplot(x='x-tsne', y='y-tsne', hue='Species', data = dfnorm, fit_reg=False, scatter_kws={'alpha':0.3})

Index(['SepalLengthCm', 'SepalWidthCm', 'PetalLengthCm', 'PetalWidthCm',
       'Species'],
      dtype='object')
features: 4


AttributeError: 'DataFrame' object has no attribute 'norm'

In [None]:
y = df['Species']
X = df.drop(['Species', 'y-tsne', 'x-tsne'], axis = 1)

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.20, random_state=0)


    SGD Classifier method with cross validation

In [None]:
from sklearn.linear_model import SGDClassifier

sgd_clf = SGDClassifier(max_iter=5, tol=-np.infty, random_state=42)
sgd_clf.fit(X_train, y_train)
from sklearn.model_selection import cross_val_score
cross_val_score(sgd_clf, X_train, y_train, cv=3, scoring="accuracy")

# Trying with default forest

In [None]:
simp_clf = sklearn.ensemble.RandomForestClassifier()
simp_clf.fit(X_train, y_train)

y_pred = simp_clf.predict(X_test)
y_fit = simp_clf.predict(X_train)

In [None]:
print('cross_val:',cross_val_score(simp_clf, X_train, y_train, cv=3, scoring="accuracy"))
print('accuracy:', accuracy_score(y_test, y_pred))


In [None]:
conf_train = confusion_matrix(y_train, y_fit, labels = y_train.unique())
totaltrn = conf_train.sum()
#conf_train0_1 = conf_train/totaltrn
conf_train

In [None]:
conf_test = confusion_matrix(y_test, y_pred, labels = y_train.unique())
totaltst = conf_test.sum()
#conf_test0_1 = conf_test/totaltst
conf_test

In [None]:
sns.heatmap(conf_train, 
            xticklabels=y_train.unique(),
            yticklabels=y_train.unique(),
           cmap="icefire")

In [None]:
sns.heatmap(conf_test, 
            xticklabels=y_train.unique(),
            yticklabels=y_train.unique(),
           cmap="Greys")

# KNN

In [None]:
knn_clf = KNeighborsClassifier(n_jobs=-1, weights='distance', n_neighbors=5)
knn_clf.fit(X_train, y_train)
cross_val_score(knn_clf, X_train, y_train, cv=3, scoring="accuracy")

In [None]:
y_fit_knn = knn_clf.predict(X_train)
conf_train = confusion_matrix(y_train, y_fit_knn, labels = y_train.unique())
conf_train

In [None]:
y_pred_knn = knn_clf.predict(X_test)
conf_test = confusion_matrix(y_test, y_pred_knn, labels = y_train.unique())
conf_test

In [None]:
y_knn_pred = knn_clf.predict(X_test)
accuracy_score(y_test, y_knn_pred)

# Seems to work almost perfectly

In [None]:
sns.heatmap(conf_train, 
            xticklabels=y_train.unique(),
            yticklabels=y_train.unique(),
           cmap="icefire")

In [None]:
sns.heatmap(conf_test, 
            xticklabels=y_train.unique(),
            yticklabels=y_train.unique(),
           cmap="Greys")