In [1]:
import pandas as pd
import numpy as np
from env import user, password, host
import acquire
import prepare
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix
from sklearn.metrics import classification_report
from sklearn.preprocessing import MinMaxScaler as MMScaler

from sklearn.neighbors import KNeighborsClassifier as KNN

## Acquire, Prepare, and Use Data in Data Frame

In [2]:
# Accumulates the text of every classification report produced below so they
# can all be printed together at the end of the notebook.
creports = list()

In [3]:
import acquire, prepare

In [4]:
# Pull the raw iris data, then run it through the project's prep step.
iris_raw = acquire.get_iris_data()
idf = prepare.prep_iris(iris_raw)

In [5]:
# Pull the raw titanic data, then run it through the project's prep step.
titanic_raw = acquire.get_titanic_data()
tdf = prepare.prep_titanic(titanic_raw)

In [6]:
# Show which columns the prepared iris frame exposes before picking features/target.
idf.columns

Index(['sepal_length', 'sepal_width', 'petal_length', 'petal_width',
       'species'],
      dtype='object')

In [7]:
# Features: every column except the target.
X = idf.drop(columns='species')
# Target as a 1-D Series (single brackets). Selecting with double brackets
# yields a one-column DataFrame, which makes scikit-learn's fit() emit a
# DataConversionWarning about a column-vector y.
y = idf['species']

## Split into Test and Train, Model, Predict

In [8]:
# Seeded split so the train/test partition is the same on every run.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    random_state=123,
)

In [9]:
# Baseline KNN with the library's default settings.
knn = KNN().fit(X_train, y_train)
# Predictions are made on the same rows the model was fit on, so everything
# scored from `xtrpred` is a training-set metric.
xtrpred = knn.predict(X_train)

  


In [10]:
# Actual vs. predicted class counts on the training rows.
confusion_matrix(y_true=y_train, y_pred=xtrpred)

array([[34,  0,  0],
       [ 0, 41,  1],
       [ 0,  1, 35]])

In [11]:
# Per-class precision/recall/F1 for the default-settings iris model (training rows).
iris_creport1 = classification_report(y_true=y_train, y_pred=xtrpred)
print(iris_creport1)
creports.append(iris_creport1)  # kept for the combined printout later

              precision    recall  f1-score   support

           0       1.00      1.00      1.00        34
           1       0.98      0.98      0.98        42
           2       0.97      0.97      0.97        36

   micro avg       0.98      0.98      0.98       112
   macro avg       0.98      0.98      0.98       112
weighted avg       0.98      0.98      0.98       112



## Iris KNN: repeat with more neighbors (k = 10)

In [12]:
# Same iris training data, but each prediction now votes over 10 neighbors.
knn2 = KNN(n_neighbors=10)
knn2.fit(X=X_train, y=y_train)
# Overwrites the earlier `xtrpred`; still training-set predictions.
xtrpred = knn2.predict(X=X_train)

  


In [13]:
# Actual vs. predicted class counts for the 10-neighbor iris model.
confusion_matrix(y_true=y_train, y_pred=xtrpred)

array([[34,  0,  0],
       [ 0, 41,  1],
       [ 0,  2, 34]])

In [14]:
# Report for the 10-neighbor iris model (training rows).
iris_creport2 = classification_report(y_true=y_train, y_pred=xtrpred)
print(iris_creport2)
creports.append(iris_creport2)  # kept for the combined printout later

              precision    recall  f1-score   support

           0       1.00      1.00      1.00        34
           1       0.95      0.98      0.96        42
           2       0.97      0.94      0.96        36

   micro avg       0.97      0.97      0.97       112
   macro avg       0.97      0.97      0.97       112
weighted avg       0.97      0.97      0.97       112



## Iris KNN: even more neighbors (k = 20)

In [15]:
# Widen the vote again: 20 neighbors on the iris training data.
knn3 = KNN(n_neighbors=20).fit(X_train, y_train)
# Overwrites the earlier `xtrpred`; still training-set predictions.
xtrpred = knn3.predict(X_train)

  


In [16]:
# Actual vs. predicted class counts for the 20-neighbor iris model.
confusion_matrix(y_true=y_train, y_pred=xtrpred)

array([[34,  0,  0],
       [ 0, 41,  1],
       [ 0,  3, 33]])

In [17]:
# Report for the 20-neighbor iris model (training rows).
iris_creport3 = classification_report(y_true=y_train, y_pred=xtrpred)
print(iris_creport3)
creports.append(iris_creport3)  # kept for the combined printout later

              precision    recall  f1-score   support

           0       1.00      1.00      1.00        34
           1       0.93      0.98      0.95        42
           2       0.97      0.92      0.94        36

   micro avg       0.96      0.96      0.96       112
   macro avg       0.97      0.96      0.97       112
weighted avg       0.96      0.96      0.96       112



## Titanic KNN modeling 

In [18]:
# Features used for the titanic models. Note this rebinds the X / y names
# that previously held the iris data.
X = tdf[['pclass', 'fare', 'embarked', 'alone']]
# Target as a 1-D Series (single brackets). A one-column DataFrame here makes
# scikit-learn's fit() emit a DataConversionWarning about a column-vector y.
y = tdf['survived']

In [19]:
# Seeded split of the titanic data; replaces the iris train/test variables.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    random_state=123,
)

In [20]:
# Default-settings KNN on the titanic features. Reuses the name `knn`, so the
# iris model bound to it earlier is no longer reachable.
knn = KNN().fit(X_train, y_train)
# Training-set predictions (same rows the model was fit on).
xtrpred = knn.predict(X_train)

  


In [21]:
# Actual vs. predicted survival counts on the titanic training rows.
confusion_matrix(y_true=y_train, y_pred=xtrpred)

array([[339,  71],
       [ 97, 161]])

In [22]:
# Report for the default-settings titanic model (training rows).
titanic_creport1 = classification_report(y_true=y_train, y_pred=xtrpred)
print(titanic_creport1)
creports.append(titanic_creport1)  # kept for the combined printout later

              precision    recall  f1-score   support

           0       0.78      0.83      0.80       410
           1       0.69      0.62      0.66       258

   micro avg       0.75      0.75      0.75       668
   macro avg       0.74      0.73      0.73       668
weighted avg       0.75      0.75      0.75       668



## Titanic KNN: more neighbors (k = 10)

In [23]:
# Titanic model voting over 10 neighbors; rebinds `knn2` from the iris section.
knn2 = KNN(n_neighbors=10)
knn2.fit(X=X_train, y=y_train)
# Training-set predictions (same rows the model was fit on).
xtrpred = knn2.predict(X=X_train)

In [24]:
# Actual vs. predicted survival counts for the 10-neighbor titanic model.
confusion_matrix(y_true=y_train, y_pred=xtrpred)

array([[349,  61],
       [133, 125]])

In [25]:
# Report for the 10-neighbor titanic model (training rows).
titanic_creport2 = classification_report(y_true=y_train, y_pred=xtrpred)
print(titanic_creport2)
creports.append(titanic_creport2)  # kept for the combined printout later

              precision    recall  f1-score   support

           0       0.72      0.85      0.78       410
           1       0.67      0.48      0.56       258

   micro avg       0.71      0.71      0.71       668
   macro avg       0.70      0.67      0.67       668
weighted avg       0.70      0.71      0.70       668



## Titanic KNN: even more neighbors (k = 20)

In [26]:
# Titanic model voting over 20 neighbors; rebinds `knn3` from the iris section.
knn3 = KNN(n_neighbors=20).fit(X_train, y_train)
# Training-set predictions (same rows the model was fit on).
xtrpred = knn3.predict(X_train)

  


In [27]:
# Actual vs. predicted survival counts for the 20-neighbor titanic model.
confusion_matrix(y_true=y_train, y_pred=xtrpred)

array([[360,  50],
       [153, 105]])

In [28]:
# Report for the 20-neighbor titanic model (training rows).
titanic_creport3 = classification_report(y_true=y_train, y_pred=xtrpred)
print(titanic_creport3)
creports.append(titanic_creport3)  # kept for the combined printout later

              precision    recall  f1-score   support

           0       0.70      0.88      0.78       410
           1       0.68      0.41      0.51       258

   micro avg       0.70      0.70      0.70       668
   macro avg       0.69      0.64      0.64       668
weighted avg       0.69      0.70      0.68       668



## KNN Titanic (5 Neighbors)
#### Min Max Scaling on X_train

In [29]:
# Min-max scaler; (0, 1) is the range the fitted scaler's repr reports, spelled
# out here so the target range is visible at the point of construction.
mmscaler = MMScaler(feature_range=(0, 1))

In [30]:
# Learn the scaling parameters from the training features only.
mmscaler.fit(X=X_train)

  return self.partial_fit(X, y)


MinMaxScaler(copy=True, feature_range=(0, 1))

In [31]:
# Preview the scaled training features; the result is displayed, not stored.
mmscaler.transform(X=X_train)

array([[0.        , 0.26086743, 1.        , 0.        ],
       [1.        , 0.02975782, 0.        , 0.        ],
       [1.        , 0.01511079, 0.66666667, 1.        ],
       ...,
       [1.        , 0.01546857, 1.        , 1.        ],
       [1.        , 0.01415106, 1.        , 1.        ],
       [1.        , 0.01512699, 0.66666667, 1.        ]])

In [32]:
X_train = mmscaler.transform(X_train)

In [33]:
knn4 = KNN(n_neighbors=5)
knn4.fit(X_train,y_train)
xtrpred= knn4.predict(X_train)

  


In [34]:
# Actual vs. predicted survival counts for the scaled-feature titanic model.
confusion_matrix(y_true=y_train, y_pred=xtrpred)

array([[341,  69],
       [ 91, 167]])

In [35]:
# Report for the 5-neighbor titanic model trained on min-max scaled features.
titanic_creport4 = classification_report(y_true=y_train, y_pred=xtrpred)
print(titanic_creport4)
creports.append(titanic_creport4)  # kept for the combined printout later

              precision    recall  f1-score   support

           0       0.79      0.83      0.81       410
           1       0.71      0.65      0.68       258

   micro avg       0.76      0.76      0.76       668
   macro avg       0.75      0.74      0.74       668
weighted avg       0.76      0.76      0.76       668



In [36]:
# Replay every stored report in the order the models were evaluated.
for report_text in creports:
    print(report_text)

              precision    recall  f1-score   support

           0       1.00      1.00      1.00        34
           1       0.98      0.98      0.98        42
           2       0.97      0.97      0.97        36

   micro avg       0.98      0.98      0.98       112
   macro avg       0.98      0.98      0.98       112
weighted avg       0.98      0.98      0.98       112

              precision    recall  f1-score   support

           0       1.00      1.00      1.00        34
           1       0.95      0.98      0.96        42
           2       0.97      0.94      0.96        36

   micro avg       0.97      0.97      0.97       112
   macro avg       0.97      0.97      0.97       112
weighted avg       0.97      0.97      0.97       112

              precision    recall  f1-score   support

           0       1.00      1.00      1.00        34
           1       0.93      0.98      0.95        42
           2       0.97      0.92      0.94        36

   micro avg       0