# Build the KNN model 

Now that the data is prepared, we can build the model for prediction and evaluation. <br>
I will be using the K-nearest-neighbours (KNN) model to find organisations that are similar to each other. 

In [None]:
# Import libraries

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

from sklearn.metrics import classification_report, confusion_matrix
from sklearn.model_selection import cross_val_score, train_test_split
from sklearn.neighbors import KNeighborsClassifier
from sqlalchemy import create_engine

%matplotlib inline

In [None]:
# Open the SQLite database (path is relative to this notebook) and load the
# feature table that is used as the model input further down.
db_url = 'sqlite:///../data/customers_with_behaviours.db'
engine = create_engine(db_url)
df_feat_contacted_initial = pd.read_sql_table(
    table_name='feat_all_contacted',
    con=engine,
)
df_feat_contacted_initial.head()

In [None]:
# Load the full table through the same engine; its CONVERTED column is used
# as the prediction target further down.
df_all_data_contacted = pd.read_sql_table(
    table_name='all_data_contacted',
    con=engine,
)
df_all_data_contacted.head()

In [None]:
# Features come from one table and the CONVERTED target from another.
# NOTE(review): this assumes both tables hold the same rows in the same
# order -- confirm against the data-preparation step.
X = df_feat_contacted_initial
y = df_all_data_contacted['CONVERTED']

# Seed the 80/20 split so that the metrics and the error-rate plot derived from
# it are reproducible under Restart Kernel -> Run All. Without a seed every run
# produces a different split, so a K value read off one run's plot would not
# correspond to the next run.
RANDOM_STATE = 42
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=RANDOM_STATE
)

# K=1 is an arbitrary starting value, used only to show the pipeline works.
num_neighbors = 1
knn = KNeighborsClassifier(n_neighbors=num_neighbors)
knn.fit(X_train, y_train)
pred = knn.predict(X_test)

In [None]:
# Summarise how the current model performed on the held-out test set.
conf_mat = confusion_matrix(y_test, pred)
clf_report = classification_report(y_test, pred)

print(f'WITH K={num_neighbors}\n')
print('\nCONFUSION MATRIX:', conf_mat, sep='\n')
print('\nCLASSIFICATION REPORT:', clf_report, sep='\n')

#### Improve the model with a better K value
The initial K value was arbitrary, chosen only to prove that the model works. <br>
Let's try to improve accuracy by finding a better K value for KNN.

In [None]:
# Estimate the error rate for each candidate K with 5-fold cross-validation on
# the training data only. Scoring candidates on X_test would leak the test set
# into model selection and make the final evaluation look better than it is.
error_rate = []

for k in range(1, 40):
    knn = KNeighborsClassifier(n_neighbors=k)
    # One accuracy score per fold; the error rate is 1 - mean accuracy.
    fold_accuracy = cross_val_score(knn, X_train, y_train, cv=5, scoring='accuracy')
    error_rate.append(1 - fold_accuracy.mean())

In [None]:
# Plot the error rate against K (one point per K from 1 to 39) so a good
# value can be read off the curve.
fig, ax = plt.subplots(figsize=(10, 6))
ax.plot(
    range(1, 40),
    error_rate,
    color='blue',
    linestyle='dashed',
    marker='o',
    markerfacecolor='red',
    markersize=10,
)
ax.set_title('Error Rate vs. K Value')
ax.set_xlabel('K')
ax.set_ylabel('Error Rate')

In [None]:
# Refit with the tuned neighbour count and predict on the held-out test set.
# NOTE(review): 33 was presumably read off the error-rate plot above --
# confirm it still matches the curve produced by the current run.
num_neighbors = 33
knn = KNeighborsClassifier(n_neighbors=num_neighbors).fit(X_train, y_train)
pred = knn.predict(X_test)

In [None]:
# Report test-set performance for the refitted model, one section per metric.
report_sections = (
    ('\nCONFUSION MATRIX:', confusion_matrix(y_test, pred)),
    ('\nCLASSIFICATION REPORT:', classification_report(y_test, pred)),
)

print(f'WITH K={num_neighbors}\n')
for heading, content in report_sections:
    print(heading)
    print(content)