In [63]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import pydotplus
from sklearn.metrics import accuracy_score
from sklearn.metrics import confusion_matrix
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import MinMaxScaler
from sklearn.preprocessing import StandardScaler

In [51]:
# Load the maternal-health risk table from a path relative to the working directory.
# NOTE(review): the preview shows an "Unnamed: 0" column whose values mirror the
# row number (0, 1, 2, ...); it looks like a saved row index rather than a
# measurement, so it should not be used as a model feature.
csv_path = "data/Maternal Health Risk Data Set.csv"
df = pd.read_csv(csv_path)
df.head(5)

Unnamed: 0,Age,SystolicBP,DiastolicBP,BS,BodyTemp,HeartRate,RiskLevel
0,25,130,80,15.0,98.0,86,high risk
1,35,140,90,13.0,98.0,70,high risk
2,29,90,70,8.0,100.0,80,high risk
3,30,140,85,7.0,98.0,70,high risk
4,35,120,60,6.1,98.0,76,low risk


In [52]:
# Class balance of the target column (number of rows per risk level).
risk_counts = df["RiskLevel"].value_counts()
print(risk_counts)

RiskLevel
low risk     406
mid risk     336
high risk    272
Name: count, dtype: int64


In [58]:
# Build the feature matrix and target vector, then hold out 30% for testing.
#
# "Unnamed: 0" is excluded together with the target: in the loaded frame it
# simply counts rows (0, 1, 2, ...), i.e. a row index that was written into the
# CSV. Left in, it would be treated as a numeric feature and -- on unscaled data --
# dominate k-NN distances and leak any ordering present in the file.
# errors="ignore" keeps the cell working if the CSV is later re-exported without
# that index column; a missing target still fails loudly on the next line.
X = df.drop(columns=["RiskLevel", "Unnamed: 0"], errors="ignore")
y = df.RiskLevel

# Fixed random_state so the split (and everything downstream) is reproducible.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=1)

In [74]:
# Baseline: k-NN with default settings on the raw (unscaled) features,
# scored by 5-fold cross-validation on the training split only.
clf = KNeighborsClassifier()
scores = cross_val_score(estimator=clf, X=X_train, y=y_train, cv=5)

mean_accuracy = scores.mean()
print('Validation Accuracy, unscaled', mean_accuracy)


Validation Accuracy, unscaled 0.7024572969733294


In [75]:
# Cross-validate k-NN on min-max scaled features.
#
# The scaler lives inside a pipeline so cross_val_score refits it on the training
# part of every fold. Fitting it once on all of X_train beforehand would let each
# validation fold influence the min/max used for scaling (data leakage) and bias
# the validation accuracy.
# A fresh default KNeighborsClassifier() is used (same settings as the unscaled
# baseline) so the result does not depend on whatever `clf` currently holds.
minmax_knn = make_pipeline(MinMaxScaler(), KNeighborsClassifier())
scores = cross_val_score(minmax_knn, X_train, y_train, cv=5)

# Scaled copies of the splits, fitted on the training split only; the names are
# kept so any cell that expects them still works.
scaler = MinMaxScaler()
scaler.fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

print('Validation Accuracy, min-max', scores.mean())

Validation Accuracy, min-max 0.7024572969733294


In [76]:
# Cross-validate k-NN on z-score standardised features.
#
# As with any fitted preprocessing step, the scaler is placed in a pipeline so it
# is refitted on the training part of each CV fold instead of seeing the
# validation fold's mean/std ahead of time.
zscore_knn = make_pipeline(StandardScaler(), KNeighborsClassifier())
scores = cross_val_score(zscore_knn, X_train, y_train, cv=5)

# Refresh BOTH scaled splits with this scaler. Updating only X_train_scaled would
# leave X_test_scaled holding values from a previously fitted, different scaler,
# so the two arrays would silently be on different scales.
scaler = StandardScaler()
scaler.fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

print('Validation Accuracy, z-scaled', scores.mean())

Validation Accuracy, z-scaled 0.6784437119168915


In [89]:
# Sweep the neighbourhood size k = 1..14 on the unscaled training features and
# report the mean 5-fold validation accuracy for each candidate.
# After the loop, `clf` and `scores` hold the model/results for the last k (14).
candidate_ks = range(1, 15)
for k in candidate_ks:
    clf = KNeighborsClassifier(n_neighbors=k)
    scores = cross_val_score(estimator=clf, X=X_train, y=y_train, cv=5)
    print("k: ", k, "validation accuracy: ", scores.mean())


k:  1 validation accuracy:  0.7954749775247228
k:  2 validation accuracy:  0.7404754769753271
k:  3 validation accuracy:  0.70524423134552
k:  4 validation accuracy:  0.7010188792328439
k:  5 validation accuracy:  0.7024572969733294
k:  6 validation accuracy:  0.7052642093696934
k:  7 validation accuracy:  0.6996204175407051
k:  8 validation accuracy:  0.6911597243032664
k:  9 validation accuracy:  0.6813105583857757
k:  10 validation accuracy:  0.6855159324742783
k:  11 validation accuracy:  0.6770452502247528
k:  12 validation accuracy:  0.678423733892718
k:  13 validation accuracy:  0.6713814803715913
k:  14 validation accuracy:  0.6713814803715913


In [95]:
# Refit on the full training split with k=1 and report accuracy on the
# held-out test split.
clf = KNeighborsClassifier(n_neighbors=1).fit(X_train, y_train)
test_accuracy = clf.score(X_test, y_test)
print('Test Accuracy: ', test_accuracy)

Test Accuracy:  0.7934426229508197


In [94]:
# Confusion matrix of the current `clf` on the test split
# (rows = true class, columns = predicted class).
# NOTE(review): the saved output of this cell has 204 of 305 samples on the
# diagonal (~0.669), which does not match the 0.793 test accuracy reported for the
# k=1 model -- it appears to have been produced with a different `clf` still in
# the kernel. Restart and run all cells in order before trusting it.
yhat = clf.predict(X_test)
cm = confusion_matrix(y_true=y_test, y_pred=yhat)
print(cm)

[[67  7 13]
 [ 5 89 23]
 [14 39 48]]
