# Medical Diagnosis with Support Vector Machines

## Task 1: Import Libraries



In [1]:
import numpy as np
import pandas as pd
from sklearn import svm
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, classification_report

## Task 1: Get Data

In [5]:
column_names = ["pregnancies", "glucose", "bpressure", "skinfold", "insulin", "bmi", "pedigree", "age", "class"]
df=pd.read_csv("data.csv",names=column_names)
print(df.shape)
df.head()

(13, 9)


Unnamed: 0,pregnancies,glucose,bpressure,skinfold,insulin,bmi,pedigree,age,class
0,<html>,,,,,,,,
1,<head><title>504 Gateway Time-out</title></head>,,,,,,,,
2,<body>,,,,,,,,
3,<center><h1>504 Gateway Time-out</h1></center>,,,,,,,,
4,<hr><center>nginx</center>,,,,,,,,


## Task 1: Extract Features

In [None]:
X=df.iloc[:,:8]
X.head()

## Task 1: Extract Class Labels

In [None]:
y=df['class']
y.head()

## Task 2: Split Dataset

In [None]:
X_train,X_test,y_train,y_test=train_test_split(X,y,test_size=0.25,random_state=0)
print(X_train.shape)
print(y_train.shape)
print(X_test.shape)
print(y_test.shape)
X_test.head()

## Task 2: Normalize Features

In [None]:
scaler=StandardScaler()
scaler.fit(X_train)
X_train=scaler.transform(X_train)
X_train[:5,:]

## Task 3: Training a Support Vector Machine

In [None]:
clf=svm.SVC(kernel='sigmoid')#classifier
clf.fit(X_train,y_train)

## Task 3: Decision Boundary

In [None]:
y_pred=clf.predict(X_train)
print(y_pred)
print(accuracy_score(y_train,y_pred))

## Task 3: SVM Kernels

In [None]:
#Hyperparameter Optimization
for k in ('linear','poly','rbf','sigmoid'):
    clf=svm.SVC(kernel=k)
    clf.fit(X_train,y_train)
    y_pred=clf.predict(X_train)
    print(k)
    print(accuracy_score(y_train,y_pred))

## Task 4: Instantiating the Best Model

In [None]:
clf=svm.SVC(kernel='rbf')
clf.fit(X_train,y_train)

## Task 4: Making a single prediction

In [None]:
# "pregnancies", "glucose", "bpressure", 
# "skinfold", "insulin", "bmi", 
# "pedigree", "age", "class"
patient=np.array([[1.,50.,75.,40.,0.,45.,1.5,20]])
patient=scaler.transform(patient)
clf.predict(patient)

## Task 4: Testing Set Prediction

In [None]:
patient=np.array([X_test.iloc[8]])
patient=scaler.transform(patient)
print(clf.predict(patient))
print(y_test.iloc[8])

## Task 5: Accuracy on Testing Set

In [None]:
X_test=scaler.transform(X_test)
y_pred=clf.predict(X_test)
print(accuracy_score(y_test,y_pred))


## Task 5: Comparison to All-Zero Prediction

In [None]:
y_zero=np.zeros(y_test.shape)
print(accuracy_score(y_test,y_zero))

## Task 5: Precision and Recall

In [None]:
print(classification_report(y_test,y_pred))