# Diabetes prediction using support vector machines

In [520]:
import pandas as pd
from sklearn.svm import SVC
from sklearn.model_selection import train_test_split,cross_val_score
from sklearn.metrics import accuracy_score,recall_score,precision_score,f1_score
from sklearn.preprocessing import MinMaxScaler,StandardScaler



In [521]:
data= pd.read_csv('diabetes.csv')
df= pd.DataFrame(data)

df.head(10)

Unnamed: 0,Pregnancies,Glucose,BloodPressure,SkinThickness,Insulin,BMI,DiabetesPedigreeFunction,Age,Outcome
0,6,148,72,35,0,33.6,0.627,50,1
1,1,85,66,29,0,26.6,0.351,31,0
2,8,183,64,0,0,23.3,0.672,32,1
3,1,89,66,23,94,28.1,0.167,21,0
4,0,137,40,35,168,43.1,2.288,33,1
5,5,116,74,0,0,25.6,0.201,30,0
6,3,78,50,32,88,31.0,0.248,26,1
7,10,115,0,0,0,35.3,0.134,29,0
8,2,197,70,45,543,30.5,0.158,53,1
9,8,125,96,0,0,0.0,0.232,54,1


**setting up features and targets**

In [522]:
x= df[['Pregnancies','Glucose','BloodPressure','SkinThickness','Insulin','BMI','Age']]
y= df['Outcome']

X_train, X_test, y_train, y_test= train_test_split(x,y, test_size=0.2, random_state=42)

scaler=MinMaxScaler()

X_train_scaled= scaler.fit_transform(X_train)
X_test_scaled= scaler.transform(X_test)

svc= SVC(
    kernel='rbf',
    gamma=0.2,
    C=0.2,
    class_weight='balanced'
)

cross_score= cross_val_score(svc, X_train_scaled,y_train,cv=5,scoring='balanced_accuracy')

svc.fit(X_train_scaled,y_train)

y_pred= svc.predict(X_test_scaled)

test_accuracy= accuracy_score(y_test,y_pred)
recall= recall_score(y_test,y_pred)
precision= precision_score(y_test,y_pred)
f1= f1_score(y_test,y_pred)


print(f'cv scores:{cross_score}')
print(f'mean CV score: {cross_score.mean()}')
print(f"test accuracy: {test_accuracy}")
print(f'Recall score:{recall}')
print(f"Precision score:{precision}")
print(f"f1 score:{f1}")

cv scores:[0.70414462 0.73633721 0.7497093  0.67281977 0.71517857]
mean CV score: 0.7156378942619253
test accuracy: 0.7337662337662337
Recall score:0.7636363636363637
Precision score:0.6
f1 score:0.672
