# Diabetes prediction using support vector machines

In [105]:
import pandas as pd
from sklearn.svm import SVC
from sklearn.model_selection import train_test_split,GridSearchCV,RandomizedSearchCV
from sklearn.metrics import balanced_accuracy_score,recall_score,precision_score,f1_score
from sklearn.preprocessing import MinMaxScaler,StandardScaler
from scipy.stats import uniform


In [106]:
data= pd.read_csv('diabetes.csv')
df= pd.DataFrame(data)

df.head(10)

Unnamed: 0,Pregnancies,Glucose,BloodPressure,SkinThickness,Insulin,BMI,DiabetesPedigreeFunction,Age,Outcome
0,6,148,72,35,0,33.6,0.627,50,1
1,1,85,66,29,0,26.6,0.351,31,0
2,8,183,64,0,0,23.3,0.672,32,1
3,1,89,66,23,94,28.1,0.167,21,0
4,0,137,40,35,168,43.1,2.288,33,1
5,5,116,74,0,0,25.6,0.201,30,0
6,3,78,50,32,88,31.0,0.248,26,1
7,10,115,0,0,0,35.3,0.134,29,0
8,2,197,70,45,543,30.5,0.158,53,1
9,8,125,96,0,0,0.0,0.232,54,1


**setting up features and targets**

In [107]:
x= df[['Pregnancies','Glucose','BloodPressure','SkinThickness','Insulin','BMI','Age']]
y= df['Outcome']

X_train, X_test, y_train, y_test= train_test_split(x,y,test_size=0.2,random_state=42)

scaler=StandardScaler()

X_train_Scaled= scaler.fit_transform(X_train)
X_test_scaled=scaler.transform(X_test)

model= SVC()

param_grid=[{
        'kernel': ['linear'],
        'C': [0.01, 0.1, 1, 10],
        'class_weight': ['balanced']
    },
    {
        'kernel': ['rbf'],
        'C': [0.5, 1, 10],
        'gamma': [0.01, 0.3, 1],
        'class_weight': ['balanced']
    }
]
grid= GridSearchCV(model,param_grid,cv=5,scoring='balanced_accuracy')
grid.fit(X_train_Scaled,y_train)

print("best params:",grid.best_params_)
print("best cv score:",grid.best_score_)


best_SVM= grid.best_estimator_
best_pred= best_SVM.predict(X_test_scaled)

print("test score:",balanced_accuracy_score(y_test,best_pred))







best params: {'C': 0.5, 'class_weight': 'balanced', 'gamma': 0.3, 'kernel': 'rbf'}
best cv score: 0.7636714552315327
test score: 0.7010101010101011
