# In [None]:
import pandas as pd

from sklearn import metrics
from sklearn.svm import SVC
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import GridSearchCV
import matplotlib.pyplot as plt


# Load the training and test splits from their fixed CSV locations.
diabetes_train = pd.read_csv("/data/training/diabetes_train.csv")
diabetes_test = pd.read_csv("/data/test/diabetes_test.csv")

# Preview the first rows of each frame (training first, then test).
for frame in (diabetes_train, diabetes_test):
    print(frame.head())


# Create x and y for training: the first seven columns are used as the
# feature matrix and the eighth column (position 7) as the target.
feature_count = 7
x_train = diabetes_train.iloc[:, :feature_count]
y_train = diabetes_train.iloc[:, feature_count]

# Hyperparameter tuning: k-fold cross-validated grid search over C.
n_folds = 5

# Candidate values for the regularisation parameter C.
params = {"C": [0.1, 1, 10, 100, 1000]}

# Base estimator handed to the grid search.
# NOTE(review): SVC() is built with its default kernel, which is not
# necessarily linear despite the variable name - confirm this is intended.
linear_model = SVC()

# Grid search scored by accuracy; train scores are kept so they can be
# compared against the validation scores afterwards.
model_cv = GridSearchCV(
    estimator=linear_model,
    param_grid=params,
    scoring='accuracy',
    cv=n_folds,
    verbose=1,
    return_train_score=True,
)

# Run the search on the training data.
model_cv.fit(x_train, y_train)

# Keep the per-candidate results and show them as a table.
cv_results = model_cv.cv_results_
print(pd.DataFrame(cv_results))


# Plot mean cross-validated accuracy against C: one curve for the held-out
# folds ('mean_test_score') and one for the training folds.
plt.figure(figsize=(8, 6))
plt.plot(cv_results['param_C'], cv_results['mean_test_score'])
plt.plot(cv_results['param_C'], cv_results['mean_train_score'])
plt.xlabel('C')
plt.ylabel('Accuracy')
plt.legend(['test accuracy', 'train accuracy'], loc='upper left')
plt.xscale('log')

# Save BEFORE showing: once show() has displayed (and, depending on the
# backend, closed) the figure, a later savefig() can write an empty canvas.
plt.savefig('hyperparam_c.png')
plt.show()

# Pull the winning configuration out of the fitted grid search: the best
# mean cross-validated accuracy and the C value that achieved it.
best_C = model_cv.best_params_['C']
best_score = model_cv.best_score_

# Report the score first, then C, one value per line.
print(best_score, best_C, sep="\n")

# Choose the best C: take the value selected by the grid search instead of
# a hard-coded constant, so the final model always matches the tuning result.
C = best_C

# Refit a fresh SVC with the selected C on the full training set.
linear_model = SVC(C=C)
linear_model.fit(x_train, y_train)


# Make predictions on the test set.
print(diabetes_test.head())

# The first seven columns of the test frame are used as features, mirroring
# the positional selection applied to the training frame.
# NOTE(review): assumes the test CSV has the same column order as the
# training CSV (e.g. that 'id' is not among the first seven) - confirm.
predictions = linear_model.predict(diabetes_test.iloc[:, :7])

# Show the first few predictions; a bare expression displays nothing when it
# is not the last statement of a cell or when the file runs as a script.
print(predictions[:5])

# YOUR CODE ENDS HERE

# Assemble the output frame: the test-set ids next to the predicted labels.
output_columns = {
    'id': diabetes_test['id'],
    'Diabetes_Predicted': predictions,
}
d = pd.DataFrame(output_columns)
print("\n", "d", "\n", d.head())

# Write the output as comma-separated text. to_csv keeps the frame's index
# by default, so it is written as the leading column.
d.to_csv('/code/output/diabetes_predictions.csv', sep=",")