# Introduction

Before the analysis can begin, the required libraries need to be imported and the supporting data structures defined.


In [1]:
import itertools
import json

import numpy as np
import matplotlib.pyplot as plt
import pandas as pd

from sklearn.svm import LinearSVC
from sklearn.svm import SVC

from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import PolynomialFeatures

from sklearn.pipeline import make_pipeline, Pipeline
from sklearn.metrics import accuracy_score, f1_score, precision_score, recall_score, confusion_matrix
from sklearn.model_selection import train_test_split, cross_val_score, GridSearchCV

In [2]:
## Read the diabetes dataset from disk into a DataFrame

data = pd.read_csv('diabetes.csv')

# Preview the first ten rows, then report the (rows, columns) shape
print(data.head(10))
print(data.shape)

   Pregnancies  Glucose  BloodPressure  SkinThickness  Insulin   BMI  \
0            6      148             72             35        0  33.6   
1            1       85             66             29        0  26.6   
2            8      183             64              0        0  23.3   
3            1       89             66             23       94  28.1   
4            0      137             40             35      168  43.1   
5            5      116             74              0        0  25.6   
6            3       78             50             32       88  31.0   
7           10      115              0              0        0  35.3   
8            2      197             70             45      543  30.5   
9            8      125             96              0        0   0.0   

   DiabetesPedigreeFunction  Age  Outcome  
0                     0.627   50        1  
1                     0.351   31        0  
2                     0.672   32        1  
3                     0.167   2

In [3]:


## Separate the target column from the features, then hold out 20% of the rows for testing
y = data['Outcome']
X = data.drop(columns=['Outcome'])

# random_state is fixed so the split is reproducible between runs
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

In [4]:
# ## Create a 2d stack of data
# scaler = StandardScaler()
# X_train = scaler.fit_transform(X_train)
# X_test = scaler.transform(X_test)


# #convert y_train and y_test to numpy array
# y_train = y_train.to_numpy()
# y_test = y_test.to_numpy()

In [5]:
# clf = SVC(
# 	kernel='poly',
# 	max_iter=7000,
# 	tol=0.0001,
# 	random_state=23,
# 	C=100.0,
# 	degree=1,
# 	decision_function_shape='ovo'

# 	)
# clf.fit(X_train, y_train)

In [6]:
# pred_clf = clf.predict(X_test)

# print('Accuracy: ', accuracy_score(y_test, pred_clf))
# print(pred_clf)

In [7]:
def false_negative_ratio(y_true, y_pred):
    """Return FN / (FN + TP) for binary labels, treating label 1 as positive.

    The counts come from the confusion matrix built over labels [0, 1].
    Returns 0 when `y_true` contains no positive samples.
    """
    matrix = confusion_matrix(y_true, y_pred, labels=[0, 1])

    # Row 1 holds the samples whose true label is 1: [false negatives, true positives]
    fn, tp = matrix[1]
    positives = fn + tp

    if positives > 0:
        return fn / positives
    return 0

In [8]:
class testData:
	"""One SVC hyperparameter configuration together with the metrics it achieved.

	The hyperparameters are copied from a dict at construction time; the
	`closed_*` result attributes stay None until `predict()` has been run.
	"""

	def __init__(self, data):
		"""Copy the hyperparameters out of `data`.

		`data` must provide the keys 'kernel', 'max_iter',
		'decision_function_shape', 'probability', 'shrinking', 'tollerance'
		and 'C' (the spelling 'tollerance' is part of the expected schema).
		A missing key raises KeyError. Values are copied out, so the caller
		may reuse and mutate the same dict afterwards.
		"""
		self.kernel = data['kernel']
		self.max_iter = data['max_iter']
		self.func_shape = data['decision_function_shape']
		self.probability = data['probability']
		self.shrinking = data['shrinking']
		self.tollerance = data['tollerance']
		self.C = data['C']

		# Results, filled in by predict()
		self.closed_fnr = None
		self.closed_recall = None
		self.closed_f1 = None
		self.closed_precision = None
		self.closed_accuracy = None

		self.closed_pred = None

	def predict(self, X_fit=None, y_fit=None, X_eval=None, y_eval=None):
		"""Fit a StandardScaler + SVC pipeline and record its evaluation metrics.

		Parameters
		----------
		X_fit, y_fit : optional
			Training features and labels. Default to the notebook-level
			`X_train` / `y_train`.
		X_eval, y_eval : optional
			Evaluation features and labels. Default to the notebook-level
			`X_test` / `y_test`.

		Stores the predictions in `closed_pred` and the accuracy, recall, F1,
		precision and false-negative ratio in the matching `closed_*`
		attributes. Returns None.
		"""
		# Fall back to the notebook's global split so existing `predict()` calls keep working.
		X_fit = X_train if X_fit is None else X_fit
		y_fit = y_train if y_fit is None else y_fit
		X_eval = X_test if X_eval is None else X_eval
		y_eval = y_test if y_eval is None else y_eval

		svc = SVC(
			kernel=self.kernel,
			max_iter=self.max_iter,
			decision_function_shape=self.func_shape,
			probability=self.probability,
			shrinking=self.shrinking,
			C=self.C,
			tol=self.tollerance,
		)
		# The scaler lives inside the pipeline, so it is fitted on the training data only.
		closed_clf = make_pipeline(StandardScaler(), svc)
		closed_clf.fit(X_fit, y_fit)

		self.closed_pred = closed_clf.predict(X_eval)
		self.closed_accuracy = accuracy_score(y_eval, self.closed_pred)
		self.closed_recall = recall_score(y_eval, self.closed_pred)
		self.closed_f1 = f1_score(y_eval, self.closed_pred)
		self.closed_precision = precision_score(y_eval, self.closed_pred)
		self.closed_fnr = false_negative_ratio(y_eval, self.closed_pred)

In [9]:
# Template for one hyperparameter combination; every slot starts as None and is
# filled in by the search loop before a testData object is built from it.
current_test_data = dict.fromkeys(
	[
		'kernel',
		'max_iter',
		'decision_function_shape',
		'probability',
		'shrinking',
		'C',
		'tollerance',
	]
)

# Kept candidates and their scores; check_best appends to both together.
solution_list, accuracy_list = [], []

In [10]:
# Numeric search grids: 100 evenly spaced values each
C_index = np.linspace(start=1, stop=100, num=100)
max_iter_index = np.linspace(start=10, stop=10000, num=100).astype(int)
tollerance_index = np.linspace(start=0.0001, stop=0.1, num=100)

# Categorical options
kernel_types = ['linear', 'poly', 'rbf', 'sigmoid']
decision_function_shape = ['ovo', 'ovr']
probability_opts = [True, False]

# Progress counter for the search loop
iteration_count = 0

In [11]:
def check_best(current_test, max_kept=10, log_path='best_params.txt'):
	"""Keep `current_test` if it is among the `max_kept` lowest false-negative ratios seen.

	The module-level lists `solution_list` and `accuracy_list` are updated in
	place and stay parallel: `accuracy_list[i]` is the `closed_fnr` of
	`solution_list[i]` (despite its name, the list holds FNR values, where
	lower is better). Both are kept sorted ascending, best first.

	Parameters
	----------
	current_test : object with a `closed_fnr` attribute
		The evaluated candidate, e.g. a testData instance after predict().
	max_kept : int, default 10
		Maximum number of candidates to retain; expected to be >= 1.
	log_path : str, default 'best_params.txt'
		Every accepted candidate's attributes are appended here. The file is
		restarted with "[" when the first candidate is stored.

	Returns None.
	"""
	fnr = current_test.closed_fnr
	is_first = len(solution_list) == 0

	# Once the list is full, a candidate must beat the current worst entry to get in.
	if len(solution_list) >= max_kept and fnr >= accuracy_list[-1]:
		return

	# Insert after any equal scores so earlier candidates win ties.
	position = sum(1 for kept in accuracy_list if kept <= fnr)
	accuracy_list.insert(position, fnr)
	solution_list.insert(position, current_test)

	# Drop whatever fell off the end (the worst entries), not the oldest ones.
	del accuracy_list[max_kept:]
	del solution_list[max_kept:]

	with open(log_path, 'w' if is_first else 'a') as f:
		if is_first:
			f.write("[\n")
		f.write(str(current_test.__dict__) + ",\n")

In [12]:

# Grid search: fit one SVC per hyperparameter combination and let check_best
# decide which configurations to keep. The kept set is exported as JSON.
#
# NOTE(review): every configuration is scored on X_test/y_test (see
# testData.predict), so the winning metrics are tuned to the test split.
# Cross-validating on the training split (GridSearchCV is already imported)
# would give an unbiased estimate.
# NOTE(review): scikit-learn documents decision_function_shape as ignored for
# binary classification; if Outcome is strictly 0/1 that axis doubles the
# runtime without changing results -- confirm before the next full run.

FIXED_C = 7  # C is held constant in this sweep; C_index is not iterated here
TOP_SOLUTIONS_PATH = 'top10Solutions.json'
PROGRESS_PATH = 'iterations.txt'


def _json_default(value):
	"""Fallback for json.dump: unwrap NumPy scalars and arrays into plain Python values."""
	if isinstance(value, np.generic):
		return value.item()
	if isinstance(value, np.ndarray):
		return value.tolist()
	raise TypeError("Cannot serialize " + type(value).__name__ + " to JSON")


def _solution_record(solution, ranking_score):
	"""Build the JSON object for one kept solution."""
	return {
		"kernel": solution.kernel,
		"max_iter": solution.max_iter,
		"decision_function_shape": solution.func_shape,
		"probability": solution.probability,
		"shrinking": solution.shrinking,
		"C": solution.C,
		"tollerance": solution.tollerance,
		# Previously this field carried the ranking score from accuracy_list;
		# it now reports the real accuracy and the score gets its own key.
		"accuracy": solution.closed_accuracy,
		"ranking_score": ranking_score,
		"fnr": solution.closed_fnr,
		"recall": solution.closed_recall,
		"f1": solution.closed_f1,
		"precision": solution.closed_precision,
		"pred": solution.closed_pred,
	}


def _write_top_solutions(path):
	"""Serialize the currently kept solutions to `path` as a JSON array."""
	records = [
		_solution_record(solution, score)
		for solution, score in zip(solution_list, accuracy_list)
	]
	with open(path, 'w') as f:
		json.dump(records, f, indent=2, default=_json_default)
		f.write("\n")


# probability_opts doubles as the option list for `shrinking`, hence the square.
number_of_tests = (
	len(kernel_types)
	* len(decision_function_shape)
	* len(probability_opts) ** 2
	* len(max_iter_index)
	* len(tollerance_index)
)

# Restart the counter so re-running this cell reports progress from zero.
iteration_count = 0

for kernel, function_shape, probability, shrinking, max_iter, tol in itertools.product(
	kernel_types,
	decision_function_shape,
	probability_opts,
	probability_opts,
	max_iter_index,
	tollerance_index,
):
	current_test_data.update({
		'C': FIXED_C,
		'kernel': kernel,
		'max_iter': max_iter,
		'decision_function_shape': function_shape,
		'probability': probability,
		'shrinking': shrinking,
		'tollerance': tol,
	})

	test = testData(current_test_data)
	test.predict()

	# Snapshot the kept set so the JSON export is rewritten only when it changes.
	previous_top = list(solution_list)
	check_best(test)

	iteration_count += 1

	# Overwritten each iteration so the file always shows the latest progress.
	with open(PROGRESS_PATH, 'w') as f:
		f.write(f"{iteration_count}/{number_of_tests}\n")

	if solution_list != previous_top:
		_write_top_solutions(TOP_SOLUTIONS_PATH)





KeyboardInterrupt: 

In [None]:
def plot_2D_dataset(X, y, column_ids):
	"""Scatter two feature columns against each other, split by class label.

	Parameters
	----------
	X : array-like or DataFrame, indexable as (samples, features) once
		converted to an array
		Feature matrix. A DataFrame is accepted: it is converted to an array
		first, because `X[:, i]` is not valid DataFrame indexing.
	y : array-like
		Labels aligned with the rows of X; rows with label 0 are drawn as
		blue squares and rows with label 1 as green triangles.
	column_ids : sequence of int
		Positions of the two columns to plot: [x_axis, y_axis].

	Axis labels come from X's own column names when it has them, otherwise
	from the notebook-level `data` frame.
	"""
	# Resolve the label source before X is converted and loses its column names.
	column_names = X.columns if hasattr(X, "columns") else data.columns
	features = np.asarray(X)
	labels = np.asarray(y)

	x_col, y_col = column_ids[0], column_ids[1]
	_x = features[:, x_col]
	_y = features[:, y_col]

	fig = plt.figure()
	ax = fig.add_subplot()

	ax.plot(_x[labels == 0], _y[labels == 0], "bs")
	ax.plot(_x[labels == 1], _y[labels == 1], "g^")

	ax.set_xlabel(column_names[x_col])
	ax.set_ylabel(column_names[y_col])

	plt.show()

In [None]:
# for i in range(0, 8):
# 	for j in range(i+1, 8):
# 		plot_2D_dataset(X_train, y_train, [i, j])
# 		plt.show()

In [None]:
%matplotlib widget
def plot_3d_dataset(X, y, column_ids):
	"""Draw a 3D scatter of three feature columns, split by class label.

	Parameters
	----------
	X : array-like or DataFrame, indexable as (samples, features) once
		converted to an array
		Feature matrix. A DataFrame is accepted: it is converted to an array
		first, because `X[:, i]` is not valid DataFrame indexing.
	y : array-like
		Labels aligned with the rows of X; rows with label 0 are drawn as
		blue circles and rows with label 1 as red triangles.
	column_ids : sequence of int
		Positions of the three columns to plot: [x_axis, y_axis, z_axis].

	Axis labels come from X's own column names when it has them, otherwise
	from the notebook-level `data` frame.
	"""
	# Resolve the label source before X is converted and loses its column names.
	column_names = X.columns if hasattr(X, "columns") else data.columns
	features = np.asarray(X)
	labels = np.asarray(y)

	x_col, y_col, z_col = column_ids[0], column_ids[1], column_ids[2]
	_x = features[:, x_col]
	_y = features[:, y_col]
	_z = features[:, z_col]

	negatives = labels == 0
	positives = labels == 1

	fig = plt.figure()
	ax = fig.add_subplot(111, projection='3d')
	ax.scatter(_x[negatives], _y[negatives], _z[negatives], c='b', marker='o')
	ax.scatter(_x[positives], _y[positives], _z[positives], c='r', marker='^')

	ax.set_xlabel(column_names[x_col])
	ax.set_ylabel(column_names[y_col])
	ax.set_zlabel(column_names[z_col])

	# Fixed starting camera angle for the 3D view.
	ax.view_init(elev=20, azim=45)
	plt.show()