# 1. Data Preparation

In [1]:
import pandas as pd
import numpy as np

# Load dataset
df = pd.read_csv('diabetes.csv')
X = df[['Glucose', 'BloodPressure', 'Insulin', 'BMI', 'Age']]
y = df['Diabetes']

# 2. Train SVM Model

In [2]:
from sklearn.svm import SVC

# Split data using custom function (task 3)
def custom_train_test_split(X, y, test_size=0.3, random_state=None):
    if random_state:
        np.random.seed(random_state)
    indices = np.arange(len(X))
    np.random.shuffle(indices)
    split_idx = int(len(X) * (1 - test_size))
    X_train = X.iloc[indices[:split_idx]]
    X_test = X.iloc[indices[split_idx:]]
    y_train = y.iloc[indices[:split_idx]]
    y_test = y.iloc[indices[split_idx:]]
    return X_train, X_test, y_train, y_test

X_train, X_test, y_train, y_test = custom_train_test_split(X, y, test_size=0.3, random_state=42)

# Train SVM with linear kernel
svm_model = SVC(kernel='linear')
svm_model.fit(X_train, y_train)

# 3. Hyperplane Equation & Support Vectors

In [3]:
# For linear kernel
w = svm_model.coef_[0]
b = svm_model.intercept_[0]
print(f"Hyperplane: {w[0]:.2f}*Glucose + {w[1]:.2f}*BP + {w[2]:.2f}*Insulin + {w[3]:.2f}*BMI + {w[4]:.2f}*Age + {b:.2f} = 0")
print("Support Vectors:\n", svm_model.support_vectors_)

Hyperplane: 0.03*Glucose + -0.01*BP + -0.00*Insulin + 0.06*BMI + 0.02*Age + -6.15 = 0
Support Vectors:
 [[136.   90.    0.   29.9  50. ]
 [103.   72.  190.   37.7  55. ]
 [117.    0.    0.   33.8  44. ]
 ...
 [155.   62.  495.   34.   46. ]
 [ 90.   85.    0.   34.9  56. ]
 [129.   64.  115.   26.4  28. ]]


# 4. Confusion Matrix (From Scratch)


In [4]:
def custom_confusion_matrix(y_true, y_pred):
    tp = np.sum((y_true == 1) & (y_pred == 1))
    tn = np.sum((y_true == 0) & (y_pred == 0))
    fp = np.sum((y_true == 0) & (y_pred == 1))
    fn = np.sum((y_true == 1) & (y_pred == 0))
    return np.array([[tn, fp], [fn, tp]])

y_pred = svm_model.predict(X_test)
cm = custom_confusion_matrix(y_test, y_pred)
print("Confusion Matrix:\n", cm)

Confusion Matrix:
 [[137  15]
 [ 40  39]]


# 5. Calculate Metrics

In [5]:
tn, fp, fn, tp = cm.ravel()
accuracy = (tp + tn) / (tp + tn + fp + fn)
print(f"TP: {tp}, FN: {fn}, FP: {fp}, TN: {tn}")
print(f"Accuracy: {accuracy:.2f}")

TP: 39, FN: 40, FP: 15, TN: 137
Accuracy: 0.76


# 6. GridSearch for Best Hyperparameters


In [6]:
from sklearn.model_selection import GridSearchCV

param_grid = {
    'C': [0.1, 1, 10, 100],
    'gamma': [1, 0.1, 0.01, 0.001],
    'kernel': ['rbf', 'linear']
}
grid = GridSearchCV(SVC(), param_grid, refit=True, verbose=2)
grid.fit(X_train, y_train)

print("Best Hyperparameters:", grid.best_params_)
best_model = grid.best_estimator_

Fitting 5 folds for each of 32 candidates, totalling 160 fits
[CV] END .........................C=0.1, gamma=1, kernel=rbf; total time=   0.0s
[CV] END .........................C=0.1, gamma=1, kernel=rbf; total time=   0.0s
[CV] END .........................C=0.1, gamma=1, kernel=rbf; total time=   0.0s
[CV] END .........................C=0.1, gamma=1, kernel=rbf; total time=   0.0s
[CV] END .........................C=0.1, gamma=1, kernel=rbf; total time=   0.0s
[CV] END ......................C=0.1, gamma=1, kernel=linear; total time=   0.1s
[CV] END ......................C=0.1, gamma=1, kernel=linear; total time=   0.0s
[CV] END ......................C=0.1, gamma=1, kernel=linear; total time=   0.0s
[CV] END ......................C=0.1, gamma=1, kernel=linear; total time=   0.0s
[CV] END ......................C=0.1, gamma=1, kernel=linear; total time=   0.0s
[CV] END .......................C=0.1, gamma=0.1, kernel=rbf; total time=   0.0s
[CV] END .......................C=0.1, gamma=0.

KeyboardInterrupt: 

# 7. Re-evaluate with Best Model


In [7]:
y_pred_grid = best_model.predict(X_test)
cm_grid = custom_confusion_matrix(y_test, y_pred_grid)
tn_g, fp_g, fn_g, tp_g = cm_grid.ravel()
accuracy_g = (tp_g + tn_g) / (tp_g + tn_g + fp_g + fn_g)
print(f"Optimized Accuracy: {accuracy_g:.2f}")

NameError: name 'best_model' is not defined

# 8. 2D Scatter Plots
