In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as mp
from sklearn.metrics import accuracy_score
from sklearn.svm import SVC

In [None]:
df = pd.read_csv('diabetes.csv')

In [None]:
df['Outcome'] = df['Outcome'].replace(0, -1)
print(df.head())

   Pregnancies  Glucose  BloodPressure  SkinThickness  Insulin   BMI  \
0            6      148             72             35        0  33.6   
1            1       85             66             29        0  26.6   
2            8      183             64              0        0  23.3   
3            1       89             66             23       94  28.1   
4            0      137             40             35      168  43.1   

   DiabetesPedigreeFunction  Age  Outcome  
0                     0.627   50        1  
1                     0.351   31       -1  
2                     0.672   32        1  
3                     0.167   21       -1  
4                     2.288   33        1  


In [None]:
from sklearn.model_selection import train_test_split

In [None]:
X = df[['DiabetesPedigreeFunction','Pregnancies','Glucose','BloodPressure','SkinThickness','Insulin','BMI','Age']].values
y = df[['Outcome']].values


In [None]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [None]:
n_samples, n_features = X.shape
learning_rate = 0.01
epochs = 1000
lambda_param = 0.001

In [None]:
weights = np.zeros(n_features)
bias = 0

In [None]:
for epoch in range(epochs):
    for i in range(n_samples):

        condition = y[i] * (np.dot(X[i], weights) + bias) >= 1
        if condition:

            weights -= learning_rate * (2 * lambda_param * weights)
        else:

            weights -= learning_rate * (2 * lambda_param * weights - y[i] * X[i])
            bias -= learning_rate * y[i]

In [None]:
print(f"Weights: {weights}")
print(f"Bias: {bias}")

Weights: [ 3.66557785 17.05932569  1.03879453 -6.60441818 -1.1686902   0.40320352
 -2.97918532 -3.82876212]
Bias: [341.39]


In [None]:
def predict(X, weights, bias):
    return np.sign(np.dot(X, weights) + bias)

In [None]:
predictions = predict(X, weights, bias)
print("\nPredictions:", predictions)
print("Actual Labels:", y)


accuracy = np.mean(predictions == y)
print(f"Accuracy: {accuracy * 100:.2f}%")


Predictions: [-1. -1.  1. -1. -1. -1. -1.  1. -1. -1. -1. -1. -1.  1. -1.  1. -1. -1.
  1. -1. -1. -1. -1. -1. -1. -1. -1. -1. -1. -1. -1. -1. -1. -1. -1. -1.
 -1. -1. -1. -1. -1. -1. -1. -1. -1. -1. -1. -1. -1.  1. -1. -1. -1. -1.
  1. -1.  1. -1. -1. -1.  1. -1. -1. -1. -1. -1. -1. -1. -1. -1. -1. -1.
 -1. -1. -1. -1. -1. -1.  1. -1.  1.  1. -1. -1. -1. -1. -1. -1.  1. -1.
 -1. -1. -1. -1. -1. -1. -1. -1.  1. -1. -1. -1. -1. -1. -1. -1. -1. -1.
 -1. -1. -1.  1. -1. -1.  1. -1. -1. -1. -1. -1. -1. -1. -1. -1. -1. -1.
 -1. -1. -1. -1. -1.  1. -1. -1. -1. -1. -1. -1. -1. -1. -1. -1. -1. -1.
  1. -1. -1. -1. -1. -1. -1. -1. -1. -1. -1. -1.  1. -1. -1.  1. -1. -1.
 -1. -1. -1. -1. -1. -1. -1. -1. -1. -1.  1. -1. -1. -1. -1. -1. -1. -1.
 -1. -1. -1. -1. -1. -1.  1. -1. -1. -1. -1. -1. -1.  1. -1. -1. -1. -1.
 -1.  1. -1. -1. -1. -1. -1. -1. -1. -1. -1. -1. -1. -1. -1. -1. -1.  1.
 -1. -1. -1. -1.  1. -1.  1. -1. -1. -1. -1. -1.  1. -1. -1. -1. -1. -1.
 -1. -1. -1. -1. -1. -1. -1. -1.  1. 

In [None]:
X = df[['DiabetesPedigreeFunction','Pregnancies','Glucose','BloodPressure','SkinThickness','Insulin','BMI','Age']].values
y = df[['Outcome']].values
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
y_train = y_train.ravel()
y_test = y_test.ravel()

# Using the RBF kernel SVM (default kernel is RBF)
svm_model = SVC(kernel='rbf', C=1, gamma='scale')  # 'scale' is often a good default for gamma

# Train the model
svm_model.fit(X_train, y_train)

# Predict on the test set
predictions = svm_model.predict(X_test)

# Calculate accuracy
accuracy = accuracy_score(y_test, predictions)
print(f"Accuracy: {accuracy * 100:.2f}%")

# Optionally print predictions vs actual labels
print("\nPredictions:", predictions)
print("Actual Labels:", y_test)

Accuracy: 76.62%

Predictions: [-1 -1 -1 -1 -1 -1 -1 -1  1  1 -1  1 -1 -1 -1 -1 -1 -1  1 -1 -1 -1 -1 -1
 -1  1 -1 -1 -1 -1  1  1  1  1 -1  1  1 -1 -1  1 -1 -1 -1 -1 -1  1 -1 -1
 -1  1 -1  1  1 -1 -1 -1  1 -1 -1  1  1 -1 -1 -1 -1  1 -1 -1 -1  1 -1 -1
 -1 -1 -1 -1 -1 -1 -1 -1  1 -1 -1 -1 -1  1  1 -1 -1 -1 -1 -1 -1 -1 -1  1
 -1 -1  1 -1  1 -1 -1  1  1 -1 -1  1 -1 -1 -1  1 -1  1 -1 -1  1 -1 -1 -1
 -1 -1 -1 -1 -1 -1 -1  1  1  1  1  1 -1 -1  1 -1 -1  1  1 -1 -1 -1 -1  1
 -1 -1 -1 -1 -1  1 -1 -1 -1 -1]
Actual Labels: [-1 -1 -1 -1 -1 -1 -1 -1 -1 -1  1 -1  1 -1 -1  1 -1 -1  1  1 -1 -1 -1 -1
 -1  1 -1 -1  1 -1  1  1  1  1 -1  1  1  1 -1  1 -1 -1 -1  1 -1  1  1 -1
 -1 -1 -1  1  1  1 -1 -1 -1 -1 -1  1  1 -1 -1  1 -1 -1 -1  1 -1  1 -1 -1
 -1  1 -1 -1 -1 -1 -1 -1  1  1 -1 -1 -1  1  1  1 -1 -1 -1 -1 -1  1 -1 -1
 -1 -1  1 -1 -1 -1  1 -1  1 -1 -1  1  1  1 -1 -1 -1  1 -1 -1  1 -1 -1 -1
 -1 -1  1 -1 -1 -1  1  1  1 -1  1  1 -1  1  1 -1  1  1  1 -1 -1 -1 -1 -1
 -1 -1  1 -1 -1  1 -1 -1  1 -1]


In [1]:
#without using sklearn

In [None]:
# Radial Basis Function (RBF) kernel
def rbf_kernel(X1, X2, gamma=1.0):
    # Compute the pairwise distance and return the RBF kernel matrix
    diff = X1[:, np.newaxis] - X2  # Broadcasting to compute pairwise difference
    return np.exp(-gamma * np.linalg.norm(diff, axis=2) ** 2)

# SVM Model with RBF kernel
class SVM:
    def __init__(self, C=1.0, gamma=1.0, epochs=1000, learning_rate=0.01):
        self.C = C
        self.gamma = gamma
        self.epochs = epochs
        self.learning_rate = learning_rate

    def fit(self, X, y):
        n_samples, n_features = X.shape
        # Initialize the Lagrange multipliers (alpha) and the bias (b)
        self.alpha = np.zeros(n_samples)
        self.b = 0
        self.X_train = X  # Store training data

        # Training loop (gradient descent)
        for _ in range(self.epochs):
            for i in range(n_samples):
                # Calculate decision function
                decision_value = np.sum(self.alpha * y * self.kernel(X, X[i])) + self.b
                # Compute the gradient for hinge loss + regularization
                if y[i] * decision_value < 1:
                    self.alpha[i] += self.learning_rate * (self.C * y[i] - 2 * self.alpha[i])
                    self.b += self.learning_rate * self.C * y[i]
                else:
                    self.alpha[i] -= self.learning_rate * 2 * self.alpha[i]

    def kernel(self, X1, X2):
        return rbf_kernel(X1, X2, gamma=self.gamma)

    def predict(self, X):
        # Calculate decision function using kernel between test points and training points
        kernel_matrix = self.kernel(X, self.X_train)  # X is test data, self.X_train is training data
        decision_values = np.dot(kernel_matrix, self.alpha) + self.b
        return np.sign(decision_values)

    def accuracy(self, X, y):
        predictions = self.predict(X)
        return np.mean(predictions == y)




X = df[['DiabetesPedigreeFunction','Pregnancies','Glucose','BloodPressure','SkinThickness','Insulin','BMI','Age']].values
y = df[['Outcome']].values
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
y_train = y_train.ravel()
y_test = y_test.ravel()

# Initialize the SVM model with RBF kernel
svm_model = SVM(C=3.0, gamma=1, epochs=10000, learning_rate=0.001)

# Train the SVM model
svm_model.fit(X_train, y_train)

# Calculate accuracy on the test set
accuracy = svm_model.accuracy(X_test, y_test)
print(f"Accuracy: {accuracy * 100:.2f}%")
