# BITS F464 : Machine Learning Assignment 4
Gaussian Naïve Bayes, and Perceptron Model

Adit Rastogi <br>
2022A7PS1330H <br><br>
Deeptansh Gupta <br>
2022A7PS1336H <br><br>
Anirban Nayak <br>
2022A3PS0705H <br>

In [9]:
import pandas as pd
import numpy as np
import pickle
from sklearn.linear_model import Perceptron
from sklearn.naive_bayes import GaussianNB
from sklearn.model_selection import train_test_split, cross_val_score, cross_val_predict, KFold
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix

In [2]:
data = pd.read_csv("./diabetes.csv")

In [3]:
data.head()

Unnamed: 0,Glucose,Insulin,BMI,Age,Outcome
0,148,0,33.6,50,1
1,85,0,26.6,31,0
2,183,0,23.3,32,1
3,89,94,28.1,21,0
4,137,168,43.1,33,1


In [4]:
X = data.drop(columns=['Outcome'])  # Features
y = data['Outcome']                 # Target

# Splitting the data
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Applying Standard Scaler
# Perceptron benefit from standardized inputs to improve convergence during training.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

with open('scaler.pkl', 'wb') as file:
    pickle.dump(scaler, file)

In [5]:
# Initialize the Gaussian Naïve Bayes model
naive_bayes_model = GaussianNB()

# Train the model on the training data
naive_bayes_model.fit(X_train, y_train)

# Predict the target on the test data
y_pred_nb = naive_bayes_model.predict(X_test)

# Evaluate the model
accuracy = accuracy_score(y_test, y_pred_nb)
classification_rep = classification_report(y_test, y_pred_nb)
confusion_mat = confusion_matrix(y_test, y_pred_nb)

print("Accuracy:", accuracy)
print("\nClassification Report:\n", classification_rep)
print("\nConfusion Matrix:\n", confusion_mat)


Accuracy: 0.7467532467532467

Classification Report:
               precision    recall  f1-score   support

           0       0.80      0.81      0.80        99
           1       0.65      0.64      0.64        55

    accuracy                           0.75       154
   macro avg       0.72      0.72      0.72       154
weighted avg       0.75      0.75      0.75       154


Confusion Matrix:
 [[80 19]
 [20 35]]


In [6]:
# Initialize the Perceptron model
perceptron_model = Perceptron(max_iter=1000, tol=1e-3, random_state=42)

# Train the model on the training data
perceptron_model.fit(X_train, y_train)

# Predict the target on the test data
y_pred_perceptron = perceptron_model.predict(X_test)

# Evaluate the model
accuracy = accuracy_score(y_test, y_pred_perceptron)
classification_rep = classification_report(y_test, y_pred_perceptron)
confusion_mat = confusion_matrix(y_test, y_pred_perceptron)

print("Accuracy:", accuracy)
print("\nClassification Report:\n", classification_rep)
print("\nConfusion Matrix:\n", confusion_mat)

Accuracy: 0.6363636363636364

Classification Report:
               precision    recall  f1-score   support

           0       0.72      0.72      0.72        99
           1       0.49      0.49      0.49        55

    accuracy                           0.64       154
   macro avg       0.60      0.60      0.60       154
weighted avg       0.64      0.64      0.64       154


Confusion Matrix:
 [[71 28]
 [28 27]]


In [7]:
class CustomPerceptron:
    def __init__(self, learning_rate=0.01, n_iters=1000):
        self.learning_rate = learning_rate
        self.n_iters = n_iters
        self.weights = None
        self.bias = None

    def fit(self, X, y):
        # Initialize weights and bias
        n_samples, n_features = X.shape
        self.weights = np.zeros(n_features)
        self.bias = 0
        
        # Convert labels to -1 and 1 (Perceptron works with these values)
        y_ = np.where(y <= 0, -1, 1)

        # Training loop
        for _ in range(self.n_iters):
            for idx, x_i in enumerate(X):
                linear_output = np.dot(x_i, self.weights) + self.bias
                y_predicted = np.sign(linear_output)
                
                # Perceptron update rule
                if y_predicted != y_[idx]:  # If the prediction is wrong
                    self.weights += self.learning_rate * y_[idx] * x_i
                    self.bias += self.learning_rate * y_[idx]

    def predict(self, X):
        linear_output = np.dot(X, self.weights) + self.bias
        y_predicted = np.sign(linear_output)
        return np.where(y_predicted == -1, 0, 1)  # Convert back to 0 and 1


# Initialize and train the model
custom_perceptron = CustomPerceptron(learning_rate=0.01, n_iters=1000)
custom_perceptron.fit(X_train, y_train)

# Predict on test data
y_pred_custom = custom_perceptron.predict(X_test)

# Evaluate the model
accuracy = accuracy_score(y_test, y_pred_custom)
classification_rep = classification_report(y_test, y_pred_custom)
confusion_mat = confusion_matrix(y_test, y_pred_custom)

print("Accuracy:", accuracy)
print("\nClassification Report:\n", classification_rep)
print("\nConfusion Matrix:\n", confusion_mat)


Accuracy: 0.7272727272727273

Classification Report:
               precision    recall  f1-score   support

           0       0.77      0.82      0.79        99
           1       0.63      0.56      0.60        55

    accuracy                           0.73       154
   macro avg       0.70      0.69      0.70       154
weighted avg       0.72      0.73      0.72       154


Confusion Matrix:
 [[81 18]
 [24 31]]


In [8]:
with open('naive_bayes_model.pkl', 'wb') as file:
    pickle.dump(naive_bayes_model, file)

with open('perceptron_model.pkl', 'wb') as file:
    pickle.dump(perceptron_model, file)


In [11]:
# Define the number of folds for cross-validation
k = 5  # For example, using 5-fold cross-validation
kf = KFold(n_splits=k, shuffle=True, random_state=42)

# Function to perform cross-validation
def evaluate_model(model, X, y, kf):
    # Cross-validation prediction
    y_pred = cross_val_predict(model, X, y, cv=kf)
    
    # Accuracy and other metrics
    accuracy = accuracy_score(y, y_pred)
    classification_rep = classification_report(y, y_pred)
    confusion_mat = confusion_matrix(y, y_pred)
    
    return accuracy, classification_rep, confusion_mat

# Evaluate Gaussian Naive Bayes model with k-fold cross-validation
print("Evaluating Gaussian Naive Bayes with k-fold cross-validation...")
accuracy_nb, classification_rep_nb, confusion_mat_nb = evaluate_model(naive_bayes_model, X, y, kf)

print("Gaussian Naive Bayes Model Results:")
print("Accuracy:", accuracy_nb)
print("\nClassification Report:\n", classification_rep_nb)
print("\nConfusion Matrix:\n", confusion_mat_nb)

# Evaluate Perceptron model with k-fold cross-validation
print("\nEvaluating Perceptron with k-fold cross-validation...")
accuracy_perceptron, classification_rep_perceptron, confusion_mat_perceptron = evaluate_model(perceptron_model, X, y, kf)

print("Perceptron Model Results:")
print("Accuracy:", accuracy_perceptron)
print("\nClassification Report:\n", classification_rep_perceptron)
print("\nConfusion Matrix:\n", confusion_mat_perceptron)


Evaluating Gaussian Naive Bayes with k-fold cross-validation...
Gaussian Naive Bayes Model Results:
Accuracy: 0.75390625

Classification Report:
               precision    recall  f1-score   support

           0       0.78      0.87      0.82       500
           1       0.69      0.54      0.61       268

    accuracy                           0.75       768
   macro avg       0.73      0.71      0.71       768
weighted avg       0.75      0.75      0.75       768


Confusion Matrix:
 [[433  67]
 [122 146]]

Evaluating Perceptron with k-fold cross-validation...
Perceptron Model Results:
Accuracy: 0.5338541666666666

Classification Report:
               precision    recall  f1-score   support

           0       0.67      0.56      0.61       500
           1       0.37      0.49      0.42       268

    accuracy                           0.53       768
   macro avg       0.52      0.52      0.52       768
weighted avg       0.57      0.53      0.54       768


Confusion Matrix:
 [[