**STEP 1**

In [2]:
import pickle
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split, cross_val_score, KFold
from sklearn.naive_bayes import GaussianNB
from sklearn.linear_model import Perceptron
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score

STEP 2

In [3]:
# Load dataset
data = pd.read_csv('/content/diabetes.csv')

# Check the available columns in your dataset
print(data.columns)

FileNotFoundError: [Errno 2] No such file or directory: '/content/diabetes.csv'

In [4]:
# Split data into features and target
X = data[['Age', 'Glucose', 'Insulin', 'BMI']]
y = data['Outcome']

# Train-test split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Feature scaling
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

NameError: name 'data' is not defined

In [4]:
# Assuming `X_train` is your training data
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)

# Train your models with `X_train_scaled` and then save both the models and the scaler
with open('scaler.pkl', 'wb') as f:
    pickle.dump(scaler, f)

STEP 3

In [5]:
# Train Naive Bayes model
naive_bayes_model = GaussianNB()
naive_bayes_model.fit(X_train, y_train)

# Predict with Naive Bayes
y_pred_nb = naive_bayes_model.predict(X_test)

STEP 4

In [6]:
# Train Perceptron model
perceptron_model = Perceptron(max_iter=1000, tol=1e-3, random_state=42)
perceptron_model.fit(X_train, y_train)

# Predict with Perceptron
y_pred_perceptron = perceptron_model.predict(X_test)

Custom perceptron

The CustomPerceptron class models a simple perceptron, a linear classifier that adjusts its weights iteratively based on misclassified samples.

In [7]:

class CustomPerceptron:
    def __init__(self, learning_rate=0.01, n_iters=1000):
        #learning_rate: Controls how much the weights are updated after each misclassification.
        self.learning_rate = learning_rate
        #n_iters: The number of iterations (epochs) to train the model.
        self.n_iters = n_iters
        #weights: An array to store the weights corresponding to each feature in the dataset. Initialized
        self.weights = None
        #bias: An intercept term added to the linear function, also initialized during training.
        self.bias = None


#X: Feature matrix of shape (n_samples, n_features), where n_samples is the number of data points and n_features is the number of features.
#y: Target labels, assumed to be binary (0 and 1).
    def fit(self, X, y):
        n_samples, n_features = X.shape
        #self.weights as a zero array of size equal to the number of features.
        self.weights = np.zeros(n_features)
        self.bias = 0

        # Convert labels to -1 and 1 if they are 0 and 1, for Perceptron calculation
        y_ = np.where(y <= 0, -1, 1)

        # Training loop
        #linear_output: Computes the weighted sum of inputs (X) and weights, plus the bias:z=wTx+b
        # y_predicted:Applies the sign function to classify the sample
        for _ in range(self.n_iters):
            for idx, x_i in enumerate(X):
                linear_output = np.dot(x_i, self.weights) + self.bias
                y_predicted = np.sign(linear_output)

                # Update weights and bias if prediction is incorrect
                #Update Rule: If the prediction (y_predicted) is incorrect, update the weights and bias:
                #w=w+ηy
                #b=b+ηy

                if y_predicted != y_[idx]:
                    self.weights += self.learning_rate * y_[idx] * x_i
                    self.bias += self.learning_rate * y_[idx]

#Computes the linear output for the input features
    def predict(self, X):
        linear_output = np.dot(X, self.weights) + self.bias
        #Applies the sign function to classify the data.
        y_predicted = np.sign(linear_output)
        #Converts the outputs back to binary labels (0 and 1).
        return np.where(y_predicted <= 0, 0, 1)

# Initialize and train the custom Perceptron
#Fits the model on the training dataset (X_train, y_train)
custom_perceptron_model = CustomPerceptron(learning_rate=0.3, n_iters=500)
custom_perceptron_model.fit(X_train, y_train)

# Make predictions
# Predicts the labels for the test dataset (X_test).
y_pred_custom_perceptron = custom_perceptron_model.predict(X_test)


STEP 5

In [8]:
# Evaluation metrics
def evaluate_model(y_true, y_pred):
    return {
        'Accuracy': accuracy_score(y_true, y_pred),
        'Precision': precision_score(y_true, y_pred, average='weighted'),
        'Recall': recall_score(y_true, y_pred, average='weighted'),
        'F1 Score': f1_score(y_true, y_pred, average='weighted')
    }

In [9]:

# Evaluate Naive Bayes
nb_metrics = evaluate_model(y_test, y_pred_nb)
print("Naive Bayes Metrics:", nb_metrics)

# Evaluate Perceptron
perceptron_metrics = evaluate_model(y_test, y_pred_perceptron)
print("Perceptron Metrics:", perceptron_metrics)

# Evaluate Custom made Perceptron
custom_perceptron_metrics = evaluate_model(y_test, y_pred_custom_perceptron)
print("Custom Perceptron Metrics:", custom_perceptron_metrics)

Naive Bayes Metrics: {'Accuracy': 0.7467532467532467, 'Precision': 0.7457671957671957, 'Recall': 0.7467532467532467, 'F1 Score': 0.7462278627738957}
Perceptron Metrics: {'Accuracy': 0.6363636363636364, 'Precision': 0.6363636363636364, 'Recall': 0.6363636363636364, 'F1 Score': 0.6363636363636364}
Custom Perceptron Metrics: {'Accuracy': 0.7532467532467533, 'Precision': 0.7475563504054762, 'Recall': 0.7532467532467533, 'F1 Score': 0.743401841868823}


**Analysis:**

*Naive Bayes:*

Strengths: Higher accuracy, precision, and recall indicate good performance, particularly in datasets with well-separated classes or features with strong independence.

Weaknesses: May struggle if feature independence assumptions are violated or if features are heavily correlated.

*Perceptron:*

Strengths: Simplicity and computational efficiency make it useful for linearly separable datasets.

Weaknesses: Lower performance metrics suggest potential struggles with convergence or non-linear decision boundaries.

**Recommendation:**

Use Naive Bayes if the dataset aligns well with its assumptions and speed is a priority.
Consider Custom Perceptron for better generalization on diverse datasets, especially if it includes enhancements for non-linearity.
Investigate further modifications or alternative models like Support Vector Machines (SVMs) or neural networks if neither suffices for the dataset's complexity.

STEP 6

In [10]:
# Save models
with open('naive_bayes_model.pkl', 'wb') as f:
    pickle.dump(naive_bayes_model, f)

with open('perceptron_model.pkl', 'wb') as f:
    pickle.dump(perceptron_model, f)

with open('custom_perceptron_model.pkl', 'wb') as f:
    pickle.dump(custom_perceptron_model, f)

from google.colab import files

# Downloading the Naïve Bayes model
files.download('naive_bayes_model.pkl')

# Downloading the Perceptron model
files.download('perceptron_model.pkl')

# Downloading the Custom Perceptron model
files.download('custom_perceptron_model.pkl')


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

STEP 9

In [11]:
# K-Fold Cross-validation for Naive Bayes
nb_cv_scores = cross_val_score(naive_bayes_model, X, y, cv=5)
print("Naive Bayes Cross-Validation Scores:", nb_cv_scores.mean())

# K-Fold Cross-validation for Perceptron
perceptron_cv_scores = cross_val_score(perceptron_model, X, y, cv=5)
print("Perceptron Cross-Validation Scores:", perceptron_cv_scores.mean())


Naive Bayes Cross-Validation Scores: 0.7552754435107376
Perceptron Cross-Validation Scores: 0.5480943892708598


STEP 10)

In [12]:
# Evaluation metrics
def evaluate_model(y_true, y_pred):
    return {
        'Accuracy': accuracy_score(y_true, y_pred),
        'Precision': precision_score(y_true, y_pred, average='weighted'),
        'Recall': recall_score(y_true, y_pred, average='weighted'),
        'F1 Score': f1_score(y_true, y_pred, average='weighted')
    }

In [13]:
# Evaluate Naive Bayes

# Predicting on the test set using the Naive Bayes model
nb_predictions = naive_bayes_model.predict(X_test)
# Evaluating the predictions against the actuals
nb_metrics_k_fold = evaluate_model(y_test, nb_predictions)
print("Naive Bayes Metrics k-fold:", nb_metrics_k_fold)

Naive Bayes Metrics k-fold: {'Accuracy': 0.7467532467532467, 'Precision': 0.7457671957671957, 'Recall': 0.7467532467532467, 'F1 Score': 0.7462278627738957}


In [14]:
# Evaluate Perceptron

# Predicting on the test set using the Perceptron model
perceptron_predictions = perceptron_model.predict(X_test)
# Evaluating the predictions against the actuals
perceptron_metrics_k_fold = evaluate_model(y_test, perceptron_predictions)
print("Perceptron Metrics k-fold:", perceptron_metrics_k_fold)

Perceptron Metrics k-fold: {'Accuracy': 0.6363636363636364, 'Precision': 0.6363636363636364, 'Recall': 0.6363636363636364, 'F1 Score': 0.6363636363636364}


In [15]:
# Initialize models
naive_bayes_model = GaussianNB()
perceptron_model = Perceptron(max_iter=1000, tol=1e-3, random_state=42)

# Define scoring metrics
scoring_metrics = ['accuracy', 'precision_weighted', 'recall_weighted', 'f1_weighted']

In [16]:
# Function to perform cross-validation and print results
def evaluate_model(model, model_name):
    results = {}
    for metric in scoring_metrics:
        cv_scores = cross_val_score(model, X_train, y_train, cv=5, scoring=metric)  # 5-fold cross-validation
        results[metric] = {
            'mean': cv_scores.mean(),
            'std': cv_scores.std()
        }
    print(f"Model: {model_name}")
    for metric, scores in results.items():
        print(f"{metric.capitalize()}: Mean = {scores['mean']:.4f}, Std = {scores['std']:.4f}")
    print("-" * 20)

In [17]:
# Evaluate models
evaluate_model(naive_bayes_model, 'Naive Bayes')
evaluate_model(perceptron_model, 'Perceptron')

Model: Naive Bayes
Accuracy: Mean = 0.7574, Std = 0.0260
Precision_weighted: Mean = 0.7526, Std = 0.0281
Recall_weighted: Mean = 0.7574, Std = 0.0260
F1_weighted: Mean = 0.7490, Std = 0.0244
--------------------
Model: Perceptron
Accuracy: Mean = 0.7361, Std = 0.0324
Precision_weighted: Mean = 0.7406, Std = 0.0326
Recall_weighted: Mean = 0.7361, Std = 0.0324
F1_weighted: Mean = 0.7374, Std = 0.0321
--------------------
