In [70]:
from sklearn.naive_bayes import GaussianNB
from sklearn.linear_model import Perceptron
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
import pickle
import numpy as np
import pandas as pd

In [71]:
df=pd.read_csv('diabetes.csv')
df

Unnamed: 0,Glucose,Insulin,BMI,Age,Outcome
0,148,0,33.6,50,1
1,85,0,26.6,31,0
2,183,0,23.3,32,1
3,89,94,28.1,21,0
4,137,168,43.1,33,1
...,...,...,...,...,...
763,101,180,32.9,63,0
764,122,0,36.8,27,0
765,121,112,26.2,30,0
766,126,0,30.1,47,1


In [72]:
df.isnull().sum()

Unnamed: 0,0
Glucose,0
Insulin,0
BMI,0
Age,0
Outcome,0


In [73]:
df.duplicated().sum()

0

In [74]:
# Function to detect outliers using IQR
def calculate_outliers(df, column):
    Q1 = df[column].quantile(0.25)
    Q3 = df[column].quantile(0.75)
    IQR = Q3 - Q1
    lower_bound = Q1 - 1.5 * IQR
    upper_bound = Q3 + 1.5 * IQR

    outliers = df[(df[column] < lower_bound) | (df[column] > upper_bound)]
    return outliers

# Detect outliers for each relevant column
for col in ['Glucose','Insulin', 'BMI', 'Age']:
    outliers = calculate_outliers(df, col)
    print(f"Outliers in {col}:")
    print(outliers, "\n")

Outliers in Glucose:
     Glucose  Insulin   BMI  Age  Outcome
75         0        0  24.7   22        0
182        0       23  27.7   21        0
342        0        0  32.0   22        0
349        0        0  41.0   37        1
502        0        0  39.0   41        1 

Outliers in Insulin:
     Glucose  Insulin   BMI  Age  Outcome
8        197      543  30.5   53        1
13       189      846  30.1   59        1
54       150      342  34.7   42        0
111      155      495  34.0   46        1
139      105      325  36.9   28        0
153      153      485  40.6   23        0
186      181      495  30.1   60        1
220      177      478  34.6   21        1
228      197      744  36.7   31        0
231      134      370  46.2   46        1
247      165      680  52.3   23        0
248      124      402  35.4   34        0
258      193      375  25.9   24        0
286      155      545  38.7   34        0
296      146      360  28.0   29        1
360      189      325  31.2   29

In [75]:
X = df[['Glucose','Insulin', 'BMI', 'Age']]
y = df['Outcome']  # Target variable

# Split dataset
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [76]:
df.describe()

Unnamed: 0,Glucose,Insulin,BMI,Age,Outcome
count,768.0,768.0,768.0,768.0,768.0
mean,120.894531,79.799479,31.992578,33.240885,0.348958
std,31.972618,115.244002,7.88416,11.760232,0.476951
min,0.0,0.0,0.0,21.0,0.0
25%,99.0,0.0,27.3,24.0,0.0
50%,117.0,30.5,32.0,29.0,0.0
75%,140.25,127.25,36.6,41.0,1.0
max,199.0,846.0,67.1,81.0,1.0


In [77]:
df.head()

Unnamed: 0,Glucose,Insulin,BMI,Age,Outcome
0,148,0,33.6,50,1
1,85,0,26.6,31,0
2,183,0,23.3,32,1
3,89,94,28.1,21,0
4,137,168,43.1,33,1


In [78]:
X_train

Unnamed: 0,Glucose,Insulin,BMI,Age
60,84,0,0.0,21
618,112,0,28.2,50
346,139,83,28.7,22
294,161,0,21.9,65
231,134,370,46.2,46
...,...,...,...,...
71,139,140,28.6,26
106,96,0,22.4,27
270,101,0,45.6,38
435,141,0,42.4,29


In [79]:
X_test

Unnamed: 0,Glucose,Insulin,BMI,Age
668,98,190,34.0,43
324,112,0,35.7,21
624,108,0,30.8,21
690,107,0,24.6,34
473,136,0,29.9,50
...,...,...,...,...
355,165,0,30.4,49
534,77,56,33.3,24
344,95,0,36.8,57
296,146,360,28.0,29


In [80]:
naive_bayes_model = GaussianNB()
naive_bayes_model.fit(X_train, y_train)
y_pred_nb = naive_bayes_model.predict(X_test)

Naive bayes model

In [81]:
naive_bayes_model = GaussianNB()
naive_bayes_model.fit(X_train, y_train)
y_pred_nb = naive_bayes_model.predict(X_test)

Perceptron model

In [82]:
# Using sklearn's Perceptron
perceptron_model = Perceptron(max_iter=1000, tol=1e-3, random_state=42)
perceptron_model.fit(X_train, y_train)
y_pred_perceptron = perceptron_model.predict(X_test)

In [83]:
# Evaluation metrics
def evaluate_model(y_test, y_pred):
    return {
        'accuracy': accuracy_score(y_test, y_pred),
        'precision': precision_score(y_test, y_pred, average='weighted'),
        'recall': recall_score(y_test, y_pred, average='weighted'),
        'f1_score': f1_score(y_test, y_pred, average='weighted')
    }

nb_metrics = evaluate_model(y_test, y_pred_nb)
perceptron_metrics = evaluate_model(y_test, y_pred_perceptron)

print("Naive Bayes Metrics:", nb_metrics)
print("Perceptron Metrics:", perceptron_metrics)

Naive Bayes Metrics: {'accuracy': 0.7467532467532467, 'precision': 0.7457671957671957, 'recall': 0.7467532467532467, 'f1_score': 0.7462278627738957}
Perceptron Metrics: {'accuracy': 0.5844155844155844, 'precision': 0.5709456580405078, 'recall': 0.5844155844155844, 'f1_score': 0.5761877566560606}


Custom perceptron

In [84]:
class CustomPerceptron:
    def __init__(self, learning_rate=0.01, n_iterations=1000):
        self.learning_rate = learning_rate
        self.n_iterations = n_iterations
        self.weights = None
        self.bias = 0

    def fit(self, X, y):
        # Initialize weights and bias
        n_samples, n_features = X.shape
        self.weights = np.zeros(n_features)

        # Convert y from {0, 1} to {-1, 1} for compatibility with the Perceptron algorithm
        y_ = np.where(y <= 0, -1, 1)

        # Training loop
        for _ in range(self.n_iterations):
            for idx, x_i in enumerate(X):
                # Calculate linear output
                linear_output = np.dot(x_i, self.weights) + self.bias
                y_predicted = np.sign(linear_output)

                # Update weights and bias if prediction is incorrect
                if y_predicted != y_[idx]:
                    self.weights += self.learning_rate * y_[idx] * x_i
                    self.bias += self.learning_rate * y_[idx]

    def predict(self, X):
        # Calculate the linear output and make predictions
        linear_output = np.dot(X, self.weights) + self.bias
        return np.where(linear_output >= 0, 1, 0)


In [86]:
# Assuming X_train and y_train are defined
Cperceptron = CustomPerceptron(learning_rate=0.01, n_iterations=1000)
Cperceptron.fit(X_train, y_train)

UFuncTypeError: ufunc 'multiply' did not contain a loop with signature matching types (dtype('<U7'), dtype('<U32')) -> None

In [87]:
import numpy as np
import pandas as pd

# Assuming X_train is a pandas DataFrame

# Convert relevant columns in X_train to numerical type
for col in X_train.columns:
    if X_train[col].dtype == 'object':  # Check for object (string) type
        try:
            X_train[col] = pd.to_numeric(X_train[col])
        except ValueError:
            # Handle non-numeric values (e.g., using one-hot encoding or other strategies)
            print(f"Column '{col}' contains non-numeric values and needs further processing.")
            # Example: Using one-hot encoding for categorical features
            # X_train = pd.get_dummies(X_train, columns=[col])

In [88]:
# Predict on test data
y_pred_Cperceptron =Cperceptron.predict(X_test)

In [89]:
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score

# Example evaluation
accuracy = accuracy_score(y_test, y_pred_Cperceptron)
precision = precision_score(y_test, y_pred_Cperceptron)
recall = recall_score(y_test, y_pred_Cperceptron)
f1 = f1_score(y_test, y_pred_Cperceptron)

print("Accuracy:", accuracy)
print("Precision:", precision)
print("Recall:", recall)
print("F1 Score:", f1)

Accuracy: 0.35714285714285715
Precision: 0.35714285714285715
Recall: 1.0
F1 Score: 0.5263157894736842


In [90]:
with open('naive_bayes_model.pkl', 'wb') as f:
    pickle.dump(naive_bayes_model, f)

with open('perceptron_model.pkl', 'wb') as f:
    pickle.dump(perceptron_model, f)