In [5]:
import numpy as np
import pandas as pd
from sklearn.metrics import confusion_matrix, precision_score, recall_score, f1_score
from sklearn.preprocessing import LabelEncoder
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt

# Load the dataset
data = pd.read_csv("/nb.csv")

# Separate the features and the target variable
X = data.iloc[:, :-1]
y = data.iloc[:, -1]

# Convert categorical string data to numerical data using label encoding
le = LabelEncoder()
X = X.apply(le.fit_transform)
y = le.fit_transform(y)

class NaiveBayes:
    def __init__(self):
        self.lr_model = LogisticRegression()
    
    def fit(self, X_train, y_train):
        # Fit the logistic regression model on the training set
        self.lr_model.fit(X_train, y_train)

    def predict(self, X_test):
        # Predict the class of a given instance using the logistic regression model
        predictions = self.lr_model.predict(X_test)
        return predictions

    def score(self, X_test, y_test):
        # Calculate the accuracy of your logistic regression model on the testing set
        y_pred = self.predict(X_test)
        accuracy = np.mean(y_pred == y_test)
        cm = confusion_matrix(y_test, y_pred)
        precision = precision_score(y_test, y_pred, pos_label=1)
        recall = recall_score(y_test, y_pred, pos_label=1)
        f1 = f1_score(y_test, y_pred, pos_label=1)
        return accuracy, cm, precision, recall, f1

# Initialize lists to store the results
accuracies = []
precisions = []
recalls = []
f1_scores = []

# Run 10 iterations of test-train splits
for i in range(10):
    # Split the dataset into training and testing sets
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33, random_state=i)

    # Train the model
    nb = NaiveBayes()
    nb.fit(X_train, y_train)

    # Test the model
    accuracy, cm, precision, recall, f1 = nb.score(X_test, y_test)
    accuracies.append(accuracy)
    precisions.append(precision)
    recalls.append(recall)
    f1_scores.append(f1)
    
    # Print the results of the current iteration
    print(f"Iteration {i+1}")
    print("Accuracy:", accuracy)
    print("Precision:", precision)
    print("Recall:", recall)
    print("F1 Score:", f1)
    print("confusion matrix", cm)
    print("="*50)

# Calculate and print the average and variance of the results over all iterations
print("Average Accuracy:", np.mean(accuracies))
print("Variance of Accuracy:", np.var(accuracies))
print("Average Precision:", np.mean(precisions))
print("Variance of Precision:", np.var(precisions))
print("Average Recall:", np.mean(recalls))
print("Variance of Recall:", np.mean(recall))


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


Iteration 1
Accuracy: 0.7831022704440426
Precision: 0.807848130570668
Recall: 0.9332709280556298
F1 Score: 0.8660420673822671
confusion matrix [[ 816 1660]
 [ 499 6979]]
Iteration 2
Accuracy: 0.7791842475386779
Precision: 0.801669907354455
Recall: 0.9379098086444534
F1 Score: 0.8644548593981253
confusion matrix [[ 747 1734]
 [ 464 7009]]


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


Iteration 3
Accuracy: 0.7816958006831425
Precision: 0.8009836440581036
Recall: 0.9417697686928456
F1 Score: 0.8656900920946906
confusion matrix [[ 778 1740]
 [ 433 7003]]
Iteration 4
Accuracy: 0.7893309222423146
Precision: 0.804618726420487
Recall: 0.9477511446269863
F1 Score: 0.8703394546466333
confusion matrix [[ 819 1709]
 [ 388 7038]]


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


Iteration 5
Accuracy: 0.7747639140044204
Precision: 0.7964119450437153
Recall: 0.9398365268658716
F1 Score: 0.8622003687768901
confusion matrix [[ 698 1793]
 [ 449 7014]]
Iteration 6
Accuracy: 0.7820976491862568
Precision: 0.8034622042700519
Recall: 0.9372644049542272
F1 Score: 0.8652209034984154
confusion matrix [[ 823 1703]
 [ 466 6962]]


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


Iteration 7
Accuracy: 0.7931484830219008
Precision: 0.8141551983346825
Recall: 0.9396689802455953
F1 Score: 0.8724208439184583
confusion matrix [[ 855 1607]
 [ 452 7040]]
Iteration 8
Accuracy: 0.775065300381756
Precision: 0.797084815792494
Recall: 0.9364886731391586
F1 Score: 0.8611817223634447
confusion matrix [[ 770 1768]
 [ 471 6945]]
Iteration 9
Accuracy: 0.774362065501306
Precision: 0.7971246006389776
Recall: 0.9372149181647438
F1 Score: 0.8615119003576274
confusion matrix [[ 722 1778]
 [ 468 6986]]
Iteration 10
Accuracy: 0.7846092023307213
Precision: 0.8080924855491329
Recall: 0.9352421728659353
F1 Score: 0.8670305135202183
confusion matrix [[ 820 1660]
 [ 484 6990]]
Average Accuracy: 0.7817359855334539
Variance of Accuracy: 3.521765724098988e-05
Average Precision: 0.8031451658032769
Variance of Precision: 2.95246433589116e-05
Average Recall: 0.9386417326255447
Variance of Recall: 0.9352421728659353


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
