In [2]:
import pandas as pd
import numpy as np

from get_samples import split_data
from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import MinMaxScaler
import naivebayes
from NormalizeNRemoveOutliers import remove_outliers_iqr

X_train, X_test, y_train, y_test = split_data()
scaler_minmax = MinMaxScaler()
scaler_standard = StandardScaler()
X_train_minmax = scaler_minmax.fit_transform(X_train)
X_test_minmax = scaler_minmax.fit_transform(X_test)
X_train_standard = scaler_standard.fit_transform(X_train)
X_test_standard = scaler_standard.fit_transform(X_test)

# Run gaussian naive bayes with outliers present
print("Running naive bayes with outliers present")
y_pred = naivebayes.run_naive_bayes(X_train, X_test, y_train, "gaussian")
y_pred_minmax = naivebayes.run_naive_bayes(X_train_minmax, X_test_minmax, y_train, "gaussian")
y_pred_standard = naivebayes.run_naive_bayes(X_train_standard, X_test_standard, y_train, "gaussian")
acc = naivebayes.accuracy_scorer(y_test, y_pred)
acc_minmax = naivebayes.accuracy_scorer(y_test, y_pred_minmax)
acc_standard = naivebayes.accuracy_scorer(y_test, y_pred_standard)
print(f"Accuracy Gaussian Naive Bayes: {acc}")
print(f"Accuracy Gaussian Naive Bayes min max normalized: {acc_minmax}")
print(f"Accuracy Gaussian Naive Bayes standardized: {acc_standard}")

# Remove outliers
train_set = np.concatenate([X_train, y_train[:,None]], axis=1)
df = pd.DataFrame(train_set)
(X_train, y_train) = remove_outliers_iqr(df, len(X_train[0]))
X_train_minmax = scaler_minmax.fit_transform(X_train)
X_test_minmax = scaler_minmax.fit_transform(X_test)
X_train_standard = scaler_standard.fit_transform(X_train)
X_test_standard = scaler_standard.fit_transform(X_test)

# Run gaussian naive bayes without outliers
print("Running naive bayes without outliers")
y_pred = naivebayes.run_naive_bayes(X_train, X_test, y_train, "gaussian")
y_pred_minmax = naivebayes.run_naive_bayes(X_train_minmax, X_test_minmax, y_train, "gaussian")
y_pred_standard = naivebayes.run_naive_bayes(X_train_standard, X_test_standard, y_train, "gaussian")
acc = naivebayes.accuracy_scorer(y_test, y_pred)
acc_minmax = naivebayes.accuracy_scorer(y_test, y_pred_minmax)
acc_standard = naivebayes.accuracy_scorer(y_test, y_pred_standard)
print(f"Accuracy Gaussian Naive Bayes: {acc}")
print(f"Accuracy Gaussian Naive Bayes min max normalized: {acc_minmax}")
print(f"Accuracy Gaussian Naive Bayes standardized: {acc_standard}")

# Run complement naive bayes

y_pred = naivebayes.run_naive_bayes(X_train, X_test, y_train, "complement")
acc = naivebayes.accuracy_scorer(y_test, y_pred)
print(f"Accuracy Complement Naive Bayes: {acc}")

# Run multinomial naive bayes

y_pred = naivebayes.run_naive_bayes(X_train, X_test, y_train, "multinomial")
acc = naivebayes.accuracy_scorer(y_test, y_pred)
print(f"Accuracy Multinomial Naive Bayes: {acc}")

# Run bernoulli naive bayes

y_pred = naivebayes.run_naive_bayes(X_train, X_test, y_train, "bernoulli")
acc = naivebayes.accuracy_scorer(y_test, y_pred)
print(f"Accuracy Bernoulli Naive Bayes: {acc}")

Running naive bayes with outliers present
Accuracy Gaussian Naive Bayes: 0.6286430586023191
Accuracy Gaussian Naive Bayes min max normalized: 0.6242557192102789
Accuracy Gaussian Naive Bayes standardized: 0.6104669382638671
Running naive bayes without outliers
Accuracy Gaussian Naive Bayes: 0.6643685365089314
Accuracy Gaussian Naive Bayes min max normalized: 0.5841429019116264
Accuracy Gaussian Naive Bayes standardized: 0.6251958633657161
Accuracy Complement Naive Bayes: 0.5368223127546223
Accuracy Multinomial Naive Bayes: 0.6659354434346599
Accuracy Bernoulli Naive Bayes: 0.21780006267627702
