# Naive Bayes Classifier

#### Best Params
Split Size - 0.2  
Random State - 9917  
Scaling - True  

Accuracy - 75.34%

In [1]:
import pandas as pd

from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score
from sklearn.naive_bayes import GaussianNB
from sklearn.model_selection import train_test_split

In [2]:
data = pd.read_csv('datasets/magic04.data', 
                   names=['fLength', 'fWidth', 'fSize', 'fConc', 'fConc1', 'fAsym', 'fM3Long', 'fM3Trans', 'fAlpha', 'fDist', 'class'])

data.head()

Unnamed: 0,fLength,fWidth,fSize,fConc,fConc1,fAsym,fM3Long,fM3Trans,fAlpha,fDist,class
0,28.7967,16.0021,2.6449,0.3918,0.1982,27.7004,22.011,-8.2027,40.092,81.8828,g
1,31.6036,11.7235,2.5185,0.5303,0.3773,26.2722,23.8238,-9.9574,6.3609,205.261,g
2,162.052,136.031,4.0612,0.0374,0.0187,116.741,-64.858,-45.216,76.96,256.788,g
3,23.8172,9.5728,2.3385,0.6147,0.3922,27.2107,-6.4633,-7.1513,10.449,116.737,g
4,75.1362,30.9205,3.1611,0.3168,0.1832,-5.5277,28.5525,21.8393,4.648,356.462,g


In [3]:
data['class'] = (data['class'] == 'g').astype(int)
data['class'].unique()

array([1, 0])

In [4]:
X = data.drop('class', axis=1)
y = data['class']

scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)
X = X_scaled
X.shape

(19020, 10)

##### Training and evaluation function

In [5]:
def train_eval(X, y, iter_num:int):
    
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=iter_num)
    
    nb_model = GaussianNB()
    nb_model.fit(X_train, y_train)
    
    y_pred = nb_model.predict(X_test)
    accuracy = accuracy_score(y_test, y_pred)
    
    return accuracy

##### Hyperparameter tuning

In [6]:
acc = -1

for i in range(0, 25_000):
    best_acc = train_eval(X, y, i)
    
    if best_acc > acc:
        acc = best_acc
        print(f'Accurarcy {round(acc, 5)} Iter {i}')
        
    if i%1000 == 0:
        print(f'----------------------------------------------------------------------------------------> Iter {i}')

Accurarcy 0.72056 Iter 0
----------------------------------------------------------------------------------------> Iter 0
Accurarcy 0.72923 Iter 1
Accurarcy 0.73002 Iter 4
Accurarcy 0.73081 Iter 6
Accurarcy 0.74343 Iter 14
Accurarcy 0.75237 Iter 79
----------------------------------------------------------------------------------------> Iter 1000
----------------------------------------------------------------------------------------> Iter 2000
----------------------------------------------------------------------------------------> Iter 3000
----------------------------------------------------------------------------------------> Iter 4000
----------------------------------------------------------------------------------------> Iter 5000
----------------------------------------------------------------------------------------> Iter 6000
----------------------------------------------------------------------------------------> Iter 7000
Accurarcy 0.75263 Iter 7977
-----------------------