# Naive Bayes Implementation

This notebook implements a Naive Bayes classifier for binary classification, distinguishing between normal and tumor classes based on genetic data. It covers data loading, model training with the Gaussian Naive Bayes algorithm from scikit-learn, and subsequent manual calculations that demonstrate the probability computations underlying the Naive Bayes approach.

In [1]:
import pandas as pd
import numpy as np
from sklearn.naive_bayes import GaussianNB

In [2]:
# Load the table and split it. Class labels are encoded 0 = normal, 1 = tumor.
df = pd.read_csv("BAYES.csv")
data = np.array(df)

# Skip the first column, then split by row: the first 14 rows vs. the remainder.
x1 = data[:14, 1:]
y1 = data[14:, 1:]

# Within the first 14 rows: four feature columns followed by the label column.
x, y = x1[:, :4], x1[:, 4]

# Sample to classify, one value per feature column.
X_TEST = [1, -1, 1, 1]

In [3]:
# Fit scikit-learn's Gaussian Naive Bayes on the feature/label arrays and classify
# the test sample. X_TEST (defined in the data-loading cell) is reused so the sample
# is written down only once; predict() expects a 2-D input, hence the wrapping list.
# NOTE(review): GaussianNB models each feature with a per-class Gaussian, while the
# manual cells below count discrete gene values (1, 0, -1) — the two approaches
# need not agree on every sample.
model = GaussianNB()
model.fit(x, y)
y_pred = model.predict([X_TEST])
y_pred

array([0], dtype=int64)

In [4]:
# Class priors estimated from the label vector: P(class) = count(class) / total.
# The counts are derived from y itself rather than a hard-coded 14-row loop, so
# the cell keeps working if the size of the training slice changes.
n = int(np.count_nonzero(y == 0))  # samples labelled 0 (normal)
t = len(y) - n                     # every other label is counted as tumor
pn = n/(n+t)
pt = t/(n+t)
print('P(Normal)' , pn)
print('P(Tumor)' , pt )

P(Normal) 0.35714285714285715
P(Tumor) 0.6428571428571429


In [5]:
# Per-class count tables used by the manual Naive Bayes calculation.
#   rows    : gene value 1 -> row 0, value 0 -> row 1, value -1 -> row 2
#   columns : feature column j of x is stored in column j+1, so genes are
#             numbered from 1 and column 0 always stays zero
# `on` counts samples with label 0 (normal), `ot` samples with label 1 (tumor).
# Sizes are taken from x instead of the literals 14 and 4.
s = (3, x.shape[1] + 1)
on = np.zeros(s)
ot = np.zeros(s)
is_normal = (y == 0)
is_tumor = (y == 1)
for row, value in enumerate((1, 0, -1)):
    matches = (x == value)  # boolean mask, same shape as x
    on[row, 1:] = matches[is_normal].sum(axis=0)
    ot[row, 1:] = matches[is_tumor].sum(axis=0)

In [6]:
# Module-level placeholders kept from the original cell; prob() builds and
# returns its own arrays and never fills these dicts.
probn= {}
probt = {}
def prob(*fn):
    """Return the class-conditional value frequencies for one or more genes.

    Parameters
    ----------
    *fn : int
        Column indices into the count tables ``on`` / ``ot``. Genes are
        numbered from 1 because column 0 of those tables is unused.

    Returns
    -------
    tuple
        ``(('NORMAL', normal), ('TUMOR', tumor))`` where each array has shape
        ``(3, len(fn))``. Row 0/1/2 holds count(value | class) / count(class)
        for gene value 1/0/-1; ``normal`` divides ``on`` by ``n`` and
        ``tumor`` divides ``ot`` by ``t``.
    """
    genes = list(fn)
    # One sliced division per class replaces the per-row loop, which rebuilt
    # both result arrays on every iteration.
    cond_normal = on[:3, genes] / n
    cond_tumor = ot[:3, genes] / t
    return (('NORMAL' , cond_normal) , ('TUMOR' , cond_tumor))

In [7]:
def test(Gene):
    tn = []
    tt = []
    for i in range(3):
        tn.append((on[i,Gene]/n)*pn)
        tt.append((ot[i,Gene]/t)*pt)
    if tn > tt:
        return "Class Normal" , tn 
    else:
        return "Class Tumor" , tt

In [8]:
# Demo: run the two helpers on a single gene and print what they return.
gene_to_test = 3  # gene number passed to prob() and test()

# Class-conditional probabilities: prob() returns (label, array) pairs, normal first.
probs = prob(gene_to_test)
print(
    f"Conditional Probabilities for Gene {gene_to_test}:",
    f"Normal: {probs[0][1]}",
    f"Tumor: {probs[1][1]}",
    sep="\n",
)

# Single-gene prediction: test() returns the class label and its probability list.
result, probabilities = test(gene_to_test)
print(
    f"\nPrediction for Gene {gene_to_test}: {result}",
    f"Probabilities: {probabilities}",
    sep="\n",
)


Conditional Probabilities for Gene 3:
Normal: [[0.8]
 [0.2]
 [0. ]]
Tumor: [[0.33333333]
 [0.66666667]
 [0.        ]]

Prediction for Gene 3: Class Normal
Probabilities: [0.28571428571428575, 0.07142857142857144, 0.0]
