In [None]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import GaussianNB
from sklearn.naive_bayes import CategoricalNB
from sklearn.metrics import classification_report

In [None]:
# Load the raw data file. It has no header row, so the column names are
# supplied explicitly; fields are comma-separated (the read_csv default).
df = pd.read_csv(
    "/content/glass.data",
    names=["Id", "RI", "Na", "Mg", "Al", "Si", "K", "Ca", "Ba", "Fe", "Type"],
)

In [None]:
# Remove the row-identifier column so it is not used as a feature.
# Reassigning (instead of inplace=True) together with errors="ignore" keeps
# this cell idempotent: re-running it after "Id" is already gone no longer
# raises a KeyError.
df = df.drop(columns=["Id"], errors="ignore")

df

Unnamed: 0,RI,Na,Mg,Al,Si,K,Ca,Ba,Fe,Type
0,1.52101,13.64,4.49,1.10,71.78,0.06,8.75,0.00,0.0,1
1,1.51761,13.89,3.60,1.36,72.73,0.48,7.83,0.00,0.0,1
2,1.51618,13.53,3.55,1.54,72.99,0.39,7.78,0.00,0.0,1
3,1.51766,13.21,3.69,1.29,72.61,0.57,8.22,0.00,0.0,1
4,1.51742,13.27,3.62,1.24,73.08,0.55,8.07,0.00,0.0,1
...,...,...,...,...,...,...,...,...,...,...
209,1.51623,14.14,0.00,2.88,72.61,0.08,9.18,1.06,0.0,7
210,1.51685,14.92,0.00,1.99,73.06,0.00,8.40,1.59,0.0,7
211,1.52065,14.36,0.00,2.02,73.42,0.00,8.44,1.64,0.0,7
212,1.51651,14.38,0.00,1.94,73.61,0.00,8.48,1.57,0.0,7


In [None]:
# Number of rows per Type label (class distribution), most frequent first.
df["Type"].value_counts()

2    76
1    70
7    29
3    17
5    13
6     9
Name: Type, dtype: int64

In [None]:
# Build a long-format count table with one row per (Feature, Value, Type):
# "Count" is the number of rows in `df` whose column `Feature` equals `Value`
# exactly and whose class label is `Type`. Combinations that never occur are
# kept with Count == 0.
#
# Rows are collected in a list and turned into a DataFrame once at the end:
# DataFrame.append was removed in pandas 2.0, and growing a frame row by row
# is quadratic anyway.
class_labels = np.unique(df["Type"])
records = []

for k in df.columns[:-1]:  # every column except the trailing "Type" label
  for j in np.unique(df[k]):
    # Per-class counts among the rows where this feature equals j, computed
    # once per value instead of once per (value, class) pair.
    per_class = df.loc[df[k] == j, "Type"].value_counts().to_dict()

    for i in class_labels:
      records.append({
        "Feature": k,
        "Value": j,
        "Type": i,
        # A class absent for this value simply has no entry -> count 0
        # (replaces the former bare `except:` around the lookup).
        "Count": int(per_class.get(i, 0)),
      })

feature = pd.DataFrame(records, columns = ["Feature","Value","Type","Count"])
  

In [None]:
# Display the (Feature, Value, Type, Count) table built in the previous cell.
feature

Unnamed: 0,Feature,Value,Type,Count
0,RI,1.51115,1,0
1,RI,1.51115,2,0
2,RI,1.51115,3,0
3,RI,1.51115,5,0
4,RI,1.51115,6,1
...,...,...,...,...
5629,Fe,0.51000,2,0
5630,Fe,0.51000,3,0
5631,Fe,0.51000,5,1
5632,Fe,0.51000,6,0


In [None]:
# One hand-written sample to classify, stored as a single-row DataFrame
# (row label 0) with one column per feature.
test = pd.DataFrame(
    {
        "RI": 1.51711,
        "Na": 13.44,
        "Mg": 0.0,
        "Al": 2.08,
        "Si": 73.36,
        "K": 0.0,
        "Ca": 8.28,
        "Ba": 0.00,
        "Fe": 0.00,
    },
    index=[0],
)

In [None]:
# Display the single-row sample that will be classified.
test

Unnamed: 0,RI,Na,Mg,Al,Si,K,Ca,Ba,Fe
0,1.51711,13.44,0.0,2.08,73.36,0.0,8.28,0.0,0.0


In [None]:
# For each feature of the test sample, look up how many training rows of each
# class have exactly that feature value. Despite the name, `feature_prob`
# holds raw counts (one {class: count} dict per feature, in column order);
# they are normalised into probabilities in a later cell.
class_labels = np.unique(df["Type"])
feature_prob = []
for i in test.columns:
  # `test` has a single row; extract its scalar explicitly rather than calling
  # float() on a Series, which is deprecated in recent pandas.
  value = float(test[i].iloc[0])
  # One combined mask instead of chained .loc calls with unaligned masks.
  matches = feature.loc[(feature["Feature"] == i) & (feature["Value"] == value)]
  counts_by_type = dict(zip(matches["Type"], matches["Count"]))
  # A value never seen in the data has no matching rows, so every class gets 0
  # (this replaces the former bare `except:` that hid all other errors too).
  feature_prob.append({j: int(counts_by_type.get(j, 0)) for j in class_labels})

In [None]:
# Display the per-feature {class: count} dictionaries for the test sample.
feature_prob

[{1: 0, 2: 1, 3: 0, 5: 0, 6: 0, 7: 1},
 {1: 0, 2: 1, 3: 0, 5: 1, 6: 0, 7: 1},
 {1: 0, 2: 9, 3: 0, 5: 7, 6: 3, 7: 23},
 {1: 0, 2: 1, 3: 0, 5: 0, 6: 0, 7: 1},
 {1: 0, 2: 1, 3: 0, 5: 0, 6: 0, 7: 1},
 {1: 1, 2: 3, 3: 1, 5: 0, 6: 9, 7: 16},
 {1: 0, 2: 1, 3: 0, 5: 0, 6: 0, 7: 1},
 {1: 67, 2: 70, 3: 16, 5: 11, 6: 9, 7: 3},
 {1: 45, 2: 44, 3: 12, 5: 11, 6: 9, 7: 23}]

In [None]:
# Unnormalised Naive Bayes score per class:
#   score(c) = P(c) * prod over features of count(feature == value, c) / count(c)
# The class labels are taken from the data instead of being hard-coded, so the
# cell keeps working if the set of Type labels changes.
# Note: no smoothing is applied, so one zero count makes a class's score 0.
counts = df["Type"].value_counts()
class_labels = np.unique(df["Type"]).tolist()

prob = {label: 1 for label in class_labels}
for per_feature_counts in feature_prob:
  for label in class_labels:
    # Likelihood of this feature value given the class.
    prob[label] = prob[label] * (per_feature_counts[label]/counts[label])

for label in class_labels:
  # Multiply by the class prior P(c).
  prob[label] = prob[label] * (counts[label]/len(df))

In [None]:
# Show the unnormalised score of each class, in the key order of `prob`.
prob.values()

dict_values([0.0, 3.4913385926568877e-13, 0.0, 0.0, 0.0, 2.3719151708685433e-10])

In [None]:
# Report the class with the highest score (the first one wins on ties).
best_type = max(prob, key=prob.get)
print("The test data belongs to the type : ", best_type)

The test data belongs to the type :  7


## Using a Naive Bayes Model


In [None]:
# Hold out 20% of the rows for evaluation. A fixed random_state makes the split,
# and therefore every metric reported below, reproducible across re-runs.
X_train,X_test,y_train,y_test = train_test_split(
    df.drop(["Type"],axis = 1), df["Type"], test_size=0.2, random_state=42
)

In [None]:
# Gaussian Naive Bayes classifier, constructed with its default parameters.
nb = GaussianNB()

In [None]:
# Fit the classifier on the training split only.
nb.fit(X_train,y_train)

GaussianNB()

In [None]:
# Predict Type labels for the held-out test split.
prediction = nb.predict(X_test)

In [None]:
# Display the predicted labels for the test split.
prediction

array([5, 1, 2, 7, 1, 1, 3, 1, 7, 7, 1, 1, 1, 7, 6, 1, 1, 7, 6, 7, 1, 2,
       1, 7, 1, 1, 2, 1, 3, 1, 1, 2, 2, 1, 3, 1, 7, 6, 1, 3, 1, 3, 1])

In [None]:
# Per-class precision / recall / F1 of the predictions against the true test labels.
print(classification_report(y_test,prediction))

              precision    recall  f1-score   support

           1       0.43      0.69      0.53        13
           2       0.60      0.21      0.32        14
           3       0.20      0.33      0.25         3
           5       1.00      0.50      0.67         2
           6       0.67      1.00      0.80         2
           7       1.00      0.89      0.94         9

    accuracy                           0.56        43
   macro avg       0.65      0.60      0.58        43
weighted avg       0.63      0.56      0.55        43



## Using CategoricalNB instead of GaussianNB

In [None]:
# Categorical Naive Bayes classifier, constructed with its default parameters.
nb1 = CategoricalNB()

In [None]:
# Fit on the full dataset (all rows of df, no train/test split).
# NOTE(review): CategoricalNB is documented for features encoded as
# non-negative integer category codes, but these columns are passed as raw
# values - confirm this is intended, or encode/bin the features first.
nb1.fit(df.drop(["Type"],axis = 1),df["Type"])

CategoricalNB()

In [None]:
# Classify the single hand-written sample with the fitted CategoricalNB model.
nb1.predict(test)

array([7])