In [3]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split

In [4]:
#getting data set

df = pd.read_csv("/mushrooms.csv")

In [5]:
df.shape

(8124, 23)

In [6]:
df.head()

Unnamed: 0,class,cap-shape,cap-surface,cap-color,bruises,odor,gill-attachment,gill-spacing,gill-size,gill-color,...,stalk-surface-below-ring,stalk-color-above-ring,stalk-color-below-ring,veil-type,veil-color,ring-number,ring-type,spore-print-color,population,habitat
0,p,x,s,n,t,p,f,c,n,k,...,s,w,w,p,w,o,p,k,s,u
1,e,x,s,y,t,a,f,c,b,k,...,s,w,w,p,w,o,p,n,n,g
2,e,b,s,w,t,l,f,c,b,n,...,s,w,w,p,w,o,p,n,n,m
3,p,x,y,w,t,p,f,c,n,n,...,s,w,w,p,w,o,p,k,s,u
4,e,x,s,g,f,n,f,w,b,k,...,s,w,w,p,w,o,e,n,a,g


In [7]:
le = LabelEncoder()

In [8]:
df_encoded = df.apply(le.fit_transform, axis = 0)

In [9]:
df_encoded.head()

Unnamed: 0,class,cap-shape,cap-surface,cap-color,bruises,odor,gill-attachment,gill-spacing,gill-size,gill-color,...,stalk-surface-below-ring,stalk-color-above-ring,stalk-color-below-ring,veil-type,veil-color,ring-number,ring-type,spore-print-color,population,habitat
0,1,5,2,4,1,6,1,0,1,4,...,2,7,7,0,2,1,4,2,3,5
1,0,5,2,9,1,0,1,0,0,4,...,2,7,7,0,2,1,4,3,2,1
2,0,0,2,8,1,3,1,0,0,5,...,2,7,7,0,2,1,4,3,2,3
3,1,5,3,8,1,6,1,0,1,5,...,2,7,7,0,2,1,4,2,3,5
4,0,5,2,3,0,5,1,1,0,4,...,2,7,7,0,2,1,0,3,0,1


In [10]:
df = df_encoded.values

In [11]:
X = df[:, 1: ]
X

array([[5, 2, 4, ..., 2, 3, 5],
       [5, 2, 9, ..., 3, 2, 1],
       [0, 2, 8, ..., 3, 2, 3],
       ...,
       [2, 2, 4, ..., 0, 1, 2],
       [3, 3, 4, ..., 7, 4, 2],
       [5, 2, 4, ..., 4, 1, 2]])

In [12]:
y = df[:, 0]
y

array([1, 0, 0, ..., 0, 1, 0])

In [13]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.20, random_state = 42)

#Posterior Probability

 ##Likely Hood
 ##Prior Probability

In [14]:
# Prior probability

def prior_propab(y, label):
  m = y.shape[0]
  s = np.sum(y_train == label)

  return m/s

In [15]:
def cond_propab(X_train, y_train, feature_col, feature_val, label):

  X_filtered = X_train[y_train == label]
  num = np.sum(X_filtered[:, feature_col] == feature_val)

  denom = X_filtered.shape[0]

  return float(num / denom)

In [16]:
def predict(X_train, y_train, X_test):
  classes = np.unique(y_train)
  n_featurs = X_train.shape[1]
  posterior_propab = []

  for label in classes:
    likelyhood = 1.0
    for fea in range(n_featurs):
        cond = cond_propab(X_train, y_train, fea, X_test[fea], label)
        likelyhood = likelyhood * cond
    prior = prior_propab(y_train, label)
    post = likelyhood * prior

    posterior_propab.append(post)

    pred = np.argmax(posterior_propab)

    return pred

In [17]:
def accuracy(X_train, y_train, X_test, y_test):
  pred = []

  for i in range(X_test.shape[0]):
    p = predict(X_train, y_train, X_test[1])
    pred.append(p)

  y_pred = np.array(pred)
  acc = np.sum(y_pred == y_pred.shape[0])

  return acc

In [18]:
acc = accuracy(X_train, y_train, X_test, y_test)

In [20]:
print(acc*100)

0
