In [2]:
import pandas as pd
import numpy as np
from sklearn.metrics import confusion_matrix
from sklearn.model_selection import train_test_split

class NaiveBayesClassifier():
    def __init__(self):
        self.prior={}
        self.conditional={}
        
    def fit(self,X,y):
        self.classes=np.unique(y)
        for c in self.classes:
            self.prior[c]=np.mean(y == c)
            
        for feature in X.columns:
            self.conditional[feature]={}
            for c in self.classes:
                feature_value=X[feature][y == c]
                print("feature values:",feature_value)
                self.conditional[feature][c]={'mean':np.mean(feature_value),'std':np.std(feature_value)}
                
    def predict(self,X):
        y_pred=[]
        for _,sample in X.iterrows():
            probabilities={}
            for c in self.classes:
                probabilities[c]=self.prior[c]
                for feature in X.columns:
                    mean=self.conditional[feature][c]['mean']
                    std=self.conditional[feature][c]['std']
                    x=sample[feature]
                    probabilities[c]*=self.gaussian_pdf(x,mean,std)
                
            y_pred.append(max(probabilities,key=probabilities.get))
        return y_pred
    
    
    def gaussian_pdf(self,x,mean,std):
        exponent=np.exp(-((x-mean) ** 2) / (2 * std**2))
        return (1/(np.sqrt(2*np.pi) * std)) * exponent
    
    
    
df=pd.read_csv('Titanic-Dataset.csv')
df=df[['Survived','Pclass','Age','SibSp','Parch','Fare','Embarked']]
df['Age'].fillna(df['Age'].median(),inplace=True)
df['Fare'].fillna(df['Fare'].median(),inplace=True)
df['Embarked'].fillna(df['Embarked'].mode()[0],inplace=True)
df['Embarked']=df['Embarked'].map({'C':0,'Q':1,'S':2})

X=df.drop('Survived',axis=1)
y=df['Survived']

X_train,X_test,y_train,y_test=train_test_split(X,y,test_size=0.2)

#train,test=train_test_split(df,test_size=0.2)

#X_train=train.drop('Survived',axis=1)
#y_train=train['Survived']
#X_test=test.drop('Survived',axis=1)
#y_test=test['Survived']


classifier=NaiveBayesClassifier()
classifier.fit(X_train,y_train)
y_pred=classifier.predict(X_test)

cm=confusion_matrix(y_test,y_pred)
print("Confusion Matrix:\n", cm)
accuracy=np.mean(y_pred == y_test)
print("Accuracy:",accuracy)
                
                
    
        
            

feature values: 883    2
143    3
836    3
361    2
349    3
      ..
100    3
467    1
881    3
594    2
419    3
Name: Pclass, Length: 435, dtype: int64
feature values: 649    3
307    1
186    3
651    2
577    1
      ..
412    1
875    3
230    1
472    2
55     1
Name: Pclass, Length: 277, dtype: int64
feature values: 883    28.0
143    19.0
836    21.0
361    29.0
349    42.0
       ... 
100    28.0
467    56.0
881    33.0
594    37.0
419    10.0
Name: Age, Length: 435, dtype: float64
feature values: 649    23.0
307    17.0
186    28.0
651    18.0
577    39.0
       ... 
412    33.0
875    15.0
230    35.0
472    33.0
55     28.0
Name: Age, Length: 277, dtype: float64
feature values: 883    0
143    0
836    0
361    1
349    0
      ..
100    0
467    0
881    0
594    1
419    0
Name: SibSp, Length: 435, dtype: int64
feature values: 649    0
307    1
186    1
651    0
577    1
      ..
412    1
875    0
230    1
472    1
55     0
Name: SibSp, Length: 277, dtype: int64
feature 