In [47]:
# Naive Bayes Algorithm

# The Naive Bayes algorithm is a family of simple yet powerful probabilistic classifiers based on applying Bayes' theorem with
# strong (naive) independence assumptions between the features. It's particularly popular for text classification tasks such as
# spam detection, sentiment analysis, and document categorization.

# There are several types of Naive Bayes classifiers, depending on the nature of the feature data:

# Gaussian Naive Bayes: Assumes that the features follow a normal (Gaussian) distribution. It's used for continuous data.

# Multinomial Naive Bayes: Used for discrete data, particularly in text classification where features represent word frequencies.

# Bernoulli Naive Bayes: Assumes binary features (Os and Is), used for binary/boolean features, such as in text classification
# tasks where the presence or absence Of a word is considered. Steps Of Naive Bayes Algorithm Training Phase:

# Calculate the prior probability for each class. Calculate the likelihood for each feature given each class. If using Gaussian
# Naive Bayes, calculate the mean and variance Of the features for each class.

In [48]:
import numpy as np
import pandas as pd

In [49]:
df = pd.read_csv("D:\\Jupyter\\Test\\Social_Network_Ads.csv",usecols=['Age','EstimatedSalary','Purchased'])
df.head()

Unnamed: 0,Age,EstimatedSalary,Purchased
0,19,19000,0
1,35,20000,0
2,26,43000,0
3,27,57000,0
4,19,76000,0


In [50]:
x=df.drop(columns=['Purchased'])
y=df['Purchased']

In [51]:
from sklearn.model_selection import train_test_split
x_train,x_test,y_train,y_test=train_test_split(x,y,test_size=0.2,random_state=42)

In [52]:
from sklearn.preprocessing import StandardScaler
sc=StandardScaler()

In [53]:
x_train_sc=sc.fit_transform(x_train)
x_test_sc=sc.transform(x_test)


In [54]:
from sklearn.naive_bayes import GaussianNB,MultinomialNB,BernoulliNB
classifier = GaussianNB()
classifier.fit(x_train_sc,y_train)

In [55]:
y_pred=classifier.predict(x_test_sc)

In [56]:
from sklearn.metrics import confusion_matrix,precision_score,recall_score,f1_score,accuracy_score
cm=confusion_matrix(y_test,y_pred)
print(cm)

[[50  2]
 [ 3 25]]


In [57]:
accuracy_score(y_test,y_pred)

0.9375

In [58]:
precision_score(y_test,y_pred)

0.9259259259259259

In [59]:
recall_score(y_test,y_pred)

0.8928571428571429

In [60]:
f1_score(y_test,y_pred)

0.9090909090909091

In [61]:
# Now apply naive bayed on covid data

In [62]:
df1=pd.read_csv("D:\\Jupyter\\Test\\covid_toy.csv")
df1.head()

Unnamed: 0,age,gender,fever,cough,city,has_covid
0,60,Male,103.0,Mild,Kolkata,No
1,27,Male,100.0,Mild,Delhi,Yes
2,42,Male,101.0,Mild,Delhi,No
3,31,Female,98.0,Mild,Kolkata,No
4,65,Female,101.0,Mild,Mumbai,No


In [63]:
df1.isnull().sum()
from sklearn.impute import SimpleImputer
si=SimpleImputer()
df1['fever']=si.fit_transform(df1[['fever']])
df1.isnull().sum()

age          0
gender       0
fever        0
cough        0
city         0
has_covid    0
dtype: int64

In [64]:
from sklearn.preprocessing import LabelEncoder
le=LabelEncoder()
df1['gender']=le.fit_transform(df1['gender'])
df1['cough']=le.fit_transform(df1['cough'])
df1['city']=le.fit_transform(df1['city'])
df1['has_covid']=le.fit_transform(df1['has_covid'])
df1.head()

Unnamed: 0,age,gender,fever,cough,city,has_covid
0,60,1,103.0,0,2,0
1,27,1,100.0,0,1,1
2,42,1,101.0,0,1,0
3,31,0,98.0,0,2,0
4,65,0,101.0,0,3,0


In [65]:
x=df1.drop(columns=['has_covid'])
y=df1['has_covid']

In [66]:
from sklearn.model_selection import train_test_split
x_train,x_test,y_train,y_test=train_test_split(x,y,test_size=0.2,random_state=42)

In [67]:
from sklearn.preprocessing import StandardScaler
sc1=StandardScaler()
x_train_sc1=sc1.fit_transform(x_train)
x_test_sc1=sc1.transform(x_test)

In [68]:
from sklearn.naive_bayes import GaussianNB,MultinomialNB,BernoulliNB
classifier1 = BernoulliNB()
classifier1.fit(x_train_sc1,y_train)
y_pred1=classifier1.predict(x_test_sc1)

In [69]:
from sklearn.metrics import confusion_matrix,accuracy_score,recall_score,precision_score,f1_score

In [70]:
confusion_matrix(y_test,y_pred1)

array([[7, 6],
       [1, 6]], dtype=int64)

In [71]:
recall_score(y_test,y_pred1)

0.8571428571428571

In [72]:
precision_score(y_test,y_pred1)

0.5

In [73]:
accuracy_score(y_test,y_pred1)

0.65

In [74]:
f1_score(y_test,y_pred1)

0.631578947368421

In [75]:
# Now apply this on Titanic dataset

In [79]:
df2=pd.read_csv("D:\\Jupyter\\Test\\titanic.csv",usecols=['Survived','Pclass','Sex','Age','SibSp','Parch','Fare'])
df2.head()

Unnamed: 0,Survived,Pclass,Sex,Age,SibSp,Parch,Fare
0,0,3,male,34.5,0,0,7.8292
1,1,3,female,47.0,1,0,7.0
2,0,2,male,62.0,0,0,9.6875
3,0,3,male,27.0,0,0,8.6625
4,1,3,female,22.0,1,1,12.2875


In [82]:
df2.isnull().sum()
from sklearn.impute import SimpleImputer
si=SimpleImputer()
df2['Age']=si.fit_transform(df2[['Age']])
df2['Fare']=si.fit_transform(df2[['Fare']])
df2.isnull().sum()

Survived    0
Pclass      0
Sex         0
Age         0
SibSp       0
Parch       0
Fare        0
dtype: int64

In [85]:
from sklearn.preprocessing import LabelEncoder
le=LabelEncoder()
df2['Sex']=le.fit_transform(df2['Sex'])
df2.head()

Unnamed: 0,Survived,Pclass,Sex,Age,SibSp,Parch,Fare
0,0,3,1,34.5,0,0,7.8292
1,1,3,0,47.0,1,0,7.0
2,0,2,1,62.0,0,0,9.6875
3,0,3,1,27.0,0,0,8.6625
4,1,3,0,22.0,1,1,12.2875


In [86]:
x=df2.drop(columns=['Survived'])
y=df2['Survived']

In [87]:
from sklearn.model_selection import train_test_split
x_train,x_test,y_train,y_test=train_test_split(x,y,test_size=0.2,random_state=42)

In [88]:
from sklearn.preprocessing import StandardScaler
sc2=StandardScaler()
x_train_sc2=sc2.fit_transform(x_train)
x_test_sc2=sc2.transform(x_test)

In [89]:
from sklearn.naive_bayes import GaussianNB,MultinomialNB,BernoulliNB
bnb=BernoulliNB()
bnb.fit(x_train_sc2,y_train)
y_pred2=bnb.predict(x_test_sc2)

In [90]:
from sklearn.metrics import confusion_matrix,accuracy_score,recall_score,precision_score,f1_score

In [91]:
confusion_matrix(y_test,y_pred2)

array([[50,  0],
       [ 0, 34]], dtype=int64)

In [92]:
recall_score(y_test,y_pred2)

1.0

In [93]:
accuracy_score(y_test,y_pred2)

1.0

In [94]:
precision_score(y_test,y_pred2)

1.0

In [95]:
f1_score(y_test,y_pred2)

1.0

In [96]:
from sklearn.ensemble import RandomForestClassifier
rfc=RandomForestClassifier()
rfc.fit(x_train_sc2,y_train)
y_pred3=rfc.predict(x_test_sc2)

In [97]:
from sklearn.metrics import confusion_matrix,accuracy_score,precision_score,recall_score,f1_score

In [100]:
print(confusion_matrix(y_test,y_pred3))
print(accuracy_score(y_test,y_pred3))
print(precision_score(y_test,y_pred3))
print(recall_score(y_test,y_pred3))
print(f1_score(y_test,y_pred3))

[[50  0]
 [ 0 34]]
1.0
1.0
1.0
1.0


In [104]:
from sklearn.linear_model import LogisticRegression
lr=LogisticRegression()
lr.fit(x_train_sc2,y_train)
y_pred4=lr.predict(x_test_sc2)

In [105]:
from sklearn.metrics import confusion_matrix,accuracy_score,precision_score,recall_score,f1_score
print(confusion_matrix(y_test,y_pred4))
print(accuracy_score(y_test,y_pred4))
print(precision_score(y_test,y_pred4))
print(recall_score(y_test,y_pred4))
print(f1_score(y_test,y_pred4))

[[50  0]
 [ 0 34]]
1.0
1.0
1.0
1.0
