# Classification with Naive Bayes
Create a Naive Bayes classification model with scikit-learn on the Titanic dataset to predict whether a passenger survived the sinking, given the other features.

*   The **survived** column is the target attribute (class) to predict.
*   Remove **alive** and **deck** columns from the dataframe.
*   Remove every row that has a missing value in any of the remaining columns.
*   Encode **embarked** and **embark_town** with one-hot encoding.
*   Encode **class**, **sex**, **who**, **adult_male**, and **alone** with index-based (label) encoding; see [Categorical encoding using Label-Encoding and One-Hot-Encoder](https://towardsdatascience.com/categorical-encoding-using-label-encoding-and-one-hot-encoder-911ef77fb5bd).
* Train a Naive Bayes model with scikit-learn using a 70% training / 30% testing split.
* Report the performance of the model, including accuracy, recall, precision, and F1-score


In [46]:
import seaborn as sns
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, OneHotEncoder
from sklearn.naive_bayes import GaussianNB
from sklearn.metrics import accuracy_score, recall_score, precision_score, f1_score
from sklearn.metrics import classification_report

# Load seaborn's example "titanic" dataset into a DataFrame.
titanicDF = sns.load_dataset("titanic")
print(titanicDF)
# DataFrame.info() writes its summary to stdout itself and returns None, so it is
# called directly; wrapping it in print() would append a stray "None" line.
titanicDF.info()

     survived  pclass     sex   age  sibsp  parch     fare embarked   class  \
0           0       3    male  22.0      1      0   7.2500        S   Third   
1           1       1  female  38.0      1      0  71.2833        C   First   
2           1       3  female  26.0      0      0   7.9250        S   Third   
3           1       1  female  35.0      1      0  53.1000        S   First   
4           0       3    male  35.0      0      0   8.0500        S   Third   
..        ...     ...     ...   ...    ...    ...      ...      ...     ...   
886         0       2    male  27.0      0      0  13.0000        S  Second   
887         1       1  female  19.0      0      0  30.0000        S   First   
888         0       3  female   NaN      1      2  23.4500        S   Third   
889         1       1    male  26.0      0      0  30.0000        C   First   
890         0       3    male  32.0      0      0   7.7500        Q   Third   

       who  adult_male deck  embark_town alive  alo

In [53]:
# --- Preprocessing ------------------------------------------------------------
# Derive a new frame from the raw data (titanicDF itself is never mutated), so
# re-running this cell always starts from the same input: drop the two unused
# columns, then drop every row that still contains a missing value. The original
# row index is kept as-is.
df = titanicDF.drop(columns=['alive', 'deck']).dropna()

# One-hot encode the embarkation columns. drop_first=True omits the first level
# of each column, and dtype=int emits plain 0/1 integers, so the dummy columns
# are model-ready and must not be passed through LabelEncoder afterwards.
df = pd.get_dummies(df, columns=['embarked', 'embark_town'], drop_first=True, dtype=int)

# Label-encode only the remaining categorical / boolean feature columns.
# Each fitted encoder is stored under its column name so the integer codes can
# later be mapped back to the original labels.
label_encoders = {}
for column in ['class', 'sex', 'who', 'adult_male', 'alone']:
    encoder = LabelEncoder()
    df[column] = encoder.fit_transform(df[column])
    label_encoders[column] = encoder

print(df['class'].value_counts())
print()
print(df)
print()

# --- Train / test split -------------------------------------------------------
# Features are every column except the target; 'survived' is the class label.
x = df.drop('survived', axis=1)
y = df['survived']

# 70% training / 30% testing; the fixed random_state keeps the split reproducible.
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.3, random_state=85)

# --- Model --------------------------------------------------------------------
model = GaussianNB()
model.fit(x_train, y_train)

y_pre = model.predict(x_test)

# --- Evaluation ---------------------------------------------------------------
# Per-class report first, then the individual scores (computed with the metric
# functions' default settings).
print(classification_report(y_test, y_pre))

accuracy = accuracy_score(y_test, y_pre)
recall = recall_score(y_test, y_pre)
precision = precision_score(y_test, y_pre)
f1 = f1_score(y_test, y_pre)

print(f'accuracy score : {accuracy}')
print(f'recall : {recall}')
print(f'precision : {precision}')
print(f'f1 : {f1}')

class
2    355
0    184
1    173
Name: count, dtype: int64

     survived  pclass  sex   age  sibsp  parch     fare  class  who  \
0           0       3    1  22.0      1      0   7.2500      2    1   
1           1       1    0  38.0      1      0  71.2833      0    2   
2           1       3    0  26.0      0      0   7.9250      2    2   
3           1       1    0  35.0      1      0  53.1000      0    2   
4           0       3    1  35.0      0      0   8.0500      2    1   
..        ...     ...  ...   ...    ...    ...      ...    ...  ...   
885         0       3    0  39.0      0      5  29.1250      2    2   
886         0       2    1  27.0      0      0  13.0000      1    1   
887         1       1    0  19.0      0      0  30.0000      0    2   
889         1       1    1  26.0      0      0  30.0000      0    1   
890         0       3    1  32.0      0      0   7.7500      2    1   

     adult_male  alone  embarked_Q  embarked_S  embark_town_Queenstown  \
0            