# Lab 5:
> Implement Bayesian Classification Algorithm

## Imports

In [1]:
import pandas as pd
from IPython.display import display_html
from sklearn.naive_bayes import GaussianNB
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix

In [2]:
def build_model(X_train, y_train):
    clf = GaussianNB()
    clf = clf.fit(X_train, y_train)
    return clf

In [3]:
def prediction_using_model(clf, X_test, y_test):
    X_test = X_test.reset_index()
    del X_test['index']
    y_test = y_test.reset_index()
    del y_test['index']
    y_pred = clf.predict(X_test)
    predictions = pd.concat([X_test, pd.Series(y_pred, name = 'predicted_class')], axis = 1)
    print("Do you want to view the class label prediction for top five tuples of test data ?")
    choice = input()
    if choice == 'yes':
        display_html(predictions.head())
    print("Do you want to view Evaluation result of the model?")
    choice = input()
    if choice == 'yes':
        model_evaluation(y_pred, y_test)
    else:
        quit()

In [4]:
def model_evaluation(y_pred, y_test):
    print("Confusion Matrix")
    report = confusion_matrix(y_test, y_pred)
    cf = pd.DataFrame(report).transpose()
    display_html(cf)
    score = accuracy_score(y_test, y_pred)
    print(f"Naive Bayes Accuracy: {score}")
    print("Classification Report")
    report = classification_report(y_test, y_pred, output_dict = True)
    df = pd.DataFrame(report).transpose()
    display_html(df[['precision', 'recall', 'f1-score']].head(3))


In [15]:
def main():
    data = pd.read_csv("https://gist.githubusercontent.com/netj/8836201/raw/6f9306ad21398ea43cba4f7d537619d0e07d5ae3/iris.csv")
    print("Do you want to view top five data tuples of Iris Dataset?")
    choice = input()
    if choice == 'yes':
        display_html(data.head())
    y = data['variety']
    X = data.drop(['variety'], axis = 1)
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.25, random_state = 1)
    clf = build_model(X_train, y_train)
    prediction_using_model(clf, X_test, y_test)

In [16]:
main()

Do you want to view top five data tuples of Iris Dataset?
yes


Unnamed: 0,sepal.length,sepal.width,petal.length,petal.width,variety
0,5.1,3.5,1.4,0.2,Setosa
1,4.9,3.0,1.4,0.2,Setosa
2,4.7,3.2,1.3,0.2,Setosa
3,4.6,3.1,1.5,0.2,Setosa
4,5.0,3.6,1.4,0.2,Setosa


Do you want to view the class label prediction for top five tuples of test data ?
yes


Unnamed: 0,sepal.length,sepal.width,petal.length,petal.width,predicted_class
0,5.8,4.0,1.2,0.2,Setosa
1,5.1,2.5,3.0,1.1,Versicolor
2,6.6,3.0,4.4,1.4,Versicolor
3,5.4,3.9,1.3,0.4,Setosa
4,7.9,3.8,6.4,2.0,Virginica


Do you want to view Evaluation result of the model?
yes
Confusion Matrix


Unnamed: 0,0,1,2
0,13,0,0
1,0,15,0
2,0,1,9


Naive Bayes Accuracy: 0.9736842105263158
Classification Report


Unnamed: 0,precision,recall,f1-score
Setosa,1.0,1.0,1.0
Versicolor,1.0,0.9375,0.967742
Virginica,0.9,1.0,0.947368
