In [1]:
import numpy as mp
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

In [2]:
# Load the zoo dataset; the path is relative to the current working directory.
zoo_csv_path = 'Zoo.csv'
df = pd.read_csv(zoo_csv_path)

In [3]:
# Preview the first five rows to sanity-check the columns that were read.
df.head(n=5)

Unnamed: 0,hair,feathers,eggs,milk,airborne,aquatic,predator,toothed,backbone,breathes,venomous,fins,legs,tail,domestic,catsize,type
0,1,0,0,1,0,0,1,1,1,1,0,0,4,0,0,1,1
1,1,0,0,1,0,0,0,1,1,1,0,0,4,1,0,1,1
2,0,0,1,0,0,1,1,1,1,0,0,1,0,1,0,0,4
3,1,0,0,1,0,0,1,1,1,1,0,0,4,0,0,1,1
4,1,0,0,1,0,0,1,1,1,1,0,0,4,1,0,1,1


In [4]:
# Count missing values per column (all zeros means no imputation is needed).
missing_per_column = df.isna().sum()
missing_per_column

hair        0
feathers    0
eggs        0
milk        0
airborne    0
aquatic     0
predator    0
toothed     0
backbone    0
breathes    0
venomous    0
fins        0
legs        0
tail        0
domestic    0
catsize     0
type        0
dtype: int64

In [5]:
# Pairwise Pearson correlation between every pair of columns, including `type`.
df.corr(method='pearson')

Unnamed: 0,hair,feathers,eggs,milk,airborne,aquatic,predator,toothed,backbone,breathes,venomous,fins,legs,tail,domestic,catsize,type
hair,1.0,-0.427851,-0.817382,0.878503,-0.198431,-0.473554,-0.154769,0.492531,0.191681,0.441149,-0.104245,-0.280313,0.394009,0.048973,0.207208,0.45502,-0.562384
feathers,-0.427851,1.0,0.419248,-0.410761,0.656553,-0.058552,-0.10443,-0.613631,0.231403,0.254588,-0.145739,-0.223541,-0.206686,0.292569,0.031586,-0.135934,-0.19752
eggs,-0.817382,0.419248,1.0,-0.938848,0.376646,0.376244,0.011605,-0.64215,-0.34042,-0.382777,0.098689,0.164796,-0.224918,-0.22109,-0.15561,-0.51465,0.661825
milk,0.878503,-0.410761,-0.938848,1.0,-0.366765,-0.362613,-0.029721,0.628168,0.384958,0.423527,-0.242449,-0.156328,0.214196,0.210026,0.163928,0.574906,-0.723683
airborne,-0.198431,0.656553,0.376646,-0.366765,1.0,-0.172638,-0.295181,-0.594311,-0.104718,0.286039,0.008528,-0.251157,0.043712,0.009482,0.063274,-0.349768,0.022677
aquatic,-0.473554,-0.058552,0.376244,-0.362613,-0.172638,1.0,0.375978,0.05315,0.022463,-0.637506,0.087915,0.604492,-0.360638,-0.034642,-0.224308,-0.111866,0.326639
predator,-0.154769,-0.10443,0.011605,-0.029721,-0.295181,0.375978,1.0,0.129452,0.051022,-0.262931,0.115391,0.190302,-0.099723,0.018947,-0.309794,0.14479,0.061179
toothed,0.492531,-0.613631,-0.64215,0.628168,-0.594311,0.05315,0.129452,1.0,0.575085,-0.06569,-0.062344,0.364292,-0.193476,0.310368,0.06943,0.34401,-0.471527
backbone,0.191681,0.231403,-0.34042,0.384958,-0.104718,0.022463,0.051022,0.575085,1.0,0.207666,-0.246611,0.209499,-0.432856,0.731762,0.101733,0.356976,-0.828845
breathes,0.441149,0.254588,-0.382777,0.423527,0.286039,-0.637506,-0.262931,-0.06569,0.207666,1.0,-0.120752,-0.617219,0.369868,0.088952,0.124068,0.204125,-0.519308


In [6]:
# Feature subset used by every model below.
# NOTE(review): presumably chosen from the correlation table above - confirm.
# `type` is a class label, so its Pearson correlation is only a rough guide.
feature_columns = [
    'hair',
    'eggs',
    'milk',
    'backbone',
    'breathes',
    'tail',
    'catsize',
]
x = df[feature_columns]

# Target: the animal class label.
y = df['type']

In [7]:
from sklearn.model_selection import train_test_split

# Fixed seed so the split (and every score below) is reproducible.
# test_size is not passed, so scikit-learn's default split ratio applies.
split_seed = 123456
x_train, x_test, y_train, y_test = train_test_split(
    x,
    y,
    random_state=split_seed,
)

In [8]:
from sklearn.linear_model import LogisticRegressionCV

# Cross-validated logistic regression, with the solver iteration cap set to 1000.
lg_settings = {'max_iter': 1000}
model_lg = LogisticRegressionCV(**lg_settings)
model_lg.fit(X=x_train, y=y_train)



In [9]:
from sklearn.naive_bayes import GaussianNB

# Gaussian naive Bayes with library defaults, fitted on the training split.
model_nb = GaussianNB()
model_nb.fit(X=x_train, y=y_train)

In [10]:
from sklearn.neighbors import KNeighborsClassifier

# k-nearest-neighbours classifier voting over the 5 closest training samples.
knn_neighbors = 5
model_knn = KNeighborsClassifier(n_neighbors=knn_neighbors)
model_knn.fit(X=x_train, y=y_train)

In [11]:
from sklearn.svm import SVC

# Support-vector classifier with C=2.0 and gamma='auto'; the kernel is left at
# the library default.
svm_settings = {'C': 2.0, 'gamma': 'auto'}
model_svm = SVC(**svm_settings)
model_svm.fit(X=x_train, y=y_train)

In [12]:
from sklearn.metrics import confusion_matrix, accuracy_score, precision_score, recall_score, f1_score

def evaluate_model(model, model_name, x_eval=None, y_eval=None,
                   average='micro', include_f1=False):
    """Score a fitted classifier and return one row for the results table.

    Parameters
    ----------
    model : fitted estimator
        Any object exposing ``predict(X)``.
    model_name : str
        Label placed in the first position of the returned tuple.
    x_eval, y_eval : optional
        Features and ground-truth labels to score against. When left as
        ``None`` the notebook-level ``x_test`` / ``y_test`` are used, which
        is what the original implementation always did.
    average : str, default 'micro'
        Averaging mode forwarded to ``precision_score``, ``recall_score`` and
        ``f1_score``. With ``'micro'`` on a single-label multiclass problem
        precision, recall and F1 all equal accuracy, so the metric columns
        carry no extra information; pass ``'macro'`` or ``'weighted'`` to get
        per-class-sensitive numbers.
    include_f1 : bool, default False
        When True, the F1 score is appended as a fifth element. The default
        keeps the 4-tuple shape that existing callers unpack into four
        DataFrame columns.

    Returns
    -------
    tuple
        ``(model_name, accuracy, precision, recall)`` or, with
        ``include_f1=True``,
        ``(model_name, accuracy, precision, recall, f1)``.
    """
    # `is None` checks: a DataFrame/Series has no unambiguous truth value.
    features = x_test if x_eval is None else x_eval
    y_true = y_test if y_eval is None else y_eval

    # get the predictions
    y_pred = model.predict(features)

    accuracy = accuracy_score(y_true, y_pred)
    precision = precision_score(y_true, y_pred, average=average)
    recall = recall_score(y_true, y_pred, average=average)

    if include_f1:
        # Only computed on request; it used to be calculated and thrown away.
        f1 = f1_score(y_true, y_pred, average=average)
        return model_name, accuracy, precision, recall, f1

    return model_name, accuracy, precision, recall

In [13]:
# Score every fitted model on the held-out split, in the same order as before.
fitted_models = [
    (model_lg, 'Logistic Regression'),
    (model_nb, 'Naive Bayes'),
    (model_knn, 'KNN'),
    (model_svm, 'SVM'),
]
result_lg, result_nb, result_knn, result_svm = [
    evaluate_model(fitted, label) for fitted, label in fitted_models
]

In [14]:
# Assemble one row per algorithm into a comparison table.
metric_columns = ['Algorithm', 'Accuracy', 'Precision', 'Recall']
result_rows = [result_lg, result_nb, result_knn, result_svm]
result_df = pd.DataFrame(result_rows, columns=metric_columns)
result_df

Unnamed: 0,Algorithm,Accuracy,Precision,Recall
0,Logistic Regression,0.923077,0.923077,0.923077
1,Naive Bayes,0.923077,0.923077,0.923077
2,KNN,0.923077,0.923077,0.923077
3,SVM,0.923077,0.923077,0.923077
