# IMPORTING LIBRARIES

In [3]:
import pandas as pd
import numpy as np
from sklearn.svm import SVC
from sklearn.model_selection import train_test_split,GridSearchCV
from sklearn import metrics 
from sklearn import preprocessing

In [19]:
data=pd.read_csv('zoo.data',sep=',',header=None)
data.columns=["animal_name","hair","feathers","egg","milk","airborne","aquatic","predator","toothed","backbone","breathes","venomous","fins","legs","tail","domestic","catsize","type"]

In [20]:
data

Unnamed: 0,animal_name,hair,feathers,egg,milk,airborne,aquatic,predator,toothed,backbone,breathes,venomous,fins,legs,tail,domestic,catsize,type
0,aardvark,1,0,0,1,0,0,1,1,1,1,0,0,4,0,0,1,1
1,antelope,1,0,0,1,0,0,0,1,1,1,0,0,4,1,0,1,1
2,bass,0,0,1,0,0,1,1,1,1,0,0,1,0,1,0,0,4
3,bear,1,0,0,1,0,0,1,1,1,1,0,0,4,0,0,1,1
4,boar,1,0,0,1,0,0,1,1,1,1,0,0,4,1,0,1,1
5,buffalo,1,0,0,1,0,0,0,1,1,1,0,0,4,1,0,1,1
6,calf,1,0,0,1,0,0,0,1,1,1,0,0,4,1,1,1,1
7,carp,0,0,1,0,0,1,0,1,1,0,0,1,0,1,1,0,4
8,catfish,0,0,1,0,0,1,1,1,1,0,0,1,0,1,0,0,4
9,cavy,1,0,0,1,0,0,0,1,1,1,0,0,4,0,1,0,1


In [21]:
data = pd.concat([data,pd.get_dummies(data['animal_name'], prefix='animal_name')],axis=1)
data.drop(['animal_name'],axis=1, inplace=True)

In [22]:
names=["hair","feathers","egg","milk","airborne","aquatic","predator","toothed","backbone","breathes","venomous","fins","tail","domestic","catsize"]
for name in names:
    data = pd.concat([data,pd.get_dummies(data[name], prefix=name)],axis=1)
    data.drop([name],axis=1, inplace=True)

In [26]:
data.shape

(101, 132)

In [28]:
y=np.array(data['type'])
data.drop(["type"],axis=1, inplace=True)
x=np.array(data)

x_train,x_test,y_train,y_test=train_test_split(x,y,test_size=0.2,shuffle=True)
len(x_train),len(y_train),len(x_test),len(y_test)

(80, 80, 21, 21)

# LINEAR KERNEL

In [46]:
lin_svm=SVC(kernel="linear")
parameters={"C":[0.1,1,5,10,15,20,50,70,100],"gamma":[0.001,0.01,0.1,1,10,20]}
clf_lin=GridSearchCV(lin_svm,parameters,cv=5)
clf_lin.fit(x_train,y_train)
predictions=clf_lin.predict(x_test)



In [47]:
print(metrics.accuracy_score(y_test,predictions))

0.9523809523809523


In [48]:
print(metrics.classification_report(y_test,predictions))

              precision    recall  f1-score   support

           1       1.00      1.00      1.00        11
           2       0.75      1.00      0.86         3
           3       1.00      0.50      0.67         2
           4       1.00      1.00      1.00         2
           5       1.00      1.00      1.00         1
           7       1.00      1.00      1.00         2

   micro avg       0.95      0.95      0.95        21
   macro avg       0.96      0.92      0.92        21
weighted avg       0.96      0.95      0.95        21



# Radial Bias KERNEL

In [49]:
rb_svm=SVC(kernel='rbf')
parameters={"C":[0.1,1,5,10,15,20,50,70,100],"gamma":[0.001,0.01,0.1,1,10,20]}
clf_rb=GridSearchCV(rb_svm,parameters,cv=5)
clf_rb.fit(x_train,y_train)
predictions=clf_rb.predict(x_test)



In [50]:
print(metrics.accuracy_score(y_test,predictions))

0.9523809523809523


In [51]:
print(metrics.classification_report(y_test,predictions))

              precision    recall  f1-score   support

           1       1.00      1.00      1.00        11
           2       1.00      1.00      1.00         3
           3       1.00      0.50      0.67         2
           4       1.00      1.00      1.00         2
           5       0.50      1.00      0.67         1
           7       1.00      1.00      1.00         2

   micro avg       0.95      0.95      0.95        21
   macro avg       0.92      0.92      0.89        21
weighted avg       0.98      0.95      0.95        21



# POLYNOMIAL KERNEL

In [52]:
poly_svm=SVC(kernel='polynomial')
parameters={"C":[0.1,1,5,10,15,20,50,70,100],"gamma":[0.001,0.01,0.1,1,10,20]}
clf_poly=GridSearchCV(rb_svm,parameters,cv=5)
clf_poly.fit(x_train,y_train)
predictions=clf_poly.predict(x_test)



In [53]:
print(metrics.accuracy_score(y_test,predictions))

0.9523809523809523


In [54]:
print(metrics.classification_report(y_test,predictions))

              precision    recall  f1-score   support

           1       1.00      1.00      1.00        11
           2       1.00      1.00      1.00         3
           3       1.00      0.50      0.67         2
           4       1.00      1.00      1.00         2
           5       0.50      1.00      0.67         1
           7       1.00      1.00      1.00         2

   micro avg       0.95      0.95      0.95        21
   macro avg       0.92      0.92      0.89        21
weighted avg       0.98      0.95      0.95        21



# SIGMOID KERNEL

In [55]:
sig_svm=SVC(kernel='sigmoid')
parameters={"C":[0.1,1,5,10,15,20,50,70,100],"gamma":[0.001,0.01,0.1,1,10,20]}
clf_sig=GridSearchCV(rb_svm,parameters,cv=5)
clf_sig.fit(x_train,y_train)
predictions=clf_sig.predict(x_test)



In [56]:
print(metrics.accuracy_score(y_test,predictions))

0.9523809523809523


In [57]:
print(metrics.classification_report(y_test,predictions))

              precision    recall  f1-score   support

           1       1.00      1.00      1.00        11
           2       1.00      1.00      1.00         3
           3       1.00      0.50      0.67         2
           4       1.00      1.00      1.00         2
           5       0.50      1.00      0.67         1
           7       1.00      1.00      1.00         2

   micro avg       0.95      0.95      0.95        21
   macro avg       0.92      0.92      0.89        21
weighted avg       0.98      0.95      0.95        21



In [59]:
probs=clf_sig.predict_proba(x_test)
import matplotlib.pyplot as plt
precision, recall, thresholds = metrics.precision_recall_curve(y_test, probs)
plt.plot(recall,precision,marker=".")

AttributeError: predict_proba is not available when  probability=False