# IMPORTING LIBRARIES

In [2]:
import pandas as pd
import numpy as np
from sklearn.svm import SVC
from sklearn.model_selection import train_test_split,GridSearchCV
from sklearn import metrics 
from sklearn import preprocessing

In [3]:
data=pd.read_csv('zoo.data',sep=',',header=None)
data.columns=["animal_name","hair","feathers","egg","milk","airborne","aquatic","predator","toothed","backbone","breathes","venomous","fins","legs","tail","domestic","catsize","type"]

In [4]:
data

Unnamed: 0,animal_name,hair,feathers,egg,milk,airborne,aquatic,predator,toothed,backbone,breathes,venomous,fins,legs,tail,domestic,catsize,type
0,aardvark,1,0,0,1,0,0,1,1,1,1,0,0,4,0,0,1,1
1,antelope,1,0,0,1,0,0,0,1,1,1,0,0,4,1,0,1,1
2,bass,0,0,1,0,0,1,1,1,1,0,0,1,0,1,0,0,4
3,bear,1,0,0,1,0,0,1,1,1,1,0,0,4,0,0,1,1
4,boar,1,0,0,1,0,0,1,1,1,1,0,0,4,1,0,1,1
5,buffalo,1,0,0,1,0,0,0,1,1,1,0,0,4,1,0,1,1
6,calf,1,0,0,1,0,0,0,1,1,1,0,0,4,1,1,1,1
7,carp,0,0,1,0,0,1,0,1,1,0,0,1,0,1,1,0,4
8,catfish,0,0,1,0,0,1,1,1,1,0,0,1,0,1,0,0,4
9,cavy,1,0,0,1,0,0,0,1,1,1,0,0,4,0,1,0,1


In [5]:
data = pd.concat([data,pd.get_dummies(data['animal_name'], prefix='animal_name')],axis=1)
data.drop(['animal_name'],axis=1, inplace=True)

In [6]:
names=["hair","feathers","egg","milk","airborne","aquatic","predator","toothed","backbone","breathes","venomous","fins","tail","domestic","catsize"]
for name in names:
    data = pd.concat([data,pd.get_dummies(data[name], prefix=name)],axis=1)
    data.drop([name],axis=1, inplace=True)

In [7]:
data.shape

(101, 132)

In [8]:
y=np.array(data['type'])
data.drop(["type"],axis=1, inplace=True)
x=np.array(data)

x_train,x_test,y_train,y_test=train_test_split(x,y,test_size=0.2,shuffle=True)
len(x_train),len(y_train),len(x_test),len(y_test)

(80, 80, 21, 21)

# LINEAR KERNEL

In [9]:
lin_svm=SVC(kernel="linear")
parameters={"C":[0.1,1,5,10,15,20,50,70,100],"gamma":[0.001,0.01,0.1,1,10,20]}
clf_lin=GridSearchCV(lin_svm,parameters,cv=5)
clf_lin.fit(x_train,y_train)
predictions=clf_lin.predict(x_test)



In [10]:
print(metrics.accuracy_score(y_test,predictions))

0.9523809523809523


In [11]:
print(metrics.classification_report(y_test,predictions))

              precision    recall  f1-score   support

           1       1.00      1.00      1.00         6
           2       1.00      1.00      1.00         5
           3       1.00      1.00      1.00         1
           4       1.00      1.00      1.00         5
           6       0.50      1.00      0.67         1
           7       1.00      0.67      0.80         3

   micro avg       0.95      0.95      0.95        21
   macro avg       0.92      0.94      0.91        21
weighted avg       0.98      0.95      0.96        21



# Radial Bias KERNEL

In [12]:
rb_svm=SVC(kernel='rbf')
parameters={"C":[0.1,1,5,10,15,20,50,70,100],"gamma":[0.001,0.01,0.1,1,10,20]}
clf_rb=GridSearchCV(rb_svm,parameters,cv=5)
clf_rb.fit(x_train,y_train)
predictions=clf_rb.predict(x_test)



In [13]:
print(metrics.accuracy_score(y_test,predictions))

1.0


In [14]:
print(metrics.classification_report(y_test,predictions))

              precision    recall  f1-score   support

           1       1.00      1.00      1.00         6
           2       1.00      1.00      1.00         5
           3       1.00      1.00      1.00         1
           4       1.00      1.00      1.00         5
           6       1.00      1.00      1.00         1
           7       1.00      1.00      1.00         3

   micro avg       1.00      1.00      1.00        21
   macro avg       1.00      1.00      1.00        21
weighted avg       1.00      1.00      1.00        21



# POLYNOMIAL KERNEL

In [15]:
poly_svm=SVC(kernel='polynomial')
parameters={"C":[0.1,1,5,10,15,20,50,70,100],"gamma":[0.001,0.01,0.1,1,10,20]}
clf_poly=GridSearchCV(rb_svm,parameters,cv=5)
clf_poly.fit(x_train,y_train)
predictions=clf_poly.predict(x_test)



In [16]:
print(metrics.accuracy_score(y_test,predictions))

1.0


In [17]:
print(metrics.classification_report(y_test,predictions))

              precision    recall  f1-score   support

           1       1.00      1.00      1.00         6
           2       1.00      1.00      1.00         5
           3       1.00      1.00      1.00         1
           4       1.00      1.00      1.00         5
           6       1.00      1.00      1.00         1
           7       1.00      1.00      1.00         3

   micro avg       1.00      1.00      1.00        21
   macro avg       1.00      1.00      1.00        21
weighted avg       1.00      1.00      1.00        21



# SIGMOID KERNEL

In [19]:
sig_svm=SVC(kernel='sigmoid',probability=True)
parameters={"C":[0.1,1,5,10,15,20,50,70,100],"gamma":[0.001,0.01,0.1,1,10,20]}
clf_sig=GridSearchCV(rb_svm,parameters,cv=5)
clf_sig.fit(x_train,y_train)
predictions=clf_sig.predict(x_test)



In [20]:
print(metrics.accuracy_score(y_test,predictions))

1.0


In [21]:
print(metrics.classification_report(y_test,predictions))

              precision    recall  f1-score   support

           1       1.00      1.00      1.00         6
           2       1.00      1.00      1.00         5
           3       1.00      1.00      1.00         1
           4       1.00      1.00      1.00         5
           6       1.00      1.00      1.00         1
           7       1.00      1.00      1.00         3

   micro avg       1.00      1.00      1.00        21
   macro avg       1.00      1.00      1.00        21
weighted avg       1.00      1.00      1.00        21



In [None]:
### second method --->https://www.discoverbits.in/371/sklearn-attributeerror-predict_proba-available-probability 

In [25]:
from sklearn.calibration import CalibratedClassifierCV
svm = SVC()
clf = CalibratedClassifierCV(svm)
clf.fit(x_train, y_train)
CalibratedClassifierCV(base_estimator=SVC(C=1.0, cache_size=200, class_weight=None, coef0=0.0,
  decision_function_shape='ovr', degree=3, gamma='auto', kernel='sigmoid',
  max_iter=-1, probability=False, random_state=None, shrinking=True,
  tol=0.001, verbose=False))

#array([[0.02352877, 0.64021213, 0.33625911],



CalibratedClassifierCV(base_estimator=SVC(C=1.0, cache_size=200, class_weight=None, coef0=0.0,
  decision_function_shape='ovr', degree=3, gamma='auto', kernel='sigmoid',
  max_iter=-1, probability=False, random_state=None, shrinking=True,
  tol=0.001, verbose=False),
            cv='warn', method='sigmoid')

In [33]:
probs=clf.predict_proba(x_test)

In [34]:
probs = probs[:, 1]

In [35]:
type(probs),probs.shape,y_test.shape

(numpy.ndarray, (21,), (21,))

In [43]:
probs,y_test

(array([0.04544757, 0.15159044, 0.5441022 , 0.00346411, 0.04606372,
        0.54660806, 0.20701116, 0.05968216, 0.58322878, 0.5719295 ,
        0.57129024, 0.05049616, 0.03629886, 0.58909882, 0.00114734,
        0.20576224, 0.04554916, 0.58725633, 0.20734392, 0.20576224,
        0.04556018]),
 array([1, 4, 3, 6, 1, 7, 4, 1, 2, 2, 2, 1, 7, 2, 7, 4, 1, 2, 4, 4, 1]))

In [44]:
import matplotlib.pyplot as plt
from sklearn.metrics import precision_recall_curve
precision, recall,_ = precision_recall_curve(y_test,probs)


ValueError: multiclass format is not supported

In [80]:
######### tensorflow
data2=pd.read_csv('zoo.data',sep=',',header=None)
data2.columns=["animal_name","hair","feathers","egg","milk","airborne","aquatic","predator","toothed","backbone","breathes","venomous","fins","legs","tail","domestic","catsize","type"]
data2.head()

Unnamed: 0,animal_name,hair,feathers,egg,milk,airborne,aquatic,predator,toothed,backbone,breathes,venomous,fins,legs,tail,domestic,catsize,type
0,aardvark,1,0,0,1,0,0,1,1,1,1,0,0,4,0,0,1,1
1,antelope,1,0,0,1,0,0,0,1,1,1,0,0,4,1,0,1,1
2,bass,0,0,1,0,0,1,1,1,1,0,0,1,0,1,0,0,4
3,bear,1,0,0,1,0,0,1,1,1,1,0,0,4,0,0,1,1
4,boar,1,0,0,1,0,0,1,1,1,1,0,0,4,1,0,1,1


In [119]:
x1=data2.iloc[:,1:17]
y1=data2.iloc[:,17]

In [120]:
x1.shape,y1.shape

((101, 16), (101,))

In [121]:
train_x, test_x, train_y, test_y = train_test_split(x1, y1, test_size=0.3, random_state=42, stratify=y1)
train_x.shape,test_x.shape,train_y.shape,test_y.shape

((70, 16), (31, 16), (70,), (31,))

In [122]:
train_x=np.array(train_x)
test_x=np.array(test_x)
train_y=np.array(train_y)
test_y=np.array(test_y)


In [123]:
train_x.shape,test_x.shape,train_y.shape,test_y.shape

((70, 16), (31, 16), (70,), (31,))

In [124]:
test_y=test_y.reshape(-1,1)
train_y=train_y.reshape(-1,1)

In [104]:
import tensorflow as tf

In [105]:
X = tf.placeholder(tf.float32, [None,16]) 
Y = tf.placeholder(tf.int32, [None, 1])


In [106]:
Y_one_hot = tf.one_hot(Y, 7)  # one hot encoding
Y_one_hot = tf.reshape(Y_one_hot, [-1, 7])

In [107]:
w1 = tf.Variable(tf.random_normal([16,10],seed=0), name='weight1')
b1 = tf.Variable(tf.random_normal([10],seed=0), name='bias1')
w2 = tf.Variable(tf.random_normal([10,7],seed=0), name='weight2')
b2 = tf.Variable(tf.random_normal([7],seed=0), name='bias2')

In [108]:
logits = tf.matmul(X, w1) + b1

In [109]:
in_layer1=tf.add(tf.matmul(X,w1),b1)
out_layer1=tf.nn.relu(in_layer1)
  
in_layer2=tf.add(tf.matmul(out_layer1,w2),b2)
logits=tf.nn.relu(in_layer2)
  

In [110]:
cost=tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=logits,labels=Y_one_hot))

In [111]:
optimizer = tf.train.AdamOptimizer(learning_rate = 0.01)
optimize_step = optimizer.minimize(cost)

In [112]:
prediction = tf.argmax(logits, 1)
correct_prediction = tf.equal(prediction, tf.argmax(Y_one_hot, 1))
accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))

In [117]:
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    for step in range(5001):
        sess.run(optimize_step, feed_dict={X: train_x, Y: train_y})
        if step % 1000 == 0:
            loss, acc = sess.run([cost, accuracy], feed_dict={X: train_x, Y: train_y})
            print("Step: {:5}\tLoss: {:.3f}\tAcc: {:.2%}".format(step, loss, acc))
            
    train_acc = sess.run(accuracy, feed_dict={X: train_x, Y: train_y})
    test_acc,test_predict,test_correct = sess.run([accuracy,prediction,correct_prediction], feed_dict={X: test_x, Y: test_y})

Step:     0	Loss: 19.614	Acc: 10.00%
Step:  1000	Loss: 0.723	Acc: 62.86%
Step:  2000	Loss: 0.723	Acc: 62.86%
Step:  3000	Loss: 0.723	Acc: 62.86%
Step:  4000	Loss: 0.723	Acc: 62.86%
Step:  5000	Loss: 0.723	Acc: 62.86%


In [118]:
print("Model Prediction =", train_acc)
print("Test Prediction =", test_acc)

Model Prediction = 0.62857145
Test Prediction = 0.61290324
