# iris project using SVM

In [1]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
%matplotlib inline
sns.set_style('whitegrid')

In [2]:
from sklearn import datasets
from sklearn import svm

In [3]:
df = sns.load_dataset('iris')

In [4]:
df.head()

Unnamed: 0,sepal_length,sepal_width,petal_length,petal_width,species
0,5.1,3.5,1.4,0.2,setosa
1,4.9,3.0,1.4,0.2,setosa
2,4.7,3.2,1.3,0.2,setosa
3,4.6,3.1,1.5,0.2,setosa
4,5.0,3.6,1.4,0.2,setosa


In [5]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 150 entries, 0 to 149
Data columns (total 5 columns):
sepal_length    150 non-null float64
sepal_width     150 non-null float64
petal_length    150 non-null float64
petal_width     150 non-null float64
species         150 non-null object
dtypes: float64(4), object(1)
memory usage: 5.9+ KB


In [6]:
df.describe()

Unnamed: 0,sepal_length,sepal_width,petal_length,petal_width
count,150.0,150.0,150.0,150.0
mean,5.843333,3.057333,3.758,1.199333
std,0.828066,0.435866,1.765298,0.762238
min,4.3,2.0,1.0,0.1
25%,5.1,2.8,1.6,0.3
50%,5.8,3.0,4.35,1.3
75%,6.4,3.3,5.1,1.8
max,7.9,4.4,6.9,2.5


In [7]:
df['species'].unique()

array(['setosa', 'versicolor', 'virginica'], dtype=object)

In [8]:
X = df.drop('species', axis=1)

In [9]:
X.head()

Unnamed: 0,sepal_length,sepal_width,petal_length,petal_width
0,5.1,3.5,1.4,0.2
1,4.9,3.0,1.4,0.2
2,4.7,3.2,1.3,0.2
3,4.6,3.1,1.5,0.2
4,5.0,3.6,1.4,0.2


In [10]:
species_to_num = {'setosa':0, 'versicolor':1, 'virginica':2}
df['tmp'] = df['species'].map(species_to_num)
y = df['tmp'].values
y

array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
       2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
       2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2], dtype=int64)

In [11]:
from sklearn.preprocessing import StandardScaler
sc_X = StandardScaler()
X_data = sc_X.fit_transform(X)

In [12]:
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X_data, y, train_size=0.8, random_state=42)



In [13]:
X_train.shape,  X_test.shape, y_train.shape, y_test.shape

((120, 4), (30, 4), (120,), (30,))

In [14]:
C=1.0
clf = svm.SVC(kernel='linear', C=C)
clf.fit(X_train,y_train)

SVC(C=1.0, cache_size=200, class_weight=None, coef0=0.0,
  decision_function_shape='ovr', degree=3, gamma='auto_deprecated',
  kernel='linear', max_iter=-1, probability=False, random_state=None,
  shrinking=True, tol=0.001, verbose=False)

In [15]:
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import cross_val_predict
from sklearn.metrics import confusion_matrix
from sklearn.metrics import precision_score, recall_score, f1_score

# cross validation for train dataset

In [16]:
res = cross_val_score(clf, X_train, y_train, cv=3, scoring='accuracy')
print('Avrage Accuracy:\t {0:.4f}'.format(np.mean(res)))
print('Average SD:\t\t {0:.4f}'.format(np.std(res)))

Avrage Accuracy:	 0.9585
Average SD:		 0.0426


In [17]:
y_train_predict = cross_val_predict(clf, X_train, y_train, cv=3)

In [18]:
confusion_matrix(y_train,y_train_predict)

array([[40,  0,  0],
       [ 0, 37,  4],
       [ 0,  1, 38]], dtype=int64)

In [19]:
print("Precision Score:\t {0:.4f}".format(precision_score(y_train, y_train_predict, average='weighted')))
print("Recall SCore: \t\t {0:.4f}".format(recall_score(y_train, y_train_predict, average='weighted')))
print("F1 Score:\t\t {0:.4f}".format(f1_score(y_train, y_train_predict, average='weighted')))

Precision Score:	 0.9601
Recall SCore: 		 0.9583
F1 Score:		 0.9583


# cross validation on test dataset

In [20]:
y_test_predict = cross_val_predict(clf, X_test, y_test, cv=3)

In [21]:
confusion_matrix(y_test, y_test_predict)

array([[10,  0,  0],
       [ 0,  7,  2],
       [ 0,  1, 10]], dtype=int64)

In [22]:
print("Precision Score:\t {0:.4f}".format(precision_score(y_test, y_test_predict, average='weighted')))
print("Recall SCore: \t\t {0:.4f}".format(recall_score(y_test, y_test_predict, average='weighted')))
print("F1 Score:\t\t {0:.4f}".format(f1_score(y_test, y_test_predict, average='weighted')))

Precision Score:	 0.9014
Recall SCore: 		 0.9000
F1 Score:		 0.8992


In [23]:
y_predict = clf.predict(X_test)

In [24]:
confusion_matrix(y_test, y_predict)

array([[10,  0,  0],
       [ 0,  8,  1],
       [ 0,  0, 11]], dtype=int64)

In [25]:
print("Precision Score:\t {0:.4f}".format(precision_score(y_test, y_predict, average='weighted')))
print("Recall SCore: \t\t {0:.4f}".format(recall_score(y_test, y_predict, average='weighted')))
print("F1 Score:\t\t {0:.4f}".format(f1_score(y_test, y_predict, average='weighted')))

Precision Score:	 0.9694
Recall SCore: 		 0.9667
F1 Score:		 0.9664


In [26]:
from sklearn.metrics import accuracy_score

In [27]:
print("Accuracy Score:\t {0:.4f}".format(accuracy_score(y_test, y_predict)))

Accuracy Score:	 0.9667


# using ploynomial kernel

In [1]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
%matplotlib inline
sns.set_style('whitegrid')

In [2]:
from sklearn import datasets
from sklearn import svm

In [4]:
df = sns.load_dataset('iris')

In [5]:
df.head()

Unnamed: 0,sepal_length,sepal_width,petal_length,petal_width,species
0,5.1,3.5,1.4,0.2,setosa
1,4.9,3.0,1.4,0.2,setosa
2,4.7,3.2,1.3,0.2,setosa
3,4.6,3.1,1.5,0.2,setosa
4,5.0,3.6,1.4,0.2,setosa


In [6]:
df.shape

(150, 5)

In [8]:
X = df.drop('species',axis=1)

In [9]:
X.head(2)

Unnamed: 0,sepal_length,sepal_width,petal_length,petal_width
0,5.1,3.5,1.4,0.2
1,4.9,3.0,1.4,0.2


In [10]:
df['species'].unique()

array(['setosa', 'versicolor', 'virginica'], dtype=object)

In [11]:
species_to_num = {'setosa':0, 'versicolor':1, 'virginica':2}

In [13]:
df['tmp'] = df['species'].map(species_to_num)
y = df['tmp'].values
print(y)
y.shape

[0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 2 2 2 2 2 2 2 2 2 2 2
 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2
 2 2]


(150,)

In [14]:
from sklearn.preprocessing import StandardScaler
sc_X = StandardScaler()
X_data = sc_X.fit_transform(X)

In [27]:
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X_data, y, train_size=0.8, random_state=42)



# model Creation

In [130]:
C=1.1
clf = svm.SVC(kernel='poly', degree=3, C=C)
clf.fit(X_train, y_train)



SVC(C=1.1, cache_size=200, class_weight=None, coef0=0.0,
  decision_function_shape='ovr', degree=3, gamma='auto_deprecated',
  kernel='poly', max_iter=-1, probability=False, random_state=None,
  shrinking=True, tol=0.001, verbose=False)

In [131]:
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import cross_val_predict
from sklearn.metrics import confusion_matrix
from sklearn.metrics import precision_score, f1_score, recall_score

# data validation on train data

In [132]:
res = cross_val_score(clf, X_train, y_train, cv=10, scoring='accuracy')
print("Avrage Accuracy: \t {:.4f}".format(np.mean(res)))
print("Average SD: \t\t {:.4f}".format(np.std(res)))

Avrage Accuracy: 	 0.9248
Average SD: 		 0.0698




In [133]:
y_train_predict = cross_val_predict(clf, X_train, y_train, cv=3) 



In [134]:
confusion_matrix(y_train_predict, y_train)

array([[40,  0,  0],
       [ 0, 41, 11],
       [ 0,  0, 28]], dtype=int64)

In [135]:
print("Precision Score:\t {0:.4f}".format(precision_score(y_train, y_train_predict, average='weighted')))
print("Recall SCore: \t\t {0:.4f}".format(recall_score(y_train, y_train_predict, average='weighted')))
print("F1 Score:\t\t {0:.4f}".format(f1_score(y_train, y_train_predict, average='weighted')))

Precision Score:	 0.9277
Recall SCore: 		 0.9083
F1 Score:		 0.9062


# cross validation on test dataset

In [136]:
y_test_predict = cross_val_predict(clf, X_test, y_test, cv=3)



In [137]:
confusion_matrix(y_test, y_test_predict)

array([[10,  0,  0],
       [ 0,  9,  0],
       [ 0,  2,  9]], dtype=int64)

In [138]:
print("Precision Score:\t {0:.4f}".format(precision_score(y_test, y_test_predict, average='weighted')))
print("Recall SCore: \t\t {0:.4f}".format(recall_score(y_test, y_test_predict, average='weighted')))
print("F1 Score:\t\t {0:.4f}".format(f1_score(y_test, y_test_predict, average='weighted')))

Precision Score:	 0.9455
Recall SCore: 		 0.9333
F1 Score:		 0.9333


In [141]:
y_predict = clf.predict(X_test)

In [142]:
from sklearn.metrics import accuracy_score
print("Accuracy Score:\t {0:.4f}".format(accuracy_score(y_test, y_predict)))

Accuracy Score:	 0.9667


# using rbf kernel model

In [143]:
c=1.0
rbf_clf = svm.SVC(kernel='rbf', gamma=0.7, C=C)
rbf_clf.fit(X_train,y_train)

SVC(C=1.1, cache_size=200, class_weight=None, coef0=0.0,
  decision_function_shape='ovr', degree=3, gamma=0.7, kernel='rbf',
  max_iter=-1, probability=False, random_state=None, shrinking=True,
  tol=0.001, verbose=False)

In [145]:
res = cross_val_score(clf, X_train, y_train, cv=10, scoring='accuracy')
print("Average Accuracy: \t {0:.4f}".format(np.mean(res)))
print("Accuracy SD: \t\t {0:.4f}".format(np.std(res)))

Average Accuracy: 	 0.9248
Accuracy SD: 		 0.0698




In [149]:
y_train_predict = cross_val_predict(rbf_clf, X_train, y_train, cv=3)

In [150]:
confusion_matrix(y_train, y_train_predict)

array([[39,  1,  0],
       [ 0, 37,  4],
       [ 0,  3, 36]], dtype=int64)

In [152]:
print("Precision Score: \t {0:.4f}".format(precision_score(y_train, 
                                                           y_train_predict, 
                                                           average='weighted')))
print("Recall Score: \t\t {0:.4f}".format(recall_score(y_train,
                                                     y_train_predict, 
                                                     average='weighted')))
print("F1 Score: \t\t {0:.4f}".format(f1_score(y_train,
                                             y_train_predict, 
                                             average='weighted')))

Precision Score: 	 0.9342
Recall Score: 		 0.9333
F1 Score: 		 0.9336


In [153]:
y_test_pred = cross_val_predict(rbf_clf, X_test, y_test, cv=3)

In [154]:
confusion_matrix(y_test_pred, y_test)

array([[10,  0,  0],
       [ 0,  7,  1],
       [ 0,  2, 10]], dtype=int64)

In [155]:
print("Precision Score: \t {0:.4f}".format(precision_score(y_test, 
                                                           y_test_pred, 
                                                           average='weighted')))
print("Recall Score: \t\t {0:.4f}".format(recall_score(y_test,
                                                     y_test_pred, 
                                                     average='weighted')))
print("F1 Score: \t\t {0:.4f}".format(f1_score(y_test,
                                             y_test_pred, 
                                             average='weighted')))

Precision Score: 	 0.9014
Recall Score: 		 0.9000
F1 Score: 		 0.8992


# simpale rbf kernel model without cross Validation

In [169]:
y_train_predict = rbf_clf.predict(X_train)

Accuracy Score:	 0.9750
Accuracy Score:	 1.0000


In [172]:
confusion_matrix(y_train, y_train_predict)

array([[40,  0,  0],
       [ 0, 38,  3],
       [ 0,  0, 39]], dtype=int64)

In [171]:
print("Accuracy Score:\t {0:.4f}".format(accuracy_score(y_train_predict, y_train)))

Accuracy Score:	 0.9750


In [173]:
y_pred = rbf_clf.predict(X_test)

In [174]:
confusion_matrix(y_pred, y_test)

array([[10,  0,  0],
       [ 0,  9,  0],
       [ 0,  0, 11]], dtype=int64)

In [175]:
from sklearn.metrics import accuracy_score
print("Accuracy Score:\t {0:.4f}".format(accuracy_score(y_test, y_pred)))

Accuracy Score:	 1.0000


# here we get score 1 .....