In [1]:
import numpy as np
import pandas as pd
from sklearn import svm
from matplotlib import pyplot as plt

In [2]:
# Read the training workbook into a DataFrame; the path is relative to the
# notebook's working directory.
train_data = pd.read_excel('./TrainingSet.xlsx')

In [3]:
# Display the loaded training frame (long frames are abbreviated in the output).
train_data

Unnamed: 0,leaf.length,leaf.width,flower.length,flower.width,plant
0,5.4,3.7,1.5,0.2,Arctica
1,4.8,3.4,1.6,0.2,Arctica
2,4.8,3.0,1.4,0.1,Arctica
3,4.3,3.0,1.1,0.1,Arctica
4,5.8,4.0,1.2,0.2,Arctica
...,...,...,...,...,...
115,6.7,3.0,5.2,2.3,Carolinian
116,6.3,2.5,5.0,1.9,Carolinian
117,6.5,3.0,5.2,2.0,Carolinian
118,6.2,3.4,5.4,2.3,Carolinian


In [4]:
# Sanity check: frame dimensions as (rows, columns).
print(train_data.shape)

(120, 5)


In [5]:
# Preview the first five training rows.
train_data.head(5)

Unnamed: 0,leaf.length,leaf.width,flower.length,flower.width,plant
0,5.4,3.7,1.5,0.2,Arctica
1,4.8,3.4,1.6,0.2,Arctica
2,4.8,3.0,1.4,0.1,Arctica
3,4.3,3.0,1.1,0.1,Arctica
4,5.8,4.0,1.2,0.2,Arctica


## Getting the Unique Classes

In [6]:
# List the distinct class names present in the `plant` column.
train_data['plant'].unique()

array(['Arctica', 'Harlequin', 'Carolinian'], dtype=object)

# Replacing each class with a numeric label: Arctica → 1, Harlequin → 2, Carolinian → 3

In [7]:
# Encode each class name as an integer id, in place, with one masked write per
# class (same order as the names appear in the data).
class_ids = {'Arctica': 1, 'Harlequin': 2, 'Carolinian': 3}
for class_name, class_id in class_ids.items():
    is_class = train_data['plant'] == class_name
    train_data.loc[is_class, 'plant'] = class_id

In [8]:
# Confirm that only the numeric ids remain in the `plant` column.
train_data['plant'].unique()

array([1, 2, 3], dtype=object)

In [9]:
# Preview the first five rows after the label encoding.
train_data.head(5)

Unnamed: 0,leaf.length,leaf.width,flower.length,flower.width,plant
0,5.4,3.7,1.5,0.2,1
1,4.8,3.4,1.6,0.2,1
2,4.8,3.0,1.4,0.1,1
3,4.3,3.0,1.1,0.1,1
4,5.8,4.0,1.2,0.2,1


# Converting to NumPy Array

In [10]:
# Convert the entire frame (measurements plus the `plant` column) to a single
# float32 matrix. This only works once `plant` holds numeric codes, not names.
train_set = train_data.to_numpy(dtype='float32')

In [11]:
# Report the element type and the dimensions of the matrix, one per line.
print(train_set.dtype, train_set.shape, sep='\n')

float32
(120, 5)


In [12]:
# Preview the first five rows of the matrix (all columns).
train_set[:5]

array([[5.4, 3.7, 1.5, 0.2, 1. ],
       [4.8, 3.4, 1.6, 0.2, 1. ],
       [4.8, 3. , 1.4, 0.1, 1. ],
       [4.3, 3. , 1.1, 0.1, 1. ],
       [5.8, 4. , 1.2, 0.2, 1. ]], dtype=float32)

# Using SVM from scikit-learn

In [13]:
# Columns 0-3 hold the four measurements; column 4 holds the numeric class id.
# The label is taken with a slice (4:5) so Y stays a 2-D column of shape (n, 1).
X, Y = train_set[:, :4], train_set[:, 4:5]

In [14]:
# Preview the first five feature rows.
X[:5]

array([[5.4, 3.7, 1.5, 0.2],
       [4.8, 3.4, 1.6, 0.2],
       [4.8, 3. , 1.4, 0.1],
       [4.3, 3. , 1.1, 0.1],
       [5.8, 4. , 1.2, 0.2]], dtype=float32)

In [15]:
# Preview the first five label rows.
Y[:5]

array([[1.],
       [1.],
       [1.],
       [1.],
       [1.]], dtype=float32)

In [16]:
# Train a linear-kernel SVM (C=1, one-vs-one decision function) on the four
# measurement columns. Y is stored as an (n, 1) column, but scikit-learn expects
# a 1-D label vector and emits a DataConversionWarning otherwise, so it is
# flattened here. `ravel()` is a no-op if Y is already 1-D.
classifier = svm.SVC(kernel='linear', C=1, decision_function_shape='ovo').fit(X, Y.ravel())

  y = column_or_1d(y, warn=True)


# Predictions


In [17]:
# Read the test workbook into a DataFrame; the path is relative to the
# notebook's working directory.
test_data = pd.read_excel('./TestSet1.xlsx')

In [18]:
# Preview the first five test rows.
test_data.head(5)

Unnamed: 0,leaf.length,leaf.width,flower.length,flower.width,plant
0,4.4,2.9,1.4,0.2,
1,4.6,3.1,1.5,0.2,
2,4.6,3.4,1.4,0.3,
3,4.7,3.2,1.3,0.2,
4,4.9,3.0,1.4,0.2,


In [19]:
# Dimensions of the test frame as (rows, columns).
test_data.shape

(30, 5)

In [20]:
# Convert the whole test frame to a float32 matrix, the same way the training
# frame was converted.
# NOTE(review): the `plant` column shows no values in the preview, so it
# presumably becomes NaN here; only the first four columns are used afterwards.
test_set = test_data.to_numpy(dtype='float32')

In [21]:
# Keep only the four measurement columns; the trailing label column is dropped.
X_test = test_set[:, :4]

In [22]:
# Dimensions of the test feature matrix as (rows, columns).
X_test.shape

(30, 4)

In [23]:
# Preview the first five test feature rows.
X_test[:5]

array([[4.4, 2.9, 1.4, 0.2],
       [4.6, 3.1, 1.5, 0.2],
       [4.6, 3.4, 1.4, 0.3],
       [4.7, 3.2, 1.3, 0.2],
       [4.9, 3. , 1.4, 0.2]], dtype=float32)

In [24]:
# Predict a class id for every test row and store the result as an (n, 1)
# column. Using -1 lets NumPy infer the row count, so the cell works for a test
# set of any size rather than only for exactly 30 rows.
Y_test = classifier.predict(X_test).reshape(-1, 1)

In [25]:
# Dimensions of the prediction column as (rows, columns).
Y_test.shape

(30, 1)

In [26]:
# Preview the first five predicted class ids.
Y_test[:5]

array([[1.],
       [1.],
       [1.],
       [1.],
       [1.]], dtype=float32)

In [27]:
# Wrap the predicted ids (first and only column of Y_test) in a one-column frame.
results = pd.DataFrame(Y_test[:, 0], columns=['Plant'])

In [28]:
# Translate the numeric class ids predicted by the classifier back to names.
# One element-wise lookup replaces the three masked `.loc` writes: it never
# stores strings piecemeal into the float-typed column (which newer pandas
# flags as an incompatible-dtype assignment), and values that are not ids
# (e.g. names from a previous run of this cell) pass through unchanged, so the
# cell is safe to re-run.
label_names = {1: 'Arctica', 2: 'Harlequin', 3: 'Carolinian'}
results['Plant'] = results['Plant'].map(lambda label: label_names.get(label, label))

In [29]:
# Show up to the first 30 decoded predictions.
results.head(30)

Unnamed: 0,Plant
0,Arctica
1,Arctica
2,Arctica
3,Arctica
4,Arctica
5,Arctica
6,Harlequin
7,Harlequin
8,Arctica
9,Arctica


In [30]:
# Work on an independent (deep) copy so the original test frame stays untouched.
resultant_data = test_data.copy(deep=True)

In [31]:
# Inspect the copy before the predicted labels are written into it.
resultant_data

Unnamed: 0,leaf.length,leaf.width,flower.length,flower.width,plant
0,4.4,2.9,1.4,0.2,
1,4.6,3.1,1.5,0.2,
2,4.6,3.4,1.4,0.3,
3,4.7,3.2,1.3,0.2,
4,4.9,3.0,1.4,0.2,
5,4.9,3.1,1.5,0.1,
6,4.9,2.4,3.3,1.0,
7,4.9,2.5,4.5,1.7,
8,5.0,3.6,1.4,0.2,
9,5.0,3.4,1.5,0.2,


In [32]:
# Display the original test frame for comparison with its copy.
test_data

Unnamed: 0,leaf.length,leaf.width,flower.length,flower.width,plant
0,4.4,2.9,1.4,0.2,
1,4.6,3.1,1.5,0.2,
2,4.6,3.4,1.4,0.3,
3,4.7,3.2,1.3,0.2,
4,4.9,3.0,1.4,0.2,
5,4.9,3.1,1.5,0.1,
6,4.9,2.4,3.3,1.0,
7,4.9,2.5,4.5,1.7,
8,5.0,3.6,1.4,0.2,
9,5.0,3.4,1.5,0.2,


In [33]:
# Write the predicted class names into the `plant` column. The single `Plant`
# column is selected explicitly (instead of assigning a whole DataFrame, which
# only works while `results` has exactly one column), and its raw values are
# passed so rows are matched by position: prediction i belongs to test row i,
# regardless of how either frame is indexed. A length mismatch raises here.
resultant_data['plant'] = results['Plant'].to_numpy()

In [34]:
# Display the test rows with the predicted class names filled into `plant`.
resultant_data

Unnamed: 0,leaf.length,leaf.width,flower.length,flower.width,plant
0,4.4,2.9,1.4,0.2,Arctica
1,4.6,3.1,1.5,0.2,Arctica
2,4.6,3.4,1.4,0.3,Arctica
3,4.7,3.2,1.3,0.2,Arctica
4,4.9,3.0,1.4,0.2,Arctica
5,4.9,3.1,1.5,0.1,Arctica
6,4.9,2.4,3.3,1.0,Harlequin
7,4.9,2.5,4.5,1.7,Harlequin
8,5.0,3.6,1.4,0.2,Arctica
9,5.0,3.4,1.5,0.2,Arctica
