# Iris Dataset Classification

## Loading Iris Dataset

In [12]:
from sklearn import datasets
from sklearn.preprocessing import StandardScaler

iris = datasets.load_iris()

# Standardise the features with StandardScaler. The scaled values are written
# straight to X so the raw measurements in iris.data are left untouched.
# NOTE(review): the scaler is fitted on every sample before the train/test
# split performed later in this notebook, so test-set statistics influence the
# scaling; fit on the training rows only if a leak-free evaluation is needed.
scaler = StandardScaler()
X = scaler.fit_transform(iris.data)
y = iris.target

## Setting Up The Data Frame

In [13]:
import pandas as pd
import numpy as np

# Label the feature columns while building the frame, then attach the target
# values as a trailing 'class' column.
df_iris = pd.DataFrame(X, columns=iris.feature_names)
df_iris['class'] = y

## Separating Train & Test

In [14]:
from sklearn.model_selection import train_test_split

# Hold out 20% of the samples for testing; the fixed seed keeps the split
# reproducible across runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=0,
)

## Defining C Hyperparameter Test Function (Linear)

In [24]:
from sklearn.model_selection import ShuffleSplit
from sklearn.model_selection import GridSearchCV
from sklearn.svm import LinearSVC

def grid_linear_svc_test(X, y, c):
    """Grid-search the ``C`` parameter of a hinge-loss ``LinearSVC``.

    Parameters
    ----------
    X : feature matrix handed to ``GridSearchCV.fit``.
    y : target labels handed to ``GridSearchCV.fit``.
    c : candidate values for ``C``.

    Returns
    -------
    The ``GridSearchCV`` object after it has been fitted on ``X`` and ``y``.
    """
    search = GridSearchCV(
        estimator=LinearSVC(loss='hinge', random_state=0),
        param_grid={'C': c},
        # Random shuffle splits with 20% held out each time; the seed makes
        # the splits reproducible.
        cv=ShuffleSplit(test_size=0.20, random_state=0),
    )
    return search.fit(X, y)

## Generating C Values & Testing Them On The Training Set (Linear)

In [28]:
import math

def generate_polynomials(base, min_exp, max_exp):
    """Return the powers ``base**e`` for the exponents in ``min_exp..max_exp``.

    The exponent range is inclusive at both ends. Exponents that are not
    strictly positive are skipped, so ``base**0`` and negative powers never
    appear in the result.

    Parameters
    ----------
    base : number that is raised to each exponent.
    min_exp : first exponent of the range.
    max_exp : last exponent of the range (inclusive).

    Returns
    -------
    A list of the powers, ordered by increasing exponent.
    """
    exponents = np.arange(min_exp, max_exp + 1)
    return [base**exponent for exponent in exponents if exponent > 0]

# Coarse pass: try C = 2**1, 2**2, ..., 2**20.
c = generate_polynomials(2, 1, 20)
grid = grid_linear_svc_test(X_train, y_train, c)

# Fine pass: scan in steps of 1 from half the best coarse C up to (but not
# including) double that value.
# math.log2 is used instead of math.log(x, 2) because the two-argument form
# can be off by a rounding error even for exact powers of two, which would
# shift the boundaries computed below.
best_c_exp = math.log2(grid.best_estimator_.C)
left_boundry_exp = best_c_exp - 1
right_boundry_exp = best_c_exp + 1

c = np.arange(math.pow(2, left_boundry_exp), math.pow(2, right_boundry_exp), 1)
grid = grid_linear_svc_test(X_train, y_train, c)

## Displaying Best C & Best Score

In [29]:
# Report the best C found by the grid search held in `grid`, together with
# its best cross-validation score expressed as a percentage.
best_linear_svc = grid.best_estimator_
best_score_percent = grid.best_score_ * 100

print("Best C    :", best_linear_svc.C)
print("Best Score:", best_score_percent, "%")

Best C    : 507.0
Best Score: 95.41666666666667 %


## Test Set Score (Linear)

In [30]:
# Score the best estimator from `grid` on the held-out test split.
test_accuracy = grid.best_estimator_.score(X_test, y_test)
print("Test Set Score", test_accuracy * 100, "%")

Test Set Score 96.66666666666667 %


## Using Polynomial SVC

In [19]:
from sklearn.preprocessing import PolynomialFeatures
from sklearn.svm import SVC

def grid_poly_svc_test(X, y, c, degree, coef0):
    """Grid-search ``C``, ``degree`` and ``coef0`` of a polynomial-kernel ``SVC``.

    Parameters
    ----------
    X : feature matrix handed to ``GridSearchCV.fit``.
    y : target labels handed to ``GridSearchCV.fit``.
    c : candidate values for ``C``.
    degree : candidate values for the kernel ``degree``.
    coef0 : candidate values for the kernel ``coef0``.

    Returns
    -------
    The ``GridSearchCV`` object after it has been fitted on ``X`` and ``y``.
    """
    search = GridSearchCV(
        estimator=SVC(kernel="poly", random_state=0),
        param_grid={'C': c, "degree": degree, "coef0": coef0},
        # Random shuffle splits with 20% held out each time; the seed makes
        # the splits reproducible.
        cv=ShuffleSplit(test_size=0.20, random_state=0),
    )
    return search.fit(X, y)

## Generating C Values & Testing Them On The Training Set (Polynomial)

In [20]:
# Search space for the polynomial kernel:
#   C      - log-spaced between 2**-10 and 2**11 (NumPy's default sample count)
#   degree - integers 2 through 9
#   coef0  - integers -10 through 10
c = np.logspace(start=-10, stop=11, base=2)
degree = np.arange(2, 10)
coef0 = np.arange(-10, 11)

grid = grid_poly_svc_test(X_train, y_train, c, degree, coef0)

## Displaying Best C & Best Degree & Best Coef0 & Best Score

In [21]:
# Report the best hyperparameters found by the grid search held in `grid`,
# together with its best cross-validation score expressed as a percentage.
best_poly_svc = grid.best_estimator_
best_score_percent = grid.best_score_ * 100

print("Best C     :", best_poly_svc.C)
print("Best Degree:", best_poly_svc.degree)
print("Best Coef0 :", best_poly_svc.coef0)
print("Best Score :", best_score_percent, "%")

Best C     : 0.37149857228423705
Best Degree: 3
Best Coef0 : 1
Best Score : 96.66666666666667 %


## Test Set Score (Polynomial)

In [23]:
# Score the best estimator from `grid` on the held-out test split.
test_accuracy = grid.best_estimator_.score(X_test, y_test)
print("Test Set Score", test_accuracy * 100, "%")

Test Set Score 100.0 %
