# SVM Multiclass


In [2]:
from sklearn import datasets
from sklearn.model_selection import train_test_split
from sklearn import svm
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix


In [3]:
## Function to Evaluate a Classifier
def eval_classifier(y_test, y_pred, model):
    # Evaluate accuary 
    accuracy = accuracy_score(y_test, y_pred)
    print(f"{model} Accuracy: {accuracy}")
    
    # Classification Report
    print(f"Classification report: ")
    print(classification_report(y_test, y_pred))
    
    # Confusion Matrix
    cm = confusion_matrix(y_test, y_pred)
    print(f"Confusion Matrix: \n {cm}")

In [4]:
# Load the Breast Cancer Dataset
iris = datasets.load_iris()
X = iris

X = iris.data

y = iris.target.astype(int)

# Levels are 0 and 1 so it's binary classification

# Target Names are Malignant and Benign 

In [5]:
X.shape, y.shape, X, y
# 30 Features

((150, 4),
 (150,),
 array([[5.1, 3.5, 1.4, 0.2],
        [4.9, 3. , 1.4, 0.2],
        [4.7, 3.2, 1.3, 0.2],
        [4.6, 3.1, 1.5, 0.2],
        [5. , 3.6, 1.4, 0.2],
        [5.4, 3.9, 1.7, 0.4],
        [4.6, 3.4, 1.4, 0.3],
        [5. , 3.4, 1.5, 0.2],
        [4.4, 2.9, 1.4, 0.2],
        [4.9, 3.1, 1.5, 0.1],
        [5.4, 3.7, 1.5, 0.2],
        [4.8, 3.4, 1.6, 0.2],
        [4.8, 3. , 1.4, 0.1],
        [4.3, 3. , 1.1, 0.1],
        [5.8, 4. , 1.2, 0.2],
        [5.7, 4.4, 1.5, 0.4],
        [5.4, 3.9, 1.3, 0.4],
        [5.1, 3.5, 1.4, 0.3],
        [5.7, 3.8, 1.7, 0.3],
        [5.1, 3.8, 1.5, 0.3],
        [5.4, 3.4, 1.7, 0.2],
        [5.1, 3.7, 1.5, 0.4],
        [4.6, 3.6, 1. , 0.2],
        [5.1, 3.3, 1.7, 0.5],
        [4.8, 3.4, 1.9, 0.2],
        [5. , 3. , 1.6, 0.2],
        [5. , 3.4, 1.6, 0.4],
        [5.2, 3.5, 1.5, 0.2],
        [5.2, 3.4, 1.4, 0.2],
        [4.7, 3.2, 1.6, 0.2],
        [4.8, 3.1, 1.6, 0.2],
        [5.4, 3.4, 1.5, 0.4],
        [5.2, 4.1, 1

In [8]:
# Split data into train & test

#Common Ratio is 80/20, another one is 70/30

#Might not get the same result when running the command from different people as the data is shuffled, Random_state keeps the order consistent as others
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, train_size=0.8, random_state=1)

X_train.shape, X_test.shape, y_train.shape, y_test.shape
## 455 for Training, 30 for Testing

((120, 4), (30, 4), (120,), (30,))

In [None]:
import seaborn as sns
df = sns.load_dataset(iris)

# One-vs-One


In [10]:
# Create SVM Classifier for One-Vs_One
clf_ovo = svm.SVC(kernel='linear', decision_function_shape='ovo')

In [11]:
# Train
clf_ovo.fit(X_train,y_train)

SVC(decision_function_shape='ovo', kernel='linear')

In [12]:
y_pred_ovo = clf_ovo.predict(X_test)

In [13]:
eval_classifier(y_test,y_pred_ovo,'CLF_OVO')

CLF_OVO Accuracy: 1.0
Classification report: 
              precision    recall  f1-score   support

           0       1.00      1.00      1.00        11
           1       1.00      1.00      1.00        13
           2       1.00      1.00      1.00         6

    accuracy                           1.00        30
   macro avg       1.00      1.00      1.00        30
weighted avg       1.00      1.00      1.00        30

Confusion Matrix: 
 [[11  0  0]
 [ 0 13  0]
 [ 0  0  6]]


## One V All


In [15]:
# Create SVM Classifier for One-Vs-All
clf_ova = svm.SVC(kernel='linear', decision_function_shape='ovr')

In [16]:
#Train Model
clf_ova.fit(X_train, y_train)


SVC(kernel='linear')

In [17]:
y_pred_ovr = clf_ova.predict(X_test)

In [18]:
eval_classifier(y_test, y_pred_ovr,'One-vs-all' )

One-vs-all Accuracy: 1.0
Classification report: 
              precision    recall  f1-score   support

           0       1.00      1.00      1.00        11
           1       1.00      1.00      1.00        13
           2       1.00      1.00      1.00         6

    accuracy                           1.00        30
   macro avg       1.00      1.00      1.00        30
weighted avg       1.00      1.00      1.00        30

Confusion Matrix: 
 [[11  0  0]
 [ 0 13  0]
 [ 0  0  6]]


## SVM Classifier

In [9]:
# Instantiate a SVM Classifier 

clf_svm = svm.SVC(kernel='linear')

In [10]:
## Train the Model

clf_svm.fit(X_train, y_train)

SVC(kernel='linear')

In [11]:
## Predict with the Model for test dataset and store the results

svm_pred = clf_svm.predict(X_test)



In [18]:
## Evaluation

eval_classifier(y_test, svm_pred, 'SVM')

SVM Accuracy: 0.956140350877193
Classification report: 
              precision    recall  f1-score   support

           0       1.00      0.88      0.94        42
           1       0.94      1.00      0.97        72

    accuracy                           0.96       114
   macro avg       0.97      0.94      0.95       114
weighted avg       0.96      0.96      0.96       114

Confusion Matrix: 
 [[37  5]
 [ 0 72]]


## Logsitic Regression Classifier


In [19]:
from sklearn.linear_model import LogisticRegression
clf_lreg = LogisticRegression().fit(X_train, y_train)

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


In [20]:
# Predict with Log Model
lreg_pred = clf_lreg.predict(X_test)

In [21]:
# Eval with log
eval_classifier(y_test, lreg_pred, 'LREG')

LREG Accuracy: 0.9473684210526315
Classification report: 
              precision    recall  f1-score   support

           0       0.95      0.90      0.93        42
           1       0.95      0.97      0.96        72

    accuracy                           0.95       114
   macro avg       0.95      0.94      0.94       114
weighted avg       0.95      0.95      0.95       114

Confusion Matrix: 
 [[38  4]
 [ 2 70]]
