In [1]:
# imports and load_csv def
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
from csv import reader
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, confusion_matrix
from sklearn.svm import SVC
import warnings
warnings.filterwarnings("ignore")

# Load a CSV file
def load_csv(filename, skip_header = True, return_names = True):
    dataset = list()
    labels = list()
    names = list()
    with open(filename, 'r') as file:
        csv_reader = reader(file)
        for row in csv_reader:
            if not row:
                continue
            labels.append(row.pop())
            dataset.append(row)
        if return_names:
            names = dataset[0:]
        if skip_header:
            dataset = dataset[1:]
            labels = labels[1:]
        if return_names:    
            return np.array(dataset, dtype = 'float'), np.array(labels), np.array(names)
        else:
            return np.array(dataset, dtype = 'float'), np.array(labels)

In [2]:
# load data
data, labels, names = load_csv('Dry_Bean_Dataset.csv', skip_header = True)
X_train, X_test, y_train, y_test = train_test_split(data, labels)

In [3]:
# Linear SVM
svcLinear = SVC(kernel='linear')
svcLinear.fit(X_train, y_train)
lin_pred = svcLinear.predict(X_test)
print(confusion_matrix(y_test, lin_pred))
print(classification_report(y_test, lin_pred))

[[303   0  30   0   3   3   9]
 [  0 129   0   0   0   0   0]
 [ 13   0 371   0   7   1   4]
 [  0   0   0 807   2  20  51]
 [  2   0  11   4 430   0  15]
 [  3   0   0  12   0 462  21]
 [  2   0   4  75  19  15 575]]
              precision    recall  f1-score   support

    BARBUNYA       0.94      0.87      0.90       348
      BOMBAY       1.00      1.00      1.00       129
        CALI       0.89      0.94      0.91       396
    DERMASON       0.90      0.92      0.91       880
       HOROZ       0.93      0.93      0.93       462
       SEKER       0.92      0.93      0.92       498
        SIRA       0.85      0.83      0.84       690

    accuracy                           0.90      3403
   macro avg       0.92      0.92      0.92      3403
weighted avg       0.90      0.90      0.90      3403



In [4]:
# SVM with polynomial kernel
svcPoly8 = SVC(kernel='poly', degree=8)
svcPoly8.fit(X_train, y_train)
poly_pred = svcPoly8.predict(X_test)
print(confusion_matrix(y_test, poly_pred))
print(classification_report(y_test, poly_pred))

[[  7   0 228   0 100   2  11]
 [  0 129   0   0   0   0   0]
 [  2   0 350   0  43   0   1]
 [  0   0   0 863   0  17   0]
 [  2   0  22  25 271  27 115]
 [  0   0   0 306  11  86  95]
 [  0   0   0 134  51 139 366]]
              precision    recall  f1-score   support

    BARBUNYA       0.64      0.02      0.04       348
      BOMBAY       1.00      1.00      1.00       129
        CALI       0.58      0.88      0.70       396
    DERMASON       0.65      0.98      0.78       880
       HOROZ       0.57      0.59      0.58       462
       SEKER       0.32      0.17      0.22       498
        SIRA       0.62      0.53      0.57       690

    accuracy                           0.61      3403
   macro avg       0.63      0.60      0.56      3403
weighted avg       0.59      0.61      0.55      3403



In [5]:
# SVM with Gaussian kernel
svcGaus = SVC(kernel='rbf')
svcGaus.fit(X_train, y_train)
gaus_pred = svcGaus.predict(X_test)
print(confusion_matrix(y_test, gaus_pred))
print(classification_report(y_test, gaus_pred))

[[ 12   0 234   0  93   0   9]
 [  0 129   0   0   0   0   0]
 [  4   0 352   0  39   0   1]
 [  0   0   0 758   0 101  21]
 [  4   0  23   8 284  16 127]
 [  0   0   0 181  12 123 182]
 [  0   0   0  44  67  87 492]]
              precision    recall  f1-score   support

    BARBUNYA       0.60      0.03      0.07       348
      BOMBAY       1.00      1.00      1.00       129
        CALI       0.58      0.89      0.70       396
    DERMASON       0.76      0.86      0.81       880
       HOROZ       0.57      0.61      0.59       462
       SEKER       0.38      0.25      0.30       498
        SIRA       0.59      0.71      0.65       690

    accuracy                           0.63      3403
   macro avg       0.64      0.62      0.59      3403
weighted avg       0.62      0.63      0.59      3403



In [6]:
# SVM with Sigmoid kernel
svcSigmoid = SVC(kernel='sigmoid')
svcSigmoid.fit(X_train, y_train)
sig_pred = svcSigmoid.predict(X_test)
print(confusion_matrix(y_test, sig_pred))
print(classification_report(y_test, sig_pred))

[[  2   0   0   2   0   0 344]
 [  0   0   0 129   0   0   0]
 [  1   0   0   4   0   2 389]
 [ 48 224   0 515   0  62  31]
 [ 30   5   0   1   0   8 418]
 [105 145   0  22   0  65 161]
 [181  38   0   2   0  32 437]]
              precision    recall  f1-score   support

    BARBUNYA       0.01      0.01      0.01       348
      BOMBAY       0.00      0.00      0.00       129
        CALI       0.00      0.00      0.00       396
    DERMASON       0.76      0.59      0.66       880
       HOROZ       0.00      0.00      0.00       462
       SEKER       0.38      0.13      0.19       498
        SIRA       0.25      0.63      0.35       690

    accuracy                           0.30      3403
   macro avg       0.20      0.19      0.17      3403
weighted avg       0.30      0.30      0.27      3403



In [7]:
# Linear SVM classifier had the best accuracy of: 0.90