In [1]:
## Importing necessary packages
import pandas as pd
import numpy as np

## `Impoting data`

In [2]:
data = pd.read_csv('Extracted-Features.csv')

In [3]:
data.shape

## dataset has 27 features and 330 observations

(330, 27)

In [4]:
data.columns

Index(['meanR', 'meanG', 'meanB', 'stdR', 'stdG', 'stdB', 'meanGray',
       'stdGray', 'entropy', 'aboveR', 'belowR', 'aboveG', 'belowG', 'aboveB',
       'belowB', 'FFT_avg', 'FFT_med', 'FFT_std', 'FFT_max', 'FFT_min', 'DCT',
       'H_avg', 'H_med', 'H_std', 'H_max', 'H_min', 'label'],
      dtype='object')

In [5]:
data.label.value_counts().sort_values()

## There are 101 instances of class 1, 124 instances of class 2, and 105 instances of class 3

1    101
3    105
2    124
Name: label, dtype: int64

## `Building Classfication Model`

In [6]:
from sklearn.svm import SVC
from sklearn.model_selection import train_test_split

from sklearn.metrics import classification_report
from sklearn.metrics import confusion_matrix

In [7]:
## Applying Train-Test split

## Train has 70% data and test has 30% data

X = data.drop(['label'], axis = 1)
y = data['label']

X_train, X_test, y_train, y_test = train_test_split(X, y, random_state = 2021, test_size = 0.3)

In [8]:
## Since SVC is a distance based measure we need to scale (normalize) the data

from sklearn.preprocessing import MinMaxScaler

scaler = MinMaxScaler(feature_range = (0,1))

X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

In [9]:
## Building a simple linear support vector classifier

clf = SVC(C = 100,class_weight = 'balanced', kernel = 'linear')

clf = clf.fit(X_train_scaled, y_train)

In [10]:
## Predicting on train and test data

pred_train = clf.predict(X_train_scaled)

pred_test = clf.predict(X_test_scaled)

In [11]:
## Classification report for the train data
print(classification_report(pred_train, y_train))

              precision    recall  f1-score   support

           1       0.89      0.90      0.89        70
           2       1.00      1.00      1.00        83
           3       0.91      0.90      0.90        78

    accuracy                           0.94       231
   macro avg       0.93      0.93      0.93       231
weighted avg       0.94      0.94      0.94       231



In [12]:
## Classification report for the test data
print(classification_report(pred_test, y_test))

              precision    recall  f1-score   support

           1       0.77      0.96      0.85        24
           2       1.00      1.00      1.00        41
           3       0.96      0.79      0.87        34

    accuracy                           0.92        99
   macro avg       0.91      0.92      0.91        99
weighted avg       0.93      0.92      0.92        99



***The model performed very well in classifying the images***