In [1]:
# Make Google Drive available inside this Colab runtime.
from google.colab import drive

mount_point = '/content/drive'  # directory under which Drive is mounted
drive.mount(mount_point)

Mounted at /content/drive


In [2]:
# Switch the working directory to the project folder; replace the placeholder with your own path.
# The .npz files loaded later in this notebook are opened with relative paths, so they are resolved from here.
%cd <your folder path>

/content/drive/MyDrive/20211/TextureClassification


In [3]:
import numpy as np
import scipy.io as sio
from sklearn import svm
import torch
from sklearn.model_selection import train_test_split

In [4]:
# Function for normalizing data
def normalize_data(input_data):
    """Standardize ``input_data`` column-wise to zero mean and unit variance.

    The mean and standard deviation are computed along axis 0 and returned so
    the same transformation can be applied to other data.

    Parameters
    ----------
    input_data : numpy.ndarray
        Data to normalize; statistics are taken along axis 0.

    Returns
    -------
    normalized : numpy.ndarray
        ``(input_data - mu) / std``.
    mu : numpy.ndarray
        Mean along axis 0.
    std : numpy.ndarray
        Standard deviation along axis 0 that was actually used for scaling.
        Entries that would be exactly 0 (constant columns) are replaced by 1.
    """
    mu = input_data.mean(axis=0)
    std = input_data.std(axis=0)
    # A constant column has std == 0; dividing by it would produce NaN/inf.
    # Scale such columns by 1 instead so they normalize to exactly 0, and
    # return the safe std so callers reusing it do not divide by zero either.
    std = np.where(std == 0, 1.0, std)
    return (input_data - mu) / std, mu, std

In [5]:
# Open the pre-computed feature archives (paths are relative to the working directory)
train_data = np.load('Train_data.npz')
test_data = np.load('Test_data.npz')

# Training arrays: feature matrix and labels
x_train_all_features, y_train = (
    train_data[key] for key in ('x_train_all_features', 'y_train')
)

# Test arrays: feature matrix and labels
x_test_all_features, y_test_all = (
    test_data[key] for key in ('x_test_all_features', 'y_test')
)

In [6]:
# Number of distinct class labels present in the test labels
distinct_labels = set(y_test_all)
print(len(distinct_labels))

47


In [7]:
# Number of features (principal components) kept after the PCA algorithm
# reference: https://www.scielo.br/j/eins/a/yzFzBTrdgGrv46hGsnzKssd/?lang=en#:~:text=Principal%20Components%20Analysis%20(PCA)(,similarities%20and%20differences%20are%20emphasized.
num_features = 47

# torch.pca_lowrank is a randomized algorithm. Fix the seed so the projection,
# and therefore every result derived from it, is reproducible across runs.
PCA_SEED = 0
torch.manual_seed(PCA_SEED)

# Convert each feature matrix to a tensor once and reuse it below.
x_train_tensor = torch.tensor(x_train_all_features)
x_test_tensor = torch.tensor(x_test_all_features)

# PCA on Train data
# The principal directions are estimated from the training data only.
(U, S, V) = torch.pca_lowrank(x_train_tensor, q = num_features)
projection = V[:, :num_features]

# reduce dimension of train data
# NOTE: pca_lowrank centers its input by default, while the projection here is
# applied to the uncentered matrix. The result differs only by a constant
# per-column offset, which the per-feature normalization that follows removes.
new_data_train = torch.matmul(x_train_tensor, projection)
x_train_non_normalized = new_data_train.numpy()

# PCA on Test data
# Project the test data with the directions learned from the training data.
new_data_test = torch.matmul(x_test_tensor, projection)
x_test_non_normalized = new_data_test.numpy()

In [8]:
# Standardize the PCA features; the statistics come from the training data only
train_normalized = normalize_data(x_train_non_normalized)
x_train_svc, mu, std = train_normalized

# Apply the training mean/std to the test features as well
x_test_centered = x_test_non_normalized - mu
x_test_svc = x_test_centered / std

In [9]:
# Hold out half of the original test set as a validation set for hyperparameter tuning.
# You can modify test_size / random_state below when fine-tuning the model.
split_parts = train_test_split(
    x_test_svc,
    y_test_all,
    test_size=0.5,
    random_state=15,
)
x_val, x_test, y_val, y_test = split_parts


In [10]:
# Search for the C value that gives the best model
# reference: https://numpy.org/doc/stable/reference/generated/numpy.logspace.html
C_all = np.logspace(-3, 2, num=100)  # 100 candidates, log-spaced from 1e-3 to 1e2
acc_all = np.zeros_like(C_all)

# Flatten the label arrays once; they do not change inside the loop
y_train_flat = y_train.ravel()
y_val_flat = y_val.squeeze()

# Hold-out validation for hyperparameter C: fit on the training set,
# score on the validation set
for idx, c_value in enumerate(C_all):
    # Linear SVM with the candidate C
    candidate = svm.SVC(C=c_value, kernel='linear')
    candidate.fit(x_train_svc, y_train_flat)

    # Validation accuracy = correct predictions / number of predictions
    val_predictions = candidate.predict(x_val)
    num_correct = np.sum(val_predictions == y_val_flat)
    acc_all[idx] = num_correct / len(val_predictions)

# argmax returns the first maximum, i.e. the smallest C among ties
pos = np.argmax(acc_all)
best_C = C_all[pos]
print('Best value for hyperparameter C: ' + str(best_C))

# Final SVM classifier, trained with the best value of the hyperparameter C
clf = svm.SVC(C=best_C, kernel='linear')
clf.fit(x_train_svc, y_train_flat)

Best value for hyperparameter C: 0.298364724028334


SVC(C=0.298364724028334, kernel='linear')

In [None]:
# Evaluate the fitted classifier on the training set
y_pred_train = clf.predict(x_train_svc)
y_train_flat = y_train.squeeze()
print(y_pred_train)
print(y_train_flat)

# accuracy = total images classified correctly / total images in dataset
num_train = len(y_pred_train)
same_pred_train = np.sum(y_pred_train == y_train_flat)
acc_train = same_pred_train / num_train
print('Correct: ', same_pred_train, '; Incorrect: ', num_train - same_pred_train, sep='')
print('Accuracy: ', acc_train * 100, '%', sep='')

['braided' 'braided' 'studded' ... 'blotchy' 'cobwebbed' 'braided']
['blotchy' 'braided' 'studded' ... 'braided' 'cobwebbed' 'braided']
Correct: 2861; Incorrect: 1651
Accuracy: 63.40868794326241%


In [None]:
# Evaluate the fitted classifier on the validation set
y_pred_val = clf.predict(x_val)
num_val = len(y_pred_val)

# Count predictions that match the validation labels
same_pred_val = np.sum(y_pred_val == y_val.squeeze())
acc_val = same_pred_val / num_val

print('Correct: ', same_pred_val, '; Incorrect: ', num_val - same_pred_val, sep='')
print('Accuracy: ', acc_val * 100, '%', sep='')

Correct: 218; Incorrect: 346
Accuracy: 38.652482269503544%


In [None]:
# Evaluate the fitted classifier on the held-out test set
y_pred_test = clf.predict(x_test)
num_test = len(y_pred_test)

# Count predictions that match the test labels
same_pred_test = np.sum(y_pred_test == y_test.squeeze())
acc_test = same_pred_test / num_test

print('Correct: ', same_pred_test, '; Incorrect: ', num_test - same_pred_test, sep='')
print('Accuracy: ', acc_test * 100, '%', sep='')

Correct: 216; Incorrect: 348
Accuracy: 38.297872340425535%
