# Classical ML algorithms for image classification
## Importing useful libraries

In [1]:
import os
os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID";
os.environ["CUDA_VISIBLE_DEVICES"] = "0"

In [2]:
import pandas as pd
import os
from skimage.transform import resize
from skimage.io import imread
import numpy as np
import matplotlib.pyplot as plt
from sklearn import svm
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from sklearn.metrics import classification_report

## Loading images and convert them to a dataframe version

In [3]:
train_flat_data_arr=[] #input array
train_target_arr=[] #output array

test_flat_data_arr=[] #input array
test_target_arr=[] #output array

class2idx = {'CuNi1': 0, 'CuNi2': 1, 'CuNi3': 2}

#loading the train and test dataframes
columns=['path','label']
train_df = pd.read_csv('../data/rgb/train_dry.csv')
train_df.columns=columns
test_df = pd.read_csv('../data/rgb/test_dry.csv')
test_df.columns=columns

#reading the images from the path for each set
for i in range(len(train_df)):
    img=imread(train_df['path'][i])
    img_resized=resize(img,(256,256,3))
    train_flat_data_arr.append(img_resized.flatten())
    clase = class2idx[train_df['label'][i]]
    train_target_arr.append(clase)

train_flat_data=np.array(train_flat_data_arr)
train_target=np.array(train_target_arr)

print("train data is ok!")

#for test data
for i in range(len(test_df)):
    img=imread(test_df['path'][i])
    img_resized=resize(img,(256,256,3))
    test_flat_data_arr.append(img_resized.flatten())
    clase = class2idx[test_df['label'][i]]
    test_target_arr.append(clase)

test_flat_data=np.array(test_flat_data_arr)
test_target=np.array(test_target_arr)

print("test data is ok!")

train data is ok!
test data is ok!


In [6]:
#saving the train arrays
np.save('../data/rgb/train_dry_flat_data.npy',train_flat_data)
np.save('../data/rgb/train_dry_target.npy',train_target)

#saving the test arrays
np.save('../data/rgb/test_dry_flat_data.npy',test_flat_data)
np.save('../data/rgb/test_dry_target.npy',test_target)

**Loading np arrays for train and test**

In [7]:
train_flat_data = np.load('../data/rgb/train_dry_flat_data.npy')
train_target = np.load('../data/rgb/train_dry_target.npy')

test_flat_data = np.load('../data/rgb/test_dry_flat_data.npy')
test_target = np.load('../data/rgb/test_dry_target.npy')

In [8]:
#===== train dataframe
train_df=pd.DataFrame(train_flat_data) 
train_df['Target']=train_target
print(train_df.shape)

#input data 
x_train=train_df.iloc[:,:-1] 
#output data
y_train=train_df.iloc[:,-1]

#===== test dataframe
test_df=pd.DataFrame(test_flat_data) 
test_df['Target']=test_target
print(test_df.shape)

#input data 
x_test=test_df.iloc[:,:-1] 
#output data
y_test=test_df.iloc[:,-1]

(736, 196609)
(194, 196609)


In [9]:
train_df.head()

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,196599,196600,196601,196602,196603,196604,196605,196606,196607,Target
0,0.042515,0.059069,0.112732,0.040827,0.045298,0.090817,0.039046,0.035859,0.065394,0.037929,...,0.589762,0.526075,0.507731,0.410834,0.451747,0.514565,0.309519,0.435979,0.538652,2
1,0.011932,0.032248,0.167634,0.008213,0.036495,0.208557,0.006711,0.041894,0.262665,0.005314,...,0.444092,0.5406,0.759934,0.466304,0.578247,0.795463,0.467159,0.619142,0.825025,1
2,0.330415,0.35712,0.516988,0.305774,0.362259,0.499862,0.302907,0.394054,0.50234,0.332392,...,0.793346,0.826316,0.585341,0.796098,0.836487,0.64835,0.782724,0.821423,0.709909,1
3,0.862974,0.838126,0.764366,0.86841,0.818416,0.741802,0.856497,0.790339,0.722288,0.852546,...,0.527532,0.48532,0.653424,0.753343,0.717969,0.733184,0.848582,0.851078,0.802884,0
4,0.795135,0.748448,0.691149,0.789107,0.712922,0.675955,0.76677,0.658064,0.626455,0.77702,...,0.255317,0.160109,0.159601,0.374237,0.241884,0.193999,0.517896,0.395217,0.264109,0


In [10]:
test_df.head()

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,196599,196600,196601,196602,196603,196604,196605,196606,196607,Target
0,0.054575,0.038888,0.04281,0.058021,0.042096,0.046137,0.061394,0.040383,0.046967,0.062654,...,0.037114,0.032988,0.060063,0.038628,0.034382,0.06254,0.041391,0.03505,0.074641,1
1,0.724488,0.609481,0.565259,0.619972,0.522581,0.444246,0.568197,0.485876,0.353755,0.618233,...,0.81246,0.782156,0.733529,0.795687,0.799233,0.734483,0.734392,0.797806,0.733649,0
2,0.532412,0.827931,0.787586,0.725105,0.881034,0.742293,0.865913,0.904092,0.734465,0.922228,...,0.811886,0.762915,0.798144,0.834909,0.788571,0.81153,0.842911,0.799568,0.792549,0
3,0.066893,0.055183,0.094225,0.060194,0.050496,0.07579,0.056327,0.045847,0.074555,0.049639,...,0.124581,0.129531,0.171408,0.193675,0.197399,0.191963,0.32955,0.296228,0.242106,0
4,0.074213,0.138425,0.509665,0.080822,0.174468,0.536089,0.096226,0.221393,0.564351,0.122578,...,0.300708,0.276041,0.462963,0.543929,0.428173,0.440352,0.774804,0.582063,0.452078,2


## Creating the SVM model

In [11]:
svc = svm.SVC(kernel='rbf', C=1, gamma=0.01, probability=True)
# Training the model using the training data
svc.fit(x_train,y_train)

In [12]:
# Testing the model using the testing data
y_pred = svc.predict(x_test)

# Calculating the accuracy of the model
accuracy = accuracy_score(y_pred, y_test)

# Print the accuracy of the model
print(f"The model is {accuracy*100}% accurate")

The model is 40.72164948453608% accurate


In [13]:
print(classification_report(y_test, y_pred, target_names=['CuNi1', 'CuNi2', 'CuNi3']))

              precision    recall  f1-score   support

       CuNi1       0.41      1.00      0.58        79
       CuNi2       0.00      0.00      0.00        61
       CuNi3       0.00      0.00      0.00        54

    accuracy                           0.41       194
   macro avg       0.14      0.33      0.19       194
weighted avg       0.17      0.41      0.24       194



  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
