# Identification of USB type with ML

In [3]:
%matplotlib inline

from PIL import Image
import numpy, os
from sklearn.model_selection import cross_val_score, train_test_split
from sklearn.linear_model import RidgeCV

from skimage.io import imread
from skimage.filters import threshold_otsu
from skimage import measure
from skimage.transform import resize
from skimage.measure import regionprops
from skimage.morphology import binary_erosion, binary_dilation, binary_opening
import matplotlib.pyplot as plt
import matplotlib.patches as patches
from sklearn.preprocessing import LabelEncoder
import time


In [4]:
def importimage(filename):
    """Load a frame and return the detected object as a 64x64 grayscale patch.

    Steps: read the file as grayscale, crop it to a fixed window, binarise it
    with Otsu's threshold (pixels at or below the threshold become foreground),
    dilate three times, take the union bounding box of every connected region
    whose area is at least 500 pixels, grow that box to a square, then crop and
    resize to 64x64.

    Parameters
    ----------
    filename : str
        Path of the image file to read.

    Returns
    -------
    numpy.ndarray or None
        The 64x64 patch (grayscale values multiplied by 255 before resizing),
        or None when no sufficiently large region is found, the crop is empty,
        or the resize fails.
    """
    min_region_area = 500
    dilation_rounds = 3
    resize_width = 64
    resize_height = 64

    image = imread(filename, as_gray=True)
    # Keep rows 100..619 and columns 0..404 only.
    # NOTE(review): presumably the frames are 720 rows high, so this trims
    # 100 rows from both the top and the bottom — confirm against the data.
    image = image[100:620, 0:405]

    gray_image_scaled = image * 255

    # Foreground = pixels that are NOT brighter than the Otsu threshold.
    threshold_value = threshold_otsu(gray_image_scaled)
    binary_image = numpy.invert(gray_image_scaled > threshold_value)

    # Dilate so that small, nearby parts of the object merge into one region.
    for _ in range(dilation_rounds):
        binary_image = binary_dilation(binary_image)

    label_image = measure.label(binary_image)

    # Bounding boxes (min_row, min_col, max_row, max_col) of every region that
    # is large enough to matter; smaller ones are treated as noise.
    boxes = [region.bbox for region in regionprops(label_image)
             if region.area >= min_region_area]
    if not boxes:
        # Nothing usable was detected; previously this fell through with
        # sentinel coordinates and relied on the resize step failing.
        return None

    miny = min(box[0] for box in boxes)
    minx = min(box[1] for box in boxes)
    maxy = max(box[2] for box in boxes)
    maxx = max(box[3] for box in boxes)

    width = maxx - minx
    height = maxy - miny

    # Grow the shorter side symmetrically so the crop window is square.
    if width > height:
        pad = (width - height) / 2
        miny -= pad
        maxy += pad
    elif height > width:
        pad = (height - width) / 2
        minx -= pad
        maxx += pad

    # Clamp to the image. A negative start index would otherwise be read by
    # NumPy as "count from the end" and yield an empty or wrong crop; the far
    # edges were already clamped implicitly by slicing, so this makes both
    # sides behave the same way.
    rows, cols = gray_image_scaled.shape[:2]
    top = max(int(miny), 0)
    left = max(int(minx), 0)
    bottom = min(int(maxy), rows)
    right = min(int(maxx), cols)

    cropped_image = gray_image_scaled[top:bottom, left:right]
    if cropped_image.size == 0:
        return None

    try:
        # output_shape is (rows, cols); both are 64 here.
        scaled_cropped_image = resize(cropped_image, (resize_height, resize_width),
                                      anti_aliasing=False, mode='constant')
    except Exception:
        # Best effort: a frame that cannot be resized is skipped by the caller.
        return None

    return scaled_cropped_image

In [7]:
# Walk frames/<label>/<subdirectory>/<image file>; the name of the first-level
# directory is used as the class label of every image found beneath it.
path = "frames/"
labels = []
imgs = []
# os.listdir returns entries in arbitrary order, so every level is sorted to
# keep the seeded train/test split below identical between runs and machines.
for directory in sorted(os.listdir(path)):
    label_dir = os.path.join(path, directory)
    if not os.path.isdir(label_dir):
        # Stray files next to the label folders would crash the nested listdir.
        continue
    for directory2 in sorted(os.listdir(label_dir)):
        group_dir = os.path.join(label_dir, directory2)
        if not os.path.isdir(group_dir):
            continue
        for file in sorted(os.listdir(group_dir)):
            img = importimage(os.path.join(group_dir, file))
            # importimage returns None for frames it could not process.
            if img is not None:
                labels.append(directory)
                # Flatten the 2-D patch into a 1-D feature vector.
                imgs.append(img.reshape(-1))

print("Imported {} images.".format(len(imgs)))
# Hold out half of the samples for evaluation; fixed seed for repeatability.
X_train, X_test, y_train, y_test = train_test_split(imgs, labels, test_size=0.5, random_state=42)

Imported 3958 images.
SVC poly
Accuracy: 99.74734714502273% (1974/1979)
Time taken: 53.793824195861816 seconds.
DecisionTreeClassifier
Accuracy: 91.61192521475493% (1813/1979)
Time taken: 4.534636735916138 seconds.
KNeighbor
Accuracy: 99.39363314805458% (1967/1979)
Time taken: 21.794674158096313 seconds.


In [None]:
from sklearn.svm import SVC

# Fit a linear-kernel support vector classifier on the training split and
# score it on the held-out split; the timer covers both fit and predict.
t0 = time.time()
print("SVC linear")
svc_model = SVC(kernel='linear', probability=True)
svc_model.fit(X_train, y_train)
result1 = svc_model.predict(X_test)

# Count the predictions that match the true label; the rest are misses.
total = len(X_test)
correct = sum(1 for expected, predicted in zip(y_test, result1) if expected == predicted)
wrong = total - correct
print("Accuracy: {}% ({}/{})".format(correct / total * 100, correct, total))
print("Time taken: {} seconds.".format(time.time() - t0))

In [None]:
from sklearn.tree import DecisionTreeClassifier

# Fit a decision tree (fixed seed) on the training split and score it on the
# held-out split; the timer covers both fit and predict.
t0 = time.time()
print("DecisionTreeClassifier")
dtf = DecisionTreeClassifier(random_state=42)
dtf.fit(X_train, y_train)
result1 = dtf.predict(X_test)

# Count the predictions that match the true label; the rest are misses.
total = len(X_test)
correct = sum(1 for expected, predicted in zip(y_test, result1) if expected == predicted)
wrong = total - correct
print("Accuracy: {}% ({}/{})".format(correct / total * 100, correct, total))
print("Time taken: {} seconds.".format(time.time() - t0))

In [None]:
from sklearn.neighbors import KNeighborsClassifier

# Fit a 5-nearest-neighbours classifier on the training split and score it on
# the held-out split; the timer covers both fit and predict.
t0 = time.time()
print("KNeighbor")
knc = KNeighborsClassifier(n_neighbors=5)
knc.fit(X_train, y_train)
result1 = knc.predict(X_test)

# Count the predictions that match the true label; the rest are misses.
total = len(X_test)
correct = sum(1 for expected, predicted in zip(y_test, result1) if expected == predicted)
wrong = total - correct
print("Accuracy: {}% ({}/{})".format(correct / total * 100, correct, total))
print("Time taken: {} seconds.".format(time.time() - t0))