# **ENGR 418 - Project (Stage 2)**
### *Group 33 Members:*
#### Bonn Fernandez (42101212)
#### Maxwell Rex (68722255)
---

### Imports

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import glob
from sklearn import linear_model
from sklearn.neighbors import KNeighborsClassifier
from skimage.io import imread, imshow
from skimage.transform import resize
from skimage import filters, feature, color

### Feature Extraction Function

In [12]:
def _edge_features(edges):
    """Compute [area, min_width, max_diff_width] from a 2-D boolean edge map.

    For every row, the "width" is the column distance between the first and
    the last edge pixel (0 when the row has no edge pixel or only one).

    Returns:
        list: [area, min_width, max_diff_width] where
            area           -- sum of the widths of all rows,
            min_width      -- smallest non-zero row width (stays at the image
                              width if no row has a non-zero width),
            max_diff_width -- largest absolute change in width between one row
                              and the next (the first row is compared to 0).
    """
    area = 0
    min_width = edges.shape[1]                        # Sentinel: a real width is always smaller than the image width
    max_diff_width = 0
    width = 0
    for row in edges:
        prev_width = width
        edge_cols = np.flatnonzero(row)               # Column indices of the edge pixels in this row
        width = int(edge_cols[-1] - edge_cols[0]) if edge_cols.size else 0

        # Find area (feature 1)
        area += width

        # Find maximum change in width (feature 2)
        max_diff_width = max(max_diff_width, abs(width - prev_width))

        # Find minimum width (feature 3); rows without a measurable width are ignored
        if width != 0 and width < min_width:
            min_width = width

    return [area, min_width, max_diff_width]


def feature_extraction(path):
    """Build the feature array for every PNG image in a folder.

    Each image is read as grayscale, resized to 100x100, run through Canny
    edge detection, and reduced to three features (see _edge_features).

    Args:
        path: Folder containing the '*.png' images.

    Returns:
        numpy.ndarray: Array of shape (number_of_images, 3) whose columns are
        [area, min_width, max_diff_width]. Rows follow the sorted file names,
        so 'cir_*' images come before 'rec_*' and 'squ_*' images, which is the
        order label_extraction() assumes. An empty folder gives shape (0, 3).
    """
    # glob returns files in arbitrary order; sort so rows line up with the label vector
    pictures = sorted(glob.glob(path + '/*.png'))
    x = []
    for picture in pictures:
        image = imread(picture, as_gray=True)         # Read the image as a 2-D grayscale array
        image = resize(image, (100, 100))             # Resize the image to 100x100
        edges = feature.canny(image, sigma=5, low_threshold=0.001, high_threshold=0.07)  # Find edges using canny edge detection
        x.append(_edge_features(edges))               # Append the feature vector to the feature list

    return np.array(x).reshape(-1, 3)                 # Always 2-D, even when no image was found

### Label Extraction Function

In [13]:
def label_extraction(path):
    """Count the images of each shape in a folder and build the label vector.

    Files are recognised by their name prefix: 'cir_' (class 1), 'rec_'
    (class 2) and 'squ_' (class 3). The label vector lists all class-1 labels
    first, then class 2, then class 3, i.e. it assumes the images are processed
    in the order circle, rectangle, square.

    Args:
        path: Folder containing the '*.png' images.

    Returns:
        tuple: (y, cir, rec, squ) where y is a numpy array of class numbers and
        cir, rec, squ are the number of circle, rectangle and square images.
    """
    patterns = ('/cir_*.png', '/rec_*.png', '/squ_*.png')
    counts = [len(glob.glob(path + pattern)) for pattern in patterns]  # Number of images per shape
    cir, rec, squ = counts

    labels = []
    for class_number, count in enumerate(counts, start=1):
        labels.extend([class_number] * count)        # One label per image of this shape

    y = np.array(labels)                             # Converts the list into a numpy array
    return y, cir, rec, squ

### Classifier/Accuracy Function

In [14]:
def classifier(test_path,model):
    """Evaluate a trained model on a folder of images and print the results.

    Prints the confusion matrix (rows = predicted class, columns = true class),
    the accuracy for each shape, and the overall accuracy.

    Args:
        test_path: Folder containing the '*.png' images to classify.
        model: Fitted classifier exposing predict(x).

    Returns:
        None
    """
    x = feature_extraction(test_path)               # Collect the features of the dataset
    y, cir, rec, squ = label_extraction(test_path)  # Unpack in the order label_extraction() returns: circle, rectangle, square
    preds = np.asarray(model.predict(x))            # Use the trained model to classify the dataset
    print(pd.crosstab(preds,y))                     # Show the confusion matrix to determine its quality
    print("------------------------------------------------------")
    points = [cir,rec,squ]
    shape = ['circle','rectangle','square']
    success = 0
    for i in [0,1,2]:
        # Count correct predictions directly so a class that is never predicted (or never present) cannot raise a KeyError
        hits = int(np.sum((preds == i+1) & (y == i+1)))
        success = success + hits
        accuracy = round((hits/points[i])*100,2) if points[i] else float('nan')  # nan when the folder has no image of this shape
        print(f"For the {shape[i]} lego, the accuracy of the classifier is {accuracy}%")
    overall = round((success/len(y))*100,2) if len(y) else float('nan')
    print(f"The overall accuracy of the classifier is {overall}%")
    return

### Model Training
Make sure to update the train_path string to your training data's filepath

In [15]:
# Folder holding the training images
train_path = './Stage2/training'

# Feature array xp and label vector yp (plus the per-shape image counts)
xp = feature_extraction(train_path)
yp, cir, rec, squ = label_extraction(train_path)

# Train a 1-nearest-neighbour model on the training features and labels
knn = KNeighborsClassifier(n_neighbors=1)
knn.fit(xp, yp)

KNeighborsClassifier(n_neighbors=1)

### Accuracy Test (Training Data)
Make sure to update the training string to your training data's filepath

In [16]:
# Score the fitted knn model on the training images; classifier() prints the
# confusion matrix, the per-shape accuracy and the overall accuracy.
training = './Stage2/training'
classifier(training,knn)

col_0   1   2   3
row_0            
1      27   0   0
2       0  27   0
3       0   0  27
------------------------------------------------------
For the circle lego, the accuracy of the classifier is 100.0%
For the rectangle lego, the accuracy of the classifier is 100.0%
For the square lego, the accuracy of the classifier is 100.0%
The overall accuracy of the classifier is 100.0%


### Accuracy Test (Testing Data)
Make sure to update the testing string to your testing data's filepath

In [17]:
# Score the fitted knn model on the testing images; classifier() prints the
# confusion matrix, the per-shape accuracy and the overall accuracy.
testing = './Stage2/testing'
classifier(testing,knn)

col_0   1   2   3
row_0            
1      25   0   1
2       0  24   0
3       2   3  26
------------------------------------------------------
For the circle lego, the accuracy of the classifier is 92.59%
For the rectangle lego, the accuracy of the classifier is 88.89%
For the square lego, the accuracy of the classifier is 96.3%
The overall accuracy of the classifier is 92.59%
