In [131]:
import os
import cv2
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

from pandas import DataFrame

In [132]:
# Directories holding each split's images together with its `_annotations.csv`.
train_path = 'data/train'
valid_path = 'data/valid'

# The annotation files are read with header=None, so the column names are
# supplied here: one row per bounding box plus its class label.
column_names = ['filename', 'x_min', 'y_min', 'x_max', 'y_max', 'class']
train_annotations, valid_annotations = (
    pd.read_csv(os.path.join(split_dir, '_annotations.csv'), header=None, names=column_names)
    for split_dir in (train_path, valid_path)
)

In [133]:
def select_random_classes(annotations: DataFrame, num_classes: int):
  """Draw `num_classes` distinct labels from the 'class' column.

  Args:
    annotations: frame with a 'class' column.
    num_classes: how many different labels to draw.

  Returns:
    NumPy array of the sampled labels (sampled without replacement using
    NumPy's global random state).
  """
  candidates = annotations['class'].unique()
  return np.random.choice(candidates, size=num_classes, replace=False)

def filter_annotations(annotations: DataFrame, selected_classes):
  """Return the rows of `annotations` whose 'class' is in `selected_classes`.

  The original row index labels are kept on the returned frame.
  """
  keep_mask = annotations['class'].isin(selected_classes)
  return annotations.loc[keep_mask]

In [134]:
# Seed NumPy's global RNG before sampling so that a fresh "Restart & Run All"
# always draws the same 10 classes; without a seed the class subset (and every
# accuracy figure computed from it) changes on each run.
np.random.seed(42)
classes = list(select_random_classes(train_annotations, 10))
classes

['Buick Enclave SUV 2012',
 'Chevrolet Tahoe Hybrid SUV 2012',
 'Bugatti Veyron 16-4 Coupe 2009',
 'BMW ActiveHybrid 5 Sedan 2012',
 'Chrysler Crossfire Convertible 2008',
 'Bugatti Veyron 16-4 Convertible 2009',
 'Maybach Landaulet Convertible 2012',
 'BMW Z4 Convertible 2012',
 'Honda Accord Coupe 2012',
 'Acura TL Type-S 2008']

In [135]:
# Restrict both splits to the sampled classes. The filtered frames replace the
# full ones under the same names, so the unfiltered annotations are only
# recoverable by re-running the loading cell.
train_annotations, valid_annotations = (
    filter_annotations(split_annotations, classes)
    for split_annotations in (train_annotations, valid_annotations)
)
train_annotations

Unnamed: 0,filename,x_min,y_min,x_max,y_max,class
5,05885_jpg.rf.e69fcadd40315639492a640c2a5246d9.jpg,0,74,388,415,Chrysler Crossfire Convertible 2008
55,00978_jpg.rf.e70b5ba6f48d5322f6e66a8253c14029.jpg,8,63,415,415,Chevrolet Tahoe Hybrid SUV 2012
68,05118_jpg.rf.e734da4c640c3a432eefecb53ff3198e.jpg,5,52,415,410,Chrysler Crossfire Convertible 2008
123,06099_jpg.rf.e7dfb4c33c301c1e67b07cb28f251245.jpg,46,74,362,330,Acura TL Type-S 2008
139,02913_jpg.rf.e8270cf6955679ef7a9cfbcf0042aab7.jpg,0,6,415,415,Acura TL Type-S 2008
...,...,...,...,...,...,...
19178,04077_jpg.rf.b5dbf7b5706e05855fa4df9fa94bce2d.jpg,13,38,415,400,Honda Accord Coupe 2012
19209,08120_jpg.rf.b65aefc24ee8dbbc979fd14b09401d56.jpg,6,69,394,364,Honda Accord Coupe 2012
19223,03660_jpg.rf.b66eb72134c2d03cdd7b89760b89b0f1.jpg,55,105,398,400,Buick Enclave SUV 2012
19281,04972_jpg.rf.b727525b6fea7262f8ab5b31879fd15e.jpg,6,7,415,415,BMW Z4 Convertible 2012


In [136]:
def _load_split(annotations, image_dir, class_names, size=(224, 224)):
    """Read, resize and label every annotated image of one dataset split.

    Args:
        annotations: frame with 'filename' and 'class' columns; one image is
            loaded per row (the bounding-box columns are not used).
        image_dir: directory the filenames are relative to.
        class_names: sequence of class names; a label is the position of the
            row's class in this sequence.
        size: (width, height) passed to cv2.resize.

    Returns:
        (images, labels): a list of resized images exactly as returned by
        cv2.imread (OpenCV's default channel order, i.e. BGR) and a list of
        integer class indices, in row order.

    Raises:
        FileNotFoundError: if an image cannot be read. cv2.imread signals this
            by returning None, which would otherwise surface as an opaque
            error inside cv2.resize.
    """
    # One dict lookup per row instead of a linear list.index() scan.
    class_to_index = {name: position for position, name in enumerate(class_names)}
    images, labels = [], []
    for filename, class_name in zip(annotations['filename'], annotations['class']):
        image_path = os.path.join(image_dir, filename)
        image = cv2.imread(image_path)
        if image is None:
            raise FileNotFoundError(f"Could not read image: {image_path}")
        images.append(cv2.resize(image, size))
        labels.append(class_to_index[class_name])
    return images, labels


# The validation split is what the rest of the notebook refers to as "test".
train_images, train_labels = _load_split(train_annotations, train_path, classes)
test_images, test_labels = _load_split(valid_annotations, valid_path, classes)

In [137]:
# Stack the per-image lists into arrays: X_* hold the images, Y_* the integer
# class indices.
X_train, Y_train = np.array(train_images), np.array(train_labels)
X_test, Y_test = np.array(test_images), np.array(test_labels)

In [138]:
# Flatten every image into one column (features x examples) and scale the
# pixel values to [0, 1].
# The arrays are rebuilt from the raw image lists instead of from
# X_train / X_test themselves, so re-running this cell cannot transpose or
# divide by 255 a second time.
X_train = np.asarray(train_images).reshape(len(train_images), -1).T / 255.0
X_test = np.asarray(test_images).reshape(len(test_images), -1).T / 255.0

num_classes = len(classes)
pixels = X_train.shape[0]  # number of input features per example

In [139]:
def init_params():
    """Create the initial weights and biases of the two-layer network.

    Reads the module-level `num_classes` and `pixels`. Both layers have
    `num_classes` units: W1 is (num_classes, pixels) and W2 is
    (num_classes, num_classes); the biases are zero column vectors.

    Returns:
        (W1, b1, W2, b2)
    """
    # A private generator seeded with 1 yields the same numbers as
    # np.random.seed(1) followed by np.random.randn, but leaves NumPy's
    # global random state untouched for the rest of the notebook.
    rng = np.random.RandomState(1)
    W1 = rng.randn(num_classes, pixels) * 0.01
    b1 = np.zeros((num_classes, 1))
    W2 = rng.randn(num_classes, num_classes) * 0.01
    b2 = np.zeros((num_classes, 1))
    return W1, b1, W2, b2

def ReLU(Z):
    """Rectified linear unit: element-wise max(Z, 0)."""
    rectified = np.maximum(0, Z)
    return rectified

def softmax(Z):
    """Column-wise softmax.

    Args:
        Z: array whose axis 0 indexes the classes; each column is normalised
            independently.

    Returns:
        Array of the same shape as Z whose columns each sum to 1.
    """
    # Shift every column by its OWN maximum. Shifting by the global maximum is
    # mathematically equivalent, but a column far below that maximum would
    # underflow to all zeros and produce 0/0 = NaN.
    shifted = Z - np.max(Z, axis=0, keepdims=True)
    exp_Z = np.exp(shifted)
    return exp_Z / np.sum(exp_Z, axis=0, keepdims=True)
    
def forward_prop(W1, b1, W2, b2, X):
    """Run one forward pass: linear -> ReLU -> linear -> softmax.

    Returns:
        (Z1, A1, Z2, A2): pre-activations and activations of both layers;
        A2 is the softmax output.
    """
    Z1 = W1.dot(X) + b1
    A1 = ReLU(Z1)
    Z2 = W2.dot(A1) + b2
    return Z1, A1, Z2, softmax(Z2)

def ReLU_deriv(Z):
    """Derivative of ReLU as a boolean mask: True where Z > 0, else False."""
    positive_mask = Z > 0
    return positive_mask

def one_hot(Y, num_classes=None):
    """One-hot encode integer labels, one column per example.

    Args:
        Y: 1-D integer array of class indices.
        num_classes: number of rows in the result. When omitted it is inferred
            as Y.max() + 1, which under-counts whenever the highest class index
            does not occur in Y and fails on an empty Y; pass it explicitly in
            those cases.

    Returns:
        Float array of shape (num_classes, Y.size) with a single 1 per column.
    """
    if num_classes is None:
        num_classes = Y.max() + 1
    one_hot_Y = np.zeros((Y.size, num_classes))
    one_hot_Y[np.arange(Y.size), Y] = 1
    # Transpose so that examples run along the columns.
    return one_hot_Y.T

def backward_prop(Z1, A1, Z2, A2, W1, W2, X, Y):
    """Compute the gradients of both layers for one batch.

    Args:
        Z1, A1, Z2, A2: values returned by forward_prop for X (Z2 is accepted
            but not used here).
        W1, W2: current weights (W1 is accepted but not used here).
        X: inputs, one example per column.
        Y: 1-D integer array of class indices, one per column of X.

    Returns:
        (dW1, db1, dW2, db2), each averaged over the Y.size examples.
    """
    m = Y.size
    # Build the one-hot targets with exactly A2's shape. Deriving the number
    # of classes from Y.max() would give too few rows whenever the highest
    # class index is absent from this batch, breaking the subtraction below.
    one_hot_Y = np.zeros_like(A2)
    one_hot_Y[Y, np.arange(m)] = 1
    # Output error; this is the gradient of a cross-entropy loss w.r.t. Z2
    # when A2 is a softmax output (the loss itself is never computed here).
    dZ2 = A2 - one_hot_Y
    dW2 = 1 / m * np.dot(dZ2, A1.T)
    db2 = 1 / m * np.sum(dZ2, axis=1, keepdims=True)
    # Propagate the error back through the second layer and the ReLU.
    dZ1 = np.dot(W2.T, dZ2) * ReLU_deriv(Z1)
    dW1 = 1 / m * np.dot(dZ1, X.T)
    db1 = 1 / m * np.sum(dZ1, axis=1, keepdims=True)
    return dW1, db1, dW2, db2

def update_params(W1, b1, W2, b2, dW1, db1, dW2, db2, alpha):
    """Apply one gradient-descent step and return the updated parameters.

    Args:
        W1, b1, W2, b2: current parameters.
        dW1, db1, dW2, db2: matching gradients.
        alpha: learning rate.

    Returns:
        (W1, b1, W2, b2) after the step, as new arrays. The arrays passed in
        are left unmodified, so a caller that keeps a reference to the previous
        parameters does not see them change underneath it.
    """
    W1 = W1 - alpha * dW1
    b1 = b1 - alpha * db1
    W2 = W2 - alpha * dW2
    b2 = b2 - alpha * db2
    return W1, b1, W2, b2

In [140]:
def get_predictions(A2):
    """Return, for every column of A2, the row index of its largest entry."""
    predicted_classes = np.argmax(A2, axis=0)
    return predicted_classes

def get_accuracy(predictions, Y):
    """Percentage (0-100) of entries where `predictions` equals `Y`."""
    hits = predictions == Y
    return np.mean(hits) * 100

def gradient_descent(X, Y, alpha, iterations):
    """Train the network with full-batch gradient descent.

    Args:
        X: inputs, one example per column.
        Y: integer class indices, one per column of X.
        alpha: learning rate.
        iterations: number of update steps.

    Returns:
        (W1, b1, W2, b2) after the last step.

    Every 10th iteration the accuracy on (X, Y) is printed; it is computed
    from the forward pass made before that iteration's update is applied.
    """
    W1, b1, W2, b2 = init_params()
    for i in range(iterations):
        Z1, A1, Z2, A2 = forward_prop(W1, b1, W2, b2, X)
        grads = backward_prop(Z1, A1, Z2, A2, W1, W2, X, Y)
        W1, b1, W2, b2 = update_params(W1, b1, W2, b2, *grads, alpha)
        if i % 10 != 0:
            continue
        predictions = get_predictions(A2)
        accuracy = get_accuracy(predictions, Y)
        print(f"Iteration: {i}, Accuracy: {accuracy:.2f}%")
    return W1, b1, W2, b2

In [141]:
# Train on the full training split; the returned parameters are reused by the
# prediction cells below.
W1, b1, W2, b2 = gradient_descent(X=X_train, Y=Y_train, alpha=0.01, iterations=100)

Iteration: 0, Accuracy: 11.45%
Iteration: 10, Accuracy: 13.52%
Iteration: 20, Accuracy: 13.52%
Iteration: 30, Accuracy: 15.05%
Iteration: 40, Accuracy: 17.78%
Iteration: 50, Accuracy: 20.94%
Iteration: 60, Accuracy: 23.01%
Iteration: 70, Accuracy: 22.36%
Iteration: 80, Accuracy: 22.68%
Iteration: 90, Accuracy: 23.77%


In [142]:
def make_predictions(X, W1, b1, W2, b2):
    """Predict a class index for every column of X with the given parameters."""
    # Only the final activation of the forward pass is needed.
    *_, A2 = forward_prop(W1, b1, W2, b2, X)
    return get_predictions(A2)

def test_prediction(index, W1, b1, W2, b2):
    """Print prediction and label for one training example and display it.

    Args:
        index: column of the module-level X_train (and entry of Y_train) to
            inspect.
        W1, b1, W2, b2: trained parameters.

    Note that this looks at the training split (X_train / Y_train), not the
    validation data.
    """
    current_image = X_train[:, index, None]
    prediction = make_predictions(current_image, W1, b1, W2, b2)
    label = Y_train[index]
    print("Prediction: ", prediction)
    print("Label: ", label)

    # The pixels were read with cv2.imread, which returns channels in BGR
    # order, while imshow interprets a 3-channel array as RGB; reverse the
    # channel axis so the colours are displayed correctly.
    current_image = current_image.reshape((224, 224, 3))[:, :, ::-1]
    plt.imshow(current_image, interpolation='nearest')
    plt.show()

In [143]:
# Evaluate the trained parameters on the held-out split (loaded from
# `valid_path`) and report the predicted class indices and the accuracy.
dev_predictions = make_predictions(X=X_test, W1=W1, b1=b1, W2=W2, b2=b2)
print(dev_predictions)
print(f"{get_accuracy(dev_predictions, Y_test):.2f}%")

[4 0 5 8 9 4 4 6 0 0 0 6 4 0 0 5 8 6 4 8 0 9 9 5 0 9 9 0 6 0 0 6 0 8 4 8 8
 5 0 0 0 9 0 4 0 0 0 0 0 0 8 5 0 0 6 9 5 0 0 8 0 9 0 4 8 0 8 0 0 0 8 8 9 8]
17.57%
