In [1]:
import os
import pickle

from skimage.io import imread
from skimage.transform import resize
import numpy as np
import pandas as pd
from sklearn.metrics import accuracy_score

In [2]:
df_train = pd.read_csv("./data/train/classes.csv")

In [3]:
df_test = pd.read_csv("./data/test/classes.csv")

In [4]:
def fix_path_train(path):
    return f"./data/train/{path}"

def fix_path_test(path):
    return f"./data/test/{path}"

In [5]:
df_train["filename"] = df_train["filename"].apply(fix_path_train)
df_test["filename"] = df_test["filename"].apply(fix_path_test)

In [6]:
categories = ['car', 'bike']
labels = [df_train.iloc[id, 1] for id in range(df_train.shape[0])]

In [7]:
labels_test = [df_test.iloc[id, 1] for id in range(df_test.shape[0])]

In [8]:
data = []

for id in range(df_train.shape[0]):
    img = imread(df_train.iloc[id, 0])
    img = resize(img, (15, 15))
    data.append(img.flatten())

In [9]:
data_test = []

for id in range(df_test.shape[0]):
    img = imread(df_test.iloc[id, 0])
    img = resize(img, (15, 15))
    data_test.append(img.flatten())

In [10]:
data = np.asarray(data)
labels = np.asarray(labels)

In [11]:
data_test = np.asarray(data_test)
labels_test = np.asarray(labels_test)

## Sklearn

In [12]:
from sklearn.ensemble import VotingClassifier
from sklearn.svm import SVC
from sklearn.neural_network import MLPClassifier
from sklearn.naive_bayes import MultinomialNB
from sklearn.ensemble import RandomForestClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.neighbors import KNeighborsClassifier

In [19]:
svm_classifier = SVC(probability=True, random_state=42)
mlp_classifier = MLPClassifier(hidden_layer_sizes=(1000, 500, 100, 10), max_iter=300, solver='adam', activation='relu', random_state=42)
decision_tree_classifier = DecisionTreeClassifier(random_state=42)
random_forest_classifier = RandomForestClassifier(random_state=42)
naive_bayes_classifier = MultinomialNB()
knn_classifier = KNeighborsClassifier(n_neighbors=3, metric="euclidean")

ensemble_classifier = VotingClassifier(estimators=[
    ('svm', svm_classifier),
    ('mlp', mlp_classifier),
    ('decision_tree', decision_tree_classifier),
    ('random_forest', random_forest_classifier),
    ('naive_bayes', naive_bayes_classifier),
    ('knn', knn_classifier),
], voting='soft') 

In [20]:
ensemble_classifier.fit(data, labels)

In [22]:
y_prediction = ensemble_classifier.predict(data_test)

score = accuracy_score(y_prediction, labels_test)

print('{}% of samples were correctly classified'.format(str(score * 100)))

83.55704697986577% of samples were correctly classified


## Tensorflow

In [123]:
from keras.datasets import mnist
from keras.models import Sequential
from keras.layers import Dense, Dropout, Conv2D, MaxPool2D, Flatten
from keras.utils import to_categorical

In [45]:
data[0].shape

(675,)

In [51]:
labels_one_hot = to_categorical(labels, num_classes=10)

In [115]:
model = Sequential()

model.add(Dense(2048, input_shape=(data[0].shape), activation='relu'))
model.add(Dense(1024, activation='relu'))
model.add(Dense(512, activation='relu'))
model.add(Dense(256, activation='relu'))
model.add(Dense(128, activation='relu'))
model.add(Dense(64, activation='relu'))
model.add(Dense(10, activation='softmax'))

In [116]:
model.summary()

Model: "sequential_12"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense_37 (Dense)            (None, 2048)              1384448   
                                                                 
 dense_38 (Dense)            (None, 1024)              2098176   
                                                                 
 dense_39 (Dense)            (None, 512)               524800    
                                                                 
 dense_40 (Dense)            (None, 256)               131328    
                                                                 
 dense_41 (Dense)            (None, 128)               32896     
                                                                 
 dense_42 (Dense)            (None, 64)                8256      
                                                                 
 dense_43 (Dense)            (None, 10)              

In [117]:
model.compile(loss='sparse_categorical_crossentropy', metrics=['accuracy'], optimizer='adam')

In [118]:
model.fit(data, labels, batch_size=64, epochs=300)

Epoch 1/300
Epoch 2/300
Epoch 3/300
Epoch 4/300
Epoch 5/300
Epoch 6/300
Epoch 7/300
Epoch 8/300
Epoch 9/300
Epoch 10/300
Epoch 11/300
Epoch 12/300
Epoch 13/300
Epoch 14/300
Epoch 15/300
Epoch 16/300
Epoch 17/300
Epoch 18/300
Epoch 19/300
Epoch 20/300
Epoch 21/300
Epoch 22/300
Epoch 23/300
Epoch 24/300
Epoch 25/300
Epoch 26/300
Epoch 27/300
Epoch 28/300
Epoch 29/300
Epoch 30/300
Epoch 31/300
Epoch 32/300
Epoch 33/300
Epoch 34/300
Epoch 35/300
Epoch 36/300
Epoch 37/300
Epoch 38/300
Epoch 39/300
Epoch 40/300
Epoch 41/300
Epoch 42/300
Epoch 43/300
Epoch 44/300
Epoch 45/300
Epoch 46/300
Epoch 47/300
Epoch 48/300
Epoch 49/300
Epoch 50/300
Epoch 51/300
Epoch 52/300
Epoch 53/300
Epoch 54/300
Epoch 55/300
Epoch 56/300
Epoch 57/300
Epoch 58/300
Epoch 59/300
Epoch 60/300
Epoch 61/300
Epoch 62/300
Epoch 63/300
Epoch 64/300
Epoch 65/300
Epoch 66/300
Epoch 67/300
Epoch 68/300
Epoch 69/300
Epoch 70/300
Epoch 71/300
Epoch 72/300
Epoch 73/300
Epoch 74/300
Epoch 75/300
Epoch 76/300
Epoch 77/300
Epoch 78

<keras.src.callbacks.History at 0x2786be65f40>

In [120]:
y_prediction = model.predict(data_test)
predicted_labels = np.argmax(y_prediction, axis=1)
score = accuracy_score(predicted_labels, labels_test)

print('{}% of samples were correctly classified'.format(str(score * 100)))

83.55704697986577% of samples were correctly classified


## Tensorflow 2

In [133]:
data2 = np.reshape(data, (data.shape[0], 15, 45))

In [138]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPool2D, Dropout, Flatten, Dense

model = Sequential()

# convolutional layer
model.add(Conv2D(50, kernel_size=(3, 3), strides=(1, 1), padding='same', activation='relu', input_shape=(15, 45, 1)))

# convolutional layer
model.add(Conv2D(75, kernel_size=(3, 3), strides=(1, 1), padding='same', activation='relu'))
model.add(MaxPool2D(pool_size=(2, 2)))
model.add(Dropout(0.25))

model.add(Conv2D(125, kernel_size=(3, 3), strides=(1, 1), padding='same', activation='relu'))
model.add(MaxPool2D(pool_size=(2, 2)))
model.add(Dropout(0.25))

# flatten output of conv
model.add(Flatten())

# hidden layer
model.add(Dense(500, activation='relu'))
model.add(Dropout(0.4))
model.add(Dense(250, activation='relu'))
model.add(Dropout(0.3))

# output layer
model.add(Dense(1, activation='softmax'))

# compiling the sequential model
model.compile(loss='categorical_crossentropy', metrics=['accuracy'], optimizer='adam')

# training the model for 10 epochs
model.fit(data2, labels, batch_size=64, epochs=10)


Epoch 1/10


  return dispatch_target(*args, **kwargs)


Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<keras.src.callbacks.History at 0x27886c0eb80>

In [140]:
data_test2 = np.reshape(data_test, (data_test.shape[0], 15, 45))

In [141]:
y_prediction = model.predict(data_test2)
predicted_labels = np.argmax(y_prediction, axis=1)
score = accuracy_score(predicted_labels, labels_test)

print('{}% of samples were correctly classified'.format(str(score * 100)))

47.81879194630873% of samples were correctly classified
