# TAU Vehicle Type Recognition Competition
## Vehicle type classification from image data

### Import libraries

In [5]:
import warnings
warnings.filterwarnings('ignore')

import os
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import matplotlib.pyplot as plt
from PIL import Image
from math import ceil

from keras.preprocessing.image import ImageDataGenerator
from keras.preprocessing.image import img_to_array
from keras.preprocessing.image import load_img

from keras.applications import inception_v3
from keras.applications.inception_v3 import InceptionV3
from keras.applications.inception_v3 import preprocess_input as inception_v3_preprocessor

from keras.layers import Dense, GlobalAveragePooling2D
from keras.models import Model

from keras.optimizers import Adam
from keras.metrics import categorical_crossentropy

### Import the data

In [6]:
root = 'C:/Machine Learning/ML Project/vehicle/train/train'
data = []
for category in sorted(os.listdir(root)):
    for file in sorted(os.listdir(os.path.join(root, category))):
        data.append((category, os.path.join(root, category,  file)))
df_train = pd.DataFrame(data, columns=['classes', 'file_path'])


root2 = 'C:/Machine Learning/ML Project/vehicle/test/testset'
test_data = []
for file in sorted(os.listdir(root2)):
      test_data.append(os.path.join(root2,  file))
df_test = pd.DataFrame(test_data, columns=['file_path'])



### Load the paths

In [39]:
from sklearn.model_selection import train_test_split

y_train = df_train['classes'].to_numpy()
X_train = df_train['file_path'].to_numpy()
X_test_wl = df_test['file_path'].to_numpy()


X_train, X_test, y_train, y_test = train_test_split(X_train, y_train, test_size=0.2, random_state=42)


train_data = np.array([X_train,y_train])
test_data = np.array([X_test,y_test])

dataframe_train = pd.DataFrame({'file_path': train_data[0,:], 'classes': train_data[1,:]})

dataframe_test = pd.DataFrame({'file_path': test_data[0,:], 'classes': test_data[1,:]})


# df_train = pd.DataFrame(train_data, columns=['classes', 'file_path'])
# df_test = pd.DataFrame(test_data, columns=['classes', 'file_path'])
# print(df_train)

In [4]:
# Display example
print(y_train)

['Boat' 'Boat' 'Boat' ... 'Bicycle' 'Car' 'Motorcycle']


### Create generator to load images during the fit

In [8]:
batch_size = 32

train_datagen = ImageDataGenerator(rescale=1./255)

valid_datagen = ImageDataGenerator(rescale=1./255)

test_datagen = ImageDataGenerator(rescale=1./255)

train_generator = train_datagen.flow_from_dataframe(
        dataframe_train,
        x_col = 'file_path',
        y_col = 'classes',
        target_size=(299, 299),
        batch_size=batch_size,
        class_mode='categorical')

validation_generator = valid_datagen.flow_from_dataframe(
        dataframe_test,
        x_col = 'file_path',
        y_col = 'classes',
        target_size=(299, 299),
        batch_size=batch_size,
        class_mode='categorical')

test_generator = test_datagen.flow_from_dataframe(
        dataframe_test,
        x_col = 'file_path',
        target_size=(299, 299),
        batch_size=batch_size,
        class_mode=None)

Found 22436 validated image filenames belonging to 17 classes.
Found 5609 validated image filenames belonging to 17 classes.
Found 5609 validated image filenames.


### Create the model

In [40]:
# Get the InceptionV3 model so we can do transfer learning
base_model = InceptionV3(weights = 'imagenet', include_top = False, input_shape=(299, 299, 3))

# Add a global spatial average pooling layer
x = base_model.output
x = GlobalAveragePooling2D()(x)

# Add a fully-connected layer and a logistic layer with 20 classes 

x = Dense(512, activation='relu')(x)
predictions = Dense(17, activation='softmax')(x)

# The model we will train
model = Model(inputs = base_model.input, outputs = predictions)

# Freeze all convolutional InceptionV3 layers
for layer in base_model.layers:
    layer.trainable = False



Downloading data from https://github.com/fchollet/deep-learning-models/releases/download/v0.5/inception_v3_weights_tf_dim_ordering_tf_kernels_notop.h5


### Compile the model

In [41]:
# Compile with Adam
model.compile(Adam(lr=.0001), loss='categorical_crossentropy', metrics=['accuracy'])

### Train the model

In [44]:
model.fit_generator(
        train_generator,
        steps_per_epoch=ceil(len(X_train) / batch_size),
        epochs=10,
        validation_data=validation_generator,
        validation_steps=ceil(len(X_test) / batch_size))


Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<keras.callbacks.callbacks.History at 0x2079bbb66a0>

### Save the model 

In [45]:
model.save("model.h5")

# Load model 
# model = load_model('model.h5')

### Test on Kaggle dataset

In [9]:
from keras.models import load_model

# Load model 
model = load_model('model.h5')






In [59]:
model.fit_generator(
        train_generator,
        steps_per_epoch=ceil(len(X_train) / batch_size),
        epochs=1,
        validation_data=validation_generator,
        validation_steps=ceil(len(X_test) / batch_size))


Epoch 1/1


<keras.callbacks.callbacks.History at 0x2940ff3f588>

In [42]:
size_test = 1000

testList = []

X_test_reduced = X_test_wl

for i in range(len(X_test_reduced)):
    if (i%100==0):
        print(i)
    img = img_to_array(load_img(X_test_reduced[i], target_size=(299,299)))/255
    img = img.astype(np.float32)
    testList.append(img)

testList = np.array(testList)

0
100
200
300
400
500
600
700
800
900
1000
1100
1200
1300
1400
1500
1600
1700
1800
1900
2000
2100
2200
2300
2400
2500
2600
2700
2800
2900
3000
3100
3200
3300
3400
3500
3600
3700
3800
3900
4000
4100
4200
4300
4400
4500
4600
4700
4800
4900
5000
5100
5200
5300
5400
5500
5600
5700
5800
5900
6000
6100
6200
6300
6400
6500
6600
6700
6800
6900
7000
7100
7200
7300
7400
7500
7600
7700
7800
7900


In [40]:
print(len(X_test_wl))

7958


In [43]:
print(testList.shape)

(7958, 299, 299, 3)


In [44]:
# Predicting
predicted = model.predict(test_generator,steps = ceil(len(X_test)/1))
# predicted = model.predict(testList)

In [36]:
print(predicted[0])

[1.6378048e-05 3.8820098e-03 1.2303755e-05 9.4173700e-01 1.6872598e-03
 5.0528903e-02 4.4117251e-06 4.7081357e-08 2.9773043e-06 1.8067808e-04
 2.3189766e-06 4.7191452e-06 5.5452261e-07 7.2619673e-07 2.5292381e-04
 8.3190936e-04 8.5488299e-04]


In [45]:
predictedList = list(map(lambda x : np.argmax(x),predicted))
# print(predictedList)

classes = list(dict.fromkeys(list(np.array(df_train["classes"]))))
# print(classes)

stringPredictedList = list(map(lambda x : classes[x],predictedList))
print(stringPredictedList)

['Boat', 'Van', 'Truck', 'Van', 'Bicycle', 'Car', 'Car', 'Boat', 'Tank', 'Car', 'Motorcycle', 'Car', 'Bicycle', 'Bus', 'Car', 'Bus', 'Tank', 'Car', 'Limousine', 'Car', 'Car', 'Car', 'Car', 'Van', 'Bicycle', 'Bicycle', 'Car', 'Car', 'Car', 'Car', 'Boat', 'Car', 'Taxi', 'Car', 'Car', 'Car', 'Car', 'Boat', 'Truck', 'Motorcycle', 'Car', 'Truck', 'Helicopter', 'Car', 'Motorcycle', 'Car', 'Bicycle', 'Car', 'Snowmobile', 'Bicycle', 'Bicycle', 'Car', 'Boat', 'Ambulance', 'Car', 'Boat', 'Boat', 'Car', 'Helicopter', 'Truck', 'Car', 'Car', 'Bicycle', 'Cart', 'Bus', 'Van', 'Car', 'Boat', 'Boat', 'Car', 'Bicycle', 'Truck', 'Boat', 'Car', 'Car', 'Car', 'Truck', 'Boat', 'Truck', 'Boat', 'Taxi', 'Car', 'Van', 'Car', 'Bus', 'Van', 'Boat', 'Helicopter', 'Boat', 'Van', 'Truck', 'Van', 'Boat', 'Car', 'Car', 'Truck', 'Boat', 'Truck', 'Car', 'Helicopter', 'Bicycle', 'Car', 'Car', 'Truck', 'Truck', 'Boat', 'Taxi', 'Truck', 'Tank', 'Car', 'Car', 'Motorcycle', 'Car', 'Car', 'Car', 'Car', 'Taxi', 'Car', 'Tank',

In [47]:
predClasses = pd.DataFrame({'Category': stringPredictedList})
ids = pd.DataFrame({'Id':np.arange(len(stringPredictedList))})
output = pd.concat([ids, predClasses], axis=1)
print(output)
output.to_csv('predicted_labels_inceptionV3.csv', index = None, header=True)

        Id Category
0        0     Boat
1        1      Van
2        2    Truck
3        3      Van
4        4  Bicycle
...    ...      ...
7953  7953      Bus
7954  7954  Bicycle
7955  7955      Car
7956  7956     Boat
7957  7957     Boat

[7958 rows x 2 columns]


In [38]:
accuracy_test = 0
for i in range(len(stringPredictedList)):
    if stringPredictedList[i] == y_test[i]:
        accuracy_test +=1
print(accuracy_test/len(stringPredictedList))

0.85
