In [1]:
#Importing libraries
import pandas as pd
import numpy as np

from sklearn.model_selection import train_test_split
from tensorflow.keras.preprocessing.image import ImageDataGenerator,load_img,img_to_array

import tensorflow as tf
from tensorflow.keras.applications import ResNet50
from tensorflow.keras.layers import Dense, Flatten 
from tensorflow.keras.models import Model

from sklearn.metrics import precision_recall_fscore_support, classification_report

In [2]:
#loading the dataset
#taking only 1000 images for faster training
train_labels= pd.read_csv("cars/labels_train.csv")
train_labels=train_labels[:1000]
val_labels=pd.read_csv("cars/labels_trainval.csv")
val_labels=val_labels[:1000]


In [3]:
train_labels.head()

Unnamed: 0,frame,xmin,xmax,ymin,ymax,class_id
0,1478019952686311006.jpg,237,251,143,155,1
1,1478019952686311006.jpg,437,454,120,186,3
2,1478019953180167674.jpg,218,231,146,158,1
3,1478019953689774621.jpg,171,182,141,154,2
4,1478019953689774621.jpg,179,191,144,155,1


In [4]:
val_labels.head()

Unnamed: 0,frame,xmin,xmax,ymin,ymax,class_id
0,1478019952686311006.jpg,237,251,143,155,1
1,1478019952686311006.jpg,437,454,120,186,3
2,1478019953180167674.jpg,218,231,146,158,1
3,1478019953689774621.jpg,171,182,141,154,2
4,1478019953689774621.jpg,179,191,144,155,1


In [5]:
#Checking the number of rows and columns
train_labels.shape,val_labels.shape

((1000, 6), (1000, 6))

Since restnet expects the input size of the image as 224x224 imagesize, we preprocess the data

In [6]:
train_labels[['xmin','xmax']]/=224

In [7]:
train_labels[["ymin","ymax"]]/=224

In [8]:
train_labels.head()

Unnamed: 0,frame,xmin,xmax,ymin,ymax,class_id
0,1478019952686311006.jpg,1.058036,1.120536,0.638393,0.691964,1
1,1478019952686311006.jpg,1.950893,2.026786,0.535714,0.830357,3
2,1478019953180167674.jpg,0.973214,1.03125,0.651786,0.705357,1
3,1478019953689774621.jpg,0.763393,0.8125,0.629464,0.6875,2
4,1478019953689774621.jpg,0.799107,0.852679,0.642857,0.691964,1


In [9]:
val_labels[['xmin','xmax']]/=224

In [10]:
val_labels[['ymin','ymax']]/=224

In [11]:
val_labels.head()

Unnamed: 0,frame,xmin,xmax,ymin,ymax,class_id
0,1478019952686311006.jpg,1.058036,1.120536,0.638393,0.691964,1
1,1478019952686311006.jpg,1.950893,2.026786,0.535714,0.830357,3
2,1478019953180167674.jpg,0.973214,1.03125,0.651786,0.705357,1
3,1478019953689774621.jpg,0.763393,0.8125,0.629464,0.6875,2
4,1478019953689774621.jpg,0.799107,0.852679,0.642857,0.691964,1


For splitting training data into train and test

In [12]:
train_data, test_data=train_test_split(train_labels,test_size=0.2,random_state=42)

For data augmentation

In [13]:
augmentor= ImageDataGenerator(rotation_range=20,shear_range=0.2, zoom_range=0.2, horizontal_flip=True, fill_mode="nearest", width_shift_range=0.2, height_shift_range=0.2)

Preparing training data

In [14]:
images=[]
labels=[]
for i,row in train_data.iterrows():
    image_path=f"cars/images/{row['frame']}"
    img=load_img(image_path, target_size=(224,224))
    img=img_to_array(img)
    img=augmentor.random_transform(img)
    images.append(img)
    labels.append([row['xmin'],row['xmax'],row['ymin'],row['ymax'],row['class_id']])

In [15]:
images=np.array(images)

In [16]:
labels=np.array(labels)

In [17]:
test_images=[]
test_labels=[]

for i, row in test_data.iterrows():
    image_path=f"cars/images/{row['frame']}"
    img=load_img(image_path,target_size=(224,224))
    img=img_to_array(img)
    test_images.append(img)
    test_labels.append([row['xmin'],row['xmax'],row['ymin'],row['ymax'],row['class_id']])

    

In [18]:
test_images=np.array(test_images)
test_labels=np.array(test_labels)

In [19]:
#creating a base model using ResNet
#include_top=False because we are not importing output layers like dense and flatten
base_model=ResNet50(weights="imagenet",include_top=False, input_shape=(224,224,3))
# Freeze the layers of the base model
for layer in base_model.layers:
    layer.trainable = False

In [20]:
#extracting the feature map of the base model.
x=base_model.output
x=Flatten()(x)
x=Dense(1024,activation="relu")(x)
x=Dense(512,activation="relu")(x)
#5 final memory neurons because 4 for bounding box coordinates and one for class label.
output=Dense(5,activation="linear")(x)

In [21]:
model=Model(inputs=base_model.input,outputs=output)

In [22]:
model.compile(optimizer="adam",loss="mean_squared_error",metrics=['accuracy'])

In [23]:
#model.summary()

Training the model for 10 epochs

In [24]:
model.fit(images,labels,batch_size=8,epochs=20,validation_data=(test_images,test_labels))

Epoch 1/20
[1m100/100[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m139s[0m 1s/step - accuracy: 0.2848 - loss: 753.8194 - val_accuracy: 0.2850 - val_loss: 25.6225
Epoch 2/20
[1m100/100[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m136s[0m 1s/step - accuracy: 0.3122 - loss: 45.0647 - val_accuracy: 0.1950 - val_loss: 4.5328
Epoch 3/20
[1m100/100[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m120s[0m 1s/step - accuracy: 0.3693 - loss: 10.6077 - val_accuracy: 0.0800 - val_loss: 7.6584
Epoch 4/20
[1m100/100[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m120s[0m 1s/step - accuracy: 0.4599 - loss: 6.0644 - val_accuracy: 0.6700 - val_loss: 3.2302
Epoch 5/20
[1m100/100[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m124s[0m 1s/step - accuracy: 0.5687 - loss: 1.7681 - val_accuracy: 0.8000 - val_loss: 2.6190
Epoch 6/20
[1m100/100[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m119s[0m 1s/step - accuracy: 0.6893 - loss: 0.7874 - val_accuracy: 0.8250 - val_loss: 1.0574
Epoch 7/20
[1m10

<keras.src.callbacks.history.History at 0x30666d820>

In [37]:
#Evaluating the model

model.evaluate(test_images, test_labels)

[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m19s[0m 3s/step - accuracy: 0.5839 - loss: 1.2160


[1.2566773891448975, 0.574999988079071]

In [38]:
predictions=model.predict(test_images)

[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m14s[0m 2s/step


In [39]:
#Extracting class labels
y_true=test_labels[:,4]
y_prediction=np.argmax(predictions[:,4:],axis=1)#extracting predicted class labels.


In [40]:
precision,recall,f1,support=precision_recall_fscore_support(y_true,y_prediction,average=None)

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


In [41]:
for i, class_id in enumerate(np.unique(y_true)):
    print(f"Class {class_id} - Precision: {precision[i]:.4f}, Recall: {recall[i]:.4f}, F1 Score: {f1[i]:.4f}")

Class 1.0 - Precision: 0.0000, Recall: 0.0000, F1 Score: 0.0000
Class 2.0 - Precision: 0.0000, Recall: 0.0000, F1 Score: 0.0000
Class 3.0 - Precision: 0.0000, Recall: 0.0000, F1 Score: 0.0000
Class 4.0 - Precision: 0.0000, Recall: 0.0000, F1 Score: 0.0000
Class 5.0 - Precision: 0.0000, Recall: 0.0000, F1 Score: 0.0000


In [30]:
train_data

Unnamed: 0,frame,xmin,xmax,ymin,ymax,class_id
29,1478019956680248165.jpg,0.808036,0.861607,0.638393,0.683036,1
535,1478020210190790169.jpg,1.232143,1.272321,0.566964,0.616071,5
695,1478020223699479085.jpg,1.196429,1.281250,0.642857,0.705357,1
557,1478020211190321969.jpg,0.941964,0.959821,0.486607,0.522321,5
836,1478020232212661501.jpg,0.665179,0.705357,0.625000,0.678571,1
...,...,...,...,...,...,...
106,1478019965682301515.jpg,1.151786,1.223214,0.629464,0.674107,1
270,1478019975180844551.jpg,1.151786,1.191964,0.419643,0.486607,5
860,1478020232692272848.jpg,1.357143,1.575893,0.607143,0.763393,1
435,1478020205689678816.jpg,1.062500,1.093750,0.553571,0.607143,5


In [31]:
test_data

Unnamed: 0,frame,xmin,xmax,ymin,ymax,class_id
521,1478020209691270577.jpg,1.218750,1.254464,0.558036,0.607143,5
737,1478020228190773357.jpg,1.133929,1.165179,0.562500,0.602679,5
740,1478020228190773357.jpg,1.178571,1.250000,0.625000,0.683036,1
660,1478020220191836915.jpg,1.142857,1.209821,0.629464,0.683036,1
411,1478020203690068016.jpg,0.959821,0.991071,0.558036,0.607143,5
...,...,...,...,...,...,...
408,1478020203690068016.jpg,0.857143,0.928571,0.625000,0.678571,1
332,1478019982681185768.jpg,1.620536,1.691964,0.616071,0.785714,3
208,1478019973687625979.jpg,0.491071,0.517857,0.558036,0.598214,5
613,1478020214191388441.jpg,1.455357,1.486607,0.500000,0.549107,5


In [42]:
%pip install matplotlib

Collecting matplotlib
  Downloading matplotlib-3.9.1.post1-cp312-cp312-macosx_11_0_arm64.whl.metadata (11 kB)
Collecting contourpy>=1.0.1 (from matplotlib)
  Downloading contourpy-1.2.1-cp312-cp312-macosx_11_0_arm64.whl.metadata (5.8 kB)
Collecting cycler>=0.10 (from matplotlib)
  Using cached cycler-0.12.1-py3-none-any.whl.metadata (3.8 kB)
Collecting fonttools>=4.22.0 (from matplotlib)
  Downloading fonttools-4.53.1-cp312-cp312-macosx_11_0_arm64.whl.metadata (162 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m162.6/162.6 kB[0m [31m3.0 MB/s[0m eta [36m0:00:00[0ma [36m0:00:01[0m
[?25hCollecting kiwisolver>=1.3.1 (from matplotlib)
  Downloading kiwisolver-1.4.5-cp312-cp312-macosx_11_0_arm64.whl.metadata (6.4 kB)
Collecting pyparsing>=2.3.1 (from matplotlib)
  Using cached pyparsing-3.1.2-py3-none-any.whl.metadata (5.1 kB)
Downloading matplotlib-3.9.1.post1-cp312-cp312-macosx_11_0_arm64.whl (7.8 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32