# Vehicle Detection Solution 
## Create the parent folder for custom model training and child folders to store the data

In [1]:
import shutil
# shutil.unpack_archive("Images.zip")

In [2]:
import pandas as pd
import os
import cv2
import numpy as np
from sklearn.model_selection import train_test_split
from tensorflow import keras
from tensorflow.keras import layers




## Prepare the dataset for Model Training

In [3]:
#### Step 2: Load and preprocess the data
# Read labels.csv with header=None (no row is treated as a header),
# then attach the column names explicitly.
labels_csv_path = "../../Datasets/Capstone_1/Part1/labels.csv"
labels_df = pd.read_csv(labels_csv_path, header=None, sep=',')
labels_df.columns = [
    'image_id',
    'class',
    'x_min', 'y_min',
    'x_max', 'y_max',
]

In [4]:
# Preview the first five label rows
labels_df.head(n=5)

Unnamed: 0,image_id,class,x_min,y_min,x_max,y_max
0,0,pickup_truck,213,34,255,50
1,0,car,194,78,273,122
2,0,car,155,27,183,35
3,0,articulated_truck,43,25,109,55
4,0,car,106,32,124,45


In [5]:
# Zero-pad the image IDs to 8 characters (e.g. 0 -> "00000000"); the padded ID
# is later used to build the "<image_id>.jpg" filename.
# astype(str) + str.zfill keeps this cell safe to re-run: the previous
# f"{x:08d}" formatting raises ValueError once the column already holds strings.
labels_df['image_id'] = labels_df['image_id'].astype(str).str.zfill(8)

In [6]:
labels_df.tail()

Unnamed: 0,image_id,class,x_min,y_min,x_max,y_max
351544,110590,car,18,57,97,98
351545,110591,articulated_truck,2,71,690,351
351546,110592,pickup_truck,3,240,214,378
351547,110592,car,465,111,507,135
351548,110592,non-motorized_vehicle,197,187,318,269


In [7]:
# Restrict the working set to the first 1000 label rows
labels_df = labels_df.head(1000)

In [8]:
# Size every image is resized to before stacking (passed to cv2.resize), e.g. 224x224
target_size = (224, 224)
# Directory holding the images that correspond to the labels
image_dir = "../../Datasets/Capstone_1/Part1/Images/"

In [9]:
# Load one resized image per label row, keeping images and label rows aligned.
#
# cv2.imread with its default flag always returns a 3-channel BGR array (or
# None when the file cannot be read), so the former grayscale / alpha-channel
# branches could never run and have been dropped. Pixel data is left in BGR
# order exactly as before; convert with cv2.COLOR_BGR2RGB if a downstream
# consumer needs RGB.
# NOTE(review): the bounding boxes in labels_df are not rescaled to
# target_size here -- confirm whether later cells expect original coordinates.
images = []
loaded_mask = []     # one flag per label row: did its image load?
resized_cache = {}   # image_id -> resized image (or None); several rows can share an image
for image_id in labels_df['image_id']:
    image_path = os.path.join(image_dir, f"{image_id}.jpg")
    if image_id not in resized_cache:
        # Read and resize each distinct file only once
        img = cv2.imread(image_path)
        resized_cache[image_id] = None if img is None else cv2.resize(img, target_size)
    img_resized = resized_cache[image_id]
    loaded_mask.append(img_resized is not None)
    if img_resized is None:
        print(f"Error Loading image: {image_path}")
        continue
    images.append(img_resized)

# Drop label rows whose image could not be read; otherwise processed_images
# and the label arrays derived from labels_df would have different lengths.
if not all(loaded_mask):
    labels_df = labels_df[np.array(loaded_mask, dtype=bool)]
processed_images = np.array(images)

In [10]:
# Report how many images were loaded (or warn when none were)
if images:
    print(f"{len(images)} images loaded successfully.")
else:
    print("No eimage loaded. Please check the image paths.")

1000 images loaded successfully.


In [11]:
# Analyze the distribution of vehicle types in the limited dataset
class_column = labels_df['class']
vehicle_types = class_column.value_counts()
vehicle_types

car                      682
pickup_truck             111
motorized_vehicle         61
articulated_truck         30
work_van                  29
bus                       28
pedestrian                23
single_unit_truck         18
bicycle                   12
non-motorized_vehicle      5
motorcycle                 1
Name: class, dtype: int64

In [12]:
# Order the label rows by image_id so they follow the image filename order.
# processed_images was built in the pre-sort row order, so the same
# permutation is applied to it to keep row i of labels_df paired with
# processed_images[i]. A stable sort keeps rows of the same image in their
# original relative order and makes re-running this cell a no-op.
sort_order = np.argsort(labels_df['image_id'].to_numpy(), kind='stable')
if len(processed_images) != len(sort_order):
    raise ValueError(
        f"{len(processed_images)} images but {len(sort_order)} label rows; "
        "reload the images so both line up before sorting."
    )
labels_df = labels_df.iloc[sort_order]
processed_images = processed_images[sort_order]

In [13]:
# Pull the class names and the box corner columns out as NumPy arrays
bbox_columns = ['x_min', 'y_min', 'x_max', 'y_max']
bounding_boxes = labels_df[bbox_columns].to_numpy()
labels = labels_df['class'].to_numpy()

In [14]:
# Encode class names as integer indices (integer codes, not one-hot vectors).
# The names are read from labels_df rather than from `labels`, because this
# cell overwrites `labels` with integers: re-running the previous version
# rebuilt both lookup tables from those integers and lost the class names.
class_names = labels_df['class'].to_numpy()
unique_labels = np.unique(class_names)
label_to_index = {label: index for index, label in enumerate(unique_labels)}
index_to_label = {index: label for index, label in enumerate(unique_labels)}
labels = np.array([label_to_index[name] for name in class_names])

In [15]:
# Hold out 20% of the samples for testing; images, class indices and boxes
# are split together so they stay row-aligned. The seed is fixed.
(
    X_train, X_test,
    y_train, y_test,
    bbox_train, bbox_test,
) = train_test_split(
    processed_images,
    labels,
    bounding_boxes,
    test_size=0.2,
    random_state=42,
)

## Create a CNN architecture for object detection