This file will implement the dataloader for the headlights


# This one generates the .json file that lists all the image names and their corresponding make and model, as well as which subfolder they are from

In [11]:
# Raw strings keep the Windows backslashes literal; in a normal string literal
# sequences such as "\c", "\s" and "\i" are invalid escapes that only work by accident.
# Folder holding the images, organised in subfolders
COMPCAR_DATASET = r"E:\compcars\sv_data_fromZIP\sv_data\image"
# .mat file containing the 'sv_make_model_name' table
MAKE_MODEL_FILE = r"E:\compcars\sv_data_fromZIP\sv_data\sv_make_model_name.mat"
# Output index file, written relative to the current working directory
JSON_OUTPUT = 'index_of_image_name_to_make_model.json'

import scipy.io
import os
import json

# Load the name table from the .mat file. The code below treats row i as the
# label of class folder i + 1 (presumably [make, model, web id] -- TODO confirm
# against the dataset documentation).
ann_labels = scipy.io.loadmat(MAKE_MODEL_FILE)['sv_make_model_name']

# Root folder whose numbered subfolders hold the images
root_folder = COMPCAR_DATASET

# One record per image: file name, subfolder (relative to the root) and label
file_info_list = []

# Walk through the directory tree, one folder at a time
for root, dirs, files in os.walk(root_folder):
    if not files:
        continue

    # Subfolder path relative to the root folder; it doubles as the class number
    relative_subfolder = os.path.relpath(root, root_folder)

    # Files that do not sit in a numeric class folder (e.g. directly in the
    # root, where the relative path is ".") cannot be labelled: report and skip
    # them instead of aborting the whole run.
    try:
        class_index = int(relative_subfolder) - 1      # folders are 1-based, rows are 0-based
    except ValueError:
        print(f"Skipping {len(files)} file(s) in non-class folder {relative_subfolder!r}")
        continue

    # A negative index would silently wrap around to the end of the table,
    # so reject anything outside the table explicitly.
    if not 0 <= class_index < len(ann_labels):
        print(f"Skipping {len(files)} file(s) in folder {relative_subfolder!r}: no matching label row")
        continue

    # Every cell of the row is indexable; its first element is the text itself
    label = [str(entry[0]) for entry in ann_labels[class_index]]

    for file_name in files:
        file_info_list.append({
            'file_name': file_name,
            'subfolder_path': relative_subfolder,
            'make_model': list(label),      # separate copy per record
        })

# Write the index to disk. NOTE: every record is JSON-encoded on its own and the
# resulting list of strings is encoded again, so a reader has to decode twice.
# This layout is kept on purpose because the loading cell expects it.
list_of_json = [json.dumps(item) for item in file_info_list]
with open(JSON_OUTPUT, 'w') as json_file:
    json.dump(list_of_json, json_file)

In [15]:
# loading it back into memory as a list of dictionaries
import json

# Index file to read back
JSON_OUTPUT = 'index_of_image_name_to_make_model.json'

# The file stores a JSON array whose elements are themselves JSON-encoded
# records, so decode the outer array first and then each element in turn.
# Decoding straight from json.load() avoids keeping the intermediate list of
# strings around in memory.
with open(JSON_OUTPUT, 'r') as json_file:
    loaded_list_of_dicts = [
        json.loads(encoded_record) for encoded_record in json.load(json_file)
    ]

# This will be the dataloader itself.


In [None]:
import torch
from torch.utils.data import DataLoader, Dataset

class HeadlightsCustomDataset(Dataset):
    '''
    Dataset of headlight images stored as <img_dir>/<class folder>/<image file>.

    Each item is a pair (image, label):
        image: the tensor returned by torchvision.io.read_image (documented by
            torchvision as channels, height, width), after the optional
            ``transform``.
        label: list of strings -- one per entry of the annotation row that
            belongs to the image's class folder (expected to be
            [make, model, web id]), followed by the image file name -- after
            the optional ``target_transform``.
    '''
    def __init__(self, annotations_dir, img_dir, transform=None, target_transform=None, split=None):
        '''
        Args:
            annotations_dir: path of the .mat file holding 'sv_make_model_name'.
            img_dir: root folder of the images.
            transform: optional callable applied to every image.
            target_transform: optional callable applied to every label.
            split: optional collection of image paths relative to ``img_dir``
                ("<class folder>/<image file>"); when None every file found
                below ``img_dir`` is used.
        '''
        import os
        import scipy.io
        self.annotations_dir = annotations_dir
        self.img_dir = img_dir
        self.transform = transform
        self.target_transform = target_transform

        if split is None:
            # No split given: take every file below img_dir, stored as
            # "<relative subfolder>/<file name>".
            self.ann_files = []
            for root, _, files in os.walk(self.img_dir):
                relative_path = os.path.relpath(root, self.img_dir)
                for file in files:
                    self.ann_files.append(os.path.join(relative_path, file))
            # os.walk yields entries in arbitrary, filesystem-dependent order;
            # sorting makes the index -> file mapping reproducible.
            self.ann_files.sort()
        else:
            # Use the caller's selection; copy it so later changes to the
            # caller's list do not alter the dataset.
            self.ann_files = list(split)

        # Name table, expected as [N, 3] -> [make, model, web id]
        self.ann_labels = scipy.io.loadmat(self.annotations_dir)['sv_make_model_name']

    def __len__(self):
        '''Return the number of images in the dataset.'''
        return len(self.ann_files)

    @staticmethod
    def _split_path(path):
        '''Split ``path`` into its components, e.g. "12/a.jpg" -> ["12", "a.jpg"].'''
        import os
        components = []
        while True:
            path, tail = os.path.split(path)
            if tail != "":
                components.insert(0, tail)
                continue
            # Nothing left after the last separator: keep a remaining root or
            # drive part, if any, and stop.
            if path != "":
                components.insert(0, path)
            return components

    def __getitem__(self, idx):
        '''
        Return ``(image, label)`` for the image at position ``idx``.

        Raises:
            ValueError: if the first path component is not a positive class number.
            IndexError: if ``idx`` or the class number is out of range.
        '''
        # Imported here so the method does not depend on a notebook-global
        # ``os`` left behind by another cell.
        import os
        from torchvision.io import read_image

        short_file_path = self.ann_files[idx]              # "<class folder>/<image file>"
        full_file_path = os.path.join(self.img_dir, short_file_path)

        # The first component is the class folder, the last one the file name
        folders = self._split_path(short_file_path)

        # Class folders are 1-based, the rows of the name table are 0-based
        try:
            car_class = int(folders[0]) - 1
        except ValueError as err:
            raise ValueError(
                f"Cannot derive a class number from {short_file_path!r}; "
                "expected '<numeric class folder>/<image file>'"
            ) from err
        if car_class < 0:
            # A negative index would silently pick a row from the end of the table
            raise ValueError(
                f"Class folder {folders[0]!r} in {short_file_path!r} must be 1 or greater"
            )

        # Every cell of the row is indexable; its first element is the text itself
        label = [str(entry[0]) for entry in self.ann_labels[car_class]]

        # The image file name is appended as the last label entry
        label.append(folders[-1])

        image = read_image(full_file_path)

        if self.transform is not None:
            image = self.transform(image)
        # Previously accepted by __init__ but never applied
        if self.target_transform is not None:
            label = self.target_transform(label)

        return image, label