In [1]:
import os
import pandas as pd
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt 

In [8]:
df = pd.read_csv("../../data/meta/final_dataset_labeled.csv", index_col=0)
df

Unnamed: 0,name,car_name,car_type,is_test
1,00002_Acura TL Sedan 2012.jpg,Acura TL Sedan 2012,Midsize,0
2,00003_Dodge Dakota Club Cab 2007.jpg,Dodge Dakota Club Cab 2007,Large,0
3,00004_Hyundai Sonata Hybrid Sedan 2012.jpg,Hyundai Sonata Hybrid Sedan 2012,Midsize,0
4,00005_Ford F-450 Super Duty Crew Cab 2012.jpg,Ford F-450 Super Duty Crew Cab 2012,Large,0
6,00007_Dodge Journey SUV 2012.jpg,Dodge Journey SUV 2012,Midsize,0
...,...,...,...,...
26212,RamCVCargoVanMinivan201294.jpeg,Ram C/V Cargo Van Minivan 2012,Large,1
26213,RamCVCargoVanMinivan201296.jpeg,Ram C/V Cargo Van Minivan 2012,Large,1
26214,RamCVCargoVanMinivan201297.jpeg,Ram C/V Cargo Van Minivan 2012,Large,1
26215,RamCVCargoVanMinivan201298.jpeg,Ram C/V Cargo Van Minivan 2012,Large,1


In [9]:
def split_train_test(data, test_ratio):
    images = data["name"].to_list()
    labels = data["car_type"].to_list()

    training_images, testing_images, training_labels, testing_labels = train_test_split(
        images, labels, test_size=test_ratio, random_state=1
    )

    sets = {
        "train": training_images,
        "test": testing_images,
    }

    return sets

In [10]:
test = split_train_test(df, 0.1)["test"]
df["is_test"] = 0
for index, data in df.iterrows(): 
    if data["name"] in test: 
        df.at[index, "is_test"] = 1

df[df["is_test"] == 1]

Unnamed: 0,name,car_name,car_type,is_test
6,00007_Dodge Journey SUV 2012.jpg,Dodge Journey SUV 2012,Midsize,1
8,00009_Mitsubishi Lancer Sedan 2012.jpg,Mitsubishi Lancer Sedan 2012,Midsize,1
29,00030_Volvo 240 Sedan 1993.jpg,Volvo 240 Sedan 1993,Midsize,1
38,00039_Fisker Karma Sedan 2012.jpg,Fisker Karma Sedan 2012,Midsize,1
44,00045_Toyota Sequoia SUV 2012.jpg,Toyota Sequoia SUV 2012,Large,1
...,...,...,...,...
26194,RamCVCargoVanMinivan201275.jpeg,Ram C/V Cargo Van Minivan 2012,Large,1
26201,RamCVCargoVanMinivan201282.jpeg,Ram C/V Cargo Van Minivan 2012,Large,1
26205,RamCVCargoVanMinivan201287.jpeg,Ram C/V Cargo Van Minivan 2012,Large,1
26209,RamCVCargoVanMinivan201290.jpeg,Ram C/V Cargo Van Minivan 2012,Large,1


In [11]:
df.to_csv("../../data/meta/final_dataset_labeled.csv")

In [12]:
labels = ["Small", "Midsize", "Large"]
folder_path = "../../data/images/"

# Create 'train' and 'test' folders
train_folder = os.path.join(folder_path, "train")
test_folder = os.path.join(folder_path, "test")
os.makedirs(train_folder, exist_ok=True)
os.makedirs(test_folder, exist_ok=True)

for label in labels:
    train_label_folder = os.path.join(train_folder, label)
    test_label_folder = os.path.join(test_folder, label)
    os.makedirs(train_label_folder, exist_ok=True)
    os.makedirs(test_label_folder, exist_ok=True)
    print(f"{label} folders were successfully created")

total = len(df)
count = 0
for index, data in df.iterrows():
    file_name = data["name"]
    label = data["car_type"]
    is_test = data["is_test"]

    if os.path.exists(os.path.join(folder_path, file_name)):
        src_path = os.path.join(folder_path, file_name)

        if is_test == 1:
            dest_path = os.path.join(test_folder, label, file_name)
        else:
            dest_path = os.path.join(train_folder, label, file_name)

        os.rename(src_path, dest_path)
        count += 1
        print(f"{count} of {total} successfully moved")
    else:
        print(f"File {file_name} not found. Deleting...")
df

Small folders were successfully created
Midsize folders were successfully created
Large folders were successfully created
1 of 18337 successfully moved
2 of 18337 successfully moved
3 of 18337 successfully moved
File 00005_Ford F-450 Super Duty Crew Cab 2012.jpg not found. Deleting...
4 of 18337 successfully moved
5 of 18337 successfully moved
6 of 18337 successfully moved
7 of 18337 successfully moved
8 of 18337 successfully moved
9 of 18337 successfully moved
10 of 18337 successfully moved
11 of 18337 successfully moved
12 of 18337 successfully moved
13 of 18337 successfully moved
14 of 18337 successfully moved
15 of 18337 successfully moved
16 of 18337 successfully moved
17 of 18337 successfully moved
18 of 18337 successfully moved
19 of 18337 successfully moved
20 of 18337 successfully moved
21 of 18337 successfully moved
22 of 18337 successfully moved
23 of 18337 successfully moved
24 of 18337 successfully moved
25 of 18337 successfully moved
26 of 18337 successfully moved
27 of 1

Unnamed: 0,name,car_name,car_type,is_test
1,00002_Acura TL Sedan 2012.jpg,Acura TL Sedan 2012,Midsize,0
2,00003_Dodge Dakota Club Cab 2007.jpg,Dodge Dakota Club Cab 2007,Large,0
3,00004_Hyundai Sonata Hybrid Sedan 2012.jpg,Hyundai Sonata Hybrid Sedan 2012,Midsize,0
6,00007_Dodge Journey SUV 2012.jpg,Dodge Journey SUV 2012,Midsize,1
7,00008_Dodge Charger Sedan 2012.jpg,Dodge Charger Sedan 2012,Midsize,0
...,...,...,...,...
26212,RamCVCargoVanMinivan201294.jpeg,Ram C/V Cargo Van Minivan 2012,Large,1
26213,RamCVCargoVanMinivan201296.jpeg,Ram C/V Cargo Van Minivan 2012,Large,0
26214,RamCVCargoVanMinivan201297.jpeg,Ram C/V Cargo Van Minivan 2012,Large,0
26215,RamCVCargoVanMinivan201298.jpeg,Ram C/V Cargo Van Minivan 2012,Large,0


In [None]:
df.to_csv("../../data/meta/final_dataset_labeled.csv")