In [1]:
from kaggle.api.kaggle_api_extended import KaggleApi
import zipfile
import os

# Dataset identifier on Kaggle and the local folder that receives it.
dataset = 'jutrera/stanford-car-dataset-by-classes-folder'
download_path = 'data/stanford_cars'

# Create the Kaggle client and authenticate it before any request is made.
api = KaggleApi()
api.authenticate()

os.makedirs(download_path, exist_ok=True)

# Download the archive only; extraction is done explicitly below.
print("Descargando dataset...")
api.dataset_download_files(dataset, path=download_path, unzip=False)

# Extract the downloaded archive into the same folder it was saved to.
zip_path = os.path.join(download_path, 'stanford-car-dataset-by-classes-folder.zip')
with zipfile.ZipFile(zip_path) as archive:
    archive.extractall(download_path)

print("¡Dataset descargado y descomprimido!")


Descargando dataset...
Dataset URL: https://www.kaggle.com/datasets/jutrera/stanford-car-dataset-by-classes-folder
¡Dataset descargado y descomprimido!


In [2]:
import os
import shutil

# Configuration
base_dir = 'data/stanford_cars/car_data/car_data'
train_dir = os.path.join(base_dir, 'train')
test_dir = os.path.join(base_dir, 'test')
# Added to every test image index when it is renamed.
# NOTE(review): presumably the number of images already in train, so the new
# names cannot clash with existing ones - confirm.
offset = 8144

# Fold every class folder of the test split into the train folder of the same
# name, renumbering each image by `offset`.
for subfolder in os.listdir(test_dir):
    test_subfolder_path = os.path.join(test_dir, subfolder)

    # Skip stray non-directory entries: listing them would raise, and creating
    # a train folder named after them would add a bogus class.
    if not os.path.isdir(test_subfolder_path):
        continue

    # Ensure the corresponding train subfolder exists
    train_subfolder_path = os.path.join(train_dir, subfolder)
    os.makedirs(train_subfolder_path, exist_ok=True)

    # Process all image files in the current test subfolder
    for filename in os.listdir(test_subfolder_path):
        if not filename.endswith('.jpg'):
            continue

        # Extract index, convert to int, apply offset
        index_str = os.path.splitext(filename)[0]
        new_index = int(index_str) + offset
        new_filename = f"{new_index:05d}.jpg"

        # Define full paths
        src_path = os.path.join(test_subfolder_path, filename)
        dst_path = os.path.join(train_subfolder_path, new_filename)

        # shutil.move may silently replace an existing destination file, which
        # would destroy a train image; keep both files and report instead.
        if os.path.exists(dst_path):
            print(f"Warning: {dst_path} already exists. Skipping {src_path}.")
            continue

        # Move and rename file
        shutil.move(src_path, dst_path)

# Optional: remove the now-empty test folder
# shutil.rmtree(test_dir)

print("All test images have been renamed and moved to the train set.")


All test images have been renamed and moved to the train set.


In [5]:
import csv
import os  # used below for os.path.splitext; imported here so the cell does not rely on kernel state

# Configuration
offset = 8144  # added to each test image index to build its new file name
anno_train_path = 'data/stanford_cars/anno_train.csv'
anno_test_path = 'data/stanford_cars/anno_test.csv'

# Read existing anno_train.csv
with open(anno_train_path, 'r', newline='') as f:
    train_rows = list(csv.reader(f))

# File names already annotated in anno_train.csv. Used to detect test rows that
# were merged by a previous run, so running this cell again does not append
# the same annotations a second time.
known_filenames = {row[0] for row in train_rows if row}

# Read, modify, and collect rows from anno_test.csv
modified_test_rows = []
skipped_rows = 0
with open(anno_test_path, 'r', newline='') as f:
    reader = csv.reader(f)
    for row in reader:
        # csv.reader yields an empty list for a blank line; there is no file
        # name to rewrite in that case.
        if not row:
            continue

        filename = row[0]
        index = int(os.path.splitext(filename)[0])
        new_filename = f"{index + offset:05d}.jpg"

        if new_filename in known_filenames:
            skipped_rows += 1
            continue

        new_row = [new_filename] + row[1:]
        modified_test_rows.append(new_row)

# Append modified test rows to train rows
all_rows = train_rows + modified_test_rows

# Write the updated annotations back to anno_train.csv. When nothing new was
# collected the file is left untouched.
if modified_test_rows:
    with open(anno_train_path, 'w', newline='') as f:
        writer = csv.writer(f)
        writer.writerows(all_rows)

if skipped_rows:
    print(f"Skipped {skipped_rows} test annotations already present in anno_train.csv.")

print("Test annotations have been modified and merged into anno_train.csv.")


Test annotations have been modified and merged into anno_train.csv.


In [6]:
import os
import shutil

# Paths
# `base_dir` is pinned here rather than inherited from whichever cell assigned
# it last: another cell in this notebook binds the same name to
# 'data/stanford_cars', so relying on kernel state could silently point this
# cell at a different folder (or raise NameError on a fresh kernel).
base_dir = 'data/stanford_cars/car_data/car_data'
train_dir = os.path.join(base_dir, 'train')
all_data_dir = os.path.join(base_dir, 'all_data')

# Create the all_data directory if it doesn't exist
os.makedirs(all_data_dir, exist_ok=True)

# Flatten the per-class train folders: every .jpg found one level below
# `train_dir` is moved directly into `all_data_dir` under the same file name.
for subfolder in os.listdir(train_dir):
    subfolder_path = os.path.join(train_dir, subfolder)

    # Only proceed if it's a directory
    if not os.path.isdir(subfolder_path):
        continue

    for filename in os.listdir(subfolder_path):
        if not filename.endswith('.jpg'):
            continue

        src = os.path.join(subfolder_path, filename)
        dst = os.path.join(all_data_dir, filename)

        # Two class folders holding the same file name would collide in the
        # flat folder; the second file is left where it is and reported.
        if os.path.exists(dst):
            print(f"Warning: file {filename} already exists in all_data. Skipping.")
        else:
            shutil.move(src, dst)

print("All train images have been moved to 'all_data' folder.")


All train images have been moved to 'all_data' folder.


In [3]:
import os
import shutil
import random
import csv

# Configuration
base_dir = 'data/stanford_cars'
all_data_dir = os.path.join(base_dir, 'all_data')
anno_all_path = os.path.join(base_dir, 'anno_all.csv')
split_seed = 42  # fixed seed so every run produces the same split

# NOTE(review): no visible cell creates anno_all.csv or fills
# data/stanford_cars/all_data (the flattening cell writes below
# car_data/car_data, and the merge cell writes anno_train.csv) - confirm both
# are prepared before this cell runs.

# Destination folders and the fraction of images each one receives
splits = {
    'train': 0.7,
    'validation': 0.1,
    'test': 0.2,
}
split_dirs = {k: os.path.join(base_dir, k) for k in splits}

# Read annotations into a dictionary keyed by image file name
annotations = {}
with open(anno_all_path, 'r', newline='') as f:
    reader = csv.reader(f)
    for row in reader:
        # csv.reader yields an empty list for a blank line
        if not row:
            continue
        filename = row[0]
        annotations[filename] = row

all_images = list(annotations.keys())

# Fail before anything is modified: the per-split annotation files below are
# opened in 'w' mode, so a move failing halfway would leave them truncated or
# partial and the images split between folders.
missing_images = [
    name for name in all_images
    if not os.path.isfile(os.path.join(all_data_dir, name))
]
if missing_images:
    raise FileNotFoundError(
        f"{len(missing_images)} annotated images are missing from {all_data_dir} "
        f"(first: {missing_images[0]}); nothing was moved."
    )

# Shuffle with a dedicated, seeded generator (reproducible, and the global
# `random` state is left alone)
random.Random(split_seed).shuffle(all_images)

# Calculate split sizes
n = len(all_images)
n_train = int(splits['train'] * n)
n_val = int(splits['validation'] * n)
# The rest goes to test
n_test = n - n_train - n_val

# Partition images
train_images = all_images[:n_train]
val_images = all_images[n_train:n_train + n_val]
test_images = all_images[n_train + n_val:]
assert len(test_images) == n_test

split_map = {
    'train': train_images,
    'validation': val_images,
    'test': test_images,
}

# Create folders if not exist
for split_dir in split_dirs.values():
    os.makedirs(split_dir, exist_ok=True)

# Move images and write CSVs: one folder and one anno_<split>.csv per split
for split_name, image_list in split_map.items():
    out_dir = split_dirs[split_name]
    anno_path = os.path.join(base_dir, f'anno_{split_name}.csv')

    with open(anno_path, 'w', newline='') as f:
        writer = csv.writer(f)

        for img_name in image_list:
            src_img = os.path.join(all_data_dir, img_name)
            dst_img = os.path.join(out_dir, img_name)

            # Move image
            shutil.move(src_img, dst_img)

            # Write corresponding annotation
            writer.writerow(annotations[img_name])

print("✅ Dataset split into train, validation, and test with corresponding annotations.")


✅ Dataset split into train, validation, and test with corresponding annotations.
