In [8]:
import os

In [9]:
# prepare config yaml file for training 
root = "datasets"
with open(os.path.join(root, "logo", "List", "Logo-2K+classes.txt")) as file:
    class_names = file.read()

class_names = class_names.split("\n")
for index, item in enumerate(class_names):
    class_names[index] = f"{index}: {class_names[index]}"
    print(class_names[index])



0: 24seven
1: Ace
2: aceh
3: Admiral
4: Aetrex
5: Agio
6: Aim
7: Akubra
8: ALAN
9: Alexia
10: Allstar
11: ara
12: Artemide
13: Ashbury
14: Atlantic
15: Atlantic Airlines
16: Audar
17: Aurora
18: Avanti
19: Baba
20: BACCARAT
21: BAE
22: Baker Skateboards
23: Balance
24: Bandit
25: Barcelo
26: Basso
27: Beeline
28: Berghaus
29: Berkshire
30: Bexley
31: Black and Red
32: Black Label Skateboards
33: Bobdog
34: Bona
35: Boscov's
36: BOSS
37: Boxfresh
38: Braun
39: Bravo
40: Breguet
41: BridgePort
42: Brooke
43: Brooklyn
44: BT
45: Butler
46: Buxton
47: C&M Airways
48: Cabana
49: Calypso
50: Carhartt
51: Cartier
52: Casa Blanca
53: Casino
54: Centurion
55: CHRISTOFLE
56: Church's
57: CMC
58: Colt
59: Consort
60: Converse
61: Cresta
62: Crown
63: Crumpler
64: Crystal
65: Dansko
66: DC Shoes
67: diamond
68: Dignity
69: Domestos
70: Domino
71: Domus
72: Don Jose
73: Dor
74: Drifter
75: DUX
76: DVS Shoes
77: DY
78: Eagle Creek
79: Eastpak
80: Ecco
81: Elbe
82: Emerica
83: Emo
84: Estrella
85: Et

In [13]:
for case in ["train", "test"]:    
    with open(os.path.join(root, "logo", f"{case}_images_root.txt")) as file:
        paths = file.read().strip()

    paths = paths.split("\n")
    paths = [ f"{case}/{item}"for item in paths ]
    print(paths)
    with open(os.path.join(root, "logo", f"{case}_images_root.txt"), mode="w") as file:
        file.write("\n".join(paths))

['train/Accessories/24seven/1.jpg', 'train/Accessories/24seven/10.jpg', 'train/Accessories/24seven/11.jpg', 'train/Accessories/24seven/14.jpg', 'train/Accessories/24seven/16.jpg', 'train/Accessories/24seven/17.jpg', 'train/Accessories/24seven/18.jpg', 'train/Accessories/24seven/19.jpg', 'train/Accessories/24seven/2.jpg', 'train/Accessories/24seven/20.jpg', 'train/Accessories/24seven/21.jpg', 'train/Accessories/24seven/22.jpg', 'train/Accessories/24seven/23.jpg', 'train/Accessories/24seven/25.jpg', 'train/Accessories/24seven/26.jpg', 'train/Accessories/24seven/27.jpg', 'train/Accessories/24seven/29.jpg', 'train/Accessories/24seven/3.jpg', 'train/Accessories/24seven/30.jpg', 'train/Accessories/24seven/31.jpg', 'train/Accessories/24seven/32.jpg', 'train/Accessories/24seven/33.jpg', 'train/Accessories/24seven/34.jpg', 'train/Accessories/24seven/35.jpg', 'train/Accessories/24seven/4.jpg', 'train/Accessories/24seven/40.jpg', 'train/Accessories/24seven/41.jpg', 'train/Accessories/24seven/42.j

In [16]:
import random
import os
from collections import defaultdict

def read_entries(file_path):
    subcat_entries = defaultdict(list)
    
    with open(file_path, 'r') as file:
        for line in file:
            parts = line.strip().split('/')
            subcat = parts[-2]
            subcat_entries[subcat].append(line.strip())
    
    return subcat_entries

def sample_entries(subcat_entries, sample_percentage=20):
    sampled_entries = []
    
    for subcat, entries in subcat_entries.items():
        sample_size = max(1, int(len(entries) * sample_percentage / 100))
        sampled_entries.extend(random.sample(entries, sample_size))
    
    return sampled_entries

def write_sampled_entries(sampled_entries, output_file):
    with open(output_file, 'w') as file:
        for entry in sampled_entries:
            file.write(entry + '\n')

def main(input_file, output_file):
    subcat_entries = read_entries(input_file)
    sampled_entries = sample_entries(subcat_entries)
    write_sampled_entries(sampled_entries, output_file)

if __name__ == "__main__":
    input_file = os.path.join(root, "logo", "test_images_root.txt")
    output_file = os.path.join(root, "logo", "val_images_root.txt")
    main(input_file, output_file)

In [14]:
import os
import shutil
import yaml

def load_classes(yaml_file):
    with open(yaml_file, 'r') as file:
        data = yaml.safe_load(file)
    class_map = {name: idx - 1 for idx, name in data['names'].items()}
    return class_map

def rearrange_dataset(base_dir, output_dir, class_map):
    images_dir = os.path.join(output_dir, 'images')
    labels_dir = os.path.join(output_dir, 'labels')
    
    if not os.path.exists(images_dir):
        os.makedirs(images_dir)
    
    if not os.path.exists(labels_dir):
        os.makedirs(labels_dir)

    new_paths = {split: [] for split in ['train', 'val', 'test']}
    
    for split in ['train', 'test']:
        split_file = f"{split}_images_root.txt"
        split_path = os.path.join(base_dir, split_file)

        with open(split_path, 'r') as file:
            for line in file:
                image_path = line.strip()
                full_image_path = os.path.join(base_dir, split, image_path)
                if not os.path.exists(full_image_path):
                    print(f"File not found: {full_image_path}")
                    continue
                
                class_name = os.path.basename(os.path.dirname(image_path))
                new_image_path = os.path.join(images_dir, f"{split}_{class_name}_{os.path.basename(image_path)}")
                new_image_dir = os.path.dirname(new_image_path)
                if not os.path.exists(new_image_dir):
                    os.makedirs(new_image_dir)
                
                shutil.copy(full_image_path, new_image_path)
                generate_label_file(new_image_path, labels_dir, class_name, class_map)
                new_paths[split].append(new_image_path)

    # Handle validation images separately
    split = 'val'
    split_file = f"{split}_images_root.txt"
    split_path = os.path.join(base_dir, split_file)

    with open(split_path, 'r') as file:
        for line in file:
            image_path = line.strip()
            full_image_path = os.path.join(base_dir, 'test', image_path)
            if not os.path.exists(full_image_path):
                print(f"File not found: {full_image_path}")
                continue

            class_name = os.path.basename(os.path.dirname(image_path))
            new_image_path = os.path.join(images_dir, f"{split}_{class_name}_{os.path.basename(image_path)}")
            new_image_dir = os.path.dirname(new_image_path)
            if not os.path.exists(new_image_dir):
                os.makedirs(new_image_dir)
            
            shutil.copy(full_image_path, new_image_path)
            generate_label_file(new_image_path, labels_dir, class_name, class_map)
            new_paths[split].append(new_image_path)
    
    return new_paths

def generate_label_file(image_path, label_dir, class_name, class_map):
    class_id = class_map[class_name]
    label_path = os.path.join(label_dir, os.path.splitext(os.path.basename(image_path))[0] + '.txt')
    
    # (x_center, y_center, width, height)
    bbox = [0.5, 0.5, 1.0, 1.0] 
    
    with open(label_path, 'w') as label_file:
        label_file.write(f"{class_id} {' '.join(map(str, bbox))}\n")

def update_text_files(base_dir, output_dir, new_paths):
    for split in ['train', 'val', 'test']:
        split_file = f"{split}_images_root.txt"
        new_split_path = os.path.join(output_dir, split_file)
        
        with open(new_split_path, 'w') as new_file:
            for new_path in new_paths[split]:
                new_file.write(f"{os.path.relpath(new_path, output_dir)}\n")

def main():
    base_dir = './datasets/logo'  
    output_dir = './datasets/logo'
    yaml_file = './data/coco.yaml' 
    
    class_map = load_classes(yaml_file)
    new_paths = rearrange_dataset(base_dir, output_dir, class_map)
    update_text_files(base_dir, output_dir, new_paths)

if __name__ == "__main__":
    main()
