In [None]:
# Mount Google Drive into the Colab runtime; later cells read the dataset
# archives from /content/drive/MyDrive/... and write the merged archive back.
from google.colab import drive
drive.mount('/content/drive')


Mounted at /content/drive


In [None]:
import zipfile
import shutil
import random
import os

In [None]:
def unzip_file(zip_file_path, extract_to_path):
    """Extract every member of a zip archive into a directory.

    Args:
        zip_file_path: Path of the ``.zip`` file to read.
        extract_to_path: Destination directory. It is created (including
            missing parents) when absent; an existing directory is reused.

    Returns:
        None.
    """
    # The destination is created first, so it exists even if opening the
    # archive fails afterwards.
    os.makedirs(extract_to_path, exist_ok=True)
    with zipfile.ZipFile(zip_file_path, mode='r') as archive:
        archive.extractall(path=extract_to_path)

In [None]:
# Extract the COCO subset archive from Drive into ./coco_subset.
# NOTE(review): the target is relative, while the merge cell reads
# /content/coco_subset - this assumes the working directory is /content; confirm.
unzip_file("/content/drive/MyDrive/smart_glasses/coco_subset.zip", "./coco_subset")

In [None]:
# Extract the hands dataset archive from Drive into ./Hands_dataset_fixed_index.
# NOTE(review): relative target; the merge cell reads the /content/... path,
# so this assumes the working directory is /content - confirm.
unzip_file("/content/drive/MyDrive/smart_glasses/smart_glasses/Hands_dataset_fixed_index.zip", "./Hands_dataset_fixed_index")

In [None]:
# Extract the Robo_flow dataset archive from Drive into ./Robo_flow_dataset_fixed_index.
# NOTE(review): relative target; later cells use /content/Robo_flow_dataset_fixed_index,
# so this assumes the working directory is /content - confirm.
unzip_file("/content/drive/MyDrive/smart_glasses/smart_glasses/Robo_flow_dataset_fixed_index.zip", "./Robo_flow_dataset_fixed_index")

In [None]:
# Extract the faces dataset archive from Drive into ./my_DS10(faces)_fixed_index.
# NOTE(review): this folder is not added to dataset_paths in the merge cell
# of this notebook - confirm whether leaving it out is intentional.
unzip_file("/content/drive/MyDrive/smart_glasses/smart_glasses/my_DS10(faces)_fixed_index.zip", "./my_DS10(faces)_fixed_index")

In [None]:
# Build the nine Robo_flow dataset roots. Each one sits in a doubly nested
# folder: my_DS<k>/my_DS<k> for k = 1..9.
dataset_paths = [
    f"/content/Robo_flow_dataset_fixed_index/my_DS{ds_number}/my_DS{ds_number}"
    for ds_number in range(1, 10)
]
dataset_paths

['/content/Robo_flow_dataset_fixed_index/my_DS1/my_DS1',
 '/content/Robo_flow_dataset_fixed_index/my_DS2/my_DS2',
 '/content/Robo_flow_dataset_fixed_index/my_DS3/my_DS3',
 '/content/Robo_flow_dataset_fixed_index/my_DS4/my_DS4',
 '/content/Robo_flow_dataset_fixed_index/my_DS5/my_DS5',
 '/content/Robo_flow_dataset_fixed_index/my_DS6/my_DS6',
 '/content/Robo_flow_dataset_fixed_index/my_DS7/my_DS7',
 '/content/Robo_flow_dataset_fixed_index/my_DS8/my_DS8',
 '/content/Robo_flow_dataset_fixed_index/my_DS9/my_DS9']

In [None]:
# Merge every source dataset into a single train/val split under combined_path.
#
# Each entry of dataset_paths must contain an `images` and a `labels` folder.
# An image and its label are matched by file stem (name without extension).
# NOTE(review): assumes each label file is named after its image - confirm.
# NOTE(review): the faces archive extracted earlier is not part of
# dataset_paths here - confirm whether that is intentional.

# --- Configuration -----------------------------------------------------------
extra_dataset_paths = ['/content/coco_subset', '/content/Hands_dataset_fixed_index']
# Build a new list and skip paths that are already present, so re-running this
# cell does not register the same dataset twice.
dataset_paths = dataset_paths + [p for p in extra_dataset_paths if p not in dataset_paths]

combined_path = '/content/combined_dataset'
train_path = f'{combined_path}/train'
test_path = f'{combined_path}/val'  # the held-out split is stored under "val"
train_img_path = f'{train_path}/images'
test_img_path = f'{test_path}/images'
train_labels_path = f'{train_path}/labels'
test_labels_path = f'{test_path}/labels'
split_ratio = 0.8  # For 80-20 split
split_seed = 42    # fixed seed so the split is identical on every run

# Create directories if they don't exist (parents are created implicitly).
output_dirs = (train_img_path, train_labels_path, test_img_path, test_labels_path)
for output_dir in output_dirs:
    os.makedirs(output_dir, exist_ok=True)

# Files left over from an earlier run with a different split could end up in
# both train and val, so make that situation visible.
leftover_dirs = [d for d in output_dirs if os.listdir(d)]
if leftover_dirs:
    print(f"Warning: {len(leftover_dirs)} output folder(s) already contain files; "
          f"delete {combined_path} first for a clean split.")


def _files_by_stem(directory):
    """Return {stem: full path} for the regular files in `directory`.

    Names are visited in sorted order; if two files share a stem, the first
    one wins.
    """
    by_stem = {}
    for file_name in sorted(os.listdir(directory)):
        full_path = os.path.join(directory, file_name)
        if os.path.isfile(full_path):
            by_stem.setdefault(os.path.splitext(file_name)[0], full_path)
    return by_stem


def _copy_pairs(image_paths, label_paths, image_dir, label_dir, used_stems):
    """Copy image/label pairs, renaming a pair whose stem is already taken.

    `used_stems` is shared between splits so no output name is written twice.
    Returns the number of pairs that had to be renamed.
    """
    renamed = 0
    for img, ann in zip(image_paths, label_paths):
        stem, img_ext = os.path.splitext(os.path.basename(img))
        ann_ext = os.path.splitext(ann)[1]
        out_stem = stem
        if out_stem in used_stems:
            # Prefix with the dataset folder name (the parent of `images`).
            dataset_tag = os.path.basename(os.path.dirname(os.path.dirname(img)))
            out_stem = f"{dataset_tag}_{stem}"
            attempt = 1
            while out_stem in used_stems:
                attempt += 1
                out_stem = f"{dataset_tag}_{stem}_{attempt}"
            renamed += 1
        used_stems.add(out_stem)
        # Image and label keep a common stem so they stay matched after copying.
        shutil.copy(img, os.path.join(image_dir, out_stem + img_ext))
        shutil.copy(ann, os.path.join(label_dir, out_stem + ann_ext))
    return renamed


# --- Combine datasets --------------------------------------------------------
images = []
annotations = []
unpaired_count = 0

for dataset_path in dataset_paths:
    image_by_stem = _files_by_stem(os.path.join(dataset_path, 'images'))
    label_by_stem = _files_by_stem(os.path.join(dataset_path, 'labels'))
    # os.listdir returns names in arbitrary order, so pair explicitly by stem
    # instead of relying on the two listings lining up.
    shared_stems = sorted(image_by_stem.keys() & label_by_stem.keys())
    unpaired_count += len(image_by_stem.keys() ^ label_by_stem.keys())
    images.extend(image_by_stem[stem] for stem in shared_stems)
    annotations.extend(label_by_stem[stem] for stem in shared_stems)

# --- Shuffle (seeded, reproducible) ------------------------------------------
combined = list(zip(images, annotations))
random.Random(split_seed).shuffle(combined)
# Rebuild the two lists without zip(*...), which fails on an empty dataset.
images = [pair[0] for pair in combined]
annotations = [pair[1] for pair in combined]

# --- Split -------------------------------------------------------------------
train_count = int(len(combined) * split_ratio)

train_images = images[:train_count]
train_annotations = annotations[:train_count]

test_images = images[train_count:]
test_annotations = annotations[train_count:]

# --- Copy files to train and val directories ---------------------------------
used_stems = set()
renamed_count = _copy_pairs(train_images, train_annotations,
                            train_img_path, train_labels_path, used_stems)
renamed_count += _copy_pairs(test_images, test_annotations,
                             test_img_path, test_labels_path, used_stems)

print(f"Paired {len(combined)} image/label files "
      f"({unpaired_count} unpaired files skipped, "
      f"{renamed_count} pairs renamed to avoid name clashes).")
print(f"Train: {len(train_images)}  Val: {len(test_images)}")
print("Datasets merged and split successfully!")


Datasets merged and split successfully!


In [None]:
# Number of entries in the training labels folder; compare with the training
# images count to confirm the two folders match.
len(os.listdir("/content/combined_dataset/train/labels"))

38255

In [None]:
# Number of entries in the training images folder.
len(os.listdir("/content/combined_dataset/train/images"))

38255

In [None]:
# Number of entries in the validation labels folder; compare with the
# validation images count to confirm the two folders match.
len(os.listdir("/content/combined_dataset/val/labels"))

17069

In [None]:
# Number of entries in the validation images folder.
len(os.listdir("/content/combined_dataset/val/images"))

17069

In [None]:
# Class names of the combined dataset (27 entries).
# NOTE(review): the list position presumably doubles as the numeric class id
# used in the label files - confirm before reordering or inserting entries.
classes_names = [
    "bed", "desk", "table", "door", "fridge",
    "toilet", "sofa", "sink", "microwave", "chair",
    "fan", "closet", "stairs", "crosswalk",
    "pedestrian Traffic Light (walk)",
    "pedestrian Traffic Light (stop)",
    "person", "bicycle", "car", "motorcycle", "bus",
    "bench", "faces",
    "A", "B", "W", "Y",
]

In [None]:
# Sanity check: how many class names the list holds.
len(classes_names)

27

In [None]:
def zip_folders(main_folder_path, output_zip_file):
    """Pack the sub-folders of a directory into one DEFLATE-compressed zip.

    Only files that live inside a sub-directory of ``main_folder_path`` are
    archived; files placed directly in ``main_folder_path`` and empty
    directories are not written. Member names are stored relative to
    ``main_folder_path``.

    Args:
        main_folder_path: Directory whose sub-folders are archived.
        output_zip_file: Path of the zip file to create (opened in 'w' mode,
            so an existing file is replaced).

    Returns:
        None.
    """
    with zipfile.ZipFile(output_zip_file, mode='w', compression=zipfile.ZIP_DEFLATED) as archive:
        for entry_name in os.listdir(main_folder_path):
            subfolder = os.path.join(main_folder_path, entry_name)
            # Top-level files are skipped on purpose: only folders are packed.
            if not os.path.isdir(subfolder):
                continue
            for current_dir, _subdirs, file_names in os.walk(subfolder):
                for file_name in file_names:
                    source = os.path.join(current_dir, file_name)
                    archive.write(source, arcname=os.path.relpath(source, main_folder_path))

In [None]:
# Report the size in bytes of the source COCO subset archive stored on Drive.
zip_size = os.path.getsize("/content/drive/MyDrive/smart_glasses/coco_subset.zip")
print(f'Zip file size: {zip_size} bytes')

Zip file size: 1287862197 bytes


In [None]:
# Pack the merged dataset into combined_dataset.zip. The output path is
# relative, so the archive is written to the current working directory.
main_folder_path = '/content/combined_dataset'
output_zip_file = 'combined_dataset.zip'
zip_folders(main_folder_path, output_zip_file)

In [None]:
# Report the size in bytes of the freshly created combined archive
# (reuses the zip_size name from the earlier size check).
zip_size = os.path.getsize("combined_dataset.zip")
print(f'Zip file size: {zip_size} bytes')

Zip file size: 3118927915 bytes


In [None]:
# Move the combined archive from the working directory to Google Drive.
# shutil.move returns the destination path, which the cell displays as output.
source_path = 'combined_dataset.zip'
destination_path = '/content/drive/MyDrive/smart_glasses/combined_dataset.zip'
shutil.move(source_path, destination_path)


'/content/drive/MyDrive/smart_glasses/combined_dataset.zip'