# Setup

In [10]:
import os
os.environ["KERAS_BACKEND"] = "tensorflow"
import keras
from keras import ops, layers
import tensorflow as tf
import matplotlib.pyplot as plt
import numpy as np

# Define Hyperparameters

In [11]:
# Image geometry. NOTE(review): presumably INP_SIZE is the resolution of the raw
# photos and TARGET_SIZE the size fed to the model; the axis order
# (width, height) vs (height, width) is not established here — TODO confirm.
INP_SIZE = (4000, 3000)
TARGET_SIZE = (150, 150)
# Name of the resampling method to use when resizing (consumer not shown in this section).
INTERPOLATION = 'bilinear'

# tf.data.AUTOTUNE asks tf.data to choose parallelism / buffer sizes dynamically.
AUTO = tf.data.AUTOTUNE
# Training-loop settings (number of samples per batch, number of passes over the data).
BATCH_SIZE = 64
EPOCHS = 5

## Splitting the sample data in the directory into train and validation directories

In [1]:
import os
from sklearn.model_selection import train_test_split
import shutil

# Define the root directory of your dataset
root_directory = r'C:\Users\msi1\Videos\Computer_Vision_Solution\Autonomous_Shoe_Spotter\sample_data'

# Define directories for training and validation sets.
# They live INSIDE root_directory, so they must be excluded from the class
# listing below; otherwise re-running this cell would treat the previous run's
# output folders as two extra "classes" and split them again.
train_directory = os.path.join(root_directory, 'training_data')
val_directory = os.path.join(root_directory, 'validation_data')
output_directories = {train_directory, val_directory}

# Define the subdirectories containing your classes (sorted for a stable order)
class_directories = [
    path
    for path in (os.path.join(root_directory, d) for d in sorted(os.listdir(root_directory)))
    if os.path.isdir(path) and path not in output_directories
]

# Initialize lists to store file paths for training and validation sets
train_files = []
val_files = []

# Split each class directory into train and validation sets
for class_dir in class_directories:
    # List all files in the current class directory.
    # Sorted because os.listdir returns entries in arbitrary order, and
    # random_state only reproduces the same split for the same input order.
    files = sorted(
        os.path.join(class_dir, f)
        for f in os.listdir(class_dir)
        if os.path.isfile(os.path.join(class_dir, f))
    )

    # train_test_split raises ValueError when either side of the split would
    # be empty (0 or 1 file with test_size=0.3), so handle tiny classes here.
    if len(files) < 2:
        print(f"Not enough files to split in {class_dir} ({len(files)} found); keeping them in the training set.")
        train_files.extend(files)
        continue

    # Split the files into train and validation sets
    train_files_class, val_files_class = train_test_split(files, test_size=0.3, random_state=42)  # Adjust test_size as needed

    # Append the split files to the respective lists
    train_files.extend(train_files_class)
    val_files.extend(val_files_class)

# Create train and validation directories if they don't exist
os.makedirs(train_directory, exist_ok=True)
os.makedirs(val_directory, exist_ok=True)

# Copy files to train and validation directories.
# NOTE: files are copied flat, keyed by basename only, so two classes that
# contain a file with the same name will overwrite each other here.
for file in train_files:
    shutil.copy(file, os.path.join(train_directory, os.path.basename(file)))

for file in val_files:
    shutil.copy(file, os.path.join(val_directory, os.path.basename(file)))


# Resizing the images - Test 1

In [1]:
import cv2
import os

def resize_images(input_directory, output_directory, new_size):
    """Resize every readable image under ``input_directory`` into a mirrored tree.

    The folder structure below ``input_directory`` is reproduced below
    ``output_directory`` and each resized image keeps its original file name.

    Args:
        input_directory: Root folder that is walked recursively.
        output_directory: Root folder that receives the resized copies;
            created if it does not exist.
        new_size: Target size passed to ``cv2.resize`` as ``(width, height)``.

    Files that ``cv2.imread`` cannot decode (it returns ``None`` for them) are
    reported and skipped instead of crashing the whole run.
    """
    os.makedirs(output_directory, exist_ok=True)
    output_abs = os.path.abspath(output_directory)

    for subdir, dirs, files in os.walk(input_directory):
        # If the output folder sits inside the input tree, do not descend into
        # it: that would re-process images this function has just written.
        dirs[:] = [d for d in dirs if os.path.abspath(os.path.join(subdir, d)) != output_abs]

        # Map the current input folder onto the output tree via its relative
        # path rather than string replacement.
        relative_subdir = os.path.relpath(subdir, input_directory)
        if relative_subdir == os.curdir:
            subdir_path = output_directory
        else:
            subdir_path = os.path.join(output_directory, relative_subdir)

        for file in files:
            file_path = os.path.join(subdir, file)
            img = cv2.imread(file_path)
            if img is None:
                # Not an image, or unreadable: cv2.resize would raise on None.
                print(f"Skipped (not a readable image): {file_path}")
                continue

            img_resized = cv2.resize(img, new_size, interpolation=cv2.INTER_AREA)

            # Created lazily so folders without any image are not mirrored.
            os.makedirs(subdir_path, exist_ok=True)

            target_path = os.path.join(subdir_path, file)
            # cv2.imwrite signals failure through its return value.
            if cv2.imwrite(target_path, img_resized):
                print(f"Resized and saved: {target_path}")
            else:
                print(f"Failed to save: {target_path}")

# Hardcoded local paths: the source image tree and the folder that receives
# the resized copies. Adjust these for the machine the notebook runs on.
input_directory = r'C:\Users\msi1\Videos\Computer_Vision_Solution\Autonomous_Shoe_Spotter\softmoc_data'
output_directory = r'C:\Users\msi1\Videos\Computer_Vision_Solution\Autonomous_Shoe_Spotter\softmoc_data_resized'
new_size = (600, 450)  # New size (width, height), the order cv2.resize expects

# Walks input_directory and writes one resized copy per image under output_directory.
resize_images(input_directory, output_directory, new_size)


Resized and saved: C:\Users\msi1\Videos\Computer_Vision_Solution\Autonomous_Shoe_Spotter\softmoc_data_resized\Birkenstock\birk1.jpg
Resized and saved: C:\Users\msi1\Videos\Computer_Vision_Solution\Autonomous_Shoe_Spotter\softmoc_data_resized\Birkenstock\birk10.jpg
Resized and saved: C:\Users\msi1\Videos\Computer_Vision_Solution\Autonomous_Shoe_Spotter\softmoc_data_resized\Birkenstock\birk100.jpg
Resized and saved: C:\Users\msi1\Videos\Computer_Vision_Solution\Autonomous_Shoe_Spotter\softmoc_data_resized\Birkenstock\birk101.jpg
Resized and saved: C:\Users\msi1\Videos\Computer_Vision_Solution\Autonomous_Shoe_Spotter\softmoc_data_resized\Birkenstock\birk102.jpg
Resized and saved: C:\Users\msi1\Videos\Computer_Vision_Solution\Autonomous_Shoe_Spotter\softmoc_data_resized\Birkenstock\birk103.jpg
Resized and saved: C:\Users\msi1\Videos\Computer_Vision_Solution\Autonomous_Shoe_Spotter\softmoc_data_resized\Birkenstock\birk104.jpg
Resized and saved: C:\Users\msi1\Videos\Computer_Vision_Solution\

# Looking for the odd one out in the converse dataset

In [2]:
import os

def find_unmatched_files(image_dir, label_dir):
    """Print the base names that exist in only one of the two folders.

    Entries are compared by file name without extension. Names found only in
    ``image_dir`` are listed first, then names found only in ``label_dir``;
    if both folders hold exactly the same base names a confirmation line is
    printed instead. Nothing is returned.
    """
    # Base names (extension stripped) present in each folder
    image_files = {os.path.splitext(entry)[0] for entry in os.listdir(image_dir)}
    label_files = {os.path.splitext(entry)[0] for entry in os.listdir(label_dir)}

    # Names that appear on one side only
    images_only = image_files - label_files
    labels_only = label_files - image_files

    reports = (
        ("Unmatched image file(s):", images_only, image_dir),
        ("Unmatched label file(s):", labels_only, label_dir),
    )
    for heading, stems, folder in reports:
        if not stems:
            continue
        print(heading)
        for stem in stems:
            print(f"{stem} in {folder}")

    if not (images_only or labels_only):
        print("All images have corresponding labels.")

# Example usage: compare the converse image folder against its label folder.
# These are hardcoded local paths; adjust them for the machine the notebook runs on.
image_dir = r'C:\Users\msi1\Videos\Computer_Vision_Solution\Autonomous_Shoe_Spotter\annotated_data\converse\image_data'
label_dir = r'C:\Users\msi1\Videos\Computer_Vision_Solution\Autonomous_Shoe_Spotter\annotated_data\converse\label_text'

# Prints every base name that exists in only one of the two folders.
find_unmatched_files(image_dir, label_dir)


Unmatched image file(s):
a5eb765c3d464a6a9a1e5ce5d65eebca in C:\Users\msi1\Videos\Computer_Vision_Solution\Autonomous_Shoe_Spotter\annotated_data\converse\image_data


# Splitting the annotated data into train, validation and test sets

In [3]:
import os
import random
import shutil

def split_data(root_dir, output_dir, train_ratio=0.7, val_ratio=0.2, test_ratio=0.1, seed=None):
    """Split each brand's image/label pairs into train, val and test folders.

    Expects ``root_dir/<brand>/image_data`` and ``root_dir/<brand>/label_text``
    and copies the files to ``output_dir/<split>/<brand>/image_data`` and
    ``output_dir/<split>/<brand>/label_text``.

    Args:
        root_dir: Folder containing one sub-folder per brand.
        output_dir: Destination root; created if missing.
        train_ratio: Fraction of each brand's pairs copied to ``train``.
        val_ratio: Fraction of each brand's pairs copied to ``val``.
        test_ratio: Not used in the computation; ``test`` receives whatever
            remains after the train and val slices. Kept so existing calls
            keep working.
        seed: Optional seed for a reproducible shuffle. ``None`` (default)
            shuffles with the global ``random`` module state.

    Raises:
        AssertionError: A brand has a different number of images and labels.
        ValueError: An image and the label paired with it have different
            base names.
    """
    def _stem(filename):
        # File name without its extension; images and labels are matched on this.
        return os.path.splitext(filename)[0]

    def _cutoff(count, ratio):
        # The small epsilon absorbs float error before truncation: for example
        # 30 * (0.7 + 0.2) evaluates to 26.999999999999996, which a bare int()
        # would turn into 26 instead of the intended 27.
        return int(count * ratio + 1e-9)

    rng = random if seed is None else random.Random(seed)

    # Create directories for train, val, and test sets (also creates output_dir)
    for split in ['train', 'val', 'test']:
        os.makedirs(os.path.join(output_dir, split), exist_ok=True)

    # Sorted so brands are processed in a stable order (matters when seeded)
    for brand in sorted(os.listdir(root_dir)):
        image_dir = os.path.join(root_dir, brand, 'image_data')
        label_dir = os.path.join(root_dir, brand, 'label_text')

        # Stray files or folders without the expected layout are not brands
        if not (os.path.isdir(image_dir) and os.path.isdir(label_dir)):
            print(f"Skipping '{brand}': expected 'image_data' and 'label_text' sub-folders.")
            continue

        # Sort on the base name so both lists line up regardless of extension
        images = sorted(os.listdir(image_dir), key=_stem)
        labels = sorted(os.listdir(label_dir), key=_stem)

        # Ensure images and labels are paired correctly
        assert len(images) == len(labels), "Number of images and labels must match."

        paired_files = list(zip(images, labels))

        # Equal counts alone do not prove correct pairing: verify every pair
        # actually shares a base name instead of silently mispairing files.
        mismatched = [pair for pair in paired_files if _stem(pair[0]) != _stem(pair[1])]
        if mismatched:
            raise ValueError(
                f"Image/label names do not match for brand '{brand}': {mismatched[:5]}"
            )

        rng.shuffle(paired_files)  # Shuffle pairs together

        num_files = len(paired_files)

        train_cutoff = _cutoff(num_files, train_ratio)
        val_cutoff = _cutoff(num_files, train_ratio + val_ratio)

        train_files = paired_files[:train_cutoff]
        val_files = paired_files[train_cutoff:val_cutoff]
        test_files = paired_files[val_cutoff:]

        # Copy images and corresponding labels to respective directories
        for split, split_files in zip(['train', 'val', 'test'], [train_files, val_files, test_files]):
            split_image_dir = os.path.join(output_dir, split, brand, 'image_data')
            split_label_dir = os.path.join(output_dir, split, brand, 'label_text')
            os.makedirs(split_image_dir, exist_ok=True)
            os.makedirs(split_label_dir, exist_ok=True)

            for image_file, label_file in split_files:
                shutil.copy(os.path.join(image_dir, image_file), os.path.join(split_image_dir, image_file))
                shutil.copy(os.path.join(label_dir, label_file), os.path.join(split_label_dir, label_file))

# Example usage: split the annotated data into train/val/test folders using
# the function's default ratios. Hardcoded local paths; adjust for your machine.
root_dir = r'C:\Users\msi1\Videos\Computer_Vision_Solution\Autonomous_Shoe_Spotter\annotated_data'
output_dir = r'C:\Users\msi1\Videos\Computer_Vision_Solution\Autonomous_Shoe_Spotter\annotated_train_test_val'
split_data(root_dir, output_dir)


# Splitting the annotated data into train and test with 80:20 ratio

In [1]:
import os
import random
import shutil

def split_data(root_dir, output_dir, train_ratio=0.8, test_ratio=0.2, seed=None):
    """Split each brand's image/label pairs into train and test folders.

    Expects ``root_dir/<brand>/image_data`` and ``root_dir/<brand>/label_text``
    and copies the files to ``output_dir/<split>/<brand>/image_data`` and
    ``output_dir/<split>/<brand>/label_text``.

    NOTE: this notebook defines another ``split_data`` (train/val/test) in an
    earlier cell; running this cell replaces that definition in the kernel.

    Args:
        root_dir: Folder containing one sub-folder per brand.
        output_dir: Destination root; created if missing.
        train_ratio: Fraction of each brand's pairs copied to ``train``.
        test_ratio: Not used in the computation; ``test`` receives whatever
            remains after the train slice. Kept so existing calls keep working.
        seed: Optional seed for a reproducible shuffle. ``None`` (default)
            shuffles with the global ``random`` module state.

    Raises:
        AssertionError: A brand has a different number of images and labels.
        ValueError: An image and the label paired with it have different
            base names.
    """
    def _stem(filename):
        # File name without its extension; images and labels are matched on this.
        return os.path.splitext(filename)[0]

    rng = random if seed is None else random.Random(seed)

    # Create directories for train and test sets (also creates output_dir)
    for split in ['train', 'test']:
        os.makedirs(os.path.join(output_dir, split), exist_ok=True)

    # Sorted so brands are processed in a stable order (matters when seeded)
    for brand in sorted(os.listdir(root_dir)):
        image_dir = os.path.join(root_dir, brand, 'image_data')
        label_dir = os.path.join(root_dir, brand, 'label_text')

        # Stray files or folders without the expected layout are not brands
        if not (os.path.isdir(image_dir) and os.path.isdir(label_dir)):
            print(f"Skipping '{brand}': expected 'image_data' and 'label_text' sub-folders.")
            continue

        # Sort on the base name so both lists line up regardless of extension
        images = sorted(os.listdir(image_dir), key=_stem)
        labels = sorted(os.listdir(label_dir), key=_stem)

        # Ensure images and labels are paired correctly
        assert len(images) == len(labels), "Number of images and labels must match."

        paired_files = list(zip(images, labels))

        # Equal counts alone do not prove correct pairing: verify every pair
        # actually shares a base name instead of silently mispairing files.
        mismatched = [pair for pair in paired_files if _stem(pair[0]) != _stem(pair[1])]
        if mismatched:
            raise ValueError(
                f"Image/label names do not match for brand '{brand}': {mismatched[:5]}"
            )

        rng.shuffle(paired_files)  # Shuffle pairs together

        num_files = len(paired_files)

        # The small epsilon absorbs float error before truncation (a product
        # such as 90 * 0.7 evaluates just below the intended whole number).
        train_cutoff = int(num_files * train_ratio + 1e-9)

        train_files = paired_files[:train_cutoff]
        test_files = paired_files[train_cutoff:]

        # Copy images and corresponding labels to respective directories
        for split, split_files in zip(['train', 'test'], [train_files, test_files]):
            split_image_dir = os.path.join(output_dir, split, brand, 'image_data')
            split_label_dir = os.path.join(output_dir, split, brand, 'label_text')
            os.makedirs(split_image_dir, exist_ok=True)
            os.makedirs(split_label_dir, exist_ok=True)

            for image_file, label_file in split_files:
                shutil.copy(os.path.join(image_dir, image_file), os.path.join(split_image_dir, image_file))
                shutil.copy(os.path.join(label_dir, label_file), os.path.join(split_label_dir, label_file))

# Example usage: split the annotated data into train/test folders using the
# function's default 80:20 ratio. Hardcoded local paths; adjust for your machine.
root_dir = r'C:\Users\msi1\Videos\Computer_Vision_Solution\Autonomous_Shoe_Spotter\annotated_data'
output_dir = r'C:\Users\msi1\Videos\Computer_Vision_Solution\Autonomous_Shoe_Spotter\annotated_train_test'
split_data(root_dir, output_dir)
