In [None]:
import os
import pandas as pd

import numpy as np

import matplotlib.pyplot as plt
import cv2

# 1. Data Load

## List the contents of the headgear data directory

In [None]:
# Dataset root (relative to the notebook) and the headgear sub-directory inside it.
data_dir = '../data'
headgear_dir = os.path.join(data_dir, 'headgear')

# Show what the headgear directory contains.
headgear_entries = os.listdir(headgear_dir)
print(headgear_entries)

In [None]:
# Path of the CSV file inside the headgear directory; printed as a quick visual check.
headgear_csv = os.path.join(headgear_dir, 'headgear.csv')
print(headgear_csv)

- Load the data from the csv file

In [None]:
# Parse the CSV into a DataFrame; later cells read its 'data set', 'filepaths' and 'labels' columns.
headgear_df = pd.read_csv(headgear_csv)

# 2. Data Analysis

## Check the data

In [None]:
# Report the table size, then rich-display only the first rows instead of
# printing the whole frame as plain text.
print('Rows, columns:', headgear_df.shape)
headgear_df.head()

## Plot the data distribution

In [None]:
# Count the rows per split once and reuse the result for both the bars and the labels.
split_counts = headgear_df['data set'].value_counts()

fig, ax = plt.subplots()
split_counts.plot(kind='bar', ax=ax)

ax.set_title('Data set distribution')
ax.set_xlabel('Data set')
ax.set_ylabel('Count')

# Annotate each bar with its count. Iterate by position explicitly: the Series
# is indexed by split name, so integer keys like split_counts[i] would rely on
# the deprecated positional fallback of Series.__getitem__.
for position, count in enumerate(split_counts.to_numpy()):
    ax.text(position, count, str(count), ha='center', va='bottom')

plt.show()

In [None]:
# Display the number of rows for each distinct value of the 'data set' column.
headgear_df['data set'].value_counts()

In [None]:
# Echo the headgear directory path; the image paths below are joined onto it.
headgear_dir

## Plot the data sample

In [None]:
# Resolve the first entry of the 'filepaths' column against the headgear directory.
first_relative_path = headgear_df['filepaths'].iloc[0]
image_path = os.path.join(headgear_dir, first_relative_path)
print(image_path)

In [None]:
# cv2.imread does not raise on a missing or unreadable file; it returns None.
# Fail here with the offending path instead of an AttributeError on `.shape`.
image = cv2.imread(image_path)
if image is None:
    raise FileNotFoundError(f'Could not read image: {image_path}')
print('Image Size: ', image.shape)

# OpenCV decodes images in BGR channel order while matplotlib expects RGB,
# so convert for display; `image` itself is left exactly as loaded.
plt.imshow(cv2.cvtColor(image, cv2.COLOR_BGR2RGB))
plt.title('Image')
plt.show()

### Plot sample images with each transformation applied

In [None]:
from torchvision import transforms
from PIL import Image
import torch

# Panel title -> transform shown in that column. None means "show the image as loaded".
transformations = {
    "Original": None,
    "Horizontal Flip": transforms.RandomHorizontalFlip(p=1),  # p=1 to always apply the flip
    "Vertical Flip": transforms.RandomVerticalFlip(p=1),
    "Rotation": transforms.RandomRotation(30),
    "Center Crop": transforms.CenterCrop(64),
    "Blur": transforms.GaussianBlur(5),  # try kernel sizes 3, 5 or 7 to compare the effect
    "To Tensor": transforms.ToTensor(),
}

# Take the first (up to) three file paths by position.
sample_images = headgear_df['filepaths'].head(3).tolist()

# squeeze=False keeps `axs` two-dimensional even when only one row is available,
# so the axs[i, j] indexing below always works.
fig, axs = plt.subplots(
    len(sample_images), len(transformations), figsize=(20, 15), squeeze=False
)

for i, filepath in enumerate(sample_images):
    image_path = os.path.join(headgear_dir, filepath)
    # convert() returns a fully loaded copy, so the file handle can be closed right away.
    with Image.open(image_path) as image_file:
        image = image_file.convert('RGB')  # ensure we always start with a 3 channel image

    for j, (transformation_name, transformation) in enumerate(transformations.items()):
        # Every transform is applied to the same untouched source image, so the
        # result of a column never depends on the order of the dictionary.
        if transformation is None:
            transformed_image = image
        else:
            transformed_image = transformation(image)

        if isinstance(transformed_image, torch.Tensor):
            # Move the channel axis last (C, H, W -> H, W, C) so imshow can render it.
            transformed_image = transformed_image.permute(1, 2, 0).numpy()

        axs[i, j].imshow(transformed_image)
        axs[i, j].set_title(f"{transformation_name} Image")

plt.tight_layout()
plt.show()



# 3. Split the data
- This dataset is already split into train, validation, and test sets (see the `data set` column), so we only need to select the rows belonging to each split.

In [None]:
# Select the rows of each predefined split by matching the 'data set' column.
split_column = headgear_df['data set']
train_data_df, valid_data_df, test_data_df = (
    headgear_df[split_column == split_name]
    for split_name in ('train', 'valid', 'test')
)

- Load the training images (as NumPy arrays via OpenCV) and their labels

In [None]:
# Decoded training images and their labels, kept in matching order.
train_data = []
train_labels = []

# Iterate the two needed columns directly; iterrows() would build a Series per row.
for relative_path, label in zip(train_data_df['filepaths'], train_data_df['labels']):
    image_path = os.path.join(headgear_dir, relative_path)
    # NOTE: cv2.imread yields BGR channel order and returns None (no exception)
    # when the file is missing or cannot be decoded.
    image = cv2.imread(image_path)
    if image is None:
        raise FileNotFoundError(f'Could not read image: {image_path}')
    train_data.append(image)
    train_labels.append(label)

In [None]:
# Sanity check: the image and label lists should be the same length.
num_train_images = len(train_data)
print('Train Data: ', num_train_images)

num_train_labels = len(train_labels)
print('Train Labels: ', num_train_labels)

# Shape of the first decoded image.
first_image_shape = train_data[0].shape
print('Train Data Shape: ', first_image_shape)