In [None]:
!pip install torch
!pip install torchvision
!pip install sklearn
!pip install tqdm

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/


In [90]:
import os
from tqdm.auto import tqdm
import matplotlib.pyplot as plt
import numpy as np
from pathlib import Path
import shutil

import torch
import torch.optim as optim
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import DataLoader, random_split

import torchvision
import torchvision.transforms as transforms

from sklearn.metrics import confusion_matrix

In [91]:
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [92]:
root_path = "/content/drive/MyDrive/NUS/CS4243/CS4243_mini_project"
cleaned_image_split_path = os.path.join(root_path, "image_data_cleaned_split")
cleaned_spectrogram_split_path = os.path.join(root_path, "spectrogram_data_split")
cleaned_image_path = os.path.join(root_path, "image_data_cleaned")
spectrogram_path = os.path.join(root_path, "spectrogram_data")
image_without_audio_path = os.path.join(root_path, "image_without_audio")

In [93]:
# Remove existing split
if os.path.exists(cleaned_image_split_path):
    shutil.rmtree(cleaned_image_split_path)
if os.path.exists(cleaned_spectrogram_split_path):
    shutil.rmtree(cleaned_spectrogram_split_path)
if os.path.exists(image_without_audio_path):
    shutil.rmtree(image_without_audio_path)

In [94]:
dataset_types = ["train", "validation", "test"]
class_labels = ["carrying", "normal", "threat"]

input_size = (299, 299)
# train, validation, test
data_split = [0.8, 0.1, 0.1]

# Image transformations
transform = transforms.Compose([
    transforms.RandomHorizontalFlip(0.5),
    transforms.RandomRotation(20),
    transforms.Resize(input_size),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
])

# Load data
dataset = torchvision.datasets.ImageFolder(spectrogram_path, transform)
n_data = len(dataset)
n_train = int(n_data * data_split[0])
n_validation = int(n_data * data_split[1])
n_test = n_data - n_train - n_validation
datasets = random_split(dataset, (n_train, n_validation, n_test))
print(f'Audio spectrogram data are split into {n_train} for training, {n_validation} for validation, {n_test} for testing')

Audio spectrogram data are split into 1288 for training, 161 for validation, 161 for testing


In [95]:
imgs_with_audio_set = set()

for idx, ds in enumerate(datasets):
    dataset_type = dataset_types[idx]
    
    print(f'\nSplitting {len(ds)} {dataset_type} samples.')
    for sample_idx in tqdm(ds.indices):
        path, label_idx = ds.dataset.samples[sample_idx]
        filename = path.split('/')[-1]
        img_name = filename.split('_audio_spec')[0]

        matching_cleaned_img_folder = os.path.join(cleaned_image_path, class_labels[label_idx])
        matching_imgs = list(filter(lambda filename: img_name in filename, sorted(os.listdir(matching_cleaned_img_folder))))

        audio_output_path = os.path.join(cleaned_spectrogram_split_path, dataset_type, class_labels[label_idx])
        imgs_output_path = os.path.join(cleaned_image_split_path, dataset_type, class_labels[label_idx])

        Path(audio_output_path).mkdir(parents=True, exist_ok=True)
        Path(imgs_output_path).mkdir(parents=True, exist_ok=True)

        shutil.copyfile(path, os.path.join(audio_output_path, filename))
        for matching_img in matching_imgs:
            src = os.path.join(matching_cleaned_img_folder, matching_img)
            dst = os.path.join(imgs_output_path, matching_img)
            shutil.copyfile(src, dst)

            imgs_with_audio_set.add(src)


Splitting 1288 train samples.


  0%|          | 0/1288 [00:00<?, ?it/s]


Splitting 161 validation samples.


  0%|          | 0/161 [00:00<?, ?it/s]


Splitting 161 test samples.


  0%|          | 0/161 [00:00<?, ?it/s]

In [96]:
print(imgs_with_audio_set)
print(len(imgs_with_audio_set))

{'/content/drive/MyDrive/NUS/CS4243/CS4243_mini_project/image_data_cleaned/threat/threat3(TAN YING HUI)0.68526_30.png', '/content/drive/MyDrive/NUS/CS4243/CS4243_mini_project/image_data_cleaned/threat/0406577_20220904_threat_11120.11851_200.png', '/content/drive/MyDrive/NUS/CS4243/CS4243_mini_project/image_data_cleaned/normal/0217708_20220904_normal_73780.19201_30.png', '/content/drive/MyDrive/NUS/CS4243/CS4243_mini_project/image_data_cleaned/normal/a0206131Y_20220828_normal_82070.056004_30.png', '/content/drive/MyDrive/NUS/CS4243/CS4243_mini_project/image_data_cleaned/threat/0201681_20220830_threat_48840.036151_30.png', '/content/drive/MyDrive/NUS/CS4243/CS4243_mini_project/image_data_cleaned/threat/A0260551M_20220904_threat_48950.083796_30.png', '/content/drive/MyDrive/NUS/CS4243/CS4243_mini_project/image_data_cleaned/normal/0034082_20220209_normal_57830.53922_200.png', '/content/drive/MyDrive/NUS/CS4243/CS4243_mini_project/image_data_cleaned/carrying/A0194554W_20220904_carrying_1235

In [97]:
# Get images without audio
for label in class_labels:
    path = os.path.join(cleaned_image_path, label)
    output_path = os.path.join(image_without_audio_path, label)

    for filename in sorted(os.listdir(path)):
        src_full_path = os.path.join(path, filename)
        if src_full_path not in imgs_with_audio_set:
            
            # Mkdir inside to avoid creating empty dir (e.g. all threat images have corresponding sound)
            Path(output_path).mkdir(parents=True, exist_ok=True)

            dst = os.path.join(output_path, filename)
            shutil.copyfile(src_full_path, dst)

In [98]:
dataset = torchvision.datasets.ImageFolder(image_without_audio_path, transform)
n_data = len(dataset)
n_train = int(n_data * data_split[0])
n_validation = int(n_data * data_split[1])
n_test = n_data - n_train - n_validation
datasets = random_split(dataset, (n_train, n_validation, n_test))
print(f'Images without spectrogram data are split into {n_train} for training, {n_validation} for validation, {n_test} for testing')

Images without spectrogram data are split into 507 for training, 63 for validation, 64 for testing


In [99]:
for idx, ds in enumerate(datasets):
    dataset_type = dataset_types[idx]
    
    print(f'\nSplitting {len(ds)} {dataset_type} samples.')
    for sample_idx in tqdm(ds.indices):
        path, label_idx = ds.dataset.samples[sample_idx]
        filename = path.split('/')[-1]
        
        dst = os.path.join(cleaned_image_split_path, dataset_type, class_labels[label_idx], filename)
        shutil.copyfile(path, dst)


Splitting 507 train samples.


  0%|          | 0/507 [00:00<?, ?it/s]


Splitting 63 validation samples.


  0%|          | 0/63 [00:00<?, ?it/s]


Splitting 64 test samples.


  0%|          | 0/64 [00:00<?, ?it/s]