In [1]:
# import libraries

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import os
import shutil
import pathlib
import PIL
from PIL import Image

import torch
import torchvision
from torchvision.transforms import v2, ToTensor, Lambda
from torchvision.io import read_image
from torch.utils.data import Dataset, DataLoader, Subset

from sklearn.model_selection import train_test_split

Matplotlib created a temporary cache directory at /tmp/matplotlib-swysp0s3 because the default path (/.config/matplotlib) is not a writable directory; it is highly recommended to set the MPLCONFIGDIR environment variable to a writable directory, in particular to speed up the import of Matplotlib and to better support multiprocessing.


In [17]:
# image folder plus the two lookup tables: class name -> index and index -> class name
img_dir = os.path.join(os.getcwd(), 'instrument-images')

# class names are listed in index order, so each name's position is its integer label
labels = {
    name: index
    for index, name in enumerate([
        'acoustic_guitar',
        'baglama',
        'electric_guitar',
        'harp',
        'kanun',
        'kemenche',
        'mandolin',
        'oud',
        'violin',
        'yayli_tambur',
    ])
}

# reverse lookup (integer label -> class name)
inverted_labels = dict(zip(labels.values(), labels.keys()))

In [3]:
# ensure all images are in 8-bit RGB format
def convert_to_rgb(img_path):
    """Rewrite the image file at ``img_path`` in RGB mode, if it is not RGB already.

    Files whose mode is already ``'RGB'`` are left untouched on disk. Re-saving
    them would re-encode the file on every run of the notebook, which for lossy
    formats such as JPEG costs a little quality each time.

    Args:
        img_path: Path of the image file. It is overwritten in place, but only
            when a mode conversion was actually required.

    NOTE(review): the output format is chosen by Pillow from the file
    extension; formats that cannot store RGB directly (GIF, presumably) may be
    converted again by Pillow on save -- confirm if such files matter.
    """
    with Image.open(img_path) as img:
        if img.mode == 'RGB':
            return  # already in the target mode: do not rewrite the file
        # convert() returns a new in-memory image that does not depend on the
        # open source file
        rgb_img = img.convert('RGB')
    # write back only after the source handle is closed, so the file is never
    # open for reading and writing at the same time
    rgb_img.save(img_path)

# walk the image folder and normalise every supported image file to RGB
for filename in os.listdir(img_dir):
    # case-insensitive extension filter; anything else (e.g. the CSV) is skipped
    if not filename.lower().endswith((".jpg", ".jpeg", ".png", ".gif")):
        continue
    img_path = os.path.join(img_dir, filename)
    convert_to_rgb(img_path)

In [22]:
# load the annotations table that sits alongside the images
image_labels = pd.read_csv(
    os.path.join(img_dir, 'image_labels.csv')
)

# class balance check: the expectation is 100 rows for each label
print(image_labels['label'].value_counts())

# show the rows around position 100, where one label ends and the next begins
image_labels.iloc[95:105]

label
0    100
1    100
2    100
3    100
4    100
5    100
6    100
7    100
8    100
9    100
Name: count, dtype: int64


Unnamed: 0,image,label
95,acoustic_guitar_95.jpg,0
96,acoustic_guitar_96.jpg,0
97,acoustic_guitar_97.jpg,0
98,acoustic_guitar_98.jpg,0
99,acoustic_guitar_99.jpg,0
100,baglama_1.jpg,1
101,baglama_10.jpg,1
102,baglama_100.jpg,1
103,baglama_11.jpg,1
104,baglama_12.jpg,1


### Build Dataset

In [8]:
class ImgClassDataset(Dataset):
    """Dataset of (image, label) pairs described by a CSV annotations file.

    The annotations file is loaded with ``pd.read_csv``. For every row, the
    first column is used as the image file name (joined onto ``img_dir``) and
    the second column as the label.
    """

    def __init__(self, annotations_file, img_dir, transform=None, target_transform=None):
        """Read the annotations table and store the sample-loading settings.

        Args:
            annotations_file: Path of the CSV file to read.
            img_dir: Directory that image file names are joined onto.
            transform: Optional callable applied to each loaded image.
            target_transform: Optional callable applied to each label.
        """
        self.target_transform = target_transform
        self.transform = transform
        self.img_dir = img_dir
        self.img_labels = pd.read_csv(annotations_file)

    def __len__(self):
        """Return the number of rows in the annotations table."""
        return self.img_labels.shape[0]

    def __getitem__(self, idx):
        """Load the sample at row position ``idx``.

        Args:
            idx: Row position, passed straight to ``DataFrame.iloc``.

        Returns:
            A ``(image, label)`` tuple: the image as returned by
            ``read_image`` and the raw label value, each passed through its
            transform when one was supplied.
        """
        file_name = self.img_labels.iloc[idx, 0]
        image = read_image(os.path.join(self.img_dir, file_name))
        label = self.img_labels.iloc[idx, 1]

        # transforms are optional; a falsy value means "leave the item as it is"
        image = self.transform(image) if self.transform else image
        label = self.target_transform(label) if self.target_transform else label
        return image, label

In [9]:
# input image transforms

# fix torch's global seed (presumably so the random augmentations below are repeatable)
torch.manual_seed(1)

img_transform = v2.Compose(
    [
        # random crop of the input, resized to 224 x 224
        v2.RandomResizedCrop((224, 224), antialias=True),
        # mirror left-right with probability 0.5
        v2.RandomHorizontalFlip(0.5),
        # vertical flipping is currently switched off:
        # v2.RandomVerticalFlip(p=0.5),
        # cast to float32, rescaling the values in the process
        v2.ToDtype(torch.float32, scale=True),
        # per-channel standardisation; the numbers match the widely used
        # ImageNet channel statistics
        v2.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]),
    ]
)

In [10]:
def one_hot_label(y):
    """Return a float one-hot vector with a 1 at index ``y``.

    The vector length is ``len(labels)``, so it follows the label dictionary
    instead of repeating the class count as a literal. Being a named
    module-level function (unlike an anonymous lambda) it can also be pickled,
    which DataLoader worker processes need on spawn-based platforms.

    Args:
        y: Integer class index (anything ``torch.tensor`` accepts as a scalar).

    Returns:
        1-D ``torch.float`` tensor of length ``len(labels)``.
    """
    return torch.zeros(len(labels), dtype=torch.float).scatter_(0, torch.tensor(y), value=1)


# full dataset: augmented images paired with one-hot encoded labels
instrument_images = ImgClassDataset(
    annotations_file=os.path.join(img_dir, 'image_labels.csv'),
    img_dir=img_dir,
    transform=img_transform,
    target_transform=Lambda(one_hot_label))

In [27]:
# check size of a data element and print label
# Fetch the sample once: each instrument_images[0] access re-reads the file and
# re-applies the (random) transforms, so indexing twice does the work twice.
sample_image, sample_label = instrument_images[0]
print(sample_image.shape)
print(sample_label)

torch.Size([3, 224, 224])
tensor([1., 0., 0., 0., 0., 0., 0., 0., 0., 0.])


### Test/Train Splitting

In [11]:
from sklearn.model_selection import train_test_split

# Stratification labels come from the dataset's own annotations table (second
# column = label), so they always line up with the indices being split and the
# cell does not depend on a separately loaded notebook variable.
data_labels = instrument_images.img_labels.iloc[:, 1]

# get index lists for training and testing data
train, test = train_test_split(
    range(len(instrument_images)),
    test_size=0.2,
    stratify=data_labels,
    random_state=1)

# The training transform crops and flips at random. Applying it to held-out
# images makes test metrics noisy, so the test subset gets a deterministic
# pipeline with the same output size and normalisation constants.
eval_transform = v2.Compose([
    v2.Resize(size=256, antialias=True),
    v2.CenterCrop(size=(224, 224)),
    v2.ToDtype(torch.float32, scale=True),
    v2.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
])

# same files and label encoding as instrument_images, without augmentation
eval_images = ImgClassDataset(
    os.path.join(img_dir, 'image_labels.csv'),
    img_dir,
    eval_transform,
    instrument_images.target_transform)

# subset the datasets according to the split indices
training_data = Subset(instrument_images, train)
test_data = Subset(eval_images, test)

### Dataloader setup

In [12]:
# training batches are reshuffled every epoch
train_dataloader = DataLoader(training_data, batch_size=64, shuffle=True)
# Evaluation gains nothing from reshuffling; a fixed order keeps per-batch
# results repeatable between runs. The test indices were already put in random
# order by train_test_split, so batches still mix the classes.
test_dataloader = DataLoader(test_data, batch_size=64, shuffle=False)