This piece of code is used to filter to generate the data used for training/testing/validation. The data is filtered by the following criteria:
1. We filter on the following material classes: "plastic", "metal", "wood/paper"
2. we only consider the singular images
3. We know that each item contains 12 of the same images so we only consider the first one and will use random transformations on the remaining 11 images

In [19]:
import numpy as np
import pandas as pd
import os
import shutil
import torchvision.transforms as transforms
import PIL

In [4]:
annotations = pd.read_csv('./annotations_text.csv')
annotations

Unnamed: 0.1,Unnamed: 0,No.,Object name,Material,Stained,Surface properties
0,0,1,Christmas bear,['cloth'],['pluriform'],['']
1,1,2,Lab-keys,"['metal', 'cloth']",[''],"['composite', 'shiny']"
2,2,3,Apricot,['fruit'],['uniform'],['']
3,3,4,Round candle,"['candle', 'metal']",[''],"['composite', 'shiny']"
4,4,5,Nut,['wood'],['uniform'],['ribbed']
...,...,...,...,...,...,...
995,995,996,lotion,"['plastic', 'paper']",['pluriform'],"['shiny', 'composite']"
996,996,997,cleansing milk,"['plastic', 'paper']",['pluriform'],"['shiny', 'composite']"
997,997,998,vogue deodorant,"['metal', 'plastic']",['pluriform'],"['shiny', 'composite']"
998,998,999,ALOI recording,"['metal', 'plastic']",['uniform'],"['shiny', 'composite']"


In [32]:
#convert material string to list
def convert_to_list(x):
    x = x.replace('[','')
    x = x.replace(']','')
    x = x.replace('\'','')
    x = x.replace('\'','')
    x = x.replace(' ','')
    x = x.split(',')
    return x

annotations['Material'] = annotations['Material'].apply(convert_to_list)

Transform the duplicate images

In [10]:
source_dir = "./aloi_red4_col/png4/"
target_dir = "./Transformed/"

In [15]:
if not os.path.exists(target_dir):
    os.mkdir(target_dir)

In [18]:
#list of transformations
transform_list = [transforms.RandomHorizontalFlip(p=1), transforms.RandomVerticalFlip(p=1), 
transforms.RandomRotation(90), transforms.RandomRotation(180), 
transforms.RandomAffine(0, shear=10, scale=(0.8,1.2)), transforms.GaussianBlur(kernel_size=3, sigma=(0.1, 2.0)),
transforms.RandomAutocontrast(p=1), transforms.RandomEqualize(p=1), transforms.ColorJitter(brightness=0.5, contrast=0.5, saturation=0.5, hue=0.5),
transforms.RandomPerspective(distortion_scale=0.5, p=1, interpolation=3), transforms.RandomVerticalFlip(p=1)]
len(transform_list)
    
    

11

In [20]:
for folder in os.listdir(source_dir):
    os.mkdir(target_dir + folder)
    count = 0
    for file in os.listdir(source_dir + folder):
        if (count > 0):
            img = PIL.Image.open(source_dir + folder + "/" + file)
            transformed_img = transform_list[count-1](img)
            transformed_img.save(target_dir + folder + "/" + file)
        else:
            shutil.copy(source_dir + folder + "/" + file, target_dir + folder + "/" + file)
        count += 1

Filter the transformed data such that only:
1. singular material classes are considered
2. "plastic", "metal", "wood/paper"
3. Ignore the other material classes

Finally create label csv files

In [38]:
source_dir = "./Transformed/"
target_dir = "./DataFiltered/"

In [39]:
if not os.path.exists(target_dir):
    os.mkdir(target_dir)

In [40]:
pd_label = pd.DataFrame(columns=['filename', 'label'])

In [41]:
for folder in os.listdir(source_dir):
    item_number = int(folder)
    material = annotations.loc[annotations['No.'] == item_number, 'Material'].iloc[0]
    if (len(material) == 1):
        material = material[0]
        if (material == 'metal'):
            for file in os.listdir(source_dir + folder):
                shutil.copy(source_dir + folder + "/" + file, target_dir + folder + "_" + file)
                pd_label = pd_label.append({'filename': folder + file, 'label': 'metal'}, ignore_index=True)
        elif (material == 'plastic'):
            for file in os.listdir(source_dir + folder):
                shutil.copy(source_dir + folder + "/" + file, target_dir + folder + "_" + file)
                pd_label = pd_label.append({'filename': folder + file, 'label': 'plastic'}, ignore_index=True)
        elif (material == 'wood' or material == 'paper'):
            for file in os.listdir(source_dir + folder):
                shutil.copy(source_dir + folder + "/" + file, target_dir + folder + "_" + file)
                pd_label = pd_label.append({'filename': folder + file, 'label': 'wood,paper'}, ignore_index=True)

  pd_label = pd_label.append({'filename': folder + file, 'label': 'plastic'}, ignore_index=True)
  pd_label = pd_label.append({'filename': folder + file, 'label': 'plastic'}, ignore_index=True)
  pd_label = pd_label.append({'filename': folder + file, 'label': 'plastic'}, ignore_index=True)
  pd_label = pd_label.append({'filename': folder + file, 'label': 'plastic'}, ignore_index=True)
  pd_label = pd_label.append({'filename': folder + file, 'label': 'plastic'}, ignore_index=True)
  pd_label = pd_label.append({'filename': folder + file, 'label': 'plastic'}, ignore_index=True)
  pd_label = pd_label.append({'filename': folder + file, 'label': 'plastic'}, ignore_index=True)
  pd_label = pd_label.append({'filename': folder + file, 'label': 'plastic'}, ignore_index=True)
  pd_label = pd_label.append({'filename': folder + file, 'label': 'plastic'}, ignore_index=True)
  pd_label = pd_label.append({'filename': folder + file, 'label': 'plastic'}, ignore_index=True)
  pd_label = pd_label.append({

In [43]:
pd_label

Unnamed: 0,filename,label
0,100100_i110.png,plastic
1,100100_i120.png,plastic
2,100100_i130.png,plastic
3,100100_i140.png,plastic
4,100100_i150.png,plastic
...,...,...
5959,994994_i180.png,"wood,paper"
5960,994994_i190.png,"wood,paper"
5961,994994_i210.png,"wood,paper"
5962,994994_i230.png,"wood,paper"


In [None]:
pd_label.to_csv('labels.csv', index=False)