This piece of code is used to filter to generate the data used for training/testing/validation. The data is filtered by the following criteria:
1. We filter on the following material classes: "plastic", "metal", "wood/paper"
2. we only consider the singular images
3. We know that each item contains 12 of the same images so we only consider the first one and will use random transformations on the remaining 11 images

In [1]:
import numpy as np
import pandas as pd
import os
import shutil
import torchvision.transforms as transforms
import PIL

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
annotations = pd.read_csv('./annotations_text.csv')
annotations

Unnamed: 0.1,Unnamed: 0,No.,Object name,Material,Stained,Surface properties
0,0,1,Christmas bear,['cloth'],['pluriform'],['']
1,1,2,Lab-keys,"['metal', 'cloth']",[''],"['composite', 'shiny']"
2,2,3,Apricot,['fruit'],['uniform'],['']
3,3,4,Round candle,"['candle', 'metal']",[''],"['composite', 'shiny']"
4,4,5,Nut,['wood'],['uniform'],['ribbed']
...,...,...,...,...,...,...
995,995,996,lotion,"['plastic', 'paper']",['pluriform'],"['shiny', 'composite']"
996,996,997,cleansing milk,"['plastic', 'paper']",['pluriform'],"['shiny', 'composite']"
997,997,998,vogue deodorant,"['metal', 'plastic']",['pluriform'],"['shiny', 'composite']"
998,998,999,ALOI recording,"['metal', 'plastic']",['uniform'],"['shiny', 'composite']"


In [3]:
#convert material string to list
def convert_to_list(x):
    x = x.replace('[','')
    x = x.replace(']','')
    x = x.replace('\'','')
    x = x.replace('\'','')
    x = x.replace(' ','')
    x = x.split(',')
    return x

annotations['Material'] = annotations['Material'].apply(convert_to_list)

Transform the duplicate images

In [31]:
source_dir = "./aloi_red4_col/png4/"
target_dir = "./Transformed/"

In [32]:
#remove target directory if it exists
if os.path.exists(target_dir):
    shutil.rmtree(target_dir)

In [33]:
#make target directory
if not os.path.exists(target_dir):
    os.mkdir(target_dir)

In [34]:
for folder in os.listdir(source_dir):
    os.mkdir(target_dir + folder)
    count = 0
    for file in os.listdir(source_dir + folder):
        if (count > 0):
            img = PIL.Image.open(source_dir + folder + "/" + file)
            transformed_img = transforms.RandAugment()(img)
            transformed_img.save(target_dir + folder + "/" + file)
        else:
            shutil.copy(source_dir + folder + "/" + file, target_dir + folder + "/" + file)
        count += 1

Filter the transformed data such that only:
1. singular material classes are considered
2. "plastic", "metal", "wood/paper"
3. Ignore the other material classes

Finally create label csv files

In [4]:
source_dir = "./Transformed/"
target_dir = "./DataFilteredPlural/"

In [5]:
#delete target directory if it exists
if os.path.exists(target_dir):
    shutil.rmtree(target_dir)

In [6]:
if not os.path.exists(target_dir):
    os.mkdir(target_dir)

In [7]:
pd_label = pd.DataFrame(columns=['filename', 'item_type' ,'label'])

In [8]:
for folder in os.listdir(source_dir):
    item_number = int(folder)
    material = annotations.loc[annotations['No.'] == item_number, 'Material'].iloc[0]
    if (len(material) == 1):
        material = material[0]
        if (material == 'metal'):
            for file in os.listdir(source_dir + folder):
                shutil.copy(source_dir + folder + "/" + file, target_dir + file)
                pd_label = pd_label.append({'filename': file, 'item_type': int(folder), 'label': 'metal'}, ignore_index=True)
        elif (material == 'plastic'):
            for file in os.listdir(source_dir + folder):
                shutil.copy(source_dir + folder + "/" + file, target_dir + file)
                pd_label = pd_label.append({'filename': file,'item_type': int(folder), 'label': 'plastic'}, ignore_index=True)
        elif (material == 'wood' or material == 'paper'):
            for file in os.listdir(source_dir + folder):
                shutil.copy(source_dir + folder + "/" + file, target_dir + file)
                pd_label = pd_label.append({'filename': file,'item_type': int(folder), 'label': 'wood,paper'}, ignore_index=True)
    else:
        #we only want to keep images with which are exclusively metal, plastic or wood/paper
        if (('metal') in material and ('plastic') not in material and ('wood, paper') not in material):
            for file in os.listdir(source_dir + folder):
                shutil.copy(source_dir + folder + "/" + file, target_dir + file)
                pd_label = pd_label.append({'filename': file,'item_type': int(folder), 'label': 'metal'}, ignore_index=True)
        elif (('metal') not in material and ('plastic') in material and ('wood, paper') not in material):
            for file in os.listdir(source_dir + folder):
                shutil.copy(source_dir + folder + "/" + file, target_dir + file)
                pd_label = pd_label.append({'filename': file,'item_type': int(folder), 'label': 'plastic'}, ignore_index=True)
        elif (('metal') not in material and ('plastic') not in material and ('wood, paper') in material):
            for file in os.listdir(source_dir + folder):
                shutil.copy(source_dir + folder + "/" + file, target_dir + file)
                pd_label = pd_label.append({'filename': file,'item_type': int(folder), 'label': 'wood,paper'}, ignore_index=True)

  pd_label = pd_label.append({'filename': file,'item_type': int(folder), 'label': 'plastic'}, ignore_index=True)
  pd_label = pd_label.append({'filename': file,'item_type': int(folder), 'label': 'plastic'}, ignore_index=True)
  pd_label = pd_label.append({'filename': file,'item_type': int(folder), 'label': 'plastic'}, ignore_index=True)
  pd_label = pd_label.append({'filename': file,'item_type': int(folder), 'label': 'plastic'}, ignore_index=True)
  pd_label = pd_label.append({'filename': file,'item_type': int(folder), 'label': 'plastic'}, ignore_index=True)
  pd_label = pd_label.append({'filename': file,'item_type': int(folder), 'label': 'plastic'}, ignore_index=True)
  pd_label = pd_label.append({'filename': file,'item_type': int(folder), 'label': 'plastic'}, ignore_index=True)
  pd_label = pd_label.append({'filename': file,'item_type': int(folder), 'label': 'plastic'}, ignore_index=True)
  pd_label = pd_label.append({'filename': file,'item_type': int(folder), 'label': 'plastic'}, ig

In [9]:
pd_label

Unnamed: 0,filename,item_type,label
0,100_i110.png,100,plastic
1,100_i120.png,100,plastic
2,100_i130.png,100,plastic
3,100_i140.png,100,plastic
4,100_i150.png,100,plastic
...,...,...,...
7747,997_i180.png,997,plastic
7748,997_i190.png,997,plastic
7749,997_i210.png,997,plastic
7750,997_i230.png,997,plastic


In [11]:
pd_label.to_csv('labelsPlural.csv', index=False)