# Relabelling of Annotation

The names of my jpg-files and of the xml-files with the annotations contain the names of shops and products as well as additional information. This script replaces these names with generic ones: shop1, product1, ...

## Imports

In [None]:
import xml.etree.ElementTree as ET
import glob
import os
import shutil
import pandas as pd
import numpy as np

## Paths

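`relabelling/relabel_train.txt` contains the settings for my particular data set. Its first five lines are the path of a sample annotation file, the relative paths of the folders with the original and the relabelled data, and the path and name of the relabelled folder as they are written into the annotations. Every following line holds one pair `old_object_name new_object_name`.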

In [None]:
# Read the data-set specific settings.
# The first five lines are fixed settings; every remaining non-blank line maps
# an original object label to its replacement, separated by whitespace.
object_dict = {}

with open('relabelling/relabel_train.txt') as f:
    # Annotation file that is parsed in the data-exploration section.
    an_img_path = f.readline().strip()
    # Folders that are read from / written to by the relabelling cells.
    relative_path_of_folder_with_original_data = f.readline().strip()
    relative_path_of_folder_with_relabelled_data = f.readline().strip()
    # Values written into the <path> and <folder> elements of each annotation.
    path_of_folder_with_relabelled_data = f.readline().strip()
    name_of_folder_with_relabelled_data = f.readline().strip()
    for object_line in f:
        fields = object_line.split()
        if not fields:
            # Blank lines (e.g. a trailing newline at the end of the file)
            # carry no mapping and would break the unpacking below.
            continue
        # Still raises ValueError if a line does not hold exactly two names.
        old_object_name, new_object_name = fields
        object_dict[old_object_name] = new_object_name

## Data Exploration

First, I look at a single annotation file and explore the information that it contains.

In [None]:
# Parse the sample annotation file and keep its root element for the
# exploration cells below.
mytree = ET.parse(an_img_path)
myroot = mytree.getroot()

In [None]:
# Show the tags of the first seven children of the root element.
for child in myroot[:7]:
    print(child.tag)

I stop after the seventh element because all following elements are objects with the same structure.

In [None]:
# Print the text of every <folder> element directly below the root.
for folder_el in myroot.findall('folder'):
    print(folder_el.text)

In [None]:
# Print the text of every <filename> element directly below the root.
for filename_el in myroot.findall('filename'):
    print(filename_el.text)

In [None]:
# Print the text of every <path> element directly below the root.
for path_el in myroot.findall('path'):
    print(path_el.text)

In [None]:
# List the tags of the children of the first <object> element.
first_object = myroot.find('object')
for field in first_object:
    print(field.tag)

In [None]:
# Print the label stored in the <name> child of every <object> element.
for object_el in myroot.findall('object'):
    label_el = object_el.find('name')
    print(label_el.text)

## Move and Relabel XML

Here I take the xml-files from the folder with the original annotations, relabel them, and save them into a new folder.

In [None]:
# The new folder may contain xml-files from previous runs of this script, which I remove here.
for filename in os.listdir(relative_path_of_folder_with_relabelled_data):
    if filename.endswith('.xml'):
        path_to_file = os.path.join(relative_path_of_folder_with_relabelled_data, filename)
        os.remove(path_to_file)

# shop_dict maps each original annotation file name to its new name ('shop1', 'shop2', ...).
shop_dict = {}
shop_nr = 1
# os.listdir returns the entries in arbitrary order; sorting makes the
# assignment of shop numbers reproducible between runs and machines.
for filename in sorted(os.listdir(relative_path_of_folder_with_original_data)):
    if not filename.endswith('.xml'):
        continue

    mytree = ET.parse(os.path.join(relative_path_of_folder_with_original_data, filename))
    myroot = mytree.getroot()

    new_shop_name = 'shop'+str(shop_nr)
    # Derived from the counter instead of from the <filename> element, so the
    # name is always defined and can never be left over from the previous file.
    new_filename = new_shop_name+'.jpg'
    shop_dict[filename] = new_shop_name

    for el in myroot.iter('folder'):
        el.text = name_of_folder_with_relabelled_data
    for el in myroot.iter('filename'):
        el.text = new_filename
    for el in myroot.findall('path'):
        el.text = path_of_folder_with_relabelled_data+new_filename
    for el in myroot.findall('object'):
        # Raises KeyError if the annotation uses a label that has no
        # replacement in object_dict.
        label = str(el.find('name').text)
        el.find('name').text = object_dict[label]

    mytree.write(os.path.join(relative_path_of_folder_with_relabelled_data, new_shop_name+'.xml'))

    shop_nr += 1

## Move and Rename JPG

First, I copy the jpg-files into the new folder. Then I rename the images to shop1, shop2, ...

In [None]:
# The new folder may contain renamed jpg-files from previous runs of this script, which I remove here.
for filename in os.listdir(relative_path_of_folder_with_relabelled_data):
    if filename.endswith('.jpg'):
        path_to_file = os.path.join(relative_path_of_folder_with_relabelled_data, filename)
        os.remove(path_to_file)

# Copy every image straight to its new name. Doing the copy and the rename in
# one step means that
#   * the working directory does not have to be changed, so the relative paths
#     used by the other cells stay valid and CAPSTONE_PATH is not needed here;
#   * an original image that happens to be called like a new name (shopN.jpg)
#     cannot be overwritten halfway through a separate rename pass.
jpg_pattern = os.path.join(relative_path_of_folder_with_original_data, '*.jpg')
for source_path in sorted(glob.glob(jpg_pattern)):
    if not os.path.isfile(source_path):
        continue
    original_stem = os.path.splitext(os.path.basename(source_path))[0]
    # shop_dict is keyed by the annotation file name; a KeyError here means
    # that the image has no xml-file of the same name.
    new_name = shop_dict[original_stem+'.xml']+'.jpg'
    shutil.copy2(source_path, os.path.join(relative_path_of_folder_with_relabelled_data, new_name))

## Save Old and New Labels

First, I check if the relabelling worked correctly.

In [None]:
# One row per object label: the original name next to its replacement.
object_labels_df = pd.DataFrame(
    list(object_dict.items()),
    columns=['original_label', 'new_label'],
)
object_labels_df.head()

In [None]:
# One row per annotation file: the original file name next to its new shop name.
image_labels_df = pd.DataFrame(
    list(shop_dict.items()),
    columns=['original_label', 'new_label'],
)
image_labels_df.head()

Then I save the original and new object and image labels in npy-files.

In [None]:
# Switch to the project root so that the relative output paths below resolve
# inside the project, then store both label tables as npy-files.
project_root = os.environ.get('CAPSTONE_PATH')
os.chdir(project_root)
for target_file, labels_df in (
    ('relabelling/object_relabelling.npy', object_labels_df),
    ('relabelling/train_image_relabelling.npy', image_labels_df),
):
    np.save(target_file, labels_df, allow_pickle=True)