In [1]:
import os
import csv
import pickle as pkl
import numpy as np
import pandas as pd
from imageio import imsave

### Set appropriate paths

In [2]:
# Root folder under which the extracted images are written
# (one sub-folder per usage split, then one per emotion label).
SAVE_LOC = '../data/'

# CSV holding one image per row: a pixel string plus its usage split.
CSV_FILE = '../data/fer2013.csv'

# CSV with the new labels, from which a majority-vote label is derived.
LABELS_FILE = '../data/fer2013new.csv'

### Read the new labels file

In [3]:
# Read the labels file, dropping rows that have no image name.
labels = pd.read_csv(LABELS_FILE).dropna(subset=['Image name'], axis=0)

# Replace the image name with the row's position in the labels file. The
# extraction cell looks labels up by the row index of the images CSV, so the
# original (pre-dropna) index is kept on purpose.
labels['Image name'] = labels.index

# Majority vote: for every row, take the name of the column with the highest
# count (ties resolve to the first such column). Done as one vectorised
# idxmax instead of a Python-level lambda per row.
# NOTE(review): assumes every column from position 2 onwards is a vote-count
# column - confirm against the labels file header.
vote_counts = labels.iloc[:, 2:].astype(int)
labels['label'] = vote_counts.idxmax(axis=1)

# Drop rows whose majority label is not an emotion.
labels = labels.loc[~labels['label'].isin(['NF', 'unknown'])]

# Dictionary mapping file index -> majority label.
fidx_to_label = dict(zip(labels['Image name'], labels['label']))

### Extract images from CSV and save to disk

In [4]:
# Walk the images CSV row by row and write every image that has a usable
# label to SAVE_LOC/<usage>/<emotion>/<row index>.png.
# newline='' is what the csv module asks for so that it can handle line
# endings (including newlines embedded in quoted fields) itself.
with open(CSV_FILE, 'r', newline='') as csvfile:
    images_csv = csv.reader(csvfile, delimiter=',')

    # Skip the header through the reader (not the raw file handle) so the
    # CSV parser sees every line; the default avoids StopIteration on an
    # empty file.
    headers = next(images_csv, None)

    # Tracked separately from the loop variable so the final summary also
    # works when the file contains no data rows.
    n_processed = 0

    for idx, row in enumerate(images_csv):
        n_processed = idx + 1

        # Get image pixels from CSV file
        _, pixel_row, usage = row

        # Get label from new labels file. Rows without an entry (dropped
        # or filtered out when the labels were read) are skipped; only the
        # lookup is guarded, so real errors while decoding or saving are no
        # longer at risk of being swallowed.
        emotion = fidx_to_label.get(idx)

        if emotion is not None:
            # Convert the space-separated pixel string to a 48x48 uint8
            # array, then repeat it along a new last axis to get 3 channels.
            image = np.asarray(pixel_row.split(),
                               dtype=np.uint8).reshape(48, 48)
            image = np.repeat(image[:, :, None], 3, axis=2)

            # Save to disk
            save_dir = os.path.join(SAVE_LOC, usage, emotion)
            os.makedirs(save_dir, exist_ok=True)

            image_file = os.path.join(save_dir, str(idx) + '.png')
            imsave(image_file, image)

        if n_processed % 1000 == 0:
            print('Processed %i images.' % n_processed)

print('Processed %i images' % n_processed)
print('\nFinished!')

Processed 1000 images.
Processed 2000 images.
Processed 3000 images.
Processed 4000 images.
Processed 5000 images.
Processed 6000 images.
Processed 7000 images.
Processed 8000 images.
Processed 9000 images.
Processed 10000 images.
Processed 11000 images.
Processed 12000 images.
Processed 13000 images.
Processed 14000 images.
Processed 15000 images.
Processed 16000 images.
Processed 17000 images.
Processed 18000 images.
Processed 19000 images.
Processed 20000 images.
Processed 21000 images.
Processed 22000 images.
Processed 23000 images.
Processed 24000 images.
Processed 25000 images.
Processed 26000 images.
Processed 27000 images.
Processed 28000 images.
Processed 29000 images.
Processed 30000 images.
Processed 31000 images.
Processed 32000 images.
Processed 33000 images.
Processed 34000 images.
Processed 35000 images.
Processed 35887 images

Finished!


In [5]:
# Rename the usage-split folders created by the extraction cell to shorter
# names. Paths are built with os.path.join so the result does not depend on
# SAVE_LOC ending with a path separator.
for old_name, new_name in (('Training', 'train'),
                           ('PublicTest', 'valid'),
                           ('PrivateTest', 'test')):
    src_dir = os.path.join(SAVE_LOC, old_name)
    dst_dir = os.path.join(SAVE_LOC, new_name)

    # Already renamed by an earlier run of this cell: nothing left to do.
    if not os.path.exists(src_dir) and os.path.exists(dst_dir):
        continue

    # Any other situation (source missing altogether, destination already
    # populated, ...) still raises, exactly as a bare os.rename would.
    os.rename(src_dir, dst_dir)