# FTE/BTE Experiment for food-101

In [1]:
import numpy as np
import os
import matplotlib.pyplot as plt
from multiprocessing import Pool
import seaborn as sns
from matplotlib import rcParams
rcParams.update({'figure.autolayout': True})

Generate Data

In [2]:
# Load data set
data_dir = "food-101/images/" # replace with the path name for wherever the downloaded food-101 images have been stored
foods_sorted = sorted(os.listdir(data_dir))

In [21]:
dict_x = {}
for k in range(10):
    # Initialize data_x* with the first image in the first class, then concatenate to acquire all images from the first class
    food_class = os.listdir(os.path.join(data_dir,foods_sorted[10*k]))
    data_xk = [plt.imread(os.path.join(data_dir, foods_sorted[10*k], food_class[0]))]

    for i in range(1,1000):
            data_xk = np.concatenate([data_xk, [(plt.imread(os.path.join(data_dir, foods_sorted[10*k], food_class[i])))]],axis=1)

    # Add to the initialized data_x* array until it contains all images from the 10 classes
    # Concatenating more than 10000 images per batch increases the run time by a lot
    for j in range(((k*10)+1),(10*(k+1))):
        food_class = os.listdir(os.path.join(data_dir,foods_sorted[j]))
        for i in range(0,1000):
            data_xk = np.c_([data_xk, [(plt.imread(os.path.join(data_dir, foods_sorted[j], food_class[i])))]])

    dict_x['data_x' + str(k+1)] = data_xk

ValueError: all the input array dimensions for the concatenation axis must match exactly, but along dimension 2, the array at index 0 has size 308 and the array at index 1 has size 512

In [None]:
# Combine individual numpy arrays for x data for each batch of 10 classes all into one big numpy array
data_x = np.concatenate([dict_x['data_x1'], dict_x['data_x2'], dict_x['data_x3']])
data_x = np.concatenate([data_x, dict_x['data_x4'], dict_x['data_x5']])
data_x = np.concatenate([data_x, dict_x['data_x6'], dict_x['data_x7']])
data_x = np.concatenate([data_x, dict_x['data_x8'], dict_x['data_x9']])
data_x = np.concatenate([data_x, dict_x['data_x10']])

In [None]:
#Create y data containing 100 class labels
data_y = np.full((1000), 0, dtype=int)
for i in range(1,100):
    data_y = np.concatenate([data_y, np.full((1000), i, dtype=int)])

In [None]:
from functions.fte_bte_food101_functions import run_fte_bte_exp

fte = []
bte = []
te = []
accuracies = []

for which_task in range(1,11):

    def run_parallel_exp(shift):

        df_list = run_fte_bte_exp(data_x, data_y, which_task, shift = shift)

        return df_list

    shifts = np.arange(0,10,1)
    acc = []

    with Pool(8) as p:
        # Paralell processing to run the experiment using a different batch for the test set each time
        acc.append(
            p.map(run_parallel_exp, shifts)
        )

    # Average forward transfer accuracies accross all permutations of testing and training batches for each task
    acc_x = []
    acc_y = []
    acc_z = []
    for z in range(which_task):
        for y in range(10):
            for x in range(10):
                acc_x.append(acc[0][x][y]['task_accuracy'][z])
            acc_y.append(np.mean(acc_x))
            acc_x = []
        acc_z.append(np.mean(acc_y))
        acc_y = []

    # Calculate and store FTE
    fte.append((1-acc_z[0])/(1-acc_z[-1]))

    # Average backward transfer accuracies accross all permutations of testing and training batches for each task
    acc_x = []
    acc_y = []
    acc_z = []
    for z in range((which_task - 1), 10):
        for y in range(10):
            for x in range(10):
                acc_x.append(acc[0][x][y]['task_accuracy'][z])
            acc_y.append(np.mean(acc_x))
            acc_x = []
        acc_z.append(np.mean(acc_y))
        acc_y = []

    # Calculate and store accuracies, BTE, and TE
    accuracies.append(acc_z)
    calc_bte = (1-acc_z[0])/([1-a for a in acc_z])
    bte.append(calc_bte)
    te.append([fte[(which_task-1)]*a for a in calc_bte])

In [None]:
from functions.fte_bte_food101_functions import run_fte_bte_exp

fte = []
bte = []
te = []
accuracies = []

for which_task in range(1,11):

    def run_parallel_exp(shift):

        df_list = run_fte_bte_exp(data_x, data_y, which_task, shift = shift)

        return df_list

    shifts = np.arange(0,10,1)
    acc = []

    with Pool(8) as p:
        # Paralell processing to run the experiment using a different batch for the test set each time
        acc.append(
            p.map(run_parallel_exp, shifts)
        )

    # Average forward transfer accuracies accross all permutations of testing and training batches for each task
    acc_x = []
    acc_y = []
    acc_z = []
    for z in range(which_task):
        for y in range(10):
            for x in range(10):
                acc_x.append(acc[0][x][y]['task_accuracy'][z])
            acc_y.append(np.mean(acc_x))
            acc_x = []
        acc_z.append(np.mean(acc_y))
        acc_y = []

    # Calculate and store FTE
    fte.append((1-acc_z[0])/(1-acc_z[-1]))

    # Average backward transfer accuracies accross all permutations of testing and training batches for each task
    acc_x = []
    acc_y = []
    acc_z = []
    for z in range((which_task - 1), 10):
        for y in range(10):
            for x in range(10):
                acc_x.append(acc[0][x][y]['task_accuracy'][z])
            acc_y.append(np.mean(acc_x))
            acc_x = []
        acc_z.append(np.mean(acc_y))
        acc_y = []

    # Calculate and store accuracies, BTE, and TE
    accuracies.append(acc_z)
    calc_bte = (1-acc_z[0])/([1-a for a in acc_z])
    bte.append(calc_bte)
    te.append([fte[(which_task-1)]*a for a in calc_bte])