In [1]:
import requests
import os
import tarfile
import shutil

In [2]:
# Generic function to download a file from target url and write to a designated path
def download_file(url, path, write_type = 'w', verify = False):
    if write_type not in ['w', 'wb']:
        print('Error. write_type must be w or wb')
        
        return
    
    print(f'Retrieving response from {url}')
    r = requests.get(url, verify = verify)
    
    if r.status_code == 200:
        print(f'Response successful, writing to {path}')
        
        with open(path, write_type) as f:
            if write_type == 'w':
                f.write(r.text)
            elif write_type == 'wb':
                f.write(r.content)
        
        print(f'Successfully written to {path}')
        
        return
        
    print(f'Response unsuccessful, response code: {r.status_code}')
    
    return

In [3]:
# Create File Structures
top_path = './data'
paths = ['raw', 'preprocessed', 'vectorized']
# raw_path = './data/raw'
# pre_processed = './data/preprocessed'
# vectorized = './data/vectorized'
# paths = [raw_path, pre_processed, vectorized]
partitions = ['test', 'train']

for path in paths:
    for partition in partitions:
        full_path = os.path.join(top_path, path, partition)
        if not os.path.exists(full_path):
            os.makedirs(full_path)

In [4]:
# Download and unpack dataset
lfw_url = 'http://vis-www.cs.umass.edu/lfw/lfw.tgz'
lfw_target_path = os.path.join(top_path, paths[0], 'lfw.tgz')

download_file(lfw_url, lfw_target_path, 'wb')

file = tarfile.open(lfw_target_path)
file.extractall('/'.join(lfw_target_path.split('/')[:-1]))
file.close()

Retrieving response from http://vis-www.cs.umass.edu/lfw/lfw.tgz
Response successful, writing to ./data/raw/lfw.tgz
Successfully written to ./data/raw/lfw.tgz


In [5]:
# Download train/test split instructions

train_instructions_url = 'https://vis-www.cs.umass.edu/lfw/peopleDevTrain.txt'
test_instructions_url = 'https://vis-www.cs.umass.edu/lfw/peopleDevTest.txt'
train_instructions_path = os.path.join(top_path, paths[0], 'peopleDevTrain.txt')
test_instructions_path = os.path.join(top_path, paths[0], 'peopleDevTest.txt')

urls = [train_instructions_url, test_instructions_url]
instruction_paths = [train_instructions_path, test_instructions_path]

for i, url in enumerate(urls):
    download_file(url, instruction_paths[i], 'w')

Retrieving response from https://vis-www.cs.umass.edu/lfw/peopleDevTrain.txt
Response successful, writing to ./data/raw/peopleDevTrain.txt
Successfully written to ./data/raw/peopleDevTrain.txt
Retrieving response from https://vis-www.cs.umass.edu/lfw/peopleDevTest.txt




Response successful, writing to ./data/raw/peopleDevTest.txt
Successfully written to ./data/raw/peopleDevTest.txt


In [6]:
# Move files outside of unzipped directory into train/test
for i, instruction in enumerate(instruction_paths):
    with open(instruction, 'r') as f:
        name_list = [[int(y) if y.isdigit() else y for y in x.strip().split('\t')] for x in f.readlines()[1:]]
    for pair in name_list:
        name, amount = pair
        image_dir = os.path.join(top_path, paths[0], f'lfw/{name}')
        target_dir = os.path.join(top_path, paths[0], partitions[i])
        
        for j in range(1, amount + 1):
            image_name = f'{name}_{j:04}.jpg'
            image_path = os.path.join(image_dir, image_name)
            target_path = os.path.join(target_dir, image_name)
            shutil.move(image_path, target_path)

./data/raw/lfw/AJ_Cook/AJ_Cook_0001.jpg ./data/raw/test/AJ_Cook_0001.jpg
./data/raw/lfw/Aaron_Eckhart/Aaron_Eckhart_0001.jpg ./data/raw/test/Aaron_Eckhart_0001.jpg
./data/raw/lfw/Aaron_Patterson/Aaron_Patterson_0001.jpg ./data/raw/test/Aaron_Patterson_0001.jpg
./data/raw/lfw/Aaron_Peirsol/Aaron_Peirsol_0001.jpg ./data/raw/test/Aaron_Peirsol_0001.jpg
./data/raw/lfw/Aaron_Peirsol/Aaron_Peirsol_0002.jpg ./data/raw/test/Aaron_Peirsol_0002.jpg
./data/raw/lfw/Aaron_Peirsol/Aaron_Peirsol_0003.jpg ./data/raw/test/Aaron_Peirsol_0003.jpg
./data/raw/lfw/Aaron_Peirsol/Aaron_Peirsol_0004.jpg ./data/raw/test/Aaron_Peirsol_0004.jpg
./data/raw/lfw/Aaron_Pena/Aaron_Pena_0001.jpg ./data/raw/test/Aaron_Pena_0001.jpg
./data/raw/lfw/Aaron_Sorkin/Aaron_Sorkin_0001.jpg ./data/raw/test/Aaron_Sorkin_0001.jpg
./data/raw/lfw/Aaron_Sorkin/Aaron_Sorkin_0002.jpg ./data/raw/test/Aaron_Sorkin_0002.jpg
./data/raw/lfw/Abbas_Kiarostami/Abbas_Kiarostami_0001.jpg ./data/raw/test/Abbas_Kiarostami_0001.jpg
./data/raw/lfw/Ab

./data/raw/lfw/Bettina_Rheims/Bettina_Rheims_0001.jpg ./data/raw/test/Bettina_Rheims_0001.jpg
./data/raw/lfw/Betty_Garrison/Betty_Garrison_0001.jpg ./data/raw/test/Betty_Garrison_0001.jpg
./data/raw/lfw/Beyonce_Knowles/Beyonce_Knowles_0001.jpg ./data/raw/test/Beyonce_Knowles_0001.jpg
./data/raw/lfw/Bijan_Darvish/Bijan_Darvish_0001.jpg ./data/raw/test/Bijan_Darvish_0001.jpg
./data/raw/lfw/Bijan_Darvish/Bijan_Darvish_0002.jpg ./data/raw/test/Bijan_Darvish_0002.jpg
./data/raw/lfw/Bijan_Darvish/Bijan_Darvish_0003.jpg ./data/raw/test/Bijan_Darvish_0003.jpg
./data/raw/lfw/Bijan_Namdar_Zangeneh/Bijan_Namdar_Zangeneh_0001.jpg ./data/raw/test/Bijan_Namdar_Zangeneh_0001.jpg
./data/raw/lfw/Bijan_Namdar_Zangeneh/Bijan_Namdar_Zangeneh_0002.jpg ./data/raw/test/Bijan_Namdar_Zangeneh_0002.jpg
./data/raw/lfw/Bilal_Erdogan/Bilal_Erdogan_0001.jpg ./data/raw/test/Bilal_Erdogan_0001.jpg
./data/raw/lfw/Biljana_Plavsic/Biljana_Plavsic_0001.jpg ./data/raw/test/Biljana_Plavsic_0001.jpg
./data/raw/lfw/Biljana_P

./data/raw/lfw/Binyamin_Ben-Eliezer/Binyamin_Ben-Eliezer_0006.jpg ./data/raw/test/Binyamin_Ben-Eliezer_0006.jpg
./data/raw/lfw/Binyamin_Ben-Eliezer/Binyamin_Ben-Eliezer_0007.jpg ./data/raw/test/Binyamin_Ben-Eliezer_0007.jpg
./data/raw/lfw/Bison_Dele/Bison_Dele_0001.jpg ./data/raw/test/Bison_Dele_0001.jpg
./data/raw/lfw/Bixente_LIzarazu/Bixente_LIzarazu_0001.jpg ./data/raw/test/Bixente_LIzarazu_0001.jpg
./data/raw/lfw/Blas_Ople/Blas_Ople_0001.jpg ./data/raw/test/Blas_Ople_0001.jpg
./data/raw/lfw/Blythe_Danner/Blythe_Danner_0001.jpg ./data/raw/test/Blythe_Danner_0001.jpg
./data/raw/lfw/Blythe_Danner/Blythe_Danner_0002.jpg ./data/raw/test/Blythe_Danner_0002.jpg
./data/raw/lfw/Blythe_Hartley/Blythe_Hartley_0001.jpg ./data/raw/test/Blythe_Hartley_0001.jpg
./data/raw/lfw/Blythe_Hartley/Blythe_Hartley_0002.jpg ./data/raw/test/Blythe_Hartley_0002.jpg
./data/raw/lfw/Bo_Pelini/Bo_Pelini_0001.jpg ./data/raw/test/Bo_Pelini_0001.jpg
./data/raw/lfw/Bo_Pelini/Bo_Pelini_0002.jpg ./data/raw/test/Bo_Pel

./data/raw/lfw/Didier_Defago/Didier_Defago_0001.jpg ./data/raw/test/Didier_Defago_0001.jpg
./data/raw/lfw/Diego_Armando_Maradona/Diego_Armando_Maradona_0001.jpg ./data/raw/test/Diego_Armando_Maradona_0001.jpg
./data/raw/lfw/Diego_Colorado/Diego_Colorado_0001.jpg ./data/raw/test/Diego_Colorado_0001.jpg
./data/raw/lfw/Diego_Diego_Lerman/Diego_Diego_Lerman_0001.jpg ./data/raw/test/Diego_Diego_Lerman_0001.jpg
./data/raw/lfw/Dieter_Holzer/Dieter_Holzer_0001.jpg ./data/raw/test/Dieter_Holzer_0001.jpg
./data/raw/lfw/Dimitar_Berbatov/Dimitar_Berbatov_0001.jpg ./data/raw/test/Dimitar_Berbatov_0001.jpg
./data/raw/lfw/Dimitri_Perricos/Dimitri_Perricos_0001.jpg ./data/raw/test/Dimitri_Perricos_0001.jpg
./data/raw/lfw/Dinah_Turner/Dinah_Turner_0001.jpg ./data/raw/test/Dinah_Turner_0001.jpg
./data/raw/lfw/Dino_Risi/Dino_Risi_0001.jpg ./data/raw/test/Dino_Risi_0001.jpg
./data/raw/lfw/Dino_de_Laurentis/Dino_de_Laurentis_0001.jpg ./data/raw/test/Dino_de_Laurentis_0001.jpg
./data/raw/lfw/Dino_de_Laurent

./data/raw/lfw/Edmund_Hillary/Edmund_Hillary_0003.jpg ./data/raw/test/Edmund_Hillary_0003.jpg
./data/raw/lfw/Edmund_Stoiber/Edmund_Stoiber_0001.jpg ./data/raw/test/Edmund_Stoiber_0001.jpg
./data/raw/lfw/Edmund_Stoiber/Edmund_Stoiber_0002.jpg ./data/raw/test/Edmund_Stoiber_0002.jpg
./data/raw/lfw/Edmund_Stoiber/Edmund_Stoiber_0003.jpg ./data/raw/test/Edmund_Stoiber_0003.jpg
./data/raw/lfw/Edmund_Stoiber/Edmund_Stoiber_0004.jpg ./data/raw/test/Edmund_Stoiber_0004.jpg
./data/raw/lfw/Edmund_Stoiber/Edmund_Stoiber_0005.jpg ./data/raw/test/Edmund_Stoiber_0005.jpg
./data/raw/lfw/Edmund_Stoiber/Edmund_Stoiber_0006.jpg ./data/raw/test/Edmund_Stoiber_0006.jpg
./data/raw/lfw/Edmund_Stoiber/Edmund_Stoiber_0007.jpg ./data/raw/test/Edmund_Stoiber_0007.jpg
./data/raw/lfw/Edmund_Stoiber/Edmund_Stoiber_0008.jpg ./data/raw/test/Edmund_Stoiber_0008.jpg
./data/raw/lfw/Edmund_Stoiber/Edmund_Stoiber_0009.jpg ./data/raw/test/Edmund_Stoiber_0009.jpg
./data/raw/lfw/Edmund_Stoiber/Edmund_Stoiber_0010.jpg ./data

./data/raw/lfw/George_W_Bush/George_W_Bush_0489.jpg ./data/raw/test/George_W_Bush_0489.jpg
./data/raw/lfw/George_W_Bush/George_W_Bush_0490.jpg ./data/raw/test/George_W_Bush_0490.jpg
./data/raw/lfw/George_W_Bush/George_W_Bush_0491.jpg ./data/raw/test/George_W_Bush_0491.jpg
./data/raw/lfw/George_W_Bush/George_W_Bush_0492.jpg ./data/raw/test/George_W_Bush_0492.jpg
./data/raw/lfw/George_W_Bush/George_W_Bush_0493.jpg ./data/raw/test/George_W_Bush_0493.jpg
./data/raw/lfw/George_W_Bush/George_W_Bush_0494.jpg ./data/raw/test/George_W_Bush_0494.jpg
./data/raw/lfw/George_W_Bush/George_W_Bush_0495.jpg ./data/raw/test/George_W_Bush_0495.jpg
./data/raw/lfw/George_W_Bush/George_W_Bush_0496.jpg ./data/raw/test/George_W_Bush_0496.jpg
./data/raw/lfw/George_W_Bush/George_W_Bush_0497.jpg ./data/raw/test/George_W_Bush_0497.jpg
./data/raw/lfw/George_W_Bush/George_W_Bush_0498.jpg ./data/raw/test/George_W_Bush_0498.jpg
./data/raw/lfw/George_W_Bush/George_W_Bush_0499.jpg ./data/raw/test/George_W_Bush_0499.jpg

./data/raw/lfw/Janet_Napolitano/Janet_Napolitano_0001.jpg ./data/raw/test/Janet_Napolitano_0001.jpg
./data/raw/lfw/Janet_Napolitano/Janet_Napolitano_0002.jpg ./data/raw/test/Janet_Napolitano_0002.jpg
./data/raw/lfw/Janet_Napolitano/Janet_Napolitano_0003.jpg ./data/raw/test/Janet_Napolitano_0003.jpg
./data/raw/lfw/Janet_Napolitano/Janet_Napolitano_0004.jpg ./data/raw/test/Janet_Napolitano_0004.jpg
./data/raw/lfw/Janet_Thorpe/Janet_Thorpe_0001.jpg ./data/raw/test/Janet_Thorpe_0001.jpg
./data/raw/lfw/Janet_Thorpe/Janet_Thorpe_0002.jpg ./data/raw/test/Janet_Thorpe_0002.jpg
./data/raw/lfw/Janette_Husarova/Janette_Husarova_0001.jpg ./data/raw/test/Janette_Husarova_0001.jpg
./data/raw/lfw/Janice_Abreu/Janice_Abreu_0001.jpg ./data/raw/test/Janice_Abreu_0001.jpg
./data/raw/lfw/Janine_Pietsch/Janine_Pietsch_0001.jpg ./data/raw/test/Janine_Pietsch_0001.jpg
./data/raw/lfw/Janusz_Kaminski/Janusz_Kaminski_0001.jpg ./data/raw/test/Janusz_Kaminski_0001.jpg
./data/raw/lfw/Jaouad_Gharib/Jaouad_Gharib_00

./data/raw/lfw/Jonathan_Mostow/Jonathan_Mostow_0001.jpg ./data/raw/test/Jonathan_Mostow_0001.jpg
./data/raw/lfw/Jonathan_Mostow/Jonathan_Mostow_0002.jpg ./data/raw/test/Jonathan_Mostow_0002.jpg
./data/raw/lfw/Jonathan_Schroeder/Jonathan_Schroeder_0001.jpg ./data/raw/test/Jonathan_Schroeder_0001.jpg
./data/raw/lfw/Jonathan_Tiomkin/Jonathan_Tiomkin_0001.jpg ./data/raw/test/Jonathan_Tiomkin_0001.jpg
./data/raw/lfw/Jonathan_Woodgate/Jonathan_Woodgate_0001.jpg ./data/raw/test/Jonathan_Woodgate_0001.jpg
./data/raw/lfw/Jong_Thae_Hwa/Jong_Thae_Hwa_0001.jpg ./data/raw/test/Jong_Thae_Hwa_0001.jpg
./data/raw/lfw/Jong_Thae_Hwa/Jong_Thae_Hwa_0002.jpg ./data/raw/test/Jong_Thae_Hwa_0002.jpg
./data/raw/lfw/Jong_Wook_Lee/Jong_Wook_Lee_0001.jpg ./data/raw/test/Jong_Wook_Lee_0001.jpg
./data/raw/lfw/Jong_Wook_Lee/Jong_Wook_Lee_0002.jpg ./data/raw/test/Jong_Wook_Lee_0002.jpg
./data/raw/lfw/Jong_Wook_Lee/Jong_Wook_Lee_0003.jpg ./data/raw/test/Jong_Wook_Lee_0003.jpg
./data/raw/lfw/Jong_Wook_Lee/Jong_Wook_Lee

./data/raw/lfw/Lindsay_Davenport/Lindsay_Davenport_0010.jpg ./data/raw/test/Lindsay_Davenport_0010.jpg
./data/raw/lfw/Lindsay_Davenport/Lindsay_Davenport_0011.jpg ./data/raw/test/Lindsay_Davenport_0011.jpg
./data/raw/lfw/Lindsay_Davenport/Lindsay_Davenport_0012.jpg ./data/raw/test/Lindsay_Davenport_0012.jpg
./data/raw/lfw/Lindsay_Davenport/Lindsay_Davenport_0013.jpg ./data/raw/test/Lindsay_Davenport_0013.jpg
./data/raw/lfw/Lindsay_Davenport/Lindsay_Davenport_0014.jpg ./data/raw/test/Lindsay_Davenport_0014.jpg
./data/raw/lfw/Lindsay_Davenport/Lindsay_Davenport_0015.jpg ./data/raw/test/Lindsay_Davenport_0015.jpg
./data/raw/lfw/Lindsay_Davenport/Lindsay_Davenport_0016.jpg ./data/raw/test/Lindsay_Davenport_0016.jpg
./data/raw/lfw/Lindsay_Davenport/Lindsay_Davenport_0017.jpg ./data/raw/test/Lindsay_Davenport_0017.jpg
./data/raw/lfw/Lindsay_Davenport/Lindsay_Davenport_0018.jpg ./data/raw/test/Lindsay_Davenport_0018.jpg
./data/raw/lfw/Lindsay_Davenport/Lindsay_Davenport_0019.jpg ./data/raw/te

./data/raw/lfw/Molly_Sims/Molly_Sims_0001.jpg ./data/raw/test/Molly_Sims_0001.jpg
./data/raw/lfw/Momcilo_Perisic/Momcilo_Perisic_0001.jpg ./data/raw/test/Momcilo_Perisic_0001.jpg
./data/raw/lfw/Mona_Locke/Mona_Locke_0001.jpg ./data/raw/test/Mona_Locke_0001.jpg
./data/raw/lfw/Mona_Rishmawi/Mona_Rishmawi_0001.jpg ./data/raw/test/Mona_Rishmawi_0001.jpg
./data/raw/lfw/Monica_Bellucci/Monica_Bellucci_0001.jpg ./data/raw/test/Monica_Bellucci_0001.jpg
./data/raw/lfw/Monica_Bellucci/Monica_Bellucci_0002.jpg ./data/raw/test/Monica_Bellucci_0002.jpg
./data/raw/lfw/Monica_Bellucci/Monica_Bellucci_0003.jpg ./data/raw/test/Monica_Bellucci_0003.jpg
./data/raw/lfw/Monica_Bellucci/Monica_Bellucci_0004.jpg ./data/raw/test/Monica_Bellucci_0004.jpg
./data/raw/lfw/Monica_Seles/Monica_Seles_0001.jpg ./data/raw/test/Monica_Seles_0001.jpg
./data/raw/lfw/Monica_Seles/Monica_Seles_0002.jpg ./data/raw/test/Monica_Seles_0002.jpg
./data/raw/lfw/Monica_Seles/Monica_Seles_0003.jpg ./data/raw/test/Monica_Seles_0003.

./data/raw/lfw/Richard_Gephardt/Richard_Gephardt_0001.jpg ./data/raw/test/Richard_Gephardt_0001.jpg
./data/raw/lfw/Richard_Gephardt/Richard_Gephardt_0002.jpg ./data/raw/test/Richard_Gephardt_0002.jpg
./data/raw/lfw/Richard_Gephardt/Richard_Gephardt_0003.jpg ./data/raw/test/Richard_Gephardt_0003.jpg
./data/raw/lfw/Richard_Gephardt/Richard_Gephardt_0004.jpg ./data/raw/test/Richard_Gephardt_0004.jpg
./data/raw/lfw/Richard_Gephardt/Richard_Gephardt_0005.jpg ./data/raw/test/Richard_Gephardt_0005.jpg
./data/raw/lfw/Richard_Gephardt/Richard_Gephardt_0006.jpg ./data/raw/test/Richard_Gephardt_0006.jpg
./data/raw/lfw/Richard_Gephardt/Richard_Gephardt_0007.jpg ./data/raw/test/Richard_Gephardt_0007.jpg
./data/raw/lfw/Richard_Gephardt/Richard_Gephardt_0008.jpg ./data/raw/test/Richard_Gephardt_0008.jpg
./data/raw/lfw/Richard_Gephardt/Richard_Gephardt_0009.jpg ./data/raw/test/Richard_Gephardt_0009.jpg
./data/raw/lfw/Richard_Gephardt/Richard_Gephardt_0010.jpg ./data/raw/test/Richard_Gephardt_0010.jpg


./data/raw/lfw/Taha_Yassin_Ramadan/Taha_Yassin_Ramadan_0015.jpg ./data/raw/test/Taha_Yassin_Ramadan_0015.jpg
./data/raw/lfw/Taia_Balk/Taia_Balk_0001.jpg ./data/raw/test/Taia_Balk_0001.jpg
./data/raw/lfw/Takahiro_Mori/Takahiro_Mori_0001.jpg ./data/raw/test/Takahiro_Mori_0001.jpg
./data/raw/lfw/Takaloo/Takaloo_0001.jpg ./data/raw/test/Takaloo_0001.jpg
./data/raw/lfw/Takashi_Sorimachi/Takashi_Sorimachi_0001.jpg ./data/raw/test/Takashi_Sorimachi_0001.jpg
./data/raw/lfw/Takashi_Sorimachi/Takashi_Sorimachi_0002.jpg ./data/raw/test/Takashi_Sorimachi_0002.jpg
./data/raw/lfw/Takashi_Yamamoto/Takashi_Yamamoto_0001.jpg ./data/raw/test/Takashi_Yamamoto_0001.jpg
./data/raw/lfw/Takenori_Kanzaki/Takenori_Kanzaki_0001.jpg ./data/raw/test/Takenori_Kanzaki_0001.jpg
./data/raw/lfw/Takeo_Hiranuma/Takeo_Hiranuma_0001.jpg ./data/raw/test/Takeo_Hiranuma_0001.jpg
./data/raw/lfw/Takeshi_Kitano/Takeshi_Kitano_0001.jpg ./data/raw/test/Takeshi_Kitano_0001.jpg
./data/raw/lfw/Taku_Yamasaki/Taku_Yamasaki_0001.jpg ./

./data/raw/lfw/Adel_Al-Jubeir/Adel_Al-Jubeir_0001.jpg ./data/raw/train/Adel_Al-Jubeir_0001.jpg
./data/raw/lfw/Adel_Al-Jubeir/Adel_Al-Jubeir_0002.jpg ./data/raw/train/Adel_Al-Jubeir_0002.jpg
./data/raw/lfw/Adel_Al-Jubeir/Adel_Al-Jubeir_0003.jpg ./data/raw/train/Adel_Al-Jubeir_0003.jpg
./data/raw/lfw/Adrian_Annus/Adrian_Annus_0001.jpg ./data/raw/train/Adrian_Annus_0001.jpg
./data/raw/lfw/Adrian_Murrell/Adrian_Murrell_0001.jpg ./data/raw/train/Adrian_Murrell_0001.jpg
./data/raw/lfw/Adriana_Lima/Adriana_Lima_0001.jpg ./data/raw/train/Adriana_Lima_0001.jpg
./data/raw/lfw/Adriana_Perez_Navarro/Adriana_Perez_Navarro_0001.jpg ./data/raw/train/Adriana_Perez_Navarro_0001.jpg
./data/raw/lfw/Adrien_Brody/Adrien_Brody_0001.jpg ./data/raw/train/Adrien_Brody_0001.jpg
./data/raw/lfw/Adrien_Brody/Adrien_Brody_0002.jpg ./data/raw/train/Adrien_Brody_0002.jpg
./data/raw/lfw/Adrien_Brody/Adrien_Brody_0003.jpg ./data/raw/train/Adrien_Brody_0003.jpg
./data/raw/lfw/Adrien_Brody/Adrien_Brody_0004.jpg ./data/ra

./data/raw/lfw/Elsa_Zylberstein/Elsa_Zylberstein_0001.jpg ./data/raw/train/Elsa_Zylberstein_0001.jpg
./data/raw/lfw/Elsa_Zylberstein/Elsa_Zylberstein_0002.jpg ./data/raw/train/Elsa_Zylberstein_0002.jpg
./data/raw/lfw/Elsa_Zylberstein/Elsa_Zylberstein_0003.jpg ./data/raw/train/Elsa_Zylberstein_0003.jpg
./data/raw/lfw/Elsa_Zylberstein/Elsa_Zylberstein_0004.jpg ./data/raw/train/Elsa_Zylberstein_0004.jpg
./data/raw/lfw/Elsa_Zylberstein/Elsa_Zylberstein_0005.jpg ./data/raw/train/Elsa_Zylberstein_0005.jpg
./data/raw/lfw/Elsa_Zylberstein/Elsa_Zylberstein_0006.jpg ./data/raw/train/Elsa_Zylberstein_0006.jpg
./data/raw/lfw/Elvis_Costello/Elvis_Costello_0001.jpg ./data/raw/train/Elvis_Costello_0001.jpg
./data/raw/lfw/Emilio_Botin/Emilio_Botin_0001.jpg ./data/raw/train/Emilio_Botin_0001.jpg
./data/raw/lfw/Emily_Stevens/Emily_Stevens_0001.jpg ./data/raw/train/Emily_Stevens_0001.jpg
./data/raw/lfw/Emma_Watson/Emma_Watson_0001.jpg ./data/raw/train/Emma_Watson_0001.jpg
./data/raw/lfw/Emma_Watson/Emma_

./data/raw/lfw/Fred_Funk/Fred_Funk_0002.jpg ./data/raw/train/Fred_Funk_0002.jpg
./data/raw/lfw/Frederick_Madden/Frederick_Madden_0001.jpg ./data/raw/train/Frederick_Madden_0001.jpg
./data/raw/lfw/Fredric_Seaman/Fredric_Seaman_0001.jpg ./data/raw/train/Fredric_Seaman_0001.jpg
./data/raw/lfw/Fruit_Chan/Fruit_Chan_0001.jpg ./data/raw/train/Fruit_Chan_0001.jpg
./data/raw/lfw/GL_Peiris/GL_Peiris_0001.jpg ./data/raw/train/GL_Peiris_0001.jpg
./data/raw/lfw/GL_Peiris/GL_Peiris_0002.jpg ./data/raw/train/GL_Peiris_0002.jpg
./data/raw/lfw/GL_Peiris/GL_Peiris_0003.jpg ./data/raw/train/GL_Peiris_0003.jpg
./data/raw/lfw/GL_Peiris/GL_Peiris_0004.jpg ./data/raw/train/GL_Peiris_0004.jpg
./data/raw/lfw/Gabrielle_Union/Gabrielle_Union_0001.jpg ./data/raw/train/Gabrielle_Union_0001.jpg
./data/raw/lfw/Gao_Qiang/Gao_Qiang_0001.jpg ./data/raw/train/Gao_Qiang_0001.jpg
./data/raw/lfw/Gao_Qiang/Gao_Qiang_0002.jpg ./data/raw/train/Gao_Qiang_0002.jpg
./data/raw/lfw/Garry_McCoy/Garry_McCoy_0001.jpg ./data/raw/trai

./data/raw/lfw/Malik_Mahmud/Malik_Mahmud_0001.jpg ./data/raw/train/Malik_Mahmud_0001.jpg
./data/raw/lfw/Mamdouh_Habib/Mamdouh_Habib_0001.jpg ./data/raw/train/Mamdouh_Habib_0001.jpg
./data/raw/lfw/Manijeh_Hekmat/Manijeh_Hekmat_0001.jpg ./data/raw/train/Manijeh_Hekmat_0001.jpg
./data/raw/lfw/Manuel_Jesus/Manuel_Jesus_0001.jpg ./data/raw/train/Manuel_Jesus_0001.jpg
./data/raw/lfw/Manuel_Pellegrini/Manuel_Pellegrini_0001.jpg ./data/raw/train/Manuel_Pellegrini_0001.jpg
./data/raw/lfw/Marc_Anthony/Marc_Anthony_0001.jpg ./data/raw/train/Marc_Anthony_0001.jpg
./data/raw/lfw/Marc_Racicot/Marc_Racicot_0001.jpg ./data/raw/train/Marc_Racicot_0001.jpg
./data/raw/lfw/Marcella_Anderson/Marcella_Anderson_0001.jpg ./data/raw/train/Marcella_Anderson_0001.jpg
./data/raw/lfw/Marcelo_Rios/Marcelo_Rios_0001.jpg ./data/raw/train/Marcelo_Rios_0001.jpg
./data/raw/lfw/Marcelo_Rios/Marcelo_Rios_0002.jpg ./data/raw/train/Marcelo_Rios_0002.jpg
./data/raw/lfw/Marcelo_Rios/Marcelo_Rios_0003.jpg ./data/raw/train/Marc

./data/raw/lfw/Steffi_Graf/Steffi_Graf_0003.jpg ./data/raw/train/Steffi_Graf_0003.jpg
./data/raw/lfw/Steffi_Graf/Steffi_Graf_0004.jpg ./data/raw/train/Steffi_Graf_0004.jpg
./data/raw/lfw/Steffi_Graf/Steffi_Graf_0005.jpg ./data/raw/train/Steffi_Graf_0005.jpg
./data/raw/lfw/Stellan_Skarsgard/Stellan_Skarsgard_0001.jpg ./data/raw/train/Stellan_Skarsgard_0001.jpg
./data/raw/lfw/Stellan_Skarsgard/Stellan_Skarsgard_0002.jpg ./data/raw/train/Stellan_Skarsgard_0002.jpg
./data/raw/lfw/Stepan_Demirchian/Stepan_Demirchian_0001.jpg ./data/raw/train/Stepan_Demirchian_0001.jpg
./data/raw/lfw/Stephan_Eberharter/Stephan_Eberharter_0001.jpg ./data/raw/train/Stephan_Eberharter_0001.jpg
./data/raw/lfw/Stephane_Delajoux/Stephane_Delajoux_0001.jpg ./data/raw/train/Stephane_Delajoux_0001.jpg
./data/raw/lfw/Stephanie_Cohen_Aloro/Stephanie_Cohen_Aloro_0001.jpg ./data/raw/train/Stephanie_Cohen_Aloro_0001.jpg
./data/raw/lfw/Stephen_Crampton/Stephen_Crampton_0001.jpg ./data/raw/train/Stephen_Crampton_0001.jpg
./

In [7]:
# Remove empty LFW directory and .tgz file
if os.path.exists('./data/raw/lfw'):
    for directory in os.listdir('./data/raw/lfw'):
        os.rmdir(os.path.join('./data/raw/lfw', directory))
    os.rmdir('./data/raw/lfw')
    
if os.path.exists('./data/raw/lfw.tgz'):
    os.remove('./data/raw/lfw.tgz')