*David Schlangen, 2019-03-20*

# All Preprocessed Data Frames

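Showcases the results of the preprocessing: for every DataFrame found in the preprocessing directory, the row count and the first five rows are displayed, followed by a summary table of all row counts.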

In [1]:
from __future__ import division
import json
import configparser
import sys
import os
from glob import glob

import pandas as pd
from IPython.display import display

In [3]:
# Load the config file, set up paths, and make project-specific imports.
#
# The config path is taken from the environment variable VISCONF when it is
# set and non-empty; otherwise the default config shipped in ../Config is used.
config_path = os.environ.get('VISCONF') or '../Config/default.cfg'
if not os.path.isfile(config_path):
    # Explicit check instead of `assert`: asserts are stripped under -O and
    # the previous one could never fail for a hard-coded path anyway.
    raise FileNotFoundError(
        'Config file not found at {!r}. You need to specify the path to the '
        'config file via environment variable VISCONF.'.format(config_path))

config = configparser.ConfigParser()
with open(config_path, 'r', encoding='utf-8') as f:
    config.read_file(f)

# Paths read from the config; preproc_path is the directory that is scanned
# for preprocessed DataFrames further down.
corpora_base = config.get('DEFAULT', 'corpora_base')
preproc_path = config.get('DSGV-PATHS', 'preproc_path')

# `utils` is imported from <dsgv_home>/Utils, so that directory has to be on
# sys.path before the import statement runs.
dsgv_home = config.get('DSGV-PATHS', 'dsgv_home')
sys.path.append(os.path.join(dsgv_home, 'Utils'))
from utils import icorpus_code, plot_labelled_bb, get_image_filename

In [4]:
# Walk over every gzipped JSON DataFrame in the preprocessing directory,
# print its name and row count, and show its first five rows. A summary
# table with all names and row counts is displayed at the end.

overview_df = []  # (name, row count) pairs, collected for the summary table
rule = '-' * 20

for json_path in glob(os.path.join(preproc_path, '*.json.gz')):
    # Name = file name up to the first dot, e.g. 'x.json.gz' -> 'x'.
    frame_name = os.path.basename(json_path).split('.')[0]
    frame = pd.read_json(json_path, typ='frame', orient='split', compression='gzip')
    n_rows = len(frame)

    print(rule, frame_name, rule)
    print('')
    print('n rows: {:,}'.format(n_rows))
    overview_df.append((frame_name, n_rows))
    display(frame.head(5))
    print('')

print('=' * 20, 'Summary', '=' * 20)
display(pd.DataFrame(overview_df, columns=['name', 'rows']))

-------------------- saiapr_bbdf --------------------

n rows: 99,534


Unnamed: 0,i_corpus,image_id,region_id,bb,cat
0,0,112,1,"[76, 50, 371, 308]",120
1,0,112,2,"[0, 260, 89, 96]",204
2,0,112,3,"[390, 206, 89, 151]",204
3,0,112,4,"[0, 0, 156, 191]",29
4,0,112,5,"[374, 0, 104, 165]",29



-------------------- saiapr_refdf --------------------

n rows: 120,081


Unnamed: 0,i_corpus,image_id,region_id,r_corpus,rex_id,refexp,tagged
0,0,8756,2,referit,0,sunray at very top,"[[sunray, NN], [at, IN], [very, RB], [top, JJ]]"
1,0,21905,1,referit,1,sky,"[[sky, NN]]"
2,0,14576,1,referit,2,seal,"[[seal, NN]]"
3,0,15169,5,referit,3,the sand in the bottom right corner,"[[the, DT], [sand, NN], [in, IN], [the, DT], [..."
4,0,6263,1,referit,4,dirt path,"[[dirt, NN], [path, NN]]"



-------------------- mscoco_bbdf --------------------

n rows: 604,907


Unnamed: 0,i_corpus,image_id,region_id,bb,cat
0,1,480023,86,"[116.95, 305.86, 285.3, 266.03]",58
1,1,50518,89,"[245.54, 208.17, 40.14, 19.1]",58
2,1,142589,93,"[288.4, 18.07, 211.6, 331.33]",58
3,1,209263,113,"[126.5, 475.24, 77.68, 76.73]",58
4,1,15307,116,"[185.57, 93.4, 219.97, 420.29]",58



-------------------- refcoco_refdf --------------------

n rows: 142,210


Unnamed: 0,i_corpus,image_id,region_id,r_corpus,rex_id,refexp,tagged
0,1,581857,1719310,refcoco,0,the lady with the blue shirt,"[[the, DT], [lady, NN], [with, IN], [the, DT],..."
1,1,581857,1719310,refcoco,1,lady with back to us,"[[lady, NN], [with, IN], [back, NN], [to, TO],..."
2,1,581857,1719310,refcoco,2,blue shirt,"[[blue, JJ], [shirt, NN]]"
3,1,581857,463958,refcoco,3,woman in gray shirt facing camera on right,"[[woman, NN], [in, IN], [gray, JJ], [shirt, NN..."
4,1,581857,463958,refcoco,4,woman gray right,"[[woman, NN], [gray, NN], [right, NN]]"



-------------------- refcocoplus_refdf --------------------

n rows: 141,564


Unnamed: 0,i_corpus,image_id,region_id,r_corpus,rex_id,refexp,tagged
0,1,581857,1719310,refcoco,0,navy blue shirt,"[[navy, JJ], [blue, NN], [shirt, NN]]"
1,1,581857,1719310,refcoco,1,woman back in blue,"[[woman, NN], [back, RB], [in, IN], [blue, NN]]"
2,1,581857,1719310,refcoco,2,blue shirt,"[[blue, JJ], [shirt, NN]]"
3,1,581857,463958,refcoco,3,gray shirt wearing glasses,"[[gray, JJ], [shirt, NN], [wearing, NN], [glas..."
4,1,581857,463958,refcoco,4,lady with glasses,"[[lady, NN], [with, IN], [glasses, NNS]]"





Unnamed: 0,name,rows
0,saiapr_bbdf,99534
1,saiapr_refdf,120081
2,mscoco_bbdf,604907
3,refcoco_refdf,142210
4,refcocoplus_refdf,141564
