# Analysis of aggregated data

In [291]:
from pathlib import Path
import json
from math import pi

import pandas as pd

from bokeh.io import output_file, show, output_notebook, export_png
from bokeh.palettes import Category20c
from bokeh.plotting import figure
from bokeh.transform import cumsum

output_notebook()

In [292]:
BASE_DIR = Path.cwd().parent
SETTINGS_PATH = BASE_DIR / 'config' / 'settings.json'

def get_settings():
    if not SETTINGS_PATH.exists():
        raise Exception('Settings file not found')

    with open(str(SETTINGS_PATH)) as settings_file:
        data = json.load(settings_file)
    return data

In [293]:
SETTINGS = get_settings()

In [285]:
def print_summary(label, data, is_pie=False, is_bar=False):
    print('#################################### \n {} \n####################################'.format(label))
    total = 0
    plot_dict = {}
    for key, value in data.items():
        tot = len(value['data'])
        total += tot
        print(key, ': ', tot)
        plot_dict["{} ({})".format(key, tot)] = tot
    
    if is_pie:
        plot_pie_chart(plot_dict, label)
        
    if is_bar:
        plot_bar_chart(plot_dict, label)
    
    print('\nTotal: ', total)
    print('#################################### \n')
    return total
    
def print_total_summary(dataset, sample):
    print('Total: ', dataset, 'Percent: ', 100 * sample / dataset, " % ")
    

In [286]:
def plot_pie_chart(source, label):
    data = pd.Series(source).reset_index(name='value').rename(columns={'index':'class'})
    data['angle'] = data['value'] / data['value'].sum() * 2*pi
    data['color'] = Category20c[len(source)]

    p = figure(
        plot_height=350, title="{} Class Distribution".format(label), toolbar_location=None,
        tools="hover", tooltips="@class: @value", x_range=(-0.5, 1.0)
    )

    p.wedge(x=0, y=1, radius=0.4,
            start_angle=cumsum('angle', include_zero=True), end_angle=cumsum('angle'),
            line_color="white", fill_color='color', legend='class', source=data)

    p.axis.axis_label=None
    p.axis.visible=False
    p.grid.grid_line_color = None

    show(p)
    
    export_png(p, '{} pie.png'.format(label))
    

def plot_bar_chart(source, label):
    x = list(source.keys())
    y = list(source.values())
    
    p = figure(
        x_range=x, title="{} Class Distribution".format(label),
        toolbar_location=None, tools="", plot_width=800
    )

    p.vbar(x=x, top=y, width=0.9)
#     p.hbar(y=y, left='Time_min', right='Time_max', height=0.4, source=source)

    p.xgrid.grid_line_color = None
    p.y_range.start = 0
    p.x_range.range_padding = 0.1
    p.xaxis.major_label_orientation = 1

    show(p)
    
    export_png(p, '{} bar.png'.format(label))

## Activity Net

In [287]:
def get_activity_net():
    JSON_PATH = Path(SETTINGS['activity_net']['json'])
    if not JSON_PATH.exists():
        raise Exception('Activity Net JSON Path does not exist')
    
    with open(str(JSON_PATH)) as settings_file:
        data = json.load(settings_file)
    assert data['version'], 'VERSION 1.3'
    
    # Getting only dance taxonomies
    dance_taxonomy = list(filter(lambda x: x.get('parentName') == 'Dancing', data['taxonomy']))
    result = dict([ (taxonomy['nodeName'], {'meta': taxonomy, 'data': []}) for taxonomy in dance_taxonomy])
    
    for key, value in data['database'].items():
        for annotation in value['annotations']:
            label = annotation['label']

            if result.get(label):
                result[label]['data'].append({ **value, 'key': key, 'source': 'activity_net' })
            
                # Even if one annotation has the required key add the single item
                continue
    # Getting the stats break down
    total = print_summary('Activity Net', result, is_pie=True)
    
    print_total_summary(len(data['database'].keys()), total)
        
    return result

## Kinetics

In [288]:
def get_kinetics_individual(source):
    version = str(SETTINGS['kinetics']['default'])
    
    JSON_PATH = Path(SETTINGS['kinetics'][version]['json']) / '{}.json'.format(source)
    if not JSON_PATH.exists():
        raise Exception('Kinetics Net JSON Path does not exist')
    
    with open(str(JSON_PATH)) as settings_file:
        data = json.load(settings_file)
    
    return data

def get_kinetics_categories():
    JSON_PATH = Path(SETTINGS['kinetics']['categories'])
    if not JSON_PATH.exists():
        raise Exception('Categories does not exist')
    
    with open(str(JSON_PATH)) as settings_file:
        data = json.load(settings_file)
    
    return data


def get_kinetics_classes():
    JSON_PATH = Path(SETTINGS['kinetics']['classes'])
    if not JSON_PATH.exists():
        raise Exception('Classes does not exist')
    
    with open(str(JSON_PATH)) as settings_file:
        data = json.load(settings_file)
    
    return data

def get_kinetics_video_data():
    val_data = get_kinetics_individual('val')
    train_data = get_kinetics_individual('train')
    test_data = get_kinetics_individual('test')
    

    combined = { **val_data, **train_data, **test_data }
    assert len(combined.keys()), len(val_data.keys()) + len(train_data.keys()) + len(test_data.keys())
    
#     key = next(iter(combined.keys()))
#     print(combined[key])
    
    classes = set([value['annotations']['label'].lower() for key, value in combined.items()])
    
#     excluded = set(default_classes) - set(classes)
#     print(len(excluded))
#     print(excluded)
    
    return combined


def get_kinetics():
    result = get_kinetics_video_data()
    default_classes = get_kinetics_classes()
    categories = get_kinetics_categories()
    dance_dict = dict([(dance, { 'data': []}) for dance in categories['dancing']])
    
    for key, value in result.items():
        label = value['annotations']['label']
        if dance_dict.get(label):
            dance_dict[label]['data'].append({
                **value,
                'source': 'kinetics'
            })
    
    total = print_summary('Kinetics', dance_dict, is_bar=True)
    print_total_summary(len(result.keys()), total)
    
    return dance_dict

In [289]:
def run():
    get_activity_net()
    get_kinetics()
run()

#################################### 
 Activity Net 
####################################
Tango :  92
Cheerleading :  143
Cumbia :  86
Breakdancing :  107
Belly dance :  75



Total:  503
#################################### 

Total:  19994 Percent:  2.515754726417925  % 
#################################### 
 Kinetics 
####################################
belly dancing :  1147
breakdancing :  1051
cheerleading :  1149
country line dancing :  993
dancing ballet :  1146
dancing charleston :  713
dancing gangnam style :  665
dancing macarena :  951
jumpstyle dancing :  778
krumping :  708
marching :  990
robot dancing :  925
salsa dancing :  1088
swing dancing :  808
tango dancing :  968
tap dancing :  1035
zumba :  1074
cumbia :  716
square dancing :  704
mosh pit dancing :  637
pirouetting :  859



Total:  19105
#################################### 

Total:  480173 Percent:  3.978774316756669  % 


## UCF-101

In [304]:
def get_ucf():
    DATA_PATH = Path(SETTINGS['ucf']['data'])
    if not DATA_PATH.exists():
        raise Exception('Activity Net Data Path does not exist')
    
    SALSA_SPIN_PATH = DATA_PATH / 'SalsaSpin'
    print(len([x for x in SALSA_SPIN_PATH.iterdir()]))
    total = 0
    for dir in DATA_PATH.iterdir():
        if dir.is_dir():
            t = len([x for x in dir.iterdir()])
            total += t
            print(dir.name, t)
    print(total)
get_ucf()

133
ApplyEyeMakeup 145
ApplyLipstick 114
Archery 145
BabyCrawling 132
BalanceBeam 108
BandMarching 155
BaseballPitch 150
Basketball 134
BasketballDunk 131
BenchPress 160
Biking 134
Billiards 150
BlowDryHair 131
BlowingCandles 109
BodyWeightSquats 112
Bowling 47
SalsaSpin 133
2190
