In [None]:
## User settings: adjust the values in this cell to match your setup

# Image geometry
img_height, img_width = 256, 256
img_channels = 1

# Class names; the position in this list is the class index
label_names = [
    'Human',
    'Interaction frontal',
    'Interaction lateral',
    'Interaction vertical',
    'Crowded',
    'Drink',
    'Curiosity',
    'Queue',
    'Low visibility',
    'Nothing',
]

# Locations on disk
path_labels = '../assets/labeled_photos/'  # Where the labels are saved
path_videos = '../assets/video_data'  # Where the videos are saved
out_path = 'assets/'  # Where to save the outputs

# The pointer table lives next to the label files
pointer_table_path = path_labels + 'pointer_table.json'

In [None]:
# Math manipulation
import numpy as np
import pandas as pd
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)

# Visualization
import seaborn as sns
import matplotlib.pyplot as plt
%matplotlib inline 

# Image processing
import cv2

# Utilities
import os
import random
import gc
from keras.preprocessing.image import ImageDataGenerator
from keras.preprocessing.image import img_to_array, load_img
from keras.utils import to_categorical
import codecs, json 
import re

In [None]:
# Read the pointer table from disk and wrap it in a DataFrame for inspection
with open(pointer_table_path) as json_file:
    pt_table = json.load(json_file)

raw = pd.DataFrame(pt_table)
raw.head()

In [None]:
# Rebuild the time-indexed table from the untouched `pt_table` so this cell can
# be re-run safely: consuming columns 0 and 1 of `raw` in place would make a
# second execution fail with a KeyError.
pointer_frames = pd.DataFrame(data=pt_table)

# Column 0 plus column 1 / 25 is interpreted as seconds since the Unix epoch.
# presumably column 0 is the video start time and column 1 a frame index at
# 25 frames per second -- TODO confirm against the code that writes the table.
# NOTE(review): 10800 s (3 h) is added here and another 3 h is added below;
# confirm the combined 6 h shift is intended.
stamps = pd.to_datetime(
    pointer_frames[0] + pointer_frames[1] / 25 + 10800, utc=True, unit='s'
)

# Drop the timezone information, then shift by a fixed +3 h
local_stamps = (stamps.dt.tz_convert(None) + pd.offsets.Hour(+3)).rename('Timestamp')  # correct to helsinki timezone

# Columns 0 and 1 are only needed to build the index; column 2 becomes 'Class'
raw = (
    pointer_frames
    .drop([0, 1], axis=1)
    .rename(columns={2: 'Class'})
    .set_index(local_stamps)
)

raw.head()

In [None]:
# Plot every labelled sample between 08:00 and 10:00 on the chosen September 2019 day
day = 30
date_prefix = '2019-09-' + str(day)
lower_limit = date_prefix + ' 08:00:00'
upper_limit = date_prefix + ' 10:00:00'

window = raw[lower_limit:upper_limit]
axes = window.plot(figsize=(15,5), marker='|', markersize=3, linewidth=0)
axes.grid(True)
plt.savefig(out_path + 'graph_temporal_one_video.eps')

In [None]:
# Plot every labelled sample from midnight of the chosen October 2019 day
# up to midnight of the following day
day = 12
month_prefix = '2019-10-'
lower_limit = month_prefix + str(day) + ' 00:00:00'
upper_limit = month_prefix + str(day+1) + ' 00:00:00'

window = raw[lower_limit:upper_limit]
axes = window.plot(figsize=(15,5), marker='x', linewidth=0)
axes.grid(True)
plt.savefig(out_path + 'graph_temporal_one_day.eps')

# Whole dataset statistics

In [None]:
def count_classes_full (list_of_videos, plot=True, verbose=True):
    """Count how often each class label occurs across a set of label files.

    Every path in ``list_of_videos`` is read as JSON; the first element of
    each file is dropped and the remaining elements are pooled as labels.
    Paths containing ``'pointer_table'`` are skipped without being opened.

    Parameters
    ----------
    list_of_videos : iterable of str
        Paths of the JSON label files.
    plot : bool, default True
        When true, draw a count plot annotated with the share of each class
        and print the class-index legend taken from the module-level
        ``label_names``.
    verbose : bool, default True
        When true, print each path and the running number of pooled labels.

    Returns
    -------
    dict
        Maps each distinct label to its number of occurrences.
    """
    y = [] # labels pooled over every file

    for filename in list_of_videos:
        if verbose: print(filename)
        # Skip the pointer table before touching the file system
        if 'pointer_table' in filename:
            continue
        with open(filename) as json_file:
            # The first element of each file is not a label
            y += json.load(json_file)[1:]
        if verbose: print ('Dataset size: ', len(y))

    if plot:
        import matplotlib.ticker as ticker
        y = np.array(y)
        ncount = len(y)

        plt.figure(figsize=(12,8))
        # Pass the labels by keyword: recent seaborn releases treat the first
        # positional argument as `data`, not `x`.
        ax = sns.countplot(x=y)

        # Write the percentage of the whole dataset on top of each bar
        for p in ax.patches:
            bbox = p.get_bbox()
            bar_center = (bbox.x0 + bbox.x1) / 2
            bar_top = bbox.y1
            ax.annotate('{:.1f}%'.format(100.*bar_top/ncount), (bar_center, bar_top),
                    ha='center', va='bottom') # set the alignment of the text

        ax.yaxis.set_major_locator(ticker.LinearLocator(11))
        ax.grid(axis='y')

        # Legend: class index -> class name
        for n, name in enumerate(label_names):
            print('Class ', n, ': ', name)

    unique, counts = np.unique(y, return_counts=True)
    return dict(zip(unique, counts))

In [None]:
# Every entry of the label directory, in sorted order, prefixed with the directory path
label_filenames = sorted(os.listdir(path_labels))
list_of_videos = [path_labels + filename for filename in label_filenames]

In [None]:
# Draw the class-distribution chart over every label file and export it
count_classes_full(list_of_videos, plot=True, verbose=False)
plt.savefig(out_path + 'graph_count_classes.eps')

In [None]:
def classes_per_day (list_of_videos):
    """Count how many label files fall on each calendar day.

    The file stem (file name without directory and extension) is read as an
    integer number of seconds since the Unix epoch, shifted, and reduced to a
    ``YYYY-MM-DD`` string. Despite the function name, the result counts
    files per day, not classes.

    Parameters
    ----------
    list_of_videos : iterable of str
        Paths of the label files. Files whose stem contains
        ``'pointer_table'`` are ignored.

    Returns
    -------
    dict
        Maps each ``'YYYY-MM-DD'`` day to the number of files on that day.

    Raises
    ------
    ValueError
        If a file stem is not an integer.
    """
    days = []
    for filename in list_of_videos:
        # Take the stem from the path itself rather than from a regex built
        # out of the global label directory, so any directory prefix works.
        name = os.path.splitext(os.path.basename(filename))[0]
        if 'pointer_table' in name: continue
        # NOTE(review): 10800 s (3 h) is added before the conversion and a
        # further 3 h afterwards; confirm the combined 6 h shift is intended.
        start = pd.to_datetime(int(name) + 10800, utc=True, unit='s').tz_convert(None) + pd.offsets.Hour(+3)
        days.append(start.strftime('%Y-%m-%d'))

    unique, counts = np.unique(days, return_counts=True)
    return dict(zip(unique, counts))
    
# Show the per-day file counts as this cell's output
classes_per_day(list_of_videos)

# Per video statistics

In [None]:
def video_statistics (list_of_videos, plot=True, fps=25):
    """Summarise every label file as one row of per-video statistics.

    Each path is read as JSON; the first element is dropped and the rest is
    treated as the sequence of labels of one video. Paths containing
    ``'pointer_table'`` are skipped without being opened.

    Parameters
    ----------
    list_of_videos : iterable of str
        Paths of the JSON label files.
    plot : bool, default True
        Currently unused; kept so existing callers keep working.
    fps : int or float, default 25
        Number of labels per second, used to convert the label count into a
        duration in seconds.

    Returns
    -------
    pandas.DataFrame
        One row per file with the columns ``duration_(s)``,
        ``number_of_classes``, ``have_interaction`` (1 if label 1, 2 or 3
        occurs) and ``have_human`` (1 if label 0 occurs).
    """
    results = [] # one row per video

    for filename in list_of_videos:
        # Skip the pointer table before touching the file system
        if 'pointer_table' in filename: continue
        with open(filename) as json_file:
            # The first element of each file is not a label
            video = json.load(json_file)[1:]

        distinct_labels = np.unique(video)
        has_interaction = any(label in video for label in (1, 2, 3))
        results.append([len(video)/fps,
                        len(distinct_labels),
                        int(has_interaction),
                        int(0 in video)
                       ])

    df = pd.DataFrame (results, columns=['duration_(s)', 'number_of_classes', 'have_interaction', 'have_human'])
    return df

In [None]:
# One row of statistics per label file
df = video_statistics(list_of_videos)

In [None]:
# Descriptive statistics of every per-video column
df.describe()

In [None]:
# Kernel density estimate of the video durations
df['duration_(s)'].plot.kde()

In [None]:
# Box plot of the video durations, with outliers hidden
ax = plt.gca()
ax.boxplot(df['duration_(s)'], showfliers=False)

# There is a single box, so the x-axis carries no information: hide its
# ticks (major and minor, bottom and top edge) and its labels
ax.tick_params(axis='x', which='both', bottom=False, top=False, labelbottom=False)

# Horizontal grid lines only
ax.yaxis.grid(True)
plt.savefig(out_path + 'graph_duration_box_plot.eps')

License: Creative Commons Attribution 4.0 International (CC BY 4.0)