In [40]:
import os
import numpy as np
import matplotlib.pyplot as plt
import json
import seaborn as sns


min_time = 360

In [41]:
color_map = {
 "brushing_teeth" : sns.color_palette(palette='pastel')[0], 
 "showering" : sns.color_palette(palette='pastel')[1], 
 "breakfast" : sns.color_palette(palette='pastel')[2], 
 "getting_dressed" : sns.color_palette(palette='pastel')[3], 
 "computer_work" : sns.color_palette(palette='pastel')[4], 
 "lunch" : sns.color_palette(palette='pastel')[5], 
 "leave_home" : sns.color_palette(palette='pastel')[6], 
 "going_to_the_bathroom" : sns.color_palette(palette='pastel')[7], 
 
 "cleaning" : sns.color_palette(palette='dark')[0], 
 "kitchen_cleaning" : sns.color_palette(palette='dark')[1], 
 "take_out_trash" : sns.color_palette(palette='dark')[2], 
 "laundry" : sns.color_palette(palette='dark')[3], 
 "vaccuum_cleaning" : sns.color_palette(palette='dark')[4], 
 "wash_dishes" : sns.color_palette(palette='dark')[5], 

 "come_home" : sns.color_palette()[0], 
 "playing_music" : sns.color_palette()[1], 
 "reading" : sns.color_palette()[2], 
 "taking_medication" : sns.color_palette()[3], 
 "dinner" : sns.color_palette()[4], 
 "socializing" : sns.color_palette()[5], 
 "listening_to_music" : sns.color_palette()[6], 
 "watching_tv" : sns.color_palette()[7], 
}

def time_human(time_mins):
    time_mins = int(round(time_mins))
    mins = time_mins%60
    time_mins = time_mins//60
    hrs = time_mins%24
    time_mins = time_mins//24
    days = time_mins
    h = '{:02d}:{:02d}'.format(hrs,mins)
    if days != 0:
        h = str(days)+'day - '+h
    return h

In [72]:
root_dir = 'data/sourcedRoutines/completePersona0214'
os.listdir(root_dir)

['hard_worker', 'home_maker', 'work_from_home', 'senior']

In [None]:

for ind in os.listdir(root_dir):

    directory = os.path.join(root_dir, ind)

    fig,ax = plt.subplots()
    fig.set_size_inches(30,20)

    activities_labeled = []

    sch_cnt = 0
    with open(os.path.join(directory, 'script_usage.txt')) as f:
        prev_start = min_time
        data = f.readlines()
        for line in data[1:]:
            _, activity, start, end = line.split(';')
            start = float(start.strip())
            end = float(end.strip())
            end = min(end, 24*60)
            if prev_start > end:
                prev_start = min_time
                sch_cnt += 1
            if activity not in activities_labeled:
                ax.barh(sch_cnt, end-start, align='center', left=start, label=activity, color=color_map[activity])
                activities_labeled.append(activity)
            ax.barh(sch_cnt, end-start, align='center', left=start, color=color_map[activity])
            prev_start = start
        _ = ax.set_xlabel('Time')
        _ = ax.set_yticks(np.arange(sch_cnt+1))
        _ = ax.set_xticks(np.arange(6*60,24*60, 1*60))
        _ = ax.set_xticklabels([time_human(t) for t in np.arange(6*60,24*60, 1*60)])

    _ = plt.legend(loc='upper right')
    fig.tight_layout()
    fig.suptitle(ind)
    plt.savefig(os.path.join(directory,'schedules.jpg'))

In [None]:
times = np.arange(6*60, 24*60, 10)
num_routines = 60

for ind in os.listdir(root_dir):
    directory = os.path.join(root_dir, ind)

    fig,ax = plt.subplots()
    fig.set_size_inches(30,20)

    activity_freq = {t:{k:0 for k in color_map.keys()} for t in times}
    activities_labeled = []

    sch_cnt = 0
    with open(os.path.join(directory, 'script_usage.txt')) as f:
        data = f.readlines()
        for line in data[1:]:
            _, activity, start, end = line.split(';')
            start = float(start.strip())
            end = float(end.strip())
            end = min(end, 24*60)
            for t in times:
                if t>start and t<end:
                    activity_freq[t][activity] += 1/num_routines
        bottoms = times*0.0
        for act in color_map.keys():
            freqs = [act_fr[act] for act_fr in activity_freq.values()]
            ax.bar(times, freqs, bottom=bottoms, label=act, color=color_map[act], width = 6.5)
            bottoms += np.array(freqs)
        misclassification_prob = [min(sum(activity_freq[t].values()), 1-max(activity_freq[t].values())) for t in times]
        ax.plot(times, misclassification_prob, '-.k', label='misclassification probability')
        _ = ax.set_xlabel('Time')
        _ = ax.set_xticks(np.arange(6*60,24*60, 1*60))
        _ = ax.set_xticklabels([time_human(t) for t in np.arange(6*60,24*60, 1*60)])

    avg_miscl_prob = sum(misclassification_prob)/len(misclassification_prob)
    _ = plt.legend(loc='upper right')
    fig.tight_layout()
    fig.suptitle(ind+'-- avg. misclassification probability = '+'{:1.3f}'.format(avg_miscl_prob))
    plt.savefig(os.path.join(directory,'schedule_distribution.jpg'))

    with open(os.path.join(directory, 'info.json')) as f:
        info = json.load(f)
    info['misclassification_prob'] = avg_miscl_prob
    with open(os.path.join(directory, 'info.json'), 'w') as f:
        json.dump(info, f)

In [76]:
import pandas as pd
from pingouin import cronbach_alpha

for ind in os.listdir(root_dir):

    starts = [{k:[] for k in color_map.keys()}]
    ends = [{k:[] for k in color_map.keys()}]
    

    directory = os.path.join(root_dir, ind)

    activities_labeled = []
    sch_cnt = 0
    with open(os.path.join(directory, 'script_usage.txt')) as f:
        prev_start = min_time
        data = f.readlines()
        for line in data[1:]:
            _, activity, start, end = line.split(';')
            start = float(start.strip())
            end = float(end.strip())
            end = min(end, 24*60)
            starts[-1][activity].append(start)
            ends[-1][activity].append(end)
            if prev_start > end:
                prev_start = min_time
                sch_cnt += 1
                starts.append({k:[] for k in color_map.keys()})
                ends.append({k:[] for k in color_map.keys()})
            prev_start = start
    
    features_st = [[] for _ in range(sch_cnt)]
    features_end = [[] for _ in range(sch_cnt)]
    for activity in color_map.keys():
        for i in range(sch_cnt):
            features_st[i] += (starts[i][activity])
            features_end[i] += (ends[i][activity])
        max_len = max([len(fst) for fst in features_st])
        for i in range(sch_cnt):
            features_st[i] += [float('nan') for _ in range(max_len-len(features_st[i]))]
            # features_st[i] += [0 for _ in range(max_len-len(features_st[i]))]
            features_end[i] += [float('nan') for _ in range(max_len-len(features_end[i]))]
            # features_end[i] += [0 for _ in range(max_len-len(features_end[i]))]

    features_st = pd.DataFrame(np.array(features_st))
    # features_st['name'] = np.arange(sch_cnt)
    features_end = pd.DataFrame(np.array(features_end))
    # features_end['name'] = np.arange(sch_cnt)
    features_mid = pd.DataFrame(np.array((features_st + features_end)/2))


    # print(features_end.head())
    print()
    print(ind)
    print('Starts alpha : ',cronbach_alpha(data=features_st.fillna(features_st.median())))
    print('Ends alpha : ',cronbach_alpha(data=features_end.fillna(features_end.median())))
    print('Mids alpha : ',cronbach_alpha(data=features_mid.fillna(features_mid.median())))


    # for i in range(sch_cnt):
    #     print(features_st[i])
    #     print(features_end[i])
    # break


hard_worker
Starts alpha :  (-0.25333161086878286, array([-0.762,  0.165]))
Ends alpha :  (-0.3180322176872612, array([-0.853,  0.122]))
Mids alpha :  (-0.28610989877176735, array([-0.808,  0.143]))

home_maker
Starts alpha :  (0.27387452903495, array([-0.02 ,  0.516]))
Ends alpha :  (0.2577400544215982, array([-0.042,  0.505]))
Mids alpha :  (0.26649188547503605, array([-0.03 ,  0.511]))

work_from_home
Starts alpha :  (-0.004711898737058977, array([-0.412,  0.33 ]))
Ends alpha :  (-0.07485901838947896, array([-0.51 ,  0.284]))
Mids alpha :  (-0.03709366857108142, array([-0.457,  0.309]))

senior
Starts alpha :  (0.0633260971093551, array([-0.316,  0.375]))
Ends alpha :  (0.044232046626804127, array([-0.342,  0.363]))
Mids alpha :  (0.05324375717840743, array([-0.33 ,  0.369]))


In [None]:
import shutil

target_dir = 'data/dataVisuals'
for root, dirs, files in os.walk('data/sourcedRoutines'):
    for f in files:
        if f.endswith('.jpg'):
            target_dir = root.replace('sourcedRoutines','dataVisuals')
            if not os.path.exists(target_dir):
                os.makedirs(target_dir)
            shutil.copyfile(os.path.join(root,f), os.path.join(target_dir,f))

    