In [1]:
import sys
import os
sys.path.append(os.path.abspath(".."))

import numpy as np
import pandas as pd
import matplotlib as mpl
import matplotlib.pyplot as plt
import seaborn as sns
import re
# Notebook-wide plot styling. The seaborn theme is applied first and the
# explicit matplotlib rcParams overrides last, so the overrides are not reset
# by seaborn.
sns.set(context='paper', style='whitegrid', font_scale=1.8, color_codes=True)

# Eight RGB triples (components in [0, 1]) installed as the default palette.
colorcycle = [
    (0.498, 0.788, 0.498),
    (0.745, 0.682, 0.831),
    (0.992, 0.753, 0.525),
    (0.220, 0.424, 0.690),
    (0.749, 0.357, 0.090),
    (1.000, 1.000, 0.600),
    (0.941, 0.008, 0.498),
    (0.400, 0.400, 0.400),
]
sns.set_palette(colorcycle)

mpl.rcParams.update({
    'figure.max_open_warning': 65,  # number of open figures before matplotlib warns
    'figure.figsize': [12, 7],      # default figure size (width, height)
})

from speclib import misc, plotting, loaders
import collections
import itertools

%matplotlib inline 

In [2]:
# Fetch the list of users and build the alias lookup from speclib's loaders.
# `useralias` is indexed with `[...]` by later cells; presumably it maps a user
# identifier to its alias -- confirm in speclib.loaders.
userlist = loaders.getUserList() 
useralias = loaders.Useralias()

In [3]:
# Every user is loaded with the same two datatypes.
load_datatypes = ('call', 'sms')

# One (name, useralias[name], datatypes) argument tuple per entry in userlist.
load_args = [
    (username, useralias[username], load_datatypes)
    for username in userlist
]

# Hand the whole batch to speclib's loader in a single call.
user_dct = loaders.loadUserParallel(load_args)

In [7]:
# Phone-number lookup loaded via speclib; presumably maps a phone number to the
# user owning it -- confirm in speclib.loaders.
#
# The former `phonebook.setdefault(None)` call was removed: on a dict it only
# inserts the key None with value None, it does NOT install a default for
# missing numbers. The lookups in this notebook go through `phonebook.get(...)`,
# which already yields None for unknown numbers.
phonebook = loaders.loadUserPhonenumberDict()

In [18]:
def _alias_for_number(number):
    """Return the alias of the user owning `number`, or None if unknown.

    `phonebook.get` yields None for numbers it does not contain; only known
    owners are translated through `useralias`.
    """
    owner = phonebook.get(number)
    return useralias[owner] if owner is not None else None


def _prepare_comm_frame(records, comtype):
    """Build the cleaned DataFrame for one communication type of one user.

    Parameters
    ----------
    records
        Raw records for one user and one communication type, passed straight
        to ``pd.DataFrame``. Non-empty input must provide the columns
        'address', 'timestamp', 'user' and 'id'.
    comtype : str
        Label written to the 'comtype' column ('call' or 'sms').

    Returns
    -------
    pd.DataFrame
        The frame with 'address' renamed to 'number', 'timestamp' cast to
        datetime64[s], 'user' looked up in `useralias`, an 'aliasnumber'
        column added and 'id' dropped. A frame with neither rows nor columns
        is returned untouched.

    Raises
    ------
    KeyError
        Propagated from the `useralias` lookups or from dropping a missing
        'id' column.
    """
    frame = pd.DataFrame(records)
    if not any(frame.shape):  # no rows and no columns: nothing to clean
        return frame
    frame = frame.rename(columns={'address': 'number'})
    # assumes timestamps are epoch seconds -- TODO confirm against the loader
    frame['timestamp'] = frame.timestamp.astype('datetime64[s]')
    frame['user'] = frame.user.map(lambda us: useralias[us])
    frame['aliasnumber'] = frame.number.map(_alias_for_number)
    frame['comtype'] = comtype
    return frame.drop('id', axis=1)


# One combined call+sms frame per user, indexed by the 'user' column.
df_lst = list()

for user in user_dct.keys():
    print(user)
    try:
        call = _prepare_comm_frame(user_dct[user]['call'], 'call')
        sms = _prepare_comm_frame(user_dct[user]['sms'], 'sms')
        # set_index raises KeyError when both frames are empty, because then
        # there is no 'user' column to index on.
        user_df = pd.concat((call, sms)).set_index('user')
    except KeyError as err:
        # Include the missing key so that a genuine "no data" case can be told
        # apart from e.g. a failed alias lookup.
        print("No data for user", user, "- missing key:", err)
        continue
    df_lst.append(user_df)


u0182
u1015
u0778
u0933
u0256
u0659
u0719
u0709
u0748
u0045
u0599
u0346
No data for user u0346
u1027
u0208
u0316
No data for user u0316
u0059
u0812
u0954
u0986
No data for user u0986
u0146
u0862
u0652
u0350
u0431
u0793
No data for user u0793
u0093
u0847
u0001
u0408
u0328
No data for user u0328
u1056
u0562
u0100
u0761
u0013
u0826
u0456
u0505
u0493
u0254
No data for user u0254
u0205
u0801
u0162
No data for user u0162
u0734
u0238
u0817
u0797
u0002
No data for user u0002
u0452
u0213
u0090
u0077
u0945
u0438
No data for user u0438
u0402
No data for user u0402
u0395
u0602
u0136
u1044
No data for user u1044
u0412
u0313
No data for user u0313
u0243
No data for user u0243
u0347
u0294
u0201
u0443
u1005
u0409
u0156
u0484
u0952
u0227
No data for user u0227
u0872
u0051
u0334
u1021
u0966
u0198
u0701
u0379
No data for user u0379
u1032
u0315
u0095
u0765
No data for user u0765
u0573
No data for user u0573
u0863
u0115
No data for user u0115
u0856
No data for user u0856
u0672
u0923
u0258
u0016
u0400
u0914

u0615
No data for user u0615
u0321
u0979
No data for user u0979
u0530
u0844
u0466
u0632
u0908
u0705
No data for user u0705
u0891
u0265
u0007
u0919
u0038
No data for user u0038
u0527
u0406
u0975
u0508
u0936
u0065
u0550
u0271
u0195
u0671
u0034
u0727
u0426
u0819
u0498
u0150
u0123
No data for user u0123
u0262
u0083
u0085
u0276
u0102
u0617
No data for user u0617
u0291
u0332
u0322
u0461
u0916
No data for user u0916
u0948
u0738
u0641
u0577
u0103
u0870
No data for user u0870
u0568
u0118
u0860
u0374
u0301
No data for user u0301
u0520
u0437
u0390
u0440
u0634
u0751
No data for user u0751
u0435
u0781
u0998
u0280
No data for user u0280
u0552
No data for user u0552
u0491
u0785
u0622
u0747
u0845
u0263
u0268
No data for user u0268
u0048
u0376
u0445
u0055
u0699
u0503
No data for user u0503
u1025
u0177
u1011
No data for user u1011
u1033
u0958
u0285
u0377
u0896
u0565
u0312
No data for user u0312
u0803
u0783
u0729
No data for user u0729
u1046
u0306
u0772
No data for user u0772
u0987
No data for user u0987

In [19]:
# Stack the per-user frames into one table ...
df = pd.concat(df_lst)
# ... and order its rows by the index the frames were built with.
df = df.sort_index()

In [None]:
# NOTE(review): this cell carries no execution count in the saved notebook, so
# confirm it was actually run before the pickle was written in the next cell.
MIN_YEAR = 2013  # earliest calendar year kept; rows with older timestamps are dropped
df = df[df.timestamp.dt.year >= MIN_YEAR]

In [22]:
# Persist the frame to disk as a pickle.
pickle_path = '../../allan_data/phone_sms_and_call_dataframe.pkl'
df.to_pickle(pickle_path)