In [180]:
import pandas as pd
import numpy as np
import pprint

In [181]:
def get_closest_row(file_path, columns, dt):
    """Return the row of a timestamped CSV that is nearest in time to ``dt``.

    The file is read in full, its ``time`` column (epoch milliseconds) is
    converted to datetimes, rows are ordered chronologically and duplicate
    timestamps are collapsed to their first occurrence, so the resulting
    index is sorted and unique as a nearest-neighbour lookup requires.

    Parameters
    ----------
    file_path : str or path-like
        CSV file with a header row that includes a ``time`` column.
    columns : list of str
        Not used by this function; kept so existing callers keep working.
        The returned values follow the column order of the file itself.
    dt : datetime-like
        Timestamp to look up.

    Returns
    -------
    numpy.ndarray
        Values of every non-``time`` column of the nearest row.

    Raises
    ------
    KeyError
        If no nearest row can be found (for example the file has no rows).
    """
    df = pd.read_csv(file_path, header=0)
    df['time'] = pd.to_datetime(df['time'], unit='ms')
    df = (
        # Stable sort: among duplicate timestamps the row that comes first in
        # the file is the one that keep="first" retains.
        df.sort_values('time', kind='mergesort')
        .drop_duplicates(subset='time', keep="first")
        .set_index('time')
    )

    # Index.get_loc(..., method='nearest') was removed in pandas 2.0;
    # get_indexer is the supported way to do a nearest-neighbour lookup.
    position = df.index.get_indexer(pd.DatetimeIndex([dt]), method='nearest')[0]
    if position == -1:
        # -1 means "no match"; without this guard iloc would silently return
        # the last row instead of failing.
        raise KeyError(dt)

    closest = df.iloc[[position]].values[0]
    return np.asarray(closest)

In [182]:
# Sensor CSV file name -> labels given to that file's value columns.
# The labels are applied by position when the merged frame is built, so their
# order here must match the column order of the corresponding file.
# NOTE(review): 'in_veichle' looks like a misspelling of 'in_vehicle'; it is
# left unchanged because it becomes a column name in the merged frame.
file_dict = {
    'activities.csv': [
        'in_veichle',
        'on_bicycle',
        'on_foot',
        'running',
        'still',
        'tilting',
        'walking',
        'unknown',
    ],
    'audio.csv': [
        'ringer_mode',
        'alarm_volume',
        'music_volume',
        'notifications_volume',
        'ring_volume',
        'bt_sco_connected',
        'mic_muted',
        'music_active',
        'speaker_on',
        'headset_connected',
    ],
}

In [183]:
# Dataset root and the directory of the single user analysed here.
data_path = 'Datasets/MDF/'
user_dir = data_path + 'user_0'

# Package names collected for later filtering.
# NOTE(review): not used in the cells visible here — confirm where it is applied.
ignored_apps = 'it.cnr.iit.sensapp com.android.systemui com.sec.android.app.launcher com.android.settings'.split()
print(ignored_apps)

# Load the running-apps log: parse epoch-millisecond timestamps, order the
# rows chronologically, keep one row per timestamp and renumber from 0.
df = (
    pd.read_csv(user_dir + '/running_apps.csv', header=0)
    .assign(time=lambda frame: pd.to_datetime(frame['time'], unit='ms'))
    .sort_values('time')
    .drop_duplicates(subset='time', keep="first")
    .reset_index(drop=True)
)
df

['it.cnr.iit.sensapp', 'com.android.systemui', 'com.sec.android.app.launcher', 'com.android.settings']


Unnamed: 0,time,app,category
0,2018-04-24 06:07:19.466,it.cnr.iit.sensapp,
1,2018-04-24 06:12:19.487,com.android.systemui,
2,2018-04-24 06:17:19.502,com.whatsapp,COMMUNICATION
3,2018-04-24 06:22:19.542,it.cnr.iit.sensapp,
4,2018-04-24 06:57:19.368,com.whatsapp,COMMUNICATION
5,2018-04-24 07:07:19.408,com.sec.android.app.launcher,PERSONALIZATION
6,2018-04-24 07:12:19.403,com.android.captiveportallogin,
7,2018-04-24 07:42:19.457,com.sec.android.app.launcher,PERSONALIZATION
8,2018-04-24 07:57:19.493,com.twitter.android,NEWS_AND_MAGAZINES
9,2018-04-24 08:02:19.496,com.sec.android.app.launcher,PERSONALIZATION


In [184]:
# For every app event in `df`, attach the sensor readings recorded closest in
# time, one group of columns per file listed in `file_dict`.
sensor_columns = [label for labels in file_dict.values() for label in labels]

sensor_parts = []
for filename, columns in file_dict.items():
    # Read and index each sensor file once, instead of once per app event.
    sensor = pd.read_csv(user_dir + '/' + filename, header=0)
    sensor['time'] = pd.to_datetime(sensor['time'], unit='ms')
    sensor = (
        # Stable sort so keep='first' retains the first duplicate in file order.
        sensor.sort_values('time', kind='mergesort')
        .drop_duplicates(subset='time', keep='first')
        .set_index('time')
    )

    # Row position of the nearest sensor reading for all app timestamps at once.
    nearest = sensor.index.get_indexer(df['time'], method='nearest')
    if (nearest == -1).any():
        # -1 means "no match" (e.g. an empty sensor file); fail loudly rather
        # than let iloc pick the last row.
        raise KeyError(f'no nearest row in {filename} for some timestamps')

    part = sensor.iloc[nearest].reset_index(drop=True)
    part.columns = columns  # labels are positional, as listed in file_dict
    part.index = df.index   # align row-for-row with the app events
    sensor_parts.append(part)

df2 = pd.concat(sensor_parts, axis=1)

# Drop sensor columns left by a previous run so re-executing this cell does
# not append a second copy of them.
df = pd.concat([df.drop(columns=sensor_columns, errors='ignore'), df2], axis=1)

In [185]:
# Display the app log with the nearest sensor readings appended as columns.
df

Unnamed: 0,time,app,category,in_veichle,on_bicycle,on_foot,running,still,tilting,walking,...,ringer_mode,alarm_volume,music_volume,notifications_volume,ring_volume,bt_sco_connected,mic_muted,music_active,speaker_on,headset_connected
0,2018-04-24 06:07:19.466,it.cnr.iit.sensapp,,8,9,14,8,23,0,14,...,2,0.733333,0.0,0.0,1.0,False,False,False,False,False
1,2018-04-24 06:12:19.487,com.android.systemui,,8,7,35,7,9,0,35,...,2,0.733333,0.0,0.0,1.0,False,False,False,False,False
2,2018-04-24 06:17:19.502,com.whatsapp,COMMUNICATION,27,8,8,8,11,0,8,...,1,0.733333,0.0,0.0,0.0,False,False,False,False,False
3,2018-04-24 06:22:19.542,it.cnr.iit.sensapp,,8,6,6,5,49,0,6,...,1,0.733333,0.0,0.0,0.0,False,False,False,False,False
4,2018-04-24 06:57:19.368,com.whatsapp,COMMUNICATION,7,8,13,5,39,0,13,...,1,0.733333,0.0,0.0,0.0,False,False,False,False,False
5,2018-04-24 07:07:19.408,com.sec.android.app.launcher,PERSONALIZATION,0,0,0,0,0,100,0,...,1,0.733333,0.0,0.0,0.0,False,False,False,False,False
6,2018-04-24 07:12:19.403,com.android.captiveportallogin,,8,6,27,1,53,0,27,...,1,0.733333,0.0,0.0,0.0,False,False,False,False,False
7,2018-04-24 07:42:19.457,com.sec.android.app.launcher,PERSONALIZATION,16,10,10,7,18,0,10,...,1,0.733333,0.0,0.0,0.0,False,False,False,False,False
8,2018-04-24 07:57:19.493,com.twitter.android,NEWS_AND_MAGAZINES,17,3,31,1,33,0,31,...,1,0.733333,0.0,0.0,0.0,False,False,False,False,False
9,2018-04-24 08:02:19.496,com.sec.android.app.launcher,PERSONALIZATION,32,2,8,0,43,0,8,...,1,0.733333,0.0,0.0,0.0,False,False,False,False,False
