In [1]:
# Configuration: parent folder of the ActiGraph .gt3x exports and the
# subfolders of it that are scanned for recordings.
data_dir = "/Volumes/Data/Research/Healthy Brain Network/ActiGraph/gt3x_data"
subdirs = [
    "in process",
    "processed",
    "raw",
]

In [2]:
import calendar
from datetime import datetime, timedelta
import os
import numpy as np
import pandas as pd
import zipfile

In [3]:
def _parse_info_lines(lines):
    """
    Parse the ``key: value`` lines of a gt3x ``info.txt`` into a dictionary.

    Parameters
    ----------
    lines : iterable of bytes
        raw lines of ``info.txt``

    Returns
    -------
    info : dictionary
        stripped string value for each stripped string key
    """
    info = {}
    for raw_line in lines:
        try:
            text = raw_line.decode("UTF-8")
        except UnicodeDecodeError:
            # best effort: skip a line that can't be decoded, keep the rest
            continue
        # Split on the first colon only, so a value that itself contains
        # colons is kept whole instead of the line being dropped.
        key, sep, value = text.partition(":")
        if not sep:
            # not a "key: value" line
            continue
        # strip() removes the separator space and whichever line ending is
        # present (or absent, on the last line) without eating characters.
        info[key.strip()] = value.strip()
    return info


def read_zips(data_dir, subdirs):
    """
    Function to read the ``info.txt`` metadata of all .gt3x files in the
    specified subdirectories.

    Parameters
    ----------
    data_dir : string
        parent directory

    subdirs : list of strings
        subdirectories

    Returns
    -------
    dfs : list of dictionaries
        one dictionary per readable .gt3x file, mapping each ``info.txt``
        key to its string value

    Notes
    -----
    The path of any .gt3x file that can't be opened or read is printed and
    that file is skipped. Files are visited in sorted filename order within
    each subdirectory so the result is reproducible.
    """
    dfs = list()
    for subdir in subdirs:
        sd = os.path.join(data_dir, subdir)
        for file in sorted(os.listdir(sd)):
            if not file.endswith(".gt3x"):
                continue
            zp = os.path.join(sd, file)
            try:
                with zipfile.ZipFile(zp) as z:
                    with z.open("info.txt", "r") as f:
                        zdict = _parse_info_lines(f)
            except Exception:
                # Deliberately best effort: report the unreadable archive and
                # move on (without swallowing KeyboardInterrupt/SystemExit).
                print(zp)
            else:
                dfs.append(zdict)
    return dfs


def ticks_to_datetime(ticks):
    """
    Function to turn a .NET tick count into a datetime.

    Parameter
    ---------
    ticks : int
        number of ticks (10 ticks per microsecond) counted from
        0001-01-01 00:00:00; anything ``int()`` accepts is allowed

    Returns
    -------
    dt : datetime
        the corresponding naive datetime, truncated to whole microseconds
    """
    origin = datetime(1, 1, 1)
    elapsed_microseconds = int(ticks) // 10
    return origin + timedelta(microseconds=elapsed_microseconds)

In [4]:
# Gather the info.txt metadata of every .gt3x file under the configured folders.
z = read_zips(
    data_dir=data_dir,
    subdirs=subdirs,
)

/Volumes/Data/Research/Healthy Brain Network/ActiGraph/gt3x_data/in process/5783223 (2017-09-14).gt3x


In [5]:
# One row per recording, one column per info.txt key.
df = pd.DataFrame(z)

In [6]:
# These columns hold .NET tick counts; replace each with real datetimes.
# ticks_to_datetime applies int() itself, so it can be mapped directly.
for col in (
    "Start Date",
    "Stop Date",
    "Download Date",
    "Last Sample Time",
):
    df[col] = df[col].map(ticks_to_datetime)

In [14]:
# Add one recording by hand. NOTE(review): presumably this is the recording
# whose .gt3x file could not be read by read_zips (same subject id in the
# printed path) — confirm. Columns not listed here are left missing for it.
#
# DataFrame.append was deprecated in pandas 1.4 and removed in 2.0;
# pd.concat with a one-row frame is the supported equivalent.
# Re-running this cell adds the row again, so run it once per fresh kernel.
df = pd.concat(
    [
        df,
        pd.DataFrame(
            [
                {
                    'Last Sample Time': datetime(2017, 9, 7, 5, 7),
                    'Serial Number': 'MOS2D17170076',
                    'Start Date': datetime(2017, 8, 14, 16, 46),
                    'Subject Name': '5783223'
                }
            ]
        ),
    ],
    ignore_index=True
)

In [15]:
# Elapsed time between each recording's start and its last sample.
df["Days Recorded"] = df["Last Sample Time"] - df["Start Date"]

In [16]:
# Month in which each recording ended: the name for display and the number so
# the months can be ordered chronologically.
df["End Month"] = df["Last Sample Time"].map(
    lambda last_sample: calendar.month_name[last_sample.month]
)
df["End Month (numeric)"] = df["Last Sample Time"].map(
    lambda last_sample: last_sample.month
)

In [17]:
# Number of recordings per end month (rows) and sample rate (columns).
df.pivot_table(
    index=["End Month (numeric)", "End Month"],
    columns="Sample Rate",
    values="Days Recorded",
    aggfunc="count",
)

Unnamed: 0_level_0,Sample Rate,30,60
End Month (numeric),End Month,Unnamed: 2_level_1,Unnamed: 3_level_1
7,July,1.0,
8,August,1.0,13.0
9,September,10.0,26.0
10,October,5.0,2.0


In [18]:
# Per-recording overview, ordered by sample rate, then start, then end time.
subtable = df[
    [
        "Serial Number",
        "Subject Name",
        "Sample Rate",
        "End Month",
        "Days Recorded",
        "Start Date",
        "Last Sample Time",
    ]
].sort_values(
    by=["Sample Rate", "Start Date", "Last Sample Time"],
)

In [19]:
# Summary statistics of the overview table.
subtable.describe()

Unnamed: 0,Days Recorded
count,59
mean,26 days 03:08:33.745762
std,8 days 17:31:09.856592
min,2 days 05:00:54
25%,22 days 15:05:00
50%,23 days 09:08:00
75%,28 days 09:38:30
max,64 days 05:10:14


In [20]:
# Display the sorted table with a fresh 0..n-1 index; the result is not
# assigned, so subtable itself keeps its original index.
subtable.reset_index(drop=True)

Unnamed: 0,Serial Number,Subject Name,Sample Rate,End Month,Days Recorded,Start Date,Last Sample Time
0,MOS2D17170019,NDARLN070VX7,30.0,July,2 days 05:00:54,2017-07-28 12:45:00,2017-07-30 17:45:54
1,MOS2D17170036,NDARTN487KDK,30.0,September,37 days 04:12:00,2017-07-28 16:31:00,2017-09-03 20:43:00
2,MOS2D17170029,NDARGB324PM1,30.0,September,34 days 07:09:40,2017-07-28 17:20:00,2017-09-01 00:29:40
3,MOS2D17170014,NDARJZ274PRQ,30.0,September,30 days 22:02:09,2017-08-01 12:09:00,2017-09-01 10:11:09
4,MOS2D17170044,NDARYR829KD8,30.0,August,26 days 19:36:00,2017-08-04 17:24:00,2017-08-31 13:00:00
5,MOS2D17170056,NDAREF164ZVJ,30.0,September,40 days 18:25:51,2017-08-05 15:32:00,2017-09-15 09:57:51
6,MOS2D17170053,NDARGK736HF4,30.0,September,32 days 16:34:00,2017-08-08 16:42:00,2017-09-10 09:16:00
7,MOS2D17170049,NDARLX836EHJ,30.0,September,35 days 15:40:33,2017-08-10 18:10:00,2017-09-15 09:50:33
8,MOS2D17170048,NDARMJ849UKD,30.0,October,64 days 05:10:14,2017-08-14 12:12:00,2017-10-17 17:22:14
9,MOS2D17170102,NDARAC853DTE,30.0,September,35 days 09:48:00,2017-08-14 16:29:00,2017-09-19 02:17:00
