In [1]:
# Parent directory that holds the ActiGraph .gt3x exports.
data_dir = "/Volumes/Data/Research/Healthy Brain Network/ActiGraph/gt3x_data"
# Subdirectories of `data_dir` that are scanned for .gt3x files.
subdirs = [
    "in process",
    "processed",
    "raw",
]

In [12]:
import calendar
from datetime import datetime, timedelta
import os
import numpy as np
import pandas as pd
import zipfile

In [3]:
def read_zips(data_dir, subdirs):
    """
    Function to read the ``info.txt`` metadata of every .gt3x archive.

    Parameters
    ----------
    data_dir : string
        parent directory

    subdirs : list of strings
        subdirectories of `data_dir` to scan (not searched recursively)

    Returns
    -------
    dfs : list of dicts
        one dict per readable .gt3x file; each ``key: value`` line of the
        archive's ``info.txt`` becomes an entry mapping the text before the
        first colon to the whitespace-stripped text after it

    Notes
    -----
    Archives that cannot be opened, or that have no ``info.txt`` member,
    are skipped and their path is printed.
    """
    dfs = list()
    for subdir in subdirs:
        sd = os.path.join(data_dir, subdir)
        for file in os.listdir(sd):
            if not file.endswith(".gt3x"):
                continue
            zp = os.path.join(sd, file)
            zdict = {}
            try:
                with zipfile.ZipFile(zp) as z:
                    with z.open("info.txt", "r") as f:
                        for line in f:
                            try:
                                text = line.decode("UTF-8")
                            except UnicodeDecodeError:
                                # Undecodable line: skip it, keep the rest.
                                continue
                            # Split on the FIRST colon only so values that
                            # themselves contain ":" are kept intact.
                            key, sep, value = text.partition(":")
                            if not sep:
                                # Not a "key: value" line.
                                continue
                            # strip() handles "\r\n", "\n" and a final line
                            # with no terminator without eating characters.
                            zdict[key] = value.strip()
            except Exception:
                # Best-effort scan: report the unreadable archive and move
                # on, without swallowing KeyboardInterrupt/SystemExit.
                print(zp)
                continue
            dfs.append(zdict)
    return dfs


def ticks_to_datetime(ticks):
    """
    Convert a .NET tick count into a Python datetime.

    Parameter
    ---------
    ticks : int
        tick count (anything accepted by ``int()``), counted from
        0001-01-01 00:00:00

    Returns
    -------
    dt : datetime
        the corresponding naive datetime, truncated to whole microseconds
    """
    # Ten ticks make one microsecond; floor-divide to drop the remainder.
    epoch = datetime(1, 1, 1)
    elapsed = timedelta(microseconds=int(ticks) // 10)
    return epoch + elapsed

In [4]:
# Collect the info.txt metadata of every .gt3x file under the configured
# subdirectories; paths of archives that could not be read are printed.
z = read_zips(data_dir=data_dir, subdirs=subdirs)

/Volumes/Data/Research/Healthy Brain Network/ActiGraph/gt3x_data/in process/5783223 (2017-09-14).gt3x


In [5]:
# One row per .gt3x file, one column per info.txt key.
df = pd.DataFrame(z)

In [6]:
# Replace the tick-count columns with datetimes in place.
# ticks_to_datetime already coerces its argument with int().
for col in (
    "Start Date",
    "Stop Date",
    "Download Date",
    "Last Sample Time",
):
    df[col] = df[col].map(ticks_to_datetime)

In [8]:
# Elapsed time between the first and the last recorded sample.
df["Days Recorded"] = df["Last Sample Time"] - df["Start Date"]

In [14]:
# Name of the calendar month in which the last sample was taken.
df["End Month"] = [
    calendar.month_name[stamp.month] for stamp in df["Last Sample Time"]
]

In [34]:
# Number of recordings per end month (rows) and sample rate (columns).
df.pivot_table(
    values="Days Recorded",
    index="End Month",
    columns="Sample Rate",
    aggfunc="count",
)

Sample Rate,30,60
End Month,Unnamed: 1_level_1,Unnamed: 2_level_1
August,1.0,13.0
July,1.0,
October,5.0,2.0
September,10.0,26.0


In [39]:
# Chronological view (by start, then by last sample) restricted to the
# columns of interest.
subtable = df.sort_values(by=["Start Date", "Last Sample Time"])[[
    "Serial Number",
    "Subject Name",
    "Sample Rate",
    "End Month",
    "Days Recorded",
    "Start Date",
    "Last Sample Time",
]]

In [45]:
# "Sample Rate" holds strings (object dtype), so describe() reports
# count / unique / top / freq rather than numeric statistics.
subtable.loc[:, "Sample Rate"].describe()

count     58
unique     2
top       60
freq      41
Name: Sample Rate, dtype: object