In [None]:
import os
import csv
import glob
import pandas as pd
from datetime import datetime
from tsfresh import extract_features

def read_activity_file(filepath, patient_id):
    """Load one semicolon-delimited activity recording into a DataFrame.

    The first row is treated as a header and discarded. In every following
    non-blank row, field 0 must be a timestamp formatted as
    ``%m-%d-%Y %H:%M`` and field 1 an integer reading, optionally followed by
    a space and trailing text (only the part before the first space is
    parsed). Blank lines are skipped.

    Args:
        filepath: Path of the CSV file to read.
        patient_id: Value stored in the ``ID`` column of every returned row.

    Returns:
        A DataFrame with the columns ``TIME`` (POSIX timestamp as float; the
        parsed datetime is naive, so it is interpreted in the local
        timezone), ``ACC`` (int) and ``ID``. The frame has no rows when the
        file is empty or contains only a header.

    Raises:
        ValueError: If a timestamp or reading cannot be parsed.
        IndexError: If a non-blank row has fewer than two fields.
    """
    # newline="" lets the csv module do its own line-ending handling, as its
    # documentation requires for file objects passed to csv.reader.
    with open(filepath, newline="") as f:
        csv_reader = csv.reader(f, delimiter=";")
        # Skip the header; the default avoids StopIteration on an empty file.
        next(csv_reader, None)
        rows = [
            [
                datetime.strptime(line[0], "%m-%d-%Y %H:%M").timestamp(),
                int(line[1].split(" ")[0]),
            ]
            for line in csv_reader
            if line  # csv.reader yields [] for blank lines; skip them
        ]
    data = pd.DataFrame(rows, columns=["TIME", "ACC"])
    data["ID"] = patient_id
    return data

# Directory scanned for per-patient "*.csv" activity recordings, and the
# directory that receives the combined feature table.
input_dir = "/content/activity_data"
output_dir = "/content/preprocessed_data"

# exist_ok=True makes this a no-op when the directory is already present.
os.makedirs(output_dir, exist_ok=True)

# Per-file feature rows are collected here and concatenated once after the
# loop; concatenating inside the loop re-copies every earlier row each time.
feature_frames = []

# glob returns matches in arbitrary order; sorting makes the row order of the
# output file reproducible between runs.
for filepath in sorted(glob.glob(os.path.join(input_dir, "*.csv"))):
    print(f"Reading {filepath}")
    # The patient id is whatever follows the last "_" in the file name
    # (extension removed); a name without "_" is used whole.
    patient_id = os.path.splitext(os.path.basename(filepath))[0].split("_")[-1]

    data = read_activity_file(filepath, patient_id)
    features = extract_features(
        data,
        column_id="ID",
        column_value="ACC",
        column_sort="TIME",
        n_jobs=0,
        show_warnings=False,
    )

    # Ensure the id exists as a column so it can become the index below.
    if "ID" not in features.columns:
        features["ID"] = patient_id

    feature_frames.append(features)

# Fail with a clear message instead of an opaque KeyError from set_index when
# nothing matched the glob pattern.
if not feature_frames:
    raise FileNotFoundError(f"No CSV files found in {input_dir}")

all_features = pd.concat(feature_frames)
all_features.set_index("ID", inplace=True)

all_features.to_csv(os.path.join(output_dir, "activity_features.csv"), sep=";")