In [None]:
import pandas as pd
import os
import glob

In [None]:
#Define paths
# Notebook kernels do not define __file__, so referencing it raises NameError on
# a fresh "Restart & Run All". Keep the script behaviour when __file__ exists and
# fall back to the kernel's working directory otherwise.
try:
    currentDir = os.path.dirname(__file__)
except NameError:
    currentDir = os.getcwd()

# All inputs live under a "data" folder next to the notebook/script.
accelerometerDataPath = os.path.join(currentDir, "data", "all_accelerometer_data_pids_13.csv")
tacDataPath = os.path.join(currentDir, "data", "clean_tac")
phoneTypesPath = os.path.join(currentDir, "data", "phone_types.csv")

In [None]:
#Combine clean_tac into one dataframe
# Collect every CSV directly inside the clean_tac folder.
tacCsvPattern = os.path.join(tacDataPath, '*.csv')
csvFiles = glob.glob(tacCsvPattern)

# One DataFrame per file; they are stacked into a single frame later on.
tacDataFrames = [pd.read_csv(tacCsvPath) for tacCsvPath in csvFiles]

In [None]:
#Loading data files
# Stack the per-file TAC frames row-wise and renumber the index from 0.
# pd.concat raises ValueError if tacDataFrames is empty (no CSVs were found).
tacData = pd.concat(tacDataFrames, axis=0, ignore_index=True)

# The accelerometer readings and the phone-type lookup are single CSV files.
accelerometerData = pd.read_csv(accelerometerDataPath)
phoneTypes = pd.read_csv(phoneTypesPath)

In [None]:
#Connect accelerometer data with their respective phone types
# Inner join on 'pid': accelerometer rows whose pid has no entry in phoneTypes
# are dropped.
# NOTE(review): this rebinds accelerometerData, so running the cell a second
# time merges phoneTypes into the already-merged frame — re-run the load cell first.
accelerometerData = pd.merge(accelerometerData, phoneTypes, how='inner', on='pid')

In [None]:
#Identify bar crawl events (increased movement)
# Number of consecutive accelerometer rows that make up one segment.
segmentLength = 60
# A segment is recorded as an event when its peak magnitude exceeds this value.
movementThreshold = 0.5
# Filled with one dict per detected segment by the participant loop.
barCrawlEvents = list()


In [None]:
#Iterate through participants data
# Start from an empty list inside this cell so that re-running it does not
# append a second copy of every event to a list created in another cell.
barCrawlEvents = []

# NOTE(review): segments are runs of consecutive rows in the frame's current
# order; this assumes each participant's rows are already time-ordered — confirm.
for participant_id, participant_data in accelerometerData.groupby('pid'):
    # Euclidean norm of the (x, y, z) acceleration vector: sqrt(x^2 + y^2 + z^2).
    # (The exponent must be 0.5; 0.2 would take the fifth root instead.)
    # Kept as a standalone Series rather than assigned onto the groupby
    # sub-frame, which avoids pandas' SettingWithCopyWarning.
    magnitude = (
        participant_data['x'] ** 2
        + participant_data['y'] ** 2
        + participant_data['z'] ** 2
    ) ** 0.5

    # Only full segments are examined; trailing rows that do not fill a whole
    # segment are ignored.
    numSegments = len(participant_data) // segmentLength
    for i in range(numSegments):
        segmentStart = i * segmentLength
        segmentEnd = segmentStart + segmentLength
        segmentTimes = participant_data['time'].iloc[segmentStart:segmentEnd]
        peakMagnitude = magnitude.iloc[segmentStart:segmentEnd].max()

        # Record the segment when its strongest sample exceeds the threshold.
        if peakMagnitude > movementThreshold:
            barCrawlEvents.append({
                'participant_id': participant_id,
                'segment_start_time': segmentTimes.iloc[0],
                'segment_end_time': segmentTimes.iloc[-1],
                'magnitude': peakMagnitude
            })

# Naming the columns keeps them present even when no segment qualified, so
# later column lookups do not fail with KeyError on an empty frame.
barCrawlEventsDf = pd.DataFrame(
    barCrawlEvents,
    columns=['participant_id', 'segment_start_time', 'segment_end_time', 'magnitude'],
)

In [None]:
#Define heavy drinking episodes based on the TAC threshold (united states legislation threshold 0.08)
tacThreshold = 0.08
isHeavyReading = tacData['TAC_Reading'] >= tacThreshold
heavyDrinkingEpisodes = tacData[isHeavyReading]

# merge_asof requires both inputs to be sorted on their join keys.
eventsByStart = barCrawlEventsDf.sort_values('segment_start_time')
episodesByTime = heavyDrinkingEpisodes.sort_values('timestamp')

# For each event, attach the most recent heavy TAC reading at or before the
# segment start, provided it lies within the tolerance; otherwise the TAC
# columns are left as NaN.
# NOTE(review): the tolerance is expressed in the units of the key columns, so
# 'segment_start_time' and 'timestamp' must share the same unit — confirm.
mergeData = pd.merge_asof(
    eventsByStart,
    episodesByTime,
    left_on='segment_start_time',
    right_on='timestamp',
    tolerance=60000,
)

# Pearson correlation over the rows where both values are present.
correlation = mergeData['magnitude'].corr(mergeData['TAC_Reading'])
print(barCrawlEventsDf)
print("Correlation between bar crawls and heavy drinking episodes: ", correlation)