In [27]:
import pandas as pd
import os

Creating the Dataframes of Camera Data

In [28]:
#Input folders, given relative to the notebook's working directory:
#camDir holds the pose csvs, actDir holds the activity csvs
camDir = "test_posecsvs"
actDir = "test_activitycsvs"

In [29]:
#collect the names of all pose csvs found in camDir
#os.listdir returns entries in arbitrary order, so the names are sorted to
#keep the order of the dataframes (and of their labels) identical on every run
filenames = sorted(
    filename for filename in os.listdir(camDir) if filename.endswith(".csv")
)

print(filenames)

['2022-07-04T11:12:30.982.csv']


In [30]:
#extract the pose csv data into a list of dataframes; csvs without rows are skipped
datasets = []
for file in filenames:
    #parse each csv exactly once and keep the parsed frame itself
    df = pd.read_csv(os.path.join(".", camDir, file))
    if not df.empty:
        datasets.append(df)

datasets[0].head()

Unnamed: 0,time,gro1,gro2,gro3,gro4,v1,v2,v3,k0x,k0y,...,k30z,k31x,k31y,k31z,k32x,k32y,k32z,k33x,k33y,k33z
0,"2022, 7, 4, 11, 11, 30, 982000",-0.00971,0.99776,-0.04258,0.05069,0.0,0.0,0.0,-2.59662,0.798,...,1.43709,-2.68741,0.03397,1.49462,-2.51162,1.74999,1.35192,-2.71056,1.74525,1.33185
1,"2022, 7, 4, 11, 12, 31, 82000",-0.06807,0.8623,0.02053,0.50137,2.69171,0.00057,-0.63807,-2.3034,0.77512,...,1.24546,-2.29125,-0.01264,1.24226,-2.35692,1.73046,1.36622,-2.45452,1.7111,1.19272
2,"2022, 7, 4, 11, 12, 31, 215000",-0.04008,0.79878,-0.00741,0.60024,2.34727,-0.00045,-0.49736,-2.09211,0.78781,...,1.27582,-2.12522,-0.00013,1.23702,-2.13893,1.74244,1.32042,-2.19417,1.72785,1.12875
3,"2022, 7, 4, 11, 12, 31, 315000",-0.02906,0.79991,-0.00999,0.59932,2.0112,-0.00093,-0.29265,-1.97743,0.7963,...,1.31759,-2.02485,0.00656,1.27493,-2.01673,1.75014,1.36454,-2.07272,1.73844,1.1729
4,"2022, 7, 4, 11, 12, 31, 398000",-0.02548,0.76337,-0.00422,0.64544,1.75821,-0.00082,-0.10898,-1.89362,0.81128,...,1.36105,-1.93454,0.02126,1.30918,-1.94009,1.76431,1.42543,-1.97319,1.75544,1.22839


Creating the Ground Truth and Preparing the Data for Machine Learning

In [31]:
#a frame is labelled 1 when it lies strictly less than this many seconds
#away from an activity timestamp
WINDOW_SECONDS = 20

truths = []
#iterate through all dataframes
for data in datasets:
    #the "time" column holds strings like "2022, 7, 4, 11, 12, 31, 82000"
    #(year, month, day, hour, minute, second, and a trailing sub-second field)
    dates = data.loc[:, "time"]
    year, month, day = dates.iloc[0].replace(" ", "").split(',')[0:3]
    #create the string of the associated activity csv from the date of the first frame
    #(month and day are zero-padded to two digits)
    filestring = './' + actDir + '/activities-' + year + '-' + month.zfill(2) + '-' + day.zfill(2) + '.csv'

    #open the corresponding file if it exists; only a missing or empty file is
    #treated as "no activities available", every other error is raised
    try:
        timestamps = pd.read_csv(filestring)
    except (FileNotFoundError, pd.errors.EmptyDataError):
        #NOTE: the skipped dataframe stays in `datasets` but gets no entry in
        #`truths`, so code pairing the two row for row must account for it
        print("skipping dataframe without labels, no usable activity file: " + filestring)
        continue

    #iterating a DataFrame yields its column labels, so the activity timestamps
    #are read from the header row; each has the form "<date>T<HH:MM:SS>.<fraction>"
    #convert them to seconds of the day once per file instead of once per frame
    activity_seconds = []
    for timestamp in timestamps.columns:
        ts_h, ts_m, ts_s = timestamp.split('T')[1].split('.')[0].split(':')[0:3]
        activity_seconds.append(int(ts_h) * 3600 + int(ts_m) * 60 + int(ts_s))

    labels = []
    #iterate through all entries in the time column of the dataframe
    for frame_time in dates:
        #extract the time of day in seconds of a given frame
        hour, minute, second = frame_time.replace(" ", "").split(',')[3:6]
        total = int(hour) * 3600 + int(minute) * 60 + int(second)

        #label 1 if any activity is closer than WINDOW_SECONDS, else 0
        #(any() stops at the first match)
        near_activity = any(abs(total - ts_total) < WINDOW_SECONDS for ts_total in activity_seconds)
        labels.append(int(near_activity))

    #create a list of lists that contains the labels to each dataframe
    truths.append(labels)

print(truths)

[[0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1]]


In [32]:
#concatenate all dataframes to one single dataframe
megatable = datasets[0]
for data in datasets[1:]:
    megatable = pd.concat([megatable, data], ignore_index=True)

#concatenate all the label lists to one big list that corresponds row for row with the dataframe
y = []
for truth in truths:
    y += truth

print(megatable)
print(y)

                               time     gro1     gro2     gro3     gro4  \
0    2022, 7, 4, 11, 11, 30, 982000 -0.00971  0.99776 -0.04258  0.05069   
1     2022, 7, 4, 11, 12, 31, 82000 -0.06807  0.86230  0.02053  0.50137   
2    2022, 7, 4, 11, 12, 31, 215000 -0.04008  0.79878 -0.00741  0.60024   
3    2022, 7, 4, 11, 12, 31, 315000 -0.02906  0.79991 -0.00999  0.59932   
4    2022, 7, 4, 11, 12, 31, 398000 -0.02548  0.76337 -0.00422  0.64544   
5    2022, 7, 4, 11, 12, 31, 482000 -0.03185  0.75296 -0.00556  0.65727   
6    2022, 7, 4, 11, 12, 31, 582000 -0.04310  0.75923 -0.01145  0.64928   
7    2022, 7, 4, 11, 12, 31, 698000 -0.03553  0.76396 -0.00204  0.64428   
8    2022, 7, 4, 11, 12, 31, 798000 -0.03834  0.77644  0.01371  0.62888   
9    2022, 7, 4, 11, 12, 31, 882000 -0.04202  0.78866  0.03118  0.61260   
10    2022, 7, 4, 11, 12, 32, 15000 -0.04367  0.77842  0.02810  0.62559   
11   2022, 7, 4, 11, 12, 32, 115000 -0.04197  0.76079  0.01768  0.64740   
12   2022, 7, 4, 11, 12, 

Machine Learning