# Day By Day Collection And Sorting Data

### Finding the Day-by-Day Activities and Sleep Amount

In [159]:
import pandas as pd
import numpy as np
from os.path import exists
from functools import reduce
import datetime
import warnings
# NOTE(review): with no category/module argument this mutes every warning for the rest of the session, not only the pandas ones.
warnings.filterwarnings("ignore")#pandas emits some warnings that don't affect the results here, so we just mute them

#authored by Tom Odem on 12 November 2023
def _to_object_array(items):
    """Pack ``items`` into a 1-D object array holding exactly one entry per item."""
    # np.array() on a ragged list of arrays raises on NumPy >= 1.24, and quietly
    # builds a 2-D array when every item happens to have the same length.
    # Filling a pre-allocated object array gives "array of arrays" in both cases.
    packed = np.empty(len(items), dtype=object)
    for position, item in enumerate(items):
        packed[position] = item
    return packed


def _daily_sleep_hours(tags):
    """Return the mean hours slept per wake-up date as a Series indexed by date.

    ``tags`` must carry a parsed 'start' column, a 'date' column and 'labelName'.
    """
    wake = tags.loc[tags['labelName'] == 'Wake up', ['start', 'date']]

    # .copy() so that adding the shifted date never writes into a slice of tags
    sleep = tags.loc[tags['labelName'] == 'Sleep', ['start']].copy()
    # A 'Sleep' entry is filed under the following calendar day so that it lines up
    # with the 'Wake up' entry of the next morning. dtype=object keeps the merge key
    # type stable even when there are no 'Sleep' rows at all.
    # NOTE(review): pairing is by calendar date only, so a 'Sleep' logged after
    # midnight is matched with the wake-up of the *next* day - confirm this is intended.
    sleep['date'] = pd.Series(
        [(ts + pd.Timedelta(days=1)).date() for ts in sleep['start']],
        index=sleep.index,
        dtype=object,
    )

    # every wake-up of a date is paired with every sleep filed under that date
    paired = wake.merge(sleep, on='date', suffixes=('_wake', '_sleep'))
    asleep = pd.to_datetime(paired['start_wake']) - pd.to_datetime(paired['start_sleep'])
    # total_seconds() works on every pandas version; astype('timedelta64[m]') is
    # rejected by pandas >= 2.0 and truncated to whole minutes before that.
    paired['sleeptime'] = asleep.dt.total_seconds() / 3600

    # several pairs on the same date are averaged; groupby also sorts by date
    return paired.groupby('date')['sleeptime'].mean()


def get_and_dayitise_data():
    """Collect, for every user and day, the sleep amount and the day's activities.

    Reads 'user_information.csv' for the user ids and 'user_tags/<user_id>.csv'
    for each user's tags (columns 'start' and 'labelName' are used). Users whose
    tags file is missing are reported with a printed message and skipped.

    Returns:
        A 1-D numpy object array with one entry per (user, day) that has both a
        'Wake up' and a matching 'Sleep' tag. Each entry is itself a 1-D object
        array whose first element is the hours slept (float) and whose following
        elements are the labels of the other activities on that day, in file
        order. 'Sleep', 'Wake up', 'None' and missing labels are left out.
    """
    users_df = pd.read_csv('user_information.csv') #user ids, depression scores, etc
    day_by_day = []

    # ids are visited exactly as listed, so an id that appears twice is processed twice
    for user in users_df['user_id']:
        tags_path = f'user_tags/{user}.csv'
        if not exists(tags_path):
            print(f'no user_tags: {user}') #the tags csv was missing for this user
            continue

        # 'end' is never used below; tolerate files that do not have it
        tags = pd.read_csv(tags_path).drop(columns=['end'], errors='ignore')

        # parse each timestamp once (element-wise, so mixed formats stay accepted)
        starts = [pd.to_datetime(raw) for raw in tags['start']]
        tags['start'] = starts
        tags['date'] = pd.Series([ts.date() for ts in starts], index=tags.index, dtype=object)

        # Everything that is not sleep related and carries a real label.
        # pandas >= 2.0 reads the literal 'None' as a missing value, so both the
        # string and NaN have to be excluded to keep the old filtering working.
        is_activity = (
            ~tags['labelName'].isin(['Sleep', 'Wake up', 'None'])
            & tags['labelName'].notna()
        )
        activity_rows = tags.loc[is_activity, ['date', 'labelName']]

        for day, hours in _daily_sleep_hours(tags).items():
            labels = activity_rows.loc[activity_rows['date'] == day, 'labelName']
            # sleep time goes first so it can be found again later
            day_by_day.append(np.array([float(hours), *labels], dtype=object))

    return _to_object_array(day_by_day)


In [160]:
#example: get the day-by-day activities for all users
activities = get_and_dayitise_data()

no user_tags: 520
no user_tags: 532
no user_tags: 503
no user_tags: 503
no user_tags: 523
no user_tags: 544
no user_tags: 529
no user_tags: 661
no user_tags: 658
no user_tags: 664
no user_tags: 634
no user_tags: 507
no user_tags: 547
no user_tags: 501
no user_tags: 668
no user_tags: 662


In [163]:
#display the array returned by get_and_dayitise_data()
activities

array([array([11.0, 'Take medicine', 'Take medicine', 'Take medicine',
              'Take medicine', 'Take medicine', 'Take medicine'], dtype=object),
       array([23.166666666666668, 'Go walk', 'Go walk', 'Go walk', 'Go walk',
              'Go walk', 'Go walk', 'Go walk', 'Go walk', 'Go walk', 'Go walk',
              'Go walk', 'Go walk', 'Go walk', 'Go walk', 'Go walk', 'Go walk',
              'Go walk', 'Go walk', 'Go walk', 'Go walk', 'Go walk',
              'Take medicine'], dtype=object)                                  ,
       array([31.944444444444443, 'Take medicine', 'Go walk', 'Go walk',
              'Go to bathroom', 'Eat', 'Take medicine', 'Take medicine',
              'Take medicine', 'Take medicine', 'Take medicine', 'Take medicine',
              'Take medicine', 'Go walk', 'Take medicine', 'Take medicine'],
             dtype=object)                                                       ,
       ...,
       array([28.0, 'Cook', 'Eat', 'Go to bathroom', 'Eat', 