In [26]:
import numpy as np
import pandas as pd
from datetime import datetime


In [27]:
import warnings

# Suppress future warnings
warnings.simplefilter(action='ignore', category=FutureWarning)

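# Activity Inference Data

The `activity inference` column stores the inferred activity for each reading as an integer code:

- 0: Stationary
- 1: Walking
- 2: Running
- 3: Unknown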

In [28]:
# Load the merged raw activity log. The CSV header carries a leading space in
# ' activity inference'; the column is renamed in a later cell.
ActivityData = pd.read_csv('../dataset/Merged/allActivityRaw.csv')
print(ActivityData)

           timestamp   activity inference userId
0         1364410490                    0    u44
1         1364410498                    0    u44
2         1364410501                    0    u44
3         1364410503                    0    u44
4         1364410506                    0    u44
...              ...                  ...    ...
22842186  1370059187                    0    u49
22842187  1370059189                    0    u49
22842188  1370059192                    0    u49
22842189  1370059195                    0    u49
22842190  1370059197                    0    u49

[22842191 rows x 3 columns]


In [29]:
# Bring every participant's rows together by ordering the frame on userId.
ActivityData = ActivityData.sort_values('userId')

In [30]:
# Derive calendar columns from the raw epoch-second timestamps.
# NOTE(review): to_datetime(unit='s') yields timezone-naive values on the UTC
# clock, so 'date', 'time' and 'week' below are UTC-based -- confirm that local
# time is not what the analysis needs.
ActivityData = ActivityData.rename(columns={' activity inference': 'activityInference'})
ActivityData['timestamp'] = pd.to_datetime(ActivityData['timestamp'], unit='s')
# normalize() truncates to midnight and keeps the datetime64 dtype, so there is
# no round trip through Python date objects and back through pd.to_datetime.
ActivityData['date'] = ActivityData['timestamp'].dt.normalize()
ActivityData['time'] = ActivityData['timestamp'].dt.time
# ISO-8601 week-of-year number of each reading.
ActivityData['week'] = ActivityData['date'].dt.isocalendar().week

In [31]:
# Show the frame (pandas truncates the printed repr for large frames).
print(ActivityData)

                   timestamp  activityInference userId       date      time  \
16829518 2013-04-20 11:27:02                  0    u00 2013-04-20  11:27:02   
16947663 2013-05-07 00:45:32                  0    u00 2013-05-07  00:45:32   
16947662 2013-05-07 00:45:29                  0    u00 2013-05-07  00:45:29   
16947661 2013-05-07 00:45:27                  0    u00 2013-05-07  00:45:27   
16947660 2013-05-07 00:45:24                  0    u00 2013-05-07  00:45:24   
...                      ...                ...    ...        ...       ...   
20799618 2013-05-04 22:51:44                  0    u59 2013-05-04  22:51:44   
20799619 2013-05-04 22:51:46                  0    u59 2013-05-04  22:51:46   
20799620 2013-05-04 22:51:48                  0    u59 2013-05-04  22:51:48   
20799611 2013-05-04 22:51:29                  0    u59 2013-05-04  22:51:29   
20332065 2013-04-22 10:52:45                  0    u59 2013-04-22  10:52:45   

          week  
16829518    16  
16947663    19  


In [32]:
# Bucket every reading into a part of the day from the hour of its timestamp.
# Bins are left-closed (right=False):
#   [0, 6)  EarlyMorning   [6, 12)  Morning
#   [12, 18) Afternoon     [18, 24) Evening
bins = [0, 6, 12, 18, 24]
labels = ['EarlyMorning', 'Morning', 'Afternoon', 'Evening']
# The hour is passed to pd.cut directly, so no temporary 'hour' column has to
# be added to the frame and dropped again.
ActivityData['timeCategory'] = pd.cut(
    ActivityData['timestamp'].dt.hour,
    bins=bins,
    labels=labels,
    right=False,
)

print(ActivityData)

                   timestamp  activityInference userId       date      time  \
16829518 2013-04-20 11:27:02                  0    u00 2013-04-20  11:27:02   
16947663 2013-05-07 00:45:32                  0    u00 2013-05-07  00:45:32   
16947662 2013-05-07 00:45:29                  0    u00 2013-05-07  00:45:29   
16947661 2013-05-07 00:45:27                  0    u00 2013-05-07  00:45:27   
16947660 2013-05-07 00:45:24                  0    u00 2013-05-07  00:45:24   
...                      ...                ...    ...        ...       ...   
20799618 2013-05-04 22:51:44                  0    u59 2013-05-04  22:51:44   
20799619 2013-05-04 22:51:46                  0    u59 2013-05-04  22:51:46   
20799620 2013-05-04 22:51:48                  0    u59 2013-05-04  22:51:48   
20799611 2013-05-04 22:51:29                  0    u59 2013-05-04  22:51:29   
20332065 2013-04-22 10:52:45                  0    u59 2013-04-22  10:52:45   

          week  timeCategory  
16829518    16      

In [33]:
# Reorder the rows so that they ascend by ISO week number.
ActivityData = ActivityData.sort_values('week')

In [34]:
# Build per-user activity counts over sliding two-week windows.
#
# For every week W present in the data except the last one, rows of weeks W and
# W + 1 are counted per (userId, timeCategory, activityInference) and pivoted
# into one row per user, with one column per combination named
# "Activity<timeCategory><activityInference>". WeekId is W's offset from the
# first week in the data.

# Sort the week numbers here rather than relying on the row order left behind
# by an earlier cell: weeks[0] must be the smallest week for WeekId to be a
# non-negative offset. Plain Python ints also keep `week - weeks[0]` out of
# unsigned arithmetic (isocalendar() returns an unsigned integer dtype).
weeks = sorted(int(w) for w in ActivityData['week'].dropna().unique())
dfs = []

for week in weeks[:-1]:
    # NOTE(review): the window is {week, week + 1}. If the data skips a week
    # number, the window silently covers a single week -- confirm acceptable.
    twoWeekData = ActivityData[ActivityData['week'].isin([week, week + 1])]

    # observed=False is spelled out because 'timeCategory' is categorical:
    # every time category is emitted for every user (count 0 when absent),
    # independent of the pandas version's default.
    grouped = (
        twoWeekData
        .groupby(['userId', 'timeCategory', 'activityInference'], observed=False)
        .size()
        .reset_index(name='count')
    )

    # Each (userId, timeCategory, activityInference) cell holds exactly one
    # count, so pivot_table's default mean aggregation only reshapes the data
    # (and yields float columns).
    pivot_df = grouped.pivot_table(
        index='userId',
        columns=['timeCategory', 'activityInference'],
        values='count',
        observed=False,
    ).fillna(0)

    # Flatten the (timeCategory, activityInference) column pairs into names.
    pivot_df.columns = [f"Activity{category}{inference}" for category, inference in pivot_df.columns]

    # Make 'userId' a regular column and tag the rows with the window offset.
    pivot_df = pivot_df.reset_index()
    pivot_df['WeekId'] = week - weeks[0]
    dfs.append(pivot_df)

result_df = pd.concat(dfs)

# A count column that is missing from one window's pivot comes out of concat as
# NaN for that window's rows; treat it as zero, like the per-window fillna.
count_columns = result_df.columns.difference(['userId', 'WeekId'])
result_df[count_columns] = result_df[count_columns].fillna(0)

# Keeps the per-window row numbers in an 'index' column, as before.
result_df = result_df.reset_index()


In [35]:
# Remove the 'index' column left behind by reset_index(). errors='ignore'
# keeps this cell re-runnable after the column has already been dropped.
result_df = result_df.drop(columns=['index'], errors='ignore')
print(result_df)

    userId  ActivityEarlyMorning0  ActivityEarlyMorning1  \
0      u00                21381.0                  709.0   
1      u01                21141.0                  894.0   
2      u02                20705.0                 1075.0   
3      u03                22004.0                    4.0   
4      u04                20595.0                 1490.0   
..     ...                    ...                    ...   
426    u54                 3840.0                   57.0   
427    u56                24619.0                  725.0   
428    u57                20742.0                 2419.0   
429    u58                19235.0                  856.0   
430    u59               105130.0                 5717.0   

     ActivityEarlyMorning2  ActivityEarlyMorning3  ActivityMorning0  \
0                    225.0                  560.0           21806.0   
1                    146.0                  462.0           24970.0   
2                     26.0                  889.0           25012.

In [36]:
# Write the feature table to CSV, leaving out the DataFrame's row index.
activity_counts_path = '../dataset/BasicFeatures/activityCounts.csv'
result_df.to_csv(activity_counts_path, index=False)