In [54]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import math
from statsmodels.graphics.tsaplots import plot_acf

# Read in the data
df = pd.read_csv('cleaned_data.csv')
#df

df.loc[df['id'] == 'AS14.01']

Unnamed: 0,date,id,mood,circumplex.arousal,circumplex.valence,activity,screen,call,sms,appCat.builtin,...,appCat.finance,appCat.game,appCat.office,appCat.other,appCat.social,appCat.travel,appCat.unknown,appCat.weather,appCat.utilities,MA
0,2014-03-21,AS14.01,6.2,0.2,0.2,0.13405,17978.907,6.0,0.0,3139.218,...,49.544,0.0,172.206,239.751,4508.5,915.445,0.0,0.0,598.754,0.13405
1,2014-03-22,AS14.01,6.4,0.6,0.4,0.23688,6142.161,3.0,1.0,731.429,...,21.076,0.0,0.0,98.143,439.632,37.305,0.0,0.0,117.621,0.190607
2,2014-03-23,AS14.01,6.8,0.2,0.8,0.142741,6773.832001,0.0,0.0,1286.246,...,43.403,0.0,0.0,72.823,900.839,0.0,0.0,30.386,30.086,0.171365
3,2014-03-24,AS14.01,6.0,0.8,0.0,0.078961,15047.351001,10.0,0.0,866.956,...,34.106,0.0,3.01,66.558,3223.626,419.805,0.0,0.0,178.732,0.140922
4,2014-03-25,AS14.01,6.75,0.5,0.5,0.098374,21475.354999,0.0,1.0,1032.768,...,43.054,0.0,0.0,178.819,1919.471,0.0,235.223,0.0,222.893,0.128708
5,2014-03-26,AS14.01,6.6,-0.2,0.6,0.101308,16423.801,0.0,0.0,1167.497,...,52.331,0.0,0.0,97.498,4592.059,0.0,0.0,0.0,33.365,0.121591
6,2014-03-27,AS14.01,7.0,0.2,0.8,0.159511,17442.149999,2.0,1.0,1229.327,...,42.219,0.0,182.451,58.532,935.381,47.314,0.0,0.0,179.029,0.130728
7,2014-03-28,AS14.01,6.4,-0.6,0.6,0.095698,4923.489,5.0,0.0,10062.595,...,89.166,233.036,0.0,225.951,512.741,1133.009,0.0,0.0,301.717,0.122759
8,2014-03-29,AS14.01,8.0,0.2,1.0,0.068203,8322.622,4.0,1.0,1952.63,...,0.0,0.0,0.0,169.594,472.888,52.435,0.0,0.0,600.637,0.110889
9,2014-03-30,AS14.01,7.5,-0.5,0.75,0.049093,4523.214001,0.0,0.0,414.365,...,0.0,0.0,0.0,74.003,167.685,0.0,66.477,0.0,38.296,0.097909


In [55]:
# Convert 'date' column to datetime type
df['date'] = pd.to_datetime(df['date'])

# Set 'id' as index
df.set_index('id', inplace=True)

# Find the earliest date for each 'id'
earliest_dates = df.groupby('id')['date'].min()

# Create a dictionary to store the date ranges for each 'id'
date_ranges = {}

# Loop through each 'id' and generate date ranges for each 5-day period
for id, earliest_date in earliest_dates.items():
    date_range = pd.date_range(start=earliest_date, freq='5D', closed='left', periods=(len(df) // 5) + 1)
    date_ranges[id] = date_range

# Flatten the list of date ranges
all_dates = pd.to_datetime([date for date_range in date_ranges.values() for date in date_range])

# Map dates to corresponding period values using searchsorted
df['period'] = np.searchsorted(all_dates, df['date']) + 1

# Define custom aggregation functions
agg_dict = {'mood': 'last', 'circumplex.arousal': 'mean', 'circumplex.valence': 'mean', 'activity': 'mean', 'screen': 'sum', 'call': 'sum', 'sms': 'sum', 'appCat.builtin': 'sum', 'appCat.communication': 'sum', 'appCat.entertainment': 'sum', 'appCat.finance': 'sum', 'appCat.game': 'sum', 'appCat.office': 'sum', 'appCat.other': 'sum', 'appCat.social': 'sum', 'appCat.travel': 'sum', 'appCat.unknown': 'sum', 'appCat.weather': 'sum', 'appCat.utilities': 'sum'}

# Group by 'id' and 'period', and apply the aggregation functions
df = df.groupby(['id', 'period']).agg(agg_dict)

# Reset index
df.reset_index(inplace=True)

# Print the result
df


Unnamed: 0,id,period,mood,circumplex.arousal,circumplex.valence,activity,screen,call,sms,appCat.builtin,...,appCat.entertainment,appCat.finance,appCat.game,appCat.office,appCat.other,appCat.social,appCat.travel,appCat.unknown,appCat.weather,appCat.utilities
0,AS14.01,1,6.20,0.200000,0.200000,0.134050,17978.907000,6.0,0.0,3139.218,...,1007.456,49.544,0.000,172.206,239.751,4508.500,915.445,0.000,0.000,598.754
1,AS14.01,2,6.60,0.380000,0.460000,0.131653,65862.500001,13.0,2.0,5084.896,...,2143.326,193.970,0.000,3.010,513.841,11075.627,457.110,235.223,30.386,582.697
2,AS14.01,3,7.40,-0.140000,0.750000,0.088602,47048.309000,17.0,2.0,16620.322,...,6566.768,172.640,284.212,182.451,796.634,3439.350,1416.139,95.767,0.000,1329.984
3,AS14.01,4,6.80,-0.190000,0.300000,0.103058,75654.094003,11.0,1.0,24438.349,...,5289.240,871.837,234.741,182.869,1134.567,14072.505,1979.889,0.000,0.000,1289.427
4,AS14.01,5,7.40,-0.570000,0.510000,0.077951,55269.223002,17.0,0.0,8722.700,...,6900.477,65.259,115.465,40.302,953.555,8453.066,60.247,0.000,0.000,1243.249
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
261,AS14.33,12,6.00,-0.116667,0.313333,0.060332,59691.757998,9.0,2.0,7511.488,...,3898.637,0.000,0.000,0.000,566.346,22361.588,237.870,2.007,0.000,64.356
262,AS14.33,13,6.25,-0.130000,0.390000,0.022743,46369.721001,13.0,1.0,6711.250,...,2839.927,0.000,0.000,0.000,314.390,24230.355,0.000,2.010,0.000,40.459
263,AS14.33,14,6.25,-0.470000,0.106667,0.045012,39257.282000,9.0,2.0,6027.681,...,3691.500,0.000,0.000,0.000,345.870,23037.320,0.000,0.000,0.000,105.820
264,AS14.33,15,6.80,-0.626667,0.440000,0.118693,50402.922997,30.0,7.0,7770.240,...,1825.127,0.000,0.000,488.439,990.281,17769.170,1053.587,8.072,0.000,322.863


In [56]:
# df.loc['AS14.01']
# result.loc['AS14.01']
df.loc[df['id'] == 'AS14.01']

Unnamed: 0,id,period,mood,circumplex.arousal,circumplex.valence,activity,screen,call,sms,appCat.builtin,...,appCat.entertainment,appCat.finance,appCat.game,appCat.office,appCat.other,appCat.social,appCat.travel,appCat.unknown,appCat.weather,appCat.utilities
0,AS14.01,1,6.2,0.2,0.2,0.13405,17978.907,6.0,0.0,3139.218,...,1007.456,49.544,0.0,172.206,239.751,4508.5,915.445,0.0,0.0,598.754
1,AS14.01,2,6.6,0.38,0.46,0.131653,65862.500001,13.0,2.0,5084.896,...,2143.326,193.97,0.0,3.01,513.841,11075.627,457.11,235.223,30.386,582.697
2,AS14.01,3,7.4,-0.14,0.75,0.088602,47048.309,17.0,2.0,16620.322,...,6566.768,172.64,284.212,182.451,796.634,3439.35,1416.139,95.767,0.0,1329.984
3,AS14.01,4,6.8,-0.19,0.3,0.103058,75654.094003,11.0,1.0,24438.349,...,5289.24,871.837,234.741,182.869,1134.567,14072.505,1979.889,0.0,0.0,1289.427
4,AS14.01,5,7.4,-0.57,0.51,0.077951,55269.223002,17.0,0.0,8722.7,...,6900.477,65.259,115.465,40.302,953.555,8453.066,60.247,0.0,0.0,1243.249
5,AS14.01,6,6.4,-0.88,0.79,0.068051,55854.076998,9.0,2.0,6676.829,...,11622.09,278.872,0.0,0.0,480.205,8245.936,76.732,0.0,0.0,1832.847
6,AS14.01,7,7.25,-0.23,0.7,0.088262,62668.45,6.0,3.0,5179.988,...,2540.095,225.398,29.084,0.0,208.02,8648.334,192.764,0.0,0.0,788.816
7,AS14.01,8,7.2,-0.08,0.96,0.079984,69919.020998,8.0,1.0,9069.912,...,6639.131,320.778,0.0,0.0,264.387,7294.348,182.778,0.0,0.0,1945.212
8,AS14.01,9,7.8,-0.32,0.85,0.103643,49039.307997,6.0,4.0,9602.71,...,5251.61,867.542,333.305,0.0,1401.728,5320.431,785.553,3.148,0.0,1279.025
9,AS14.01,10,7.709987,-0.323551,0.938835,0.066036,46469.435999,7.0,5.0,5621.591,...,4832.624,2687.165,0.0,28.098,152.967,6415.591,143.631,30.102,0.0,670.588


In [59]:
# Still need to remove the first column

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  return func(*args, **kwargs)


In [60]:
result.loc[result['id'] == 'AS14.01']

Unnamed: 0,id,period,mood,circumplex.arousal,circumplex.valence,activity,screen,call,sms,appCat.builtin,...,appCat.entertainment,appCat.finance,appCat.game,appCat.office,appCat.other,appCat.social,appCat.travel,appCat.unknown,appCat.weather,appCat.utilities
0,AS14.01,1,6.2,0.2,0.2,0.13405,17978.907,6.0,0.0,3139.218,...,1007.456,49.544,0.0,172.206,239.751,4508.5,915.445,0.0,0.0,598.754
1,AS14.01,2,6.6,0.38,0.46,0.131653,65862.500001,13.0,2.0,5084.896,...,2143.326,193.97,0.0,3.01,513.841,11075.627,457.11,235.223,30.386,582.697
2,AS14.01,3,7.4,-0.14,0.75,0.088602,47048.309,17.0,2.0,16620.322,...,6566.768,172.64,284.212,182.451,796.634,3439.35,1416.139,95.767,0.0,1329.984
3,AS14.01,4,6.8,-0.19,0.3,0.103058,75654.094003,11.0,1.0,24438.349,...,5289.24,871.837,234.741,182.869,1134.567,14072.505,1979.889,0.0,0.0,1289.427
4,AS14.01,5,7.4,-0.57,0.51,0.077951,55269.223002,17.0,0.0,8722.7,...,6900.477,65.259,115.465,40.302,953.555,8453.066,60.247,0.0,0.0,1243.249
5,AS14.01,6,6.4,-0.88,0.79,0.068051,55854.076998,9.0,2.0,6676.829,...,11622.09,278.872,0.0,0.0,480.205,8245.936,76.732,0.0,0.0,1832.847
6,AS14.01,7,7.25,-0.23,0.7,0.088262,62668.45,6.0,3.0,5179.988,...,2540.095,225.398,29.084,0.0,208.02,8648.334,192.764,0.0,0.0,788.816
7,AS14.01,8,7.2,-0.08,0.96,0.079984,69919.020998,8.0,1.0,9069.912,...,6639.131,320.778,0.0,0.0,264.387,7294.348,182.778,0.0,0.0,1945.212
8,AS14.01,9,7.8,-0.32,0.85,0.103643,49039.307997,6.0,4.0,9602.71,...,5251.61,867.542,333.305,0.0,1401.728,5320.431,785.553,3.148,0.0,1279.025
9,AS14.01,10,7.709987,-0.323551,0.938835,0.066036,46469.435999,7.0,5.0,5621.591,...,4832.624,2687.165,0.0,28.098,152.967,6415.591,143.631,30.102,0.0,670.588
