# Generation of Random Dailies Example Data

In [2]:
# Column headers of the dailies table, in output order. Every generated
# record is initialised with one key per entry here, so these strings are
# the schema of the produced spreadsheet.
dailies_columns = [
    "User Id",
    "User First Name",
    "User Last Name",
    "User Email",
    "Team Names",
    "Group Names",
    "Calendar Date (Local)",
    "Start Time (Local)",
    "End Time (Local)",
    "Time Zone (Local)",
    "Calendar Date (UTC)",
    "Start Time (UTC)",
    "End Time (UTC)",
    "Start Time (s)",
    "Time Zone (s)",
    "Duration (s)",
    "Summary Id",
    "Activity Type",
    "Steps",
    # NOTE(review): two spaces before "(m)" -- confirm this matches the real
    # export header before normalising it.
    "Distance  (m)",
    "Moderate Intensity Duration (s)",
    "Vigorous Intensity Duration (s)",
    "Floors Climbed",
    "Heart Rate (min bpm)",
    "Heart Rate (avg bpm)",
    "Heart Rate (max bpm)",
    "Stress Level (avg)",
    "Stress Level (max)",
    "Stress Duration (s)",
    "Rest Stress Duration (s)",
    "Activity Stress Duration (s)",
    "Low Stress Duration (s)",
    "Medium Stress Duration (s)",
    "High Stress Duration (s)",
    "Stress Qualifier",
    "Steps Goal",
    "Net Kilocalories Goal",
    "Intensity Duration Goal (s)",
    # NOTE(review): "Gloal" looks like a typo for "Goal" -- verify against the
    # real export header; consumers may depend on the exact spelling.
    "Floors Climbed Gloal",
]

In [14]:
import pandas as pd
import datetime as dt
import numpy as np

# Study window (naive datetimes at midnight) from which active days are drawn.
study_start_time = dt.datetime(year=2022, month=6, day=1)
study_end_time = dt.datetime(year=2023, month=11, day=25)

# Baseline participant table; the generation loop reads its "User Id" and
# "User Last Name" columns.
dailies_ids = pd.read_csv("dailies_id_baseline.csv")

# Candidate time-zone labels, one per whole-hour offset from -5 to +5
# ("UTC-5", ..., "UTC+0", ..., "UTC+5").
timezones = [
    f"UTC{offset:+}"
    for offset in range(-5, 6)
]

def generate_active_days(start, end, prob_user_inactive):
    """Randomly pick "active" days between ``start`` and ``end``.

    One ``np.random.rand()`` draw is made for each of the ``(end - start).days``
    whole days beginning at ``start`` (``end`` itself is never picked). A day is
    kept when the draw falls below the current threshold, which depends on the
    outcome of the previous day:

    * before the first day:   ``0.5 * prob_user_inactive``
    * after a kept day:       ``0.8 * prob_user_inactive``
    * after a skipped day:    ``0.2 * (1 - prob_user_inactive)``

    NOTE(review): despite its name, ``prob_user_inactive`` scales the chance of
    a day being *active* in the first two cases. For values below 0.2 the
    threshold after a skipped day is higher than after a kept day, which looks
    like the opposite of clustering -- confirm the intended formulas.

    Args:
        start: First candidate day (a ``datetime``).
        end: Exclusive upper bound of the candidate range.
        prob_user_inactive: Per-user factor used in the thresholds above.

    Returns:
        List of kept days in chronological order; empty when ``end`` is not
        at least one full day after ``start``.
    """
    one_day = dt.timedelta(days=1)
    picked = []
    threshold = 0.5 * prob_user_inactive
    current = start
    for _ in range((end - start).days):
        if np.random.rand() < threshold:
            picked.append(current)
            threshold = 0.8 * (prob_user_inactive)
        else:
            threshold = 0.2 * (1 - prob_user_inactive)
        current = current + one_day
    return picked

# Build one dailies record per (participant, active day) and export them.
records = []

for user_id, user_last_name in zip(
    dailies_ids["User Id"], dailies_ids["User Last Name"]
):
    # Draw the per-user factor as a scalar (not a 1-element array) so the
    # probability comparisons inside generate_active_days stay scalar.
    active_days = generate_active_days(
        study_start_time, study_end_time, np.random.uniform(0, 0.15)
    )

    for day in active_days:
        # Random local start between 05:00 and 19:59, lasting 30 min to 3 h.
        start_hour = int(np.random.randint(5, 20))
        start_minute = int(np.random.randint(0, 60))
        duration = int(np.random.randint(1800, 10800))
        timezone = str(np.random.choice(timezones))

        # Apply the drawn start time; previously it was generated but unused,
        # so every activity started at local midnight.
        start_time = day + dt.timedelta(hours=start_hour, minutes=start_minute)
        end_time = start_time + dt.timedelta(seconds=duration)

        # Local -> UTC: subtract the whole-hour offset encoded in the label
        # (e.g. "UTC+3" -> 3, "UTC-5" -> -5).
        utc_offset = int(timezone.replace("UTC", ""))
        utc_shift = dt.timedelta(hours=utc_offset)
        start_time_utc = start_time - utc_shift
        end_time_utc = end_time - utc_shift

        # Start from an all-NaN row so columns that are not simulated stay empty.
        record = dict.fromkeys(dailies_columns, np.nan)
        record.update(
            {
                "User Id": user_id,
                "User Last Name": user_last_name,
                "Calendar Date (Local)": start_time.strftime('%Y-%m-%d'),
                "Start Time (Local)": start_time.strftime('%Y-%m-%dT%H:%M:%S'),
                "End Time (Local)": end_time.strftime('%Y-%m-%dT%H:%M:%S'),
                "Time Zone (Local)": timezone,
                "Calendar Date (UTC)": start_time_utc.strftime('%Y-%m-%d'),
                "Start Time (UTC)": start_time_utc.strftime('%Y-%m-%dT%H:%M:%S'),
                "End Time (UTC)": end_time_utc.strftime('%Y-%m-%dT%H:%M:%S'),
                # Mark the naive UTC datetime as UTC before taking the epoch;
                # a naive .timestamp() would be interpreted in the host
                # machine's local time zone.
                "Start Time (s)": int(
                    start_time_utc.replace(tzinfo=dt.timezone.utc).timestamp()
                ),
                # Offset from UTC in seconds, matching the "(s)" unit of the
                # column (the label itself is already in "Time Zone (Local)").
                "Time Zone (s)": utc_offset * 3600,
                "Duration (s)": duration,
            }
        )

        records.append(record)

# Passing the column list keeps the full schema (and its order) even when no
# records were generated.
df = pd.DataFrame(records, columns=dailies_columns)
df.to_excel("generated_dailies.xlsx", index=False)