In [1]:
import sys
from pathlib import Path

# Project root.
# NOTE(review): assumes the kernel's working directory sits one level below
# the repository root (e.g. a notebooks/ folder) -- confirm before running
# from elsewhere, since ".." is resolved against the current directory.
PROJECT_ROOT = Path("..").resolve()

# Make `src` importable. The membership check keeps the cell idempotent:
# re-running it no longer stacks duplicate entries onto sys.path.
if str(PROJECT_ROOT) not in sys.path:
    sys.path.append(str(PROJECT_ROOT))

# Data paths, all derived from the project root.
RAW_DATA = PROJECT_ROOT / "data" / "raw"
SCORES_PATH = PROJECT_ROOT / "data" / "scores.csv"


In [2]:
from pathlib import Path
from src.utils import load_all_subjects, load_scores

# Read the raw recordings and the scores file from the paths configured above.
df = load_all_subjects(RAW_DATA)
scores = load_scores(SCORES_PATH)

# Cohort overview: distinct participants overall ...
n_subjects = df["participant_id"].nunique()
print("Total subjects:", n_subjects)

# ... and distinct participants within each value of the `group` column.
subjects_per_group = df.groupby("group")["participant_id"].nunique()
print(subjects_per_group)

# Preview the top of the frame; as the cell's last expression it is rendered
# as the cell output.
print("\nFirst rows:")
df.head(5)


Total subjects: 55
group
condition    23
control      32
Name: participant_id, dtype: int64

First rows:


Unnamed: 0,timestamp,activity,participant_id,group
0,2003-05-07 12:00:00,0,condition_1,condition
1,2003-05-07 12:01:00,143,condition_1,condition
2,2003-05-07 12:02:00,0,condition_1,condition
3,2003-05-07 12:03:00,20,condition_1,condition
4,2003-05-07 12:04:00,166,condition_1,condition


In [3]:
from src.windowing import add_day_column, get_unique_days

# Pick one subject to inspect: the participant that owns the first row of df.
pid = df["participant_id"].iloc[0]

# Work on an explicit copy of that participant's rows. add_day_column is
# expected to attach a column (TODO confirm in src.windowing); copying first
# guarantees it operates on an independent frame, so pandas has no reason to
# emit SettingWithCopyWarning and `df` cannot be affected.
df_one = df[df["participant_id"] == pid].copy()

df_one = add_day_column(df_one)
days = get_unique_days(df_one)

print("Participant:", pid)
print("Number of unique days:", len(days))
print("First 5 days:")
print(days.head())

# Check minutes per day: number of timestamp rows falling on each `day`.
# NOTE(review): equals recorded minutes only if the data has one row per
# minute, as the preview of df suggests -- confirm.
counts = df_one.groupby("day")["timestamp"].count()
print("\nMinutes per day (first 5):")
print(counts.head())

Participant: condition_1
Number of unique days: 17
First 5 days:
0   2003-05-07
1   2003-05-08
2   2003-05-09
3   2003-05-10
4   2003-05-11
dtype: datetime64[ns]

Minutes per day (first 5):
day
2003-05-07     720
2003-05-08    1440
2003-05-09    1440
2003-05-10    1440
2003-05-11    1440
Name: timestamp, dtype: int64


In [4]:
from src.windowing import daily_minute_counts, is_day_complete

# Per-day row counts for the inspected participant, this time via the
# project helper rather than an ad-hoc groupby.
counts = daily_minute_counts(df_one)

# Take the five-row preview once and reuse it for both printouts below.
counts_preview = counts.head(5)

print("Daily minute counts (first 5):")
print(counts_preview)

# Report, for each previewed day, the verdict returned by is_day_complete.
# NOTE(review): presumably a complete day means 1440 one-minute samples --
# confirm the threshold in src.windowing.
print("\nCompleteness check (first 5 days):")
for day, count in counts_preview.items():
    verdict = is_day_complete(count)
    print(day.date(), count, "complete?", verdict)

Daily minute counts (first 5):
day
2003-05-07     720
2003-05-08    1440
2003-05-09    1440
2003-05-10    1440
2003-05-11    1440
Name: timestamp, dtype: int64

Completeness check (first 5 days):
2003-05-07 720 complete? False
2003-05-08 1440 complete? True
2003-05-09 1440 complete? True
2003-05-10 1440 complete? True
2003-05-11 1440 complete? True
