In [None]:
from activity_tracker import utils
import sqlite3
import pandas as pd
import pathlib

# Raise pandas' display limits so wide/long frames are not elided when shown.
pd.set_option('display.max_columns', 100)
pd.set_option('display.max_rows', 100)

# Location of the SQLite file, relative to the notebook's working directory.
data_dir = pathlib.Path("../data")
db_path = data_dir / "sqlite" / "analysis.db"

# sqlite3.connect() silently creates a new, empty database when the file does
# not exist, which would only surface later as a confusing "no such table"
# error. Fail fast with the resolved path instead.
if not db_path.is_file():
    raise FileNotFoundError(
        f"SQLite database not found at {db_path.resolve()}; "
        "check the notebook's working directory and data_dir."
    )
conn = sqlite3.connect(db_path)

In [None]:
def _load_table(table_name):
    """Return every row and column of ``table_name`` as a DataFrame, read through ``conn``."""
    return pd.read_sql_query(f"SELECT * FROM {table_name}", conn)


# One frame per table; the tables are read in this order.
df_subject = _load_table("subject")
df_visit = _load_table("visit")
df_measurement = _load_table("daily_measurement")

In [None]:
# Positional index of the first column to the right of "percentage_wear_time".
start_col = 1 + df_measurement.columns.get_loc("percentage_wear_time")
right_cols = df_measurement.columns[start_col:]

# Keep a row only when at least one of the right-side columns holds a value
# (i.e. discard rows where every right-side column is NaN), then renumber.
has_any_value = df_measurement[right_cols].notna().any(axis=1)
filtered_df = df_measurement.loc[has_any_value].reset_index(drop=True)
filtered_df

In [None]:
# A notebook cell only echoes its final expression, so two bare expressions
# would show just the second count. Return both in one labelled Series.
# nunique(dropna=False) matches len(.unique()): a missing subject_id, if any,
# still counts as one distinct value.
pd.Series(
    {
        "df_measurement": df_measurement["subject_id"].nunique(dropna=False),
        "filtered_df": filtered_df["subject_id"].nunique(dropna=False),
    },
    name="unique_subject_ids",
)

In [None]:
# Number of distinct subject_id values in the visit table (a missing id, if
# present, is counted as one distinct value).
df_visit["subject_id"].nunique(dropna=False)

In [None]:
# Show the subject table as the cell's output.
# NOTE(review): this renders raw subject-level rows; if the table holds
# identifying fields, clear the output before sharing the notebook — confirm.
df_subject

In [None]:
import inspect

import matplotlib.pyplot as plt
import seaborn as sns

sns.set(style="whitegrid")
fig, axes = plt.subplots(3, 3, figsize=(12, 12), dpi=100)

# Rows 0-1 of the grid: one count bar chart per column, as (column, panel title).
cat_vars = [
    ("sex", "Sex Distribution"),
    ("ethnicity", "Ethnicity Distribution"),
    ("race", "Race Distribution"),
    ("marital_status", "Marital Status"),
    ("living_situation", "Living Situation"),
    ("group", "Study Group"),
]

# Row 2 of the grid: one histogram per column, as
# (column, bins, kde, bar colour, panel title, x-axis label).
num_vars = [
    ("age", 15, True, "#4c72b0", "Age", "Age"),
    ("monthly_income", 15, True, "#dd8452", "Monthly Income", "Monthly Income"),
    ("education", 10, False, "#55a868", "Education Level", "Years of Education"),
]

# Columns whose x tick labels are drawn at 45 degrees; all others stay horizontal.
rotated_cols = ("marital_status", "living_situation")

# seaborn >= 0.13 deprecates `palette=` without `hue=` (FutureWarning, removal
# announced for 0.14) and recommends passing the x variable as `hue` together
# with `legend=False`. Older releases have no `legend` parameter on barplot,
# so detect support from the signature instead of assuming a version.
hue_supported = "legend" in inspect.signature(sns.barplot).parameters

for idx, (col, title) in enumerate(cat_vars):
    ax = axes[idx // 3, idx % 3]
    # value_counts() excludes missing values by default.
    counts = df_subject[col].value_counts()
    palette_kwargs = {"hue": counts.index, "legend": False} if hue_supported else {}
    sns.barplot(x=counts.index, y=counts.values, ax=ax, palette="Set2", **palette_kwargs)
    ax.set_title(title)
    ax.set_ylabel("Count")
    ax.set_xlabel("")
    ax.tick_params(axis='x', rotation=45 if col in rotated_cols else 0)
    # Write each bar's count just above the bar, centred horizontally.
    for p in ax.patches:
        ax.annotate(f'{int(p.get_height())}', (p.get_x() + p.get_width() / 2., p.get_height()),
                    ha='center', va='bottom')
    sns.despine(ax=ax)

for panel, (num_col, n_bins, with_kde, bar_color, panel_title, x_label) in enumerate(num_vars):
    hist_ax = axes[2, panel]
    sns.histplot(df_subject[num_col].dropna(), bins=n_bins, kde=with_kde, ax=hist_ax, color=bar_color)
    hist_ax.set_title(panel_title)
    hist_ax.set_xlabel(x_label)
    sns.despine(ax=hist_ax)

plt.tight_layout()
plt.show()

In [None]:
# Per-"group" tally computed in SQL: each result row is
# (number of non-NULL subject_id values, group value).
group_count_sql = """
SELECT COUNT(subject_id), "group"
FROM subject
GROUP BY "group";
"""
cursor = conn.cursor()
results = cursor.execute(group_count_sql).fetchall()
results