### Import data into database

In [None]:
from activity_tracker import utils
import sqlite3
import pandas as pd
import pathlib
import re

# Import raw data into raw schema: the SQLite database file that receives the
# data and the directory holding the CSV exports.
db_path = "../data/sqlite/activity_tracker.db"
data_dir = pathlib.Path("../data/raw/")

# File names inside data_dir that the import loop skips instead of loading.
excluded_files = [
    "fitbitCoreTemperature_merged.csv",
    "fitbitSkinTemperature_merged.csv",
    "weightLogInfo_merged.csv",
    "mde_clinical_data.csv",
]

In [None]:
def camel_to_snake(name: str) -> str:
    """Turn a camelCase name into a lower-case snake_case name.

    Every ``_merged`` occurrence is removed first. An underscore is then
    inserted in front of each uppercase letter that directly follows a
    lowercase letter or a digit, and the whole result is lower-cased.
    Runs of consecutive capitals are therefore kept together
    (``"minuteMETsNarrow"`` becomes ``"minute_mets_narrow"``).
    """
    stem = name.replace("_merged", "")
    with_separators = re.sub(r'(?<=[a-z0-9])([A-Z])', r'_\1', stem)
    return with_separators.lower()

# Load every non-excluded CSV in data_dir into its own table, named after the
# file stem converted to snake_case. Existing tables are left untouched, so
# the cell can be re-run without duplicating or overwriting data.
conn = sqlite3.connect(db_path)
try:
    # glob() yields files in arbitrary order; sorting keeps the creation order
    # and the printed progress reproducible between runs.
    for file_path in sorted(data_dir.glob("*.csv")):
        if file_path.name in excluded_files:
            continue

        table_name = camel_to_snake(file_path.stem)

        # Look the table up before parsing the CSV so a re-run does not pay
        # for reading files whose data is already in the database.
        already_present = conn.execute(
            "SELECT 1 FROM sqlite_master "
            "WHERE type IN ('table', 'view') AND name = ?",
            (table_name,),
        ).fetchone()
        if already_present:
            print(f"Skipping {table_name} — table already exists.")
            continue

        df = pd.read_csv(file_path)
        try:
            df.to_sql(table_name, conn, if_exists="fail", index=False)
        except ValueError:
            # Fallback: pandas raises ValueError with if_exists="fail" when
            # its own existence check finds the table.
            print(f"Skipping {table_name} — table already exists.")
        else:
            print(f"Table {table_name} created with {len(df)} rows.")
finally:
    # Release the database handle even if reading or writing a file fails.
    conn.close()

### Create a subject table

In [None]:
# Load Excel file
# The workbook is opened in a `with` block so its file handle is released even
# if reading one of the sheets fails; `xls` is closed once the block exits.
excel_path = pathlib.Path("../data/raw/MDE clinical data.xlsx")
with pd.ExcelFile(excel_path) as xls:
    # Sheets are selected by position: the first is labelled "control" below,
    # the second "exercise".
    df_control = pd.read_excel(xls, sheet_name=xls.sheet_names[0])
    df_exercise = pd.read_excel(xls, sheet_name=xls.sheet_names[1])

# Add group labels
df_control["group"] = "control"
df_exercise["group"] = "exercise"

# Stack both groups into one frame with a fresh 0..n-1 index.
df_combined = pd.concat([df_control, df_exercise], ignore_index=True)

# Columns kept for the subject table: workbook headers plus the added "group"
# label.
subject_columns = [
    "Participant ID", "group", "Sex", "Age", "Eth", "Race",
    "Mth Inc", "Educ", "Mari", "Liv Sit"
]
df_subject = df_combined[subject_columns].copy()

# Normalise headers to lower-case snake_case. regex=False makes both
# replacements literal on every pandas version; without it the meaning of "."
# depends on the installed version's default for `regex`.
df_subject.columns = (
    df_subject.columns.str.strip()
    .str.lower()
    .str.replace(" ", "_", regex=False)
    .str.replace(".", "_", regex=False)
)

# Rename columns
rename_map = {
    "participant_id": "subject_id",
    "eth": "ethnicity",
    "mth_inc": "monthly_income",
    "educ": "education",
    "mari": "marital_status",
    "liv_sit": "living_situation"
}
df_subject = df_subject.rename(columns=rename_map)

# Order rows by subject and rebuild the index to match the new order.
df_subject = df_subject.sort_values(by="subject_id").reset_index(drop=True)

excel_path = pathlib.Path("../data/raw/MDE clinical data.xlsx")
data_dictionary = utils.load_data_dictionary(excel_path)

# Apply data dictionary to map integer values to strings. Only dictionary
# entries whose name matches a subject column are used.
# NOTE(review): if the mappings are plain dicts, values without an entry
# become NaN after .map() — confirm that is intended.
coded_columns = [name for name in data_dictionary if name in df_subject.columns]
for name in coded_columns:
    mapping = data_dictionary[name]
    df_subject[name] = df_subject[name].map(mapping)
        
# Write the subject table, replacing any previous version of it.
conn = sqlite3.connect(db_path)
try:
    df_subject.to_sql("subject", conn, if_exists="replace", index=False)
    conn.commit()
finally:
    # Close the connection even if the write fails, so the database file is
    # not left held open by this session.
    conn.close()