In [1]:
from pathlib import Path
import polars as pl
from cogmood_analysis.load import load_task, boxcoxmask, nanboxcox, load_survey, proc_survey
import cogmood_analysis.survey_helpers as sh
from scipy.stats import boxcox
import seaborn as sns
from matplotlib import pyplot as plt
import numpy as np
from numpy.typing import ArrayLike, NDArray
from joblib import Parallel, delayed
from datetime import datetime
import json
# Raise the polars display limits to 300 rows and 300 columns in a single call.
pl.Config(tbl_rows=300, tbl_cols=300)

<polars.config.Config at 0x11b28ed50>

In [2]:
# Directory layout for the survey data, relative to the notebook location.
data_dir = Path('../data/')
survey_dir = data_dir / 'survey'
complete_dir = survey_dir / 'complete'
ongoing_dir = survey_dir / 'ongoing'

# Cut-off timestamp: only responses with survey_date strictly after this are kept.
# Built directly as a naive datetime instead of parsing a one-element polars Series.
start_time = datetime(2025, 7, 11, 0, 0, 0)

In [3]:
# Subject IDs to exclude; rows with these sub_id values are filtered out of srdf below.
# NOTE(review): the reason each ID is excluded is not recorded here -- consider documenting it.
bad_subids = [
    "no0z2yzyloa58hcsb5cyxxwz",
    "jvj53cg6gm44jattfxws849e",
    "fqebjziam9e7e9vnpzqghiv9",
    "lwk7rgfebcajlfttz1f3euzs",
    "b1c6cj5oy3wv9sh4qyj379s9",
    "in6dp60i65swuwbnbyjz8m6i",
    "s7qczd3ccbwvvv54rkg2s3xh",
    "3y3tn37wv2libdutqxbcat3d",
    "p1h1eval1q08k2beesprnfwq",
]

In [4]:
# Load every JSON response in the "complete" directory and hand the list to proc_survey.
# Directory globbing yields files in filesystem-dependent order, so the paths are sorted
# to keep the row order of srdf (and of the CSV exported from it) stable across runs.
survey_paths = sorted(complete_dir.glob("*.json"))
survey_resps = [load_survey(sr_path) for sr_path in survey_paths]
srdf = proc_survey(survey_resps)


In [5]:
# Keep rows whose survey_date is after start_time and whose sub_id is not excluded.
after_start = pl.col('survey_date') > start_time
not_excluded = ~pl.col('sub_id').is_in(bad_subids)
srdf = srdf.filter(after_start & not_excluded)

In [6]:
# Display order for the screening groups, and a lookup from group name to its position.
group_order = [
    'hv',
    'dep',
    'anx',
    'atn',
    'dep_anx',
    'dep_atn',
    'anx_atn',
    'dep_anx_atn',
    'othermh',
]
order_mapping = dict(zip(group_order, range(len(group_order))))

In [8]:
# Show the oldest and youngest reported age as a pair of one-cell frames.
age_expr = pl.col('age')
srdf.select(age_expr.max()), srdf.select(age_expr.min())

(shape: (1, 1)
 ┌──────┐
 │ age  │
 │ ---  │
 │ f64  │
 ╞══════╡
 │ 84.0 │
 └──────┘,
 shape: (1, 1)
 ┌──────┐
 │ age  │
 │ ---  │
 │ f64  │
 ╞══════╡
 │ 18.0 │
 └──────┘)

In [9]:
# Number of unique participants per screening group, listed in group_order.
# The stray debugging literal that used to follow this expression is removed so the
# cell result is the frame itself rather than a (frame, str) tuple.
(
    srdf
    .group_by('screen_group')
    .agg(pl.n_unique('sub_id').alias('n'))
    # Sort key: each group's position in group_order via order_mapping.
    # NOTE(review): if the installed polars keeps the String dtype in `replace`, the keys
    # sort as text -- still correct for fewer than ten groups; confirm before adding more.
    .sort(pl.col('screen_group').replace(order_mapping))
)

(shape: (9, 2)
 ┌──────────────┬──────┐
 │ screen_group ┆ n    │
 │ ---          ┆ ---  │
 │ str          ┆ u32  │
 ╞══════════════╪══════╡
 │ hv           ┆ 1039 │
 │ dep          ┆ 153  │
 │ anx          ┆ 718  │
 │ atn          ┆ 105  │
 │ dep_anx      ┆ 943  │
 │ dep_atn      ┆ 47   │
 │ anx_atn      ┆ 231  │
 │ dep_anx_atn  ┆ 807  │
 │ othermh      ┆ 25   │
 └──────────────┴──────┘,
 'foo')

In [10]:
# Manual per-subject corrections to the free-text 'fried' column, applied in one pass.
# The three subject IDs are distinct, so at most one branch matches any given row.
# NOTE(review): the slice offsets (25 and 279) are hand-picked per response -- presumably
# trimming unwanted text; confirm against the raw answers.
subject = pl.col('sub_id')
fried_text = pl.col('fried')
srdf = srdf.with_columns(
    pl.when(subject == '80mpipxnvluq0stsd3o9ln23')
    .then(fried_text.str.slice(25))  # drop the first 25 characters
    .when(subject == 'svte0ghvvjo62vbd7xohqkfr')
    .then(pl.lit("Dysfunctional family situation and abuse by authority figures"))
    .when(subject == 'e4wyi6h1clyi69oxty3egh91')
    .then(fried_text.str.slice(0, 279))  # keep only the first 279 characters
    .otherwise(fried_text)
    .alias('fried')
)

In [14]:
# Export the filtered and corrected survey frame as CSV in the survey directory.
survey_csv_path = survey_dir / 'survey_responses.csv'
srdf.write_csv(survey_csv_path)