In [None]:
import cdt
import dcct

from importlib import reload
from joblib import Parallel, delayed
import numpy as np
import numpy.random as npr
import pandas as pd
import sqlite3

In [None]:
# Paths used by the CGM-generation cell: all three are passed to
# dcct.generate_CGM_data, and output_path is also handed to
# cdt.create_clean_directory before generation starts.
db_path = f'{cdt.base_path}/data/study_data.db'
smbg_data_path = 'smbg_profile_data_formatted'
output_path = 'time_series_data'

## Load the motifs

In [None]:
# Load the 24-hour RTSS set from its JSON file; the motifs are read from
# its 'RTSSs' entry in the next cell.
omega = dcct.load_rtss_set(f'{cdt.base_path}/omega/RTSS_sets/omega_tss_24hr.json')

In [None]:
# The motifs are whatever the loaded set stores under its 'RTSSs' key.
motifs = omega['RTSSs']

## Load the mapping from motif index to daily CGM profiles

In [None]:
# Read the daily-profile classification table and keep only the subject ID
# and date of each profile, indexed by the motif it was assigned to.
dp_df = (
    pd.read_csv('../../dp_classification/pmill_runs/2022_02_09/dp_data/all_dp_data.csv')
    [['Motif_Idx', 'SID', 'Date']]
    .set_index('Motif_Idx')
)
dp_df.head(3)

In [None]:
def name_change(k):
    """Shorten DIAMOND study subject IDs; return any other ID unchanged.

    An ID containing 'DIAMONDT1D' becomes 'DIA1-<x>' and one containing
    'DIAMONDT2D' becomes 'DIA2-<x>', where <x> is the second
    hyphen-separated field of the original ID.

    Raises:
        IndexError: if a DIAMOND ID contains no hyphen.
    """
    # Checked in order, so 'DIAMONDT1D' wins if both tags are present.
    prefix_by_tag = (
        ('DIAMONDT1D', 'DIA1-'),
        ('DIAMONDT2D', 'DIA2-'),
    )
    for study_tag, short_prefix in prefix_by_tag:
        if study_tag in k:
            return short_prefix + k.split('-')[1]
    return k


# Normalise the subject IDs element-wise; name_change is passed directly
# because a wrapping lambda adds nothing.
dp_df['SID'] = dp_df['SID'].apply(name_change)

In [None]:
# Build the (SID, Date) key of every daily profile column-wise. Zipping the
# two columns gives the same tuples as a row-wise apply(axis=1) without
# constructing a Series per row, and it also behaves sensibly on an empty frame.
dp_df['Key'] = list(zip(dp_df['SID'], dp_df['Date']))

In [None]:
# Map each motif index to the sorted list of (SID, Date) keys of the daily
# profiles assigned to that motif.
# NOTE(review): 483 is presumably the number of motifs (len(motifs)) — confirm
# before replacing the literal.
motif_dp_indices = {}
for motif_idx in range(483):
    # Selecting with a *list* label always returns a Series of keys, whether
    # the motif has one profile or many, so no Series-vs-DataFrame special
    # case is needed. A motif index absent from dp_df still raises KeyError.
    motif_keys = dp_df.loc[[motif_idx], 'Key']
    motif_dp_indices[motif_idx] = sorted(motif_keys)

In [None]:
# The per-motif keys have been copied into motif_dp_indices, so the frame is
# dropped here (presumably to free kernel memory).
del dp_df

## Generate CGM data

In [None]:
# Build a lookup from patient ID (MASK_PAT) to treatment arm (GROUP) out of
# master_c.csv, then collect the patient IDs belonging to each arm.
df = pd.read_csv(f'{cdt.base_path}/Study_Data/dcct/master_c.csv')
pat_arm_map = df.set_index('MASK_PAT')['GROUP']
print(pat_arm_map.head(3))

intensive_PIDs, conventional_PIDs = (
    pat_arm_map[pat_arm_map == arm].index
    for arm in ('Intensive', 'Conventional')
)

print(f'Number of intensive-therapy patients: {len(intensive_PIDs)}')
print(f'Number of conventional-therapy patients: {len(conventional_PIDs)}')

In [None]:
# Prepare the output directory before any worker writes to it.
cdt.create_clean_directory(output_path)

# One independent random stream per patient, all spawned from a single root
# seed so that the run is reproducible.
seed_seq = npr.SeedSequence(62812474)

PID_seeds = seed_seq.spawn(len(intensive_PIDs) + len(conventional_PIDs))
PID_rng_streams = list(map(npr.default_rng, PID_seeds))

# Run the generation for the intensive arm first, then the conventional arm,
# five worker jobs at a time.
# NOTE(review): PID_rng_streams[PID - 1] assumes the PIDs are the integers
# 1..N across both arms — confirm against master_c.csv.
# NOTE: parallel_stats is reassigned on every pass, so after the loop it only
# holds the conventional-arm results (same as the original two-call version).
for arm_PIDs in (intensive_PIDs, conventional_PIDs):
    parallel_stats = Parallel(n_jobs=5)(
        delayed(dcct.generate_CGM_data)(
            PID,
            PID_rng_streams[PID - 1],
            smbg_data_path,
            db_path,
            output_path,
        )
        for PID in arm_PIDs
    )