In [272]:
import json
import pandas as pd

In [109]:
# Load the raw input table from SIMSLD.csv, resolved relative to the working directory.
df = pd.read_csv('SIMSLD.csv')

In [110]:
# Preview the first rows to check the columns that were read.
df.head()

Unnamed: 0,Subject_ID,Time,Visit,SLD,dSLD,Target_PD,Non_Target_PD,New_Lesion_PD,Rebound_PD,Any_PD,trial_arm
0,1,77,2,36.291448,-74.382599,0,0,0,0,0,P
1,1,175,3,22.187976,-84.337956,0,0,0,0,0,P
2,1,245,4,18.62392,-86.853751,0,0,0,0,0,P
3,1,329,5,16.377856,-88.439202,0,0,0,0,0,P
4,1,413,6,15.104767,-89.33785,0,0,0,0,0,P


In [112]:
def subjects(df, output_path='subjects.csv'):
    """Write one row per (subject, trial arm) pair to a CSV file.

    The output columns are ``individual_id``, ``trial_arm``, ``trial``,
    ``trial_internal_id``, ``regimen``, ``drug`` and ``trial_arm_type``,
    sorted by subject and then by trial arm.

    Parameters
    ----------
    df : pandas.DataFrame
        Input frame; must contain the ``Subject_ID`` and ``trial_arm``
        columns. It is not modified.
    output_path : str, optional
        Destination of the CSV file. Defaults to ``'subjects.csv'``.

    Returns
    -------
    None
        The result is only written to ``output_path``.
    """
    subject_columns = ['Subject_ID', 'trial', 'trial_internal_id',
                       'trial_arm', 'regimen', 'drug', 'trial_arm_type']

    # ``assign`` returns a new frame, so the caller's frame keeps its
    # original columns.
    enriched = df.assign(
        trial='phase 3',
        trial_internal_id=20221104,
        regimen=df['trial_arm'],
        drug=df['trial_arm'],
        # Arm 'P' is the active comparator; every other arm is experimental.
        trial_arm_type=df['trial_arm'].eq('P').map(
            {True: 'active comparator', False: 'experimental'}),
    )

    # A single grouped pass replaces the per-subject loop that re-concatenated
    # the growing result on every iteration.
    subjects_df = (
        enriched[subject_columns]
        .groupby(['Subject_ID', 'trial_arm'])
        .first()
        .reset_index()
        .rename(columns={'Subject_ID': 'individual_id'})
    )
    subjects_df.to_csv(output_path, index=False)
    

In [113]:
def biomarkers(df, output_path='biomarkers.csv'):
    """Reshape the per-visit measurements into long format and write a CSV.

    Each input row yields one output row per measurement listed in
    ``measurement_units``. Rows are ordered by individual, then by
    measurement (in the order listed below), then by the input row order.

    Parameters
    ----------
    df : pandas.DataFrame
        Input frame. After lower-casing the column names it must provide
        ``time``, ``visit`` and every key of ``measurement_units``, plus
        ``Subject_ID`` (renamed to ``individual_id``). It is not modified.
    output_path : str, optional
        Destination of the CSV file. Defaults to ``'biomarkers.csv'``.

    Returns
    -------
    None
        The result is only written to ``output_path``.
    """
    # Measurement column -> unit written to ``measurement_unit``
    # (None is written as an empty CSV field).
    measurement_units = {
        'sld': 'mm',
        'dsld': 'mm/day',
        'target_pd': None,
        'non_target_pd': None,
        'new_lesion_pd': None,
        'rebound_pd': None,
        'any_pd': None
    }
    id_columns = ['individual_id', 'trial', 'trial_day']
    visit_columns = ['visit_num', 'visit_desc', 'params']

    # ``rename`` returns a new frame, so the assignments below do not touch
    # the caller's frame.
    df = df.rename(columns={'Subject_ID': 'individual_id'})
    df.columns = df.columns.str.lower()
    df['trial'] = 'phase 3'
    df['trial_day'] = df['time']
    df['visit_num'] = df['visit']
    df['visit_desc'] = df['visit'].astype(str)
    df['params'] = df['visit_num'].map(
        lambda visit: {'visit_desc': 'Visit {}'.format(str(visit))})

    # Collect the pieces and concatenate once instead of growing the result
    # inside the loop.
    frames = []
    for _, individual_rows in df.groupby('individual_id'):
        for measurement, unit in measurement_units.items():
            # ``rename``/``assign`` build a new frame rather than writing into
            # a slice of the group (avoids SettingWithCopyWarning).
            frames.append(
                individual_rows[id_columns + [measurement] + visit_columns]
                .rename(columns={measurement: 'measurement_value'})
                .assign(measurement_name=measurement, measurement_unit=unit)
            )

    if frames:
        biomarkers_df = pd.concat(frames, ignore_index=True)
    else:
        # No individuals: still emit a file with the expected header.
        biomarkers_df = pd.DataFrame(
            columns=id_columns + ['measurement_value'] + visit_columns
            + ['measurement_name', 'measurement_unit'])
    biomarkers_df.to_csv(output_path, index=False)

In [114]:
# Export the subject table and the long-format biomarker table from the loaded frame;
# both functions write a CSV file and return nothing.
subjects(df)
biomarkers(df)

In [275]:
def trials(df, output_path='trials.json'):
    """Write the trial / trial-arm hierarchy found in ``df`` to a JSON file.

    The file holds a list with one object per distinct
    (``trial``, ``trial_internal_id``) pair, ordered by descending row count.
    Each object has ``name``, ``internal_id`` and ``trial_arms``; every arm
    carries ``name``, ``type``, ``params`` and ``regimen``.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain the columns ``trial``, ``trial_internal_id``,
        ``trial_arm`` and ``trial_arm_type``.
    output_path : str, optional
        Destination of the JSON file. Defaults to ``'trials.json'``.

    Returns
    -------
    None
        The result is only written to ``output_path``.

    Raises
    ------
    ValueError
        If a trial arm does not map to exactly one ``trial_arm_type``.
    """
    def _to_builtin(value):
        # json cannot serialise numpy integer scalars; they expose ``item()``,
        # which returns the equivalent built-in Python value.
        if hasattr(value, 'item'):
            return value.item()
        raise TypeError(
            'Object of type {} is not JSON serializable'.format(type(value).__name__))

    # 1. get all unique (trial name, internal id) pairs from df
    trial_keys = df[['trial', 'trial_internal_id']].value_counts().index.tolist()

    # 2. for each trial, collect its arms
    trial_records = []
    for trial_name, internal_id in trial_keys:
        # Match on the internal id as well as the name, so two trials sharing
        # a name do not pick up each other's arms.
        in_trial = (df['trial'] == trial_name) & (df['trial_internal_id'] == internal_id)

        trial_arm_list = []
        for arm_name, arm_rows in df[in_trial].groupby('trial_arm'):
            arm_types = arm_rows['trial_arm_type'].dropna().unique()
            if len(arm_types) != 1:
                raise ValueError(
                    'trial arm {!r} of trial {!r} must have exactly one '
                    'trial_arm_type, found {}'.format(
                        arm_name, trial_name, list(arm_types)))
            arm_type = arm_types[0]
            trial_arm_list.append({
                'name': arm_name,
                'type': arm_type,
                'params': {'type': arm_type},
                # NOTE(review): 'get_drug_id()' is written out as a literal
                # string, not called -- presumably a placeholder for a real
                # drug id; confirm before consuming the file.
                'regimen': [{'name': arm_name,
                             'treatments': [{'drug_id': 'get_drug_id()'}]}],
            })

        trial_records.append({
            'name': trial_name,
            'internal_id': internal_id,
            'trial_arms': trial_arm_list,
        })

    json_object = json.dumps(trial_records, indent=4, default=_to_builtin)
    with open(output_path, "w") as outfile:
        outfile.write(json_object)

In [276]:
# Reload the exported subjects table and build trials.json from it.
# NOTE(review): this rebinds `df` from the raw SIMSLD table to the subjects table, so
# re-running the earlier cells after this one would pass them the wrong frame.
df = pd.read_csv('subjects.csv')
trials(df)

Unnamed: 0,individual_id,trial_arm,trial,trial_internal_id,regimen,drug,trial_arm_type
0,1,1.3,phase 3,20221104,1.3,1.3,experimental
1,1,1.6,phase 3,20221104,1.6,1.6,experimental
2,1,1.9,phase 3,20221104,1.9,1.9,experimental
3,1,P,phase 3,20221104,P,P,active comparator
4,2,1.3,phase 3,20221104,1.3,1.3,experimental
