In [1]:
import pandas as pd
import numpy as np
import random
from datetime import datetime, timedelta

In [2]:
def generate_dates(start_date, num_days, num_dates):
    dates = [start_date + timedelta(days=random.randint(0, num_days)) for _ in range(num_dates)]
    return dates

In [3]:
def generate_rs_data(num_patients):
    rs_data = []
    for i in range(1, num_patients + 1):
        patient_id = f"PAT{i:03d}"
        for visit in range(1, 6):  # Assuming 5 visits per patient
            date = generate_dates(datetime(2023, 1, 1), 365, 1)[0]
            rs_data.append({
                "USUBJID": patient_id,
                "RSSEQ": visit,
                "RSDTC": date.strftime("%Y-%m-%d"),
                "RSDY": (date - datetime(2023, 1, 1)).days,
                "RSORRES": random.choice(["CR", "PR", "SD", "PD"]),
                "RSSTRESC": random.choice(["Complete Response", "Partial Response", "Stable Disease", "Progressive Disease"]),
            })
    return pd.DataFrame(rs_data)

In [4]:
def generate_tu_data(num_patients):
    tu_data = []
    for i in range(1, num_patients + 1):
        patient_id = f"PAT{i:03d}"
        num_tumors = random.randint(1, 5)  # Each patient can have 1 to 5 tumors
        for tumor in range(1, num_tumors + 1):
            tu_data.append({
                "USUBJID": patient_id,
                "TUSEQ": tumor,
                "TULOC": random.choice(["Lung", "Liver", "Brain", "Bone"]),
                "TUMETHOD": random.choice(["CT Scan", "MRI", "Ultrasound"]),
            })
    return pd.DataFrame(tu_data)

In [5]:
def generate_tr_data(num_patients):
    tr_data = []
    for i in range(1, num_patients + 1):
        patient_id = f"PAT{i:03d}"
        num_tumors = random.randint(1, 5)  # Each patient can have 1 to 5 tumors
        for tumor in range(1, num_tumors + 1):
            for visit in range(1, 6):  # Assuming 5 visits per patient
                date = generate_dates(datetime(2023, 1, 1), 365, 1)[0]
                tr_data.append({
                    "USUBJID": patient_id,
                    "TRSEQ": visit,
                    "TRDTC": date.strftime("%Y-%m-%d"),
                    "TRDY": (date - datetime(2023, 1, 1)).days,
                    "TRLINKID": f"TUMOR{tumor:03d}",
                    "TRORRES": random.uniform(0.5, 10.0),  # Tumor size in cm
                    "TRSTRESC": random.uniform(0.5, 10.0),  # Tumor size in cm (standardized)
                })
    return pd.DataFrame(tr_data)


In [6]:
num_patients = 5
generate_rs_data(num_patients)

Unnamed: 0,USUBJID,RSSEQ,RSDTC,RSDY,RSORRES,RSSTRESC
0,PAT001,1,2023-06-25,175,PD,Complete Response
1,PAT001,2,2023-02-24,54,CR,Complete Response
2,PAT001,3,2023-12-05,338,CR,Complete Response
3,PAT001,4,2023-09-01,243,PD,Stable Disease
4,PAT001,5,2023-12-15,348,PD,Stable Disease
5,PAT002,1,2023-08-15,226,PD,Stable Disease
6,PAT002,2,2023-07-11,191,CR,Progressive Disease
7,PAT002,3,2023-11-12,315,CR,Partial Response
8,PAT002,4,2023-04-13,102,PD,Complete Response
9,PAT002,5,2023-03-01,59,PD,Progressive Disease


In [7]:
num_patients = 5
generate_tu_data(num_patients)

Unnamed: 0,USUBJID,TUSEQ,TULOC,TUMETHOD
0,PAT001,1,Bone,CT Scan
1,PAT001,2,Liver,CT Scan
2,PAT001,3,Bone,MRI
3,PAT002,1,Brain,CT Scan
4,PAT003,1,Liver,CT Scan
5,PAT003,2,Brain,Ultrasound
6,PAT003,3,Bone,Ultrasound
7,PAT003,4,Liver,Ultrasound
8,PAT004,1,Liver,MRI
9,PAT004,2,Lung,MRI


In [8]:
num_patients = 5
generate_tr_data(num_patients)

Unnamed: 0,USUBJID,TRSEQ,TRDTC,TRDY,TRLINKID,TRORRES,TRSTRESC
0,PAT001,1,2023-11-23,326,TUMOR001,4.429638,5.715001
1,PAT001,2,2023-09-25,267,TUMOR001,3.423087,3.314348
2,PAT001,3,2023-02-27,57,TUMOR001,9.496162,4.778881
3,PAT001,4,2023-11-10,313,TUMOR001,4.525736,8.578379
4,PAT001,5,2023-01-09,8,TUMOR001,8.991569,7.749790
...,...,...,...,...,...,...,...
70,PAT005,1,2023-09-23,265,TUMOR004,3.863206,8.856597
71,PAT005,2,2023-09-23,265,TUMOR004,2.726119,9.650541
72,PAT005,3,2023-08-17,228,TUMOR004,4.141048,7.212950
73,PAT005,4,2023-05-13,132,TUMOR004,9.694573,8.898449


In [None]:
def generate_sdtm_data(num_patients):
    rs_df = generate_rs_data(num_patients)
    tu_df = generate_tu_data(num_patients)
    tr_df = generate_tr_data(num_patients)
    
    rs_df.to_csv("RS.csv", index=False)
    tu_df.to_csv("TU.csv", index=False)
    tr_df.to_csv("TR.csv", index=False)
    
    print("SDTM data files generated: RS.csv, TU.csv, TR.csv")

# Specify the number of patients
num_patients = 100
generate_sdtm_data(num_patients)