In [1]:
#To remove all variables from the namespace
%reset -f

#Creating a log file to record the commands and outputs
%logstop
%logstart -t -o "E:/Python Clinical Course/ADAE log.txt"

Logging hadn't been started.
Activating auto-logging. Current session state plus future input saved.
Filename       : E:/Python Clinical Course/ADAE log.txt
Mode           : backup
Output logging : True
Raw input log  : False
Timestamping   : True
State          : active


In [2]:
import pandas as pd
import numpy as np
import os
import pyreadstat
from datetime import datetime
from pathlib import Path

# Define paths (adjust these to your actual paths)
adam_path = r"E:\Python Clinical Course\ADAM datasets\ADaM Datasets"
sdtm_path = r"E:\Python Clinical Course\SDTM"
raw_path = r"E:\Python Clinical Course\RAW"

In [3]:
sdtm_datasets = {}

for file in os.listdir(sdtm_path):
    if file.endswith(".sas7bdat"):
        dataset_name = file.replace(".sas7bdat", "")
        file_path = os.path.join(sdtm_path, file)
        df, meta = pyreadstat.read_sas7bdat(file_path)
        sdtm_datasets[dataset_name] = df

In [4]:

# 1) Read the main SDTM dataset
ae1 = sdtm_datasets.get("ae")
ae1 = ae1.drop(columns=['ARM', 'ACTARM'], errors='ignore')
ae1 = ae1.sort_values(['USUBJID', 'AESEQ']).reset_index(drop=True)

In [5]:
sdtm_suppae = sdtm_datasets.get("suppae")
sdtm_suppae["AESEQ"] = pd.to_numeric(sdtm_suppae["IDVARVAL"], errors='coerce')
suppae = sdtm_suppae.sort_values(by=["USUBJID", "AESEQ"])

suppae_trans = suppae.pivot_table(
    index=["USUBJID", "AESEQ"],
    columns="QNAM",
    values="QVAL",
    aggfunc="first"
).reset_index()


In [6]:
adam_datasets = {}

for file in os.listdir(adam_path):
    if file.endswith(".sas7bdat"):
        dataset_name = file.replace(".sas7bdat", "")
        file_path = os.path.join(adam_path, file)
        df, meta = pyreadstat.read_sas7bdat(file_path)
        adam_datasets[dataset_name] = df

In [7]:
# 4) Merge with ADSL
adsl = adam_datasets.get("adsl")

In [8]:

# Merge AE, SUPPAE, ADSL
ae2 = pd.merge(ae1, suppae_trans, on=["USUBJID", "AESEQ"], how="left")
ae3 = pd.merge(ae2, adsl, on="USUBJID", how="inner")


In [9]:

# Derive Severity, Relatedness, Action Taken
severity_map = {
    "MILD": (1, "Mild"),
    "MODERATE": (2, "Moderate"),
    "SEVERE": (3, "Severe"),
    "LIFE-THREATENING": (4, "Life-threatening")
}
ae3[["ASEVN", "ASEV"]] = ae3["AESEV"].apply(
    lambda x: severity_map.get(x, (np.nan, np.nan))
).apply(pd.Series)


In [10]:

rel_map = {
    "DEFINITELY RELATED": (1, "Definitely related"),
    "PROBABLY RELATED": (2, "Probably related"),
    "POSSIBLY RELATED": (3, "Possibly related"),
    "UNLIKELY RELATED": (4, "Unlikely related"),
    "NOT RELATED": (5, "Not related")
}
ae3[["ARELN", "AREL"]] = ae3["AEREL"].apply(
    lambda x: rel_map.get(x, (np.nan, np.nan))
).apply(pd.Series)

ae3["RELGR1"] = ae3["ARELN"].apply(lambda x: "Related" if x in [1, 2, 3] else "Not Related")


In [11]:

acn_map = {
    "NONE": (1, "None"),
    "INTERRUPTED": (2, "Interrupted"),
    "DISCONTINUED": (3, "Discontinued"),
    "DRUG WITHDRAWN": (3, "Discontinued"),
    "NOT APPLICABLE": (4, "Not Applicable"),
    "UNKNOWN": (5, "Unknown"),
    "DOSE NOT CHANGED": (6, "Dose not changed")
}
ae3[["AACNN", "AACN"]] = ae3["AEACN"].apply(
    lambda x: acn_map.get(x, (np.nan, np.nan))
).apply(pd.Series)


In [14]:
from pandas.tseries.offsets import MonthEnd 
def parse_date(dt_str, default_time):
    if pd.isna(dt_str):
        return pd.NaT, None, None
    
    parts = str(dt_str).split("T")
    date_part = parts[0]
    time_part = parts[1] if len(parts) > 1 and parts[1] else default_time

    dt_parts = date_part.split("-")
    try:
        if len(dt_parts) == 3:
            date = datetime.strptime(date_part, "%Y-%m-%d")
            dt_flag = ""  # Complete date
        elif len(dt_parts) == 2:
            date = datetime.strptime(f"{dt_parts[0]}-{dt_parts[1]}-01", "%Y-%m-%d")
            dt_flag = "D"  # Missing day
        elif len(dt_parts) == 1:
            date = datetime.strptime(f"{dt_parts[0]}-01-01", "%Y-%m-%d")
            dt_flag = "M"  # Missing month and day
        else:
            return pd.NaT, None, None
        return date, time_part, dt_flag
    except:
        return pd.NaT, None, None

# --- Apply to AESTDTC (Start Date)
ae3[['ASTDT', '_ast_time', 'ASTDTF']] = ae3['AESTDTC'].apply(lambda x: pd.Series(parse_date(x, "00:01")))

# --- Apply to AEENDTC (End Date)
ae3[['AENDT_raw', '_aen_time', 'AENDTF']] = ae3['AEENDTC'].apply(lambda x: pd.Series(parse_date(x, "23:59")))

# --- Adjust incomplete END date to last day of month
ae3['AENDT'] = np.where(
    ae3['AENDTF'] == 'D',
    (ae3['AENDT_raw'] + MonthEnd(0)),
    ae3['AENDT_raw']
)
ae3['AENDT'] = pd.to_datetime(ae3['AENDT'])

# --- Construct ISO datetime (ASTDTM, AENDTM)
ae3['ASTDTM'] = pd.to_datetime(ae3['ASTDT'].astype(str) + ' ' + ae3['_ast_time'], errors='coerce')
ae3['AENDTM'] = pd.to_datetime(ae3['AENDT'].astype(str) + ' ' + ae3['_aen_time'], errors='coerce')

# --- Derive ASTDY and AENDY
ae3["ASTDY"] = (ae3["ASTDT"] - pd.to_datetime(ae3["TRTSDT"])).dt.days + 1
ae3["AENDY"] = (ae3["AENDT"] - pd.to_datetime(ae3["TRTSDT"])).dt.days + 1

# --- Derive TRTEMFL
ae3["TRTEMFL"] = np.where(
    (ae3["AETRTEM"] == 'Y') |
    ((pd.to_datetime(ae3["TRTSDT"]) <= ae3["ASTDT"]) & (ae3["ASTDT"] <= pd.to_datetime(ae3["RFPENDTC"]))),
    "Y", ""
)



In [15]:

adae_vars = [
    "STUDYID", "USUBJID", "SUBJID", "SITEID", "AGE", "AGEU", "SEX", "RACE",
    "ETHNIC", "COUNTRY", "SAFFL", "TRT01P", "TRT01PN", "TRT01A", "TRT01AN",
    "TRTSDT", "TRTEDT", "AESEQ", "AETERM", "AEDECOD", "AEBODSYS", "AEBDSYCD",
    "AELLT", "AELLTCD", "AEPTCD", "AEHLT", "AEHLTCD", "AEHLGT", "AEHLGTCD",
    "AESOC", "AESOCCD", "AESTDTC", "ASTDT", "AEENDTC", "AENDT",
    "ASTDY", "AENDY", "TRTEMFL", "AESER", "ASEV", "ASEVN",
    "AREL", "ARELN", "RELGR1", "AEACNOTH", "AACN", "AACNN", "AEOUT",
    "AESCONG", "AESDISAB", "AESDTH", "AESHOSP", "AESLIFE", "AESMIE"
]


In [16]:
existing_columns = [col for col in adae_vars if col in ae3.columns]

In [17]:
ae4 = ae3[existing_columns].copy()

In [18]:
column_labels={
'STUDYID': "Study Identifier",
'USUBJID': "Unique Subject Identifier",
'SUBJID': "Subject Identifier for the Study",
'SITEID': "Study Site Identifier",
'AGE': "Age",
'AGEU': "Age Units",
'SEX': "Sex",
'RACE': "Race",
'ETHNIC': "Ethnicity",
'COUNTRY': "Country",
'SAFFL': "Safety Population Flag",
'TRT01P': "Planned Treatment for Period 01",
'TRT01PN': "Planned Treatment for Period 01 (N)",
'TRT01A': "Actual Treatment for Period 01",
'TRT01AN': "Actual Treatment for Period 01 (N)",
'TRTSDT': "Date of First Exposure to Treatment",
'TRTEDT': "Date of Last Exposure to Treatment",
'AESEQ': "Sequence Number",
'AETERM': "Reported Term for the Adverse Event",
'AEDECOD': "Dictionary-Derived Term",
'AEBODSYS': "Body System or Organ Class",
'AEBDSYCD': "Body System or Organ Class Code",
'AELLT': "Lowest Level Term",
'AELLTCD': "Lowest Level Term Code",
'AEPTCD': "Preferred Term Code",
'AEHLT': "High Level Term",
'AEHLTCD': "High Level Term Code",
'AEHLGT': "High Level Group Term",
'AEHLGTCD': "High Level Group Term Code",
'AESOC': "Primary System Organ Class",
'AESOCCD': "Primary System Organ Class Code",
'AESTDTC': "Start Date/Time of Adverse Event",
'ASTDT': "Analysis Start Date",
'ASTDTM': "Analysis Start Datetime",
'ASTDTF': "Analysis Start Date Imputation Flag",
'AEENDTC': "End Date/Time of Adverse Event",
'AENDT': "Analysis End Date",
'AENDTM': "Analysis End Datetime",
'AENDTF': "Analysis End Date Imputation Flag",
'AESTDY': "Study Day of Start of Adverse Event",
'ASTDY': "Analysis Start Relative Day",
'AEENDY': "Study Day of End of Adverse Event",
'AENDY': "Analysis End Relative Day",
'TRTEMFL': "Treatment Emergent Analysis Flag",
'AESER': "Serious Event",
'ASEV': "Analysis Severity/Intensity",
'ASEVN': "Analysis Severity/Intensity (N)",
'AREL': "Analysis Causality",
'ARELN': "Analysis Causality (N)",
'RELGR1': "Pooled Casuality Group 1",
'AEACNOTH': "Other Action Taken",
'AACN': "Action Taken with Study Treatment",
'AACNN': "Action Taken with Study Treatment Numeric",
'AEOUT': "Outcome of Adverse Event",
'AESCONG': "Congenital Anomaly or Birth Defect",
'AESDISAB': "Persist or Signif Disability/Incapacity",
'AESDTH': "Results in Death",
'AESHOSP': "Requires or Prolongs Hospitalization",
'AESLIFE': "Is Life Threatening",
'AESMIE': "Other Medically Important Serious Event",
}

In [19]:
ae4.attrs['column_labels'] = column_labels

In [20]:
for col in ae4.columns:
    label = ae4.attrs.get('column_labels', {}).get(col, '')
    print(f"{col}: {label}")

USUBJID: Unique Subject Identifier
SUBJID: Subject Identifier for the Study
SITEID: Study Site Identifier
AGE: Age
AGEU: Age Units
SEX: Sex
RACE: Race
ETHNIC: Ethnicity
COUNTRY: Country
SAFFL: Safety Population Flag
TRT01P: Planned Treatment for Period 01
TRT01PN: Planned Treatment for Period 01 (N)
TRT01A: Actual Treatment for Period 01
TRT01AN: Actual Treatment for Period 01 (N)
TRTSDT: Date of First Exposure to Treatment
TRTEDT: Date of Last Exposure to Treatment
AESEQ: Sequence Number
AETERM: Reported Term for the Adverse Event
AEDECOD: Dictionary-Derived Term
AEBODSYS: Body System or Organ Class
AEBDSYCD: Body System or Organ Class Code
AELLT: Lowest Level Term
AELLTCD: Lowest Level Term Code
AEPTCD: Preferred Term Code
AEHLT: High Level Term
AEHLTCD: High Level Term Code
AEHLGT: High Level Group Term
AEHLGTCD: High Level Group Term Code
AESOC: Primary System Organ Class
AESOCCD: Primary System Organ Class Code
AESTDTC: Start Date/Time of Adverse Event
ASTDT: Analysis Start Date
A

In [22]:
#  Save the final dataset
output = "E:/Python Clinical Course/ADAM datasets"
output_path = f"{output}/ADAE.csv"
ae4.to_csv(output_path, index=False)

print(f"ADAE dataset created successfully with {len(ae4)} subjects and {len(ae4.columns)} variables.")
print(f"Dataset saved to: {output_path}")

# Display basic info about the dataset
print("\nDataset Info:")
print(f"Shape: {ae4.shape}")
print(f"Columns: {list(ae4.columns)}")

ADAE dataset created successfully with 26 subjects and 53 variables.
Dataset saved to: E:/Python Clinical Course/ADAM datasets/ADAE.csv

Dataset Info:
Shape: (26, 53)
Columns: ['USUBJID', 'SUBJID', 'SITEID', 'AGE', 'AGEU', 'SEX', 'RACE', 'ETHNIC', 'COUNTRY', 'SAFFL', 'TRT01P', 'TRT01PN', 'TRT01A', 'TRT01AN', 'TRTSDT', 'TRTEDT', 'AESEQ', 'AETERM', 'AEDECOD', 'AEBODSYS', 'AEBDSYCD', 'AELLT', 'AELLTCD', 'AEPTCD', 'AEHLT', 'AEHLTCD', 'AEHLGT', 'AEHLGTCD', 'AESOC', 'AESOCCD', 'AESTDTC', 'ASTDT', 'AEENDTC', 'AENDT', 'ASTDY', 'AENDY', 'TRTEMFL', 'AESER', 'ASEV', 'ASEVN', 'AREL', 'ARELN', 'RELGR1', 'AEACNOTH', 'AACN', 'AACNN', 'AEOUT', 'AESCONG', 'AESDISAB', 'AESDTH', 'AESHOSP', 'AESLIFE', 'AESMIE']
