In [18]:
#To remove all variables from the namespace
# (-f forces the reset without asking for confirmation)
%reset -f

#Creating a log file to record the commands and outputs
# Stop any logging that may already be active before opening a new log.
%logstop
# -t and -o correspond to the "Timestamping" and "Output logging" settings
# that the start-up message below reports as enabled.
%logstart -t -o "E:/Python Clinical Course/Table2 log.txt"

Activating auto-logging. Current session state plus future input saved.
Filename       : E:/Python Clinical Course/Table2 log.txt
Mode           : backup
Output logging : True
Raw input log  : False
Timestamping   : True
State          : active


In [19]:
import pandas as pd
import pyreadstat
import os
from docx import Document
from docx.shared import Pt, Inches
from docx.enum.text import WD_PARAGRAPH_ALIGNMENT
from docx.oxml.ns import qn
from docx.oxml import OxmlElement

# Folder that is scanned for ADaM .sas7bdat files by the loading cell.
# NOTE(review): absolute, machine-specific path — adjust per environment.
adam_path = r"E:\Python Clinical Course\ADAM datasets\ADaM Datasets"

In [20]:
# Read every SAS dataset found in adam_path into a dict of DataFrames,
# keyed by the file name without its extension.
adam_datasets = {}

for file in os.listdir(adam_path):
    # splitext removes only the trailing extension, whereas str.replace would
    # also strip ".sas7bdat" if it appeared elsewhere in the file name.
    dataset_name, extension = os.path.splitext(file)
    # Compare case-insensitively so files such as "ADSL.SAS7BDAT" are not skipped.
    if extension.lower() == ".sas7bdat":
        file_path = os.path.join(adam_path, file)
        df, meta = pyreadstat.read_sas7bdat(file_path)
        adam_datasets[dataset_name] = df

In [21]:
# Fetch the subject-level dataset. Try the exact key first, then fall back to a
# case-insensitive match so a file saved as e.g. "ADSL.sas7bdat" is still found.
adsl = adam_datasets.get("adsl")
if adsl is None:
    adsl = next(
        (frame for name, frame in adam_datasets.items() if name.lower() == "adsl"),
        None,
    )
# Fail here with a clear message instead of letting a None value surface later
# as "'NoneType' object is not subscriptable".
if adsl is None:
    raise KeyError(
        f"ADSL dataset not found; datasets loaded: {sorted(adam_datasets)}"
    )

In [22]:
# Safety population: SAFFL is 'Y' and the actual treatment is neither missing nor blank
adsl_safety = adsl.loc[
    adsl['TRT01A'].notna()
    & adsl['TRT01A'].ne('')
    & adsl['SAFFL'].eq('Y')
]

# Pooled arm: the same subjects relabelled as treatment 'ALL' with numeric code 3
adsl_all = adsl_safety.assign(TRT01A='ALL', TRT01AN=3)

# Stack the per-arm rows and the pooled rows into one frame
adsl_combined = pd.concat((adsl_safety, adsl_all), ignore_index=True)


In [23]:

# Denominators: number of distinct subjects in each treatment arm
denom = (
    adsl_combined
    .groupby(['TRT01AN', 'TRT01A'])['USUBJID']
    .nunique()
    .reset_index(name='DENOM')
)
# Map numeric arm code -> subject count; an arm that is absent counts as 0
n_dict = denom.set_index('TRT01AN')['DENOM'].to_dict()
N1, N2, N3 = (n_dict.get(arm_code, 0) for arm_code in (1, 2, 3))


In [24]:

# Base for counts
def pop_summary(filter_cond, label, order, data=None):
    """Count distinct subjects per treatment arm for one population row.

    Parameters
    ----------
    filter_cond : boolean mask
        Row selector applied to the source frame with ``frame[filter_cond]``.
    label : str
        Text stored in the ``POP`` column of every returned row.
    order : int
        Value stored in the ``ORD`` column of every returned row.
    data : pandas.DataFrame, optional
        Frame to summarise. Defaults to the notebook-level ``adsl_combined``,
        which keeps existing three-argument calls working unchanged.

    Returns
    -------
    pandas.DataFrame
        One row per (``TRT01AN``, ``TRT01A``) pair present after filtering, with
        columns ``TRT01AN``, ``TRT01A``, ``USUBJID`` (the distinct-subject
        count), ``POP`` and ``ORD``.
    """
    # Resolve the global lazily so the function can also be used on any frame.
    source = adsl_combined if data is None else data
    grouped = (
        source[filter_cond]
        .groupby(['TRT01AN', 'TRT01A'])['USUBJID']
        .nunique()
        .reset_index()
    )
    grouped['POP'] = label
    grouped['ORD'] = order
    return grouped

# One pop_summary() frame per disposition row, stacked in display order
summary = pd.concat([
    pop_summary(row_mask, row_label, row_order)
    for row_mask, row_label, row_order in (
        (adsl_combined['TRT01A'] != '', 'Subjects Actual treatment', 1),
        (adsl_combined['RANDFL'] == 'Y', 'Subjects Randomized', 2),
        (adsl_combined['EOSSTT'] == 'Discontinued', 'Subjects Withdrawn', 3),
    )
])


In [25]:

# Discontinued subjects, counted per arm and discontinuation reason
withdrawn = adsl_combined.loc[adsl_combined['EOSSTT'].eq('Discontinued')]
wth_by_reason = (
    withdrawn
    .groupby(['TRT01AN', 'TRT01A', 'DCSREAS'])['USUBJID']
    .nunique()
    .reset_index()
    .rename(columns={'DCSREAS': 'POP'})  # the reason text becomes the row label
    .assign(ORD=4)                       # ORD 4 places these rows after the population rows
)
summary = pd.concat([summary, wth_by_reason], ignore_index=True)


In [26]:

# Merge denominator
# Drop a DENOM column left behind by an earlier run of this cell first: merging
# again would otherwise create DENOM_x / DENOM_y and row['DENOM'] below would
# raise KeyError.
summary = (
    summary
    .drop(columns=['DENOM'], errors='ignore')
    .merge(denom, on=['TRT01AN', 'TRT01A'], how='left')
)
# Display value "n (pct)": the subject count followed by its percentage of the
# arm denominator, shown with one decimal place.
summary['grp'] = summary.apply(lambda row: f"{row['USUBJID']} ({(row['USUBJID']/row['DENOM']*100):.1f})", axis=1)


In [30]:

# Transpose layout: one row per (ORD, POP) label, one column per treatment code
pivot = summary.pivot_table(index=['ORD', 'POP'], columns='TRT01AN', values='grp', aggfunc='first').reset_index()
pivot.columns.name = None
# reindex instead of pivot[[...]]: a treatment code with no rows in `summary`
# then becomes an all-missing column (filled below) rather than raising KeyError.
pivot = pivot.reindex(columns=['POP', 1, 2, 3])
pivot.columns = ['Population',  f'DRUG A \n (N={N1})', f'DRUG B \n (N={N2})', f'ALL \n (N={N3})']

# Fill missing: a row/arm combination with no subjects is displayed as zero.
# Assignment is used instead of inplace=True so the cell does not mutate a
# frame derived from a column selection.
pivot = pivot.fillna('  0')


In [31]:

# ---------------- Word Output ----------------

doc = Document()

# One-inch margin on all four sides of every section
for section in doc.sections:
    for edge in ('top_margin', 'bottom_margin', 'left_margin', 'right_margin'):
        setattr(section, edge, Inches(1))

# Base text style: 9 pt Courier New
style = doc.styles['Normal']
base_font = style.font
base_font.name = 'Courier New'
base_font.size = Pt(9)
# Set the w:eastAsia attribute of the style's rFonts element to the same face
style.element.rPr.rFonts.set(qn('w:eastAsia'), 'Courier New')

# Title lines: study and protocol left-aligned, table title centred
for title_text, title_alignment in (
    ('COVID-19 AA', WD_PARAGRAPH_ALIGNMENT.LEFT),
    ('Protocol: 043', WD_PARAGRAPH_ALIGNMENT.LEFT),
    ('Table 14.1.2  Subject Disposition by Treatment (Safety Population)', WD_PARAGRAPH_ALIGNMENT.CENTER),
):
    doc.add_paragraph(title_text).alignment = title_alignment

# Table with a single (header) row and one column per pivot column
table = doc.add_table(rows=1, cols=len(pivot.columns))
table.style = 'Light Shading Accent 1'

# Header row: copy the pivot column labels into the first table row
for position, heading in enumerate(pivot.columns):
    table.cell(0, position).text = heading

# Body: append one table row per pivot row, writing every value as text
for record in pivot.values.tolist():
    new_cells = table.add_row().cells
    for position, value in enumerate(record):
        new_cells[position].text = str(value)

# Footnote: left-aligned program path printed under the table
footnote = doc.add_paragraph(r"E:\TAB1_2.SAS")
footnote.alignment = WD_PARAGRAPH_ALIGNMENT.LEFT


In [32]:

# Save file
output_path = r"E:\Python Clinical Course\TLF\output\t_14_1_2.docx"
# doc.save() does not create missing folders, so make sure the target
# directory exists before writing.
os.makedirs(os.path.dirname(output_path), exist_ok=True)
doc.save(output_path)
