In [1]:
# Clear all user-defined variables from the namespace (-f: do not ask for confirmation)
%reset -f

# Record this session's commands and outputs in a log file.
# %logstop runs first so that %logstart always begins a new log; when no log is
# active it only prints a notice. -t timestamps the log, -o also logs cell output.
%logstop
%logstart -t -o "E:/Python Clinical Course/Table1 log.txt"

Logging hadn't been started.
Activating auto-logging. Current session state plus future input saved.
Filename       : E:/Python Clinical Course/Table1 log.txt
Mode           : backup
Output logging : True
Raw input log  : False
Timestamping   : True
State          : active


In [2]:
import pandas as pd
import pyreadstat
import os
from docx import Document
from docx.shared import Pt, Inches
from docx.enum.text import WD_PARAGRAPH_ALIGNMENT
from docx.oxml.ns import qn
from docx.oxml import OxmlElement

# Folder scanned for .sas7bdat dataset files. NOTE(review): hard-coded absolute
# path on a local drive -- update it when running the notebook on another machine.
adam_path = r"E:\Python Clinical Course\ADAM datasets\ADaM Datasets"


In [3]:
# Read every SAS dataset found in adam_path into a dict keyed by the file name
# without its extension (e.g. "adsl.sas7bdat" -> adam_datasets["adsl"]).
adam_datasets = {}

# sorted() makes the load order (and therefore the dict order) deterministic;
# os.listdir() returns entries in arbitrary order.
for file in sorted(os.listdir(adam_path)):
    # splitext() removes only the trailing extension, whereas str.replace() would
    # also strip ".sas7bdat" if it appeared elsewhere in the file name.
    dataset_name, extension = os.path.splitext(file)
    # Case-insensitive match so files such as "ADSL.SAS7BDAT" are not skipped.
    if extension.lower() == ".sas7bdat":
        file_path = os.path.join(adam_path, file)
        # read_sas7bdat returns (DataFrame, metadata); only the DataFrame is stored.
        df, meta = pyreadstat.read_sas7bdat(file_path)
        adam_datasets[dataset_name] = df

In [4]:
# Load the ADSL data.
# Fail fast with an explicit message: dict.get() would return None for a missing
# dataset and the notebook would only break later with an unrelated-looking
# "'NoneType' object is not subscriptable" error.
if "adsl" not in adam_datasets:
    raise KeyError(
        f"Dataset 'adsl' was not loaded; available datasets: {sorted(adam_datasets)}"
    )
adsl = adam_datasets["adsl"]

In [5]:
# Keep only rows whose TRT01A value is neither missing nor an empty string
trt_label = adsl['TRT01A']
adsl_filtered = adsl[trt_label.notna() & trt_label.ne('')]

# Pooled "ALL" arm: every retained row once more, relabelled under arm code 3
adsl_all = adsl_filtered.assign(TRT01A='ALL', TRT01AN=3)

# Stack the individual arms and the pooled arm into a single frame
adsl_combined = pd.concat([adsl_filtered, adsl_all], ignore_index=True)

# Denominators: number of unique USUBJID values per treatment arm
denom = (
    adsl_combined
    .groupby(['TRT01AN', 'TRT01A'])['USUBJID']
    .nunique()
    .reset_index(name='DENOM')
)
n_dict = denom.set_index('TRT01AN')['DENOM'].to_dict()
# An arm code absent from the data gets a denominator of 0
N1, N2, N3 = (n_dict.get(arm_code, 0) for arm_code in (1, 2, 3))


In [6]:

# -------------------- STEP 2: POPULATION COUNTS --------------------

def pop_summary(flag_var, label, order, data=None):
    """Count unique subjects per treatment arm among rows flagged 'Y'.

    Parameters
    ----------
    flag_var : str
        Name of the flag column; only rows where it equals 'Y' are counted.
    label : str
        Text stored in the 'POP' column of the result.
    order : int
        Value stored in the 'ORD' column of the result.
    data : pandas.DataFrame, optional
        Frame to summarise. It must contain ``flag_var``, 'TRT01AN', 'TRT01A'
        and 'USUBJID'. Defaults to the notebook-level ``adsl_combined``.

    Returns
    -------
    pandas.DataFrame
        One row per ('TRT01AN', 'TRT01A') pair that has at least one flagged
        row, with columns TRT01AN, TRT01A, USUBJID (the unique-subject count),
        POP and ORD. Arms without any flagged row are absent from the result.
    """
    # Fall back to the notebook-level frame so existing three-argument calls
    # keep working unchanged.
    source = adsl_combined if data is None else data
    flagged = source[source[flag_var] == 'Y']
    grouped = flagged.groupby(['TRT01AN', 'TRT01A'])['USUBJID'].nunique().reset_index()
    grouped['POP'] = label
    grouped['ORD'] = order
    return grouped

# (flag column, display label, display order) for each analysis population
population_flags = [
    ('SAFFL', 'Safety Population', 1),
    ('ITTFL', 'ITT Population', 2),
    ('RANDFL', 'Randomization Population', 3),
    ('PPROTFL', 'Per-Protocol Population', 4),
]

# Stack the per-arm counts of every population into one long frame
summary = pd.concat([
    pop_summary(flag, label, order) for flag, label, order in population_flags
])

# Attach each arm's denominator to its counts
summary = summary.merge(denom, on=['TRT01AN', 'TRT01A'], how='left')


def _format_n_pct(row):
    """Return the display string 'n (pct)' with the percentage to one decimal."""
    return f"{row['USUBJID']} ({(row['USUBJID']/row['DENOM']*100):.1f})"


summary['grp'] = summary.apply(_format_n_pct, axis=1)


In [10]:

# -------------------- STEP 3: TRANSPOSE TO FINAL LAYOUT --------------------

# Arm codes that become the treatment columns of the table, in display order
arm_codes = [1, 2, 3]
# Text shown when an arm has no flagged subject in a population; this is what the
# "n (pct)" format yields for a zero count
zero_cell = '0 (0.0)'

# One row per population, one column per arm code
pivot = summary.pivot_table(index=['ORD', 'POP'], columns='TRT01AN', values='grp', aggfunc='first')
# pivot_table only creates columns and cells for (population, arm) pairs present in
# summary. reindex() guarantees that all three arm columns exist, so the column
# selection below cannot raise KeyError. fillna() replaces the remaining gaps so
# that "nan" never reaches the Word table.
pivot = pivot.reindex(columns=arm_codes, fill_value=zero_cell).fillna(zero_cell).reset_index()
pivot.columns.name = None
pivot['stat'] = 'n (%)'
pivot = pivot[['POP', 'stat'] + arm_codes]
pivot.columns = ['Population', 'Statistic', f'DRUG A \n (N={N1})', f'DRUG B \n (N={N2})', f'ALL \n (N={N3})']


In [11]:

# -------------------- STEP 4: WORD DOCUMENT OUTPUT --------------------

doc = Document()

# Page layout: 1-inch margin on all four sides of every section
for section in doc.sections:
    for side in ('top', 'bottom', 'left', 'right'):
        setattr(section, f'{side}_margin', Inches(1))

# Base font: Courier New, 9pt; the East Asian font slot is set directly on the
# style's underlying XML element
normal_style = doc.styles['Normal']
normal_font = normal_style.font
normal_font.name = 'Courier New'
normal_font.size = Pt(9)
normal_style.element.rPr.rFonts.set(qn('w:eastAsia'), 'Courier New')

# Title lines, each paired with its paragraph alignment
title_lines = (
    ('COVID-19 AA', WD_PARAGRAPH_ALIGNMENT.LEFT),
    ('Protocol: 043', WD_PARAGRAPH_ALIGNMENT.LEFT),
    ('Table 14.1.1 Subject Assignment to Analysis Populations', WD_PARAGRAPH_ALIGNMENT.CENTER),
)
for title_text, title_alignment in title_lines:
    doc.add_paragraph(title_text).alignment = title_alignment

# Table with a single (header) row and one column per pivot column
table = doc.add_table(rows=1, cols=len(pivot.columns))
table.style = 'Light Shading Accent 1'


def _zero_cell_margins(cell):
    """Append a w:tcMar element setting the cell's four margins to 0 dxa."""
    margins = OxmlElement('w:tcMar')
    for edge in ('top', 'start', 'bottom', 'end'):
        edge_node = OxmlElement(f'w:{edge}')
        edge_node.set(qn('w:w'), "0")
        edge_node.set(qn('w:type'), 'dxa')
        margins.append(edge_node)
    cell._element.tcPr.append(margins)


# Header row: remove the cell padding and write the column headings
for header_cell, heading in zip(table.rows[0].cells, pivot.columns):
    _zero_cell_margins(header_cell)
    header_cell.text = heading

# Data rows: one new table row per pivot row, each value written as text
for _, record in pivot.iterrows():
    for target_cell, value in zip(table.add_row().cells, record):
        target_cell.text = str(value)

# Footnote
doc.add_paragraph(r"E:\TAB1_1.SAS").alignment = WD_PARAGRAPH_ALIGNMENT.LEFT


In [12]:

# Save file
output_path = r"E:\Python Clinical Course\TLF\output\t_14_1_1.docx"
# doc.save() fails if the target folder does not exist, so create the folder
# (and any missing parents) first.
output_dir = os.path.dirname(output_path)
# Guard against an empty directory part, for which os.makedirs() would raise.
if output_dir:
    os.makedirs(output_dir, exist_ok=True)
doc.save(output_path)
