In [1]:
# Start from a clean namespace: -f clears every user-defined variable
# without asking for confirmation.
%reset -f

# Record this session's commands and outputs in a log file.
# Stop any logging that may already be active before starting a new log.
%logstop
# -t timestamps the logged input, -o also logs cell output.
%logstart -t -o "E:/Python Clinical Course/list3 log.txt"

Logging hadn't been started.
Activating auto-logging. Current session state plus future input saved.
Filename       : E:/Python Clinical Course/list3 log.txt
Mode           : backup
Output logging : True
Raw input log  : False
Timestamping   : True
State          : active


In [2]:
import pandas as pd
import numpy as np
import pyreadstat
import os
from docx import Document
from docx.shared import Inches, Pt
from docx.enum.text import WD_PARAGRAPH_ALIGNMENT

In [3]:
# Folder that is scanned for ADaM .sas7bdat files by the loading cell.
# Raw string so the Windows backslashes are taken literally.
adam_path = r"E:\Python Clinical Course\ADAM datasets\ADaM Datasets"

In [4]:
# Read every SAS dataset found in adam_path into a dict of DataFrames,
# keyed by the file name with ".sas7bdat" removed.
adam_datasets = {}

for fname in os.listdir(adam_path):
    if not fname.endswith(".sas7bdat"):
        continue  # not a SAS dataset; ignore it
    # read_sas7bdat returns (DataFrame, metadata); only the frame is kept.
    frame, _meta = pyreadstat.read_sas7bdat(os.path.join(adam_path, fname))
    adam_datasets[fname.replace(".sas7bdat", "")] = frame

In [5]:
# Pull the "adsl" dataset out of the loaded collection.
# Fail right here with a readable message if it was not loaded; a silent
# None would only surface in the next cell as
# "'NoneType' object is not subscriptable".
if "adsl" not in adam_datasets:
    raise KeyError(
        f"Dataset 'adsl' was not loaded; available datasets: {sorted(adam_datasets)}"
    )
adsl = adam_datasets["adsl"]

In [6]:
# Step 2: Keep only rows with EOSSTT equal to 'Discontinued' and a non-missing TRTEDT
is_discontinued = adsl['EOSSTT'].eq('Discontinued')
has_trtedt = adsl['TRTEDT'].notna()
adsl = adsl.loc[is_discontinued & has_trtedt].copy()

# Step 3: Render TRTEDT as upper-case DDMONYYYY text (DATE9 style, e.g. 01JAN2020)
trtedt_as_datetime = pd.to_datetime(adsl['TRTEDT'])
adsl = adsl.assign(TRTEDT_L=trtedt_as_datetime.dt.strftime('%d%b%Y').str.upper())

# Step 4: Reduce to the columns used by the listing, in display order
adsl = adsl.loc[:, ['USUBJID', 'DCSREAS', 'TRTEDT_L', 'EOSSTT']]


In [7]:

# Step 5: Add pagination variables
# Number of listing rows per page. Named once so the line counter and the
# page number below are always computed from the same value.
ROWS_PER_PAGE = 20

# Renumber the rows 0..n-1 so the arithmetic below works on row position
# rather than on the index labels left over from the filtered dataset.
adsl = adsl.reset_index(drop=True)
adsl['lnt'] = (adsl.index % ROWS_PER_PAGE) + 1      # 1-based line number within the page
adsl['page1'] = (adsl.index // ROWS_PER_PAGE) + 1   # 1-based page number


In [8]:

# Create the document
doc = Document()

# Titles (written once, at the top of the document)
doc.add_paragraph('COVID-19 AA').alignment = WD_PARAGRAPH_ALIGNMENT.LEFT
doc.add_paragraph('Protocol: 043').alignment = WD_PARAGRAPH_ALIGNMENT.LEFT
title_para = doc.add_paragraph('16.2.1.4 Withdrawals from the Study')
title_para.alignment = WD_PARAGRAPH_ALIGNMENT.CENTER

# Listing layout in display order: (adsl column, header text, column width).
# Defined once so the header and data rows share the same layout and the
# widths are not rebuilt for every table row.
listing_columns = [
    ('USUBJID', 'Subject Number', Inches(6.0)),
    ('DCSREAS', 'Reason for Discontinuation\nfrom Study', Inches(4.0)),
    ('TRTEDT_L', 'Date of Last Exposure\nto Treatment', Inches(4.0)),
    ('EOSSTT', 'End of Study\nStatus', Inches(3.0)),
]

# Iterate by pages: one table per distinct value of page1
for page_pos, page_num in enumerate(sorted(adsl['page1'].unique())):
    sub_df = adsl[adsl['page1'] == page_num]

    # Every table after the first starts on a new page. Without this the
    # tables are written back to back with nothing between them, so the
    # pages computed in page1 never become separate pages in the output.
    if page_pos > 0:
        doc.add_page_break()

    table = doc.add_table(rows=1, cols=len(listing_columns))
    table.style = 'Light Shading Accent 1'

    # Set header
    for cell, (_col, header_text, col_width) in zip(table.rows[0].cells, listing_columns):
        cell.text = header_text
        cell.width = col_width

    # Fill rows
    for _idx, row in sub_df.iterrows():
        row_cells = table.add_row().cells
        for cell, (col_name, _header, col_width) in zip(row_cells, listing_columns):
            value = row[col_name]
            # Missing values are shown as a blank cell instead of the text "nan".
            cell.text = str(value) if pd.notna(value) else ''
            cell.width = col_width

# Step 8: Add footnote
doc.add_paragraph(r"E:\LIS3.SAS").alignment = WD_PARAGRAPH_ALIGNMENT.LEFT


In [9]:

# Step 9: Save the document
output_file = r"E:\Python Clinical Course\TLF\output\l_16_2_1_4.docx"

# Saving into a folder that does not exist fails, so create the output
# folder (and any missing parents) first. The guard skips the call when the
# path has no directory part.
output_dir = os.path.dirname(output_file)
if output_dir:
    os.makedirs(output_dir, exist_ok=True)

doc.save(output_file)
