In [1]:
# Clear all user-defined names from the interactive namespace so the notebook
# starts from a clean state; -f skips the confirmation prompt.
%reset -f

# Stop any logger that is already active, then start a fresh session log.
#   -t  timestamps the logged input
#   -o  also records cell outputs
# The log is written to the quoted file path below.
%logstop
%logstart -t -o "E:/Python Clinical Course/list1 log.txt"

Logging hadn't been started.
Activating auto-logging. Current session state plus future input saved.
Filename       : E:/Python Clinical Course/list1 log.txt
Mode           : backup
Output logging : True
Raw input log  : False
Timestamping   : True
State          : active


In [2]:
import pandas as pd
import numpy as np
import pyreadstat
import os
from docx import Document
from docx.shared import Inches, Pt
from docx.enum.text import WD_PARAGRAPH_ALIGNMENT

In [3]:
# Directory that is scanned for ADaM .sas7bdat files when the datasets are loaded.
# NOTE(review): absolute, machine-specific path - consider making it configurable.
adam_path = r"E:\Python Clinical Course\ADAM datasets\ADaM Datasets"

In [4]:
# Read every SAS dataset in `adam_path` into a dict keyed by the file's base
# name (the file name without its extension), e.g. "adsl.sas7bdat" -> "adsl".
adam_datasets = {}

# sorted() gives a deterministic load order; os.listdir order is arbitrary.
for file in sorted(os.listdir(adam_path)):
    # splitext removes only the trailing extension, whereas str.replace would
    # also strip ".sas7bdat" if it appeared elsewhere in the name.
    dataset_name, extension = os.path.splitext(file)
    # Compare case-insensitively so files such as "ADSL.SAS7BDAT" are not
    # silently skipped.
    if extension.lower() != ".sas7bdat":
        continue
    file_path = os.path.join(adam_path, file)
    # read_sas7bdat returns (DataFrame, metadata); only the frame is stored.
    df, meta = pyreadstat.read_sas7bdat(file_path)
    adam_datasets[dataset_name] = df

In [5]:

# Load the data
# Pull the subject-level dataset out of the loaded ADaM dict and keep only the
# subject identifier plus the four population-flag columns used in the listing.
adsl = adam_datasets.get("adsl")
if adsl is None:
    # Fail here with a clear message instead of an opaque
    # "'NoneType' object is not subscriptable" on the column selection below.
    raise KeyError(
        "Dataset 'adsl' was not loaded; available datasets: "
        f"{sorted(adam_datasets)}"
    )
# .copy() makes the working frame independent of the one kept in adam_datasets,
# so columns added later do not raise SettingWithCopyWarning.
adsl = adsl[['USUBJID', 'SAFFL', 'ITTFL', 'PPROTFL', 'RANDFL']].copy()


In [6]:

# Line number and page number
# Renumber the rows 0..n-1, then derive from each row's position:
#   lnt   - 1-based line number within its page
#   page1 - 1-based page number
# Each page holds `rows_per_page` rows.
rows_per_page = 20
adsl = adsl.reset_index(drop=True)
row_position = adsl.index
adsl = adsl.assign(
    lnt=row_position % rows_per_page + 1,
    page1=row_position // rows_per_page + 1,
)


In [7]:
# Create the document
# Builds the Word listing "16.2.1.1 Assignment to Analysis Populations":
# three title paragraphs, one five-column table per page of `adsl` (pages are
# defined by the `page1` column), and a closing paragraph with the program name.

# (ADSL column, header text) pairs in display order.
LISTING_COLUMNS = [
    ('USUBJID', 'Subject\nNumber'),
    ('SAFFL', 'Safety\nPopulation'),
    ('ITTFL', 'Intent-To-Treat\nPopulation'),
    ('PPROTFL', 'Per-Protocol\nPopulation'),
    ('RANDFL', 'Randomized\nPopulation'),
]

# One width per column, applied to header AND data cells so every row of a
# column has the same width (previously the second column was 2.0 in in the
# header row but 1.0 in in the data rows).
COLUMN_WIDTHS = [Inches(6.0), Inches(2.0), Inches(2.0), Inches(2.0), Inches(2.0)]


def _cell_text(value):
    """Return the text for one table cell; missing values print as blank.

    Without this, a missing value would appear in the listing as the literal
    text "nan" or "None".
    """
    return '' if pd.isna(value) else str(value)


doc = Document()

# Titles
doc.add_paragraph('COVID-19 AA').alignment = WD_PARAGRAPH_ALIGNMENT.LEFT
doc.add_paragraph('Protocol: 043').alignment = WD_PARAGRAPH_ALIGNMENT.LEFT
title_para = doc.add_paragraph('16.2.1.1 Assignment to Analysis Populations')
title_para.alignment = WD_PARAGRAPH_ALIGNMENT.CENTER

source_columns = [column_name for column_name, header_text in LISTING_COLUMNS]

# One table per page; groupby yields the pages in ascending page1 order.
for page_index, (page_num, sub_df) in enumerate(adsl.groupby('page1', sort=True)):
    # Break BEFORE every page except the first. Breaking after each page left
    # a trailing break that pushed the closing paragraph onto a page of its own.
    if page_index > 0:
        doc.add_page_break()

    table = doc.add_table(rows=1, cols=len(LISTING_COLUMNS))
    table.style = 'Light Shading Accent 1'

    # Header row
    hdr_cells = table.rows[0].cells
    for cell, (column_name, header_text), width in zip(
        hdr_cells, LISTING_COLUMNS, COLUMN_WIDTHS
    ):
        cell.text = header_text
        cell.width = width

    # Data rows (itertuples avoids building a Series per row as iterrows does)
    for values in sub_df[source_columns].itertuples(index=False, name=None):
        row_cells = table.add_row().cells
        for cell, value, width in zip(row_cells, values, COLUMN_WIDTHS):
            cell.text = _cell_text(value)
            cell.width = width

# Footer
# NOTE(review): this is a body paragraph after the last table, not a Word
# section footer, so it appears once at the end rather than on every page.
footer_para = doc.add_paragraph(r"E:\LIS1.SAS")
footer_para.alignment = WD_PARAGRAPH_ALIGNMENT.LEFT


In [8]:

# Save the document
# Write the assembled listing to disk as a .docx file at the path below.
output_path = r"E:\Python Clinical Course\TLF\output\l_16_2_1_1.docx"
doc.save(output_path)
