In [1]:
# Clear every user-defined variable from the namespace without asking for
# confirmation (-f), so the notebook starts from a clean state.
%reset -f

# Record the session in a log file: stop any logger that is already running,
# then start a new one with timestamps (-t) and cell output (-o) included.
%logstop
%logstart -t -o "E:/Python Clinical Course/list4 log.txt"

Logging hadn't been started.
Activating auto-logging. Current session state plus future input saved.
Filename       : E:/Python Clinical Course/list4 log.txt
Mode           : backup
Output logging : True
Raw input log  : False
Timestamping   : True
State          : active


In [2]:
import pandas as pd
import numpy as np
import pyreadstat
import os
from docx import Document
from docx.shared import Inches, Pt
from docx.enum.text import WD_PARAGRAPH_ALIGNMENT

In [3]:
# Folder that is scanned for ADaM .sas7bdat files by the loading loop below.
# NOTE(review): absolute, machine-specific path - adjust before running elsewhere.
adam_path = r"E:\Python Clinical Course\ADAM datasets\ADaM Datasets"

In [4]:
# Load every SAS dataset found in adam_path into a dict keyed by the file name
# without its extension (e.g. "adlb.sas7bdat" -> adam_datasets["adlb"]).
adam_datasets = {}

for file in os.listdir(adam_path):
    # splitext removes only the trailing extension (str.replace would also
    # remove ".sas7bdat" from the middle of a name); the comparison is
    # case-insensitive so a file such as "ADLB.SAS7BDAT" is not skipped.
    dataset_name, extension = os.path.splitext(file)
    if extension.lower() != ".sas7bdat":
        continue
    file_path = os.path.join(adam_path, file)
    # read_sas7bdat returns (DataFrame, metadata); only the DataFrame is kept.
    df, _meta = pyreadstat.read_sas7bdat(file_path)
    adam_datasets[dataset_name] = df

In [5]:
# ADSL is not used by this listing; kept here only as a reminder of how to fetch it.
#adsl = adam_datasets.get("adsl")

# Select the lab dataset. Fail here with a clear message when it was not loaded:
# .get() would leave adlb as None and the error would only appear in the next
# cell as "'NoneType' object is not subscriptable".
if "adlb" not in adam_datasets:
    raise KeyError(
        f"'adlb' was not loaded from {adam_path!r}; "
        f"datasets available: {sorted(adam_datasets)}"
    )
adlb = adam_datasets["adlb"]

In [6]:

# Step 2: Filter hematology records with abnormal values
# NOTE: this cell overwrites adlb and Step 4 drops PARCAT1, so re-running it
# requires re-running the cell that assigns adlb first.
# ANRIND is compared as stripped text: besides NaN, a blank string also counts
# as "no indicator" (character missing values may arrive as '' rather than NaN),
# so such rows are not listed as abnormal.
anrind_text = adlb['ANRIND'].fillna('').astype(str).str.strip()
is_hematology = adlb['PARCAT1'] == 'HEMATOLOGY'
is_abnormal = (anrind_text != '') & (anrind_text != 'NORMAL')
adlb = adlb[is_hematology & is_abnormal].copy()

# Step 3: Create L_H (Normal range) and DT (datetime string)
# A missing limit becomes an empty string instead of the literal text "nan"/"None";
# when both limits are blank the whole range is left blank rather than "-".
range_lo = adlb['ANRLO'].astype(str).str.strip().where(adlb['ANRLO'].notna(), '')
range_hi = adlb['ANRHI'].astype(str).str.strip().where(adlb['ANRHI'].notna(), '')
adlb['L_H'] = (range_lo + '-' + range_hi).where((range_lo != '') | (range_hi != ''), '')
# strftime yields NaN for a missing ADTM; blank it so the report does not print "nan".
adlb['DT'] = (
    pd.to_datetime(adlb['ADTM'])
    .dt.strftime('%d%b%Y:%H:%M:%S')
    .str.upper()
    .fillna('')
)

# Step 4: Keep only required columns
adlb = adlb[['USUBJID', 'PARAMN', 'PARAM', 'AVISITN', 'AVISIT', 'L_H', 'DT', 'AVALC', 'ANRIND']]

# Step 5: Sort data
# reset_index gives the sorted rows a fresh 0..n-1 index.
adlb = adlb.sort_values(by=['USUBJID', 'PARAMN', 'PARAM', 'AVISITN', 'AVISIT']).reset_index(drop=True)


In [7]:

# Step 6: Add pagination
# Rows are numbered by position (0..n-1) instead of by index label, so the
# paging stays correct even when adlb's index is not a fresh 0..n-1 range.
ROWS_PER_PAGE = 15  # number of data rows placed on each report page
row_position = np.arange(len(adlb), dtype='int64')
adlb['lnt'] = (row_position % ROWS_PER_PAGE) + 1      # 1-based line number within the page
adlb['page1'] = (row_position // ROWS_PER_PAGE) + 1   # 1-based page number


In [8]:

# Step 7: Create Word document
doc = Document()

# --- Titles ---
doc.add_paragraph('COVID-19 AA').alignment = WD_PARAGRAPH_ALIGNMENT.LEFT
doc.add_paragraph('Protocol: 043').alignment = WD_PARAGRAPH_ALIGNMENT.LEFT
doc.add_paragraph('16.2.1.6 Abnormal Hematology Values').alignment = WD_PARAGRAPH_ALIGNMENT.CENTER

# Column layout shared by the header row and every data row. The three lists
# are parallel: header text, adlb column that feeds it, and column width.
headers = ['Subject Number', 'Test', 'Visit', 'Normal Range',
           'Date/Time of\nMeasurement', 'Result', 'Flag']
source_columns = ['USUBJID', 'PARAM', 'AVISIT', 'L_H', 'DT', 'AVALC', 'ANRIND']
widths = [Inches(6.0), Inches(3.0), Inches(3.0), Inches(3.0), Inches(3.0), Inches(3.0), Inches(3.0)]


def _cell_text(value):
    """Return the text printed for one value; a missing value prints as blank, not 'nan'/'None'."""
    return '' if pd.isna(value) else str(value)


def _fill_row(cells, texts):
    """Write one text into each cell of a table row and apply the matching column width."""
    for cell, text, width in zip(cells, texts, widths):
        cell.text = text
        cell.width = width


# Step 8: Generate page-by-page report
# groupby sorts its keys, so pages are emitted in ascending page1 order and
# each page's rows are selected in a single pass over adlb.
for page, sub_df in adlb.groupby('page1'):
    table = doc.add_table(rows=1, cols=len(headers))
    table.style = 'Light Shading Accent 1'

    # Header row
    _fill_row(table.rows[0].cells, headers)

    # Data rows - the width is applied to every data row as well; previously
    # only the header cells received a width.
    for values in sub_df[source_columns].itertuples(index=False, name=None):
        _fill_row(table.add_row().cells, [_cell_text(value) for value in values])

    # Simulate line after page
    #doc.add_paragraph('_________________________________________')
    # NOTE(review): a break is added after the last table too, so the footnote
    # below starts on its own page - confirm this is the intended layout.
    doc.add_page_break()

# --- FOOTNOTE ---
doc.add_paragraph(r"E:\LIS6.SAS").alignment = WD_PARAGRAPH_ALIGNMENT.LEFT


In [9]:

# Step 9: Save as .docx (you can manually save as RTF from Word if needed)
output_docx = r"E:\Python Clinical Course\TLF\output\l_16_2_1_6.docx"
# Create the output folder when it is missing, so the save does not fail with
# FileNotFoundError after the whole report has been built.
output_dir = os.path.dirname(output_docx)
if output_dir:
    os.makedirs(output_dir, exist_ok=True)
doc.save(output_docx)
