In [1]:
import os
from docx.enum.section import WD_ORIENT
from docx.enum.table import WD_TABLE_ALIGNMENT
import pandas as pd
from typing import List
from docx.shared import Pt, Inches, Cm
from docx import Document
import pypandoc

import sys
sys.path.append('../src')
import cequalw2 as w2

In [2]:
def change_orientation(document, orientation: str):
    current_section = document.sections[-1]
    new_width, new_height = current_section.page_height, current_section.page_width
    if orientation.lower() == 'landscape':
        current_section.orientation = WD_ORIENT.LANDSCAPE
    elif orientation.lower() == 'portrait':
        current_section.orientation = WD_ORIENT.PORTRAIT
    else:
        raise ValueError('Specify "landscape" or "portrait" for the orientation.')
    current_section.page_width = new_width
    current_section.page_height = new_height

def format_cell(cell, **kwargs):
    # Parse keyword arguments
    table_font_size = kwargs.get('table_font_size', None)
    bold = kwargs.get('bold', None)
    paragraph_font_size = kwargs.get('paragraph_font_size', 11)
    table_font_size = kwargs.get('table_font_size', 10)

    # Get pointer
    paragraph = cell.paragraphs[0]
    run = paragraph.runs[0]
    
    # Format the cell
    run.font.size = Pt(table_font_size)
    if bold:
        run.bold = True

In [3]:
df1 = w2.read('data/BerlinMilton2006/2006_DeerCrk_Qin.npt', 2006, ['Inflow'])
df2 = w2.read('data/BerlinMilton2006/2006_DeerCrk_Cin.npt', 2006, ['TDS', 'SO4', 'Cl', 'ISS', 'OP', 'NH4', 'NOx', 'Fe', 'LDOM', 'RDOM', 'LPOM', 'RPOM', 'BG', 'DIAT', 'OTH', 'DO'])

In [4]:
# Create a new document
doc = Document()

df = df1 # For loop starts here

# Add a table with a header in 10-point font
table = doc.add_table(rows=1, cols=len(df.columns))
# table.autofit = True
table.style = "Table Grid"
table_header_cells = table.rows[0].cells
cell = table_header_cells[0]
cell.text = "Time"
format_cell(cell, table_font_size=6, bold=True)

# Save the document
doc.save("aaa.docx")

In [5]:
from typing import List
import pandas as pd
from docx import Document
from docx.shared import Pt


def generate_report(data_frames: List[pd.DataFrame], **kwargs) -> Document:
    """
    Generate a water quality model report with summary statistics using python-docx.

    :param data_frames: List of Pandas DataFrames containing water quality data.
    :type data_frames: List[pd.DataFrame]
    :param **kwargs: Additional keyword arguments for model_name, location, and time_period.
    :type **kwargs: Any
    :return: The generated report document.
    :rtype: docx.Document
    """

    # Assign keyword arguments to variables
    model_name = kwargs.get('model_name', None)
    location = kwargs.get('location', None)
    time_period = kwargs.get('time_period', None)
    table_font_size = kwargs.get('table_font_size', 10)

    # Create a new document
    document = Document()

    # Change orientation to landscape
    change_orientation(document, "landscape")

    # Add the report title
    document.add_heading("Water Quality Model Report", level=0)

    # Add model information
    document.add_heading("Model Information", level=1)
    if model_name:
        document.add_paragraph(f"Model Name: {model_name}")
    if location:
        document.add_paragraph(f"Location: {location}")
    if time_period:
        document.add_paragraph(f"Time Period: {time_period}")

    # Add data summary
    document.add_heading("Data Summary", level=1)
    document.add_paragraph("Summary Statistics:")

    for df in data_frames:
        # Compute summary statistics for the current dataframe
        summary_statistics = df.describe().to_dict()
        col1 = df.columns[0]
        dict1 = summary_statistics[col1]
        keys = dict1.keys()

        # Add sub-heading for data filename
        file_path = df.attrs['Filename']
        directory, filename = os.path.split(file_path)
        document.add_heading(filename, level=2)

        # Create a table for summary statistics
        table = document.add_table(rows=1, cols=len(df.columns) + 1)
        table.autofit = True
        table.style = "Table Grid"

        # Add table headers
        table_header_cells = table.rows[0].cells
        cell = table_header_cells[0]
        cell.text = "Statistic"
        format_cell(cell, table_font_size=table_font_size, bold=True)
        for j, col in enumerate(df.columns):
            cell = table_header_cells[j + 1]
            cell.text = col
            format_cell(cell, table_font_size=table_font_size, bold=True)

        # Populate the table
        for i, key in enumerate(keys):
            row = table.add_row().cells
            cell = row[0]
            cell.text = key
            format_cell(cell, table_font_size=table_font_size, bold=True)
            table_header_cells = table.rows[i + 1].cells
            for j, col in enumerate(df.columns):
                d = summary_statistics[col][key]
                cell = table_header_cells[j + 1]
                cell.text = f'{d:.1f}'
                format_cell(cell, table_font_size=table_font_size, bold=False)

    return document


In [6]:
# Example data for the report
model_name = "CE-QUAL-W2"
location = "Berlin and Milton"
time_period = "2006"

# Generate the report
report = generate_report([df1, df2], model_name=model_name, location=location, time_period=time_period,
    table_font_size=6)

# Save the report to a file
report.save("water_quality_report.docx")

AttributeError: 'str' object has no attribute 'paragraphs'

In [None]:
pypandoc.convert_file('water_quality_report.docx', 'pdf', outputfile='water_quality_report.pdf', extra_args=['--pdf-engine=xelatex', '-V', 'geometry:margin=1.5cm,landscape'])