In [1]:
import csv
import re
from transformers import TFAutoModelForSeq2SeqLM, AutoTokenizer, pipeline


# Single checkpoint name shared by the tokenizer and the model weights.
checkpoint = "t5-small"

# Load the tokenizer and the TF seq2seq model, then wrap both in a
# summarization pipeline that the rest of the script calls as `summarizer`.
tokenizer = AutoTokenizer.from_pretrained(checkpoint)
model = TFAutoModelForSeq2SeqLM.from_pretrained(checkpoint)
summarizer = pipeline(task="summarization", model=model, tokenizer=tokenizer)


# Raw text of the report's executive summary; passed to the summarizer below.
# Like the other sections, the literal starts and ends with a newline.
executive_summary = """
The introduction of the new antihypertensive medication, Hypertrol, has shown promising results in controlling blood pressure among patients with Stage 2 Hypertension. Over a 12-month period, 75% of patients experienced a significant reduction in systolic blood pressure (average reduction of 20 mmHg) and 70% achieved the target blood pressure of <140/90 mmHg. The medication was generally well-tolerated, with minimal side effects reported.
"""

# Raw text of the results section (two lines: blood-pressure outcomes, then
# adverse events); passed to the summarizer below.
results_section = """
Systolic Blood Pressure: The average reduction in systolic blood pressure was 20 mmHg in the Hypertrol group, compared to 5 mmHg in the placebo group. Diastolic Blood Pressure: The average reduction in diastolic blood pressure was 10 mmHg in the Hypertrol group, compared to 3 mmHg in the placebo group. Target Achievement: 70% of patients in the Hypertrol group achieved the target blood pressure of <140/90 mmHg.
Adverse Events: Mild dizziness (15% of patients), fatigue (10% of patients), and nausea (8% of patients) were the most commonly reported side effects. One case of severe hypotension was reported, leading to discontinuation of the medication.
"""

# Raw text of the discussion section; passed to the summarizer below.
discussion_section = """
The results indicate that Hypertrol is highly effective in lowering blood pressure in patients with Stage 2 Hypertension. The medication was well-tolerated, with a low incidence of serious adverse events. These findings suggest that Hypertrol could be a valuable addition to the current antihypertensive treatment options.
"""


# Generation settings shared by all three sections: output length bounded to
# 25..50 and sampling disabled.
generation_kwargs = dict(max_length=50, min_length=25, do_sample=False)

# Each call is indexed with [0]; only that first result's 'summary_text' is kept.
executive_summary_summary = summarizer(executive_summary, **generation_kwargs)[0]['summary_text']
results_summary = summarizer(results_section, **generation_kwargs)[0]['summary_text']
discussion_summary = summarizer(discussion_section, **generation_kwargs)[0]['summary_text']


# Metric label -> compiled, case-insensitive pattern whose first capture group
# is the metric value. Dict order is the order in which rows are emitted.
_METRIC_PATTERNS = {
    "Patients with reduced BP (%)": re.compile(
        r"(\d+)% of patients experienced a significant reduction in systolic blood pressure",
        re.IGNORECASE,
    ),
    # Accepts both "70% achieved the target ..." and
    # "70% of patients in the X group achieved the target ...". The optional
    # part may not cross a '.' or another '%', so the captured percentage
    # always belongs to the "achieved" clause itself.
    "Patients achieved target BP (%)": re.compile(
        r"(\d+)%(?: of patients[^.%]*?)? achieved the target blood pressure",
        re.IGNORECASE,
    ),
    "Average systolic BP reduction (mmHg)": re.compile(
        r"average reduction in systolic blood pressure was (\d+) mmHg",
        re.IGNORECASE,
    ),
    "Average diastolic BP reduction (mmHg)": re.compile(
        r"average reduction in diastolic blood pressure was (\d+) mmHg",
        re.IGNORECASE,
    ),
    # The placebo figure is the number stated *before* "in the placebo group"
    # in the same sentence as the systolic/diastolic reduction. `[^.]*?` keeps
    # the match inside that sentence, so the two metrics no longer collapse
    # onto whatever number happens to follow the words "placebo group".
    "Placebo systolic BP reduction (mmHg)": re.compile(
        r"systolic blood pressure was \d+ mmHg[^.]*?(\d+) mmHg in the placebo group",
        re.IGNORECASE,
    ),
    "Placebo diastolic BP reduction (mmHg)": re.compile(
        r"diastolic blood pressure was \d+ mmHg[^.]*?(\d+) mmHg in the placebo group",
        re.IGNORECASE,
    ),
    # Only a parenthesised "(N% of patients)" counts, which is how the
    # side-effect rates are written. A bare "N% of patients" also appears in
    # the BP-reduction and target-achievement sentences and must not be
    # reported as an adverse-event rate. The first listed rate is captured.
    "Patients with adverse events (%)": re.compile(
        r"\(\s*(\d+)% of patients\s*\)",
        re.IGNORECASE,
    ),
    "Overall effectiveness": re.compile(r"effectiveness.*?(\w+)", re.IGNORECASE),
    "Tolerance level": re.compile(
        r"well-tolerated, with a (\w+) incidence of serious adverse events",
        re.IGNORECASE,
    ),
}


def extract_metrics(summary: str, category: str) -> list:
    """Pull known study metrics out of a piece of text with regular expressions.

    Each metric pattern is searched once in ``summary`` (case-insensitively);
    only the first match of each pattern is used and metrics that do not
    match are skipped. Numeric patterns capture whole digits only.

    Args:
        summary: Text to scan, e.g. a generated section summary.
        category: Label copied verbatim into every returned row.

    Returns:
        A list of ``{"Category", "Metric", "Value"}`` dicts, one per matched
        metric, in the order the metrics are defined. ``Value`` is the
        captured string, not a number. Empty when nothing matches.
    """
    rows = []
    for metric, pattern in _METRIC_PATTERNS.items():
        match = pattern.search(summary)
        if match:
            rows.append({"Category": category, "Metric": metric, "Value": match.group(1)})
    return rows


# Pair each category label with its generated summary, then flatten the
# per-section metric rows into one list (section order is preserved).
section_summaries = (
    ("Executive Summary", executive_summary_summary),
    ("Results", results_summary),
    ("Discussion", discussion_summary),
)
data = [
    row
    for label, text in section_summaries
    for row in extract_metrics(text, label)
]

filename = 'summaries_metrics_automated.csv'

# newline='' lets the csv module control line endings itself.
with open(filename, mode='w', newline='') as file:
    writer = csv.DictWriter(file, fieldnames=["Category", "Metric", "Value"])
    writer.writeheader()   # header row first
    writer.writerows(data)  # then one row per extracted metric

print(f"CSV file '{filename}' created successfully.")


All PyTorch model weights were used when initializing TFT5ForConditionalGeneration.

All the weights of TFT5ForConditionalGeneration were initialized from the PyTorch model.
If your task is similar to the task the model of the checkpoint was trained on, you can already use TFT5ForConditionalGeneration for predictions without further training.
Hardware accelerator e.g. GPU is available in the environment, but no `device` argument is passed to the `Pipeline` object. Model will be on CPU.
I0000 00:00:1723748476.188666 1722328 service.cc:146] XLA service 0x600003af6e00 initialized for platform Host (this does not guarantee that XLA will be used). Devices:
I0000 00:00:1723748476.189359 1722328 service.cc:154]   StreamExecutor device (0): Host, Default Version
2024-08-15 14:01:16.230052: I tensorflow/compiler/mlir/tensorflow/utils/dump_mlir_util.cc:268] disabling MLIR crash reproducer, set env var `MLIR_CRASH_REPRODUCER_DIRECTORY` to enable.
I0000 00:00:1723748476.421029 1722328 device_compi

CSV file 'summaries_metrics_automated.csv' created successfully.
