# IN_SYS - SW14 Exercise 1

## PDF summarization

In [None]:
# One-time dependency setup: uncomment and run these lines if the packages are missing.
# NOTE(review): the import cell below also needs `openai` and `dotenv`
# (presumably the `python-dotenv` package), which are not installed here - confirm.
# !pip install pypdf
# !pip install python-docx

In [10]:
from pypdf import PdfReader, PdfWriter
import openai
from dotenv import load_dotenv
from docx import Document
import os

# Pull variables from a local .env file into the process environment so the
# API key does not have to be hard-coded in the notebook.
load_dotenv()

# Explicitly configured client; the key is None when OPENAI_API_KEY is unset.
client = openai.OpenAI(
    api_key=os.environ.get("OPENAI_API_KEY"),
)

In [11]:
# Summarize every PDF in `pdf_folder` with the OpenAI chat API and collect the
# results in a single Word document.
#
# Inputs:  all files ending in ".pdf" (case-insensitive) inside `pdf_folder`.
# Outputs: each summary is printed, and all of them are saved to "Summaries.docx".
# Raises:  os.listdir fails if `pdf_folder` does not exist; API and PDF parsing
#          errors are not caught and abort the run.

# Folder containing the PDFs
pdf_folder = "PDF"

# Create a new Word document to store all summaries
document = Document()

# System message to guide the assistant's behavior
system_message = {
    "role": "system",
    "content": (
        "You are an assistant that extracts key information from provided text. "
        "You will be given text describing a Bachelor Degree course. "
        "Please identify the name of the BA degree course and write a short summary. "
        "Format as follows:\n\n"
        "1) A headline that is the name of the Bachelor Degree course\n"
        "2) A short paragraph summarizing the course.\n"
        "3) The name and contact details of the Studiengangsleiter or any other contact person (if provided in the text).\n"
    )
}

# os.listdir returns entries in arbitrary order; sorting keeps the order of the
# summaries in the output document reproducible between runs.
for filename in sorted(os.listdir(pdf_folder)):
    # Guard clause: ignore anything that is not a PDF
    if not filename.lower().endswith(".pdf"):
        continue

    pdf_path = os.path.join(pdf_folder, filename)

    # Extract text from PDF; pages that yield no text are dropped
    pdf_reader = PdfReader(pdf_path)
    page_texts = (page.extract_text() for page in pdf_reader.pages)
    extracted_text = "\n".join(text for text in page_texts if text)

    # If no text extracted, skip this file
    if not extracted_text.strip():
        print(f"No text found in {filename}, skipping.")
        continue

    # Prepare the messages for the API
    messages = [
        system_message,
        {
            "role": "user",
            "content": extracted_text
        }
    ]

    # Call the OpenAI API through the client configured with the loaded API key
    # (the original bypassed it via the module-level `openai` default client).
    completion = client.chat.completions.create(
        model="gpt-4o",
        messages=messages,
        max_tokens=1000,
        temperature=0.8
    )

    # Extract the response. `content` may be None, so fall back to an empty
    # string instead of crashing on `.strip()` and losing the whole batch.
    assistant_response = (completion.choices[0].message.content or "").strip()
    if not assistant_response:
        print(f"No summary returned for {filename}, skipping.")
        continue

    print(f"Summary for {filename}:\n{assistant_response}\n{'-'*40}\n")

    # Add the summary to the Word document
    # We assume the response is in a suitable format as requested.
    document.add_heading(f"Summary for {filename}", level=1)
    document.add_paragraph(assistant_response)

# Save the combined Word document
output_doc_path = "Summaries.docx"
document.save(output_doc_path)
print(f"All summaries saved to {output_doc_path}")


Summary for Studienführer Bachelor Digital Engineer Robotik & Big Data.pdf:
1) Bachelor Digital Engineer | Robotik & Big Data

2) The Bachelor Digital Engineer program at FH Zentralschweiz focuses on the digitalization of industry, providing a comprehensive education in computer science, electrical engineering, mechanical engineering, and industrial engineering. This interdisciplinary and practice-oriented course prepares students for an exciting career in the modern, connected industrial world where data and autonomous robots are key drivers. With a strong emphasis on data engineering and applied competencies, students learn to design robotic applications and leverage the data of our interconnected world from day one. The curriculum includes key technologies like data aggregation, model analysis, and machine learning, preparing graduates to develop and implement digital business models, design cyber-physical systems, and utilize artificial intelligence in data-driven industrial proces