# 📝 Session 1: ESG PDF Exploration

In this notebook, you'll explore a real ESG report (TotalEnergies 2024) and begin working with its raw content.

## 📥 Step 1: Load the PDF and View Metadata

In [None]:
import fitz  # PyMuPDF

# Path to the ESG report, relative to this notebook's directory.
pdf_path = "../../data/totalenergies_report.pdf"

# Open the document once; later cells index into `doc` to read page text.
doc = fitz.open(pdf_path)

print(f"Total number of pages: {len(doc)}")

# Print the document's metadata entries so this step shows what its heading
# promises. Per the PyMuPDF docs `metadata` can be None (e.g. for an encrypted
# file that has not been unlocked), hence the empty-dict fallback.
for key, value in (doc.metadata or {}).items():
    print(f"{key}: {value}")

## 📄 Step 2: Display the First Few Pages

In [None]:
# Print the first few pages to get a feel for the structure.
# The count is clamped to the document length so that a PDF with fewer than
# 3 pages does not raise IndexError on doc[i].
pages_to_show = min(3, len(doc))
for i in range(pages_to_show):
    print(f"--- Page {i+1} ---")
    print(doc[i].get_text())
    print("\n" + "="*80 + "\n")

## ✍️ Step 3: Manually Copy a Paragraph to Prompt the LLM

In [None]:
# Context for the LLM: the text of the third page (index 2).
# Replace this with a manually pasted paragraph if you prefer.
context = doc.load_page(2).get_text()

question = "What are TotalEnergies' sustainability goals for 2030?"

# Show only the first 500 characters so the preview stays readable.
print("📌 Prompt context preview:", f"{context[:500]} ...", sep="\n")

## 🤖 Step 4: Use OpenAI API to Ask a Question

In [None]:
from openai import OpenAI
import yaml

# Load config (provides the model name and the API key).
# NOTE(review): this reads from the *template* config file — confirm that a
# real API key is never committed there.
with open("../../config/openai_config_template.yaml", "r", encoding="utf-8") as f:
    config = yaml.safe_load(f)

# Initialize OpenAI client
client = OpenAI(api_key=config["openai_api_key"])

# Reuse `context` and `question` from Step 3. Re-assigning `context` to a
# placeholder here would discard the extracted page text, and the model would
# be asked to answer from a document consisting of "...".
prompt = f"Based on this document:\n{context}\n\nAnswer this question: {question}"

# LLM call
response = client.chat.completions.create(
    model=config["model"],
    messages=[
        {"role": "system", "content": "You are an ESG analyst assistant."},
        {"role": "user", "content": prompt},
    ],
    temperature=0.0,
)

# Extract and print the answer
print("💬 LLM Answer:")
print(response.choices[0].message.content)

## 🧠 Optional: Explore the Kaggle Dataset (for Comparison)

In [None]:
import pandas as pd

# Load the Kaggle ESG dataset shipped alongside the report.
df = pd.read_csv("../../data/kaggle_sp500_ESG_dataset.csv")

# Preview the first rows, restricted to the three columns of interest.
df.loc[:, ["filename", "year", "total_score"]].head()