# LLM Paper Summarization

This notebook demonstrates how to use the `LLMService` to generate audio-friendly summaries of research papers.

## Setup

In [None]:
import sys
from pathlib import Path
import json
import os

# Add parent directory to path
sys.path.append(str(Path.cwd().parent))

from src.services import LLMService, AnthropicProvider
from src.models import Paper

## Load a Downloaded Paper

First, let's load the metadata of a previously downloaded paper from the `downloads` folder.

In [None]:
# Locate the metadata JSON files in the downloads folder.
download_dir = Path("downloads")
# glob() yields matches in filesystem-dependent order; sorting makes the
# "first paper" selection below stable across runs and machines.
metadata_files = sorted(download_dir.glob("*.json"))

if not metadata_files:
    print("No metadata files found in downloads folder!")
    print("Please download a paper first using the arxiv_example.ipynb notebook")
else:
    # Load the first paper's metadata (first by sorted path)
    metadata_path = metadata_files[0]

    # Read as UTF-8 explicitly instead of relying on the platform's locale default
    with open(metadata_path, 'r', encoding='utf-8') as f:
        metadata = json.load(f)

    # Build the project's Paper model from the raw metadata dict
    paper = Paper.from_dict(metadata)

    print(f"Found {len(metadata_files)} paper(s) in downloads folder")
    print(f"\nPaper: {paper.title}")
    print(f"Authors: {', '.join(a.name for a in paper.authors)}")
    print(f"Published: {paper.published.strftime('%B %Y')}")

## Find Extracted Content

Check if we have already extracted the PDF content for this paper.

In [None]:
if metadata_files:
    # Derive the extracted-content path from the paper's PDF filename:
    # extracted_content/<pdf filename without ".pdf">.md
    extracted_dir = Path("extracted_content")
    # NOTE(review): replace() strips every ".pdf" occurrence, not just the suffix;
    # kept as-is so the name matches what the extraction step presumably wrote — confirm.
    base_filename = paper.pdf_filename.replace(".pdf", "")
    extracted_path = extracted_dir / f"{base_filename}.md"

    if not extracted_path.exists():
        print(f"Extracted content not found at: {extracted_path}")
        print("Please extract the PDF content first using the pdf_extraction_example.ipynb notebook")
    else:
        print(f"Found extracted content at: {extracted_path}")

        # Show size of extracted content.
        # Decode as UTF-8 explicitly: the locale default (e.g. cp1252 on Windows)
        # can fail on the non-ASCII characters common in paper text.
        with open(extracted_path, 'r', encoding='utf-8') as f:
            content = f.read()
        print(f"Content length: {len(content):,} characters")

## Initialize LLM Service

The service requires an `ANTHROPIC_API_KEY` environment variable to be set.

In [None]:
# Build the Anthropic provider explicitly, taking the API key from the environment.
# Environment variable names are case-sensitive on POSIX systems, so look up the
# documented upper-case name first; the lower-case spelling is kept as a fallback
# for setups that already export it that way.
anthropic_api_key = os.getenv("ANTHROPIC_API_KEY") or os.getenv("anthropic_api_key")
if not anthropic_api_key:
    # Surface the problem here rather than at the first API call
    print("Warning: ANTHROPIC_API_KEY is not set; requests to the LLM provider will likely fail")

provider = AnthropicProvider(model="claude-haiku-4-5", api_key=anthropic_api_key)
llm_service = LLMService(provider=provider)

print("LLM Service initialized successfully")

## Generate Summary

Now let's generate an audio-friendly summary of the paper.

In [None]:
# Only summarize when a paper was loaded and its extracted Markdown is on disk.
# The `and` short-circuits, so `extracted_path` is never touched when no
# metadata files were found.
ready_to_summarize = bool(metadata_files) and extracted_path.exists()

if ready_to_summarize:
    print("Generating summary...\n")

    # Arguments for the summarization call, gathered in one place
    summary_options = {
        "paper": paper,
        "extracted_content_path": str(extracted_path),
        "prompt_name": "summarize_paper",
        "temperature": 0.7,
        "max_tokens": 4096,
        "output_dir": "summaries",
    }
    summary = llm_service.summarize_paper_from_files(**summary_options)

    print("GENERATED SUMMARY")
    print(summary)