In [None]:
%pip install nest_asyncio

In [None]:
import nest_asyncio
# Patch asyncio so an event loop can be re-entered while another one is running.
# NOTE(review): presumably required because the Jupyter kernel already owns a
# running loop and the LlamaIndex calls below start their own - confirm.
nest_asyncio.apply()

In [None]:
from llama_index.core import Settings
from llama_index.llms.ollama import Ollama

# Register a locally served Ollama model as the global default LLM; components
# that are not given an explicit `llm` fall back to `Settings.llm`.
Settings.llm = Ollama(
    base_url='http://localhost:11434',  # address of the local Ollama server
    model='llama3.2:latest',
    temperature=0.1,  # low temperature keeps the generated text stable
)


In [None]:
from llama_index.core import Document

test_document = Document(text="""
                        LLMs offer a natural language interface between humans and data. LLMs come pre-trained on huge amounts of publicly available data, but they are not trained on your data. Your data may be private or specific to the problem you're trying to solve. It's behind APIs, in SQL databases, or trapped in PDFs and slide decks.
                        Context augmentation makes your data available to the LLM to solve the problem at hand. LlamaIndex provides the tools to build any of context-augmentation use case, from prototype to production. Our tools allow you to ingest, parse, index and process your data and quickly implement complex query workflows combining data access with LLM prompting.
                        The most popular example of context-augmentation is Retrieval-Augmented Generation or RAG, which combines context with LLMs at inference time.
                         """)
# Extract metadata
summary_result = await summary_extractor.aprocess_nodes([test_document])

In [None]:
# Bare last expression: display the value returned by aprocess_nodes() above.
summary_result

In [None]:
# Report one line per processed node: its id and the extracted summary
# (falls back to a placeholder when no 'section_summary' key was written).
print("Summary Output:")
for node in summary_result:
    print(
        f"Node ID: {node.node_id}, "
        f"Summary: {node.metadata.get('section_summary', 'No summary available')}"
    )


In [None]:
from llama_index.core import SimpleDirectoryReader

# Load the source file into Document objects.
# Alternative input: replace the path below with '../data/2022 Q3 AAPL.pdf'.
documents = SimpleDirectoryReader(
    input_files=['../data/paul_graham_essay3.txt'],
).load_data()

In [None]:
from llama_index.core.extractors import SummaryExtractor



# Initialize the summary extractor.
# `nodes=5` is a TitleExtractor option and is not a SummaryExtractor parameter,
# so it is dropped here. SummaryExtractor is configured through `summaries`:
# "self" summarizes each node's own text and stores it under 'section_summary'
# ("prev" / "next" would add summaries of the neighbouring nodes).
summary_extractor = SummaryExtractor(summaries=["self"])


In [None]:
from llama_index.core.node_parser import SentenceSplitter
from llama_index.core.ingestion import IngestionPipeline

# Stage 1: split text into manageable chunks.
sentence_splitter = SentenceSplitter(chunk_size=1024, chunk_overlap=50)

# Stage 2: extract summaries. Transformations run in list order, so the
# extractor receives the chunks produced by the splitter.
pipeline = IngestionPipeline(transformations=[sentence_splitter, summary_extractor])


In [None]:
# Run every transformation over the loaded documents and keep the resulting
# nodes; show_progress=True requests progress output while the pipeline runs.
nodes = pipeline.run(documents=documents, show_progress=True, in_place=True)


In [None]:
# Bare last expression: display the nodes returned by pipeline.run() above.
nodes

In [None]:
# Print each node's id and summary, followed by a separator line. A single
# print() with a newline separator emits the same three lines per node.
for node in nodes:
    print(
        f"Node ID: {node.node_id}",
        f"Summary: {node.metadata.get('section_summary', 'No summary available')}",
        "======================================================",
        sep="\n",
    )
