In [None]:
%pip install llama-index-extractors-entity

In [None]:
# nest_asyncio is imported and applied in the next cell.
%pip install nest_asyncio

In [None]:
import nest_asyncio
# Patch asyncio so that event loops can be nested; presumably needed because
# the notebook kernel already runs a loop while the extractors/pipeline below
# drive their own async work — TODO confirm for the installed versions.
nest_asyncio.apply()

In [None]:
from llama_index.core import Settings
from llama_index.llms.ollama import Ollama

# Build the Ollama-backed LLM client first, then register it on the shared
# Settings object so later components can pick it up from there.
ollama_llm = Ollama(
    base_url="http://localhost:11434",
    model="llama3.2:latest",
    temperature=0.1,
)
Settings.llm = ollama_llm


In [None]:
from llama_index.core import SimpleDirectoryReader

# Files to ingest. To run on the PDF instead, replace the entry below with
# '../data/2022 Q3 AAPL.pdf'.
input_paths = ["../data/paul_graham_essay3.txt"]

reader = SimpleDirectoryReader(input_files=input_paths)
documents = reader.load_data()


In [None]:
from llama_index.core.extractors import SummaryExtractor
from llama_index.extractors.entity import EntityExtractor



# Initialize extractors.
#
# SummaryExtractor does not take a `nodes` argument; the previous `nodes=5`
# was accepted as an extra keyword and had no effect. Which summaries are
# generated is chosen through `summaries` ("self", "prev", "next"); ["self"]
# produces one summary per node, stored under the `section_summary` key.
summary_extractor = SummaryExtractor(summaries=["self"])

# EntityExtractor does not take an `entity_types` argument either, so the
# previous list was not filtering anything. `label_entities=True` asks the
# extractor to file entities under per-type metadata keys rather than one
# combined list — NOTE(review): confirm against the installed
# llama-index-extractors-entity version.
entity_extractor = EntityExtractor(label_entities=True)


In [None]:
from llama_index.core.node_parser import SentenceSplitter
from llama_index.core.ingestion import IngestionPipeline

# Stage 1: split raw text into chunks of up to 1024 with an overlap of 50.
sentence_splitter = SentenceSplitter(chunk_size=1024, chunk_overlap=50)

# Stage 2: attach summaries to each chunk. Transformations run in list order.
ingestion_steps = [sentence_splitter, summary_extractor]

pipeline = IngestionPipeline(transformations=ingestion_steps)


In [None]:
from llama_index.core import Document

# Smoke-test both extractors on a one-sentence document before the full run.
test_document = Document(text="LlamaIndex simplifies metadata extraction.")

# `aprocess_nodes` is a coroutine function: calling it without awaiting only
# creates a coroutine object and the extractor never runs. `process_nodes` is
# the synchronous entry point and returns the processed nodes.
summary_test_nodes = summary_extractor.process_nodes([test_document])
entity_test_nodes = entity_extractor.process_nodes([test_document])

# Show the metadata each extractor produced for the test document.
display(summary_test_nodes[0].metadata, entity_test_nodes[0].metadata)

In [None]:
# Execute the ingestion pipeline over the loaded documents, with a progress
# display enabled. Options are collected separately from the input data.
run_options = {"in_place": True, "show_progress": True}
nodes = pipeline.run(documents=documents, **run_options)


In [None]:
# Report how many nodes were produced and preview only the first few, so the
# cell output stays readable instead of dumping every node.
print(f"{len(nodes)} nodes produced")
nodes[:3]

In [None]:
# Print each node's id and its summary from the `section_summary` metadata
# key, with a fallback message when the key is absent.
for node in nodes:
    summary_text = node.metadata.get('section_summary', 'No summary available')
    print(f"Node ID: {node.node_id}")
    print(f"Summary: {summary_text}")
    print("======================================================")
