# Summary
This repo demonstrates that building a Property Graph with a `SchemaLLMPathExtractor` fails to extract any entities when the LLM is `HuggingFaceInferenceAPI`, whereas the same extractor settings succeed when the LLM is Ollama.

Most of this code is taken directly from the [advanced property graph example](https://github.com/run-llama/llama_index/blob/main/docs/docs/examples/property_graph/property_graph_advanced.ipynb) in the LlamaIndex repository.



# Setup

In [None]:
%pip install llama-index
%pip install llama-index-llms-ollama
%pip install llama-index-embeddings-huggingface
%pip install llama-index-graph-stores-neo4j

In [None]:
# Load the source documents from the local ./data directory.
from llama_index.core import SimpleDirectoryReader

documents = SimpleDirectoryReader(input_dir="./data").load_data()

In [None]:
# Set up asyncio
# nest_asyncio.apply() patches asyncio so an event loop can be re-entered.
# NOTE(review): presumably required because the notebook kernel already runs
# an event loop while the index build below issues async calls - confirm.
import nest_asyncio

nest_asyncio.apply()

In [None]:
# Define schema and extractor

from typing import Literal
from llama_index.llms.ollama import Ollama
from llama_index.core.indices.property_graph import SchemaLLMPathExtractor

# Labels the extractor is allowed to emit (best practice: upper-case).
entities = Literal["PERSON", "PLACE", "ORGANIZATION"]
relations = Literal["HAS", "PART_OF", "WORKED_ON", "WORKED_WITH", "WORKED_AT"]

# Define which entities can have which relations, one
# (entity, relation, entity) triple per allowed combination.
# Every label used here must appear in `entities` / `relations` above.
validation_schema = [
    ("ORGANIZATION", "HAS", "PERSON"),
    ("PERSON", "WORKED_AT", "ORGANIZATION"),
    ("PERSON", "WORKED_WITH", "PERSON"),
    ("PERSON", "WORKED_ON", "ORGANIZATION"),
    ("PERSON", "PART_OF", "ORGANIZATION"),
    ("ORGANIZATION", "PART_OF", "ORGANIZATION"),
    ("PERSON", "WORKED_AT", "PLACE"),
]

## Set up Neo4j
I am running this locally using Neo4j Desktop on a Windows computer.

In [None]:
import os

from llama_index.graph_stores.neo4j import Neo4jPropertyGraphStore

# The Neo4j password is read from the environment rather than hard-coded here.
password = os.environ.get("NEO_4J_PASSWORD")

# Property graph store pointing at the local Neo4j instance (Bolt protocol).
graph_store = Neo4jPropertyGraphStore(
    url="bolt://localhost:7687",
    username="neo4j",
    password=password,
)

# Create Index with Ollama

In [None]:
from llama_index.core import PropertyGraphIndex
from llama_index.embeddings.huggingface import HuggingFaceEmbedding

# Schema-constrained path extractor driven by the llama3 model through Ollama.
kg_extractor = SchemaLLMPathExtractor(
    llm=Ollama(
        model="llama3",
        json_mode=True,
        request_timeout=3600,
    ),
    strict=True,
    possible_entities=entities,
    possible_relations=relations,
    kg_validation_schema=validation_schema,
)

# Build the index from the loaded documents and write it to the Neo4j store.
index = PropertyGraphIndex.from_documents(
    documents,
    property_graph_store=graph_store,
    kg_extractors=[kg_extractor],
    embed_model=HuggingFaceEmbedding(model_name="BAAI/bge-small-en-v1.5"),
    show_progress=True,
)

## Ollama Results
This is a picture from Neo4j desktop using the query, MATCH(n:\`\_\_Entity\_\_\`) RETURN n

![Neo4j graph of the entities extracted with Ollama](pictures/ollama_schema.svg "Ollama Schema")

# Create Index with Hugging Face Inference API

In [None]:
import os

from llama_index.core import PropertyGraphIndex
from llama_index.embeddings.huggingface import HuggingFaceEmbedding
from llama_index.llms.huggingface_api import HuggingFaceInferenceAPI

# Hugging Face access token, read from the environment.
token = os.environ.get("HUGGING_FACE_KEY")

# Same entities, relations, schema and strict flag as the Ollama run;
# the LLM is swapped for the Hugging Face Inference API.
kg_extractor = SchemaLLMPathExtractor(
    llm=HuggingFaceInferenceAPI(
        model_name="meta-llama/Meta-Llama-3-70B-Instruct",
        token=token,
    ),
    strict=True,
    possible_entities=entities,
    possible_relations=relations,
    kg_validation_schema=validation_schema,
)

# Build the index from the loaded documents and write it to the Neo4j store.
index = PropertyGraphIndex.from_documents(
    documents,
    property_graph_store=graph_store,
    kg_extractors=[kg_extractor],
    embed_model=HuggingFaceEmbedding(model_name="BAAI/bge-small-en-v1.5"),
    show_progress=True,
)

## Hugging Face Inference API Results
This is a picture from Neo4j desktop using the query, MATCH(n) RETURN n

You can see that no entities are extracted, only chunks.


![alt text](pictures/hf_schema.svg "Hugging Face Schema")

# Create Index with Hugging Face Inference API and SimpleLLMPathExtractor
As a bonus, here is a demo of the Hugging Face Inference API working with the `SimpleLLMPathExtractor`.

In [None]:
import os

from llama_index.core import PropertyGraphIndex
from llama_index.core.indices.property_graph import SimpleLLMPathExtractor
from llama_index.embeddings.huggingface import HuggingFaceEmbedding
from llama_index.llms.huggingface_api import HuggingFaceInferenceAPI

# Hugging Face access token, read from the environment.
token = os.environ.get("HUGGING_FACE_KEY")

# Schema-free extractor using the Hugging Face Inference API as the LLM.
kg_extractor = SimpleLLMPathExtractor(
    llm=HuggingFaceInferenceAPI(
        model_name="meta-llama/Meta-Llama-3-70B-Instruct",
        token=token,
    ),
    num_workers=4,
    max_paths_per_chunk=10,
)

# Build the index from the loaded documents and write it to the Neo4j store.
index = PropertyGraphIndex.from_documents(
    documents,
    property_graph_store=graph_store,
    kg_extractors=[kg_extractor],
    embed_model=HuggingFaceEmbedding(model_name="BAAI/bge-small-en-v1.5"),
    show_progress=True,
)

## Hugging Face Inference API Results with Simple LLM Path Extractor
This is a picture from Neo4j desktop using the query, ``MATCH(n:`__Entity__`) RETURN n``


![alt text](pictures/hf_simple.svg "Hugging Face Simple")