In [35]:
import os
from llama_index.core import SimpleDirectoryReader
from llama_index.core import VectorStoreIndex
from dotenv import load_dotenv
import numpy as np
from openai import OpenAI
from llama_index.readers.wikipedia import WikipediaReader
from llama_index.core import Settings
from llama_index.core.node_parser import SentenceWindowNodeParser
from llama_index.core.node_parser import SentenceSplitter
from llama_index.llms.openai import OpenAI
from llama_index.core.postprocessor import SimilarityPostprocessor
from llama_index.core.postprocessor import MetadataReplacementPostProcessor


# Load settings from a .env file before any client is constructed.
# NOTE(review): presumably this is where the OpenAI API key comes from -- confirm.
load_dotenv()

True

In [54]:
# --- Document sources -------------------------------------------------------
# Local files under Data/ plus two Wikipedia articles.
reader = SimpleDirectoryReader(input_dir="Data/")
wikireader = WikipediaReader()

wiki_pages = ["Rules of basketball", "Basketball"]
wiki_docs = wikireader.load_data(pages=wiki_pages)
docs = reader.load_data()

# Append the Wikipedia documents in place: local documents first, wiki after.
docs += wiki_docs

# --- Parsing ----------------------------------------------------------------
# Sentence-window parser configured with a window size of 3. The "window"
# metadata key is the one the query engine's MetadataReplacementPostProcessor
# targets; "original_text" is the key for the original sentence text.
window_parser_options = dict(
    window_size=3,
    window_metadata_key="window",
    original_text_metadata_key="original_text",
)
node_parser = SentenceWindowNodeParser.from_defaults(**window_parser_options)

# The base node parser is a plain sentence splitter.
text_splitter = SentenceSplitter()

# --- Global settings --------------------------------------------------------
# `OpenAI` here is llama_index.llms.openai.OpenAI: that import comes after
# `from openai import OpenAI` in the import cell and therefore shadows it.
llm = OpenAI(model="gpt-3.5-turbo", temperature=0.1)

Settings.llm = llm
Settings.text_splitter = text_splitter

# --- Index ------------------------------------------------------------------
# The index is built from the pre-parsed sentence-window nodes rather than via
# VectorStoreIndex.from_documents(docs).
nodes = node_parser.get_nodes_from_documents(docs)
index = VectorStoreIndex(nodes=nodes)

In [40]:
# print(f"Loaded {len(docs)} docs")


# Post-processors handed to the query engine, in this order:
#   1. SimilarityPostprocessor with a similarity cutoff of 0.75
#   2. MetadataReplacementPostProcessor targeting the "window" metadata key
#      (the key the sentence-window node parser was configured with)
# NOTE(review): the recorded execution counts show this cell (In [40]) ran
# before the index-building cell (In [54]); re-run it after rebuilding `index`
# so the engine is not bound to a stale index.
postprocessors = [
    SimilarityPostprocessor(similarity_cutoff=0.75),
    MetadataReplacementPostProcessor(target_metadata_key="window"),
]

query_engine = index.as_query_engine(
    similarity_top_k=5,
    node_postprocessors=postprocessors,
)

In [55]:
# Natural-language question to run through the query engine.
query = "How old do you have to be to play in the NBA?"

# Keep the full response object: the next cell reads both `.response` and
# `.source_nodes` from it.
response = query_engine.query(query)

In [56]:
# Print the answer text followed by one blank line, then every source node
# attached to the response, one per print call.
print(response.response, end="\n\n")
for source_node in response.source_nodes:
    print(source_node)

To play in the NBA, a player must be at least 19 years old during the calendar year of the draft and at least one NBA season has passed since the player's graduation from high school.

Node ID: 31602817-21ec-40c9-b0d3-37976e021014
Text: Coaching was originally prohibited during the game, but from
1949, coaches were allowed to address players during a time-out.
Originally a player was disqualified on his second foul.  This limit
became four fouls in 1911 and five fouls in 1945, still the case in
most forms of basketball where the normal length of the game (before
any overtime p...
Score:  0.824

Node ID: 64d56f5a-7b77-474c-8fde-869b80bbfc3e
Text: The rules of basketball are the rules and regulations that
govern the play, officiating, equipment and procedures of basketball.
While many of the basic rules are uniform throughout the world,
variations do exist.  Most leagues or governing bodies in North
America, the most important of which are the National Basketball
Association and NCAA, fo