In [1]:
import os
from llama_index.core import SimpleDirectoryReader
from llama_index.core import VectorStoreIndex
from dotenv import load_dotenv
import numpy as np
from openai import OpenAI
from llama_index.readers.wikipedia import WikipediaReader
from llama_index.core import Settings
from llama_index.core.node_parser import SentenceWindowNodeParser
from llama_index.core.node_parser import SentenceSplitter
from llama_index.llms.openai import OpenAI
from llama_index.core.postprocessor import SimilarityPostprocessor
from llama_index.core.postprocessor import MetadataReplacementPostProcessor
from llama_index.core import Settings
import torch
from llama_index.llms.huggingface import HuggingFaceLLM
from llama_index.core import PromptTemplate
from llama_index.embeddings.huggingface import HuggingFaceEmbedding
from llama_index.core.service_context import ServiceContext


load_dotenv()

True

In [2]:
import torch 

print(torch.backends.mps.is_available()) #the MacOS is higher than 12.3+
print(torch.backends.mps.is_built()) #MPS is activated

True
True


In [3]:
from huggingface_hub import login

login(os.getenv('HUGGING_FACE_API_KEY'))

Token will not been saved to git credential helper. Pass `add_to_git_credential=True` if you want to set the git credential as well.
Token is valid (permission: read).
Your token has been saved to /Users/user/.cache/huggingface/token
Login successful


In [10]:
llama = True

if llama:
    # Model names (make sure you have access on HF)
    LLAMA2_7B = "meta-llama/Llama-2-7b-hf"
    LLAMA2_7B_CHAT = "daryl149/llama-2-7b-chat-hf"
    LLAMA2_7B_CHAT = "openlm-research/open_llama_3b_v2"
    LLAMA2_7B_CHAT = "TinyLlama/TinyLlama-1.1B-step-50K-105b"
    LLAMA2_13B = "meta-llama/Llama-2-13b-hf"
    LLAMA2_13B_CHAT = "meta-llama/Llama-2-13b-chat-hf"
    LLAMA2_70B = "meta-llama/Llama-2-70b-hf"
    LLAMA2_70B_CHAT = "meta-llama/Llama-2-70b-chat-hf"

    selected_model = LLAMA2_7B_CHAT

    SYSTEM_PROMPT = """You are an AI assistant that answers questions in a friendly manner, based on the given source documents. Here are some rules you always follow:
    - Generate human readable output, avoid creating output with gibberish text.
    - Generate only the requested output, don't include any other language before or after the requested output.
    - Never say thank you, that you are happy to help, that you are an AI agent, etc. Just answer directly.
    - Generate professional language typically used in business documents in North America.
    - Never generate offensive or foul language.
    """

    query_wrapper_prompt = PromptTemplate(
        "[INST]<<SYS>>\n" + SYSTEM_PROMPT + "<</SYS>>\n\n{query_str}[/INST] "
    )

    llm = HuggingFaceLLM(
        # context_window=350,
        # max_new_tokens=256,
        generate_kwargs={"temperature": 0.1, "do_sample": False},
        # query_wrapper_prompt=query_wrapper_prompt,
        tokenizer_name=selected_model,
        model_name=selected_model,
        device_map="auto",
        # change these settings below depending on your GPU
        model_kwargs={"torch_dtype": torch.float16},
    )
    
    embed_model = HuggingFaceEmbedding(model_name="BAAI/bge-small-en-v1.5")

else:
    
    llm = OpenAI(model="gpt-3.5-turbo", temperature=0.1)
    



In [11]:
reader = SimpleDirectoryReader(input_dir="Data/")

wikireader = WikipediaReader()
wiki_docs = wikireader.load_data(pages=['Rules of basketball', 'Basketball'])
docs = reader.load_data()

docs.extend(wiki_docs)

node_parser = SentenceWindowNodeParser.from_defaults(
    window_size=3,
    window_metadata_key="window",
    original_text_metadata_key="original_text",
)

# base node parser is a sentence splitter
text_splitter = SentenceSplitter()
nodes = node_parser.get_nodes_from_documents(docs)


if llama:
    service_context = ServiceContext.from_defaults(
                                    llm=llm,
                                    embed_model="local",
                                    text_splitter=text_splitter,
                                    context_window=2048,
                                    num_output=1024,
                                    )
    
    index = VectorStoreIndex(nodes, service_context=service_context)


else:
    index = VectorStoreIndex(nodes)


  service_context = ServiceContext.from_defaults(


In [12]:
# print(f"Loaded {len(docs)} docs")


query_engine = index.as_query_engine(similarity_top_k=5, 
                                     node_postprocessors=[SimilarityPostprocessor(similarity_cutoff=0.75),
                                                         MetadataReplacementPostProcessor(target_metadata_key="window")])

In [13]:
query = 'How many fauls for a NBA team to get to the bonus? Explain what is bonus.'

response = query_engine.query(query)

In [14]:
print(response.response)
print()
for node in response.source_nodes:
    print(node)


1 player is- or or the is or the or ball.
players.
players.
1 player is or1 player.
-1 player.0.
1 only.
10 ball.





the 1 player. is the 1.
12
1 ball and the player1

4.
.
 player.
1  the player4














.
2
Pur player
Player
112.1-the

 1.
1.
.
1.

.
1.
1.




.
1














The.









.
1















































































Node ID: 18a0fa9c-d822-4ad1-8560-94b4e826a80c
Text: Any defensive player, who is positioned in the 16-foot lane or
the area extending 4 feet past the lane endline, must be actively
guarding an opponent within three seconds.   Actively guarding means
being within arm’s length of an offensive player and in a guarding
position.  Any defensive player may play any offensive player.  The
defenders may ...
Score:  0.854

Node ID: 64f488ed-a7d7-4988-9480-5b2bdbf86d54
Text: The 3-second count shall not begin until the ball is in control
in the offensive team’s frontcourt.  No violation can occur if the
ball is batte

In [9]:
print(response.response)
print()
for node in response.source_nodes:
    print(node)


1 or.
1,010012s (or.
1.0.

1

0011,1 at the 2.1,end.
1:41:10 (or.
1.


.0:s ( or the Pos, ,1:,1:.
1. 0.1.
00
10:step.14.
1.1.
.1.










1.1.0.
1. 11.1, 1.1a:1 player the 1.1.1.
.

11.111.
:
111011..
1011111.1.:11. 11.
.11.1.11








01.10-11.11:1

1.111.111.11.1.1.
_10.10.1.111111111

Node ID: dc643f60-599f-4a3a-82e7-22f07b1d2f28
Text: Any defensive player, who is positioned in the 16-foot lane or
the area extending 4 feet past the lane endline, must be actively
guarding an opponent within three seconds.   Actively guarding means
being within arm’s length of an offensive player and in a guarding
position.  Any defensive player may play any offensive player.  The
defenders may ...
Score:  0.854

Node ID: d8286239-5834-485d-81fc-0e033a597e93
Text: Section XII—Illegal Assist in Scoring  A player may not assist
himself in an attempt to score by using any part of the rim, net,
backboard or basket support to lift, hold or raise himself.  A player
may not assist a teammate to gain heigh