In [1]:
# Libraries to install (uncomment the lines below on first run).
# !pip install -U sentence-transformers
# !pip install wikipedia-api
# !pip install accelerate
# !pip install bitsandbytes

In [None]:
from wikipediaapi import Wikipedia
import numpy as np

# Fetch the article text from Wikipedia. The first argument is the user agent
# sent with each request, the second is the language edition.
wiki = Wikipedia("RAGBot/0.0", "en")

page = wiki.page("Pokemon")

# Fail early: a missing page would otherwise flow through the notebook as
# empty text and only surface later as meaningless retrieval results.
if not page.exists():
    raise ValueError("Wikipedia page 'Pokemon' was not found.")

# Show only a preview; printing the full article floods the cell output.
preview_chars = 1000
print(page.text[:preview_chars])

In [3]:
# Split the wall of text into paragraph-sized chunks on blank lines.
# Runs of consecutive blank lines produce empty / whitespace-only pieces; drop
# them so they are never embedded or retrieved as "context".
text_chunks = [chunk for chunk in page.text.split("\n\n") if chunk.strip()]

In [4]:
# Chunk overlapping. Ensures context is retained across chunks.
overlap_amount = 10


def add_chunk_overlap(chunks, overlap_amount):
    """Return a new list where each chunk is prefixed with the tail of the previous one.

    Args:
        chunks: Sequence of text chunks, in document order.
        overlap_amount: Maximum number of trailing words copied from the
            previous (already prefixed) chunk onto the start of the next one.

    Returns:
        A new list of the same length; the first chunk is unchanged and every
        later chunk is "<tail words> <original chunk>".
    """
    overlapped = list(chunks[:1])
    for chunk in chunks[1:]:
        prev_words = overlapped[-1].split()
        # Clamp the start at 0: a negative start would be interpreted as an
        # index from the end and return too few words whenever the previous
        # chunk is shorter than overlap_amount.
        start = max(len(prev_words) - overlap_amount, 0)
        overlapped.append(" ".join(prev_words[start:]) + " " + chunk)
    return overlapped


# NOTE: this rebinds text_chunks, so re-running this cell without re-running
# the splitting cell first will stack a second overlap prefix on every chunk.
text_chunks = add_chunk_overlap(text_chunks, overlap_amount)

In [None]:
# Spot-check the second chunk.
second_chunk = text_chunks[1]
print(second_chunk)

In [None]:
from sentence_transformers import SentenceTransformer

# Load the text encoder model used to embed both the chunks and the query.
encoder_checkpoint = "BAAI/bge-small-en-v1.5"
sentence_encoder = SentenceTransformer(encoder_checkpoint)

In [None]:
# Convert every text chunk into an embedding vector. The embeddings are
# normalized so a plain dot product can be used to compare them later.
chunks_embedding = sentence_encoder.encode(
    text_chunks,
    normalize_embeddings=True,
)

In [8]:
# The question to answer.
user_input = "What was Ash's first pokemon?"

# Encode the question with the same encoder and normalization as the chunks so
# the two can be compared with a dot product.
input_embedding = sentence_encoder.encode(
    user_input,
    normalize_embeddings=True,
)

In [9]:
# Score every chunk against the query: one dot product per chunk embedding.
similarities = chunks_embedding @ input_embedding.T

In [None]:
# Number of best-matching chunks to keep.
k = 5

# Sort ascending, reverse to descending, then keep the FIRST k scores.
# (`[:k]` takes the first k items; `[::k]` would instead take every k-th item.)
# These are similarity scores shown for inspection only; the variable name is
# kept as-is so any other cell referring to it keeps working.
top_k_probabiliies = np.sort(similarities)[::-1][:k]
top_k_probabiliies

In [None]:
# Indices of the k highest-scoring chunks, best match first.
# argsort is ascending, so reverse it and keep the FIRST k entries
# (`[:k]`, not `[::k]`, which would pick every k-th index instead).
top_k_indices = np.argsort(similarities)[::-1][:k]
top_k_indices

In [12]:
# Pull out the paragraphs that scored highest against the query.
chunk_array = np.array(text_chunks)
top_k_paragraphs = chunk_array[top_k_indices]

In [None]:
# Print each retrieved paragraph beneath a divider line.
divider = "----------------------------------------------------------------\n"
for paragraph in top_k_paragraphs:
    print(divider, paragraph, sep="\n")

In [14]:
from transformers import AutoModelForCausalLM, AutoTokenizer

# Checkpoint shared by the LLM and its tokenizer.
model_name = "Qwen/Qwen2-1.5B"

# Load the LLM, letting device_map="auto" decide where the weights are placed.
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    device_map="auto",
)

# Load the tokenizer that corresponds to the LLM.
tokenizer = AutoTokenizer.from_pretrained(
    model_name,
    use_fast=True,
)

In [None]:
# Set up the instruction, context, and user input string for the model.

# Retrieved paragraphs, each followed by a blank line, under a "Context:" header.
context_string = "Context:\n\n" + "".join(
    paragraph + "\n\n" for paragraph in top_k_paragraphs
)

# System-style instructions placed at the top of the prompt.
instruction_strings = """
You are a personal assistant who specializes in a wide variety of topics. Communicate in a clear, accessible language, escalating to technical depth upon request.
  React to feedback aptly and end responses with '-Yours Truly'.

  You will tailor the length of its responses to match the viewer's comment, providing concise acknowledgments to brief expressions of gratitude or feedback,
thus keeping the interaction natural and engaging.
"""


def prompt(context, user_input):
    """Assemble the full prompt text sent to the LLM.

    Args:
        context: Retrieved context block to place before the question.
        user_input: The user's question / comment.

    Returns:
        The prompt string: instructions, context, and question wrapped in
        [INST] ... [/INST] tags.
    """
    # The closing tag is "[/INST]"; the previous "[\INST]" relied on an invalid
    # escape sequence ("\I") and did not match the opening tag.
    # NOTE(review): confirm this tag format suits the loaded model.
    return f"""
[INST]
{instruction_strings}

{context}

Please answer the following comment. Use the context above if it helps.

{user_input}
[/INST]

"""


# Ask the model the same question that was used for retrieval, so the context
# and the question cannot drift apart.
model_input = prompt(context_string, user_input)

In [None]:
# Tokenize the prompt and move the tensors to the model's device: the model is
# loaded with device_map="auto", so it may not be on the CPU where the
# tokenizer creates its tensors.
tokens = tokenizer(model_input, return_tensors="pt").to(model.device)

# Unpack the whole encoding so the attention mask is passed along with the
# input ids. Generate at most 200 new tokens.
result = model.generate(**tokens, max_new_tokens=200)