In [3]:
!pip install accelerate
!pip install ctransformers
!pip install langchain
# !pip install weaviate-client

from IPython.display import clear_output
clear_output()

In [1]:
import json
import os

import weaviate

# Connection settings are read from the environment so that an API key never has
# to be pasted into the notebook. With neither variable set, this reproduces the
# original behaviour: an unauthenticated connection to the case-study cluster.
WEAVIATE_URL = os.environ.get(
    "WEAVIATE_URL", "https://genai-case-study-7d99r5vj.weaviate.network"
)
WEAVIATE_API_KEY = os.environ.get("WEAVIATE_API_KEY")

# Build an auth object only when a key was supplied; None means no authentication.
auth_config = (
    weaviate.AuthApiKey(api_key=WEAVIATE_API_KEY) if WEAVIATE_API_KEY else None
)

client = weaviate.Client(
    url=WEAVIATE_URL,
    auth_client_secret=auth_config,
)

In [None]:
# Sanity check: print whether the Weaviate client reports the instance as ready.
print('is_ready:', client.is_ready())

In [3]:
from functools import lru_cache

from transformers import DPRContextEncoder, DPRContextEncoderTokenizer

# Checkpoint used for both the tokenizer and the encoder.
DPR_CTX_ENCODER_NAME = "facebook/dpr-ctx_encoder-single-nq-base"


@lru_cache(maxsize=1)
def _load_dpr_context_encoder():
  """Load the DPR context tokenizer and encoder once; later calls reuse them."""
  tokenizer = DPRContextEncoderTokenizer.from_pretrained(DPR_CTX_ENCODER_NAME)
  model = DPRContextEncoder.from_pretrained(DPR_CTX_ENCODER_NAME)
  return tokenizer, model


def generate_embeddings(text):
  """
  Embed a piece of text with the DPR context encoder.

  The tokenizer and model are loaded on the first call only; reloading them on
  every call (as this function used to) repeats an expensive checkpoint load.

  NOTE(review): the query is embedded with the *context* encoder. DPR also ships
  a separate question encoder - confirm this matches how the index was built.

  Args:
      text (str): text to embed.

  Returns:
      torch.Tensor: the encoder's `pooler_output` for the tokenized input.
  """
  tokenizer, model = _load_dpr_context_encoder()
  # Truncate to the tokenizer's configured maximum length so that an over-long
  # input is clipped instead of overflowing the encoder.
  input_ids = tokenizer(text, return_tensors="pt", truncation=True)["input_ids"]
  embeddings = model(input_ids).pooler_output
  return embeddings

In [None]:
# The user's question, and its embedding used for the vector search in the next cell.
query = "How to install the air filter?"
query_vector = generate_embeddings(query)

In [5]:
# Near-vector search on the "DocumentSearch" class: request the `source_text`
# property of at most 5 objects close to `query_vector`, passing a certainty of 0.7.
result = client.query.get("DocumentSearch", ["source_text"]).with_limit(5).with_near_vector({
    "vector": query_vector,
    "certainty": 0.7
}).do()

In [6]:
# Pull the list of retrieved objects out of the GraphQL response. When the query
# failed, the expected path is missing; raise a readable error that includes the
# server's "errors" entry (or the whole response) instead of an opaque
# KeyError/TypeError.
output = ((result.get("data") or {}).get("Get") or {}).get("DocumentSearch")
if output is None:
  raise RuntimeError(
      f"Weaviate query returned no data: {result.get('errors', result)}"
  )

In [7]:
# Separator between a chunk's text and its trailing metadata in `source_text`.
META_SEPARATOR = " [META] "


def _split_source_text(raw_text):
  """
  Split a stored chunk into its text and its start offset.

  The text is everything before the first separator. The start offset is the
  second ':'-separated field of the part that follows that separator.

  Args:
      raw_text (str): chunk text with the metadata suffix still attached.

  Returns:
      tuple: (text, start_index).

  Raises:
      IndexError: the separator or the ':' field is missing.
      ValueError: the start offset is not an integer.
  """
  parts = raw_text.split(META_SEPARATOR)
  return parts[0], int(parts[1].split(":")[1])


# Strip the metadata suffix from every retrieved record (in place) and attach
# `start_index` / `end_index` to it.
for doc in output:
  try:
    raw_text = doc["source_text"]
    # Already processed by an earlier run of this cell: nothing left to strip.
    if META_SEPARATOR not in raw_text and "start_index" in doc and "end_index" in doc:
      continue
    text, start = _split_source_text(raw_text)
  except Exception as err:
    # Same best-effort policy as before (show the offending record and stop),
    # but without swallowing KeyboardInterrupt/SystemExit as a bare `except:` does.
    print(f"Could not parse chunk metadata ({err!r}):")
    print(doc)
    break
  # Update the record only after parsing fully succeeded, so a malformed record
  # is never left half-updated.
  doc["source_text"] = text
  doc["start_index"] = start
  doc["end_index"] = start + len(text)

In [8]:
def overlap_merge(chunk1, chunk2):
  """
  De-duplicate two chunks whose spans overlap.

  Each chunk is a dict holding 'source_text' together with the 'start_index' and
  'end_index' of that text (end exclusive). chunk1 is expected to start no later
  than chunk2; chunk_dedup sorts by 'start_index' before calling this.

  Args:
      chunk1 (dict): the earlier chunk.
      chunk2 (dict): the later chunk.

  Returns:
      dict: a new merged chunk when the spans overlap, otherwise chunk2 itself.
      int: status, 1 when the chunks were merged and 0 when they were not.
  """
  # Number of characters at the start of chunk2 that chunk1 already covers.
  overlap = chunk1['end_index'] - chunk2['start_index']
  if overlap <= 0:
    return chunk2, 0

  # Work on a copy so the caller's chunk1 is left untouched.
  new_chunk = dict(chunk1)
  # The indices are offsets in the source, whereas slicing is relative to chunk2's
  # own text, so skip `overlap` characters rather than slicing at chunk1's end_index.
  new_chunk['source_text'] = chunk1['source_text'] + chunk2['source_text'][overlap:]
  # chunk2 may lie entirely inside chunk1; the merged span must never shrink.
  new_chunk['end_index'] = max(chunk1['end_index'], chunk2['end_index'])
  return new_chunk, 1

In [9]:
from itertools import groupby
from operator import itemgetter

def chunk_dedup(top_k_docs):
  """
  Check and de-duplicate overlapping chunks.

  The documents are sorted by 'start_index' and each one is merged into its
  predecessor (via overlap_merge) whenever the two overlap.

  Args:
      top_k_docs (iterable of dict): top k retrieved documents, each carrying
          the keys that overlap_merge reads.

  Returns:
      list: the de-duplicated documents ordered by 'start_index'; empty when
      no documents were given.
  """
  chunk_list = sorted(top_k_docs, key=lambda doc: doc['start_index'])

  # Nothing retrieved: the pairwise loop below never runs, so return early rather
  # than reading a loop variable that was never bound.
  if not chunk_list:
    return []

  final_chunk_list = []
  current = chunk_list[0]

  for following in chunk_list[1:]:
    candidate, res = overlap_merge(current, following)

    # No overlap: `current` is complete and can be emitted as is.
    if res == 0:
      final_chunk_list.append(current)

    # Either the merged chunk or the untouched `following` carries forward.
    current = candidate

  final_chunk_list.append(current)

  return final_chunk_list

In [10]:
# Build the prompt context: the de-duplicated chunk texts joined back to back,
# in the order chunk_dedup returns them.
context = "".join(doc["source_text"] for doc in chunk_dedup(output))

In [11]:
# Prompt template with two positional `{}` slots, filled later via `str.format`:
# first the retrieved context, then the user's question.
prompt = """You are a helpful chatbot use the following context to answer the question asked by the user. If you don't know the answer reply with "I don't know"
Context: {}
Question: {}
Answer:"""

In [12]:
# Fill the template with the retrieved context and the question.
# NOTE(review): `input` shadows the Python builtin of the same name. Later cells
# read this variable, so a rename has to be applied to all of them together.
input = prompt.format(context, query)

In [None]:
from transformers import AutoTokenizer, AutoModelForCausalLM
import transformers
import torch

# Model id used for both the tokenizer and the generation pipeline.
model = "tiiuae/falcon-7b-instruct"

tokenizer = AutoTokenizer.from_pretrained(model)
# Text-generation pipeline built from the model id and the tokenizer above,
# requesting bfloat16 weights and automatic device placement.
# NOTE(review): from here on the name `pipeline` refers to this pipeline object,
# not to the `transformers.pipeline` factory.
pipeline = transformers.pipeline(
    "text-generation",
    model=model,
    tokenizer=tokenizer,
    torch_dtype=torch.bfloat16,
    device_map="auto",
)

In [None]:
# Generate one sampled answer for the prompt with the Falcon pipeline.
sequences = pipeline(
    input,
    # Budget for newly generated tokens only. `max_length` also counts the prompt,
    # and a prompt that carries the retrieved context can use up that budget
    # before anything is generated. This also matches the Llama setup in this notebook.
    max_new_tokens=200,
    do_sample=True,
    top_k=10,
    num_return_sequences=1,
    eos_token_id=tokenizer.eos_token_id,
    # Return only the completion. Splitting the full text on 'Answer:' picks the
    # wrong piece whenever the retrieved context itself contains that marker.
    return_full_text=False,
)
for seq in sequences:
    print(f"Result: {seq['generated_text']}")


In [15]:
from langchain.llms import CTransformers

# Local Llama 2 chat model loaded through LangChain's CTransformers wrapper.
# Generation settings go in the wrapper's `config` dict, which is how it hands
# them on to ctransformers.
# NOTE(review): passed as top-level keyword arguments, these settings appear not
# to be declared fields of the wrapper - confirm against the installed langchain version.
# NOTE(review): the model path is an absolute Google Drive location; it must exist
# in the runtime executing this notebook.
llm = CTransformers(
    model="/content/drive/MyDrive/GenAI/llama-2-7b-chat.ggmlv3.q4_0.bin",
    model_type="llama",
    config={
        "max_new_tokens": 200,
        "temperature": 0.5,
        "context_length": 2048,
    },
)

In [None]:
# Run the same prompt through the CTransformers-backed `llm`; as the last
# expression of the cell, its return value is displayed.
llm(input)