In [17]:
import torch
import os

import dotenv
dotenv.load_dotenv()

TAVILY_KEY = os.getenv("TAVILY_API_KEY")
HF_KEY = os.getenv("HF_API_KEY")
LC_KEY = os.getenv("LANGCHAIN_API_KEY")
WEAVIATE_KEY = os.getenv("WEAVIATE_API_KEY")
WEAVIATE_URL = os.getenv("WEAVIATE_URL")

import warnings
warnings.filterwarnings("ignore")

In [18]:
from typing import Literal
from tavily import TavilyClient
from duckduckgo_search import AsyncDDGS, DDGS

SEARCH_ENGINE = Literal["tavily", "ddgs", "both"]

def search_ddgs(query:str, max_results:int=2) -> list:
    ddgs = DDGS()
    results = None
    
    try:
        results = ddgs.text(keywords=query, max_results=max_results)
        results = [{'url': result['href'], 'title': result['title'], 'content': result['body']} for result in results]
    except Exception as e:
        print("Error:", e)
        print('Duck Duck Go search is unavailable now, using tavily')
        results = search_tavily(query, max_results)
        
    return results

def search_tavily(query:str, max_results:int=2) -> list:
    results = None
    
    try:
        tavily_client = TavilyClient(api_key=TAVILY_KEY)
        results = tavily_client.search(query=query, max_results=max_results, search_depth='basic')['results']
    except Exception as e:
        print("Error:", e)
        
    return results

def search_web(query:str, search_engine:SEARCH_ENGINE, max_results:int=2) -> dict:
    results = {} 
    
    assert search_engine in ["tavily", "ddgs", "both"], "Invalid search engine"
     
    if search_engine == "tavily":
        results['results'] = search_tavily(query, max_results)
        
    elif search_engine == "ddgs":
        results['results'] = search_ddgs(query, max_results)
        
    elif search_engine == "both":
        results['results'] = [*search_tavily(query, max_results), *search_ddgs(query, max_results)]
     
    return results 

In [19]:
def collapse_results(results:dict) -> list:
    return ['title:' + result['title'] + '\nContent' + result['content'] for result in results['results']]

## Vector DB: Weaviate

In [20]:
from sentence_transformers import SentenceTransformer

vect_model = SentenceTransformer('all-MiniLM-L6-v2', device='cuda')

In [21]:
import json
import requests
import weaviate
import weaviate.classes as wvc
from weaviate.auth import AuthApiKey

def search_and_create_embedding(query:str):
    client = None

    try:
        client = weaviate.connect_to_wcs(
            WEAVIATE_URL, 
            auth_credentials=AuthApiKey(WEAVIATE_KEY),
            additional_config=wvc.init.AdditionalConfig(timeout=wvc.init.Timeout(init=360)),
            skip_init_checks=True
        )
        print(client.schema.get())
        if client.is_ready():
            print("Connection to Weaviate established")
            research_docs = None
            
            if not client.collections.exists("ResearchDocs"):
                research_docs = client.collections.create(
                    name='ResearchDocs',
                    vectorizer_config=wvc.config.Configure.Vectorizer.none(),
                )
            
            else: 
                research_docs = client.collections.get('ResearchDocs')
                
                results = search_web(query=query, search_engine="both", max_results=2)
                print("Search results:", len(results['results']))

                docs = collapse_results(results)
                print("Docs:", len(docs))
                emb = (vect_model.encode(docs)).tolist()

                wvc_data_objects = list()
                for i, (d, e) in enumerate(zip(docs, emb)):
                    wvc_data_objects.append(
                        wvc.data.DataObject(
                            properties={
                                "title": results['results'][i]['title'],
                                "content": results['results'][i]['content'],
                            },
                            vector=e
                        )
                    )
                research_docs.data.insert_many(wvc_data_objects)
                
    except Exception as e:
        print("Error:", e)

    finally:
        client.close()

def search_weaviate(query:str):
    results = {}
    
    try:
        client = weaviate.connect_to_wcs(WEAVIATE_URL, auth_credentials=AuthApiKey(WEAVIATE_KEY))

        if client.is_ready():
            print("Connection to Weaviate established")

            if client.collections.exists("ResearchDocs"):
                research_docs = client.collections.get("ResearchDocs")
                query_vector = (vect_model.encode(query)).tolist()

                results = research_docs.query.near_vector(
                    near_vector=query_vector,
                    limit=2,
                    return_metadata=wvc.query.MetadataQuery(certainty=True)
                )
                client.close()
                return results.objects[1].properties

            client.close()
        else:
            raise Exception("Weaviate connection failed")
        
        assert results == {}, "No results found"
        raise AssertionError('Error occurred during data fetch')

    except Exception as e:
        print("Error:", e)

In [23]:
search_weaviate('Generating audio using deep learning models')

Connection to Weaviate established


{'title': 'Detection Stays One Step Ahead of Deepfakes—for Now',
 'content': 'In November, Intel announced its Real-Time Deepfake Detector, a platform for analyzing videos. (The term “deepfake” derives from the use of deep learning—an\xa0...'}

In [34]:
from langchain_weaviate.vectorstores import WeaviateVectorStore
from langchain_huggingface import HuggingFaceEmbeddings

hf_embedding = HuggingFaceEmbeddings(model_name='all-MiniLM-L6-v2', model_kwargs={'device':'cuda'})

w_client = weaviate.connect_to_wcs(
    cluster_url=WEAVIATE_URL, 
    auth_credentials=AuthApiKey(WEAVIATE_KEY), 
    additional_config=wvc.init.AdditionalConfig(
        timeout=wvc.init.Timeout(init=360)
    )
)
db = WeaviateVectorStore(client=w_client, embedding=hf_embedding, index_name='ResearchDocs', text_key='content')

In [36]:
query = "tell me about deep learning"
docs = db.similarity_search(query)

# Print the first 100 characters of each result
for i, doc in enumerate(docs):
    print(f"\nDocument {i+1}:")
    print(doc.page_content[:100] + "...")


Document 1:
In November, Intel announced its Real-Time Deepfake Detector, a platform for analyzing videos. (The ...

Document 2:
DeepFake Detection. 158 papers with code • 8 benchmarks • 19 datasets. DeepFake Detection is the tas...

Document 3:
Detect Fakes is a research project that aims to counteract misinformation created by AI, such as Dee...

Document 4:
Detect DeepFakes: How to counteract misinformation created by AI
Creative Commons
Attribution 4.0 In...


In [37]:
retriever = db.as_retriever()

In [38]:
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
from transformers import BitsAndBytesConfig

bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.bfloat16,
    bnb_4bit_use_double_quant=True
)

model = AutoModelForCausalLM.from_pretrained(
    "microsoft/Phi-3-mini-128k-instruct",
    device_map="cuda",
    torch_dtype="auto",
    trust_remote_code=True,
    quantization_config=bnb_config
)

# model_id = 'google/gemma-2b-it'
model_id = 'microsoft/Phi-3-mini-128k-instruct'

tokenizer = AutoTokenizer.from_pretrained(model_id, max_new_tokens=128, skip_special_tokens=True)

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


In [5]:
tokenizer.chat_template

"{% for message in messages %}{% if message['role'] == 'system' %}{{'<|system|>\n' + message['content'] + '<|end|>\n'}}{% elif message['role'] == 'user' %}{{'<|user|>\n' + message['content'] + '<|end|>\n'}}{% elif message['role'] == 'assistant' %}{{'<|assistant|>\n' + message['content'] + '<|end|>\n'}}{% endif %}{% endfor %}{% if add_generation_prompt %}{{ '<|assistant|>\n' }}{% else %}{{ eos_token }}{% endif %}"

In [7]:
inputs = tokenizer("""<|system|>
You are a helpful assistant.<|end|>
<|user|>
How to explain Internet for a medieval knight?<|end|>
<|assistant|> """, return_tensors="pt")
inputs = inputs.to('cuda')
outputs = model.generate(**inputs, max_length=64)
decoded = tokenizer.decode(outputs[0])

The `seen_tokens` attribute is deprecated and will be removed in v4.41. Use the `cache_position` model input instead.
You are not running the flash-attention implementation, expect numerical differences.


In [8]:
decoded

"<|system|> You are a helpful assistant.<|end|><|user|> How to explain Internet for a medieval knight?<|end|><|assistant|> To explain the Internet to a medieval knight, you would need to use analogies and descriptions that relate to their world and understanding. Here's a possible explanation:\n\nImagine a vast network of"

In [39]:
from transformers import pipeline
pipe = pipeline("text-generation", model=model, tokenizer=tokenizer, max_new_tokens=128)
messages = [
    {
        "role": "system",
        "content": "You are a friendly chatbot who always responds in the style of a pirate",
    },
    {
        "role": "user", 
        "content": "How many helicopters can a human eat in one sitting?"
    }
]

output = pipe(messages, max_new_tokens=128)[0]['generated_text'][-1]['content']
print(output)

The `seen_tokens` attribute is deprecated and will be removed in v4.41. Use the `cache_position` model input instead.
You are not running the flash-attention implementation, expect numerical differences.


 Arr matey, I be tellin' ye, a human be not built for eatin' helicopters, nor any other flying contraption for that matter. 'Tis a jest, I assure ye, for no soul should ever attempt such a feat. The only way to truly enjoy a helicopter is to admire its design and the skilled hands that craft it, not to consume it. So, I'd say the answer be zero, and I hope ye had a hearty laugh at the thought!


In [11]:
tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=False)

'<|system|>\nYou are a friendly chatbot who always responds in the style of a pirate<|end|>\n<|user|>\nHow many helicopters can a human eat in one sitting?<|end|>\n<|endoftext|>'

In [40]:
from langchain_huggingface import HuggingFacePipeline, ChatHuggingFace
from langchain_core.messages import HumanMessage, SystemMessage, AIMessage

llm = HuggingFacePipeline(
    pipeline=pipe,
)

In [41]:
from langchain_core.prompts import PromptTemplate
from langchain_core.prompts import HumanMessagePromptTemplate, SystemMessagePromptTemplate, AIMessagePromptTemplate
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.output_parsers import StrOutputParser

user_turn = "<|user|>"
system_turn = "<|system|>"
assistant_turn = "<|assistant|>"
end_turn = "<|end|>"

### Prompt Template

# General Prompt Template
general_prompt_template = """<|system|>\n You are a helpful research assistant. You have to answer the questions \
by summarizing or deriving useful information from provided data. You must be careful and concise about the answers.<|end|>
<|user|>\n Here is the question:{question} and the provided data:{data}.<|end|>
<|assistant|>\n"""

PROMPT = PromptTemplate(template=general_prompt_template, input_variables=["question", "data"])
#PROMPT.format(system_turn=system_turn, user_turn=user_turn, assistant_turn=assistant_turn, end_turn=end_turn)

# Chat Prompt Template
chat_prompt_template = """{system_turn}\n You are a friendly chatbot who always responds in the style of a pirate.{end_turn}
{user_turn}\n {question} {assistant_turn}\n
"""
chat_prompt = ChatPromptTemplate.from_template(template=chat_prompt_template)

In [42]:
chain = PROMPT | llm | StrOutputParser() |  (lambda x: (x.split("<|assistant|>")[-1]).strip())

In [43]:
res = chain.invoke({'question': 'tell me about deep learning', 'data':retriever})

In [44]:
res

'Deep learning is a subset of machine learning that uses neural networks with multiple layers to model and understand complex patterns in data. It is particularly effective in tasks such as image and speech recognition, natural language processing, and predictive analytics.\n\nIn the provided data, there are two main components:\n\n1. WeaviateVectorStore: This is an object from the langchain_weaviate.vectorstores module, which likely represents a vector store that uses the Weaviate vector database to store and retrieve vector embeddings. Weaviate is a scalable, real-time graph database that can store,'