## Chatbot using LangChain and Hugging Face

In [2]:
!pip install langchain langchain-huggingface huggingface_hub pinecone sentence-transformers -q

[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m421.9/421.9 kB[0m [31m11.4 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m363.4/363.4 MB[0m [31m2.9 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m13.8/13.8 MB[0m [31m112.1 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m24.6/24.6 MB[0m [31m80.6 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m883.7/883.7 kB[0m [31m51.6 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m664.8/664.8 MB[0m [31m2.8 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m211.5/211.5 MB[0m [31m5.9 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m56.3/56.3 MB[0m [31m14.3 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━

In [3]:
import os
import warnings
from getpass import getpass
from langchain_huggingface import HuggingFaceEndpoint
from langchain.memory import ConversationBufferMemory
from langchain.prompts import PromptTemplate
from langchain.chains import LLMChain
from sentence_transformers import SentenceTransformer
from pinecone import Pinecone, ServerlessSpec

# Hide FutureWarning messages raised from huggingface_hub's deprecation helper.
warnings.filterwarnings("ignore", category=FutureWarning, module="huggingface_hub.utils._deprecation")

# Credentials are prompted for interactively so they never end up in the notebook source.
HUGGINGFACEHUB_API_TOKEN = getpass("Enter your Hugging Face token: ")
os.environ["HUGGINGFACEHUB_API_TOKEN"] = HUGGINGFACEHUB_API_TOKEN

PINECONE_API_KEY = getpass("Enter your Pinecone API key: ")
pc = Pinecone(api_key=PINECONE_API_KEY)

# Text-generation endpoint used to answer the user.
model_id = "Qwen/QwQ-32B"
conv_model = HuggingFaceEndpoint(
    repo_id=model_id,
    huggingfacehub_api_token=os.environ["HUGGINGFACEHUB_API_TOKEN"],
    temperature=0.8,
    # Cap the reply length through the endpoint's own `max_new_tokens` field.
    # The previous `model_kwargs={"max_length": 300}` was an unvalidated extra
    # that is passed straight through to the inference client.
    max_new_tokens=300,
    task="text-generation",
)

# Sentence embedding model used to vectorise each stored exchange.
embedder = SentenceTransformer('all-MiniLM-L6-v2')

Enter your Hugging Face token: ··········
Enter your Pinecone API key: ··········


The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


modules.json:   0%|          | 0.00/349 [00:00<?, ?B/s]

config_sentence_transformers.json:   0%|          | 0.00/116 [00:00<?, ?B/s]

README.md:   0%|          | 0.00/10.5k [00:00<?, ?B/s]

sentence_bert_config.json:   0%|          | 0.00/53.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/612 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/90.9M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/350 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/466k [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/112 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/190 [00:00<?, ?B/s]

In [10]:
index_name = "chat-history"

# Create the index only if it does not exist yet, so re-running the cell is safe.
if index_name not in pc.list_indexes().names():
    pc.create_index(
        name=index_name,
        # Ask the embedder for its vector size instead of hard-coding it, so the
        # index stays in sync if the embedding model is swapped.
        dimension=embedder.get_sentence_embedding_dimension(),
        metric="cosine",
        spec=ServerlessSpec(cloud="gcp", region="us-central1")
    )
index = pc.Index(index_name)

# Prompt sent to the model: fixed instructions, the recent history, then the new query.
template = """You are an explainable and creative AI assistant that explains the text that is given to you. You answer the questions that are asked of you about any topic.
Below is the conversation history so far:
{history}
Explain this text with at least 100 words: {query}"""

prompt = PromptTemplate(input_variables=["history", "query"], template=template)

# Pipe the filled-in prompt into the endpoint; invoke with {"history": ..., "query": ...}.
conv_chain = prompt | conv_model

def store_in_pinecone(query, response, message_id):
    """Embed one user/AI exchange and upsert it into the Pinecone index.

    Args:
        query: The user's message.
        response: The model's reply to that message.
        message_id: Vector ID under which the exchange is written.

    The exchange is rendered as a two-line "User: ... / AI: ..." transcript.
    That transcript is what gets embedded, and it is stored in the vector's
    metadata together with the raw query and response.
    """
    transcript = f"User: {query}\nAI: {response}"
    vector = embedder.encode(transcript).tolist()
    metadata = {"text": transcript, "query": query, "response": response}
    record = (message_id, vector, metadata)
    index.upsert([record])

def get_recent_history(limit=5):
    """Return the text of the most recent stored exchanges, oldest first.

    The IDs actually present in the index are listed and ordered by the number
    after the "msg_" prefix. Deriving the window from the vector count (as this
    function used to) breaks once older messages have been deleted, because the
    count then no longer equals the highest message number.

    Args:
        limit: Maximum number of exchanges to include (default 5).

    Returns:
        The "text" metadata of the newest exchanges joined by newlines, or a
        placeholder sentence when the index holds no messages or none of the
        selected vectors carries text.
    """
    prefix = "msg_"

    # index.list yields pages (lists) of vector IDs that start with the prefix.
    numbered_ids = []
    for page in index.list(prefix=prefix):
        for vector_id in page:
            suffix = vector_id[len(prefix):]
            if suffix.isdecimal():  # ignore IDs that do not follow msg_<number>
                numbered_ids.append((int(suffix), vector_id))

    if not numbered_ids:
        return "No conversation history yet!"

    # Numeric sort so that msg_10 comes after msg_9.
    numbered_ids.sort()
    recent_ids = [vector_id for _, vector_id in numbered_ids[-limit:]] if limit > 0 else []
    if not recent_ids:
        return "No recent conversation history available."

    results = index.fetch(ids=recent_ids)

    # Walk the IDs in order so the newest exchange ends up last.
    history_entries = []
    for vector_id in recent_ids:
        if vector_id in results.vectors:
            metadata = results.vectors[vector_id].metadata or {}
            text = metadata.get("text")
            if text:
                history_entries.append(text)

    history = "\n".join(history_entries)
    return history if history else "No recent conversation history available."

def delete_stale_history(last_deleted, keep_recent=5):
    """Delete every stored message except the newest `keep_recent`.

    The IDs actually present in the index are listed and ordered by the number
    after the "msg_" prefix. The vector count is not used as a stand-in for the
    highest message number, because the two diverge after the first purge.

    Args:
        last_deleted: Highest message number removed by an earlier call; it is
            returned unchanged when nothing is deleted.
        keep_recent: How many of the newest messages to keep (default 5).

    Returns:
        The highest message number deleted so far.
    """
    prefix = "msg_"

    # index.list yields pages (lists) of vector IDs that start with the prefix.
    numbered_ids = []
    for page in index.list(prefix=prefix):
        for vector_id in page:
            suffix = vector_id[len(prefix):]
            if suffix.isdecimal():  # ignore IDs that do not follow msg_<number>
                numbered_ids.append((int(suffix), vector_id))

    # Numeric sort so that msg_10 is treated as newer than msg_9.
    numbered_ids.sort()
    stale_count = max(len(numbered_ids) - max(keep_recent, 0), 0)
    stale = numbered_ids[:stale_count]
    if not stale:
        return last_deleted

    stale_ids = [vector_id for _, vector_id in stale]
    # Delete in chunks; Pinecone is understood to cap IDs per delete request
    # (1000 at the time of writing - TODO confirm against current limits).
    batch_size = 1000
    for start in range(0, len(stale_ids), batch_size):
        index.delete(ids=stale_ids[start:start + batch_size])

    return max(last_deleted, stale[-1][0])


In [11]:
last_deleted = 0

# Continue numbering after the highest "msg_<n>" ID that is actually in the index.
# Seeding from the vector count would reuse (and overwrite) live IDs once older
# messages have been deleted, because the count then lags the highest number.
existing_numbers = [
    int(vector_id[len("msg_"):])
    for page in index.list(prefix="msg_")  # yields pages (lists) of IDs
    for vector_id in page
    if vector_id[len("msg_"):].isdecimal()
]
msg_id = max(existing_numbers, default=0) + 1

# Interactive loop: "exit"/"quit" ends the chat, "clear stale" purges old
# messages, anything else is answered by the model and stored.
while True:
    query = input("You: ").strip()
    command = query.lower()
    if command in ["exit", "quit"]:
        print("Goodbye!")
        break
    elif command == "clear stale":
        last_deleted = delete_stale_history(last_deleted, keep_recent=5)
        print(f"Stale history cleared. Last deleted ID: {last_deleted}")
    elif not query:
        # Blank input: nothing to explain, so do not call the model or store anything.
        continue
    else:
        history = get_recent_history()
        response = conv_chain.invoke({"query": query, "history": history})
        print("ChatBot: ", response)
        store_in_pinecone(query, response, f"msg_{msg_id}")
        msg_id += 1

You: Stable Diffusion
ChatBot:   (SD) 3.0 is a major update to the Stable Diffusion framework. The backbone architecture has been entirely redesigned from the previous UNet-based model to a Rectified Flow Transformer, which implements the rectified flow method through a Transformer architecture. This new structure, called the Multimodal Diffusion Transformer (MMDiT), consists of three distinct tracks within the Transformer: one for original text encoding, another for transformed text encoding, and a third for image encoding in the latent space. During each Transformer block, the transformed text encoding and image encoding are dynamically mixed, enabling bidirectional influence between textual and visual information. This contrasts with earlier versions of the DiT architecture where the text encodings only influenced the image generation process unidirectionally. By integrating both modalities within the same Transformer framework, MMDiT allows for more coherent and contextually releva