# Load depedencies

## Install depedencies

In [1]:
import os

# ✅ Disable parallelism for Hugging Face Tokenizers to prevent deadlocks
os.environ["TOKENIZERS_PARALLELISM"] = "false"

In [2]:
%%capture
%pip install -U langchain langchainhub langchain_community langchain-huggingface
%pip install -U faiss-gpu transformers accelerate requests pandas

In [3]:
import os
from kaggle_secrets import UserSecretsClient

# ✅ Retrieve API key securely from Kaggle Secrets
user_secrets = UserSecretsClient()
GROQ_API_KEY = user_secrets.get_secret("GROQ_API_KEY")  # 🔴 Ensure this is set in Kaggle Secrets

# ✅ Ensure API Key is set
if not GROQ_API_KEY:
    raise ValueError("❌ API Key is missing! Set GROQ_API_KEY in Kaggle Secrets.")

# ✅ Print partial key to confirm it's loaded
print("🔑 API Key Loaded:", GROQ_API_KEY[:5] + "****")  # Only shows first 5 characters for security


🔑 API Key Loaded: gsk_d****


## Import depedencies

In [4]:
%%time

import torch
import warnings
warnings.filterwarnings("ignore")

# ✅ LangChain Components
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.vectorstores import FAISS
from langchain.chains import RetrievalQA
from langchain_huggingface import HuggingFacePipeline

# ✅ Hugging Face Transformers
from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline

# ✅ Pandas for CSV Handling
import pandas as pd

# ✅ Clear output (Optional)
from IPython.display import clear_output
clear_output()

# ✅ Load CSV file
dataset_path = "/kaggle/input/faq-toyota/FAQ Toyota(Sheet1)-2.csv"

# ✅ Load DataFrame with flexible encoding
df = None
for encoding in ['utf-8-sig', 'latin1', 'ISO-8859-1', 'utf-16']:
    try:
        df = pd.read_csv(dataset_path, encoding=encoding)
        print(f"✅ Successfully loaded CSV using encoding: {encoding}")
        break
    except Exception as e:
        print(f"❌ Failed with encoding {encoding}: {str(e)}")

# ✅ Ensure DataFrame is loaded
if df is None or df.empty:
    raise ValueError("❌ Failed to load CSV file. Try uploading a different format.")

# ✅ Convert CSV data into LangChain Documents
from langchain.docstore.document import Document
documents = [Document(page_content=str(row)) for row in df['answer'].dropna().tolist()]

# ✅ Initialize FAISS Vector Database
embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
vector_db = FAISS.from_documents(documents, embeddings)

print("✅ FAISS vector search initialized!")


❌ Failed with encoding utf-8-sig: 'utf-8' codec can't decode byte 0x92 in position 2680: invalid start byte
✅ Successfully loaded CSV using encoding: latin1




modules.json:   0%|          | 0.00/349 [00:00<?, ?B/s]

config_sentence_transformers.json:   0%|          | 0.00/116 [00:00<?, ?B/s]

README.md:   0%|          | 0.00/10.5k [00:00<?, ?B/s]

sentence_bert_config.json:   0%|          | 0.00/53.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/612 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/90.9M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/350 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/466k [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/112 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/190 [00:00<?, ?B/s]

✅ FAISS vector search initialized!
CPU times: user 11.6 s, sys: 2.77 s, total: 14.4 s
Wall time: 27.5 s


## Hugging Face login

In [5]:
# from huggingface_hub import login
# from kaggle_secrets import UserSecretsClient
# user_secrets = UserSecretsClient()
# secret_value_0 = user_secrets.get_secret("hug")
# login(token = secret_value_0)

# Load Dataset

In [6]:
def ask_groq(question, max_tokens=256, max_retries=3, retry_delay=2):
    """
    Sends a query to Groq AI and returns a conversational response.

    Parameters:
        question (str): The user's input.
        max_tokens (int): Maximum length of the response.
        max_retries (int): Number of retries in case of API failure.
        retry_delay (int): Seconds to wait before retrying.

    Returns:
        str: A conversational, relevant response from Groq API.
    """

    headers = {
        "Authorization": f"Bearer {GROQ_API_KEY}",
        "Content-Type": "application/json"
    }

    data = {
        "model": "mixtral-8x7b-32768",
        "messages": [
            {"role": "system", "content": "You are a helpful assistant who can answer any question, engage in casual conversations, and provide fact-based responses in a friendly and natural way."},
            {"role": "user", "content": question}
        ],
        "max_tokens": max_tokens
    }

    for attempt in range(max_retries):
        try:
            print(f"📤 Sending request to Groq API (Attempt {attempt+1}/{max_retries})...")
            response = requests.post(GROQ_API_URL, headers=headers, json=data, timeout=20)
            response_json = response.json()

            print(f"📩 Received response: {response.status_code}")
            print("📩 Full API Response:", response_json)  # Debugging: Full response print

            if response.status_code == 200:
                return response_json.get("choices", [{}])[0].get("message", {}).get("content", "⚠️ No valid response from Groq API.")
            elif response.status_code == 401:
                return "❌ Error 401: Unauthorized. Check your API key."
            elif response.status_code == 403:
                return "❌ Error 403: Forbidden. You might not have API access."
            elif response.status_code == 404:
                return "❌ Error 404: Invalid API URL. Check if Groq API URL has changed."
            elif response.status_code == 429:
                return "❌ Error 429: Too many requests. Try again later."
            elif response.status_code >= 500:
                print(f"⚠️ Server error ({response.status_code}). Retrying...")
            else:
                return f"❌ Error {response.status_code}: {response.text}"

        except requests.exceptions.Timeout:
            print(f"⏳ Request timed out. Retrying in {retry_delay} seconds...")
        except requests.exceptions.RequestException as e:
            print(f"❌ API Request failed: {e}")
            return f"❌ API Request failed: {e}"

        time.sleep(retry_delay)

    return "❌ Failed after multiple attempts. Groq API may be down."


In [7]:
alpaca_loader = df

# Load Embedding model

In [8]:
# Define the path to the pre-trained model you want to use
modelPath = "sentence-transformers/all-MiniLM-L12-v2"

# Create a dictionary with model configuration options, specifying to use the GPU for computations
model_kwargs = {'device':'cuda'}

# Create a dictionary with encoding options, specifically setting 'normalize_embeddings' to False
encode_kwargs = {'normalize_embeddings': False}

# Initialize an instance of HuggingFaceEmbeddings with the specified parameters
embeddings = HuggingFaceEmbeddings(
    model_name=modelPath,     
    model_kwargs=model_kwargs, 
    encode_kwargs=encode_kwargs
)

modules.json:   0%|          | 0.00/349 [00:00<?, ?B/s]

config_sentence_transformers.json:   0%|          | 0.00/116 [00:00<?, ?B/s]

README.md:   0%|          | 0.00/10.5k [00:00<?, ?B/s]

sentence_bert_config.json:   0%|          | 0.00/53.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/615 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/133M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/352 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/466k [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/112 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/190 [00:00<?, ?B/s]

In [9]:
text = "What are the most popular Toyota car models?"
query_result = embeddings.embed_query(text)
query_result[:10]

[0.007054546382278204,
 0.019185686483979225,
 -0.016045207157731056,
 -0.10547027736902237,
 -0.03071235492825508,
 0.001832180074416101,
 0.02506093867123127,
 0.06352841854095459,
 -0.02509220689535141,
 0.008866644464433193]

## Create a VectorDB

In [10]:
import pandas as pd
from langchain.docstore.document import Document
from langchain.vectorstores import FAISS
from langchain.embeddings.huggingface import HuggingFaceEmbeddings  # Free alternative

# Specify the path to your CSV file
dataset_path = r"/kaggle/input/faq-toyota/FAQ Toyota(Sheet1)-2.csv"

# Try multiple encodings to load CSV
df = None
for encoding in ['utf-8-sig', 'latin1', 'ISO-8859-1', 'utf-16']:
    try:
        df = pd.read_csv(dataset_path, encoding=encoding)
        print(f"Successfully loaded CSV using encoding: {encoding}")
        break
    except Exception as e:
        print(f"Failed with encoding {encoding}: {e}")

# Ensure the DataFrame is loaded
if df is None:
    raise ValueError("Failed to load CSV file with all attempted encodings.")

# Check if 'output' column exists
if 'answer' not in df.columns:
    raise ValueError("Column 'output' not found in CSV file. Check column names.")

# Convert the DataFrame into a list of Document objects
documents = [Document(page_content=str(row)) for row in df['answer'].dropna().tolist()]

# Use Hugging Face embeddings (No API key required)
embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")

# Create the FAISS vector store using the list of Document objects
vector_db = FAISS.from_documents(documents, embeddings)
vector_db.save_local("/kaggle/working/faiss_doctor_index")

print("FAISS index saved successfully!")


Failed with encoding utf-8-sig: 'utf-8' codec can't decode byte 0x92 in position 2680: invalid start byte
Successfully loaded CSV using encoding: latin1
FAISS index saved successfully!


In [11]:
def chatbot_response(user_input, similarity_threshold=0.4):  # Threshold for FAISS match
    """
    First, try FAISS Vector Search. If no relevant results, use Groq API.
    
    Parameters:
        user_input (str): The user's question.
        similarity_threshold (float): Minimum similarity score to accept a FAISS match.
        
    Returns:
        str: The best-matched response from FAISS or Groq API.
    """

    # ✅ Step 1: Search FAISS Vector Database with similarity scores
    results = vector_db.similarity_search_with_score(user_input, k=5)  # Retrieve top 5 matches

    # ✅ Debugging: Print FAISS results to check matches
    if not results:
        print("❌ FAISS returned no results.")
    else:
        print(f"📌 FAISS Found {len(results)} Results:")
        for i, (match, score) in enumerate(results):
            adjusted_score = 1 / (1 + score)  # Convert L2 distance to similarity (closer to 1 is better)
            print(f"🔍 {i+1}: (Raw Score: {score:.2f} → Adjusted Similarity: {adjusted_score:.2f}) {match.page_content[:200]}...")

    # ✅ Step 2: Normalize FAISS score (Convert L2 distance to similarity)
    if results:
        best_answer, raw_score = results[0]
        similarity_score = 1 / (1 + raw_score)  # Convert L2 to similarity (0-1 range)

        # ✅ If similarity is high, return FAISS result
        if similarity_score >= similarity_threshold:
            print(f"✅ FAISS Match Found (Score: {similarity_score:.2f}) → Using Vector DB Response.")
            return best_answer.page_content

    # ✅ Step 3: If FAISS fails, use Groq API for everything else
    print("⚠️ No relevant match found in FAISS. Querying Groq API instead...")
    return ask_groq(user_input)


# Example usage
question = "What are the most popular Toyota car models?"
response = chatbot_response(question)
print("Chatbot Response:", response)


📌 FAISS Found 5 Results:
🔍 1: (Raw Score: 0.34 → Adjusted Similarity: 0.74) Some of the most popular Toyota cars include the Toyota Corolla, Camry, Prius, RAV4, Highlander, and Tacoma....
🔍 2: (Raw Score: 0.67 → Adjusted Similarity: 0.60) The Toyota Corolla is the best-selling car in the world, with over 50 million units sold globally....
🔍 3: (Raw Score: 0.71 → Adjusted Similarity: 0.59) Toyota offers several hybrid models, including the Prius, Corolla Hybrid, Camry Hybrid, and RAV4 Hybrid....
🔍 4: (Raw Score: 0.80 → Adjusted Similarity: 0.55) Toyota is known for its reliability, durability, and longevity. Many Toyota models have been recognized for lasting well over 200,000 miles with proper maintenance....
🔍 5: (Raw Score: 0.81 → Adjusted Similarity: 0.55) The Toyota Prius is the most fuel-efficient Toyota car, offering over 50 MPG. The Corolla Hybrid and Camry Hybrid also provide excellent fuel economy....
✅ FAISS Match Found (Score: 0.74) → Using Vector DB Response.
Chatbot Respon

# Load Model

In [12]:
%%time

import os
import torch

# Define the model path
base_model = "/kaggle/input/qwen2/transformers/qwen2-7b-instruct/1"

# Check if the model path exists
if not os.path.exists(base_model):
    raise FileNotFoundError(f"❌ Model path not found: {base_model}")

# Check if CUDA (GPU) is available
device = "cuda" if torch.cuda.is_available() else "cpu"
dtype = torch.float16 if torch.cuda.is_available() else torch.float32  # Adjust for CPU

# Load the tokenizer and model
tokenizer = AutoTokenizer.from_pretrained(base_model)

model = AutoModelForCausalLM.from_pretrained(
    base_model,
    return_dict=True,
    low_cpu_mem_usage=True,
    torch_dtype=dtype,   # Adjust dtype based on GPU/CPU availability
    device_map="auto",   # Automatically assigns model to available device
    trust_remote_code=True,
)

# Print success message
print(f"✅ Qwen2-7B Model loaded successfully on {device}!")


Sliding Window Attention is enabled but not implemented for `sdpa`; unexpected results may be encountered.


Loading checkpoint shards:   0%|          | 0/4 [00:00<?, ?it/s]

✅ Qwen2-7B Model loaded successfully on cuda!
CPU times: user 15.8 s, sys: 18.3 s, total: 34.1 s
Wall time: 1min 22s


## Create Pipeline

In [13]:
pipe = pipeline(
    "text-generation", 
    model=model, 
    tokenizer=tokenizer,
    max_new_tokens=1024
)

llm = HuggingFacePipeline(pipeline=pipe)

Device set to use cuda:0


# Create chain

In [14]:
retriever = vector_db.as_retriever()
rag_prompt = hub.pull("rlm/rag-prompt")

NameError: name 'hub' is not defined

In [None]:
vector_db.similarity_search('Generate 10 embeddings', k=10)

In [None]:
qa_chain = (
    {"context": retriever, "question": RunnablePassthrough()}
    | rag_prompt
    | llm
    | StrOutputParser()
)

## Testing

In [None]:
question_1 = "Find the area of a circle given its radius."
result_1 = qa_chain.invoke(question_1)

print(result_1.split("Answer: ")[1])

In [None]:
question_2 = "Explain what the article is about in 1 to 2 sentences."
result_2 = qa_chain.invoke(question_2)
print(result_2.split("Answer: ")[1])

In [None]:
question_3 = "Render a 3D model of a house"
result_3 = qa_chain.invoke(question_3)

print(result_3.split("Answer: ")[1])