<a href="https://colab.research.google.com/github/BlairSonnen/Glassdoor_AIBot/blob/blairdev/Glassdoor_Ai_chatbot.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
!pip install huggingface_hub

In [None]:
!huggingface-cli login


    _|    _|  _|    _|    _|_|_|    _|_|_|  _|_|_|  _|      _|    _|_|_|      _|_|_|_|    _|_|      _|_|_|  _|_|_|_|
    _|    _|  _|    _|  _|        _|          _|    _|_|    _|  _|            _|        _|    _|  _|        _|
    _|_|_|_|  _|    _|  _|  _|_|  _|  _|_|    _|    _|  _|  _|  _|  _|_|      _|_|_|    _|_|_|_|  _|        _|_|_|
    _|    _|  _|    _|  _|    _|  _|    _|    _|    _|    _|_|  _|    _|      _|        _|    _|  _|        _|
    _|    _|    _|_|      _|_|_|    _|_|_|  _|_|_|  _|      _|    _|_|_|      _|        _|    _|    _|_|_|  _|_|_|_|

    To log in, `huggingface_hub` requires a token generated from https://huggingface.co/settings/tokens .
Enter your token (input will not be visible): 
Add token as git credential? (Y/n) y
Token is valid (permission: fineGrained).
The token `newtoken` has been saved to /root/.cache/huggingface/stored_tokens
[1m[31mCannot authenticate through git-credential as no helper is defined on your machine.
You might have to re-aut

In [None]:
# Mount Google Drive at /content/drive; later cells read and write paths under
# /content/drive/MyDrive.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
# Updated RAG Code

# simple_rag_csv_ingestion_chunked.py

import subprocess
import sys

def install_if_missing(package):
    """Import ``package`` and pip-install it when the import fails.

    The same string is used both as the module name to import and as the
    requirement handed to ``pip install``. Only ImportError triggers an
    install; any other exception raised by the import propagates.
    """
    try:
        __import__(package)
    except ImportError:
        needs_install = True
    else:
        needs_install = False

    if needs_install:
        pip_command = [sys.executable, "-m", "pip", "install", package]
        subprocess.check_call(pip_command)

# Modules this cell needs; each one is installed on demand if it cannot be imported.
required_packages = ["sentence_transformers", "chromadb", "pandas", "tqdm"]

for package_name in required_packages:
    install_if_missing(package_name)

from sentence_transformers import SentenceTransformer
from chromadb import PersistentClient
import pandas as pd
from pathlib import Path
from tqdm import tqdm
import gc

# Mount Google Drive if in Colab
# force_remount=True asks for a fresh mount even when Drive is already attached.
try:
    from google.colab import drive
    drive.mount('/content/drive', force_remount=True)
except ImportError:
    # google.colab cannot be imported outside Colab; mounting is skipped there.
    pass

# Folder that holds the persistent Chroma store; created when it does not exist yet.
CHROMA_PATH = "/content/drive/MyDrive/Glassdoor Chroma Store"
Path(CHROMA_PATH).mkdir(parents=True, exist_ok=True)

# Embedding model used to encode every CSV chunk, and the persistent collection
# ("csv_documents") the chunks are written to.
embed_model = SentenceTransformer('all-MiniLM-L6-v2')
client = PersistentClient(path=CHROMA_PATH)
collection = client.get_or_create_collection("csv_documents")

def ingest_csv_chunked(filepath, chunk_size=50):
    """Embed one CSV file in row chunks and store the chunks in the Chroma collection.

    Each chunk of ``chunk_size`` rows is rendered to text with
    ``DataFrame.to_string``, embedded with ``embed_model`` and written to
    ``collection`` under the id ``"<file stem>_<start row>"``.

    Args:
        filepath: Path (or path string) of the CSV file.
        chunk_size: Number of rows per stored document; must be >= 1.

    Returns:
        The number of chunks written, or 0 when anything fails (the error is
        printed rather than raised so one bad file does not stop a batch run).
    """
    filepath = Path(filepath)  # allow plain strings as well as Path objects
    try:
        if chunk_size < 1:
            raise ValueError(f"chunk_size must be >= 1, got {chunk_size}")

        df = pd.read_csv(filepath, low_memory=False)
        total_rows = len(df)
        print(f"\n📄 Processing: {filepath.name} ({total_rows} rows)")

        batch_count = 0
        for start in tqdm(range(0, total_rows, chunk_size), desc=f"Chunks in {filepath.name}"):
            chunk_text = df.iloc[start:start + chunk_size].to_string(index=False)
            embedding = embed_model.encode(chunk_text)

            # upsert (not add): the ids are deterministic, so re-running the
            # ingestion refreshes existing chunks instead of failing or being
            # skipped on duplicate ids.
            # NOTE: ids use only the file stem, so two CSVs with the same name
            # in different sub-folders overwrite each other's chunks.
            collection.upsert(
                documents=[chunk_text],
                embeddings=[embedding.tolist()],
                metadatas=[{"filename": str(filepath), "start_row": int(start)}],
                ids=[f"{filepath.stem}_{start}"]
            )
            batch_count += 1

        # One collection pass per file is enough; a full GC per chunk only slows ingestion.
        del df
        gc.collect()

        print(f"✅ Ingested {batch_count} chunks from {filepath.name}")
        return batch_count

    except Exception as e:
        print(f"❌ Error processing {filepath}: {e}")
        return 0

def process_csv_files(folder_path, chunk_size=50):
    """Ingest every ``*.csv`` file found under ``folder_path`` (recursively).

    Args:
        folder_path: Directory to search for CSV files.
        chunk_size: Rows per stored chunk, forwarded to ``ingest_csv_chunked``.
    """
    folder = Path(folder_path)
    if not folder.is_dir():
        # A wrong or unmounted path would otherwise look like a successful empty run.
        print(f"⚠️ Folder not found: {folder}")

    csv_files = sorted(folder.glob("**/*.csv"))  # sorted for a reproducible order

    total_chunks = 0
    for filepath in csv_files:
        total_chunks += ingest_csv_chunked(filepath, chunk_size=chunk_size)

    # chromadb >= 0.4 PersistentClient writes to disk automatically and no
    # longer exposes persist(); only call it on legacy clients that still do.
    persist = getattr(client, "persist", None)
    if callable(persist):
        persist()

    print(f"\n✅ Done: Ingested {total_chunks} chunks total from {len(csv_files)} CSV file(s).")

# Entry point: ingest all CSVs under the Drive "Resources" folder, 50 rows per chunk.
if __name__ == "__main__":
    process_csv_files("/content/drive/MyDrive/AI Chatbot Data/Resources", chunk_size=50)

In [None]:
# ai_recruiter_with_rag.py (patched for Gradio file compatibility with full fallback support)

import subprocess
import sys
import importlib.util
import os

# Auto-install APT dependencies if in Colab
# The existence of /content is used as the "running in Colab" check.
if os.path.exists("/content"):
    try:
        subprocess.check_call(["apt-get", "install", "-y", "libmagic1"])
    except Exception as e:
        # Best effort: a failed apt install is reported but does not stop the cell.
        print(f"⚠️ Failed to install apt dependency: {e}")

# Auto-install and upgrade missing packages
# Maps import name -> arguments passed to `pip install` (split on whitespace).
# install_missing() only installs entries whose import name cannot be found,
# so "--upgrade" has an effect only when the package is absent.
required = {
    "torch": "torch",
    "pandas": "pandas",
    "mammoth": "mammoth",
    "docx": "python-docx",
    "fitz": "PyMuPDF",
    "xlrd": "xlrd",
    "sentence_transformers": "sentence-transformers",
    "transformers": "transformers --upgrade",
    "gradio": "gradio",
    "bitsandbytes": "git+https://github.com/TimDettmers/bitsandbytes.git",
    "accelerate": "accelerate --upgrade",
    "chromadb": "chromadb",
    "magic": "python-magic"
}

def install_missing(pkg_map):
    """Pip-install each entry of ``pkg_map`` whose module cannot be found.

    ``pkg_map`` maps an importable module name to the argument string given to
    ``pip install``; that string is split on whitespace, so it may carry flags.
    """
    # Generator on purpose: each lookup happens after the previous install finished.
    pending = (
        pip_args
        for module_name, pip_args in pkg_map.items()
        if importlib.util.find_spec(module_name) is None
    )
    for pip_args in pending:
        print(f"📦 Installing {pip_args}...")
        subprocess.check_call([sys.executable, "-m", "pip", "install", *pip_args.split()])

install_missing(required)

import io
import time
import torch
import pandas as pd
import mammoth
import docx
import fitz
import xlrd
import magic
import numpy as np
from io import StringIO
from sentence_transformers import SentenceTransformer
from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig
import gradio as gr
from chromadb import PersistentClient

# Mount Google Drive if in Colab
try:
    from google.colab import drive
    drive.mount('/content/drive')
except ImportError:
    pass  # google.colab is unavailable outside Colab; nothing to mount

# Persistent Chroma store holding the ingested CSV chunks.
CHROMA_PATH = "/content/drive/MyDrive/Glassdoor Chroma Store"
client = PersistentClient(path=CHROMA_PATH)
collection = client.get_or_create_collection("csv_documents")

# Load the LLM: try 4-bit quantization first, fall back to fp16 if that fails.
# NOTE(review): trust_remote_code=True allows code shipped in the model repo to
# run locally — confirm it is actually required for this checkpoint.
model_path = "mistralai/Mistral-7B-Instruct-v0.2"
try:
    bnb_config = BitsAndBytesConfig(
        load_in_4bit=True,
        bnb_4bit_use_double_quant=True,
        bnb_4bit_quant_type='nf4',
        bnb_4bit_compute_dtype=torch.float16
    )
    model = AutoModelForCausalLM.from_pretrained(
        model_path,
        device_map="auto",
        quantization_config=bnb_config,
        trust_remote_code=True
    )
    print("✅ Loaded model with 4-bit quantization (bnb)")
except Exception as e:
    # Surface the reason: the fp16 fallback needs far more memory, so the
    # underlying bitsandbytes error must not be hidden.
    print(f"⚠️ Failed to load with bitsandbytes, falling back to fp16: {e}")
    model = AutoModelForCausalLM.from_pretrained(
        model_path,
        torch_dtype=torch.float16,
        device_map="auto",
        trust_remote_code=True
    )

tokenizer = AutoTokenizer.from_pretrained(model_path, trust_remote_code=True)
if tokenizer.pad_token is None:
    # The tokenizer is called with padding=True later, which needs a pad token.
    tokenizer.pad_token = tokenizer.eos_token
embed_model = SentenceTransformer("all-MiniLM-L6-v2")

def _read_upload_bytes(file):
    """Return the raw bytes of an upload given as a file object, wrapper or path."""
    if hasattr(file, "read"):
        data = file.read()
        # Text-mode handles return str; normalise so callers always get bytes.
        return data.encode("utf-8") if isinstance(data, str) else data

    # Wrapper objects expose the temp-file path as .value or .name; a plain
    # path string is accepted as well.
    for candidate in (getattr(file, "value", None), getattr(file, "name", None), file):
        if isinstance(candidate, (str, os.PathLike)) and os.path.exists(candidate):
            with open(candidate, "rb") as handle:
                return handle.read()
    raise ValueError("Unsupported file object or missing file path.")


def extract_text_from_file(file, max_chars=3000):
    """Extract plain text from an uploaded document, truncated to ``max_chars``.

    The file type is detected from its content with libmagic. Plain text, PDF,
    Word, CSV and Excel (.xls and .xlsx) files are handled.

    Args:
        file: Upload from Gradio (file object, wrapper with ``.value``/``.name``,
            or path string). ``None`` yields "".
        max_chars: Maximum number of characters returned.

    Returns:
        The extracted text, or "" when the file cannot be read, its type is not
        supported, or extraction fails (the error is printed, not raised).
    """
    if file is None:
        return ""

    try:
        bytes_data = _read_upload_bytes(file)
    except Exception as e:
        print(f"❌ Failed to read uploaded file: {e}")
        return ""

    try:
        # Type detection and stream creation sit inside the try so that a
        # libmagic failure is reported instead of crashing the request.
        mime_type = magic.from_buffer(bytes_data, mime=True)
        stream = io.BytesIO(bytes_data)

        if mime_type == "text/plain":
            text = bytes_data.decode("utf-8", errors="ignore")
        elif mime_type == "application/pdf":
            with fitz.open(stream=stream, filetype="pdf") as doc:
                text = "\n".join(page.get_text() for page in doc)
        elif mime_type == "application/vnd.openxmlformats-officedocument.wordprocessingml.document":
            text = "\n".join(p.text for p in docx.Document(stream).paragraphs)
        elif mime_type == "application/msword":
            text = mammoth.extract_raw_text(stream).value
        elif mime_type in ("text/csv", "application/csv"):
            text = pd.read_csv(StringIO(bytes_data.decode("utf-8", errors="ignore"))).to_string()
        elif "excel" in mime_type or "spreadsheetml" in mime_type:
            # .xls reports "...ms-excel"; .xlsx reports "...spreadsheetml.sheet".
            text = pd.read_excel(stream).to_string()
        else:
            print(f"⚠️ Unsupported file type: {mime_type}")
            text = ""
    except Exception as e:
        print(f"❌ Extraction error: {e}")
        return ""

    return text[:max_chars]

def qa_with_llm(file, user_option, base_prompt, question):
    """Answer a question, or run the selected task, over a file or the Chroma store.

    Context comes from the uploaded file when one is given; otherwise the three
    closest documents to the question are fetched from ``collection``.

    Args:
        file: Uploaded document from Gradio, or None.
        user_option: Task chosen in the Radio widget; may be None when nothing
            is selected. Only used when ``question`` is empty.
        base_prompt: Instruction text placed at the start of the prompt.
        question: The user's question; may be empty when a file is uploaded.

    Returns:
        The generated answer, or a warning string when there is nothing to
        answer or generation fails.
    """
    no_input_msg = "⚠️ Please upload a document or enter a question."
    question = (question or "").strip()

    # Without a file or a question there is nothing to read or to search for.
    if not file and not question:
        return no_input_msg

    if file:
        context = extract_text_from_file(file)
    else:
        query_embedding = embed_model.encode(question).tolist()
        results = collection.query(query_embeddings=[query_embedding], n_results=3)
        documents = results['documents'] if results else None
        context = "\n\n".join(documents[0]) if documents else ""

    if not question:
        if not context:
            return no_input_msg
        # The Radio has no default, so user_option can be None.
        task = (user_option or "Summarize the content").lower()
        # The context is supplied once, in the Context section of the prompt.
        question = f"Please {task} using the context below."

    prompt = f"{base_prompt or ''}\n\nQuestion: {question}\n\nContext:\n{context[:1000]}\n\nAnswer:"

    tokenized = tokenizer(prompt, return_tensors="pt", truncation=True, max_length=4096, padding=True)
    inputs = {k: v.to(model.device) for k, v in tokenized.items()}
    prompt_len = inputs['input_ids'].shape[1]
    print(f"🧠 Prompt token count: {prompt_len}")

    try:
        start = time.time()
        output = model.generate(
            **inputs,
            max_new_tokens=512,
            temperature=0.7,
            do_sample=True,
            pad_token_id=tokenizer.pad_token_id,
        )
        print(f"⏱️ Response time: {time.time() - start:.2f} sec")
    except Exception as e:
        print(f"❌ Model generation failed: {e}")
        return "⚠️ An error occurred while generating the answer. Please try with a different file or question."

    # Decode only the newly generated tokens; splitting the full text on
    # "Answer:" breaks when that marker appears in the context or the answer.
    return tokenizer.decode(output[0][prompt_len:], skip_special_tokens=True).strip()

# Input widgets, listed in the positional order qa_with_llm receives them.
recruiter_inputs = [
    gr.File(label="Upload Document"),
    gr.Radio(
        ["Summarize the content", "Check for grammar/spelling/formatting", "Analyze the data"],
        label="Task",
    ),
    gr.Textbox(
        label="Base Prompt",
        value="You are an unbiased recruiting analyst. Use the data below to answer the question clearly and professionally.",
    ),
    gr.Textbox(
        label="Ask a question",
        placeholder="What are the top-rated companies for software engineers?",
    ),
]

# Single-turn interface: one text answer per submission.
app = gr.Interface(
    fn=qa_with_llm,
    inputs=recruiter_inputs,
    outputs="text",
    title="AI Recruiter Assistant",
    description="Upload a job-related document and/or ask the AI for recommendations or summaries.",
)

# Serve the interface; share=True requests a public link.
app.launch(share=True, debug=True)


In [None]:
# Stop the Gradio server started by app.launch() in the previous cell.
app.close()

Closing server running on port: 7860


In [None]:
# Blair's multi-turn attempt #2
# Standard-library imports only at this point: third-party packages are
# imported after the auto-install step below, otherwise a missing package
# raises ImportError before it can be installed.
import importlib
import importlib.util
import io
import os
import subprocess
import sys
import time
from io import StringIO

# ──────────────────────────────────────
# 1. Environment setup
# ──────────────────────────────────────
# The existence of /content is used as the "running in Colab" check.
if os.path.exists("/content"):
    try:
        subprocess.check_call(["apt-get", "install", "-y", "libmagic1"])
    except Exception as e:
        print(f"⚠️ Failed to install apt dependency: {e}")

# Maps import name -> arguments passed to `pip install` (split on whitespace).
required = {
    "torch": "torch",
    "pandas": "pandas",
    "mammoth": "mammoth",
    "docx": "python-docx",
    "fitz": "PyMuPDF",
    "xlrd": "xlrd",
    "sentence_transformers": "sentence-transformers",
    "transformers": "transformers --upgrade",
    "gradio": "gradio",
    "bitsandbytes": "git+https://github.com/TimDettmers/bitsandbytes.git",
    "accelerate": "accelerate --upgrade",
    "chromadb": "chromadb",
    "magic": "python-magic"
}

for imp_name, pip_cmd in required.items():
    if importlib.util.find_spec(imp_name) is None:
        print(f"📦 Installing {pip_cmd}…")
        subprocess.check_call([sys.executable, "-m", "pip", "install", *pip_cmd.split()])

# Make packages installed a moment ago visible to the import system.
importlib.invalidate_caches()

import docx
import fitz
import gradio as gr
import magic
import mammoth
import numpy as np
import pandas as pd
import torch
import xlrd
from chromadb import PersistentClient
from sentence_transformers import SentenceTransformer
from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig

# ──────────────────────────────────────
# 2. Model & embedding setup
# ──────────────────────────────────────
# Mount Google Drive when in Colab, as the other app cells do. Without the
# mount, the Drive path below is just an empty local folder and retrieval
# silently returns nothing.
try:
    from google.colab import drive
    drive.mount('/content/drive')
except ImportError:
    pass  # not in Colab: the local fallback store path is used

CHROMA_PATH = "/content/drive/MyDrive/Glassdoor Chroma Store" if os.path.exists("/content") else "./glassdoor_chroma_store"
client = PersistentClient(path=CHROMA_PATH)
collection = client.get_or_create_collection("csv_documents")

# Load the LLM: try 4-bit quantization first, fall back to fp16 if that fails.
# NOTE(review): trust_remote_code=True allows code shipped in the model repo to
# run locally — confirm it is actually required for this checkpoint.
model_path = "mistralai/Mistral-7B-Instruct-v0.2"
try:
    bnb_config = BitsAndBytesConfig(load_in_4bit=True, bnb_4bit_use_double_quant=True,
                                    bnb_4bit_quant_type="nf4", bnb_4bit_compute_dtype=torch.float16)
    model = AutoModelForCausalLM.from_pretrained(model_path, device_map="auto", quantization_config=bnb_config,
                                                trust_remote_code=True)
    print("✅ Loaded model with 4‑bit quantization (bnb)")
except Exception as e:
    print("⚠️ bitsandbytes load failed – falling back to fp16 ::", e)
    model = AutoModelForCausalLM.from_pretrained(model_path, torch_dtype=torch.float16,
                                                device_map="auto", trust_remote_code=True)

tokenizer = AutoTokenizer.from_pretrained(model_path, trust_remote_code=True)
if tokenizer.pad_token is None:
    # The tokenizer is called with padding=True later, which needs a pad token.
    tokenizer.pad_token = tokenizer.eos_token

embed_model = SentenceTransformer("all-MiniLM-L6-v2")

# ──────────────────────────────────────
# 3. Helper – extract text from uploaded files
# ──────────────────────────────────────

def _read_upload_bytes(file):
    """Return the raw bytes of an upload given as a file object, wrapper or path."""
    if hasattr(file, "read"):
        data = file.read()
        # Text-mode handles return str; normalise so callers always get bytes.
        return data.encode("utf-8") if isinstance(data, str) else data

    # Wrapper objects expose the temp-file path as .value or .name; a plain
    # path string is accepted as well.
    for candidate in (getattr(file, "value", None), getattr(file, "name", None), file):
        if isinstance(candidate, (str, os.PathLike)) and os.path.exists(candidate):
            # Context manager so the handle is closed (the bare open().read() leaked it).
            with open(candidate, "rb") as handle:
                return handle.read()
    raise ValueError("Unsupported file object or missing file path.")


def extract_text_from_file(file, max_chars=3000):
    """Extract plain text from an uploaded document, truncated to ``max_chars``.

    The file type is detected from its content with libmagic. Plain text, PDF,
    Word, CSV and Excel (.xls and .xlsx) files are handled.

    Args:
        file: Upload from Gradio (file object, wrapper with ``.value``/``.name``,
            or path string). ``None`` yields "".
        max_chars: Maximum number of characters returned.

    Returns:
        The extracted text, or "" when the file cannot be read, its type is not
        supported, or extraction fails (the error is printed, not raised).
    """
    if file is None:
        return ""
    try:
        bytes_data = _read_upload_bytes(file)
    except Exception as e:
        print(f"❌ Failed to read uploaded file: {e}")
        return ""

    try:
        mime_type = magic.from_buffer(bytes_data, mime=True)
        stream = io.BytesIO(bytes_data)

        if mime_type == "text/plain":
            text = bytes_data.decode("utf-8", errors="ignore")
        elif mime_type == "application/pdf":
            # Close the PyMuPDF document once the pages have been read.
            with fitz.open(stream=stream, filetype="pdf") as pdf_doc:
                text = "\n".join(page.get_text() for page in pdf_doc)
        elif mime_type == "application/vnd.openxmlformats-officedocument.wordprocessingml.document":
            text = "\n".join(p.text for p in docx.Document(stream).paragraphs)
        elif mime_type == "application/msword":
            text = mammoth.extract_raw_text(stream).value
        elif mime_type in ("text/csv", "application/csv"):
            text = pd.read_csv(StringIO(bytes_data.decode("utf-8", errors="ignore"))).to_string()
        elif "excel" in mime_type or "spreadsheetml" in mime_type:
            # .xls reports "...ms-excel"; .xlsx reports "...spreadsheetml.sheet".
            text = pd.read_excel(stream).to_string()
        else:
            print(f"⚠️ Unsupported file type: {mime_type}")
            text = ""
    except Exception as e:
        print(f"❌ Extraction error: {e}")
        return ""

    return text[:max_chars]

# ──────────────────────────────────────
# 4. Core QA routine – now accepts **conversation** (history) for multi‑turn
# ──────────────────────────────────────

def qa_with_llm(file, task, base_prompt, history, question):
    """Run one chat turn and return ``(chatbot_messages, updated_history)``.

    Args:
        file: Uploaded document from Gradio, or None to retrieve context from
            the Chroma collection instead.
        task: Task chosen in the Radio widget (may be None); used as the
            request when ``question`` is empty and a file is uploaded.
        base_prompt: Instruction text placed at the start of the prompt.
        history: List of ``(user, assistant)`` tuples held in Gradio State.
        question: The user's message. "clear", "reset" or "start over" wipes
            the conversation.

    Returns:
        A pair: the message list for the Chatbot component and the history
        list to store back into State.
    """
    # Work on a copy so the list held by Gradio State is never mutated in place.
    history = list(history or [])
    question = (question or "").strip()

    # → Reset logic -------------------------------------------------------------
    if question.lower() in {"clear", "reset", "start over"}:
        return [], []  # empty chatbot + cleared state

    # → Nothing to answer: show a hint without storing it in the history --------
    if not question and file is None:
        hint = "⚠️ Please upload a document or enter a question."
        return history + [(None, hint)], history

    # → Context acquisition -----------------------------------------------------
    if file is not None:
        context = extract_text_from_file(file)
    else:
        # RAG fallback
        query_embedding = embed_model.encode(question).tolist()
        docs = collection.query(query_embeddings=[query_embedding], n_results=3)
        context = "\n\n".join(docs["documents"][0]) if docs and docs["documents"] else ""

    if not question:
        # File uploaded without a question: fall back to the selected task.
        question = f"Please {(task or 'Summarize the content').lower()} using the context below."

    # → Build conversation string (limit last 5 turns to avoid context bloat) ---
    convo_snippets = "".join(
        f"User: {u}\nAssistant: {a}\n" for u, a in history[-5:]
    )

    # → Compose final prompt ----------------------------------------------------
    prompt = (
        f"{base_prompt}\n\n"
        f"Conversation so far:\n{convo_snippets}\n"
        f"User question: {question}\n\n"
        f"Context (from Glassdoor / file):\n{context[:1000]}\n\n"
        f"Assistant answer:"
    )

    tokens = tokenizer(prompt, return_tensors="pt", truncation=True, max_length=4096, padding=True)
    inputs = {k: v.to(model.device) for k, v in tokens.items()}
    prompt_len = inputs["input_ids"].shape[1]

    try:
        output_ids = model.generate(
            **inputs,
            max_new_tokens=512,
            temperature=0.7,
            do_sample=True,
            pad_token_id=tokenizer.pad_token_id,
        )
    except Exception as e:
        print("❌ Generation error:", e)
        answer = "⚠️ The model hit an error. Please try again."
    else:
        # Decode only the newly generated tokens; splitting on the
        # "Assistant answer:" marker breaks when the marker recurs in the text.
        answer = tokenizer.decode(output_ids[0][prompt_len:], skip_special_tokens=True).strip()

    # → Update chat history -----------------------------------------------------
    history.append((question, answer))
    # First value feeds the Chatbot component, second persists in State.
    return history, history
# ──────────────────────────────────────
# 5. Gradio **Interface** UI
# ──────────────────────────────────────

# Input and output components; their order in the Interface below matches the
# parameter order of qa_with_llm and its two return values.
file_in = gr.File(label="Upload Document")
radio_in = gr.Radio(
    ["Summarize the content", "Check for grammar/spelling/formatting", "Analyze the data"],
    label="Task",
)
base_in = gr.Textbox(
    label="Base Prompt",
    lines=2,
    value="You are an unbiased recruiting analyst. Use the data below to answer clearly and professionally.",
)
chat_out = gr.Chatbot(label="Conversation")
question_in = gr.Textbox(
    label="Ask a question",
    placeholder="e.g. What are the top‑rated companies for software engineers?",
    lines=1,
)
# Conversation memory: a list of (user, assistant) tuples, initially empty.
state_mem = gr.State([])

app = gr.Interface(
    fn=qa_with_llm,
    inputs=[file_in, radio_in, base_in, state_mem, question_in],
    outputs=[chat_out, state_mem],
    title="AI Recruiter Assistant (Glassdoor RAG)",
    description="Ask any series of questions about employers or roles. Type **clear** to reset the chat.",
)

# Serve the interface when the cell is executed directly; share=True requests a public link.
if __name__ == "__main__":
    app.launch(share=True, debug=True)

In [None]:
# Blair's multi-turn attempt #1

# ai_recruiter_with_rag.py (patched for Gradio file compatibility with full fallback support)

import subprocess
import sys
import importlib.util
import os

# Auto-install APT dependencies if in Colab
# The existence of /content is used as the "running in Colab" check.
if os.path.exists("/content"):
    try:
        subprocess.check_call(["apt-get", "install", "-y", "libmagic1"])
    except Exception as e:
        # Best effort: a failed apt install is reported but does not stop the cell.
        print(f"⚠️ Failed to install apt dependency: {e}")

# Auto-install and upgrade missing packages
# Maps import name -> arguments passed to `pip install` (split on whitespace).
# install_missing() only installs entries whose import name cannot be found,
# so "--upgrade" has an effect only when the package is absent.
required = {
    "torch": "torch",
    "pandas": "pandas",
    "mammoth": "mammoth",
    "docx": "python-docx",
    "fitz": "PyMuPDF",
    "xlrd": "xlrd",
    "sentence_transformers": "sentence-transformers",
    "transformers": "transformers --upgrade",
    "gradio": "gradio",
    "bitsandbytes": "git+https://github.com/TimDettmers/bitsandbytes.git",
    "accelerate": "accelerate --upgrade",
    "chromadb": "chromadb",
    "magic": "python-magic"
}

def install_missing(pkg_map):
    """Pip-install each entry of ``pkg_map`` whose module cannot be found.

    ``pkg_map`` maps an importable module name to the argument string given to
    ``pip install``; that string is split on whitespace, so it may carry flags.
    """
    # Generator on purpose: each lookup happens after the previous install finished.
    pending = (
        pip_args
        for module_name, pip_args in pkg_map.items()
        if importlib.util.find_spec(module_name) is None
    )
    for pip_args in pending:
        print(f"📦 Installing {pip_args}...")
        subprocess.check_call([sys.executable, "-m", "pip", "install", *pip_args.split()])

install_missing(required)

import io
import time
import torch
import pandas as pd
import mammoth
import docx
import fitz
import xlrd
import magic
import numpy as np
from io import StringIO
from sentence_transformers import SentenceTransformer
from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig
import gradio as gr
from chromadb import PersistentClient

# Mount Google Drive if in Colab
try:
    from google.colab import drive
    drive.mount('/content/drive')
except ImportError:
    pass  # google.colab is unavailable outside Colab; nothing to mount

# Persistent Chroma store holding the ingested CSV chunks.
CHROMA_PATH = "/content/drive/MyDrive/Glassdoor Chroma Store"
client = PersistentClient(path=CHROMA_PATH)
collection = client.get_or_create_collection("csv_documents")

# Load the LLM: try 4-bit quantization first, fall back to fp16 if that fails.
# NOTE(review): trust_remote_code=True allows code shipped in the model repo to
# run locally — confirm it is actually required for this checkpoint.
model_path = "mistralai/Mistral-7B-Instruct-v0.2"
try:
    bnb_config = BitsAndBytesConfig(
        load_in_4bit=True,
        bnb_4bit_use_double_quant=True,
        bnb_4bit_quant_type='nf4',
        bnb_4bit_compute_dtype=torch.float16
    )
    model = AutoModelForCausalLM.from_pretrained(
        model_path,
        device_map="auto",
        quantization_config=bnb_config,
        trust_remote_code=True
    )
    print("✅ Loaded model with 4-bit quantization (bnb)")
except Exception as e:
    # Surface the reason: the fp16 fallback needs far more memory, so the
    # underlying bitsandbytes error must not be hidden.
    print(f"⚠️ Failed to load with bitsandbytes, falling back to fp16: {e}")
    model = AutoModelForCausalLM.from_pretrained(
        model_path,
        torch_dtype=torch.float16,
        device_map="auto",
        trust_remote_code=True
    )

tokenizer = AutoTokenizer.from_pretrained(model_path, trust_remote_code=True)
if tokenizer.pad_token is None:
    # The tokenizer is called with padding=True later, which needs a pad token.
    tokenizer.pad_token = tokenizer.eos_token
embed_model = SentenceTransformer("all-MiniLM-L6-v2")

def _read_upload_bytes(file):
    """Return the raw bytes of an upload given as a file object, wrapper or path."""
    if hasattr(file, "read"):
        data = file.read()
        # Text-mode handles return str; normalise so callers always get bytes.
        return data.encode("utf-8") if isinstance(data, str) else data

    # Wrapper objects expose the temp-file path as .value or .name; a plain
    # path string is accepted as well.
    for candidate in (getattr(file, "value", None), getattr(file, "name", None), file):
        if isinstance(candidate, (str, os.PathLike)) and os.path.exists(candidate):
            with open(candidate, "rb") as handle:
                return handle.read()
    raise ValueError("Unsupported file object or missing file path.")


def extract_text_from_file(file, max_chars=3000):
    """Extract plain text from an uploaded document, truncated to ``max_chars``.

    The file type is detected from its content with libmagic. Plain text, PDF,
    Word, CSV and Excel (.xls and .xlsx) files are handled.

    Args:
        file: Upload from Gradio (file object, wrapper with ``.value``/``.name``,
            or path string). ``None`` yields "".
        max_chars: Maximum number of characters returned.

    Returns:
        The extracted text, or "" when the file cannot be read, its type is not
        supported, or extraction fails (the error is printed, not raised).
    """
    if file is None:
        return ""

    try:
        bytes_data = _read_upload_bytes(file)
    except Exception as e:
        print(f"❌ Failed to read uploaded file: {e}")
        return ""

    try:
        # Type detection and stream creation sit inside the try so that a
        # libmagic failure is reported instead of crashing the request.
        mime_type = magic.from_buffer(bytes_data, mime=True)
        stream = io.BytesIO(bytes_data)

        if mime_type == "text/plain":
            text = bytes_data.decode("utf-8", errors="ignore")
        elif mime_type == "application/pdf":
            with fitz.open(stream=stream, filetype="pdf") as doc:
                text = "\n".join(page.get_text() for page in doc)
        elif mime_type == "application/vnd.openxmlformats-officedocument.wordprocessingml.document":
            text = "\n".join(p.text for p in docx.Document(stream).paragraphs)
        elif mime_type == "application/msword":
            text = mammoth.extract_raw_text(stream).value
        elif mime_type in ("text/csv", "application/csv"):
            text = pd.read_csv(StringIO(bytes_data.decode("utf-8", errors="ignore"))).to_string()
        elif "excel" in mime_type or "spreadsheetml" in mime_type:
            # .xls reports "...ms-excel"; .xlsx reports "...spreadsheetml.sheet".
            text = pd.read_excel(stream).to_string()
        else:
            print(f"⚠️ Unsupported file type: {mime_type}")
            text = ""
    except Exception as e:
        print(f"❌ Extraction error: {e}")
        return ""

    return text[:max_chars]

def qa_with_llm(file, user_option, base_prompt, question):
    """Answer a question, or run the selected task, over a file or the Chroma store.

    Context comes from the uploaded file when one is given; otherwise the three
    closest documents to the question are fetched from ``collection``.

    Args:
        file: Uploaded document from Gradio, or None.
        user_option: Task chosen in the Radio widget; may be None when nothing
            is selected. Only used when ``question`` is empty.
        base_prompt: Instruction text placed at the start of the prompt.
        question: The user's question; may be empty when a file is uploaded.

    Returns:
        The generated answer, or a warning string when there is nothing to
        answer or generation fails.
    """
    no_input_msg = "⚠️ Please upload a document or enter a question."
    question = (question or "").strip()

    # Without a file or a question there is nothing to read or to search for.
    if not file and not question:
        return no_input_msg

    if file:
        context = extract_text_from_file(file)
    else:
        query_embedding = embed_model.encode(question).tolist()
        results = collection.query(query_embeddings=[query_embedding], n_results=3)
        documents = results['documents'] if results else None
        context = "\n\n".join(documents[0]) if documents else ""

    if not question:
        if not context:
            return no_input_msg
        # The Radio has no default, so user_option can be None.
        task = (user_option or "Summarize the content").lower()
        # The context is supplied once, in the Context section of the prompt.
        question = f"Please {task} using the context below."

    prompt = f"{base_prompt or ''}\n\nQuestion: {question}\n\nContext:\n{context[:1000]}\n\nAnswer:"

    tokenized = tokenizer(prompt, return_tensors="pt", truncation=True, max_length=4096, padding=True)
    inputs = {k: v.to(model.device) for k, v in tokenized.items()}
    prompt_len = inputs['input_ids'].shape[1]
    print(f"🧠 Prompt token count: {prompt_len}")

    try:
        start = time.time()
        output = model.generate(
            **inputs,
            max_new_tokens=512,
            temperature=0.7,
            do_sample=True,
            pad_token_id=tokenizer.pad_token_id,
        )
        print(f"⏱️ Response time: {time.time() - start:.2f} sec")
    except Exception as e:
        print(f"❌ Model generation failed: {e}")
        return "⚠️ An error occurred while generating the answer. Please try with a different file or question."

    # Decode only the newly generated tokens; splitting the full text on
    # "Answer:" breaks when that marker appears in the context or the answer.
    return tokenizer.decode(output[0][prompt_len:], skip_special_tokens=True).strip()

# Input widgets, listed in the positional order qa_with_llm receives them.
recruiter_inputs = [
    gr.File(label="Upload Document"),
    gr.Radio(
        ["Summarize the content", "Check for grammar/spelling/formatting", "Analyze the data"],
        label="Task",
    ),
    gr.Textbox(
        label="Base Prompt",
        value="You are an unbiased recruiting analyst. Use the data below to answer the question clearly and professionally.",
    ),
    gr.Textbox(
        label="Ask a question",
        placeholder="What are the top-rated companies for software engineers?",
    ),
]

# Single-turn interface: one text answer per submission.
app = gr.Interface(
    fn=qa_with_llm,
    inputs=recruiter_inputs,
    outputs="text",
    title="AI Recruiter Assistant",
    description="Upload a job-related document and/or ask the AI for recommendations or summaries.",
)

# Serve the interface; share=True requests a public link.
app.launch(share=True, debug=True)



📦 Installing mammoth...
📦 Installing python-docx...
📦 Installing PyMuPDF...
📦 Installing gradio...
📦 Installing git+https://github.com/TimDettmers/bitsandbytes.git...
📦 Installing chromadb...
📦 Installing python-magic...
Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


config.json:   0%|          | 0.00/596 [00:00<?, ?B/s]

ERROR:bitsandbytes.cextension:bitsandbytes library load error: Configured CUDA binary not found at /usr/local/lib/python3.11/dist-packages/bitsandbytes/libbitsandbytes_cuda124.so
Traceback (most recent call last):
  File "/usr/local/lib/python3.11/dist-packages/bitsandbytes/cextension.py", line 287, in <module>
    lib = get_native_library()
          ^^^^^^^^^^^^^^^^^^^^
  File "/usr/local/lib/python3.11/dist-packages/bitsandbytes/cextension.py", line 267, in get_native_library
    raise RuntimeError(f"Configured CUDA binary not found at {cuda_binary_path}")
RuntimeError: Configured CUDA binary not found at /usr/local/lib/python3.11/dist-packages/bitsandbytes/libbitsandbytes_cuda124.so


model.safetensors.index.json:   0%|          | 0.00/25.1k [00:00<?, ?B/s]

Fetching 3 files:   0%|          | 0/3 [00:00<?, ?it/s]

model-00001-of-00003.safetensors:   0%|          | 0.00/4.94G [00:00<?, ?B/s]

model-00002-of-00003.safetensors:   0%|          | 0.00/5.00G [00:00<?, ?B/s]

model-00003-of-00003.safetensors:   0%|          | 0.00/4.54G [00:00<?, ?B/s]

Loading checkpoint shards:   0%|          | 0/3 [00:00<?, ?it/s]

⚠️ Failed to load with bitsandbytes, falling back to fp16


Loading checkpoint shards:   0%|          | 0/3 [00:00<?, ?it/s]

generation_config.json:   0%|          | 0.00/111 [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/2.10k [00:00<?, ?B/s]

tokenizer.model:   0%|          | 0.00/493k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.80M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/414 [00:00<?, ?B/s]

modules.json:   0%|          | 0.00/349 [00:00<?, ?B/s]

config_sentence_transformers.json:   0%|          | 0.00/116 [00:00<?, ?B/s]

README.md:   0%|          | 0.00/10.5k [00:00<?, ?B/s]

sentence_bert_config.json:   0%|          | 0.00/53.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/612 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/90.9M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/350 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/466k [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/112 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/190 [00:00<?, ?B/s]

Colab notebook detected. This cell will run indefinitely so that you can see errors and logs. To turn off, set debug=False in launch().
* Running on public URL: https://04cf6900c0de83baf2.gradio.live

This share link expires in 1 week. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


🧠 Prompt token count: 317
⏱️ Response time: 13.79 sec


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


🧠 Prompt token count: 317
⏱️ Response time: 11.47 sec


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


🧠 Prompt token count: 153
⏱️ Response time: 12.96 sec


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


🧠 Prompt token count: 140
⏱️ Response time: 20.54 sec
Keyboard interruption in main thread... closing server.
Killing tunnel 127.0.0.1:7860 <> https://04cf6900c0de83baf2.gradio.live


