In [None]:
!pip uninstall -y llama-cpp-python llama_cpp_python
!pip cache purge
!pip install -U pip wheel

!pip install --quiet --upgrade langchain-text-splitters langchain-community langgraph
!{sys.executable} -m pip install --force-reinstall --no-cache-dir "numpy==2.1.3"
!pip install openpyxl
!pip install --upgrade faiss-cpu
!pip install -qU langchain-core
!pip install -U \
  langchain-huggingface \
  "sentence-transformers>=3.1" \
  "transformers>=4.44" \
  "huggingface_hub>=0.24"
# CUDA 12.4 wheels:
!pip install --no-cache-dir --force-reinstall \
  --index-url https://abetlen.github.io/llama-cpp-python/whl/cu124 \
  --extra-index-url https://pypi.org/simple \
  "llama-cpp-python==0.3.16"

In [None]:
import sys, llama_cpp
# Report interpreter and binding versions so the install cell can be verified.
print("py:", sys.version)
print("llama-cpp-python:", llama_cpp.__version__)

# Low-level C-extension handle (works across versions): prefer the
# `llama_cpp.llama_cpp` bindings module and fall back to the top-level package.
try:
    from llama_cpp import llama_cpp as _lib
except Exception:
    _lib = llama_cpp

# Each probe falls back to a zero-argument callable returning a placeholder,
# so a missing symbol prints a readable value instead of raising.
print("GPU offload support:", getattr(_lib, "llama_supports_gpu_offload", lambda: "unknown")())

# The fallback must return the placeholder string itself; the previous
# `lambda: lambda: "n/a"` returned a function object, which was then printed.
bi = getattr(_lib, "llama_build_info", lambda: "n/a")()
print("Build info:", bi)


In [None]:
# --- Imports (all local) ---
import json
from typing_extensions import List, TypedDict

from langchain_core.prompts import ChatPromptTemplate
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_community.document_loaders import PyPDFLoader
from langchain_core.documents import Document

# Local LLM via Ollama
from langchain_community.chat_models import ChatOllama

# Local embeddings + FAISS
from langchain_huggingface import HuggingFaceEmbeddings
from langchain_community.vectorstores import FAISS

from langgraph.graph import START, StateGraph
import pandas as pd
from tqdm import tqdm
import time, random, json
from collections import deque
from google.colab import files
from huggingface_hub import snapshot_download
from langchain_community.chat_models import ChatLlamaCpp
import pandas as pd, json, time, random, re
from tqdm import tqdm
from langchain_core.prompts import ChatPromptTemplate


In [None]:
# Upload the input files through the google.colab `files` helper.
# Later cells open "label_names.json" and read an Excel workbook from the
# working directory, so presumably those are the files to provide here.
uploaded = files.upload()



In [None]:
# Download both shards directly (resumable)
local_dir = snapshot_download(
    repo_id="Qwen/Qwen2.5-7B-Instruct-GGUF",   # change if your repo is different
    allow_patterns=[
        "qwen2.5-7b-instruct-q4_k_m-00001-of-00002.gguf",
        "qwen2.5-7b-instruct-q4_k_m-00002-of-00002.gguf"
    ],
    local_dir="/content/models/qwen2.5-7b-instruct",
    local_dir_use_symlinks=False # store real files
)

# Sanity check sizes
!ls -lh /content/models/qwen2.5-7b-instruct

In [None]:

# Chat model backed by llama.cpp. `model_path` points at the FIRST of the two
# downloaded shards. The remaining arguments are grouped by concern below.
llm = ChatLlamaCpp(
    model_path=(
        "/content/models/qwen2.5-7b-instruct/"
        "qwen2.5-7b-instruct-q4_k_m-00001-of-00002.gguf"
    ),
    # --- hardware / throughput ---
    n_gpu_layers=-1,   # offload all layers that fit on the GPU
    n_threads=4,       # CPU threads
    n_batch=256,       # prompt-evaluation batch size
    # --- generation ---
    n_ctx=8192,        # context window; 4096 also works for shorter prompts
    temperature=0.2,   # low temperature for more stable output
    verbose=False,
)


In [None]:
# Local embedding model (multilingual bge-m3; English alternative: bge-large-en-v1.5).
# All other settings are left at their defaults. This object is passed to
# FAISS when the label index is built in a later cell.
# NOTE(review): the model weights are presumably fetched on first use and then
# served from the local Hugging Face cache — confirm for offline runs.
embeddings = HuggingFaceEmbeddings(model_name="BAAI/bge-m3")

In [None]:
import json
# `Document` is imported from langchain_core, the same path the import cell
# uses, rather than the legacy `langchain.schema` location; the `langchain`
# package itself is never installed explicitly by this notebook.
from langchain_core.documents import Document
from langchain_community.vectorstores import FAISS

# 1) Load labels (expected: a JSON list of strings)
with open("label_names.json", "r", encoding="utf-8") as f:
    labels = json.load(f)

# Fail early with a readable message; an empty collection would otherwise
# surface as an opaque error from inside the vector-store construction.
if not labels:
    raise ValueError("label_names.json contains no labels; cannot build the FAISS index")

# 2) Make one Document per label (no splitting needed). The label text is both
#    the searchable content and a metadata field.
docs = [
    Document(page_content=lbl, metadata={"label": lbl, "source": "label_names.json"})
    for lbl in labels
]

# 3) Build FAISS over labels using the `embeddings` object defined in an earlier cell
vector_store = FAISS.from_documents(docs, embedding=embeddings)

In [None]:
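# NOTE: disabled snippet kept for reference only. It relies on `guide`, `title`,
# and `version`, none of which are defined in the visible cells of this notebook.
# for i, sec in enumerate(guide.get("sections", []), start=1):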
#     q = sec.get("question", "").strip()
#     a = sec.get("answer", "").strip()

#     # Optional: include structured contact info in the body to make it searchable
#     extra = []
#     cm = sec.get("contact_methods")
#     if cm:
#         if "email" in cm: extra.append(f"Email: {cm['email']}")
#         if "phone" in cm: extra.append(f"Phone: {cm['phone']}")
#         live = cm.get("live_chat")
#         if live and "availability" in live:
#             extra.append(f"Live chat availability: {live['availability']}")

#     page_content = "\n".join(
#         part for part in [
#             f"Q: {q}" if q else "",
#             f"A: {a}" if a else "",
#             "\n".join(extra) if extra else ""
#         ] if part
#     )

#     docs.append(
#         Document(
#             page_content=page_content,
#             metadata={
#                 "section_index": i,
#                 "question": q,
#                 "title": title,
#                 "version": version,
#                 "source": "customer_support_guide_v1.json",
#             },
#         )
#     )

In [None]:
from langchain_core.prompts import ChatPromptTemplate

# Prompt sent to the chat model. It exposes two template variables:
#   {input}   - the text to process
#   {context} - the retrieved documents, joined into one string
# The role in a (role, template) tuple must be one that LangChain recognizes
# ("system", "human"/"user", "ai"/"assistant", ...). The previous value
# "---------" is not a valid role and made `from_messages` raise ValueError.
# TODO: add a ("system", "...") message ahead of this one if the model needs
# task instructions; none are present in this cell.
custom_prompt = ChatPromptTemplate.from_messages([
    (
        "human",
        "Input:\n{input}\n\nContext:\n{context}"
    )
])


In [None]:
import bs4
from langchain import hub
from langchain_community.document_loaders import WebBaseLoader
from langchain_core.documents import Document
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langgraph.graph import START, StateGraph
from typing_extensions import List, TypedDict


# Alias the prompt template under the name that `generate` reads below.
prompt = custom_prompt


# Define state for application
class State(TypedDict):
    """Shared state passed between the graph steps."""

    # Text supplied by the caller; read by both `retrieve` and `generate`.
    input: str
    # Documents returned by the vector-store search; written by `retrieve`,
    # read by `generate`.
    context: List[Document]
    # Text content of the model response; written by `generate`.
    answer: str


# Define application steps
def retrieve(state: State):
    """Search the vector store with the input text.

    Reads ``state["input"]`` and runs ``vector_store.similarity_search`` with
    its default settings.

    Returns:
        A partial state update ``{"context": <list of matching documents>}``.
    """
    return {"context": vector_store.similarity_search(state["input"])}


def generate(state: State):
    """Ask the LLM for an answer, given the input and the retrieved context.

    The page contents of ``state["context"]`` are joined with blank lines and
    rendered into ``prompt`` together with ``state["input"]``; the rendered
    prompt is then sent to ``llm``.

    Returns:
        A partial state update ``{"answer": <content of the model response>}``.
    """
    context_text = "\n\n".join([item.page_content for item in state["context"]])
    rendered = prompt.invoke({"context": context_text, "input": state["input"]})
    return {"answer": llm.invoke(rendered).content}


# Assemble the pipeline: START -> retrieve -> generate, then compile it into
# the runnable `graph` used by the batch cell.
graph_builder = StateGraph(State)
graph_builder.add_sequence([retrieve, generate])  # nodes are named after the functions
graph_builder.add_edge(START, "retrieve")
graph = graph_builder.compile()

In [None]:
import pandas as pd

# Read the workbook into a DataFrame (pass sheet_name="Sheet1" if a specific
# sheet is needed). NOTE: "----" is the path exactly as written in this
# notebook; point it at the real workbook before running.
df = pd.read_excel("----")

# Convert to a list of dicts - one dict per row, keyed by column name.
rows_as_dicts = df.to_dict("records")


In [None]:
def _cell_text(value):
    """Render one spreadsheet cell for the prompt.

    Missing values (``None`` or a float NaN, which is how pandas represents
    empty cells in ``to_dict`` records) become an empty string instead of the
    literal text "None"/"nan". Everything else is formatted as an f-string would.
    """
    if value is None:
        return ""
    # NaN is the only float that is not equal to itself.
    if isinstance(value, float) and value != value:
        return ""
    return f"{value}"


results = []  # model answers, one per input row, in row order
for row_dict in rows_as_dicts:
    # Build one labelled text block per row. The labels are now uniform; the
    # first one previously contained a stray tab character before the colon.
    combined_text = (
        f"x: {_cell_text(row_dict.get('x'))}\n"
        f"y: {_cell_text(row_dict.get('y'))}\n"
        f"z: {_cell_text(row_dict.get('z'))}\n"
    )

    # Run the retrieve -> generate graph; "input" is the state key it reads.
    resp = graph.invoke({"input": combined_text})

    # Keep the answer text; fall back to "" if the graph returned no answer.
    results.append(resp.get("answer", ""))

# Add the model answers as a new column and save to Excel.
df["answer"] = results
df.to_excel("file.xlsx", index=False)

print("Finished. New file saved as file.xlsx")

Finished. New file saved as MA_open_with_answers.xlsx


In [None]:
# import builtins

# while True:
#     user_input = builtins.input("Enter your question (or type 'exit' to quit): ")
#     if user_input.lower() == "exit":
#         break
#     result_state = graph.invoke({"input": user_input})
#     print(result_state["answer"])

In [None]:
# Hand "file.xlsx" (the workbook written by the batch cell) to the
# google.colab `files` helper for download.
files.download("file.xlsx")

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>