In [None]:
!pip install gradio
!pip install huggingface_hub
!huggingface-cli login --token "your-hf-token"
!pip install transformers gradio accelerate --quiet
!pip install transformers sentence-transformers faiss-cpu datasets
!pip install requests beautifulsoup4 nltk sentence-transformers faiss-cpu transformers torch gradio pyspellchecker
!pip install streamlit sentence-transformers scikit-learn faiss-cpu transformers nltk tenacity pandas openpyxl torch beautifulsoup4 requests
!pip install -q sentence-transformers scikit-learn faiss-cpu gradio transformers nltk tenacity openpyxl rank_bm25
!pip install pandas matplotlib openpyxl
!pip install -q sentence-transformers scikit-learn faiss-cpu gradio transformers nltk tenacity openpyxl

In [None]:

# ==============================================
# SynecxAI Final RAG Chatbot with Full Metrics
# ==============================================
import os, time, urllib.parse, pandas as pd, requests, torch, gradio as gr
from bs4 import BeautifulSoup
from nltk.tokenize import sent_tokenize
import nltk
from sentence_transformers import SentenceTransformer, util
import faiss
from transformers import pipeline
from sklearn.metrics.pairwise import cosine_similarity
from tenacity import retry, stop_after_attempt, wait_fixed
from rank_bm25 import BM25Okapi
from collections import defaultdict
from openpyxl import load_workbook

# ========== Setup ==========
# Disable the HuggingFace tokenizers' internal parallelism for this process.
os.environ["TOKENIZERS_PARALLELISM"] = "false"

# sent_tokenize() needs the Punkt sentence models. Older NLTK releases ship
# them as "punkt"; newer releases look for "punkt_tab" instead, so fetch both.
# nltk.download() returns False (it does not raise) for a resource the
# installed version does not know about.
for _nltk_resource in ("punkt", "punkt_tab"):
    nltk.download(_nltk_resource, quiet=True)

# ========== NLP ==========
# Zero-shot classification pipeline (BART-large MNLI); detect_intent() calls it
# once per user message.
intent_classifier = pipeline("zero-shot-classification", model="facebook/bart-large-mnli")
# Candidate intent labels handed to the classifier on every call.
INTENT_LABELS = ["product_info", "pricing", "support", "career", "contact", "case_study", "blog", "general"]

def detect_intent(text):
    """Return the first intent label the zero-shot classifier reports for *text*.

    The classifier is asked to choose among ``INTENT_LABELS``; the label at
    position 0 of its ``"labels"`` output is returned.
    """
    classification = intent_classifier(text, candidate_labels=INTENT_LABELS)
    ranked_labels = classification["labels"]
    return ranked_labels[0]
# spaCy NER is optional: if the package or the "en_core_web_sm" model cannot be
# loaded, entity extraction is disabled rather than aborting the whole app.
try:
    import spacy
    nlp = spacy.load("en_core_web_sm")
except Exception as exc:
    # `Exception` (not a bare except) so Ctrl-C / SystemExit still propagate,
    # and the reason is reported instead of being swallowed silently.
    print(f"spaCy NER disabled: {exc}")
    nlp = None


def extract_entities(text):
    """Return ``(entity_text, entity_label)`` pairs found in *text*.

    Returns an empty list when the spaCy model could not be loaded.
    """
    if nlp is None:
        return []
    return [(ent.text, ent.label_) for ent in nlp(text).ents]

# ========== Web Scraping ==========
@retry(stop=stop_after_attempt(3), wait=wait_fixed(2))
def _fetch_html(url):
    """Download *url* and return its body text.

    Raises on network errors and non-2xx responses so that the ``@retry``
    decorator actually sees the failure and can try again (3 attempts,
    2 seconds apart).
    """
    response = requests.get(url, headers={"User-Agent": "Mozilla/5.0"}, timeout=30)
    response.raise_for_status()
    return response.text


def get_website_text(url):
    """Scrape *url* and return its visible text as a list of sentences.

    The page is fetched (with retries), stripped of markup, and split into
    sentences; only sentences longer than 30 characters after trimming are
    kept.

    Scraping is best-effort: if the page cannot be fetched or parsed, the
    problem is printed and an empty list is returned, so one bad URL does not
    stop the rest of the crawl.
    """
    try:
        html = _fetch_html(url)
        page_text = BeautifulSoup(html, "html.parser").get_text(" ")
        sentences = sent_tokenize(page_text)
    except Exception as exc:
        # Previously the failure was swallowed *inside* the retried function,
        # so no retry ever happened and nothing was reported.
        print(f"⚠️ Skipping {url}: {exc}")
        return []
    trimmed = (sentence.strip() for sentence in sentences)
    return [sentence for sentence in trimmed if len(sentence) > 30]

# Pages scraped at startup to build the retrieval corpus, in crawl order.
URLS = [
    # synecxailabs.io
    "https://synecxailabs.io",
    "https://synecxailabs.io/about-us",
    "https://synecxailabs.io/blog",
    "https://synecxailabs.io/CaseStudy",
    "https://synecxailabs.io/career",
    "https://synecxailabs.io/contact",
    "https://synecxailabs.io/retail",
    # docsynecx.com
    "https://docsynecx.com/",
    "https://docsynecx.com/platforms",
    "https://docsynecx.com/#solutions",
    "https://docsynecx.com/blog",
    "https://docsynecx.com/pricing",
    "https://docsynecx.com/contact",
]

# Corpus of scraped sentences, one entry per sentence.
documents = []
for url in URLS:
    documents.extend(get_website_text(url))
# The same sentence is scraped many times (shared nav/footer text, and URLs
# that differ only by a "#fragment" return the same page). Drop the repeats,
# keeping first-seen order, so duplicates cannot crowd out distinct passages
# in the retrieved context.
documents = list(dict.fromkeys(documents))
# Set to True by the indexing step only if there is something to index.
rag_enabled = False

# ========== Indexing ==========
# The embedder is created unconditionally: semantic_similarity() uses it for
# every logged interaction, even when scraping returned nothing and retrieval
# is disabled. Creating it only inside the `if` left the name undefined in that
# case and made every chat turn fail with a NameError.
embedder = SentenceTransformer("all-MiniLM-L6-v2")

if documents:
    doc_emb = embedder.encode(documents, show_progress_bar=True)

    # Dense index: exact L2 search over the sentence embeddings.
    index = faiss.IndexFlatL2(doc_emb.shape[1])
    index.add(doc_emb)

    # Sparse index: BM25 over whitespace-split tokens.
    bm25 = BM25Okapi([d.split() for d in documents])

    rag_enabled = True

# ========== Retrieval ==========
def retrieve_context(query, top_k=3):
    """Return up to *top_k* corpus sentences most relevant to *query*.

    Hybrid retrieval: candidates are the union of the best dense (FAISS) and
    sparse (BM25) hits, which are then re-ranked by cosine similarity to the
    query embedding. The selected sentences are joined with newlines and the
    result is truncated to 2000 characters.

    Returns the string "No relevant information." when retrieval is disabled
    or no candidate can be requested.
    """
    if not rag_enabled:
        return "No relevant information."

    # Never ask for more neighbours than there are documents.
    n_candidates = min(top_k * 2, len(documents))
    if n_candidates < 1:
        return "No relevant information."

    q_emb = embedder.encode([query])

    _, dense_idx = index.search(q_emb, n_candidates)
    # FAISS fills missing results with -1; used as a list index that would
    # silently select the *last* document, so drop those slots.
    dense_hits = {int(i) for i in dense_idx[0] if i >= 0}

    sparse_scores = bm25.get_scores(query.split())
    sparse_hits = sorted(
        range(len(sparse_scores)),
        key=lambda i: sparse_scores[i],
        reverse=True,
    )[:n_candidates]

    # Sorted so the candidate order (and therefore tie-breaking) is deterministic.
    candidates = sorted(dense_hits.union(sparse_hits))

    # Reuse the embeddings computed at indexing time instead of re-encoding
    # the candidate sentences on every query.
    cand_emb = doc_emb[candidates]
    sims = cosine_similarity(q_emb, cand_emb)[0]

    top = sorted(zip(candidates, sims), key=lambda pair: pair[1], reverse=True)[:top_k]
    return "\n".join(documents[i] for i, _ in top)[:2000]

# ========== LLaMA ==========
# Try to load the generation model once at startup. `llama_loaded` records
# whether `llama` is usable; rag_response() checks it before generating.
llama_loaded = False
try:
    llama = pipeline(
        "text-generation",
        model="meta-llama/Llama-3.2-3B-Instruct",
        torch_dtype=torch.float16,
        device_map="auto",
    )
except Exception as load_error:
    print("Model load failed:", load_error)
else:
    llama_loaded = True

def rag_response(query):
    """Answer *query* with the LLM, grounded in retrieved context.

    Returns a ``(answer, context)`` tuple. When the model failed to load the
    tuple is ``("Model error.", "")``.
    """
    if not llama_loaded:
        return "Model error.", ""

    retrieved = retrieve_context(query)

    instructions = (
        "You are the SynecxAI virtual assistant.\n"
        "Answer concisely using ONLY the context.\n\n"
    )
    prompt = instructions + f"Context:\n{retrieved}\n\n" + f"Question: {query}\nAnswer:"

    # return_full_text=False: only the newly generated text is returned,
    # not the prompt.
    generation_kwargs = {
        "max_new_tokens": 128,
        "temperature": 0.7,
        "return_full_text": False,
    }
    generations = llama(prompt, **generation_kwargs)
    answer = generations[0]["generated_text"].strip()
    return answer, retrieved

# ========== Metrics ==========
# One dict per logged chat turn; calculate_metrics() builds its DataFrame from this.
interaction_logs = []
# user_id -> list of (user_input, bot_response) tuples, appended by log_interaction().
user_sessions = defaultdict(list)
# Exact user_input text -> most recent bot response, used for the consistency score.
previous_questions = {}
# Workbook that log_interaction() writes the "Logs" and "Metrics" sheets to.
EXCEL_PATH = "interaction_metrics.xlsx"

def semantic_similarity(a, b):
    """Return the cosine similarity between the embeddings of *a* and *b* as a float."""
    emb_a = embedder.encode(a)
    emb_b = embedder.encode(b)
    score = util.cos_sim(emb_a, emb_b)
    return score.item()

# Metric names in the order they appear in the result (and in the Excel sheet).
_METRIC_NAMES = (
    "First Response Time",
    "Average Response Time",
    "Task Completion Rate (%)",
    "Fallback Rate (%)",
    "Intent Accuracy (%)",
    "Entity Accuracy (%)",
    "User Retention Rate (%)",
    "Bounce Rate (%)",
    "Deflection Rate (%)",
    "Semantic Similarity Avg",
)


def _labelled_accuracy(df, column):
    """Return the percentage of truthy labels in *column*, or None if none are set."""
    if column not in df:
        return None
    labelled = df[column].dropna()
    if labelled.empty:
        # Nothing has been labelled yet: report "unknown" rather than NaN.
        return None
    return labelled.astype(float).mean() * 100


def calculate_metrics(logs=None):
    """Aggregate interaction log rows into a dict of summary metrics.

    Args:
        logs: Iterable of log-row dicts in the shape produced by
            ``log_interaction``. Defaults to the module-level
            ``interaction_logs``.

    Returns:
        A dict keyed by the names in ``_METRIC_NAMES``. Every value is None
        when there are no rows; the two accuracy metrics are None whenever no
        row carries a label for them.
    """
    if logs is None:
        logs = interaction_logs

    df = pd.DataFrame(logs)
    if df.empty:
        # An empty frame has no columns, so the lookups below would raise KeyError.
        return dict.fromkeys(_METRIC_NAMES)

    completion_pct = df["task_completed"].mean() * 100
    # Number of logged interactions per user.
    interactions_per_user = df["user_id"].value_counts()

    return {
        "First Response Time": df["response_time"].iloc[0],
        "Average Response Time": df["response_time"].mean(),
        "Task Completion Rate (%)": completion_pct,
        "Fallback Rate (%)": df["fallback"].mean() * 100,
        "Intent Accuracy (%)": _labelled_accuracy(df, "intent_correct"),
        "Entity Accuracy (%)": _labelled_accuracy(df, "entity_correct"),
        # Share of users with more than one / exactly one interaction.
        "User Retention Rate (%)": (interactions_per_user > 1).mean() * 100,
        "Bounce Rate (%)": (interactions_per_user == 1).mean() * 100,
        # Reported as the same figure as the task completion rate.
        "Deflection Rate (%)": completion_pct,
        "Semantic Similarity Avg": df["semantic_similarity"].mean(),
    }

def _save_to_excel(log_row, metrics):
    """Append *log_row* to the "Logs" sheet and rewrite the "Metrics" sheet."""
    # openpyxl cannot store a Python list in a cell, so the extracted
    # entities are written as their string representation.
    excel_row = dict(log_row, entities_extracted=str(log_row["entities_extracted"]))
    row_df = pd.DataFrame([excel_row])
    metrics_df = pd.DataFrame([metrics])

    if os.path.exists(EXCEL_PATH):
        with pd.ExcelWriter(EXCEL_PATH, engine="openpyxl", mode="a", if_sheet_exists="overlay") as writer:
            logs_sheet = writer.sheets.get("Logs")
            if logs_sheet is None:
                row_df.to_excel(writer, sheet_name="Logs", index=False)
            else:
                # Write below the last used row. Without startrow every new
                # row landed on row 0 and overwrote the header/previous entry.
                row_df.to_excel(
                    writer,
                    sheet_name="Logs",
                    index=False,
                    header=False,
                    startrow=logs_sheet.max_row,
                )
            metrics_df.to_excel(writer, sheet_name="Metrics", index=False)
    else:
        with pd.ExcelWriter(EXCEL_PATH, engine="openpyxl") as writer:
            row_df.to_excel(writer, sheet_name="Logs", index=False)
            metrics_df.to_excel(writer, sheet_name="Metrics", index=False)


def log_interaction(user_input, bot_response, context, start, end, fallback=False, user_id="guest"):
    """Record one chat turn in memory and in the Excel workbook.

    Args:
        user_input: The user's message.
        bot_response: The text shown to the user in reply.
        context: Retrieved context passed to the model ("" if none).
        start, end: ``time.time()`` readings taken around answer generation.
        fallback: True when the turn ended in an error reply; the row's
            ``task_completed`` is the negation of this flag.
        user_id: Key under which the turn is stored in ``user_sessions``.

    The row is appended to ``interaction_logs``, the aggregate metrics are
    recomputed, and both are written to ``EXCEL_PATH``. A failure while
    writing the workbook is printed and otherwise ignored.
    """
    user_sessions[user_id].append((user_input, bot_response))
    sim = semantic_similarity(user_input, bot_response)

    # Consistency: similarity between this answer and the previous answer
    # given to the exact same question text, if there was one.
    consistency = None
    if user_input in previous_questions:
        consistency = semantic_similarity(bot_response, previous_questions[user_input])
    previous_questions[user_input] = bot_response

    log_row = {
        "timestamp": time.strftime("%Y-%m-%d %H:%M:%S"),
        "user_id": user_id,
        "user_input": user_input,
        "bot_response": bot_response,
        "context": context,
        "response_time": round(end - start, 2),
        "fallback": fallback,
        "task_completed": not fallback,
        "semantic_similarity": round(sim, 4),
        # `is not None` so a legitimate score of 0.0 is kept, not dropped.
        "consistency_score": round(consistency, 4) if consistency is not None else None,
        "intent_prediction": detect_intent(user_input),
        "entities_extracted": extract_entities(user_input),
        # Ground-truth labels are not known at logging time.
        "intent_correct": None,
        "entity_correct": None,
    }

    interaction_logs.append(log_row)
    metrics = calculate_metrics()

    try:
        _save_to_excel(log_row, metrics)
    except Exception as e:
        # Persistence is best-effort; the in-memory log is already updated.
        print(f"❌ Excel save error: {e}")

# ========== Gradio ==========
def chat_fn(user_msg, chat_history):
    """Gradio submit handler: answer *user_msg* and extend the chat history.

    Args:
        user_msg: Text from the input box.
        chat_history: List of ``(user, bot)`` tuples shown in the Chatbot
            (``None`` is treated as an empty history).

    Returns:
        ``(chat_history, "")`` — the updated history and an empty string that
        clears the input box.

    Empty or whitespace-only messages are ignored. An exception while
    generating the answer is shown to the user as an error reply and logged
    as a fallback. An exception while *logging* is only printed, so a metrics
    problem can neither break the chat nor add a second reply for the same
    message.
    """
    if chat_history is None:
        chat_history = []
    if not user_msg or not user_msg.strip():
        return chat_history, ""

    start = time.time()
    fallback = False
    try:
        reply, context = rag_response(user_msg)
    except Exception as e:
        reply, context = f"❌ Error: {e}", ""
        fallback = True
    end = time.time()

    chat_history.append((user_msg, reply))

    # Logging is kept out of the try-block above: previously a logging error
    # after a successful answer appended an extra error bubble and then
    # re-ran the same failing logging call unguarded.
    try:
        log_interaction(user_msg, reply, context, start, end, fallback=fallback)
    except Exception as e:
        print(f"⚠️ Interaction logging failed: {e}")

    return chat_history, ""

# Build the UI: a heading, the chat window, a one-line input box and a clear
# button. Components are created in the order they are declared here.
with gr.Blocks() as demo:
    gr.Markdown("## 🤖 SynecxAI Assistant (Metrics-Enabled)")
    chatbot = gr.Chatbot()
    msg = gr.Textbox(label="Ask me anything", placeholder="e.g., What services do you offer?", lines=1)
    clear = gr.Button("🧹 Clear")

    # Submitting the textbox runs chat_fn(msg, chatbot); its two return values
    # update the chat window and the textbox respectively.
    msg.submit(chat_fn, [msg, chatbot], [chatbot, msg])
    # The clear button resets the chat window to [] and the textbox to "".
    clear.click(lambda: ([], ""), None, [chatbot, msg])

print("🚀 Launching …")
# share=True asks Gradio to create a public share link in addition to the local server.
demo.launch(share=True)

