In [None]:
!pip install pyngrok neo4j





Collecting pyngrok
  Downloading pyngrok-7.2.11-py3-none-any.whl.metadata (9.4 kB)
Collecting neo4j
  Downloading neo4j-5.28.1-py3-none-any.whl.metadata (5.9 kB)
Downloading pyngrok-7.2.11-py3-none-any.whl (25 kB)
Downloading neo4j-5.28.1-py3-none-any.whl (312 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m312.3/312.3 kB[0m [31m8.0 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: pyngrok, neo4j
Successfully installed neo4j-5.28.1 pyngrok-7.2.11


In [None]:
!ngrok authtoken 2yT5rvCV1V4H1uXJwu5jezCmazk_6CzHr3ZMHn9dS6TvRYEeJ

Authtoken saved to configuration file: /root/.config/ngrok/ngrok.yml


In [None]:
import os
import re

import torch
from flask import Flask, request, jsonify
from neo4j import GraphDatabase
from pyngrok import ngrok
from transformers import AutoModelForCausalLM, AutoTokenizer


class CppChatbot:
    """Chatbot that answers only C++ programming questions.

    Pipeline (see ``get_answer``): keyword guard-rails reject obviously
    off-topic questions, ``Chunk`` nodes fetched from Neo4j provide context,
    and a CodeLlama instruct model generates the final answer.
    """

    # ------------------------------------------------------------------
    # System-level guard-rail
    # ------------------------------------------------------------------
    SYSTEM_PROMPT = (
        "<<SYS>>\n"
        "You are an expert assistant who answers *only* questions about *C++ programming*. "
        "If the user's question is not related to C++, reply exactly with:\n"
        "\"Sorry, I can only answer questions about C++ programming.\"\n"
        "<</SYS>>\n"
    )

    # Fixed refusal text (must match the sentence quoted in SYSTEM_PROMPT exactly)
    REFUSAL = "Sorry, I can only answer questions about C++ programming."

    # Generation / context budgets
    MAX_PROMPT_TOKENS = 2048   # tokenizer truncation limit for the prompt only
    MAX_NEW_TOKENS = 786       # output budget passed to generate()
    CONTEXT_CHUNKS = 5         # number of chunks placed in the prompt
    CONTEXT_CHUNK_CHARS = 500  # each chunk is cut to this many characters

    # Topics that trigger an immediate refusal.
    NON_PROGRAMMING_TOPICS = (
        "physics", "chemistry", "biology", "history", "geography",
        "music", "art", "literature", "philosophy", "newton", "einstein",
        "gravity", "planet", "animal", "plant", "cell", "atom", "molecule",
        "war", "religion", "politics", "sports", "medicine", "disease",
        "math", "mathematics", "calculus", "algebra", "geometry", "weather", "climate",
    )
    # Whole-word match (optional plural "s"). Plain substring matching wrongly
    # rejected C++ questions: "atom" in "std::atomic", "art" in "smart pointer",
    # "war" in "warning", "math" in "cmath", ...
    _NON_CPP_PATTERN = re.compile(
        r"\b(?:" + "|".join(NON_PROGRAMMING_TOPICS) + r")s?\b"
    )

    # Words that carry no retrieval signal; as CONTAINS terms they match
    # nearly every chunk and drown out the real keywords.
    _STOPWORDS = frozenset({
        "a", "an", "the", "is", "are", "was", "were", "be", "do", "does", "did",
        "how", "what", "why", "when", "where", "which", "who", "in", "on", "of",
        "to", "for", "with", "and", "or", "it", "that", "can", "i", "you", "me", "my",
    })

    def __init__(self, uri, user, password, hf_token):
        """Open the Neo4j driver and load the CodeLlama tokenizer and model.

        Args:
            uri: Neo4j connection URI.
            user: Neo4j user name.
            password: Neo4j password.
            hf_token: Hugging Face access token used to download the model.
        """
        self.hf_token = hf_token
        self.driver = GraphDatabase.driver(uri, auth=(user, password))

        # Detect GPU
        self.device = "cuda" if torch.cuda.is_available() else "cpu"
        print(f"Using device: {self.device}")

        # Load tokenizer and model from Hugging Face
        model_name = "meta-llama/CodeLlama-7b-Instruct-hf"  # Must have access to this model on HF
        print("Loading tokenizer…")
        self.tokenizer = AutoTokenizer.from_pretrained(model_name, token=self.hf_token)

        print("Loading model…")
        self.model = AutoModelForCausalLM.from_pretrained(
            model_name,
            # half precision only on GPU
            torch_dtype=torch.float16 if self.device == "cuda" else torch.float32,
            device_map="auto",
            token=self.hf_token,
        )

    # ------------------------------------------------------------------
    # Preprocess query to add "in c++" if not already specified
    # ------------------------------------------------------------------
    @staticmethod
    def preprocess_query(query: str) -> str:
        """Return ``query`` with ' in c++' appended unless it already mentions C++."""
        q_lower = query.lower()
        if "c++" not in q_lower and "cpp" not in q_lower:
            return query + " in c++"
        return query

    # ------------------------------------------------------------------
    # Non-C++ topic detector - for immediate rejection
    # ------------------------------------------------------------------
    @staticmethod
    def is_non_cpp_topic(query: str) -> bool:
        """Return True when ``query`` contains a deny-listed topic as a whole word."""
        return CppChatbot._NON_CPP_PATTERN.search(query.lower()) is not None

    # ------------------------------------------------------------------
    # Cheap keyword filter
    # ------------------------------------------------------------------
    @staticmethod
    def looks_like_cpp(query: str) -> bool:
        """Heuristic check: does the question contain any C++-flavoured keyword?"""
        cpp_keywords = (
            # language identifiers / syntax
            "c++", "cpp", "cplusplus", "#include", "std::", "cout <<", "cin >>",
            "template<", "int main(", "using namespace std", "::std", "decltype", "constexpr",
            # OOP / core-concept vocabulary commonly asked in interviews
            "polymorphism", "inheritance", "encapsulation", "abstraction", "virtual", "override",
            "object oriented", "oop", "class", "struct", "operator overloading", "friend function",
            # STL / modern-C++ hints
            "std::vector", "std::string", "std::map", "std::unique_ptr", "smart pointer",
            # build / compile terminology
            "g++", "clang++", "makefile", "cmake",
        )
        q_lower = query.lower()
        return any(k in q_lower for k in cpp_keywords)

    @staticmethod
    def _extract_keywords(query: str) -> list:
        """Split ``query`` into unique lowercase search terms.

        Surrounding punctuation is stripped, and stop-words and one-character
        tokens are dropped. Order of first appearance is preserved.
        """
        keywords = []
        for raw in query.lower().split():
            word = raw.strip(".,;:!?\"'()[]{}")
            if len(word) < 2 or word in CppChatbot._STOPWORDS:
                continue
            if word not in keywords:
                keywords.append(word)
        return keywords

    # ------------------------------------------------------------------
    # Neo4j helpers
    # ------------------------------------------------------------------
    def close(self):
        """Close the Neo4j driver."""
        self.driver.close()

    def fetch_knowledge(self, limit=None):
        """Fetch chunk texts from Neo4j.

        Args:
            limit: Optional maximum number of chunks to read. ``None`` (the
                default) reads every chunk, which is the original behaviour.

        Returns:
            A list of ``{"title", "content"}`` dicts for chunks whose stripped
            text is longer than 10 characters; ``[]`` when nothing is found or
            the query fails.
        """
        query = """
            MATCH (c:Chunk)
            WHERE c.text IS NOT NULL AND c.text <> ''
            RETURN c.text AS content
        """
        params = {}
        if limit is not None:
            query += " LIMIT $limit"
            params["limit"] = int(limit)

        with self.driver.session() as session:
            try:
                records = list(session.run(query, **params))

                if records:
                    print(f"✅ Found {len(records)} chunks from your knowledge base")
                    return [
                        {"title": "C++ Knowledge", "content": record["content"]}
                        for record in records
                        if record["content"] and len(record["content"].strip()) > 10
                    ]
                else:
                    print("❌ No chunks found with text content")
                    return []

            except Exception as e:  # deliberate best-effort: callers treat [] as "no data"
                print(f"❌ Error fetching knowledge: {e}")
                return []

    def is_relevant(self, user_query, knowledge_base):
        """Return True when the query shares at least one word with the first 10 chunks."""
        if not knowledge_base:
            return False

        # Only a sample of the knowledge base is inspected.
        knowledge_text = " ".join(
            item["content"].lower()
            for item in knowledge_base[:10]
        )
        knowledge_words = set(knowledge_text.split())
        user_words = set(user_query.lower().split())

        return bool(user_words & knowledge_words)

    def search_relevant_chunks(self, user_query, limit=10):
        """Return up to ``limit`` chunks that best match the query keywords.

        Chunks are ranked by how many distinct keywords their text contains.
        Falls back to ``fetch_knowledge(limit=limit)`` when no keyword
        survives filtering, nothing matches, or the search fails.
        """
        keywords = self._extract_keywords(user_query)
        if not keywords:
            print("❌ No relevant chunks found, falling back to general knowledge")
            return self.fetch_knowledge(limit=limit)

        # Previously `limit` was sent as a parameter but never used in the
        # query, so every matching chunk in the database was returned.
        search_query = """
            MATCH (c:Chunk)
            WHERE c.text IS NOT NULL AND c.text <> ''
            WITH c, size([word IN $query_words WHERE toLower(c.text) CONTAINS word]) AS score
            WHERE score > 0
            RETURN c.text AS content, score
            ORDER BY score DESC
            LIMIT $limit
        """

        try:
            with self.driver.session() as session:
                records = list(
                    session.run(search_query, query_words=keywords, limit=int(limit))
                )
        except Exception as e:  # deliberate best-effort: fall back to unfiltered chunks
            print(f"❌ Error searching chunks: {e}")
            return self.fetch_knowledge(limit=limit)

        if not records:
            print("❌ No relevant chunks found, falling back to general knowledge")
            return self.fetch_knowledge(limit=limit)

        print(f"✅ Found {len(records)} relevant chunks")
        return [
            {"title": "Relevant C++ Knowledge", "content": record["content"]}
            for record in records
        ]

    # ------------------------------------------------------------------
    # Llama generation
    # ------------------------------------------------------------------
    def generate_response(self, user_query: str, knowledge_base):
        """Generate an answer from the model using the first chunks as context.

        Returns only the newly generated text (the prompt is not echoed), or
        ``REFUSAL`` when the model produced nothing.
        """
        if not knowledge_base:
            return "Sorry, I don't have enough data to answer your question."

        # ---------- assemble a compact context ----------
        limited_knowledge = knowledge_base[:self.CONTEXT_CHUNKS]
        knowledge_texts = "\n".join(
            f"Knowledge {i+1}: {item['content'][:self.CONTEXT_CHUNK_CHARS]}..."
            for i, item in enumerate(limited_knowledge)
        )

        # No literal "<s>" here: the tokenizer prepends the BOS token itself,
        # so writing it in the text as well yields a duplicated BOS.
        prompt = (
            f"[INST]\n{self.SYSTEM_PROMPT}"
            f"Question: {user_query}\n"
            f"Context:\n{knowledge_texts}\n[/INST] Answer:"
        )

        # ---------- encode ----------
        # NOTE(review): truncation presumably cuts from the end (the default
        # side), which would drop the closing "[/INST] Answer:" for a very
        # long question — confirm and consider capping the question length.
        inputs = self.tokenizer(
            prompt,
            return_tensors="pt",
            truncation=True,
            max_length=self.MAX_PROMPT_TOKENS,
        ).to(self.device)

        # ---------- generate ----------
        with torch.no_grad():
            outputs = self.model.generate(
                **inputs,
                max_new_tokens=self.MAX_NEW_TOKENS,
                temperature=0.7,
                do_sample=True,
                pad_token_id=self.tokenizer.eos_token_id,
                eos_token_id=self.tokenizer.eos_token_id,
            )

        # ---------- decode ONLY the newly generated tokens ----------
        prompt_len = inputs["input_ids"].shape[-1]
        gen_tokens = outputs[0][prompt_len:]
        answer = self.tokenizer.decode(gen_tokens, skip_special_tokens=True).strip()

        return answer or self.REFUSAL

    # ------------------------------------------------------------------
    # Public API
    # ------------------------------------------------------------------
    def get_answer(self, user_query):
        """Answer ``user_query`` or return ``REFUSAL``.

        Non-string, empty or whitespace-only input is refused up front instead
        of raising or sending an empty question to the model.
        """
        if not isinstance(user_query, str) or not user_query.strip():
            return self.REFUSAL

        # 0) Reject obvious non-programming topics
        if self.is_non_cpp_topic(user_query):
            return self.REFUSAL

        # 1) Append "in c++" if the query does not already mention it
        processed_query = self.preprocess_query(user_query)

        # 2) Cheap keyword filter.
        # NOTE(review): step 1 guarantees "c++" or "cpp" is present, so this
        # check currently always passes; kept so the policy can be tightened
        # in one place.
        if not self.looks_like_cpp(processed_query):
            return self.REFUSAL

        # 3) Retrieve context
        knowledge_base = self.search_relevant_chunks(processed_query)
        if not knowledge_base:
            return self.REFUSAL

        # 4) Check relevance
        if not self.is_relevant(processed_query, knowledge_base):
            return self.REFUSAL

        # 5) Generate; strip the prompt marker in case the model echoed it
        full = self.generate_response(processed_query, knowledge_base)
        marker = "[/INST] Answer:"
        answer = full.split(marker, 1)[-1].strip() if marker in full else full

        # 6) If the model's reply contains the refusal sentence, return only
        # the canonical refusal so callers can compare against REFUSAL.
        if self.REFUSAL in answer:
            return self.REFUSAL
        return answer or self.REFUSAL


#########################################
#  FLASK + NGROK SETUP
#########################################

# Credentials are read from the environment so that no secret is stored in the
# notebook (or its version history). Set HF_TOKEN and NEO4J_PASSWORD before
# running this cell; NEO4J_URI and NEO4J_USER are optional overrides.
# NOTE: any token or password that was previously committed in this cell must
# be treated as leaked and rotated.
def _require_env(name):
    """Return the value of environment variable ``name`` or raise a clear error."""
    value = os.environ.get(name)
    if not value:
        raise RuntimeError(
            f"Environment variable {name} is not set; "
            "export it (or use %env / your secrets manager) before running this cell."
        )
    return value


HF_TOKEN = _require_env("HF_TOKEN")
URI = os.environ.get("NEO4J_URI", "neo4j+s://ac34d9e1.databases.neo4j.io")
USER = os.environ.get("NEO4J_USER", "neo4j")
PASSWORD = _require_env("NEO4J_PASSWORD")

# Create chatbot instance (opens the Neo4j driver and loads the model)
chatbot = CppChatbot(URI, USER, PASSWORD, HF_TOKEN)

# Flask app
app = Flask(__name__)

@app.route("/")
def home():
    """Landing route: confirms the service is up and points to POST /chat."""
    status_message = "Chatbot is live! Use POST /chat with JSON to query."
    return status_message

@app.route("/chat", methods=["POST"])
def chat():
    """Answer a C++ question sent as JSON.

    Request body: ``{"user_query": "<question>"}``.
    Responses:
        200 ``{"response": "<answer>"}`` on success.
        400 ``{"error": "<reason>"}`` when the body is not a JSON object or
            ``user_query`` is missing, empty, or not a string.
    """
    # silent=True returns None instead of raising on malformed JSON, so every
    # bad request gets the same JSON error shape.
    data = request.get_json(force=True, silent=True)
    if not isinstance(data, dict):
        # A JSON list/string/null used to crash on data.get() and return a 500.
        return jsonify({"error": "Request body must be a JSON object."}), 400

    user_query = data.get("user_query", "")
    if not isinstance(user_query, str) or not user_query.strip():
        return jsonify({"error": "'user_query' must be a non-empty string."}), 400

    answer = chatbot.get_answer(user_query)
    return jsonify({"response": answer})

# Expose via ngrok.
# NOTE: the tunnel makes /chat reachable by anyone who knows the URL; there is
# no authentication on the endpoint.
PORT = 5000  # single source of truth for both the tunnel and the Flask server
public_url = ngrok.connect(PORT)
print("Public URL:", public_url.public_url)

if __name__ == "__main__":
    try:
        app.run(port=PORT)
    finally:
        # Close the tunnel when the server stops (e.g. the cell is interrupted)
        # so re-running the cell does not accumulate open tunnels.
        ngrok.disconnect(public_url.public_url)

Using device: cuda
Loading tokenizer…


tokenizer_config.json:   0%|          | 0.00/1.59k [00:00<?, ?B/s]

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


tokenizer.model:   0%|          | 0.00/500k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.84M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/411 [00:00<?, ?B/s]

Loading model…


config.json:   0%|          | 0.00/646 [00:00<?, ?B/s]

model.safetensors.index.json:   0%|          | 0.00/25.1k [00:00<?, ?B/s]

Fetching 2 files:   0%|          | 0/2 [00:00<?, ?it/s]

model-00002-of-00002.safetensors:   0%|          | 0.00/3.50G [00:00<?, ?B/s]

model-00001-of-00002.safetensors:   0%|          | 0.00/9.98G [00:00<?, ?B/s]

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

generation_config.json:   0%|          | 0.00/116 [00:00<?, ?B/s]

Public URL: https://74ee-34-169-215-235.ngrok-free.app
 * Serving Flask app '__main__'
 * Debug mode: off


 * Running on http://127.0.0.1:5000
INFO:werkzeug:[33mPress CTRL+C to quit[0m


✅ Found 291 relevant chunks


INFO:werkzeug:127.0.0.1 - - [15/Jun/2025 20:09:43] "POST /chat HTTP/1.1" 200 -
INFO:werkzeug:127.0.0.1 - - [15/Jun/2025 20:10:11] "POST /chat HTTP/1.1" 200 -


✅ Found 291 relevant chunks


INFO:werkzeug:127.0.0.1 - - [15/Jun/2025 20:20:02] "POST /chat HTTP/1.1" 200 -
INFO:werkzeug:127.0.0.1 - - [15/Jun/2025 20:20:26] "POST /chat HTTP/1.1" 200 -
