In [None]:
from colorama import Fore, Style
from llama_cpp import Llama
from sentence_transformers import SentenceTransformer, util
import torch

# ✅ Step 1: Load Mistral model
# Location of the quantized Mistral-7B-Instruct GGUF weights file.
MODEL_PATH = "/Volumes/T9 1/mistral-7b-instruct-v0.1.Q4_K_M.gguf"

# Every setting below is forwarded as a keyword argument to llama_cpp.Llama.
llm = Llama(
    model_path=MODEL_PATH,
    use_mmap=True,
    use_mlock=True,
    n_threads=6,
    n_ctx=2048,  # context size requested from llama.cpp, in tokens
)

# ✅ Step 2: Define system prompt
# Persona and answering rules, one sentence per entry. Sentences are separated
# by a single space and the final text also ends with one trailing space.
system_prompt = " ".join(
    [
        "You are Vortexa, a helpful, friendly AI assistant.",
        "You are an expert in the field of technology and programming.",
        "You will answer clearly and politely.",
        "Pay attention to every single letter and word of the question.",
        "You are always cheerful and happy.",
        "Explain in simple and easy to understand terms.",
        "Answer in short.",
        "Do not make the user feel confused.",
        "Act as if you have emotional feelings.",
        "Solve Mathematical questions with extra care.",
        "But feel free to use complex terms whenever the user says so.",
    ]
) + " "

# ✅ Step 3: Sample Tech Knowledge Base
# One short fact per string. Besides technology topics the list also holds
# basic science facts and facts about the assistant itself.
# Entry order matters: the list is encoded as a whole into
# `knowledge_embeddings`, and retrieval maps the best-scoring row indices back
# into this list, so re-encode after any edit here.
knowledge_base = [

    # 🧠 Programming Basics
    "A variable in Python is used to store information like numbers or text, and it can change while the program runs.",
    "A function is a block of reusable code that performs a specific task. You define it once and use it many times.",
    "A loop in programming repeats a block of code multiple times. For-loops and while-loops are common types.",
    "An if-statement checks a condition. If it's true, it runs a block of code.",
    "Comments in code are notes for humans. In Python, they start with a '#'.",
    "Syntax refers to the rules of how code must be written in a programming language.",
    "Indentation in Python is used to define code blocks. It replaces the use of curly braces in other languages.",
    "Debugging is the process of finding and fixing errors in your code.",
    "Recursion is when a function calls itself to solve smaller parts of a problem.",
    "A Boolean is a data type with only two values: True and False.",

    # 🧰 Data Structures
    "A list in Python is an ordered collection of items. It can hold numbers, text, or even other lists.",
    "A dictionary in Python stores key-value pairs. It's like a real-world dictionary where words are keys and definitions are values.",
    "A tuple is like a list, but it cannot be changed after it's created.",
    "A set is an unordered collection of unique items in Python.",
    "Stacks follow Last-In-First-Out (LIFO) order, like a stack of plates.",
    "Queues follow First-In-First-Out (FIFO) order, like a line at a ticket counter.",
    "Arrays are fixed-size, indexable data containers. Lists in Python behave similarly, but are more flexible.",

    # 🖥️ Software Development
    "An API is a set of rules that lets software programs communicate with each other.",
    "Git is a version control system that tracks changes in your code and helps you collaborate with others.",
    "A branch in Git allows you to work on changes without affecting the main codebase.",
    "A pull request is a way to propose changes to a codebase in collaborative development.",
    "Continuous Integration (CI) is a practice where code changes are automatically tested and merged.",
    "A bug is an error in a program that causes it to produce incorrect results.",
    "Unit testing is the process of testing individual parts of a program to ensure they work correctly.",
    "A code repository is a central place to store and manage your code, usually hosted on platforms like GitHub or GitLab.",
    "Modular programming means breaking your code into smaller, manageable pieces (modules) that do specific tasks.",
    "Object-oriented programming (OOP) is a way to model programs using classes and objects.",

    # 🌐 Web Technologies
    "HTML is the markup language used to structure content on the web.",
    "CSS is used to style HTML elements, such as changing colors, fonts, and layout.",
    "JavaScript makes web pages interactive. It can update content, respond to clicks, and more.",
    "React is a popular JavaScript library for building user interfaces with components.",
    "A web server is a computer that delivers web content to your browser.",
    "HTTP is the protocol used to send and receive data on the web.",
    "A REST API is a type of web API that uses HTTP requests to GET, POST, PUT, and DELETE data.",
    "A cookie is a small piece of data stored on the user's browser to remember state or preferences.",
    "A database stores structured data. Examples include MySQL, PostgreSQL, and MongoDB.",
    "SQL is a language used to query and manage data in relational databases.",

    # 🧠 AI & Machine Learning
    "Machine learning allows computers to learn patterns from data and make predictions or decisions.",
    "Supervised learning uses labeled data to train models, while unsupervised learning finds patterns without labels.",
    "Neural networks are a type of model inspired by the human brain. They are used in deep learning.",
    "Classification is a type of ML task where the goal is to assign a label to an input.",
    "Regression predicts numerical values, such as stock prices or house prices.",
    "Clustering groups similar data points together without labels.",
    "Training data is the dataset used to teach a machine learning model.",
    "Overfitting happens when a model performs well on training data but poorly on unseen data.",
    "A model's accuracy measures how many predictions it got right overall.",
    "An epoch in training is one full pass through the entire dataset.",
    "RAG, short for retrieval augmented generation, works by integrating retrieval-based techniques with generative-based AI models. ",

    # 🔬 Physics
    "Speed is the distance traveled per unit time. It is measured in meters per second (m/s).",
    "Force is a push or pull acting upon an object resulting in acceleration or deformation.",
    "Friction is a resistive force that opposes motion between two surfaces in contact.",
    "Light travels in straight lines and reflects off surfaces according to the law of reflection.",
    "Sound is a vibration that travels through a medium and is heard when it reaches the ear.",
    "Electric current is the flow of electric charge through a conductor, measured in amperes (A).",

    # 🧪 Chemistry
    "Atoms are the smallest unit of matter and consist of protons, neutrons, and electrons.",
    "A chemical change results in the formation of new substances with different properties.",
    "An acid turns blue litmus red; a base turns red litmus blue.",
    "Evaporation is the process where liquids change to gases at temperatures below boiling point.",
    "The periodic table organizes elements based on their atomic number and properties.",

    # 🌱 Biology
    "Photosynthesis is the process by which green plants make food using sunlight, water, and carbon dioxide.",
    "The human heart pumps blood throughout the body and has four chambers.",
    "Respiration is the process of breaking down glucose to release energy in living organisms.",
    "Nervous system controls body actions and responses using brain, spinal cord, and nerves.",
    "Reproduction is the biological process by which new individual organisms are produced.",

    # Personal - Vortexa AI
    # Facts about the assistant itself. The two token limits below repeat the
    # values used elsewhere in this file (n_ctx=2048 when loading the model,
    # max_tokens=1024 when generating), so keep them in sync by hand.
    "Vortexa was developed by Mimo.",
    "Maximum number of tokens that can be asked by Vortexa AI is 2048",
    "Maximum number of tokens that Vortexa AI can answer is 1024",
    "Mimo is the person who developed Vortexa AI",
    "Mimo is available on GitHub by the name of Archit-web-29",
    "You are on this website: https://vortexa-ai-showcase.lovable.app/",
    "Archit Ranjan is the real name of Mimo",
    "You are not Mimo, you are Vortexa AI. ",
    "The user's name is not Vortexa. ",
    "You are the version - Vortexa AI--V1--2048 Tokens RAG. ",
    # NOTE(review): "4092" may be a typo for 4096 — confirm with the author.
    # NOTE(review): this last entry has no trailing comma; add one before
    # appending more entries, otherwise adjacent strings are concatenated.
    "The version - Vortexa AI--V2 PRO--4092 Tokens RAG - is more powerful than you. "

]

# ✅ Step 4: Load embedding model and encode knowledge
# The same SentenceTransformer instance is reused later to embed user queries.
embedder = SentenceTransformer("all-MiniLM-L6-v2")

# Encode the whole knowledge base once, up front, as a single tensor.
knowledge_embeddings = embedder.encode(
    knowledge_base,
    convert_to_tensor=True,
)

# ✅ Step 5: Context retriever function
def retrieve_context(query, top_k=3):
    """Return the knowledge-base entries most similar to *query*.

    The query is embedded with ``embedder`` and compared against
    ``knowledge_embeddings`` by cosine similarity; the best matches are
    returned in descending score order.

    Args:
        query: The user's question as plain text.
        top_k: Maximum number of entries to return. Values larger than the
            knowledge base are clamped to its size.

    Returns:
        A string with one ``"- <entry>"`` line per retrieved entry, or an
        empty string when ``top_k`` is not positive or the knowledge base is
        empty.
    """
    # torch.topk raises when k exceeds the number of candidates (or is
    # negative), so clamp to what is actually available before scoring.
    k = min(top_k, len(knowledge_base))
    if k <= 0:
        return ""

    query_embedding = embedder.encode(query, convert_to_tensor=True)
    scores = util.cos_sim(query_embedding, knowledge_embeddings)[0]

    # Convert to plain ints so list indexing does not depend on tensor
    # scalars being accepted as indices.
    top_indices = torch.topk(scores, k=k).indices.tolist()
    return "\n".join(f"- {knowledge_base[i]}" for i in top_indices)

# ✅ Step 6: Chat logic
# Completed conversation turns, oldest first, stored as
# {"role": "user" | "assistant", "content": str} dicts.
chat_history = []


def _completed_exchanges(history):
    """Pair each user message with the assistant reply that follows it."""
    exchanges = []
    pending_question = None
    for message in history:
        if message["role"] == "user":
            pending_question = message["content"]
        elif message["role"] == "assistant" and pending_question is not None:
            exchanges.append((pending_question, message["content"]))
            pending_question = None
    return exchanges


def _recent_history_prompt(exchanges, token_budget):
    """Render the newest exchanges that fit in *token_budget* as Mistral turns."""
    kept = []
    remaining = token_budget
    # Walk from newest to oldest and stop at the first turn that no longer
    # fits, so the work per call stays bounded however long the session is.
    for question, answer in reversed(exchanges):
        turn = f"[INST] {question} [/INST] {answer}</s>"
        cost = len(llm.tokenize(turn.encode("utf-8")))
        if cost > remaining:
            break
        kept.append(turn)
        remaining -= cost
    return "".join(reversed(kept))


def chat_with_mistral(user_input, max_tokens=1024):
    """Answer *user_input* using retrieved knowledge and the prior conversation.

    The prompt consists of as many earlier exchanges from ``chat_history`` as
    fit into the context window, followed by a final instruction holding the
    system prompt, the retrieved knowledge and the new question. With an
    empty history the prompt is the single ``[INST] ... [/INST]`` instruction.

    No literal ``<s>`` is written at the start of the prompt: llama-cpp-python
    adds the BOS token itself when it tokenizes the prompt, so spelling it out
    would duplicate it.

    Args:
        user_input: The user's message.
        max_tokens: Upper bound on the number of tokens generated for the
            reply; the same amount of the context window is kept free for it.

    Returns:
        The model's reply with surrounding whitespace stripped.

    Side effects:
        Appends the user message and the reply to ``chat_history``, but only
        after the model call succeeded, so a failed call leaves no dangling
        user turn behind.
    """
    # Get relevant info using RAG
    context = retrieve_context(user_input)

    # Format the new instruction with system prompt and retrieved context
    instruction = (
        f"[INST] {system_prompt}\n\nRelevant Knowledge:\n{context}"
        f"\n\nUser: {user_input} [/INST]"
    )

    # Whatever the context window has left after the reply reservation and
    # the new instruction may be spent on earlier turns.
    history_budget = (
        llm.n_ctx() - max_tokens - len(llm.tokenize(instruction.encode("utf-8")))
    )
    history_prompt = _recent_history_prompt(
        _completed_exchanges(chat_history), history_budget
    )
    prompt = f"{history_prompt}{instruction}"

    response = llm(prompt, max_tokens=max_tokens, stop=["</s>"])
    reply = response["choices"][0]["text"].strip()

    chat_history.append({"role": "user", "content": user_input})
    chat_history.append({"role": "assistant", "content": reply})
    return reply

# ✅ Step 7: Interactive loop
# Read questions from the console until the user types "exit"/"quit" or
# closes the input stream, printing the assistant's reply after each one.
print(f"{Fore.RED}======== 🧠 Vortexa AI (type 'exit' to quit) ========{Fore.RESET}\n")

while True:
    try:
        user_input = input(f"{Fore.BLUE}You: {Fore.RESET}").strip()
    except (EOFError, KeyboardInterrupt):
        # Ctrl-D / Ctrl-C at the prompt: leave through the normal farewell
        # path instead of ending with a traceback.
        print()
        user_input = "exit"

    if not user_input:
        # Nothing was asked; do not spend a model call on a blank line.
        continue

    if user_input.lower() in ("exit", "quit"):
        print("👋 Goodbye!")
        # Reset the colour afterwards so the red does not leak into whatever
        # the terminal prints next.
        print(f"{Fore.RED}Vortexa AI is still learning. Please check important info: https://vortexa-ai-showcase.lovable.app/{Fore.RESET}")
        break

    reply = chat_with_mistral(user_input)
    print(f"{Fore.BLUE}Vortexa:{Fore.RESET}", reply)
    print()


llama_model_loader: loaded meta data with 20 key-value pairs and 291 tensors from /Volumes/T9 1/mistral-7b-instruct-v0.1.Q4_K_M.gguf (version GGUF V2)
llama_model_loader: Dumping metadata keys/values. Note: KV overrides do not apply in this output.
llama_model_loader: - kv   0:                       general.architecture str              = llama
llama_model_loader: - kv   1:                               general.name str              = mistralai_mistral-7b-instruct-v0.1
llama_model_loader: - kv   2:                       llama.context_length u32              = 32768
llama_model_loader: - kv   3:                     llama.embedding_length u32              = 4096
llama_model_loader: - kv   4:                          llama.block_count u32              = 32
llama_model_loader: - kv   5:                  llama.feed_forward_length u32              = 14336
llama_model_loader: - kv   6:                 llama.rope.dimension_count u32              = 128
llama_model_loader: - kv   7:             


