In [69]:
# Install required libraries (run once per environment; %pip targets the kernel's environment)
# %pip install streamlit faiss-cpu mistralai beautifulsoup4 requests numpy

In [71]:
# Import necessary libraries
import os
import time
import faiss
import numpy as np
import requests
from bs4 import BeautifulSoup
from mistralai import Mistral, UserMessage

In [73]:
# -------------------------- Step 1: Set API Key --------------------------

# Read the Mistral API key from the environment instead of embedding it in the
# notebook: a literal key is exposed to anyone who can read the file or its
# version history. Export MISTRAL_API_KEY before starting Jupyter (or load it
# from an untracked .env file). Any key that was previously committed in this
# cell should be treated as compromised and revoked.
api_key = os.getenv("MISTRAL_API_KEY", "").strip()

# Fail fast with an actionable message rather than at the first API call
if not api_key:
    raise ValueError("API Key is missing! Set your MISTRAL_API_KEY.")

In [81]:
def get_policies(
    url="https://www.udst.edu.qa/about-udst/institutional-excellence-ie/policies-and-procedures",
    limit=10,
    timeout=30,
):
    """
    Fetches UDST policy text from the official website.

    Collects the text of every <div> on the page, collapses runs of
    whitespace/newlines into single spaces, drops empty and exact-duplicate
    entries, and returns the first `limit` results.

    Args:
        url: Page to scrape. Defaults to the UDST policies-and-procedures page.
        limit: Maximum number of entries to return (None returns all).
        timeout: Seconds to wait for the server before giving up.

    Returns:
        A list of at most `limit` cleaned text strings, in document order.

    Raises:
        ValueError: If the server answers with a status code other than 200.
        requests.exceptions.RequestException: On connection errors or timeout.
    """
    # Without a timeout, requests waits indefinitely on an unresponsive server
    response = requests.get(url, timeout=timeout)

    if response.status_code != 200:
        raise ValueError("Failed to fetch UDST policies. Check the URL or website status.")

    soup = BeautifulSoup(response.text, "html.parser")

    # NOTE(review): selecting every <div> also captures navigation/menu text
    # (visible in the sample output of this cell); a page-specific selector
    # would be more precise -- confirm against the page structure.
    cleaned_policies = []
    seen = set()
    for tag in soup.find_all("div"):
        # split()/join collapses all whitespace and yields "" for blank divs
        text = " ".join(tag.text.split())
        # A wrapper <div> around a single child <div> has identical text;
        # keeping both would waste the limited result slots on duplicates.
        if text and text not in seen:
            seen.add(text)
            cleaned_policies.append(text)

    return cleaned_policies[:limit]

# Download the policy texts once, then preview a few entries as a sanity check
policies = get_policies()
policy_preview = policies[:3]  # first three only, to keep the cell output short
print("Fetched Policies:", policy_preview)

Fetched Policies: ["Search Search by Keyword Search Search by Purpose I am Select an OptionUDST EmployeeCareer at UDSTParentCurrent StudentAlumniFuture Student Looking for Select an Option Take me There Quick links Current Student Registration Academic Calendar Student Systems Attendance Final Exams Academic Awards Request Letters and Records Graduation Ceremony Graduation Requirements Ramadan Schedule Professional Education Customized Training Courses Consulting Services Register Today Online Resources Alumni Presidents Message Alumni Services Update Contact Information Request Letters, Records and Transcripts Alumni Benefits UDST Offers Alumni Events Alumni Magazine Alumni Office Graduation Ceremonies Testing Centre Placement Tests IELTS Tests Proctoring Careers Sport and Wellness Our Facilities Sport and Varsity Wellness Programs Junior Wolves Club Book a Facility or Wellness Program Today Search Admissions Why UDST Admissions All Programs How to Apply Application Deadlines Admissio

In [83]:
# -------------------------- Step 3: Chunk the Policy Text --------------------------

def chunk_text(text, chunk_size=256, overlap=0):
    """
    Splits text into consecutive chunks of at most `chunk_size` characters.

    Sizes are measured in characters, not tokens; the chunk size only
    indirectly bounds the number of tokens sent to the embedding model.

    Args:
        text: The string to split.
        chunk_size: Maximum number of characters per chunk (must be > 0).
        overlap: Number of characters shared between neighbouring chunks,
            so that a sentence cut at a boundary still appears whole in one
            of them. Must satisfy 0 <= overlap < chunk_size. The default of
            0 produces non-overlapping chunks.

    Returns:
        A list of chunks in order; an empty list for empty text.

    Raises:
        ValueError: If chunk_size or overlap is out of range.
    """
    # A negative size would make range() yield nothing and silently drop the text
    if chunk_size <= 0:
        raise ValueError("chunk_size must be a positive integer")
    if not 0 <= overlap < chunk_size:
        raise ValueError("overlap must satisfy 0 <= overlap < chunk_size")

    step = chunk_size - overlap
    pieces = []
    for start in range(0, len(text), step):
        pieces.append(text[start:start + chunk_size])
        # Stop once a chunk reaches the end of the text; with overlap > 0 the
        # remaining start positions would only repeat already-covered tails.
        if start + chunk_size >= len(text):
            break
    return pieces

# Split every policy into pieces and flatten them into one list, in order
chunks = []
for policy in policies:
    chunks.extend(chunk_text(policy))
print(f"Total Chunks Created: {len(chunks)}")

Total Chunks Created: 57


In [85]:
# -------------------------- Step 4: Generate Embeddings with Rate Limit Handling --------------------------

def get_embeddings(chunks, batch_size=1, delay=3, max_retries=5):
    """
    Generates embeddings for text chunks using Mistral AI.

    Chunks are sent in batches of `batch_size`. A failed request is retried
    with exponentially growing waits (delay*2, delay*4, ...). The result
    always has exactly one embedding per input chunk, in the same order, so
    that position i in the result corresponds to chunks[i].

    Args:
        chunks: List of strings to embed.
        batch_size: Number of chunks per API request (must be >= 1).
        delay: Base pause in seconds between successful requests; also the
            base of the retry backoff.
        max_retries: Maximum number of attempts per batch.

    Returns:
        A list of embedding vectors, one per chunk.

    Raises:
        ValueError: If batch_size is smaller than 1.
        RuntimeError: If a batch still fails after `max_retries` attempts.
            Skipping the batch instead would shift every later embedding and
            break the positional mapping used when looking up search hits.
    """
    if batch_size < 1:
        raise ValueError("batch_size must be at least 1")
    if not chunks:
        return []

    client = Mistral(api_key=api_key)
    embeddings = []

    for start in range(0, len(chunks), batch_size):
        batch = chunks[start:start + batch_size]
        response = None
        last_error = None

        for attempt in range(1, max_retries + 1):
            try:
                response = client.embeddings.create(model="mistral-embed", inputs=batch)
                break  # Exit retry loop if successful
            except Exception as e:
                last_error = e
                # Only wait when another attempt will actually follow
                if attempt < max_retries:
                    wait = delay * 2 ** attempt
                    print(f"API Error: {e}. Retrying in {wait} seconds...")
                    time.sleep(wait)

        if response is None:
            raise RuntimeError(
                f"Embedding request failed for chunks {start}-{start + len(batch) - 1} "
                f"after {max_retries} attempts."
            ) from last_error

        embeddings.extend(item.embedding for item in response.data)

        # Throttle between requests, but do not sleep after the final batch
        if start + batch_size < len(chunks):
            time.sleep(delay)

    return embeddings

# Embed every chunk; this cell is slow because requests are deliberately throttled
text_embeddings = get_embeddings(chunks)
embedding_count = len(text_embeddings)
print(f"Generated {embedding_count} embeddings.")

Generated 57 embeddings.


In [87]:
# -------------------------- Step 5: Store Embeddings in FAISS Vector Database --------------------------

# Build the FAISS index. Row i of the index must hold the embedding of
# chunks[i], because search results are mapped back to text by position.
if not text_embeddings:
    raise ValueError("No embeddings were generated; cannot build the FAISS index.")
if len(text_embeddings) != len(chunks):
    raise ValueError(
        f"Got {len(text_embeddings)} embeddings for {len(chunks)} chunks; "
        "index rows would not line up with chunks."
    )

# FAISS works on float32 data; np.array() on Python floats defaults to float64
embedding_matrix = np.ascontiguousarray(text_embeddings, dtype=np.float32)
d = embedding_matrix.shape[1]  # Embedding dimension
index = faiss.IndexFlatL2(d)
index.add(embedding_matrix)

In [89]:
# -------------------------- Step 6: Query Processing & Testing in Jupyter --------------------------

def get_query_embedding(query):
    """
    Generates an embedding for the user's query.

    Uses the same "mistral-embed" model as the document chunks so the query
    vector is comparable with the indexed vectors.

    Args:
        query: The question text to embed.

    Returns:
        A 1-D float32 NumPy array holding the query embedding. float32 is
        the dtype FAISS searches with, so no implicit conversion is needed.
    """
    client = Mistral(api_key=api_key)
    response = client.embeddings.create(model="mistral-embed", inputs=[query])
    return np.array(response.data[0].embedding, dtype=np.float32)

# Test a sample query in Jupyter Notebook
sample_query = "What are the policies for student activities?"
# FAISS expects a 2-D batch of query vectors, hence the (1, dim) reshape
query_embedding = get_query_embedding(sample_query).reshape(1, -1)

# Retrieve the top 2 most similar chunks (D = distances, I = row indices)
D, I = index.search(query_embedding, k=2)
# FAISS fills missing results with index -1 when the index holds fewer than k
# vectors; without this filter chunks[-1] would silently return the last chunk.
retrieved_chunks = [chunks[i] for i in I[0] if i >= 0]

# Create the final prompt for the chatbot
prompt = f"""
Context:
{' '.join(retrieved_chunks)}
Query: {sample_query}
Answer:
"""

def ask_mistral(prompt):
    """
    Sends the prompt to the Mistral chat model and returns the reply text.

    The prompt is wrapped in a single user message and submitted to the
    "mistral-large-latest" model; the content of the first returned choice
    is handed back to the caller.
    """
    chat_client = Mistral(api_key=api_key)
    conversation = [UserMessage(content=prompt)]
    completion = chat_client.chat.complete(
        model="mistral-large-latest",
        messages=conversation,
    )
    first_choice = completion.choices[0]
    return first_choice.message.content

# Send the assembled prompt to the model and show its reply
answer = ask_mistral(prompt)
response_label = "\nChatbot Response:"
print(response_label, answer)


Chatbot Response: Based on the provided context, there is no explicitly listed policy titled "Student Activities Policy." However, student activities can often be governed by several policies that shape the conduct, attendance, and overall academic life of students. Here are some relevant policies from your context that might indirectly relate to student activities:

1. **Student Conduct Policy (Policy-V2 Pl-ST-01 Student Conduct Procedure-V1)**:
   - This policy likely outlines the expected behavior of students, including in extracurricular activities.

2. **Student Attendance Policy (PR-ST-02 Student Attendance Policy-V1 Pl-ST-03 Student Attendance Procedure-V2)**:
   - This policy would cover attendance requirements, which could impact participation in student activities.

3. **Student Appeals Policy (PR-ST-03 Student Appeals Policy-V1)**:
   - This policy provides a framework for students to appeal decisions, which could be relevant if there are disputes related to student activit