In [1]:
!pip install pymysql


Collecting pymysql
  Downloading PyMySQL-1.1.1-py3-none-any.whl.metadata (4.4 kB)
Downloading PyMySQL-1.1.1-py3-none-any.whl (44 kB)
[?25l   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/45.0 kB[0m [31m?[0m eta [36m-:--:--[0m[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m45.0/45.0 kB[0m [31m2.1 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: pymysql
Successfully installed pymysql-1.1.1


In [2]:
!pip install chromadb pandas sentence-transformers transformers

Collecting chromadb
  Downloading chromadb-1.0.15-cp39-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (7.0 kB)
Collecting pybase64>=1.4.1 (from chromadb)
  Downloading pybase64-1.4.1-cp311-cp311-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (8.4 kB)
Collecting posthog<6.0.0,>=2.4.0 (from chromadb)
  Downloading posthog-5.4.0-py3-none-any.whl.metadata (5.7 kB)
Collecting onnxruntime>=1.14.1 (from chromadb)
  Downloading onnxruntime-1.22.1-cp311-cp311-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl.metadata (4.6 kB)
Collecting opentelemetry-api>=1.2.0 (from chromadb)
  Downloading opentelemetry_api-1.35.0-py3-none-any.whl.metadata (1.5 kB)
Collecting opentelemetry-exporter-otlp-proto-grpc>=1.2.0 (from chromadb)
  Downloading opentelemetry_exporter_otlp_proto_grpc-1.35.0-py3-none-any.whl.metadata (2.4 kB)
Collecting opentelemetry-sdk>=1.2.0 (from chromadb)
  Downloading opentelemetry_sdk-1.35.0-py3-none-any.whl.metadata (1.5 k

In [3]:

import pandas as pd
import chromadb
from sentence_transformers import SentenceTransformer
from chromadb.config import Settings
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM, pipeline

In [4]:
# === Load CSV Data ===
filepath = '/content/pace_graduate_programs (1).csv'  # <- Make sure you upload the file
df = pd.read_csv(filepath)

# Parallel lists: program_names[i] belongs with program_links[i].
program_names = df['Program Name'].tolist()
program_links = df['Program Link'].tolist()

# === Embed Program Names ===
# `model` is also passed to search_programs() by later cells to embed the user's query,
# so documents and queries are encoded by the same model.
model = SentenceTransformer("all-MiniLM-L6-v2")
embeddings = model.encode(program_names)

# === Initialize Chroma Vector Store ===
chroma_client = chromadb.Client(Settings(anonymized_telemetry=False))
# get_or_create_collection (rather than create_collection) keeps this cell re-runnable:
# create_collection raises when "pace_programs" already exists in the running kernel.
collection = chroma_client.get_or_create_collection(name="pace_programs")

# Upsert documents with metadata (links). upsert is idempotent for the fixed
# "id{i}" identifiers, so a second run refreshes the rows instead of colliding with them.
collection.upsert(
    documents=program_names,
    metadatas=[{"link": link} for link in program_links],
    embeddings=embeddings.tolist(),
    ids=[f"id{i}" for i in range(len(program_names))]
)

# === Semantic Search Function ===
def search_programs(user_query, embedding_model, top_k=5):
    """Return the `top_k` closest programs to `user_query` as (name, link) pairs.

    The query is encoded with `embedding_model` and sent to the module-level
    Chroma `collection`; only the first (and only) query's results are read.
    """
    query_vector = embedding_model.encode([user_query])[0].tolist()
    hits = collection.query(query_embeddings=[query_vector], n_results=top_k)

    names = hits['documents'][0]
    links = [entry['link'] for entry in hits['metadatas'][0]]
    return list(zip(names, links))

# === Load RAG Model ===
# One checkpoint name drives both the tokenizer and the seq2seq weights, which are
# then wrapped in a text2text-generation pipeline used by generate_answer_with_flan().
model_name = "google/flan-t5-small"
tokenizer = AutoTokenizer.from_pretrained(model_name)
llm_model = AutoModelForSeq2SeqLM.from_pretrained(model_name)
rag_pipeline = pipeline(
    task="text2text-generation",
    model=llm_model,
    tokenizer=tokenizer,
)

# === RAG Answer Generator ===
def generate_answer_with_flan(query, program_tuples):
    """Ask the FLAN pipeline to answer `query` given the retrieved programs.

    `program_tuples` is an iterable of (name, link) pairs; each pair becomes one
    line of the "Programs:" section of the prompt. Returns the generated text of
    the pipeline's first output.
    """
    program_lines = (f"{name} – {link}" for name, link in program_tuples)
    context = "\n".join(program_lines)
    prompt = f"""You are a helpful assistant that answers questions about graduate programs at Pace University.

Programs:
{context}

Question: {query}

Answer:"""
    outputs = rag_pipeline(prompt, max_new_tokens=150)
    return outputs[0]['generated_text']

# === Greeting and Exit Helpers ===
def is_exit(text):
    """Return True when the whole message, trimmed and lower-cased, is an exit phrase."""
    farewell_phrases = ('exit', 'quit', 'bye', 'see you later')
    normalized = text.strip().lower()
    return normalized in farewell_phrases

def is_greeting(text):
    """Return True when `text` contains 'hi', 'hello' or 'hey' as a whole word.

    Matching is case-insensitive and done per word, not per substring, so
    questions such as "Which programs cover machine learning?" (which merely
    contain the letters "hi") are not mistaken for greetings.
    """
    greetings = {'hi', 'hello', 'hey'}
    # Turn every non-alphanumeric character into a space so punctuation such as
    # "hi!" or "hello," does not stick to the word being compared.
    normalized = ''.join(ch if ch.isalnum() else ' ' for ch in text.lower())
    return any(word in greetings for word in normalized.split())
import json
import uuid

import requests

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


modules.json:   0%|          | 0.00/349 [00:00<?, ?B/s]

config_sentence_transformers.json:   0%|          | 0.00/116 [00:00<?, ?B/s]

README.md: 0.00B [00:00, ?B/s]

sentence_bert_config.json:   0%|          | 0.00/53.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/612 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/90.9M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/350 [00:00<?, ?B/s]

vocab.txt: 0.00B [00:00, ?B/s]

tokenizer.json: 0.00B [00:00, ?B/s]

special_tokens_map.json:   0%|          | 0.00/112 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/190 [00:00<?, ?B/s]

tokenizer_config.json: 0.00B [00:00, ?B/s]

spiece.model:   0%|          | 0.00/792k [00:00<?, ?B/s]

tokenizer.json: 0.00B [00:00, ?B/s]

special_tokens_map.json: 0.00B [00:00, ?B/s]

config.json: 0.00B [00:00, ?B/s]

model.safetensors:   0%|          | 0.00/308M [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/147 [00:00<?, ?B/s]

Device set to use cpu


In [5]:
def save_application(data):
    """Insert one application into the `applications` table and return its row id.

    Connection settings come from environment variables so that no credential
    lives in the notebook:

        PACEBOT_DB_PASSWORD  (required)
        PACEBOT_DB_HOST      (optional, defaults to the project's RDS endpoint)
        PACEBOT_DB_USER      (optional, defaults to "admin")
        PACEBOT_DB_NAME      (optional, defaults to "pacebot")

    Args:
        data: mapping with the keys 'full_name', 'email', 'education', 'program'.

    Returns:
        The inserted row's id (`cursor.lastrowid`) on success, or the string
        "ERROR" after printing the failure, so the chat loop keeps running.
    """
    import os
    import pymysql

    connection = None  # stays None if connect() fails, so `finally` skips close()
    try:
        # NOTE(review): a password used to be committed on this line; it should be
        # rotated on the database side, since it remains in the notebook's history.
        password = os.environ.get("PACEBOT_DB_PASSWORD")
        if not password:
            raise RuntimeError("PACEBOT_DB_PASSWORD environment variable is not set")

        # Connect to the RDS MySQL instance
        connection = pymysql.connect(
            host=os.environ.get(
                "PACEBOT_DB_HOST",
                "pacebot-db.cjwyq660ofyh.us-east-2.rds.amazonaws.com",
            ),
            user=os.environ.get("PACEBOT_DB_USER", "admin"),
            password=password,
            database=os.environ.get("PACEBOT_DB_NAME", "pacebot"),
            cursorclass=pymysql.cursors.DictCursor
        )

        with connection.cursor() as cursor:
            # Values are passed separately from the SQL text, so the driver escapes them.
            sql = """
                INSERT INTO applications (student_name, email, education, program_applied)
                VALUES (%s, %s, %s, %s)
            """
            cursor.execute(sql, (
                data['full_name'],
                data['email'],
                data['education'],
                data['program']
            ))
            connection.commit()
            return cursor.lastrowid  # Return the ID of the inserted application

    except Exception as e:
        # Deliberately broad: any failure is reported and turned into "ERROR".
        print("❌ Error saving application:", e)
        return "ERROR"
    finally:
        if connection:
            connection.close()

In [9]:
!pip install redis
import redis, hashlib, json

# Shared Redis client used by the cached search_programs() below.
# decode_responses=True makes get() return str instead of bytes.
# NOTE(review): the host looks like a placeholder — replace it with the real endpoint.
r = redis.StrictRedis(
    host='your-redis-endpoint',
    port=6379,
    decode_responses=True,
)

def _semantic_search_uncached(query, model, top_k=5):
    """Embed `query` with `model` and return the `top_k` nearest (name, link) pairs from `collection`."""
    query_vector = model.encode([query])[0].tolist()
    hits = collection.query(query_embeddings=[query_vector], n_results=top_k)
    links = [meta['link'] for meta in hits['metadatas'][0]]
    return list(zip(hits['documents'][0], links))


def search_programs(query, model, top_k=5, cache=None, search_fn=None):
    """Semantic program search with a one-hour Redis cache in front of it.

    This definition replaces the earlier, uncached `search_programs`; it keeps
    the same `(query, model, top_k=5)` call shape so existing callers still work.

    Args:
        query: the user's free-text query.
        model: embedding model handed through to the search function.
        top_k: number of matches to return; part of the cache key.
        cache: object with Redis-style `get(key)` / `set(key, value, ex=...)`;
            defaults to the module-level client `r`.
        search_fn: callable `(query, model, top_k)` performing the real search;
            defaults to `_semantic_search_uncached`.

    Returns:
        A list of (name, link) tuples, identical in shape on cache hit and miss.
    """
    if cache is None:
        cache = r
    if search_fn is None:
        search_fn = _semantic_search_uncached

    # top_k is hashed together with the query so that results cached for one
    # top_k are never served for another.
    key = hashlib.sha256(f"{top_k}:{query}".encode()).hexdigest()

    cache_ok = True
    try:
        cached = cache.get(key)
    except redis.RedisError as exc:
        # The cache is an optimisation only: fall back to a direct search.
        print("⚠️ Redis unavailable, searching without cache:", exc)
        cached, cache_ok = None, False

    if cached:
        print("🧠 Redis hit")
        # JSON has no tuple type; rebuild the pairs so hits look like misses.
        return [tuple(pair) for pair in json.loads(cached)]

    results = search_fn(query, model, top_k)
    if cache_ok:
        try:
            cache.set(key, json.dumps(results), ex=3600)  # Cache 1 hour
            print("⚡ Redis miss → saved")
        except redis.RedisError as exc:
            print("⚠️ Could not write to Redis:", exc)
    return results


Collecting redis
  Downloading redis-6.2.0-py3-none-any.whl.metadata (10 kB)
Downloading redis-6.2.0-py3-none-any.whl (278 kB)
[?25l   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/278.7 kB[0m [31m?[0m eta [36m-:--:--[0m[2K   [91m━━━━━━━━━━━━━[0m[90m╺[0m[90m━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m92.2/278.7 kB[0m [31m2.7 MB/s[0m eta [36m0:00:01[0m[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m278.7/278.7 kB[0m [31m4.3 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: redis
Successfully installed redis-6.2.0


In [7]:
!pip install pymysql

# === Application Flow ===
def collect_application():
    """Interactively collect an application from stdin and store it.

    Prompts for name, email and previous education, then loops until the user
    picks a program from the search results: each round asks for a keyword,
    lists the matches numbered from 1, and accepts only a number within that
    list. The chosen program name and the answers are passed to
    `save_application`; the user is told whether the save succeeded.

    Returns:
        None.
    """
    print("\n🎓 Let's start your application to Pace University!")
    full_name = input("👉 Full Name: ")
    email = input("📧 Email Address: ")
    education = input("🎓 Previous Education (e.g., BSc Computer Science): ")

    # Search for program selection
    while True:
        program_query = input("🧭 What program are you applying to? (type a keyword): ")
        matches = search_programs(program_query, model)
        if not matches:
            print("❌ No matching programs found. Try again.")
            continue
        print("\n✅ Top Matches:")
        for position, (name, link) in enumerate(matches, start=1):
            print(f"  {position}. {name} – {link}")
        choice = input("👉 Enter the number of the program to apply to: ")
        try:
            selected = int(choice)
        except ValueError:
            print("⚠️ Invalid choice. Try again.")
            continue
        # Explicit range check: without it, 0 or a negative number would be
        # accepted through negative indexing and pick a program from the end.
        if not 1 <= selected <= len(matches):
            print("⚠️ Invalid choice. Try again.")
            continue
        program_choice = matches[selected - 1][0]
        break

    application = {
        "full_name": full_name,
        "email": email,
        "education": education,
        "program": program_choice
    }

    app_id = save_application(application)
    # save_application signals failure with the sentinel string "ERROR".
    if app_id == "ERROR":
        print("\n❌ Your application could not be saved. Please try again later.\n")
        return
    print(f"\n📨 Your application has been saved with ID: {app_id}")
    print("✅ Thank you for applying to Pace University!\n")

# === Updated Chatbot Interface ===
# Banner printed once before the prompt loop starts.
print("🎓 PaceBot: Hello! I'm PaceBot, your assistant for graduate programs at Pace University.")
print("🎓 Ask me anything like 'Do you offer data science?' or type 'apply' to submit an application. Type 'exit' to leave.\n")

# Each turn is checked in a fixed order: exit, greeting, "apply", then the
# retrieval + generation fallback for everything else.
while True:
    user_input = input("You: ")

    if is_exit(user_input):
        print("PaceBot: Chat with you later! 🎓")
        break

    if is_greeting(user_input):
        print("PaceBot: Hello! How can I help you today?")
        continue

    if user_input.strip().lower() == "apply":
        collect_application()
        continue

    program_matches = search_programs(user_input, model)
    response = generate_answer_with_flan(user_input, program_matches)
    print("PaceBot:", response)


🎓 PaceBot: Hello! I'm PaceBot, your assistant for graduate programs at Pace University.
🎓 Ask me anything like 'Do you offer data science?' or type 'apply' to submit an application. Type 'exit' to leave.

You: exit
PaceBot: Chat with you later! 🎓


In [None]:
import socket

# TCP-level reachability probe for the MySQL port of the RDS instance.
# It only checks that a connection can be opened; no MySQL handshake or login happens.
host = "pacebot-db.cjwyq660ofyh.us-east-2.rds.amazonaws.com"
port = 3306

try:
    # The context manager closes the probe socket right away instead of leaving
    # it open until the object is garbage-collected.
    with socket.create_connection((host, port), timeout=10):
        print("✅ Able to reach RDS host")
except OSError as e:
    # DNS failures, refusals and timeouts from create_connection are all OSError subclasses.
    print("❌ Cannot reach RDS host:", e)


✅ Able to reach RDS host
