# In [None]:  (Jupyter cell marker left over from the notebook export)
import json
import openai
import PyPDF2
import tkinter as tk
from tkinter import filedialog
import re
import mysql.connector
from mysql.connector import Error
import uuid  # for generating session IDs
import nltk
from nltk.sentiment.vader import SentimentIntensityAnalyzer
from transformers import pipeline

# Ensure the VADER lexicon is downloaded.
# NOTE: this runs every time the module is imported. SentimentIntensityAnalyzer
# is imported above but is not referenced anywhere else in the visible code.
nltk.download('vader_lexicon')

# Initialize the Hugging Face emotion classifier used by analyze_emotion_hf().
# The model is loaded once at import time and shared by every request.
# top_k=None requests scores for all labels rather than only the top one
# (presumably; confirm against the installed transformers version).
emotion_classifier = pipeline(
    "text-classification", 
    model="j-hartmann/emotion-english-distilroberta-base", 
    top_k=None
)

# 1. Load the course cache into a dictionary.
# chat_with_courses() serializes this whole structure into the system prompt,
# so the file must be readable from the current working directory.
with open("course_index_cache.json", "r", encoding="utf-8") as f:
    courses_dict = json.load(f)

# 2. Set your OpenAI API key.
# Prefer the OPENAI_API_KEY environment variable so the secret never has to be
# committed to source control; the placeholder is kept only as a fallback so
# existing setups that edit this line keep working.
import os  # needed here because `os` is only imported further down the file

openai.api_key = os.environ.get("OPENAI_API_KEY", "Your-Open-Api-Key")

# Define your fine-tuned model ID (passed as `model=` in chat_with_courses()).
fine_tuned_model_id = "Your-Fine-Tuned-Model-ID"

# Academic-advisor meta prompt.
# chat_with_courses() places this text at the start of the system message of
# every new conversation, so any edit here changes what the model is told.
# The trailing .strip() removes the leading/trailing newlines of the literal.
# NOTE(review): the prerequisite sentence contains typos ("prerequiste") and
# sits directly above "# Steps" without a blank line; the text is runtime data
# sent verbatim to the model, so it is intentionally left untouched here.
META_PROMPT = """
Act as an academic advisor for university-level students seeking guidance on academic and career planning.

You will help students by analyzing their academic interests, performance, and career goals, and then provide actionable advice on course selection, research opportunities, internships, and career trajectories. Focus on clarity, actionable steps, and evidence-based recommendations. Use reasoning to outline your thought process and then provide your conclusions at the end.

If a new student is asking what they should take, tell them they should start with their core courses.

If an international student wants to be considered full time they should take at least 9 hours and at least 6 in-person credit hours and point them to this website: https://catalogue.uncw.edu/content.php?catoid=74&navoid=10022&hl=%22full+time%22&returnto=search#Full-Time_Status 

If they ask "What do I need to register for full time if I am not taking a course but still need to finish my capstone?" point them to this website: https://uncw.edu/myuncw/academics/graduate-school/forms and recommend them to register for continuous enrollment

If they ask "How many capstone hours should I register for?" tell them "3 hours of the capstone is the typical amount to take per semester. The idea is 3 hours during one semester to work on the proposal and 3 hours the following semester to complete/defend the capstone."

If students meets even if only one prerequiste for a course which requires multiple prerequiste, tell them they are eligible. 
# Steps

1. Evaluate the student's current academic status and interests.
2. Identify relevant academic and career goals.
3. Provide structured advice on course selection, research opportunities, and career options.
4. Summarize the reasoning process before presenting final recommendations.

# Output Format

The output should be a well-organized response in markdown. Include clear sections with headings, bullet points for actionable steps, and a summary of conclusions at the end. If structured data is required, use JSON format without wrapping it in code blocks.
""".strip()

def remove_markdown_formatting(text: str) -> str:
    """
    Removes Markdown formatting from the text, including headers, bullet list markers,
    and bold markers (e.g., **text**).

    Only markers at the start of a line are treated as headers or bullets; a '-',
    '*' or '+' in the middle of a line is left alone. Line structure is preserved:
    blank lines between paragraphs and lists are kept.

    Args:
        text: Markdown-formatted text.

    Returns:
        The text with header hashes, bullet markers and bold asterisks removed.
    """
    # Use [ \t] rather than \s: \s also matches '\n', which made the patterns
    # run across line boundaries and swallow the blank line before a bullet.
    text = re.sub(r'^#{1,6}[ \t]+', '', text, flags=re.MULTILINE)
    text = re.sub(r'^[ \t]*[-*+][ \t]+', '', text, flags=re.MULTILINE)
    # Non-greedy so that two bold spans on one line are unwrapped separately.
    text = re.sub(r'\*\*(.*?)\*\*', r'\1', text)
    return text

def clean_text(text: str) -> str:
    """
    Strips every run of asterisks and every standalone word 'import' (in any
    letter case) from the text, as a light guard against injected content.
    Whitespace around the removed pieces is left untouched.
    """
    # (pattern, flags) pairs, applied in order: asterisks first, then 'import'.
    removals = (
        (r'\*+', 0),
        (r'\bimport\b', re.IGNORECASE),
    )
    result = text
    for pattern, flags in removals:
        result = re.sub(pattern, '', result, flags=flags)
    return result

def sanitize_sql_input(input_string):
    """
    Drops the characters ; ' " and # from a string value.

    Anything that is not a string is handed back unchanged, so callers can pass
    numbers or None without special-casing them.
    """
    if isinstance(input_string, str):
        return re.sub(r"[;\'\"#]", "", input_string)
    return input_string

def extract_text_from_pdf(file_path: str) -> str:
    """
    Reads the PDF at the given path and returns the text of all its pages,
    concatenated in page order with no separator. Pages that yield no text
    are skipped.
    """
    with open(file_path, "rb") as pdf_stream:
        reader = PyPDF2.PdfReader(pdf_stream)
        # Extraction must happen while the underlying file is still open.
        page_chunks = [page.extract_text() for page in reader.pages]
    return "".join(chunk for chunk in page_chunks if chunk)

def analyze_emotion_hf(text: str) -> list:
    """
    Analyzes the text using a Hugging Face emotion detection model.

    Args:
        text: The text to classify.

    Returns:
        A list of dictionaries with emotion labels and scores (each item is read
        by the caller through its 'label' and 'score' keys).
    """
    # truncation=True is forwarded to the tokenizer so that input longer than
    # the model's maximum sequence length is clipped instead of raising inside
    # the pipeline and aborting the whole chat turn.
    scores = emotion_classifier(text, truncation=True)
    # If the output is nested (i.e. a list of lists), return the inner list.
    if scores and isinstance(scores[0], list):
        return scores[0]
    return scores

# Prefix that marks the system message carrying uploaded PDF text.
_PDF_CONTEXT_PREFIX = "Additional PDF information:\n"

# Emotion labels that count as "the user is upset".
_NEGATIVE_EMOTIONS = frozenset({"anger", "sadness", "fear", "disgust"})


def _ensure_pdf_context(conversation_history: list, pdf_text: str) -> None:
    """Make sure the history holds exactly one system message with the current PDF text."""
    if not pdf_text:
        return
    pdf_message = {"role": "system", "content": f"{_PDF_CONTEXT_PREFIX}{pdf_text}"}
    for index, message in enumerate(conversation_history):
        content = message.get("content")
        if (message.get("role") == "system"
                and isinstance(content, str)
                and content.startswith(_PDF_CONTEXT_PREFIX)):
            # Already present (or superseded by a newer upload): replace in place.
            conversation_history[index] = pdf_message
            return
    # Same position a brand-new conversation uses: right after the base prompt.
    conversation_history.insert(1, pdf_message)


def _user_seems_upset(user_question: str) -> bool:
    """Return True when any negative emotion scores above 0.5 for the question."""
    try:
        emotion_scores = analyze_emotion_hf(user_question)
    except Exception as e:
        # Emotion detection is an optional refinement; never let it block the answer.
        print(f"Emotion detection failed; continuing without it: {e}")
        return False
    max_neg_score = max(
        (item['score'] for item in emotion_scores
         if item['label'].lower() in _NEGATIVE_EMOTIONS),
        default=0.0,
    )
    return max_neg_score > 0.5


def chat_with_courses(user_question: str, pdf_text: str = "", conversation_history: list = None) -> tuple:
    """
    Sends the user question along with course data and optional PDF text to the GPT model.
    Maintains conversation history so that follow-up questions can be answered in context.
    Also uses emotion detection to ask the model for an empathetic reply if the user is upset.

    Args:
        user_question: The (already cleaned) question typed by the user.
        pdf_text: Optional text extracted from an uploaded PDF. It is added to the
            history as a system message even when the conversation has already
            started, so a PDF uploaded mid-session is not ignored.
        conversation_history: Message list returned by a previous call, or None to
            start a new conversation. The list is modified in place.

    Returns:
        A ``(assistant_message, conversation_history)`` tuple. If the API call
        fails, the message is a fixed apology string and the history is returned
        without an assistant entry.
    """
    if conversation_history is None:
        base_system_prompt = (
            META_PROMPT + "\n\n" +
            "You are a helpful and friendly academic assistant with complete and accurate course data. "
            "Below is the course data from a JSON cache. "
            "Answer questions based solely on this provided data and any additional PDF context, "
            "and do not generate or assume any information that is not explicitly provided. "
            "If a course does not include schedule or lecturer information, do not list any lecturers; "
            "instead, state that no lecturer information is available.\n\n"
            f"Course data:\n{json.dumps(courses_dict, indent=2)}\n\n"
            "Additional instructions:\n"
            "For more detailed instructions and to access the necessary forms, visit the [Graduate School Forms](https://uncw.edu/myuncw/academics/graduate-school/forms) page.\n\n"
            "If additional PDF information is provided, take it into account, but do not invent details that are not present."
        )
        conversation_history = [{"role": "system", "content": base_system_prompt}]

    # Done on every call, not only for new conversations: the API can receive a
    # PDF upload after the first message of a session.
    _ensure_pdf_context(conversation_history, pdf_text)

    conversation_history.append({"role": "user", "content": user_question})

    # Advanced emotion detection using Hugging Face
    if _user_seems_upset(user_question):
        empathetic_note = "Note: The user appears quite upset. Please respond with empathy and understanding."
        conversation_history.append({"role": "system", "content": empathetic_note})

    try:
        response = openai.ChatCompletion.create(
            model=fine_tuned_model_id,
            messages=conversation_history,
            max_tokens=600,
            temperature=0.7,
            top_p=0.8,
            frequency_penalty=0.5,
            presence_penalty=0.5
        )
    except Exception as e:
        print(f"Error during API call: {e}")
        return "Sorry, an error occurred while processing your request.", conversation_history

    assistant_message = response["choices"][0]["message"]["content"]
    conversation_history.append({"role": "assistant", "content": assistant_message})
    return assistant_message, conversation_history

def choose_pdf_file() -> str:
    """
    Opens a file dialog that allows the user to choose a PDF file.

    Returns:
        The selected file path, or an empty string if the dialog was cancelled.
    """
    root = tk.Tk()
    root.withdraw()  # Hide the main window
    try:
        file_path = filedialog.askopenfilename(
            title="Select a PDF file",
            filetypes=[("PDF Files", "*.pdf")]
        )
    finally:
        # Without this the hidden root window (and its Tcl interpreter) stays
        # alive for the rest of the process.
        root.destroy()
    # Normalize any falsy "cancelled" value to the documented empty string.
    return file_path or ""

def connect_to_database(host='127.0.0.1', port=3306, database='database',
                        user='root', password='Your Password'):
    """
    Connects to the MySQL database using the provided credentials.

    All parameters are optional so existing zero-argument calls keep working;
    the defaults are placeholders to be replaced for a real deployment.

    Args:
        host: MySQL server host name or IP address.
        port: TCP port of the server (3306 is the MySQL default).
        database: Name of the schema to use.
        user: MySQL user name.
        password: Password for that user.

    Returns:
        The open connection object, or None if connecting raised a MySQL error.
    """
    try:
        connection = mysql.connector.connect(
            host=host,
            port=port,
            database=database,
            user=user,
            password=password
        )
        if connection.is_connected():
            print("Connected to MySQL database")
        return connection
    except Error as e:
        print(f"Error while connecting to MySQL: {e}")
        return None

def save_session_to_db(session_id, user_query, assistant_response, connection):
    """
    Saves one question/answer exchange to the `memory` table of the MySQL database.

    Best-effort: MySQL errors are printed, never raised, so a database problem
    does not interrupt the conversation.

    Args:
        session_id: Identifier stored in the session_id column.
        user_query: The user's question.
        assistant_response: The assistant's reply.
        connection: An open MySQL connection, or None/falsy to skip saving.
    """
    if not connection:
        return

    # The INSERT below is parameterized, which is what protects against SQL
    # injection; the extra character stripping is kept so stored rows match
    # the format of rows written before.
    sanitized_query = sanitize_sql_input(user_query)
    sanitized_response = sanitize_sql_input(assistant_response)
    query = "INSERT INTO memory (session_id, query, response) VALUES (%s, %s, %s)"

    cursor = None
    try:
        cursor = connection.cursor()
        cursor.execute(query, (session_id, sanitized_query, sanitized_response))
        connection.commit()
    except Error as e:
        print(f"Error saving to MySQL: {e}")
        try:
            # Do not leave a half-finished transaction on a shared connection.
            connection.rollback()
        except Error as rollback_error:
            print(f"Error rolling back MySQL transaction: {rollback_error}")
    finally:
        # Close the cursor on the failure path too.
        if cursor is not None:
            try:
                cursor.close()
            except Error as close_error:
                print(f"Error closing MySQL cursor: {close_error}")

if __name__ == "__main__":
    # Interactive command-line chat: optional PDF upload, then a question loop
    # whose exchanges are stored in MySQL when a connection is available.

    # Establish connection to the MySQL database.
    db_connection = connect_to_database()

    try:
        # Ask the user if they want to upload a PDF file.
        upload_choice = input("Do you want to upload a PDF file? (yes/no): ").strip().lower()
        pdf_text = ""
        if upload_choice.startswith("y"):
            file_path = choose_pdf_file()
            if file_path:
                pdf_text = extract_text_from_pdf(file_path)
            else:
                print("No file selected. Continuing without PDF information.")

        # Initialize conversation history for follow-up questions
        conversation_history = None

        # One ID for the whole run, so every stored row of this conversation
        # shares it (a fresh ID per turn would make each row its own "session").
        session_id = str(uuid.uuid4())

        # Main loop for asking questions
        while True:
            user_input = input("\nAsk about courses (or type 'exit' to quit): ").strip()
            if user_input.lower() in ["exit", "quit"]:
                break
            if not user_input:
                # Nothing to ask; do not spend an API call on an empty question.
                continue
            cleaned_input = clean_text(user_input)
            answer, conversation_history = chat_with_courses(cleaned_input, pdf_text, conversation_history)
            plain_answer = remove_markdown_formatting(answer)
            print(f"\nAssistant: {plain_answer}\n")

            save_session_to_db(session_id, cleaned_input, plain_answer, db_connection)
    finally:
        # Release the connection on normal exit and on Ctrl-C / errors alike.
        if db_connection is not None and db_connection.is_connected():
            db_connection.close()


# Flask API endpoints for browser-based HTML integration


from flask import Flask, request, jsonify
import tempfile
import os

# Flask application object; the after_request hook and the /api/* routes
# defined below register themselves on it.
# NOTE(review): nothing in the visible code calls app.run(); presumably the app
# is started with `flask run` or a WSGI server (confirm).
app = Flask(__name__)

# Minimal CORS so the HTML file can call the API without extra deps
@app.after_request
def _add_cors_headers(resp):
    """
    Stamps permissive CORS headers on every outgoing response so a standalone
    HTML page can call the API: any origin, the Content-Type request header,
    and the POST/OPTIONS methods.
    """
    cors_headers = (
        ("Access-Control-Allow-Origin", "*"),
        ("Access-Control-Allow-Headers", "Content-Type"),
        ("Access-Control-Allow-Methods", "POST, OPTIONS"),
    )
    for header_name, header_value in cors_headers:
        resp.headers[header_name] = header_value
    return resp

# In-memory session storage: { session_id: {"pdf_text": str, "history": list} }
# Written by api_chat() and api_upload(). It lives only in this process, and
# nothing in the visible code ever removes entries.
_SESSIONS: dict = {}

# Optional shared DB connection for API usage
# connect_to_database() already returns None on a MySQL error; this extra guard
# covers any other exception so the API can still start without a database.
# save_session_to_db() skips saving when the connection is None.
try:
    _DB = connect_to_database()
except Exception:
    _DB = None

@app.route("/api/chat", methods=["POST", "OPTIONS"])
def api_chat():
    """
    Chat endpoint: answers one user message within a session.

    Expects a JSON object with a "message" string and an optional "session_id".
    Responds with {"reply": <markdown answer>, "session_id": <id>}; the client
    sends the returned session_id back to continue the same conversation.
    Returns HTTP 400 when the message is missing, empty, or not a string.
    """
    if request.method == "OPTIONS":
        # CORS preflight; the headers are added by the after_request hook.
        return ("", 204)

    # The body is untrusted: it may be absent, invalid JSON, or valid JSON that
    # is not an object (e.g. a list), which has no .get().
    data = request.get_json(silent=True)
    if not isinstance(data, dict):
        data = {}

    raw_message = data.get("message")
    message = raw_message.strip() if isinstance(raw_message, str) else ""

    # Always a string, so it is hashable as a _SESSIONS key.
    raw_session_id = data.get("session_id")
    session_id = str(raw_session_id) if raw_session_id else str(uuid.uuid4())

    if not message:
        return jsonify({"reply": "Please enter a message.", "session_id": session_id}), 400

    state = _SESSIONS.get(session_id, {})
    pdf_text = state.get("pdf_text", "")
    history = state.get("history")

    # Mirror CLI behavior: clean text first
    cleaned = clean_text(message)

    # Call existing logic (unchanged)
    answer_md, new_history = chat_with_courses(cleaned, pdf_text, history)

    # Save session memory
    _SESSIONS[session_id] = {"pdf_text": pdf_text, "history": new_history}

    # Persist plain text to DB like CLI does. Persistence is best-effort and
    # must not fail the request, but a failure is logged rather than hidden.
    try:
        plain = remove_markdown_formatting(answer_md)
        save_session_to_db(session_id, cleaned, plain, _DB)
    except Exception:
        app.logger.exception("Could not persist chat exchange for session %s", session_id)

    return jsonify({"reply": answer_md, "session_id": session_id})

@app.route("/api/upload", methods=["POST", "OPTIONS"])
def api_upload():
    """
    Upload endpoint: extracts the text of a PDF and attaches it to a session.

    Expects multipart form data with a "file" part and an optional "session_id"
    field. Responds with {"ok": True, "session_id": <id>} on success, or
    HTTP 400 with an "error" message when no file was sent or the file could
    not be read as a PDF. Existing chat history of the session is kept.
    """
    if request.method == "OPTIONS":
        # CORS preflight; the headers are added by the after_request hook.
        return ("", 204)

    f = request.files.get("file")
    session_id = request.form.get("session_id") or str(uuid.uuid4())
    if not f:
        return jsonify({"error": "No file uploaded"}), 400

    # Save to temp path and use existing extractor
    with tempfile.TemporaryDirectory() as td:
        # Never build the path from the client-supplied filename: a value such
        # as "../x" or an absolute path would escape the temporary directory.
        path = os.path.join(td, "upload.pdf")
        f.save(path)
        try:
            pdf_text = extract_text_from_pdf(path)
        except Exception:
            # Corrupt or non-PDF upload: report a client error, not a 500.
            app.logger.exception("Could not extract text from uploaded file")
            return jsonify({
                "error": "Could not read the uploaded file as a PDF",
                "session_id": session_id,
            }), 400

    # Merge/initialize session state
    prev = _SESSIONS.get(session_id, {})
    _SESSIONS[session_id] = {"pdf_text": pdf_text, "history": prev.get("history")}

    return jsonify({"ok": True, "session_id": session_id})