In [1]:
import os
import requests
import json
import difflib
from flask import Flask, request, jsonify
from openai import OpenAI
import math
from config import PAGE_ACCESS_TOKEN, VERIFY_TOKEN, OPENAI_API_KEY, GPT_MODEL, SIMILARITY_THRESHOLD

In [2]:
client = OpenAI(api_key=OPENAI_API_KEY)

In [3]:
app = Flask(__name__)

In [4]:
# --- Load data from files ---
def load_data_from_file(filepath):
    """Loads a JSON file and returns the data."""
    try:
        with open(filepath, "r", encoding="utf-8") as f:
            return json.load(f)
    except FileNotFoundError:
        print(f"Error: The file '{filepath}' was not found. Please create it.")
        return {}
    except json.JSONDecodeError:
        print(f"Error: The file '{filepath}' contains invalid JSON. Please check the file format.")
        return {}

In [5]:
# FAQ + embeddings
faq_english = json.load(open("data/faq_english.json", encoding="utf-8"))
faq_embeddings_english = json.load(open("data/faq_english_embeddings.json", encoding="utf-8"))
faq_georgian = json.load(open("data/faq_georgian.json", encoding="utf-8"))
faq_embeddings_georgian = json.load(open("data/faq_georgian_embeddings.json", encoding="utf-8"))

# Dynamic data
schedule_en = json.load(open("data/schedule_english.json", encoding="utf-8"))
schedule_ge = json.load(open("data/schedule_georgian.json", encoding="utf-8"))
location_en = json.load(open("data/location_contact_english.json", encoding="utf-8"))
location_ge = json.load(open("data/location_contact_georgian.json", encoding="utf-8"))

intents = json.load(open("data/intents.json", encoding="utf-8"))

# Default fallback messages
default_messages = json.load(open("data/default_messages.json", encoding="utf-8"))

In [6]:
def get_class_schedule(class_type=None, is_georgian=False):
    schedule_data = schedule_ge if is_georgian else schedule_en
    results = []
    for cls in schedule_data["classes"]:
        if class_type is None or cls["type"].lower() == class_type.lower():
            results.append(f"{cls['type']} with {cls['trainer']} on {', '.join(cls['days'])} at {cls['hours']}")
    return "\n".join(results) if results else ("Sorry, no matching class found." if not is_georgian else "მაპატიეთ, შესაბამისი კლასი ვერ მოიძებნა.")

In [7]:
def get_location_info(is_georgian=False):
    loc = location_ge if is_georgian else location_en
    hours = "\n".join([f"{day}: {time}" for day, time in loc["opening_hours"].items()])
    return (
        f"{loc['studio_name']}\n"
        f"Address: {loc['address']}\n"
        f"Phone: {loc['phone']}\n"
        f"Email: {loc['email']}\n"
        f"Opening hours:\n{hours}"
    )

In [8]:
def cosine_similarity(vec1, vec2):
    if len(vec1) != len(vec2):
        return 0
    dot_product = sum(a * b for a, b in zip(vec1, vec2))
    norm1 = math.sqrt(sum(a * a for a in vec1))
    norm2 = math.sqrt(sum(b * b for b in vec2))
    if norm1 == 0 or norm2 == 0:
        return 0
    return dot_product / (norm1 * norm2)

In [9]:
def preprocess_intents(intents_data):
    """
    Creates a global dictionary for O(1) lookup of examples.
    """
    processed_intents = {}
    # Note: Access the list inside the 'intents' dictionary: intents_data['intents']
    for intent in intents_data.get('intents', []): 
        name = intent["name"]
        processed_intents[name] = {
            "english": [ex.lower().strip() for ex in intent.get("examples", {}).get("english", [])],
            "georgian": [ex.lower().strip() for ex in intent.get("examples", {}).get("georgian", [])]
        }
    return processed_intents

In [10]:
PREPROCESSED_INTENT_EXAMPLES = preprocess_intents(intents)

In [11]:
INTENT_LIST = list(PREPROCESSED_INTENT_EXAMPLES.keys())

In [12]:
def detect_intent_ai(query):
    # 1. Language Detection
    # Check for Georgian (reusing your existing logic)
    is_ge = any(ord(c) > 127 for c in query)
    lang = "georgian" if is_ge else "english"
    # 1. Define the System Prompt
    system_prompt = f"""
    You are a Natural Language Understanding (NLU) model for a yoga studio chatbot.
    Your task is to classify the user's query into one of the following intents: {', '.join(INTENT_LIST)}.
    If the query clearly asks about a class schedule or location, use 'schedule_request' or 'location_request'.
    If it's a simple salutation, use 'greeting'. If it's thanks, use 'thanks'.
    For any other complex question (pricing, booking, descriptions, etc.), use the general intent 'faq'.

    You MUST only respond with a single JSON object. Do not include any other text or explanation.
    JSON Format: {{"intent": "CLASSIFIED_INTENT_NAME"}}
    """
    try:
        response = client.chat.completions.create(
            model=GPT_MODEL,
            messages=[
                {"role": "system", "content": system_prompt},
                {"role": "user", "content": query}
            ],
            response_format={"type": "json_object"},
            temperature=0.0 # Make it deterministic
        )
        
        json_string = response.choices[0].message.content
        intent_data = json.loads(json_string)
        
        # Ensure the intent name is one of the known intents (fallback to 'faq' if the model hallucinated one)
        intent_name = intent_data.get("intent", "faq")
        if intent_name not in INTENT_LIST:
             intent_name = "faq"

        return intent_name, lang

    except Exception as e:
        print(f"Error detecting intent with OpenAI: {e}")
        # Fallback to the original rule-based detection or a safe default
        return "faq", lang # Safe fallback on API error

In [13]:
def get_intent_response(intent_name, lang="english"):
    """
    Retrieves a static response message for a matched intent from the JSON data.
    """
    # Note: We iterate over the list stored under the 'intents' key in your global intents dictionary
    for intent in intents.get('intents', []): 
        if intent["name"] == intent_name:
            # Try to get the responses for the detected language
            responses = intent.get("responses", {}).get(lang, [])
            
            # Optional Polishing: If the specific language response is missing, fall back to English
            if not responses and lang == "georgian":
                 responses = intent.get("responses", {}).get("english", [])

            if responses:
                # Returns the first response in the list (or you could use random.choice(responses))
                return responses[0] 
                
    # Returns None if the intent name is valid but has no responses defined
    return None

In [14]:
def match_faq(query, faq_embeddings, faq_data, threshold=SIMILARITY_THRESHOLD):
    # Generate embedding for user query
    response = client.embeddings.create(
        model="text-embedding-3-small",
        input=query
    )
    user_vector = response.data[0].embedding

    # Find best match
    best_question = None
    best_score = 0
    for question, vector in faq_embeddings.items():
        score = cosine_similarity(user_vector, vector)
        if score > best_score:
            best_score = score
            best_question = question

    if best_score >= threshold:
        return faq_data[best_question]
    return None


In [15]:
# --- Messenger helper ---
def send_message(recipient_id, text):
    """Send message back to a Facebook Messenger user."""
    url = "https://graph.facebook.com/v21.0/me/messages"
    params = {"access_token": PAGE_ACCESS_TOKEN}
    headers = {"Content-Type": "application/json"}
    data = {"recipient": {"id": recipient_id}, "message": {"text": text}}
    r = requests.post(url, params=params, headers=headers, json=data)
    if r.status_code != 200:
        print(f"Error sending message: {r.text}")
        print(f"Facebook API Response: {r.text}")

In [16]:
def get_answer(query):
    query_lower = query.lower()
    is_georgian = any(ord(c) > 127 for c in query)
    
    # Set the correct data based on detected language
    faq = faq_georgian if is_georgian else faq_english
    embeddings = faq_embeddings_georgian if is_georgian else faq_embeddings_english
    schedule_data = schedule_ge if is_georgian else schedule_en
    lang_key = "georgian" if is_georgian else "english"


    for cls in schedule_data["classes"]:
        if cls["type"].lower() in query_lower:
            return get_class_schedule(cls["type"], is_georgian)
    if any(word in query_lower for word in ["class","schedule","hours","დროში","კლას"]):
        return get_class_schedule(is_georgian=is_georgian)

    if any(word in query_lower for word in ["location","address","phone","contact","studio","მისამართი","ტელეფონი"]):
        return get_location_info(is_georgian)

    
    faq_response = match_faq(
        query, 
        embeddings, 
        faq, 
        threshold=SIMILARITY_THRESHOLD # Ensure this variable is accessible
    )

    if faq_response:
        return faq_response
    
    # --- Final Fallback: Static Message from default_messages.json ---
    return default_messages.get(lang_key, "Sorry, I didn't understand.")

In [17]:
# --- Root route for testing ---
@app.route("/", methods=["GET"])
def home():
    return "Messenger bot is running!"

In [18]:
@app.route("/webhook", methods=["GET", "POST"])
def webhook():
    if request.method == "GET":
        token_sent = request.args.get("hub.verify_token")
        return request.args.get("hub.challenge") if token_sent == VERIFY_TOKEN else "Invalid token"
    
    # POST: incoming messages
    output = request.get_json(silent=True)
    if not output:
        return "No JSON payload", 400

    for entry in output.get("entry", []):
        for messaging_event in entry.get("messaging", []):
            sender = messaging_event.get("sender")
            message = messaging_event.get("message")
            
            # Use robust checks for critical fields
            if not isinstance(sender, dict) or not isinstance(message, dict):
                continue
            
            sender_id = sender.get("id")
            user_text = message.get("text")
            
            # Skip if no sender ID or non-text message
            if not sender_id or not isinstance(user_text, str):
                continue

            try:
               
                # 1. Intent Detection (New AI Function)
                intent, lang = detect_intent_ai(user_text) 
                print("AI-detected intent:", intent)
                response_text = None

                # 2. Intent-Based Response (DATA-DRIVEN)
                # This function pulls the response dynamically from intents.json
                response_text = get_intent_response(intent, lang) 

                # 3. Dynamic/FAQ Fallback
                # If the intent was 'faq' or if an intent had no simple static response, 
                # run the complex logic (schedule, location, embedding match).
                if not response_text:
                    response_text = get_answer(user_text)

                # 4. Final Fallback
                if not response_text:
                    lang_key = "georgian" if lang == "georgian" else "english"
                    response_text = default_messages.get(lang_key, "Sorry, I didn't understand.")

            except Exception as e:
                print("Error processing message:", e)
                response_text = "Sorry, something went wrong."

            # Send response
            try:
                if response_text:
                    send_message(sender_id, response_text)
            except Exception as e:
                print("Error sending message:", e)

    return "ok", 200

In [19]:
# --- Run server ---
if __name__ == "__main__":
    app.run(port=5000, debug=True, use_reloader=False)

 * Serving Flask app "__main__" (lazy loading)
 * Environment: production
[2m   Use a production WSGI server instead.[0m
 * Debug mode: on


 * Running on http://127.0.0.1:5000/ (Press CTRL+C to quit)
127.0.0.1 - - [26/Sep/2025 13:35:45] "[37mGET /webhook?hub.mode=subscribe&hub.challenge=22298149&hub.verify_token=kdjjskhfjskfnklafmkasa6s45asffsdnfsdf5dsfc HTTP/1.1[0m" 200 -


AI-detected intent: greeting


127.0.0.1 - - [26/Sep/2025 13:36:06] "[37mPOST /webhook HTTP/1.1[0m" 200 -


AI-detected intent: greeting


127.0.0.1 - - [26/Sep/2025 13:36:22] "[37mPOST /webhook HTTP/1.1[0m" 200 -


AI-detected intent: greeting


127.0.0.1 - - [26/Sep/2025 13:36:42] "[37mPOST /webhook HTTP/1.1[0m" 200 -


AI-detected intent: faq


127.0.0.1 - - [26/Sep/2025 13:36:56] "[37mPOST /webhook HTTP/1.1[0m" 200 -
