In [29]:
import pandas as pd

# Load recipe data from CSV
recipes_df = pd.read_csv('../Resource/completed_recipes.csv')
print(f"Loaded {len(recipes_df)} recipes.")


Loaded 522517 recipes.


In [30]:
from elasticsearch import Elasticsearch

es = Elasticsearch(
    "https://localhost:9200",
    basic_auth=("elastic", "D*d4-0+Kl+lxfbbzh5ut"),
    ca_certs="~/http_ca.crt"
)

if es.ping():
    print("Elasticsearch connect for now!!")
else:
    print("failed to connect")

Elasticsearch connect for now!!


In [35]:
import json
import numpy as np
import re
from tqdm import tqdm
from elasticsearch import Elasticsearch
from elasticsearch.helpers import bulk, BulkIndexError

In [49]:
# --- Define Index Name ---
index_name = "recipes"
recipes_sample = recipes_df  # Use your dataset

# --- Delete Index if Exists ---
es.indices.delete(index=index_name, ignore=[400, 404])

# --- Mapping: Updated image_url as text ---
mapping = {
    "settings": {
        "analysis": {
            "tokenizer": {
                "ngram_tokenizer": {
                    "type": "ngram",
                    "min_gram": 2,
                    "max_gram": 3,
                    "token_chars": ["letter", "digit"]
                }
            },
            "filter": {
                "shingle_filter": {
                    "type": "shingle",
                    "min_shingle_size": 2,
                    "max_shingle_size": 3
                }
            },
            "analyzer": {
                "default": {"type": "english"},
                "ngram_analyzer": {
                    "type": "custom",
                    "tokenizer": "ngram_tokenizer",
                    "filter": ["lowercase"]
                },
                "shingle_analyzer": {
                    "type": "custom",
                    "tokenizer": "standard",
                    "filter": ["lowercase", "shingle_filter"]
                }
            }
        }
    },
    "mappings": {
        "properties": {
            "recipe_id": {"type": "keyword"},
            "name": {
                "type": "text",
                "analyzer": "english",
                "fields": {
                    "ngram": {"type": "text", "analyzer": "ngram_analyzer"},
                    "shingle": {"type": "text", "analyzer": "shingle_analyzer"}
                }
            },
            "description": {"type": "text", "analyzer": "english"},
            "instructions": {"type": "text", "analyzer": "english"},
            "text": {"type": "text", "analyzer": "english"},
            "calories": {"type": "float"},
            "rating": {"type": "float"},
            "image_url": {"type": "text"}  # Changed from keyword to text
        }
    }
}

# --- Create Index ---
es.indices.create(index=index_name, body=mapping)
print(f"Created index: {index_name}")

# --- Cleaning Functions ---

def safe_text_field(value):
    """Convert NaN or None to empty string for text fields."""
    if isinstance(value, str):
        return value.strip()
    elif isinstance(value, float) and np.isnan(value):
        return ""
    elif value is None:
        return ""
    return str(value).strip()

def clean_text(text):
    """Remove unwanted characters like c("..."), quotes, and escape sequences."""
    if not isinstance(text, str):
        return ""
    text = re.sub(r'c\("', '', text)
    text = re.sub(r'"\)', '', text)
    text = text.replace('\\"', '').replace('"', '').replace("\\", '')
    cleaned_urls = re.sub(r'\s+', ' ', text.strip())
    urls = cleaned_urls.split(', ')
    return urls

def clean_instructions_combined_v2(instructions):
    if isinstance(instructions, list):
        instructions = " ".join(instructions)
    if not isinstance(instructions, str):
        return ""
    instructions = re.sub(r'c\s*\(\s*', '', instructions)
    instructions = re.sub(r'\s*\)$', '', instructions)
    instructions = instructions.strip('"')
    words = instructions.split()
    cleaned_words = []
    buffer = ""
    for word in words:
        if len(word) == 1:
            buffer += word
        else:
            if buffer:
                cleaned_words.append(buffer + word)
                buffer = ""
            else:
                cleaned_words.append(word)
    if buffer:
        cleaned_words.append(buffer)
    instructions = " ".join(cleaned_words)
    instructions = re.sub(r'\.\s*\.', '.', instructions)
    instructions = re.sub(r'\s*\.\s*', '. ', instructions)
    instructions = re.sub(r'\s*,', ',', instructions)
    instructions = re.sub(r'(^\"|\"$)', '', instructions)
    instructions = instructions.replace('\\"', '')
    instructions = instructions.replace('", "', ', ')
    instructions = re.sub(r'\s*\.\s*', ', ', instructions)
    instructions = re.sub(r',\s*,+', ', ', instructions)
    instructions = re.sub(r'\s+', ' ', instructions)
    instructions = instructions.strip().rstrip(',') + "."
    return instructions

def fix_image_url_field(image_url_raw):
    """Converts c("url1", "url2") to list OR returns original list/string"""
    if isinstance(image_url_raw, str) and image_url_raw.startswith("c("):
        try:
            fixed_str = image_url_raw.replace("c(", "[").replace(")", "]")
            return ast.literal_eval(fixed_str)
        except Exception:
            return [image_url_raw]
    elif isinstance(image_url_raw, list):
        return image_url_raw
    elif isinstance(image_url_raw, str):
        return [image_url_raw]
    else:
        return []

# --- Document Generator ---

def generate_docs(df):
    for idx, row in tqdm(df.iterrows(), total=len(df), desc="Indexing recipes"):
        recipe_id = str(int(float(row.get('RecipeId', idx))))
        name = safe_text_field(row.get('Name', ''))
        description = safe_text_field(row.get('Description', ''))
        instructions_raw = row.get('RecipeInstructions', [])
        instructions = safe_text_field(clean_instructions_combined_v2(instructions_raw))
        text_raw = row.get('text', '')
        text_list = clean_text(text_raw)
        text = ", ".join(text_list) if isinstance(text_list, list) else safe_text_field(text_list)
        calories = float(row.get('Calories', 0))
        image_url_raw = row.get('image_link', [])
        image_urls = fix_image_url_field(image_url_raw)
        image_url_final = image_urls[0] if isinstance(image_urls, list) and image_urls else ""
        image_url_final = safe_text_field(image_url_final)
        try:
            rating = float(row['AggregatedRating']) if not np.isnan(row['AggregatedRating']) else 0
        except (KeyError, TypeError, ValueError):
            rating = 0
        doc = {
            "_op_type": "index",
            "_index": index_name,
            "_id": recipe_id,
            "_source": {
                "recipe_id": recipe_id,
                "name": name,
                "description": description,
                "instructions": instructions,
                "text": text,
                "calories": calories,
                "rating": rating,
                "image_url": image_url_final
            }
        }
        yield doc

# --- Bulk Indexing ---

try:
    bulk(es, generate_docs(recipes_sample))
    print(f"Indexed {len(recipes_sample)} recipes into Elasticsearch.")
except BulkIndexError as e:
    print(f"Bulk index error: {len(e.errors)} documents failed.")
    for err in e.errors[:5]:
        print(json.dumps(err, indent=2))


  es.indices.delete(index=index_name, ignore=[400, 404])


Created index: recipes


Indexing recipes: 100%|██████████| 522517/522517 [03:07<00:00, 2787.40it/s]


Indexed 522517 recipes into Elasticsearch.


### Flask

In [50]:
import uuid
import pymysql
import numpy as np
from flask import Flask, request, jsonify
from flask_cors import CORS
from werkzeug.security import generate_password_hash, check_password_hash
from elasticsearch import Elasticsearch
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.feature_extraction.text import TfidfVectorizer

In [60]:
app = Flask(__name__)
app.secret_key = "SOME_RANDOM_SECRET_KEY"
CORS(app)

def get_db_connection():
    return pymysql.connect(
        host='127.0.0.1',
        port=3309,
        user='root',
        password='root_password',
        db='my_database',
        cursorclass=pymysql.cursors.DictCursor
    )


INDEX_NAME = "recipes"

try:
    es.indices.put_settings(
        index=INDEX_NAME,
        body={"index": {"max_result_window": 100000}}
    )
    print(f"✅ Updated max_result_window to 100000 for index: {INDEX_NAME}")
except Exception as e:
    print(f"⚠️ Failed to update max_result_window: {str(e)}")

@app.route('/search', methods=['GET'])
def search():
    query = request.args.get('q', '').strip()
    page = int(request.args.get('page', 1))
    results_per_page = 10

    if not query:
        return jsonify({"error": "Query parameter 'q' is required"}), 400

    # Log query to database
    connection = get_db_connection()
    try:
        with connection.cursor() as cursor:
            cursor.execute("INSERT INTO search_logs (query) VALUES (%s)", (query,))
            connection.commit()
    except Exception as log_error:
        print("Search log error:", log_error)
    finally:
        connection.close()

    # Build Elasticsearch query
    es_query = {
        "query": {
            "multi_match": {
                "query": query,
                "fields": ["name^3", "description^2", "text", "name.ngram^2"],
                "fuzziness": "AUTO"
            }
        },
        "size": results_per_page,
        "from": (page - 1) * results_per_page,
        "suggest": {
            "text": query,
            "phrase_suggest": {
                "phrase": {
                    "field": "name.shingle",
                    "size": 1,
                    "gram_size": 3,
                    "direct_generator": [{"field": "name.shingle", "suggest_mode": "always"}],
                    "highlight": {"pre_tag": "<em>", "post_tag": "</em>"}
                }
            }
        }
    }

    try:
        # Execute search
        response = es.search(index=INDEX_NAME, body=es_query)
        total_count = response["hits"]["total"]["value"]

        # 🔥 Debug print for development
        print(f"Total hits for '{query}':", total_count)

        hits = response.get("hits", {}).get("hits", [])

        # Handle suggestion
        suggested_query = ''
        suggestions = response.get("suggest", {}).get("phrase_suggest", [])
        if suggestions and suggestions[0]["options"]:
            suggested_query = suggestions[0]["options"][0]["text"]

        # 🚧 Pagination limit enforcement (Option A)
        MAX_RESULT_WINDOW = 100000
        max_allowed_pages = MAX_RESULT_WINDOW // results_per_page
        if page > max_allowed_pages:
            return jsonify({
                "results": [],
                "total_count": total_count,
                "suggested_query": suggested_query,
                "warning": f"Cannot fetch page {page}. Maximum page allowed is {max_allowed_pages}."
            })

        # Format search results
        results = [{
            "recipe_id": hit["_source"].get("recipe_id"),
            "name": hit["_source"].get("name"),
            "description": hit["_source"].get("description"),
            "instructions": hit["_source"].get("instructions"),
            "image_url": clean_text(hit["_source"].get("image_url")),
            "calories": hit["_source"].get("calories"),
            "rating": hit["_source"].get("rating", 0),
            "score": hit["_score"]
        } for hit in hits]

        return jsonify({
            "results": results,
            "total_count": total_count,
            "suggested_query": suggested_query
        })

    except Exception as e:
        return jsonify({"error": f"Elasticsearch error: {str(e)}"}), 500

# ------------------ Recipe Details ------------------ #
@app.route('/recipe/<recipe_id>', methods=['GET'])
def get_recipe(recipe_id):
    response = es.get(index=INDEX_NAME, id=recipe_id, ignore=[404])
    if not response or not response.get("found"):
        return jsonify({"error": f"Recipe ID {recipe_id} not found"}), 404

    source = response["_source"]
    source["instructions"] = clean_instructions_combined_v2(source.get("instructions", ""))
    
    cleaned_image = clean_text(source.get("image_url", ""))
    source["image_url"] = cleaned_image[0] if isinstance(cleaned_image, list) and cleaned_image else ""

    return jsonify(source)


# ------------------ User Registration/Login ------------------ #
@app.route('/register', methods=['POST'])
def register():
    data = request.json
    username, password = data.get('username'), data.get('password')
    if not username or not password:
        return jsonify({"error": "Missing username or password"}), 400

    password_hash = generate_password_hash(password)
    connection = get_db_connection()
    try:
        with connection.cursor() as cursor:
            cursor.execute("SELECT user_id FROM users WHERE username = %s", (username,))
            if cursor.fetchone():
                return jsonify({"error": "Username already taken"}), 400
            cursor.execute("INSERT INTO users (username, password_hash) VALUES (%s, %s)", (username, password_hash))
            connection.commit()
        return jsonify({"message": "Registration successful"}), 200
    finally:
        connection.close()

@app.route('/login', methods=['POST'])
def login():
    data = request.json
    username, password = data.get('username'), data.get('password')
    if not username or not password:
        return jsonify({"error": "Missing username or password"}), 400

    connection = get_db_connection()
    try:
        with connection.cursor() as cursor:
            cursor.execute("SELECT user_id, password_hash FROM users WHERE username = %s", (username,))
            user = cursor.fetchone()
            if not user or not check_password_hash(user['password_hash'], password):
                return jsonify({"error": "Invalid username or password"}), 401

            token = str(uuid.uuid4())
            cursor.execute("REPLACE INTO sessions (token, username, user_id) VALUES (%s, %s, %s)",
                           (token, username, user['user_id']))
            connection.commit()
            return jsonify({"message": "Login successful", "username": username, "token": token}), 200
    finally:
        connection.close()

@app.route('/logout', methods=['POST'])
def logout():
    auth_header = request.headers.get('Authorization')
    if not auth_header or not auth_header.startswith('Bearer '):
        return jsonify({"error": "Invalid Authorization header format"}), 401
    token = auth_header.split(" ")[1]

    connection = get_db_connection()
    try:
        with connection.cursor() as cursor:
            cursor.execute("DELETE FROM sessions WHERE token = %s", (token,))
            connection.commit()
        return jsonify({"message": "Logged out successfully"}), 200
    finally:
        connection.close()

# ------------------ Top Searches ------------------ #
@app.route('/top-searches', methods=['GET'])
def top_searches():
    connection = get_db_connection()
    try:
        with connection.cursor() as cursor:
            cursor.execute("""
                SELECT query, COUNT(*) AS count 
                FROM search_logs 
                GROUP BY query 
                ORDER BY count DESC 
                LIMIT 5
            """)
            top_queries = cursor.fetchall()
        return jsonify({"top_searches": top_queries}), 200
    except Exception as e:
        return jsonify({"error": f"Failed to fetch top searches: {str(e)}"}), 500
    finally:
        connection.close()

# ------------------ Recommendations ------------------ #
@app.route('/recommendations', methods=['GET'])
def recommendations():
    auth_header = request.headers.get('Authorization')
    if not auth_header or not auth_header.startswith('Bearer '):
        return jsonify({"error": "Invalid Authorization header format"}), 401
    token = auth_header.split(" ")[1]

    connection = get_db_connection()
    try:
        with connection.cursor() as cursor:
            cursor.execute("SELECT user_id FROM sessions WHERE token = %s", (token,))
            user = cursor.fetchone()
            if not user:
                return jsonify({"error": "Invalid token"}), 401
            user_id = user['user_id']

            cursor.execute("SELECT recipe_id FROM bookmarks WHERE user_id = %s", (user_id,))
            bookmarked_ids = [r["recipe_id"] for r in cursor.fetchall()]

        bookmarked_recipes = []
        if bookmarked_ids:
            import random
            random.shuffle(bookmarked_ids)
            picked = bookmarked_ids[:2]
            for rid in picked:
                res = es.get(index=INDEX_NAME, id=str(rid), ignore=[404])
                if res and res.get("found"):
                    doc = res["_source"]
                    doc["image_url"] = clean_text(doc.get("image_url", ""))
                    bookmarked_recipes.append(doc)

        must_not_clause = []
        if bookmarked_ids:
            must_not_clause.append({"terms": {"recipe_id": [str(i) for i in bookmarked_ids]}})

        es_query = {
            "query": {
                "bool": {
                    "must": [{"exists": {"field": "image_url"}}],
                    "must_not": must_not_clause
                }
            },
            "size": 3
        }

        es_res = es.search(index=INDEX_NAME, body=es_query)
        new_recipes = []
        for hit in es_res.get("hits", {}).get("hits", []):
            doc = hit["_source"]
            doc["image_url"] = clean_text(doc.get("image_url", ""))
            new_recipes.append(doc)

        return jsonify({"recommendations": bookmarked_recipes + new_recipes}), 200
    except Exception as e:
        return jsonify({"error": f"Error fetching recommendations: {str(e)}"}), 500
    finally:
        connection.close()

@app.route('/bookmark', methods=['POST'])
def add_bookmark():
    data = request.json or {}
    auth_header = request.headers.get('Authorization')
    if not auth_header or not auth_header.startswith('Bearer '):
        return jsonify({"error": "Invalid Authorization header format"}), 401
    token = auth_header.split(" ")[1]

    connection = get_db_connection()
    try:
        with connection.cursor() as cursor:
            cursor.execute("SELECT username FROM sessions WHERE token = %s", (token,))
            user = cursor.fetchone()
            if not user:
                return jsonify({"error": "Invalid token"}), 401
            username = user['username']

            recipe_id = data.get('recipe_id')
            folder_name = data.get('folder_name', '').strip()
            rating = data.get('rating', 0)

            if not recipe_id or not folder_name:
                return jsonify({"error": "Missing recipe_id or folder_name"}), 400

            cursor.execute("SELECT id FROM folders WHERE username = %s AND folder_name = %s", (username, folder_name))
            folder_row = cursor.fetchone()
            folder_id = folder_row['id'] if folder_row else None

            if not folder_id:
                cursor.execute("INSERT INTO folders (username, folder_name) VALUES (%s, %s)", (username, folder_name))
                folder_id = cursor.lastrowid

            cursor.execute("INSERT IGNORE INTO folder_recipes (folder_id, recipe_id) VALUES (%s, %s)", (folder_id, recipe_id))
            cursor.execute("""
                INSERT INTO bookmarks (user_id, recipe_id, rating)
                VALUES ((SELECT user_id FROM users WHERE username = %s), %s, %s)
                ON DUPLICATE KEY UPDATE rating = VALUES(rating)
            """, (username, recipe_id, rating))
            connection.commit()

        return jsonify({"message": "Bookmark added successfully"}), 200
    except Exception as e:
        return jsonify({"error": f"Bookmark error: {str(e)}"}), 500
    finally:
        connection.close()

@app.route('/bookmarks', methods=['GET'])
def get_bookmarks():
    auth_header = request.headers.get('Authorization')
    if not auth_header or not auth_header.startswith('Bearer '):
        return jsonify({"error": "Invalid Authorization header format"}), 401
    token = auth_header.split(" ")[1]

    connection = get_db_connection()
    try:
        with connection.cursor() as cursor:
            cursor.execute("SELECT username FROM sessions WHERE token = %s", (token,))
            user = cursor.fetchone()
            if not user:
                return jsonify({"error": "Invalid token"}), 401
            username = user['username']

            cursor.execute("SELECT id AS folder_id, folder_name FROM folders WHERE username = %s", (username,))
            folders = cursor.fetchall()

            for folder in folders:
                folder_id = folder["folder_id"]
                cursor.execute("""
                    SELECT fr.recipe_id, b.rating 
                    FROM folder_recipes fr
                    LEFT JOIN bookmarks b ON fr.recipe_id = b.recipe_id 
                    AND b.user_id = (SELECT user_id FROM users WHERE username = %s)
                    WHERE fr.folder_id = %s
                    ORDER BY b.rating DESC
                """, (username, folder_id))
                recipes = cursor.fetchall()

                for recipe in recipes:
                    es_res = es.get(index="recipes", id=str(recipe["recipe_id"]), ignore=[404])
                    if es_res and es_res.get("found"):
                        src = es_res["_source"]
                        recipe["image_url"] = clean_text(src.get("image_url", ""))
                        recipe["name"] = src.get("name", "")
                        recipe["description"] = src.get("description", "")

                folder["recipes"] = recipes

        return jsonify({"folders": folders}), 200
    except Exception as e:
        return jsonify({"error": f"Bookmarks fetch error: {str(e)}"}), 500
    finally:
        connection.close()

@app.route('/bookmark', methods=['DELETE'])
def delete_bookmark():
    data = request.json or {}
    auth_header = request.headers.get('Authorization')
    if not auth_header or not auth_header.startswith('Bearer '):
        return jsonify({"error": "Invalid Authorization header format"}), 401
    token = auth_header.split(" ")[1]

    connection = get_db_connection()
    try:
        with connection.cursor() as cursor:
            cursor.execute("SELECT username FROM sessions WHERE token = %s", (token,))
            user = cursor.fetchone()
            if not user:
                return jsonify({"error": "Invalid token"}), 401
            username = user['username']

            folder_id = data.get('folder_id')
            recipe_id = data.get('recipe_id')
            if not folder_id or not recipe_id:
                return jsonify({"error": "Missing folder_id or recipe_id"}), 400

            cursor.execute("DELETE FROM folder_recipes WHERE folder_id = %s AND recipe_id = %s", (folder_id, recipe_id))
            cursor.execute("""
                DELETE FROM bookmarks 
                WHERE recipe_id = %s AND user_id = 
                (SELECT user_id FROM users WHERE username = %s)
                AND NOT EXISTS (SELECT 1 FROM folder_recipes WHERE recipe_id = %s)
            """, (recipe_id, username, recipe_id))
            connection.commit()

        return jsonify({"message": "Bookmark deleted successfully"}), 200
    except Exception as e:
        return jsonify({"error": f"Delete bookmark error: {str(e)}"}), 500
    finally:
        connection.close()

@app.route('/folder', methods=['DELETE'])
def delete_folder():
    data = request.json or {}
    auth_header = request.headers.get('Authorization')
    if not auth_header or not auth_header.startswith('Bearer '):
        return jsonify({"error": "Invalid Authorization header format"}), 401
    token = auth_header.split(" ")[1]

    connection = get_db_connection()
    try:
        with connection.cursor() as cursor:
            cursor.execute("SELECT username FROM sessions WHERE token = %s", (token,))
            user = cursor.fetchone()
            if not user:
                return jsonify({"error": "Invalid token"}), 401
            username = user['username']

            folder_id = data.get('folder_id')
            if not folder_id:
                return jsonify({"error": "Missing folder_id"}), 400

            cursor.execute("DELETE FROM folder_recipes WHERE folder_id = %s", (folder_id,))
            cursor.execute("DELETE FROM folders WHERE id = %s AND username = %s", (folder_id, username))
            connection.commit()

        return jsonify({"message": "Folder and bookmarks deleted successfully"}), 200
    except Exception as e:
        return jsonify({"error": f"Delete folder error: {str(e)}"}), 500
    finally:
        connection.close()

@app.route('/suggestions/<int:folder_id>', methods=['GET'])
def generate_suggestions(folder_id):
    auth_header = request.headers.get('Authorization')
    if not auth_header or not auth_header.startswith('Bearer '):
        return jsonify({"error": "Invalid Authorization header format"}), 401
    token = auth_header.split(" ")[1]

    connection = get_db_connection()
    try:
        with connection.cursor() as cursor:
            cursor.execute("SELECT user_id FROM sessions WHERE token = %s", (token,))
            user = cursor.fetchone()
            if not user:
                return jsonify({"error": "Invalid token"}), 401
            user_id = user['user_id']

            cursor.execute("SELECT recipe_id FROM folder_recipes WHERE folder_id = %s", (folder_id,))
            folder_recipes = cursor.fetchall()

        if not folder_recipes:
            return jsonify({"error": "Folder is empty, no suggestions can be generated."}), 400

        folder_recipe_ids = [str(r['recipe_id']) for r in folder_recipes]

        folder_texts = []
        for rid in folder_recipe_ids:
            res = es.get(index=INDEX_NAME, id=rid, ignore=[404])
            if res.get("found"):
                src = res["_source"]
                combined = f"{src.get('name', '')} {src.get('description', '')} {src.get('instructions', '')}"
                folder_texts.append(combined.strip())

        es_query = {
            "query": {
                "bool": {
                    "must_not": {
                        "terms": {"recipe_id": folder_recipe_ids}
                    }
                }
            },
            "size": 100
        }

        es_res = es.search(index=INDEX_NAME, body=es_query)
        candidates = es_res.get("hits", {}).get("hits", [])

        candidate_texts = []
        candidate_meta = []
        for hit in candidates:
            src = hit["_source"]
            combined = f"{src.get('name', '')} {src.get('description', '')} {src.get('instructions', '')}"
            candidate_texts.append(combined.strip())
            candidate_meta.append({
                "recipe_id": src.get("recipe_id"),
                "name": src.get("name"),
                "description": src.get("description"),
                "instructions": src.get("instructions"),
                "image_url": clean_text(src.get("image_url", "")),
                "calories": src.get("calories"),
                "rating": src.get("rating", 0),
                "score": 0
            })

        all_texts = folder_texts + candidate_texts
        vectorizer = TfidfVectorizer(stop_words='english')
        tfidf_matrix = vectorizer.fit_transform(all_texts)

        folder_vecs = tfidf_matrix[:len(folder_texts)]
        candidate_vecs = tfidf_matrix[len(folder_texts):]

        sim_matrix = cosine_similarity(candidate_vecs, folder_vecs)
        sim_scores = np.mean(sim_matrix, axis=1)

        for i, score in enumerate(sim_scores):
            candidate_meta[i]['score'] = float(score)

        ranked = sorted(candidate_meta, key=lambda x: x['score'], reverse=True)[:10]
        return jsonify({"suggestions": ranked}), 200
    except Exception as e:
        return jsonify({"error": f"Suggestion error: {str(e)}"}), 500
    finally:
        connection.close()


✅ Updated max_result_window to 100000 for index: recipes


In [None]:
if __name__ == '__main__':
    app.run(debug=False)

 * Serving Flask app '__main__'
 * Debug mode: off


 * Running on http://127.0.0.1:5000
Press CTRL+C to quit
127.0.0.1 - - [16/Mar/2025 22:17:40] "OPTIONS /recommendations HTTP/1.1" 200 -
127.0.0.1 - - [16/Mar/2025 22:17:40] "GET /top-searches HTTP/1.1" 200 -
  res = es.get(index=INDEX_NAME, id=str(rid), ignore=[404])
127.0.0.1 - - [16/Mar/2025 22:17:40] "GET /recommendations HTTP/1.1" 200 -
127.0.0.1 - - [16/Mar/2025 22:18:28] "OPTIONS /recommendations HTTP/1.1" 200 -
127.0.0.1 - - [16/Mar/2025 22:18:28] "GET /top-searches HTTP/1.1" 200 -
127.0.0.1 - - [16/Mar/2025 22:18:28] "GET /recommendations HTTP/1.1" 200 -
127.0.0.1 - - [16/Mar/2025 22:21:06] "OPTIONS /recommendations HTTP/1.1" 200 -
127.0.0.1 - - [16/Mar/2025 22:21:06] "GET /top-searches HTTP/1.1" 200 -
127.0.0.1 - - [16/Mar/2025 22:21:06] "GET /recommendations HTTP/1.1" 200 -
127.0.0.1 - - [16/Mar/2025 22:21:47] "OPTIONS /recommendations HTTP/1.1" 200 -
127.0.0.1 - - [16/Mar/2025 22:21:47] "GET /top-searches HTTP/1.1" 200 -
127.0.0.1 - - [16/Mar/2025 22:21:47] "GET /recommendat

Total hits for 'cake': 10000


  response = es.get(index=INDEX_NAME, id=recipe_id, ignore=[404])
127.0.0.1 - - [16/Mar/2025 22:22:02] "GET /recipe/72813 HTTP/1.1" 200 -
127.0.0.1 - - [16/Mar/2025 22:22:05] "OPTIONS /recommendations HTTP/1.1" 200 -
127.0.0.1 - - [16/Mar/2025 22:22:05] "GET /top-searches HTTP/1.1" 200 -
127.0.0.1 - - [16/Mar/2025 22:22:05] "GET /recommendations HTTP/1.1" 200 -
127.0.0.1 - - [16/Mar/2025 22:25:13] "OPTIONS /recommendations HTTP/1.1" 200 -
127.0.0.1 - - [16/Mar/2025 22:25:13] "GET /top-searches HTTP/1.1" 200 -
127.0.0.1 - - [16/Mar/2025 22:25:13] "GET /recommendations HTTP/1.1" 200 -
127.0.0.1 - - [16/Mar/2025 22:25:49] "OPTIONS /recommendations HTTP/1.1" 200 -
127.0.0.1 - - [16/Mar/2025 22:25:49] "GET /top-searches HTTP/1.1" 200 -
127.0.0.1 - - [16/Mar/2025 22:25:49] "GET /recommendations HTTP/1.1" 200 -
127.0.0.1 - - [16/Mar/2025 22:25:50] "GET /top-searches HTTP/1.1" 200 -
127.0.0.1 - - [16/Mar/2025 22:25:50] "GET /recommendations HTTP/1.1" 200 -
127.0.0.1 - - [16/Mar/2025 22:27:07] 

Total hits for 'Fish': 10000


127.0.0.1 - - [16/Mar/2025 22:28:10] "GET /search?q=Fish&page=2 HTTP/1.1" 200 -


Total hits for 'Fish': 10000


127.0.0.1 - - [16/Mar/2025 22:28:12] "GET /search?q=Fish&page=3 HTTP/1.1" 200 -


Total hits for 'Fish': 10000


127.0.0.1 - - [16/Mar/2025 22:28:14] "GET /search?q=Fish&page=4 HTTP/1.1" 200 -


Total hits for 'Fish': 10000


127.0.0.1 - - [16/Mar/2025 22:28:15] "GET /search?q=Fish&page=3 HTTP/1.1" 200 -


Total hits for 'Fish': 10000


127.0.0.1 - - [16/Mar/2025 22:28:16] "GET /search?q=Fish&page=2 HTTP/1.1" 200 -


Total hits for 'Fish': 10000


127.0.0.1 - - [16/Mar/2025 22:28:16] "GET /search?q=Fish&page=1 HTTP/1.1" 200 -


Total hits for 'Fish': 10000


127.0.0.1 - - [16/Mar/2025 22:29:09] "OPTIONS /recommendations HTTP/1.1" 200 -
127.0.0.1 - - [16/Mar/2025 22:29:09] "GET /recommendations HTTP/1.1" 200 -
127.0.0.1 - - [16/Mar/2025 22:29:09] "GET /recommendations HTTP/1.1" 200 -
127.0.0.1 - - [16/Mar/2025 22:29:10] "GET /recommendations HTTP/1.1" 200 -
127.0.0.1 - - [16/Mar/2025 22:29:11] "OPTIONS /bookmarks HTTP/1.1" 200 -
  es_res = es.get(index="recipes", id=str(recipe["recipe_id"]), ignore=[404])
127.0.0.1 - - [16/Mar/2025 22:29:11] "GET /bookmarks HTTP/1.1" 200 -
127.0.0.1 - - [16/Mar/2025 22:29:14] "OPTIONS /suggestions/8 HTTP/1.1" 200 -
  res = es.get(index=INDEX_NAME, id=rid, ignore=[404])
127.0.0.1 - - [16/Mar/2025 22:29:14] "GET /suggestions/8 HTTP/1.1" 200 -
127.0.0.1 - - [16/Mar/2025 22:37:05] "OPTIONS /suggestions/8 HTTP/1.1" 200 -
127.0.0.1 - - [16/Mar/2025 22:37:05] "GET /suggestions/8 HTTP/1.1" 200 -
127.0.0.1 - - [16/Mar/2025 22:37:09] "GET /recipe/118 HTTP/1.1" 200 -
127.0.0.1 - - [16/Mar/2025 22:37:11] "OPTIONS /sugg

Total hits for 'Chicken': 10000


127.0.0.1 - - [16/Mar/2025 22:38:21] "GET /search?q=Chicken&page=2 HTTP/1.1" 200 -


Total hits for 'Chicken': 10000


127.0.0.1 - - [16/Mar/2025 22:38:42] "GET /recipe/463779 HTTP/1.1" 200 -
127.0.0.1 - - [16/Mar/2025 22:38:44] "OPTIONS /recommendations HTTP/1.1" 200 -
127.0.0.1 - - [16/Mar/2025 22:38:44] "GET /top-searches HTTP/1.1" 200 -
127.0.0.1 - - [16/Mar/2025 22:38:44] "GET /recommendations HTTP/1.1" 200 -
127.0.0.1 - - [16/Mar/2025 22:38:45] "GET /recipe/463779 HTTP/1.1" 200 -
127.0.0.1 - - [16/Mar/2025 22:38:57] "OPTIONS /recommendations HTTP/1.1" 200 -
127.0.0.1 - - [16/Mar/2025 22:38:57] "GET /top-searches HTTP/1.1" 200 -
127.0.0.1 - - [16/Mar/2025 22:38:57] "GET /recommendations HTTP/1.1" 200 -
127.0.0.1 - - [16/Mar/2025 22:38:58] "OPTIONS /bookmarks HTTP/1.1" 200 -
127.0.0.1 - - [16/Mar/2025 22:38:58] "GET /bookmarks HTTP/1.1" 200 -
127.0.0.1 - - [16/Mar/2025 22:39:06] "OPTIONS /recommendations HTTP/1.1" 200 -
127.0.0.1 - - [16/Mar/2025 22:39:06] "GET /top-searches HTTP/1.1" 200 -
127.0.0.1 - - [16/Mar/2025 22:39:06] "GET /recommendations HTTP/1.1" 200 -
127.0.0.1 - - [16/Mar/2025 22:39:0