In [6]:
import pandas as pd

# Load recipe data from CSV
recipes_df = pd.read_csv('../Resource/completed_recipes.csv')
print(f"Loaded {len(recipes_df)} recipes.")


Loaded 522517 recipes.


In [7]:
from elasticsearch import Elasticsearch

es = Elasticsearch(
    "https://localhost:9200",
    basic_auth=("elastic", "D*d4-0+Kl+lxfbbzh5ut"),
    ca_certs="~/http_ca.crt"
)

if es.ping():
    print("Elasticsearch connect for now!!")
else:
    print("failed to connect")

Elasticsearch connect for now!!


In [8]:
import json
import numpy as np
from elasticsearch.helpers import bulk
import re

# Define index name and sample size for development
index_name = "recipes"
sample_size = 1000  # Set the sample size for testing (adjust as needed)

# Delete the index if it already exists
es.indices.delete(index=index_name, ignore=[400, 404])

mapping = {
    "settings": {
        "analysis": {
            "tokenizer": {
                "ngram_tokenizer": {
                    "type": "ngram",
                    "min_gram": 2,  # Minimum length of n-grams
                    "max_gram": 3,  # Maximum length of n-grams
                    "token_chars": ["letter", "digit"]
                }
            },
            "analyzer": {
                "default": {
                    "type": "english"
                },
                "ngram_analyzer": {  # Custom N-gram analyzer for partial word search
                    "type": "custom",
                    "tokenizer": "ngram_tokenizer",
                    "filter": ["lowercase"]
                }
            }
        }
    },
    "mappings": {
        "properties": {
            "recipe_id": {"type": "keyword"},
            "name": { 
                "type": "text", 
                "analyzer": "english",
                "fields": { 
                    "ngram": {  # N-gram variant for better partial matches
                        "type": "text", 
                        "analyzer": "ngram_analyzer"
                    }
                }
            },
            "description": {"type": "text", "analyzer": "english"},
            "instructions": {"type": "text", "analyzer": "english"},
            "text": {"type": "text", "analyzer": "english"},
            "calories": {"type": "float"},
            "rating": {"type": "float"},
            "image_url": {"type": "keyword"}
        }
    }
}

es.indices.create(index=index_name, body=mapping)
print(f"Created index: {index_name}")

recipes_sample = recipes_df.head(sample_size)

def clean_text(text):
    """Remove unwanted characters like c("..."), quotes, and escape sequences."""
    if not isinstance(text, str):  
        return ""  

    text = re.sub(r'c\("', '', text)
    text = re.sub(r'"\)', '', text)

    text = text.replace('\\"', '')  
    text = text.replace('"', '')    
    text = text.replace("\\", '')  
    cleaned_urls = re.sub(r'\s+', ' ', text.strip())
    # Split the string by ', ' (comma followed by a space)
    urls = cleaned_urls.split(', ')
    return urls

def clean_instructions_combined_v2(instructions):
    """Cleans and formats recipe instructions into a single, well-structured sentence."""
    if isinstance(instructions, list):
        instructions = " ".join(instructions)  # Join list into a single string
    
    if not isinstance(instructions, str):
        return ""

    # Remove unwanted wrapping (c(...)) and extra quotes
    instructions = re.sub(r'c\s*\(\s*', '', instructions)  # Remove "c("
    instructions = re.sub(r'\s*\)$', '', instructions)  # Remove trailing ")"
    instructions = instructions.strip('"')

    # Fix letter-by-letter spacing issues (e.g., "M i x" → "Mix in")
    words = instructions.split()  # Split into words
    cleaned_words = []
    buffer = ""

    for word in words:
        if len(word) == 1:  # If it's a single letter, buffer it
            buffer += word
        else:
            if buffer:
                cleaned_words.append(buffer + word)  # Merge buffer with current word
                buffer = ""
            else:
                cleaned_words.append(word)

    if buffer:  # Append remaining buffered text
        cleaned_words.append(buffer)

    instructions = " ".join(cleaned_words)

    # Remove redundant punctuation and ensure proper spacing
    instructions = re.sub(r'\.\s*\.', '.', instructions)  # Remove repeated periods
    instructions = re.sub(r'\s*\.\s*', '. ', instructions)  # Ensure proper space after periods
    instructions = re.sub(r'\s*,', ',', instructions)  # Remove spaces before commas

    # Remove leading/trailing unwanted characters (like extra quotes)
    instructions = re.sub(r'(^\"|\"$)', '', instructions)  # Remove leading/trailing quotes

    # Ensure proper formatting of sentences and remove unnecessary escape sequences
    instructions = instructions.replace('\\"', '')  # Remove escaped quotes
    instructions = instructions.replace('", "', ', ')  # Convert improperly formatted list items into a natural sentence

    # Convert sentence breaks into commas for a continuous explanation
    instructions = re.sub(r'\s*\.\s*', ', ', instructions)  # Convert periods into commas for a smoother flow

    # Fix double commas and extra spaces
    instructions = re.sub(r',\s*,+', ', ', instructions)  # Remove repeated commas
    instructions = re.sub(r'\s+', ' ', instructions)  # Remove extra spaces

    # Ensure only one period at the very end
    instructions = instructions.strip().rstrip(',') + "."  # Remove trailing comma and add a period

    return instructions



def generate_docs(df):
    for idx, row in df.iterrows():
        recipe_id = str(int(float(row.get('RecipeId', idx))))
        name = row.get('Name', '')
        description = row.get('Description', '')  # Replacing "ingredients"
        instructions = clean_instructions_combined_v2(row.get('RecipeInstructions', []))  # ✅ Clean instructions at indexing
        text = clean_text(row.get('text', ''))  # Clean text
        calories = float(row.get('Calories', 0))
        image_url = row.get('image_link', [])

        try:
            rating = float(row['AggregatedRating']) if not np.isnan(row['AggregatedRating']) else 0
        except (KeyError, TypeError, ValueError):
            rating = 0
        
        doc = {
            "_op_type": "index",
            "_index": index_name,
            "_id": recipe_id,
            "_source": {
                "recipe_id": recipe_id,
                "name": name,
                "description": description,  # Updated field
                "instructions": instructions,  # ✅ Cleaned instructions
                "text": text,  # Updated field
                "calories": calories,
                "rating": rating,
                "image_url": image_url
            }
        }
        if not np.isnan(row.get('AggregatedRating', np.nan)):
            doc["_source"]["rating"] = float(row["AggregatedRating"])
        yield doc


bulk(es, generate_docs(recipes_sample))

print(f"Indexed {len(recipes_sample)} recipes into Elasticsearch.")


  es.indices.delete(index=index_name, ignore=[400, 404])


Created index: recipes
Indexed 1000 recipes into Elasticsearch.


### Flask

In [15]:
from flask import Flask, request, jsonify, session
from flask_cors import CORS
from werkzeug.security import generate_password_hash, check_password_hash
import pymysql
import re
from elasticsearch import Elasticsearch
import random

In [17]:
app = Flask(__name__)
app.secret_key = "SOME_RANDOM_SECRET_KEY"
CORS(app)

connection = pymysql.connect(
    host='127.0.0.1',  # or 'localhost'
    port=3309,         # Mapped port from Docker
    user='root',       # or 'user'
    password='root_password', 
    db='my_database',
    cursorclass=pymysql.cursors.DictCursor
)

# Connect to Elasticsearch 
INDEX_NAME = "recipes"

@app.route('/search', methods=['GET'])
def search():
    """Search endpoint for querying recipes."""
    query = request.args.get('q', '')  
    if not query:
        return jsonify({"error": "Query parameter 'q' is required"}), 400
    
    # Elasticsearch search query
    es_query = {
        "query": {
            "multi_match": {
                "query": query,
                "fields": ["name^3", "description^2", "text", "name.ngram^2"],  
                "fuzziness": "AUTO" 
            }
        }
    }
    
    response = es.search(index=INDEX_NAME, body=es_query)
    results = [
        {
            "recipe_id": hit["_source"].get("recipe_id"),
            "name": hit["_source"].get("name"),
            "description": hit["_source"].get("description"),  
            "text": hit["_source"].get("text"), 
            "image_url": clean_text(hit["_source"].get("image_url")),
            "calories": hit["_source"].get("calories"),
            "rating": hit["_source"].get("rating", 0),
            "score": hit["_score"]
            
        }
        for hit in response.get("hits", {}).get("hits", [])
    ]
    
    return jsonify({"results": results})

@app.route('/recipe/<recipe_id>', methods=['GET'])
def get_recipe(recipe_id):
    """Fetches a single recipe by ID."""
    response = es.get(index=INDEX_NAME, id=recipe_id, ignore=[404])
    
    if response and response.get("found"):
        source = response["_source"]
        
        # Clean instructions dynamically
        source["instructions"] = clean_instructions_combined_v2(source.get("instructions", ""))
        
        # Clean image URL using clean_text
        image_urls = clean_text(source.get("image_url", ""))
        source["image_url"] = image_urls[0] if image_urls else ""

        return jsonify(source)
    
    return jsonify({"error": "Recipe not found"}), 404

@app.route('/register', methods=['POST'])
def register():
    data = request.json
    username = data.get('username')
    password = data.get('password')

    if not username or not password:
        return jsonify({"error": "Missing username or password"}), 400

    # Hash the password
    password_hash = generate_password_hash(password)

    with connection.cursor() as cursor:
        # Check if user already exists
        cursor.execute("SELECT user_id FROM users WHERE username = %s", (username,))
        existing = cursor.fetchone()
        if existing:
            return jsonify({"error": "Username already taken"}), 400

        # Insert new user
        cursor.execute(
            "INSERT INTO users (username, password_hash) VALUES (%s, %s)",
            (username, password_hash)
        )
        connection.commit()

    return jsonify({"message": "Registration successful"}), 200

@app.route('/login', methods=['POST'])
def login():
    data = request.json
    username = data.get('username')
    password = data.get('password')

    if not username or not password:
        return jsonify({"error": "Missing username or password"}), 400

    with connection.cursor() as cursor:
        cursor.execute("SELECT user_id, username, password_hash FROM users WHERE username = %s", (username,))
        user = cursor.fetchone()
        if not user:
            return jsonify({"error": "User not found"}), 404

        # Check password
        if check_password_hash(user['password_hash'], password):
            token = str(random.randint(1000,9999))
            # Optionally store user info in session
            session['user_id'] = user['user_id']
            session['username'] = user['username']
            session['token'] = token
            
            return jsonify({"message": "Login successful", "username": user['username'], "token": token}), 200
        else:
            return jsonify({"error": "Incorrect password"}), 401
        
@app.route('/session', methods=['GET'])
def check_session():
    if 'username' in session:
        return jsonify({"logged_in": True, "username": session['username']}), 200
    else:
        return jsonify({"logged_in": False}), 200
    
@app.route('/logout', methods=['POST'])
def logout():
    session.clear()
    return jsonify({"message": "Logged out"}), 200

@app.route('/bookmark', methods=['POST'])
def add_bookmark():
    """
    Expects JSON:
    {
      "recipe_id": <int>,
      "folder_name": <str>,
      "rating": <int>
    }
    """
    if 'user_id' not in session:
        return jsonify({"error": "Not logged in"}), 401

    data = request.json or {}
    recipe_id = data.get('recipe_id')
    folder_name = data.get('folder_name', '').strip()
    rating = data.get('rating', 0)

    if not recipe_id:
        return jsonify({"error": "Missing recipe_id"}), 400
    if not folder_name:
        return jsonify({"error": "Missing folder_name"}), 400

    user_id = session['user_id']

    try:
        with connection.cursor() as cursor:
            # 1) Find or create folder
            sql_folder = """
                SELECT id FROM folders
                WHERE user_id = %s AND folder_name = %s
            """
            cursor.execute(sql_folder, (user_id, folder_name))
            folder_row = cursor.fetchone()

            if folder_row:
                folder_id = folder_row['id']
            else:
                sql_insert_folder = """
                    INSERT INTO folders (user_id, folder_name)
                    VALUES (%s, %s)
                """
                cursor.execute(sql_insert_folder, (user_id, folder_name))
                folder_id = cursor.lastrowid

            # 2) Insert or ignore into folder_recipes
            sql_fr = """
                INSERT IGNORE INTO folder_recipes (folder_id, recipe_id)
                VALUES (%s, %s)
            """
            cursor.execute(sql_fr, (folder_id, recipe_id))

            # 3) Insert or update bookmarks
            sql_bm = """
                INSERT INTO bookmarks (user_id, recipe_id, rating)
                VALUES (%s, %s, %s)
                ON DUPLICATE KEY UPDATE rating = VALUES(rating)
            """
            cursor.execute(sql_bm, (user_id, recipe_id, rating))

            connection.commit()

        return jsonify({"message": "Bookmark added successfully"}), 200

    except pymysql.MySQLError as e:
        return jsonify({"error": str(e)}), 500
    
@app.route('/bookmarks', methods=['GET'])
def get_bookmarks():
    """
    Returns:
    {
      "folders": [
        {
          "folder_id": <int>,
          "folder_name": <str>,
          "recipes": [
            { "recipe_id": <int>, "rating": <int> },
            ...
          ]
        },
        ...
      ]
    }
    """
    if 'user_id' not in session:
        return jsonify({"error": "Not logged in"}), 401

    user_id = session['user_id']

    try:
        with connection.cursor() as cursor:
            # Fetch all folders for this user
            sql_folders = """
                SELECT id AS folder_id, folder_name
                FROM folders
                WHERE user_id = %s
            """
            cursor.execute(sql_folders, (user_id,))
            folder_rows = cursor.fetchall()

            folders_data = []
            for frow in folder_rows:
                folders_data.append({
                    "folder_id": frow['folder_id'],
                    "folder_name": frow['folder_name'],
                    "recipes": []
                })

            # For each folder, join folder_recipes + bookmarks
            for folder_obj in folders_data:
                folder_id = folder_obj["folder_id"]
                sql_join = """
                    SELECT fr.recipe_id, b.rating
                    FROM folder_recipes fr
                    JOIN bookmarks b ON b.recipe_id = fr.recipe_id
                    WHERE fr.folder_id = %s
                      AND b.user_id = %s
                """
                cursor.execute(sql_join, (folder_id, user_id))
                recipe_rows = cursor.fetchall()

                folder_obj["recipes"] = [
                    {
                        "recipe_id": row["recipe_id"],
                        "rating": row["rating"]
                    }
                    for row in recipe_rows
                ]

        return jsonify({"folders": folders_data}), 200

    except pymysql.MySQLError as e:
        return jsonify({"error": str(e)}), 500


In [None]:
if __name__ == '__main__':
    app.run(debug=False)

 * Serving Flask app '__main__'
 * Debug mode: off


 * Running on http://127.0.0.1:5000
Press CTRL+C to quit
127.0.0.1 - - [14/Mar/2025 17:16:21] "POST /logout HTTP/1.1" 200 -
127.0.0.1 - - [14/Mar/2025 17:16:27] "OPTIONS /login HTTP/1.1" 200 -
127.0.0.1 - - [14/Mar/2025 17:16:27] "POST /login HTTP/1.1" 200 -
