In [89]:
# Install the Flask-JWT-Extended package into the kernel's environment.
%pip install "flask-jwt-extended"


Note: you may need to restart the kernel to use updated packages.


In [90]:
import os
import pickle
import re
import string
import time

import joblib
import nltk
import numpy as np
import pandas as pd
from elasticsearch import Elasticsearch, NotFoundError, helpers
from flask import Flask, request, make_response, jsonify
from flask_jwt_extended import create_access_token, get_jwt, jwt_required, JWTManager
from nltk.corpus import stopwords
from nltk.stem import PorterStemmer
from nltk.tokenize import word_tokenize
from scipy.sparse import hstack


# Fetch the NLTK stopword list and the Punkt tokenizer data. The recorded
# output shows both packages were already up to date on this machine.
nltk.download("stopwords")
nltk.download("punkt")


[nltk_data] Downloading package stopwords to /home/amogus/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!
[nltk_data] Downloading package punkt to /home/amogus/nltk_data...
[nltk_data]   Package punkt is already up-to-date!


True

In [91]:
# Flask application plus the shared resources the route handlers read off `app`.
app = Flask(__name__)

# Connection settings and secrets can be supplied through environment variables
# so they no longer have to live in the notebook. The literals are kept only as
# fallbacks so the notebook still starts exactly as before when nothing is set.
# NOTE(review): these fallback secrets have been committed in clear text;
# rotate them and drop the fallbacks once the environment is configured.
app.es_client = Elasticsearch(
    os.environ.get("ES_URL", "https://127.0.0.1:9200"),
    basic_auth=(
        os.environ.get("ES_USERNAME", "elastic"),
        os.environ.get("ES_PASSWORD", "yHcm1Pyq=jnDL_4gw93i"),
    ),
    ca_certs=os.environ.get("ES_CA_CERTS", "http_ca.crt"),
)

# User table kept in memory; /register appends to it and writes it back.
app.user_df = pd.read_parquet("resources/food/user.parquet")

# Key used by Flask-JWT-Extended to sign access tokens.
app.config["JWT_SECRET_KEY"] = os.environ.get("JWT_SECRET_KEY", "recipeme79")
jwt = JWTManager(app)


# Query used by /recommended: every recipe matches, and the score is the
# Bayesian average of its rating,
#     (R * v + C * m) / (v + m)
# with R = AggregatedRating, v = ReviewCount, C = 4.632013709922984 (prior
# rating) and m = 100 (prior weight). The denominator must be v + m; it was
# previously R + m, which made the score grow without bound with ReviewCount
# instead of shrinking sparsely reviewed recipes toward C.
recommend_query = {
    "function_score": {
        "query": {"match_all": {}},
        "functions": [
            {
                "script_score": {
                    "script": {
                        "source": "(doc['AggregatedRating'].value * doc['ReviewCount'].value + 4.632013709922984 * 100) / (doc['ReviewCount'].value + 100)"
                    },
                },
                "weight": 1,
            },
        ],
        "score_mode": "multiply",
    }
}

@app.route("/user-detail", methods=["GET"])
@jwt_required()
def get_jwt_data():
    """Return the claims carried by the caller's access token as JSON.

    The route is protected by ``jwt_required``, so the body only runs for
    requests that present a valid token.
    """
    cors_headers = {"Access-Control-Allow-Origin": "*"}
    payload = jsonify(dict(get_jwt()))
    return make_response(payload, 200, cors_headers)

@app.route("/login", methods=["POST"])
def login():
    """Check the posted credentials and issue a JWT access token.

    Reads ``username`` and ``password`` from the JSON request body and looks
    for a row of ``app.user_df`` matching both.

    Returns:
        200 with ``{"access_token": ...}`` when a row matches; the token's
        identity is the username and its ``disp`` claim is the display name.
        401 with ``{"msg": "Bad username or password"}`` otherwise.
    """
    username = request.json.get("username", None)
    password = request.json.get("password", None)
    cors_headers = {"Access-Control-Allow-Origin": "*"}

    # NOTE(review): passwords are compared (and therefore stored) in plain
    # text; hashing them needs a change to the stored user table.
    credentials_match = (app.user_df["username"] == username) & (app.user_df["password"] == password)
    # Filter the frame the mask was built from, so mask and rows always share
    # the same index.
    matches = app.user_df[credentials_match]
    if matches.empty:
        return make_response(jsonify({"msg": "Bad username or password"}), 401, cors_headers)

    # The matched record is deliberately not printed: it contains the password.
    user = matches.iloc[0].to_dict()
    additional_claims = {"disp": user.get("display_name")}
    access_token = create_access_token(identity=user.get("username"), additional_claims=additional_claims)
    return make_response(jsonify(access_token=access_token), 200, cors_headers)


@app.route("/register", methods=["POST"])
def register():
    """Create a user account and issue a JWT access token for it.

    Reads ``username``, ``password`` and ``display_name`` from the JSON
    request body, appends the new user to ``app.user_df`` and writes the
    table back to ``resources/food/user.parquet``.

    Returns:
        200 with ``{"access_token": ...}`` on success.
        400 with ``{"msg": "Missing username or password"}`` when either
        field is absent or empty.
        401 with ``{"msg": "Bad username"}`` when the username is taken.
    """
    username = request.json.get("username", None)
    password = request.json.get("password", None)
    display_name = request.json.get("display_name", None)
    cors_headers = {"Access-Control-Allow-Origin": "*"}

    if not username or not password:
        return make_response(jsonify({"msg": "Missing username or password"}), 400, cors_headers)

    # Compare against the submitted value. The previous check compared with
    # the literal string "username", so duplicates were never rejected.
    if (app.user_df["username"] == username).any():
        return make_response(jsonify({"msg": "Bad username"}), 401, cors_headers)

    new_user = pd.DataFrame(
        [[username, password, display_name]],
        columns=["username", "password", "display_name"],
    )
    app.user_df = pd.concat([app.user_df, new_user], ignore_index=True)
    app.user_df.to_parquet("resources/food/user.parquet")

    additional_claims = {"disp": display_name}
    access_token = create_access_token(identity=username, additional_claims=additional_claims)
    return make_response(jsonify(access_token=access_token), 200, cors_headers)


def get_search_query(query: str) -> dict:
    """Build the Elasticsearch query body used for free-text recipe search.

    The text is matched against four recipe fields through ``dis_max`` (best
    field wins, the others contribute with a 0.3 tie breaker). Two score
    functions are then multiplied together: the Bayesian average of the
    recipe's rating and the text relevance ``_score``.

    Args:
        query: The user's search text.

    Returns:
        A ``function_score`` query dictionary.
    """
    text_fields = ("Name", "RecipeIngredientParts", "RecipeInstructions", "Keywords")

    # Bayesian average (R * v + C * m) / (v + m) with R = AggregatedRating,
    # v = ReviewCount, C = 4.632013709922984 and m = 100. The denominator is
    # v + m; it was previously R + m, which is not a weighted mean.
    bayesian_rating = (
        "(doc['AggregatedRating'].value * doc['ReviewCount'].value + 4.632013709922984 * 100)"
        " / (doc['ReviewCount'].value + 100)"
    )

    return {
        "function_score": {
            "query": {
                "dis_max": {
                    "queries": [{"match": {field: query}} for field in text_fields],
                    "tie_breaker": 0.3,
                }
            },
            "functions": [
                {
                    "script_score": {"script": {"source": bayesian_rating}},
                    "weight": 1,
                },
                {
                    "script_score": {"script": {"source": "_score"}},
                    "weight": 1,
                },
            ],
            "score_mode": "multiply",
        }
    }


@app.route("/recommended", methods=["GET"])
def get_recommended():
    """Return the six top-scored recipes under ``recommend_query``.

    Returns:
        200 with ``status``, ``total_hit`` (total match count reported by
        Elasticsearch), ``results`` (one record per hit: its ``score`` plus
        the fields of its ``_source``) and ``elapse`` (seconds spent in the
        Elasticsearch call).
    """
    start = time.time()
    response_object = {"status": "success"}
    results = app.es_client.search(
        index="recipes",
        size=6,
        query=recommend_query,
    )
    end = time.time()

    hits = results["hits"]["hits"]
    total_hit = results["hits"]["total"]["value"]
    if hits:
        # Column names come from the first hit's _source.
        results_df = pd.DataFrame(
            [[hit["_score"], *hit["_source"].values()] for hit in hits],
            columns=["score"] + list(hits[0]["_source"].keys()),
        )
    else:
        # No hits: answer with an empty result list instead of failing on
        # hits[0].
        results_df = pd.DataFrame()

    response_object["total_hit"] = total_hit
    response_object["results"] = results_df.to_dict("records")
    response_object["elapse"] = end - start
    return make_response(response_object, 200, {"Access-Control-Allow-Origin": "*"})


@app.route("/search", methods=["GET"])
def search():
    """Search recipes by the text in the ``query`` URL parameter.

    Returns:
        200 with ``status``, ``total_hit``, ``results`` (up to 12 records,
        each the hit's ``score`` plus the fields of its ``_source``) and
        ``elapse`` (seconds spent in the Elasticsearch call).
        400 with ``status`` "error" and a ``msg`` when ``query`` is missing.
    """
    start = time.time()
    response_object = {"status": "success"}
    cors_headers = {"Access-Control-Allow-Origin": "*"}

    # First value of the parameter, or None when it was not sent at all.
    query = request.args.get("query")
    if query is None:
        return make_response(
            {"status": "error", "msg": "Missing 'query' parameter"}, 400, cors_headers
        )

    results = app.es_client.search(
        index="recipes",
        size=12,
        query=get_search_query(query),
    )
    end = time.time()

    hits = results["hits"]["hits"]
    total_hit = results["hits"]["total"]["value"]
    if len(hits) > 0:
        # Column names come from the first hit's _source.
        results_df = pd.DataFrame(
            [[hit["_score"], *hit["_source"].values()] for hit in hits],
            columns=["score"] + list(hits[0]["_source"].keys()),
        )
    else:
        results_df = pd.DataFrame()

    response_object["total_hit"] = total_hit
    response_object["results"] = results_df.to_dict("records")
    response_object["elapse"] = end - start
    return make_response(response_object, 200, cors_headers)


@app.route("/recipes/<int:id>", methods=["GET"])
def get_by_id(id: int):
    """Fetch a single recipe document from the ``recipes`` index by its id.

    Args:
        id: Document id taken from the URL.

    Returns:
        200 with ``status``, ``results`` (a one-element list holding the
        document's ``_source`` fields) and ``elapse`` (seconds spent in the
        Elasticsearch call).
        404 with ``status`` "error" and a ``msg`` when no document has
        that id.
    """
    start = time.time()
    response_object = {"status": "success"}
    cors_headers = {"Access-Control-Allow-Origin": "*"}

    try:
        result = app.es_client.get(index="recipes", id=id)
    except NotFoundError:
        # The client raises for unknown ids; report that as a 404 rather
        # than letting it surface as a server error.
        return make_response(
            {"status": "error", "msg": f"Recipe {id} not found"}, 404, cors_headers
        )
    end = time.time()

    result_df = pd.DataFrame(
        [[*result["_source"].values()]],
        columns=list(result["_source"].keys()),
    )
    response_object["results"] = result_df.to_dict("records")
    response_object["elapse"] = end - start
    return make_response(response_object, 200, cors_headers)


def _merge_term_options(entries):
    """Merge the options several term suggesters returned for one token.

    Args:
        entries: One suggest entry per suggester, all for the same token
            position; each entry carries an ``options`` list whose items
            have ``text``, ``score`` and ``freq``.

    Returns:
        A dict mapping candidate text to ``{"score", "freq"}``. When the same
        text is proposed more than once, its score becomes the
        frequency-weighted mean of the scores and its freq the sum.
    """
    merged = {}
    for entry in entries:
        for option in entry["options"]:
            text, score, freq = option["text"], option["score"], option["freq"]
            if text not in merged:
                merged[text] = {"score": score, "freq": freq}
            else:
                seen = merged[text]
                # Update the score first: it needs the frequency seen so far.
                seen["score"] = (seen["score"] * seen["freq"] + score * freq) / (seen["freq"] + freq)
                seen["freq"] = seen["freq"] + freq
    return merged


def _best_candidate(candidates):
    """Return the candidate text with the highest Bayesian-smoothed score.

    Each score is pulled toward the frequency-weighted mean score ``R`` of
    all candidates, using the mean frequency ``W`` as the prior weight.
    """
    df = pd.DataFrame.from_dict(candidates, orient="index")
    R = (df["score"] * df["freq"]).sum() / df["freq"].sum()
    W = df["freq"].mean()
    df["bayes_score"] = (df["score"] * df["freq"] + W * R) / (df["freq"] + W)
    return df.sort_values("bayes_score", ascending=False).head(1).index[0]


@app.route("/suggest", methods=["GET"])
def suggest():
    """Propose a spelling-corrected version of the ``query`` URL parameter.

    Runs four term suggesters (``all_texts``, ``Name``, ``Description``,
    ``RecipeInstructions``) over the query, merges their options per token
    and replaces each token that has candidates with the best-scoring one.

    Returns:
        200 with ``status``, ``suggest`` (the corrected query, tokens joined
        by single spaces) and ``elapse`` (seconds).
        400 with ``status`` "error" and a ``msg`` when ``query`` is missing.
    """
    start = time.time()
    response_object = {"status": "success"}
    cors_headers = {"Access-Control-Allow-Origin": "*"}

    # First value of the parameter, or None when it was not sent at all.
    query = request.args.get("query")
    if query is None:
        return make_response(
            {"status": "error", "msg": "Missing 'query' parameter"}, 400, cors_headers
        )

    suggest_dictionary = {
        "text": query,
        "suggest-1": {"term": {"field": "all_texts"}},
        "suggest-2": {"term": {"field": "Name"}},
        "suggest-3": {"term": {"field": "Description"}},
        "suggest-4": {"term": {"field": "RecipeInstructions"}},
    }

    query_dictionary = {"suggest": suggest_dictionary}
    res = app.es_client.search(
        index='recipes',
        body=query_dictionary)

    # Assumes every suggester returns one entry per analyzed token, in token
    # order. zip() walks the suggesters token by token and stops at the
    # shortest list, so differing lengths do not raise.
    per_token = [
        (entries[0]["text"], _merge_term_options(entries))
        for entries in zip(*res["suggest"].values())
    ]

    tokens = query.split()
    # The analyzer may split the text differently from str.split() (for
    # example on punctuation). Positions only line up when the counts agree;
    # otherwise fall back to the analyzed token text instead of indexing
    # `tokens` out of range or with the wrong word.
    aligned = len(per_token) == len(tokens)

    out = []
    for i, (analyzed_text, candidates) in enumerate(per_token):
        if candidates:
            out.append(_best_candidate(candidates))
        else:
            out.append(tokens[i] if aligned else analyzed_text)

    end = time.time()
    response_object["suggest"] = " ".join(out)
    response_object["elapse"] = end - start
    return make_response(response_object, 200, cors_headers)

@app.after_request
def apply_caching(response):
    """Set permissive CORS headers on every outgoing response.

    Despite its name, this hook does no caching: it sets the allowed origin
    and allowed request headers to "*" and returns the same response object.
    """
    for header_name in ("Access-Control-Allow-Origin", "Access-Control-Allow-Headers"):
        response.headers[header_name] = "*"
    return response


In [92]:
# Start Flask's built-in server with debug mode off, listening on all network
# interfaces (the recorded log shows port 5000). The call serves requests
# until interrupted, so the cell does not finish on its own.
app.run(debug=False, host="0.0.0.0")


 * Serving Flask app '__main__'
 * Debug mode: off


 * Running on all addresses (0.0.0.0)
 * Running on http://127.0.0.1:5000
 * Running on http://10.148.0.2:5000
Press CTRL+C to quit
180.183.224.101 - - [13/Mar/2024 16:05:27] "GET /suggest?query=boild%20beef%20wellingon HTTP/1.1" 200 -
180.183.224.101 - - [13/Mar/2024 16:25:07] "GET /recommended HTTP/1.1" 200 -
180.183.224.101 - - [13/Mar/2024 16:25:08] "GET /recommended HTTP/1.1" 200 -
180.183.224.101 - - [13/Mar/2024 16:25:12] "GET /search?query=beef%20ewellin%20ton HTTP/1.1" 200 -
180.183.224.101 - - [13/Mar/2024 16:25:14] "GET /search?query=beef%20ewellinton HTTP/1.1" 200 -
162.142.125.212 - - [13/Mar/2024 16:26:34] code 400, message Bad request version ('À\x14À')
162.142.125.212 - - [13/Mar/2024 16:26:34] "\x16\x03\x01\x00î\x01\x00\x00ê\x03\x03Â\x9dfþê<\x98Ég9ªÛ\x0cêñ¢;\x19ZL\x90\x15zb÷Ü\x86\x00FDw¡ ¡\x0f¤%,g*,yFÔw¤ª\x8dPÃ\x8f²ó°w6Á\x95_\x03\x99;ä!@\x00&Ì¨Ì©À/À0À+À,À\x13À\x09À\x14À" HTTPStatus.BAD_REQUEST -
162.142.125.212 - - [13/Mar/2024 16:26:37] "GET / HTTP/1.1" 404 -
162.142.