## Cloud Function Script

##### Note:
##### This cloud function returns recipe recommendations for a given userId

[What is a Cloud Function?](https://cloud.google.com/vertex-ai/docs/pipelines/trigger-pubsub)

### Cloud Function

In [None]:
import functions_framework
import json

from google.cloud import bigquery
from google.cloud import storage
import pickle
import pandas as pd
import surprise

def query_data(sql_query, cust_user_id):
    """Execute a BigQuery query with the user id bound as ``@user_id``.

    Args:
        sql_query: SQL text to run. It may reference the named parameter
            ``@user_id``.
        cust_user_id: User id; it is converted with ``int()`` before being
            bound as an INT64 parameter.

    Returns:
        Whatever ``QueryJob.result()`` returns for the submitted job.
    """
    bq_client = bigquery.Client(project='brave-watch-414204')

    # The single named parameter made available to the query.
    user_id_param = bigquery.ScalarQueryParameter(
        "user_id", "INT64", int(cust_user_id)
    )
    config = bigquery.QueryJobConfig(query_parameters=[user_id_param])

    # Submit the job and hand back its result.
    return bq_client.query(sql_query, job_config=config).result()

# Function 1
# Return 'CF' or 'CB' based on how many distinct recipes this user has rated.
# Input parameter: cust_user_id -> sent by the end application.
def determine_rcs_model(cust_user_id):
    """Pick which recommender to use for a user.

    Counts the distinct recipes the user has in the interactions table and
    compares that count with a fixed threshold of 115.

    Args:
        cust_user_id: User id; must be convertible with ``int()``.

    Returns:
        ``'CF'`` when the user has more than 115 distinct rated recipes,
        otherwise ``'CB'``.

    Raises:
        ValueError / TypeError: If ``cust_user_id`` cannot be converted to
            an integer.
    """
    # Validate the id first so a bad value fails before any client is built.
    user_id = int(cust_user_id)

    # Initialize BigQuery client
    client = bigquery.Client(project='brave-watch-414204')

    # The id is bound through the @user_id parameter rather than being
    # formatted into the SQL text.
    sql_query = """
    SELECT COUNT(DISTINCT recipe_id) AS num_occurrences
    FROM `brave-watch-414204.RecipeQuery.interactions`
    WHERE user_id = @user_id
    """

    # Define query parameters
    query_params = [
        bigquery.ScalarQueryParameter("user_id", "INT64", user_id)
    ]

    # Execute the query job
    job_config = bigquery.QueryJobConfig(query_parameters=query_params)
    query_job = client.query(sql_query, job_config=job_config)

    # An aggregate without GROUP BY yields exactly one row.
    num_occurrences = next(iter(query_job.result())).num_occurrences

    return 'CF' if num_occurrences > 115 else 'CB'

# Function 3
# Load the trained CF model from a GCS bucket -> the model is saved by the pipeline part
def get_CF_model(bucket_name, blob_name):
    """Download a pickled model from Cloud Storage and deserialize it.

    Args:
        bucket_name: Name of the bucket holding the model.
        blob_name: Object path of the pickle file inside the bucket.

    Returns:
        The object produced by unpickling the downloaded bytes.
    """
    gcs_blob = storage.Client().bucket(bucket_name).blob(blob_name)
    serialized_model = gcs_blob.download_as_bytes()

    # NOTE(review): unpickling can execute code contained in the payload, so
    # this should only ever be pointed at a bucket whose contents are trusted.
    return pickle.loads(serialized_model)

def process_CF_result(model, cust_user_id):
    """Build the recommendation payload from the CF model's predictions.

    Predicts a rating for every recipe in the interactions table that the
    user has not rated yet, ranks the recipes by predicted rating and returns
    the five best ones together with their name and description.

    Args:
        model: Object exposing ``predict(user_id, recipe_id)`` whose return
            value has an ``est`` attribute (the predicted rating).
        cust_user_id: User id to recommend for.

    Returns:
        ``{"recipes": [...]}`` where each entry is a dict with the keys
        ``id``, ``description`` and ``name``. The list is empty when the user
        has no unrated recipes.
    """
    # query_data binds the user id as @user_id, so the SQL references the
    # parameter instead of having the value formatted into the text.
    sql_query_rated_recipe = """
        SELECT distinct recipe_id
        FROM `brave-watch-414204.RecipeQuery.interactions`
        WHERE user_id = @user_id
    """

    sql_query_all_recipe = """
        SELECT distinct recipe_id
        FROM `brave-watch-414204.RecipeQuery.interactions`
    """
    rated_recipes_id = {
        row[0] for row in query_data(sql_query_rated_recipe, cust_user_id)
    }
    all_recipes_id = {
        row[0] for row in query_data(sql_query_all_recipe, cust_user_id)
    }

    unrated_recipes_list = list(all_recipes_id - rated_recipes_id)
    if not unrated_recipes_list:
        # Nothing to rank; an empty frame would have no 'predicted_rating'
        # column to sort by.
        return {"recipes": []}

    predictions = [
        {
            'recipe_id': recipe_id,
            'predicted_rating': model.predict(cust_user_id, recipe_id).est,
        }
        for recipe_id in unrated_recipes_list
    ]

    # Sort recipes by predicted ratings in descending order
    predictions_df = pd.DataFrame(predictions).sort_values(
        by='predicted_rating', ascending=False
    )

    # get the recipes name and description
    sql_query_recipe_details = """
    SELECT  
      distinct id,
      name,
      description
    FROM `brave-watch-414204.RecipeQuery.recipes` 
    """

    # This query has no @user_id placeholder; '0' only satisfies query_data's
    # mandatory user-id argument.
    recipes_detail = query_data(sql_query_recipe_details, '0').to_dataframe()

    final_predictions_df = pd.merge(
        predictions_df,
        recipes_detail,
        left_on='recipe_id',
        right_on='id',
        how='left',
    )

    # Take the five best rows and address the columns by name rather than by
    # their position in the merged frame.
    top_rows = final_predictions_df.head(5)[['recipe_id', 'name', 'description']]
    top5_rec = [
        {'id': recipe_id, 'description': description, 'name': name}
        for recipe_id, name, description in top_rows.values
    ]

    final_result = {"recipes": top5_rec}
    return final_result

def query_CB_data(sql_query, cust_user_id):
    """Execute a BigQuery query with the user id bound as ``@cust_user_id``.

    Args:
        sql_query: SQL text to run. It may reference the named parameter
            ``@cust_user_id``.
        cust_user_id: User id; it is converted with ``int()`` before being
            bound as an INT64 parameter.

    Returns:
        Whatever ``QueryJob.result()`` returns for the submitted job.
    """
    bq_client = bigquery.Client(project='brave-watch-414204')

    # The single named parameter made available to the query.
    user_id_param = bigquery.ScalarQueryParameter(
        "cust_user_id", "INT64", int(cust_user_id)
    )
    config = bigquery.QueryJobConfig(query_parameters=[user_id_param])

    # Submit the job and hand back its result.
    return bq_client.query(sql_query, job_config=config).result()

#This part will be done by jh&th
def process_CB_results(cust_user_id):
    """Return up to five recipe recommendations derived from the user's reviews.

    The whole computation runs as one BigQuery script: the user's reviews are
    weighted by age, embedded, matched against the recipe embedding table with
    VECTOR_SEARCH, and recipes the user already rated are filtered out.

    Args:
        cust_user_id: User id; bound in the SQL as ``@cust_user_id`` by
            ``query_CB_data``.

    Returns:
        ``{"recipes": [...]}`` where each entry is a dict with the keys
        ``id``, ``description`` and ``name``.
    """
    # The SQL is wrapped in triple *single* quotes because the JavaScript UDF
    # body inside it is delimited by triple double quotes; using the same
    # delimiter for both would end the Python string at the UDF body.
    # NOTE(review): the decay reference date is hard-coded to 2018-12-21, and a
    # review dated exactly that day gives LOG(1) = 0 as the divisor - confirm
    # this cannot occur in the data.
    sql_result_processed_by_CB = '''
    #It's a UDF function written by Javascript help calculate vector multiply scalar
    CREATE TEMP FUNCTION VECTOR_MULT_SCALAR(vector ARRAY<FLOAT64>, scalar FLOAT64)
    RETURNS ARRAY<FLOAT64>
    LANGUAGE js AS """
    if (vector === null) {
        return null;
  }
    return vector.map(function(element) {
        return element * scalar;
  });
""";

#Logarithmic decay weighting has been applied to different reviews based on the number of days since the current date. The choice of logarithmic decay is because users' preferences for recipes tend to be relatively stable.
WITH RankedReviews AS (
    SELECT
        user_id,
        review,
        date,
         1 / LOG(DATE_DIFF('2018-12-21', date, DAY) + 1) AS weight
    FROM
         `brave-watch-414204.RecipeQuery.interactions` 
    WHERE
        user_id = @cust_user_id
)

,

# Call textembedding-gecko to do embedding for each review
ReviewEmbeddings AS (
            SELECT 
            ml_generate_embedding_result as review_embedding,
            content AS review,
            title as user_id
            FROM
            ML.GENERATE_EMBEDDING(
                MODEL `RecipeQuery.gecko_model`,
                (SELECT review as content, user_id as title FROM RankedReviews
                ),
                STRUCT(TRUE AS flatten_json_output)
                )
)
,
#merge review,weight,embedding together
RevieWeightMergedEmbedding as (

            select 
            t1.user_id,
            t1.review,
            t1.date,
            t1.weight,
            t2.review_embedding
            from RankedReviews t1 left join ReviewEmbeddings t2 on t1.review=t2.review

            
            )
,

#Performing a weighted average of different embedding vectors.
WeightedAverageEmbeddings AS (
    SELECT
        user_id,
        ARRAY_AGG(
            STRUCT(
                VECTOR_MULT_SCALAR(review_embedding, weight) AS ml_generate_embedding_result
            )
        ) AS weighted_embeddings
    FROM
        RevieWeightMergedEmbedding
        group by user_id
)
        ,
WeightedAverageEmbeddingsStandardize as (
SELECT user_id as query,ml_generate_embedding_result
FROM WeightedAverageEmbeddings,
UNNEST(weighted_embeddings) AS we
)
,
user_rated_recipe_id as(
            SELECT distinct recipe_id
            FROM `brave-watch-414204.RecipeQuery.interactions`
            WHERE user_id =  @cust_user_id
)
        ,
recipe_similar_to_review_top_120 as (
        SELECT query.query,base.content,distance, base.title
        FROM VECTOR_SEARCH(
            TABLE `VectorDB.AfterEmbedding_768`, 'ml_generate_embedding_result',
            (
                SELECT * from WeightedAverageEmbeddingsStandardize
                ),
                top_k => 120, options => '{"fraction_lists_to_search": 0.01}')
)
        ,
recipe_similar_to_review_top_5_exclude_rated_recipe as (
        select 
        t1.title as recipe_id
        from recipe_similar_to_review_top_120 t1
        left join user_rated_recipe_id t2
        on t1.title=t2.recipe_id
        where t2.recipe_id is null
        limit 5
)
    


        SELECT  
        id,
        name,
        description
        FROM `brave-watch-414204.RecipeQuery.recipes` 
        where id in (select * from recipe_similar_to_review_top_5_exclude_rated_recipe)
'''

    query_job = query_CB_data(sql_result_processed_by_CB, cust_user_id)

    results_df = query_job.to_dataframe()

    # Final SELECT column order is id, name, description.
    top5_rec = []
    for value in results_df.head().values:
        top5_rec.append({'id': value[0], 'description': value[2], 'name': value[1]})

    final_result = {"recipes": top5_rec}

    return final_result


@functions_framework.http
def hello_http(request):
    """HTTP Cloud Function returning recipe recommendations for a user.

    Reads ``userId`` from the JSON request body (defaulting to 0 when it is
    absent), asks ``determine_rcs_model`` which recommender applies and
    returns that recommender's result as JSON.

    Args:
        request (flask.Request): The request object.
        <https://flask.palletsprojects.com/en/1.1.x/api/#incoming-request-data>
    Returns:
        A ``(body, status, headers)`` tuple that can be turned into a
        Response object using `make_response`
        <https://flask.palletsprojects.com/en/1.1.x/api/#flask.make_response>.
        The status is 200 with the recommendations on success, or 400 with an
        error message when ``userId`` is not an integer.
    Raises:
        ValueError: If ``determine_rcs_model`` returns an unknown flag.
    """
    json_headers = {'Content-Type': 'application/json'}
    request_json = request.get_json(silent=True)

    cust_user_id = 0
    # Only a JSON object can carry the 'userId' key.
    if isinstance(request_json, dict) and 'userId' in request_json:
        cust_user_id = request_json['userId']

    # Every downstream helper converts the id with int(); reject bad values
    # here with a client error instead of letting them surface as a crash.
    try:
        cust_user_id = int(cust_user_id)
    except (TypeError, ValueError, OverflowError):
        error_body = {"error": "userId must be an integer"}
        return json.dumps(error_body), 400, json_headers

    flag = determine_rcs_model(cust_user_id)
    if flag == 'CB':
        final_result = process_CB_results(cust_user_id)
    elif flag == 'CF':
        # get model
        model = get_CF_model('brave-watch-414204', 'Model/knn_model.pkl')
        final_result = process_CF_result(model, cust_user_id)
    else:
        raise ValueError(f"Unknown flag value: {flag}")

    return json.dumps(final_result), 200, json_headers



## requirements.txt

### Add the schema.yaml file to your cloud function folder