# Create Customer Summary (Quantitative Analysis)
- This notebook takes about 5 to 10 minutes to execute
- Extract quantitative data from the Trips data
  - Does the customer only use the service certain days of the week?
  - What time of day does the customer use the service (rush hour)?
- Create an LLM summary of the extracted data

## Create Summary Prompt and run through LLM

In [None]:
%%bigquery

-- OPTIONAL: Reset all the fields to null
-- Nothing in this cell is executed as written: the UPDATE below is inside a block comment.
-- To clear only the three columns this notebook populates (the prompt, the raw LLM JSON and
-- the parsed LLM text), remove the surrounding /* and */ and run the cell.
-- If you need to reset your data back to fresh data run the stored procedure: CALL `${project_id}.${bigquery_rideshare_llm_curated_dataset}.sp_reset_demo`();

/*
UPDATE `${project_id}.${bigquery_rideshare_llm_enriched_dataset}.customer`
   SET customer_quantitative_analysis_prompt     = NULL,
       llm_customer_quantitative_analysis_json   = NULL,
       llm_customer_quantitative_analysis        = NULL
 WHERE TRUE;
*/

In [None]:
%%bigquery

-- Build the LLM prompt for every customer that has a row in customer_quantitative_analysis.
-- The prompt is one instruction sentence followed by two bullet lines:
--   1. which days the customer rides   (driven by day_of_week)
--   2. what time of day they ride      (driven by hour_of_day)
-- Any label other than the ones listed (including NULL) falls through to the generic ELSE wording.
UPDATE `${project_id}.${bigquery_rideshare_llm_enriched_dataset}.customer` AS customer
SET customer_quantitative_analysis_prompt = CONCAT(
      'Write a 2 to 3 sentence summary of the following attributes of a customer who uses a rideshare services. ',

      -- Bullet 1: day-of-week usage
      '- ', customer.customer_name,
      CASE day_of_week
        WHEN 'weekend-customer' THEN ' uses the service on weekends.\n'
        WHEN 'weekday-customer' THEN ' uses the service on weekdays.\n'
        ELSE ' uses the rideshare service any day of the week.\n'
      END,

      -- Bullet 2: time-of-day usage
      '- ', customer.customer_name,
      CASE hour_of_day
        WHEN 'night-hour-customer' THEN ' likes to use the service at night.\n'
        WHEN 'rush-hour-customer'  THEN ' likes to use the service during the morning and afternoon rush hours.\n'
        ELSE ' uses the rideshare service at any time of the day.\n'
      END
    )
FROM `${project_id}.${bigquery_rideshare_llm_enriched_dataset}.customer_quantitative_analysis` AS cqa
WHERE customer.customer_id = cqa.customer_id
;



In [None]:
%%bigquery

-- Spot-check: show up to 10 of the prompts that have been populated on the customer table
SELECT c.customer_quantitative_analysis_prompt
FROM `${project_id}.${bigquery_rideshare_llm_enriched_dataset}.customer` AS c
WHERE c.customer_quantitative_analysis_prompt IS NOT NULL
LIMIT 10;

## Run the LLM to generate a Customer Summary on Quantitative Analysis

In [None]:
from google.cloud import bigquery
import pandas as pd

# BigQuery client used by the later Python cells to run the count and UPDATE queries.
# No project or credentials are passed here, so the client library's defaults apply.
client = bigquery.Client()

In [None]:
# Number of customer rows handed to the LLM by each UPDATE statement
batch_size = 100

# Text-generation settings passed to ML.GENERATE_TEXT.
# NOTE(review): if near-deterministic summaries are the goal, a temperature of .80
# is fairly high -- confirm the intended value.
llm_temperature = .80
llm_max_output_tokens = 1024
llm_top_p = .70
llm_top_k = 25

# Values substituted into the named placeholders of the SQL text below
sql_parameters = dict(
    batch_size=batch_size,
    llm_temperature=llm_temperature,
    llm_max_output_tokens=llm_max_output_tokens,
    llm_top_p=llm_top_p,
    llm_top_k=llm_top_k,
)

# One batch of scoring: pick up to batch_size customers that are flagged for LLM
# processing, have a prompt, and have no usable JSON result yet (column is NULL or
# '$.predictions[0].content' is missing), run their prompts through the model and
# store ml_generate_text_result on the matching customer row.
update_sql = """
UPDATE `${project_id}.${bigquery_rideshare_llm_enriched_dataset}.customer` AS customer
  SET llm_customer_quantitative_analysis_json = child.ml_generate_text_result
  FROM (SELECT *
          FROM ML.GENERATE_TEXT(MODEL`${project_id}.${bigquery_rideshare_llm_enriched_dataset}.cloud_ai_llm_v1`,
              (SELECT customer_id,
                      customer_quantitative_analysis_prompt AS prompt
                FROM `${project_id}.${bigquery_rideshare_llm_enriched_dataset}.customer`
                WHERE (llm_customer_quantitative_analysis_json IS NULL
                       OR
                       JSON_VALUE(llm_customer_quantitative_analysis_json, '$.predictions[0].content') IS NULL
                       )
                  AND include_in_llm_processing = TRUE
                  AND customer_quantitative_analysis_prompt IS NOT NULL
                LIMIT {batch_size}),
              STRUCT(
                {llm_temperature} AS temperature,
                {llm_max_output_tokens} AS max_output_tokens,
                {llm_top_p} AS top_p,
                {llm_top_k} AS top_k
                ))
  ) AS child
WHERE customer.customer_id = child.customer_id
  """.format_map(sql_parameters)

# Show the fully rendered statement so it can be inspected or run by hand
print("SQL: " + update_sql)


In [None]:
# Score while records remain
# score in groups of batch_size records (we can do up to 10,000 at a time)
#
# Each pass: count the customer rows that still need an LLM result, submit one
# UPDATE (update_sql) that scores a batch of them, and poll the job until it is DONE.
# The loop ends when the count reaches zero.  It also stops early when:
#   - a job finishes with an error (raises RuntimeError), or
#   - the backlog fails to shrink for max_stalled_batches consecutive passes
#     (e.g. rows for which the model never returns content), so the loop cannot
#     resubmit billable jobs forever.
import time

# Consecutive no-progress passes tolerated before giving up
max_stalled_batches = 5

done = False
displayed_first_sql = False
original_record_count = 0
previous_cnt = None     # backlog size seen on the previous pass
stalled_batches = 0     # consecutive passes where the backlog did not shrink

# Count of rows still needing a result; uses the same filter as the sub-select
# inside update_sql.  The text never changes, so it is built once.
sql = """
      SELECT COUNT(*) AS cnt
        FROM `${project_id}.${bigquery_rideshare_llm_enriched_dataset}.customer`
       WHERE (llm_customer_quantitative_analysis_json IS NULL
              OR
              JSON_VALUE(llm_customer_quantitative_analysis_json, '$.predictions[0].content') IS NULL
              )
         AND include_in_llm_processing = TRUE
         AND customer_quantitative_analysis_prompt IS NOT NULL;
      """

while not done:
  # Get the count of records to score
  df_record_count = client.query(sql).to_dataframe()
  cnt = df_record_count['cnt'].head(1).item()
  if not displayed_first_sql:
    # Remember the starting backlog so progress can be reported against it
    original_record_count = cnt
    displayed_first_sql = True

  print("Remaining records to process: ", cnt, " out of", original_record_count, " batch_size: ", batch_size)

  if cnt == 0:
    done = True
    continue

  # Stall detection: a pass that leaves the backlog unchanged made no progress
  if previous_cnt is not None and cnt >= previous_cnt:
    stalled_batches += 1
  else:
    stalled_batches = 0
  previous_cnt = cnt

  if stalled_batches >= max_stalled_batches:
    print("Stopping: no progress in {} consecutive batches; {} records remain unprocessed.".format(stalled_batches, cnt))
    break

  # https://github.com/googleapis/python-bigquery/tree/master/samples
  job_config = bigquery.QueryJobConfig(priority=bigquery.QueryPriority.INTERACTIVE)
  query_job = client.query(update_sql, job_config=job_config)

  # Check on the progress by getting the job's updated state.
  query_job = client.get_job(
      query_job.job_id, location=query_job.location
  )
  print("Job {} is currently in state {}".format(query_job.job_id, query_job.state))

  while query_job.state != "DONE":
    time.sleep(5)
    query_job = client.get_job(
        query_job.job_id, location=query_job.location
    )
    print("Job {} is currently in state {}".format(query_job.job_id, query_job.state))

  # "DONE" only means the job stopped running; a failed job also ends in DONE.
  # Surface the failure instead of silently resubmitting the same batch.
  if query_job.error_result:
    raise RuntimeError("Job {} failed: {}".format(query_job.job_id, query_job.error_result))


## Parse the LLM JSON results

In [None]:
%%bigquery

-- Pull the generated text out of the raw LLM JSON ('$.predictions[0].content') into its own
-- column, touching only rows that have JSON but no parsed text yet.
UPDATE `${project_id}.${bigquery_rideshare_llm_enriched_dataset}.customer` AS customer
SET llm_customer_quantitative_analysis =
      JSON_VALUE(customer.llm_customer_quantitative_analysis_json, '$.predictions[0].content')
WHERE customer.llm_customer_quantitative_analysis IS NULL
  AND customer.llm_customer_quantitative_analysis_json IS NOT NULL;

In [None]:
%%bigquery

-- Spot-check: show up to 20 customers with both the raw LLM JSON and the parsed summary,
-- alongside the prompt that produced them.
SELECT c.customer_id,
       c.customer_quantitative_analysis_prompt,
       c.llm_customer_quantitative_analysis_json,
       c.llm_customer_quantitative_analysis
FROM `${project_id}.${bigquery_rideshare_llm_enriched_dataset}.customer` AS c
WHERE c.llm_customer_quantitative_analysis IS NOT NULL
  AND c.llm_customer_quantitative_analysis_json IS NOT NULL
LIMIT 20;
