### <font color='#4285f4'>Overview</font>

Overview: Generates synthetic truck telemetry data

Author:
* Adam Paternostro

### <font color='#4285f4'>License</font>

```
# Copyright 2025 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
```

### <font color='#4285f4'>Pip installs</font>

In [None]:
# PIP Installs (if necessary)
import sys

# !{sys.executable} -m pip install REPLACE-ME

### <font color='#4285f4'>Initialize</font>

In [None]:
from PIL import Image
from IPython.display import HTML
import IPython.display
import google.auth
import requests
import json
import uuid
import base64
import os
import cv2
import random
import time
import datetime
import base64
import random

import logging
from tenacity import retry, wait_exponential, stop_after_attempt, before_sleep_log, retry_if_exception

In [None]:
# Notebook-wide settings. Set these, then run this cell and check the printed output.
# NOTE(review): the "${...}" values look like template placeholders that are substituted
# when the notebook is deployed — confirm; if they are still literal, replace them by hand.

bigquery_location = "${bigquery_non_multi_region}"
region = "${region}"
# `location` is read by the Gemini / Imagen helper functions to build the Vertex AI endpoint URL.
location = "${location}"


# Get the current date and time (naive, local time of the notebook runtime)
now = datetime.datetime.now()

# Format the timestamp as YYYY-MM-DD-HH-MM
formatted_date = now.strftime("%Y-%m-%d-%H-%M")

# The project id comes from the environment (KeyError if GOOGLE_CLOUD_PROJECT is unset).
# The active account comes from gcloud through IPython's "!" shell capture, which
# yields a list of output lines — this line is IPython syntax, not plain Python.
project_id = os.environ["GOOGLE_CLOUD_PROJECT"]
user = !(gcloud auth list --filter=status:ACTIVE --format="value(account)")

# Exactly one active account is expected; zero or several lines means we cannot tell who the user is.
if len(user) != 1:
  raise RuntimeError(f"user is not set: {user}")
user = user[0]

# Echo the resolved values so they can be verified by eye.
print(f"project_id = {project_id}")
print(f"user = {user}")

### <font color='#4285f4'>Helper Methods</font>

#### restAPIHelper
Calls the Google Cloud REST API using the current user's credentials.

In [None]:
def restAPIHelper(url: str, http_verb: str, request_body: str) -> dict:
  """Calls the Google Cloud REST API passing in the current user's credentials.

  Args:
    url: The REST endpoint to call.
    http_verb: One of "GET", "POST", "PUT", "PATCH" or "DELETE" (exact match, upper case).
    request_body: Payload for POST / PUT / PATCH; ignored for GET and DELETE.
      NOTE(review): the value is handed to `requests` through `json=`, which
      serializes it to JSON itself, so callers presumably pass a dict rather than a
      pre-encoded string — confirm against the callers.

  Returns:
    The response body parsed from JSON (an object for the Google Cloud APIs this
    notebook calls).

  Raises:
    RuntimeError: If `http_verb` is not supported, or the response status is not 200.
  """
  # Reject an unsupported verb before doing any work, so a typo does not cost a
  # credential refresh round-trip first.
  supported_verbs = ("GET", "POST", "PUT", "PATCH", "DELETE")
  if http_verb not in supported_verbs:
    raise RuntimeError(f"Unknown HTTP verb: {http_verb}")

  import google.auth.transport.requests
  import requests
  import google.auth
  import json

  # Get an access token based upon the current user
  creds, _ = google.auth.default()
  auth_req = google.auth.transport.requests.Request()
  creds.refresh(auth_req)
  access_token = creds.token

  headers = {
    "Content-Type" : "application/json",
    "Authorization" : "Bearer " + access_token
  }

  # Verbs that carry a JSON body vs. verbs that do not.
  senders_with_body = {
    "POST": requests.post,
    "PUT": requests.put,
    "PATCH": requests.patch,
  }
  senders_without_body = {
    "GET": requests.get,
    "DELETE": requests.delete,
  }

  if http_verb in senders_with_body:
    response = senders_with_body[http_verb](url, json=request_body, headers=headers)
  else:
    response = senders_without_body[http_verb](url, headers=headers)

  # Only a plain 200 counts as success; every other status is surfaced to the caller.
  if response.status_code == 200:
    return json.loads(response.content)

  error = f"Error restAPIHelper -> ' Status: '{response.status_code}' Text: '{response.text}'"
  raise RuntimeError(error)

#### RetryCondition (for retrying LLM calls)

In [None]:
def RetryCondition(error):
  """Tell the retry decorator whether a failed call should be attempted again.

  The error is converted to text and printed; the call is retried only when
  that text contains one of the known transient-failure markers.

  Args:
    error: The exception (or any object) describing the failure.

  Returns:
    True when the failure should be retried, otherwise False.
  """
  message = str(error)
  print(message)

  # Substrings that identify a retryable failure; add more here as needed.
  transient_markers = (
      "RESOURCE_EXHAUSTED",
      "No content in candidate",
  )

  should_retry = any(marker in message for marker in transient_markers)
  if should_retry:
    print("Retrying...")

  return should_retry

#### Gemini LLM

In [None]:
@retry(wait=wait_exponential(multiplier=1, min=1, max=60), stop=stop_after_attempt(10), retry=retry_if_exception(RetryCondition), before_sleep=before_sleep_log(logging.getLogger(), logging.INFO))
def GeminiLLM(prompt, model = "gemini-2.5-flash", response_schema = None,
                 temperature = 1, topP = 1, topK = 32):
  """Sends a text prompt to a Gemini model on Vertex AI and returns the text reply.

  The call is wrapped by the retry decorator above: up to 10 attempts with an
  exponential wait (1-60 seconds), retried only when RetryCondition accepts the error.

  Args:
    prompt: The text prompt.
    model: The publisher model name placed in the endpoint URL.
    response_schema: Optional schema sent as `responseSchema`; omitted when None.
    temperature: Sampling temperature; negative values are clamped to 0.
    topP: Sent as `topP`.
    topK: Sent as `topK`, but only when `model` is exactly "gemini-2.0-flash".
      NOTE(review): this means topK is ignored for the default model — confirm intended.

  Returns:
    The text of the first part of the first candidate, with "```json", "```"
    and newline characters removed.

  Raises:
    RuntimeError: On a non-200 status, an unparsable body, or a response that
      lacks candidates / content / parts / text.
  """
  # https://cloud.google.com/vertex-ai/generative-ai/docs/model-reference/inference#supported_models

  # The notebook's top-level imports only load `google.auth`; the transport
  # submodule has to be imported explicitly before `Request` can be used.
  import google.auth.transport.requests

  if temperature < 0:
    temperature = 0

  creds, _ = google.auth.default()
  auth_req = google.auth.transport.requests.Request() # required to obtain the access token
  creds.refresh(auth_req)
  access_token = creds.token

  headers = {
      "Content-Type" : "application/json",
      "Authorization" : "Bearer " + access_token
  }

  # https://cloud.google.com/vertex-ai/generative-ai/docs/model-reference/inference
  url = f"https://{location}-aiplatform.googleapis.com/v1/projects/{project_id}/locations/{location}/publishers/google/models/{model}:generateContent"

  generation_config = {
    "temperature": temperature,
    "topP": topP,
    "maxOutputTokens": 8192,
    "candidateCount": 1,
    "responseMimeType": "application/json",
  }

  # Add in the response schema when one is provided
  if response_schema is not None:
    generation_config["responseSchema"] = response_schema

  if model == "gemini-2.0-flash":
    generation_config["topK"] = topK

  payload = {
    "contents": {
      "role": "user",
      "parts": {
          "text": prompt
      },
    },
    "generation_config": {
      **generation_config
    },
    "safety_settings": {
      "category": "HARM_CATEGORY_SEXUALLY_EXPLICIT",
      "threshold": "BLOCK_LOW_AND_ABOVE"
    }
  }

  response = requests.post(url, json=payload, headers=headers)

  if response.status_code != 200:
    raise RuntimeError(f"Error with prompt:'{prompt}'  Status:'{response.status_code}' Text:'{response.text}'")

  try:
    json_response = json.loads(response.content)
  except Exception as error:
    raise RuntimeError(f"An error occurred parsing the JSON: {error}") from error

  # Walk candidates -> content -> parts -> text, failing with the raw response
  # attached as soon as a level is missing. The "No content in candidate" prefix
  # is one of the messages RetryCondition looks for, so it must stay as-is.
  if "candidates" not in json_response or len(json_response["candidates"]) == 0:
    raise RuntimeError(f"No candidates: {response.text}")
  candidate = json_response["candidates"][0]

  if "content" not in candidate:
    raise RuntimeError(f"No content in candidate: {response.text}")
  content = candidate["content"]

  if "parts" not in content or len(content["parts"]) == 0:
    raise RuntimeError(f"No parts in content: {response.text}")
  part = content["parts"][0]

  if "text" not in part:
    raise RuntimeError(f"No text in part: {response.text}")
  llm_response = part["text"]

  # Remove some typical response characters (if asking for a JSON reply)
  llm_response = llm_response.replace("```json","")
  llm_response = llm_response.replace("```","")
  llm_response = llm_response.replace("\n","")

  return llm_response

In [None]:
@retry(wait=wait_exponential(multiplier=1, min=1, max=60), stop=stop_after_attempt(10), retry=retry_if_exception(RetryCondition), before_sleep=before_sleep_log(logging.getLogger(), logging.INFO))
def GeminiLLM_VerifyImage(prompt, imageBase64, model = "gemini-2.0-flash", response_schema = None,
                 temperature = 1, topP = 1, topK = 32):
  """Sends a text prompt plus one inline image to a Gemini model and returns the text reply.

  The call is wrapped by the retry decorator above: up to 10 attempts with an
  exponential wait (1-60 seconds), retried only when RetryCondition accepts the error.

  Args:
    prompt: The text prompt.
    imageBase64: The image, sent as inline data with mime type "image/png".
      NOTE(review): the value is formatted into a string as-is, so it is
      presumably already a base64 *str* (not bytes) — confirm against callers.
    model: The publisher model name placed in the endpoint URL.
    response_schema: Optional schema sent as `responseSchema`; omitted when None.
    temperature: Sampling temperature; negative values are clamped to 0.
    topP: Sent as `topP`.
    topK: Sent as `topK`, but only when `model` is exactly "gemini-2.0-flash".

  Returns:
    The text of the first part of the first candidate, with "```json", "```"
    and newline characters removed.

  Raises:
    RuntimeError: On a non-200 status, an unparsable body, or a response that
      lacks candidates / content / parts / text.
  """
  # https://cloud.google.com/vertex-ai/generative-ai/docs/model-reference/inference#supported_models

  # The notebook's top-level imports only load `google.auth`; the transport
  # submodule has to be imported explicitly before `Request` can be used.
  import google.auth.transport.requests

  if temperature < 0:
    temperature = 0

  creds, _ = google.auth.default()
  auth_req = google.auth.transport.requests.Request() # required to obtain the access token
  creds.refresh(auth_req)
  access_token = creds.token

  headers = {
      "Content-Type" : "application/json",
      "Authorization" : "Bearer " + access_token
  }

  # https://cloud.google.com/vertex-ai/generative-ai/docs/model-reference/inference
  url = f"https://{location}-aiplatform.googleapis.com/v1/projects/{project_id}/locations/{location}/publishers/google/models/{model}:generateContent"

  generation_config = {
    "temperature": temperature,
    "topP": topP,
    "maxOutputTokens": 8192,
    "candidateCount": 1,
    "responseMimeType": "application/json",
  }

  # Add in the response schema when one is provided
  if response_schema is not None:
    generation_config["responseSchema"] = response_schema

  if model == "gemini-2.0-flash":
    generation_config["topK"] = topK

  payload = {
    "contents": {
      "role": "user",
      "parts": [
          { "text": prompt },
          { "inlineData": {  "mimeType": "image/png", "data": f"{imageBase64}" } }
        ]
    },
    "generation_config": {
      **generation_config
    },
    "safety_settings": {
      "category": "HARM_CATEGORY_SEXUALLY_EXPLICIT",
      "threshold": "BLOCK_LOW_AND_ABOVE"
    }
  }

  response = requests.post(url, json=payload, headers=headers)

  if response.status_code != 200:
    raise RuntimeError(f"Error with prompt:'{prompt}'  Status:'{response.status_code}' Text:'{response.text}'")

  try:
    json_response = json.loads(response.content)
  except Exception as error:
    raise RuntimeError(f"An error occurred parsing the JSON: {error}") from error

  # Walk candidates -> content -> parts -> text, failing with the raw response
  # attached as soon as a level is missing. The "No content in candidate" prefix
  # is one of the messages RetryCondition looks for, so it must stay as-is.
  if "candidates" not in json_response or len(json_response["candidates"]) == 0:
    raise RuntimeError(f"No candidates: {response.text}")
  candidate = json_response["candidates"][0]

  if "content" not in candidate:
    raise RuntimeError(f"No content in candidate: {response.text}")
  content = candidate["content"]

  if "parts" not in content or len(content["parts"]) == 0:
    raise RuntimeError(f"No parts in content: {response.text}")
  part = content["parts"][0]

  if "text" not in part:
    raise RuntimeError(f"No text in part: {response.text}")
  llm_response = part["text"]

  # Remove some typical response characters (if asking for a JSON reply)
  llm_response = llm_response.replace("```json","")
  llm_response = llm_response.replace("```","")
  llm_response = llm_response.replace("\n","")

  return llm_response

#### Imagen

In [None]:
def ImageGen(prompt):
  """Generates one image with Imagen on Vertex AI and saves it as a local PNG.

  Args:
    prompt: The text prompt describing the image.

  Returns:
    The name of the file written to the current working directory
    ("<random uuid>.png").

  Raises:
    RuntimeError: On a non-200 status, or when the response holds no predictions.
  """
  # The notebook's top-level imports only load `google.auth`; the transport
  # submodule has to be imported explicitly before `Request` can be used.
  import google.auth.transport.requests

  # NOTE(review): this helper builds the URL from the project returned by
  # google.auth.default(), while the Gemini helpers use the notebook-level
  # `project_id` — confirm both resolve to the same project.
  creds, project = google.auth.default()
  auth_req = google.auth.transport.requests.Request()
  creds.refresh(auth_req)
  access_token = creds.token

  headers = {
      "Content-Type" : "application/json",
      "Authorization" : "Bearer " + access_token
  }

  model_version = "imagen-4.0-generate-preview-06-06" # Preview Access Model

  # https://cloud.google.com/vertex-ai/docs/generative-ai/model-reference/image-generation
  url = f"https://{location}-aiplatform.googleapis.com/v1/projects/{project}/locations/{location}/publishers/google/models/{model_version}:predict"

  payload = {
    "instances": [
      {
        "prompt": prompt
      }
    ],
    "parameters": {
      "sampleCount": 1,
      "personGeneration" : "dont_allow"  # change to allow_adult for people generation
    }
  }

  response = requests.post(url, json=payload, headers=headers)

  if response.status_code != 200:
    error = f"Error with prompt:'{prompt}'  Status:'{response.status_code}' Text:'{response.text}'"
    raise RuntimeError(error)

  response_json = json.loads(response.content)

  if "blocked" in response_json:
    print(f"Blocked: {response_json['blocked']}")

  # A missing OR empty prediction list both mean no image came back; treat them
  # the same instead of letting an empty list surface as an IndexError.
  if "predictions" not in response_json or not response_json["predictions"]:
    raise RuntimeError(f"No predictions in response: {response.content}")

  image_data = response_json["predictions"][0]["bytesBase64Encoded"]
  image_data = base64.b64decode(image_data)
  filename = str(uuid.uuid4()) + ".png"
  with open(filename, "wb") as f:
    f.write(image_data)
  print("Image generated OK.")
  return filename

#### Helper Functions

In [None]:
def RunQuery(sql):
  """Runs a BigQuery statement.

  Statements whose first keyword is SELECT or WITH are run as queries and their
  rows returned; anything else is submitted as a job and polled until it finishes.

  Args:
    sql: The SQL text. Leading whitespace and keyword case are ignored when
      deciding which path to take; a statement that begins with a SQL comment is
      still treated as a non-query statement.

  Returns:
    A DataFrame for SELECT / WITH statements, otherwise True once the job has
    completed without error.

  Raises:
    Exception: Carrying the job's `error_result` when a non-query job fails.
  """
  import time
  from google.cloud import bigquery
  client = bigquery.Client()

  # Normalise before testing so "\n  select ..." is not mistaken for DDL/DML
  # (which would hand back True instead of the rows).
  if sql.lstrip().upper().startswith(("SELECT", "WITH")):
    return client.query(sql).to_dataframe()

  job_config = bigquery.QueryJobConfig(priority=bigquery.QueryPriority.INTERACTIVE)
  query_job = client.query(sql, job_config=job_config)

  # Check on the progress by re-fetching the job's state every 2 seconds until DONE.
  while True:
    query_job = client.get_job(
        query_job.job_id, location=query_job.location
    )
    print("Job {} is currently in state {} with error result of {}".format(query_job.job_id, query_job.state, query_job.error_result))
    if query_job.state == "DONE":
      break
    time.sleep(2)

  if query_job.error_result is None:
    return True
  raise Exception(query_job.error_result)

#### GCS

In [None]:
# This was generated by GenAI

def copy_file_to_gcs(local_file_path, bucket_name, destination_blob_name):
  """Copies a file from a local drive to a GCS bucket.

  Files ending in ".html" or ".json" are uploaded with an explicit UTF-8
  content type; every other file is uploaded without one being passed.

  Args:
      local_file_path: The full path to the local file.
      bucket_name: The name of the GCS bucket to upload to.
      destination_blob_name: The desired name of the uploaded file in the bucket.

  Returns:
      None

  Raises:
      FileNotFoundError: If `local_file_path` does not exist.
  """

  import os

  # Ensure the file exists locally. This runs before the GCS library is even
  # imported so that a bad path fails immediately.
  if not os.path.exists(local_file_path):
      raise FileNotFoundError(f"Local file '{local_file_path}' not found.")

  from google.cloud import storage

  # Create a storage client
  storage_client = storage.Client()

  # Get a reference to the bucket
  bucket = storage_client.bucket(bucket_name)

  # Create a blob object with the desired destination path
  blob = bucket.blob(destination_blob_name)

  # Pick an explicit content type for the extensions we know about
  content_type = ""
  if local_file_path.endswith(".html"):
    content_type = "text/html; charset=utf-8"

  if local_file_path.endswith(".json"):
    content_type = "application/json; charset=utf-8"

  # Upload the file from the local filesystem
  if content_type == "":
    blob.upload_from_filename(local_file_path)
  else:
    blob.upload_from_filename(local_file_path, content_type = content_type)

  print(f"File '{local_file_path}' uploaded to GCS bucket '{bucket_name}' as '{destination_blob_name}'.  Content-Type: '{content_type}'.")

### <font color='#4285f4'>MAIN CODE - Create IoT Tables</font>

In [None]:
%%bigquery
-- Create the dataset that holds every table defined in the cells below (no-op when it already exists).
-- NOTE(review): the location is hard-coded to 'us-central1' instead of using the
-- bigquery_location value set earlier in this notebook — confirm the two are meant to match.
CREATE SCHEMA IF NOT EXISTS `agentic_beans_raw` OPTIONS(location = 'us-central1');

#### Coffee Machine Telemetry Table

In [None]:
%%bigquery

-- Raw coffee-machine telemetry: one row per machine reading, clustered by machine and time.
-- Only the id / load / machine / truck / timestamp columns are required; every sensor value is nullable.
-- NOTE(review): the machine_id description refers to a 'machine_dim' table, but the
-- dimension table created later in this notebook is `agentic_beans_raw.machine` — confirm the name.
-- Uncomment the next line to drop the table before recreating it:
--drop table `agentic_beans_raw.telemetry_coffee_machine`;

CREATE TABLE IF NOT EXISTS `agentic_beans_raw.telemetry_coffee_machine`
(
    telemetry_coffee_machine_id     STRING         NOT NULL OPTIONS(description="A unique identifier for each telemetry reading."),
    telemetry_load_id               STRING         NOT NULL OPTIONS(description="A unique identifier for the batch load."),
    machine_id                      INT64          NOT NULL OPTIONS(description="The foreign key referencing the 'machine_id' from the 'machine_dim' table."),
    truck_id                        INT64          NOT NULL OPTIONS(description="The foreign key referencing the 'truck_id' from the 'truck' table, indicating which truck this machine belongs to."),
    telemetry_timestamp             TIMESTAMP      NOT NULL OPTIONS(description="The timestamp when the telemetry reading was recorded by the machine."),
    boiler_temperature_celsius      NUMERIC(5, 2)           OPTIONS(description="The current temperature of the machine's boiler in Celsius."),
    brew_pressure_bar               NUMERIC(4, 2)           OPTIONS(description="The current pressure during brewing in bars."),
    water_flow_rate_ml_per_sec      NUMERIC(5, 2)           OPTIONS(description="The water flow rate during brewing in milliliters per second."),
    grinder_motor_rpm               INT64                   OPTIONS(description="The revolutions per minute (RPM) of the coffee grinder's motor."),
    grinder_motor_torque_nm         NUMERIC(5, 2)           OPTIONS(description="The torque applied by the grinder motor in Newton-meters."),
    water_reservoir_level_percent   NUMERIC(5, 2)           OPTIONS(description="The percentage of water remaining in the machine's reservoir."),
    bean_hopper_level_grams         NUMERIC(8, 2)           OPTIONS(description="The quantity of coffee beans remaining in the hopper in grams."),
    total_brew_cycles_counter       INT64                   OPTIONS(description="Cumulative count of brew cycles completed by the machine."),
    last_error_code                 STRING                  OPTIONS(description="The most recent error code reported by the machine."),
    last_error_description          STRING                  OPTIONS(description="A description for the most recent error code."),
    power_consumption_watts         NUMERIC(8, 2)           OPTIONS(description="Current power consumption of the machine in Watts."),
    cleaning_cycle_status           STRING                  OPTIONS(description="Current status of the cleaning cycle (e.g., 'completed', 'in_progress', 'due').")
)
CLUSTER BY machine_id, telemetry_timestamp
OPTIONS(
    description="Raw telemetry data from coffee machines, used for real-time health monitoring, anomaly detection, and predictive maintenance insights."
);

#### Inventory Telemetry Table

In [None]:
%%bigquery

-- Raw inventory telemetry: one row per inventory update event for an ingredient on a truck,
-- clustered by truck and time.
-- NOTE(review): the ingredient_id description refers to an 'ingredient_dim' table, but the
-- dimension table created later in this notebook is `agentic_beans_raw.ingredient` — confirm the name.
-- Uncomment the next line to drop the table before recreating it:
--drop table `agentic_beans_raw.telemetry_inventory`;

CREATE TABLE IF NOT EXISTS `agentic_beans_raw.telemetry_inventory`
(
    telemetry_inventory_id           STRING         NOT NULL OPTIONS(description="A unique identifier for each inventory update event."),
    telemetry_load_id               STRING         NOT NULL OPTIONS(description="A unique identifier for the batch load."),
    truck_id                        INT64          NOT NULL OPTIONS(description="The foreign key referencing the 'truck_id' from the 'truck' table."),
    telemetry_timestamp             TIMESTAMP      NOT NULL OPTIONS(description="The timestamp when the inventory level was recorded or updated."),
    ingredient_id                   INT64         NOT NULL OPTIONS(description="The foreign key referencing the 'ingredient_id' from the 'ingredient_dim' table."),
    current_quantity_value          NUMERIC(10, 3) NOT NULL OPTIONS(description="The current measured quantity of the ingredient."),
    unit_of_measure                 STRING         NOT NULL OPTIONS(description="The unit of measure for the quantity (e.g., 'grams', 'liters', 'count', 'sheets')."),
    event_type                      STRING         NOT NULL OPTIONS(description="The type of inventory event (e.g., 'sensor_reading', 'replenished', 'consumed_by_sale', 'waste', 'manual_adjustment')."),
    associated_transaction_id       STRING                  OPTIONS(description="Optional: ID of the POS transaction that caused a consumption event."),
    source_sensor_id                STRING                  OPTIONS(description="Identifier for the specific sensor (e.g., weight sensor ID, RFID reader ID).")
)
CLUSTER BY truck_id, telemetry_timestamp
OPTIONS(
    description="Raw inventory level updates for all consumables on coffee trucks, used for real-time stock management and replenishment planning."
);

#### Camera Telemetry Table

In [None]:
%%bigquery

-- Raw camera-vision telemetry: one row per queue / foot-traffic analysis event,
-- clustered by truck and time.
-- NOTE(review): the camera_id description refers to a 'camera_dim' table, but the
-- dimension table created later in this notebook is `agentic_beans_raw.camera` — confirm the name.
-- Uncomment the next line to drop the table before recreating it:
--drop table `agentic_beans_raw.telemetry_camera_vision`;

CREATE TABLE IF NOT EXISTS `agentic_beans_raw.telemetry_camera_vision`
(
    telemetry_camera_vision_id      STRING         NOT NULL OPTIONS(description="A unique identifier for each queue analysis event."),
    telemetry_load_id               STRING         NOT NULL OPTIONS(description="A unique identifier for the batch load."),
    truck_id                        INT64          NOT NULL OPTIONS(description="The foreign key referencing the 'truck_id' from the 'truck' table."),
    telemetry_timestamp             TIMESTAMP      NOT NULL OPTIONS(description="The timestamp when the camera analysis was performed."),
    camera_id                       INT64         NOT NULL OPTIONS(description="The foreign key referencing the 'camera_id' from the 'camera_dim' table."),
    people_in_queue_count           INT64          NOT NULL OPTIONS(description="The number of people detected in the coffee truck's service queue."),
    foot_traffic_count_nearby       INT64          NOT NULL OPTIONS(description="The number of people detected walking by the truck in the immediate vicinity."),
    ai_detection_confidence_score   NUMERIC(3, 2)           OPTIONS(description="The AI model's confidence score for the detection accuracy (0.0 to 1.0)."),
    image_reference_url             STRING                  OPTIONS(description="Optional URL to the raw image or a key to the image in storage for audit/debugging."),
    detection_model_version         STRING                  OPTIONS(description="The version of the AI model used for detection.")
)
CLUSTER BY truck_id, telemetry_timestamp
OPTIONS(
    description="Raw data from AI-powered camera analysis, tracking customer queue lengths and general foot traffic around coffee trucks."
);

#### Machine Table

In [None]:
%%bigquery

-- Dimension table with one row per coffee machine, clustered by machine_id.
-- Only machine_id is required; all descriptive columns are nullable.
-- Uncomment the next line to drop the table before recreating it:
--drop table `agentic_beans_raw.machine`;

CREATE TABLE IF NOT EXISTS `agentic_beans_raw.machine`
(
    machine_id              INT64         NOT NULL OPTIONS(description="The unique identifier for the coffee machine."),
    machine_model           STRING                  OPTIONS(description="The model name of the coffee machine (e.g., 'EspressoBot 3000', 'BrewMaster 500')."),
    manufacturer            STRING                  OPTIONS(description="The manufacturer of the coffee machine."),
    serial_number           STRING                  OPTIONS(description="The manufacturer's serial number for the machine."),
    installation_date       DATE                    OPTIONS(description="The date the machine was installed in a truck."),
    last_maintenance_date   DATE                    OPTIONS(description="The date of the last recorded maintenance activity on the machine."),
    status                  STRING                  OPTIONS(description="Current operational status of the machine (e.g., 'active', 'in_maintenance', 'retired').")
)
CLUSTER BY machine_id
OPTIONS(
    description="Dimension table containing static information about each coffee machine in the fleet."
);

In [None]:
%%bigquery

-- Seed the machine table with 20 machines (ids 1-20) cycling through five models.
-- NOTE: this is a plain INSERT, so re-running the cell appends the same 20 rows again;
-- drop or truncate the table first if the cell has to be re-run.
INSERT INTO `agentic_beans_raw.machine`
(machine_id, machine_model, manufacturer, serial_number, installation_date, last_maintenance_date, status)
VALUES
(1, 'La Marzocco KB90 4-Group', 'La Marzocco', 'LMKB90-001', '2020-01-01', '2025-07-01', 'active'),
(2, 'Slayer Espresso Steam LP 3-Group', 'Slayer Espresso', 'SESL3-002', '2020-01-01', '2025-06-25', 'active'),
(3, 'Franke A1000 FM CM', 'Franke Coffee Systems', 'FA1KC-003', '2020-01-01', '2025-07-10', 'active'),
(4, 'Nuova Simonelli Mythos II', 'Nuova Simonelli', 'NSM2-004', '2020-01-01', '2025-05-30', 'active'),
(5, 'Thermoplan Black&White3 CTS', 'Thermoplan AG', 'TBW3C-005', '2020-01-01', '2025-07-15', 'active'),
(6, 'La Marzocco KB90 4-Group', 'La Marzocco', 'LMKB90-006', '2020-01-01', '2025-06-10', 'active'),
(7, 'Slayer Espresso Steam LP 3-Group', 'Slayer Espresso', 'SESL3-007', '2020-01-01', '2025-07-20', 'active'),
(8, 'Franke A1000 FM CM', 'Franke Coffee Systems', 'FA1KC-008', '2020-01-01', '2025-06-05', 'active'),
(9, 'Nuova Simonelli Mythos II', 'Nuova Simonelli', 'NSM2-009', '2020-01-01', '2025-07-02', 'active'),
(10, 'Thermoplan Black&White3 CTS', 'Thermoplan AG', 'TBW3C-010', '2020-01-01', '2025-06-28', 'active'),
(11, 'La Marzocco KB90 4-Group', 'La Marzocco', 'LMKB90-011', '2020-01-01', '2025-07-08', 'active'),
(12, 'Slayer Espresso Steam LP 3-Group', 'Slayer Espresso', 'SESL3-012', '2020-01-01', '2025-05-20', 'active'),
(13, 'Franke A1000 FM CM', 'Franke Coffee Systems', 'FA1KC-013', '2020-01-01', '2025-07-05', 'active'),
(14, 'Nuova Simonelli Mythos II', 'Nuova Simonelli', 'NSM2-014', '2020-01-01', '2025-06-18', 'active'),
(15, 'Thermoplan Black&White3 CTS', 'Thermoplan AG', 'TBW3C-015', '2020-01-01', '2025-07-22', 'active'),
(16, 'La Marzocco KB90 4-Group', 'La Marzocco', 'LMKB90-016', '2020-01-01', '2025-06-14', 'active'),
(17, 'Slayer Espresso Steam LP 3-Group', 'Slayer Espresso', 'SESL3-017', '2020-01-01', '2025-07-09', 'active'),
(18, 'Franke A1000 FM CM', 'Franke Coffee Systems', 'FA1KC-018', '2020-01-01', '2025-05-25', 'active'),
(19, 'Nuova Simonelli Mythos II', 'Nuova Simonelli', 'NSM2-019', '2020-01-01', '2025-07-11', 'active'),
(20, 'Thermoplan Black&White3 CTS', 'Thermoplan AG', 'TBW3C-020', '2020-01-01', '2025-06-01', 'active');

#### Ingredient Table

In [None]:
%%bigquery

-- Dimension table with one row per trackable ingredient / consumable, clustered by ingredient_id.
-- ingredient_id and ingredient_name are required; the remaining columns are nullable.
-- Uncomment the next line to drop the table before recreating it:
--drop table `agentic_beans_raw.ingredient`;

CREATE TABLE IF NOT EXISTS `agentic_beans_raw.ingredient`
(
    ingredient_id           INT64         NOT NULL OPTIONS(description="A unique identifier for each distinct ingredient."),
    ingredient_name         STRING         NOT NULL OPTIONS(description="The common name of the ingredient (e.g., 'Espresso Beans', 'Dairy Milk Whole', 'Chocolate Syrup')."),
    ingredient_category     STRING                  OPTIONS(description="The category of the ingredient (e.g., 'Coffee Beans', 'Milk', 'Syrup', 'Packaging')."),
    standard_unit_of_measure STRING                  OPTIONS(description="The primary unit of measure for this ingredient (e.g., 'grams', 'liters', 'count')."),
    is_perishable           BOOL                    OPTIONS(description="Indicates if the ingredient is perishable (TRUE/FALSE).")
)
CLUSTER BY ingredient_id
OPTIONS(
    description="Dimension table listing all trackable ingredients and consumables used on coffee trucks."
);


In [None]:
%%bigquery

-- Seed the ingredient table with 40 ingredients (ids 1-40), grouped by category below.
-- NOTE: this is a plain INSERT, so re-running the cell appends the same 40 rows again;
-- drop or truncate the table first if the cell has to be re-run.
INSERT INTO `agentic_beans_raw.ingredient`
(ingredient_id, ingredient_name, ingredient_category, standard_unit_of_measure, is_perishable)
VALUES
-- Coffee Beans (8 types)
(1, 'Signature Espresso Blend', 'Coffee Beans', 'grams', TRUE),
(2, 'Single Origin Colombian Beans', 'Coffee Beans', 'grams', TRUE),
(3, 'Decaf House Blend Beans', 'Coffee Beans', 'grams', TRUE),
(4, 'Dark Roast French Press Beans', 'Coffee Beans', 'grams', TRUE),
(5, 'Light Roast Pour Over Beans', 'Coffee Beans', 'grams', TRUE),
(6, 'Nitro Cold Brew Beans', 'Coffee Beans', 'grams', TRUE),
(7, 'Seasonal Limited Edition Beans', 'Coffee Beans', 'grams', TRUE),
(8, 'Breakfast Blend Beans', 'Coffee Beans', 'grams', TRUE),

-- Coffee Concentrates/Base (2 types)
(9, 'Cold Brew Concentrate (Regular)', 'Coffee Liquid', 'liters', TRUE),
(10, 'Nitro Cold Brew Concentrate', 'Coffee Liquid', 'liters', TRUE),

-- Milk & Dairy Alternatives (6 types)
(11, 'Whole Dairy Milk', 'Milk', 'liters', TRUE),
(12, 'Skim Dairy Milk', 'Milk', 'liters', TRUE),
(13, 'Organic Oat Milk', 'Milk', 'liters', TRUE),
(14, 'Barista Blend Almond Milk', 'Milk', 'liters', TRUE),
(15, 'Coconut Milk', 'Milk', 'liters', TRUE),
(16, 'Soy Milk Unsweetened', 'Milk', 'liters', TRUE),

-- Syrups & Sauces (12 types)
(17, 'Classic Vanilla Syrup', 'Syrup', 'milliliters', TRUE),
(18, 'Sugar-Free Vanilla Syrup', 'Syrup', 'milliliters', TRUE),
(19, 'Caramel Sauce', 'Sauce', 'milliliters', TRUE),
(20, 'Chocolate Mocha Sauce', 'Sauce', 'milliliters', TRUE),
(21, 'White Chocolate Sauce', 'Sauce', 'milliliters', TRUE),
(22, 'Hazelnut Syrup', 'Syrup', 'milliliters', TRUE),
(23, 'Toasted Marshmallow Syrup', 'Syrup', 'milliliters', TRUE),
(24, 'Spiced Chai Concentrate', 'Syrup', 'liters', TRUE),
(25, 'Pumpkin Spice Sauce', 'Sauce', 'milliliters', TRUE), -- Seasonal
(26, 'Peppermint Bark Syrup', 'Syrup', 'milliliters', TRUE), -- Seasonal
(27, 'Agave Nectar', 'Sweetener Liquid', 'milliliters', TRUE),
(28, 'Brown Sugar Cinnamon Syrup', 'Syrup', 'milliliters', TRUE),

-- Other Beverages / Bases (2 types)
(29, 'Hot Cocoa Mix', 'Beverage Powder', 'grams', FALSE),
(30, 'Matcha Green Tea Powder', 'Tea', 'grams', FALSE),

-- Consumables / Toppings (10 types)
(31, 'Standard 12oz Hot Cup', 'Packaging', 'count', FALSE),
(32, 'Standard 16oz Hot Cup', 'Packaging', 'count', FALSE),
(33, 'Standard 20oz Cold Cup', 'Packaging', 'count', FALSE),
(34, 'Hot Cup Lids (Mixed Size)', 'Packaging', 'count', FALSE),
(35, 'Cold Cup Lids (Domed)', 'Packaging', 'count', FALSE),
(36, 'Coffee Sleeves', 'Packaging', 'count', FALSE),
(37, 'Stir Sticks (Wood)', 'Packaging', 'count', FALSE),
(38, 'Sugar Packets', 'Sweetener', 'count', FALSE),
(39, 'Splenda Sweetener Packets', 'Sweetener', 'count', FALSE),
(40, 'Napkins (Recycled)', 'Packaging', 'count', FALSE);

#### Camera Table

In [None]:
%%bigquery

-- Dimension table with one row per camera, clustered by camera_id.
-- Only camera_id is required; all descriptive columns are nullable.
-- Uncomment the next line to drop the table before recreating it:
--drop table `agentic_beans_raw.camera`;

CREATE TABLE IF NOT EXISTS `agentic_beans_raw.camera`
(
    camera_id               INT64         NOT NULL OPTIONS(description="The unique identifier for the camera."),
    camera_model            STRING                  OPTIONS(description="The model name of the camera."),
    camera_location_type    STRING                  OPTIONS(description="The mounting location/type of the camera (e.g., 'front_facing_queue', 'side_foot_traffic')."),
    resolution_pixels       STRING                  OPTIONS(description="The resolution of the camera in pixels (e.g., '1920x1080')."),
    field_of_view_degrees   NUMERIC(5, 2)           OPTIONS(description="The camera's field of view in degrees."),
    installation_date       DATE                    OPTIONS(description="The date the camera was installed."),
    status                  STRING                  OPTIONS(description="Current operational status of the camera (e.g., 'online', 'offline', 'calibrating').")
)
CLUSTER BY camera_id
OPTIONS(
    description="Dimension table containing static information about cameras used for customer queue and foot traffic analysis."
);

In [None]:
%%bigquery

-- Seed the camera table with 40 cameras (ids 1-40): two per truck, one 'exterior_queue'
-- and one 'interior_cabin'. The table has no truck_id column, so the camera-to-truck
-- pairing exists only in the "-- Truck N" comments below.
-- NOTE: this is a plain INSERT, so re-running the cell appends the same 40 rows again;
-- drop or truncate the table first if the cell has to be re-run.
INSERT INTO `agentic_beans_raw.camera`
(camera_id, camera_model, camera_location_type, resolution_pixels, field_of_view_degrees, installation_date, status)
VALUES
-- Truck 1
(1, 'EyeOnDemand-XT', 'exterior_queue', '1920x1080', 120.0, '2023-01-05', 'online'),
(2, 'CabinView 300', 'interior_cabin', '1280x720', 90.0, '2023-01-05', 'online'),
-- Truck 2
(3, 'EyeOnDemand-XT', 'exterior_queue', '1920x1080', 120.0, '2023-01-10', 'online'),
(4, 'CabinView 300', 'interior_cabin', '1280x720', 90.0, '2023-01-10', 'online'),
-- Truck 3
(5, 'EyeOnDemand-XT', 'exterior_queue', '1920x1080', 120.0, '2023-01-15', 'online'),
(6, 'CabinView 300', 'interior_cabin', '1280x720', 90.0, '2023-01-15', 'online'),
-- Truck 4
(7, 'EyeOnDemand-XT', 'exterior_queue', '1920x1080', 120.0, '2023-01-20', 'online'),
(8, 'CabinView 300', 'interior_cabin', '1280x720', 90.0, '2023-01-20', 'online'),
-- Truck 5
(9, 'EyeOnDemand-XT', 'exterior_queue', '1920x1080', 120.0, '2023-02-01', 'online'),
(10, 'CabinView 300', 'interior_cabin', '1280x720', 90.0, '2023-02-01', 'online'),
-- Truck 6
(11, 'EyeOnDemand-XT', 'exterior_queue', '1920x1080', 120.0, '2023-02-05', 'online'),
(12, 'CabinView 300', 'interior_cabin', '1280x720', 90.0, '2023-02-05', 'online'),
-- Truck 7
(13, 'EyeOnDemand-XT', 'exterior_queue', '1920x1080', 120.0, '2023-02-10', 'online'),
(14, 'CabinView 300', 'interior_cabin', '1280x720', 90.0, '2023-02-10', 'online'),
-- Truck 8
(15, 'EyeOnDemand-XT', 'exterior_queue', '1920x1080', 120.0, '2023-02-15', 'online'),
(16, 'CabinView 300', 'interior_cabin', '1280x720', 90.0, '2023-02-15', 'online'),
-- Truck 9
(17, 'EyeOnDemand-XT', 'exterior_queue', '1920x1080', 120.0, '2023-03-01', 'online'),
(18, 'CabinView 300', 'interior_cabin', '1280x720', 90.0, '2023-03-01', 'online'),
-- Truck 10
(19, 'EyeOnDemand-XT', 'exterior_queue', '1920x1080', 120.0, '2023-03-05', 'online'),
(20, 'CabinView 300', 'interior_cabin', '1280x720', 90.0, '2023-03-05', 'online'),
-- Truck 11
(21, 'EyeOnDemand-XT', 'exterior_queue', '1920x1080', 120.0, '2023-03-10', 'online'),
(22, 'CabinView 300', 'interior_cabin', '1280x720', 90.0, '2023-03-10', 'online'),
-- Truck 12
(23, 'EyeOnDemand-XT', 'exterior_queue', '1920x1080', 120.0, '2023-03-15', 'online'),
(24, 'CabinView 300', 'interior_cabin', '1280x720', 90.0, '2023-03-15', 'online'),
-- Truck 13
(25, 'EyeOnDemand-XT', 'exterior_queue', '1920x1080', 120.0, '2023-04-01', 'online'),
(26, 'CabinView 300', 'interior_cabin', '1280x720', 90.0, '2023-04-01', 'online'),
-- Truck 14
(27, 'EyeOnDemand-XT', 'exterior_queue', '1920x1080', 120.0, '2023-04-05', 'online'),
(28, 'CabinView 300', 'interior_cabin', '1280x720', 90.0, '2023-04-05', 'online'),
-- Truck 15
(29, 'EyeOnDemand-XT', 'exterior_queue', '1920x1080', 120.0, '2023-04-10', 'online'),
(30, 'CabinView 300', 'interior_cabin', '1280x720', 90.0, '2023-04-10', 'online'),
-- Truck 16
(31, 'EyeOnDemand-XT', 'exterior_queue', '1920x1080', 120.0, '2023-04-15', 'online'),
(32, 'CabinView 300', 'interior_cabin', '1280x720', 90.0, '2023-04-15', 'online'),
-- Truck 17
(33, 'EyeOnDemand-XT', 'exterior_queue', '1920x1080', 120.0, '2023-05-01', 'online'),
(34, 'CabinView 300', 'interior_cabin', '1280x720', 90.0, '2023-05-01', 'online'),
-- Truck 18
(35, 'EyeOnDemand-XT', 'exterior_queue', '1920x1080', 120.0, '2023-05-05', 'online'),
(36, 'CabinView 300', 'interior_cabin', '1280x720', 90.0, '2023-05-05', 'online'),
-- Truck 19
(37, 'EyeOnDemand-XT', 'exterior_queue', '1920x1080', 120.0, '2023-05-10', 'online'),
(38, 'CabinView 300', 'interior_cabin', '1280x720', 90.0, '2023-05-10', 'online'),
-- Truck 20
(39, 'EyeOnDemand-XT', 'exterior_queue', '1920x1080', 120.0, '2023-05-15', 'online'),
(40, 'CabinView 300', 'interior_cabin', '1280x720', 90.0, '2023-05-15', 'online');

#### Generate Coffee Machine Telemetry

In [None]:
import csv
import uuid
from datetime import datetime, timedelta
import random

# --- Configuration ---
# NOTE: the generator function reads these module-level names when it is called.
OUTPUT_FILENAME_MACHINE = 'telemetry_coffee_machine_raw.csv'
START_DATE = datetime(2020, 1, 1)   # first timestamp written (inclusive)
END_DATE = datetime(2027, 1, 1)     # generation stops before this timestamp
INTERVAL_SECONDS_READING = 15       # one reading per machine every 15 seconds
INTERVAL_MINUTES_BATCH = 5          # a new batch upload ID every 5 minutes
NUM_TRUCKS = 20                     # machine_id 1-20 maps one-to-one to truck_id 1-20

# --- Normal Operating Ranges (for clean data) ---
# Built from (metric, low, high) rows; the RPM and watt bounds stay integers
# because the generator passes them to random.randint.
NORMAL_RANGES = {
    metric: {'min': low, 'max': high}
    for metric, low, high in (
        ('boiler_temperature_celsius', 92.0, 96.0),
        ('brew_pressure_bar', 8.8, 9.2),
        ('water_flow_rate_ml_per_sec', 1.2, 1.8),
        ('grinder_motor_rpm', 1450, 1750),
        ('grinder_motor_torque_nm', 0.9, 1.4),
        ('power_consumption_watts', 2800, 4200),
    )
}

# --- Initial State & Simulation Parameters for each machine ---
# Every machine starts with a full reservoir, a full hopper and a zeroed counter.
machine_states = {}
for i in range(1, NUM_TRUCKS + 1):
    machine_states[i] = dict(
        water_level_percent=100.0,
        bean_level_grams=2000.0,
        total_brew_cycles=0,
    )

# Consumption / refill parameters, expressed per 15-second reading
# (multiply by 4 for the per-minute equivalent).
WATER_CONSUMPTION_PER_15S = dict(min=0.0025, max=0.0125)  # % of reservoir (0.01 - 0.05 % per minute)
BEAN_CONSUMPTION_PER_15S = dict(min=0.25, max=1.25)       # grams (1 - 5 g per minute)
BREW_CYCLES_PER_15S = dict(min=0, max=0.25)               # new brew cycles (0 - 1 per minute)
WATER_REFILL_THRESHOLD = 10.0   # % at or below which the reservoir is refilled
BEAN_REFILL_THRESHOLD = 200.0   # grams at or below which the hopper is refilled
WATER_REFILL_LEVEL = 100.0
BEAN_REFILL_LEVEL = 2000.0

# --- CSV Header ---
# One column name per line; order must match the row built by the generator.
CSV_HEADER_MACHINE = """
    telemetry_coffee_machine_id
    telemetry_load_id
    machine_id
    truck_id
    telemetry_timestamp
    boiler_temperature_celsius
    brew_pressure_bar
    water_flow_rate_ml_per_sec
    grinder_motor_rpm
    grinder_motor_torque_nm
    water_reservoir_level_percent
    bean_hopper_level_grams
    total_brew_cycles_counter
    last_error_code
    last_error_description
    power_consumption_watts
    cleaning_cycle_status
""".split()

# --- Main Data Generation Logic for Coffee Machine Telemetry ---
def generate_telemetry_coffee_machine_data():
    """
    Generates telemetry data for coffee machines (15-second granularity, 5-min batch ID).

    Steps from START_DATE (inclusive) to END_DATE (exclusive) in increments of
    INTERVAL_SECONDS_READING seconds and writes one CSV row per machine per step
    to OUTPUT_FILENAME_MACHINE, preceded by the CSV_HEADER_MACHINE header row.

    Notes:
        * All configuration (dates, intervals, ranges, thresholds) is read from
          module-level names when the function runs, not when it is defined.
        * The per-machine water, bean and brew-cycle state lives in the
          module-level `machine_states` dict and is updated in place, so a second
          call continues from where the previous one stopped.
        * last_error_code / last_error_description are always None (written as
          empty CSV fields) and cleaning_cycle_status is always 'not_due'.

    Returns:
        None. Progress messages and a final summary are printed to stdout.
    """
    print(f"Starting coffee machine telemetry generation for {NUM_TRUCKS} trucks from {START_DATE} to {END_DATE}...")
    print(f"Output file: {OUTPUT_FILENAME_MACHINE}")
    print(f"Generating data every {INTERVAL_SECONDS_READING} second(s), with batch IDs every {INTERVAL_MINUTES_BATCH} minutes.")

    with open(OUTPUT_FILENAME_MACHINE, 'w', newline='') as csvfile:
        writer = csv.writer(csvfile)
        writer.writerow(CSV_HEADER_MACHINE) # Write the header row

        current_timestamp = START_DATE
        row_count = 0
        current_batch_load_id = None

        while current_timestamp < END_DATE:
            # Generate a new batch ID every INTERVAL_MINUTES_BATCH minutes
            # This ensures the batch ID changes at 0, 5, 10, 15... minutes of each hour
            if current_timestamp.minute % INTERVAL_MINUTES_BATCH == 0 and current_timestamp.second == 0:
                current_batch_load_id = current_timestamp.strftime('%Y%m%d%H%M%S') + '_machine_batch'

            for truck_id in range(1, NUM_TRUCKS + 1):
                # Each truck carries exactly one machine, sharing the same ID
                machine_id = truck_id

                state = machine_states[machine_id]

                # Simulate water consumption and refills
                state['water_level_percent'] -= random.uniform(WATER_CONSUMPTION_PER_15S['min'], WATER_CONSUMPTION_PER_15S['max'])
                if state['water_level_percent'] <= WATER_REFILL_THRESHOLD:
                    state['water_level_percent'] = WATER_REFILL_LEVEL - random.uniform(0.1, 0.5)

                # Simulate bean consumption and refills
                state['bean_level_grams'] -= random.uniform(BEAN_CONSUMPTION_PER_15S['min'], BEAN_CONSUMPTION_PER_15S['max'])
                if state['bean_level_grams'] <= BEAN_REFILL_THRESHOLD:
                    state['bean_level_grams'] = BEAN_REFILL_LEVEL - random.uniform(5, 10)

                # Simulate cumulative brew cycles.
                # The per-interval rate is fractional, and round() maps every value
                # below 0.5 to 0, so rounding the draw directly would leave the
                # counter stuck at 0. Stochastic rounding keeps the counter an
                # integer while matching the drawn rate on average: add the whole
                # part, then one more cycle with probability equal to the
                # fractional part.
                expected_cycles = random.uniform(BREW_CYCLES_PER_15S['min'], BREW_CYCLES_PER_15S['max'])
                new_cycles = int(expected_cycles)
                if random.random() < expected_cycles - new_cycles:
                    new_cycles += 1
                state['total_brew_cycles'] += new_cycles

                # Generate telemetry values within normal ranges
                boiler_temp = round(random.uniform(NORMAL_RANGES['boiler_temperature_celsius']['min'], NORMAL_RANGES['boiler_temperature_celsius']['max']), 2)
                brew_pressure = round(random.uniform(NORMAL_RANGES['brew_pressure_bar']['min'], NORMAL_RANGES['brew_pressure_bar']['max']), 2)
                water_flow = round(random.uniform(NORMAL_RANGES['water_flow_rate_ml_per_sec']['min'], NORMAL_RANGES['water_flow_rate_ml_per_sec']['max']), 2)
                grinder_rpm = random.randint(NORMAL_RANGES['grinder_motor_rpm']['min'], NORMAL_RANGES['grinder_motor_rpm']['max'])
                grinder_torque = round(random.uniform(NORMAL_RANGES['grinder_motor_torque_nm']['min'], NORMAL_RANGES['grinder_motor_torque_nm']['max']), 2)
                power_watts = random.randint(NORMAL_RANGES['power_consumption_watts']['min'], NORMAL_RANGES['power_consumption_watts']['max'])

                # Clean data only: no errors and no cleaning cycle due
                last_error_code = None
                last_error_description = None
                cleaning_cycle_status = 'not_due'

                # Field order must match CSV_HEADER_MACHINE
                row = [
                    str(uuid.uuid4()),
                    current_batch_load_id,
                    machine_id,
                    truck_id,
                    current_timestamp.isoformat(),
                    boiler_temp,
                    brew_pressure,
                    water_flow,
                    grinder_rpm,
                    grinder_torque,
                    round(state['water_level_percent'], 2),
                    round(state['bean_level_grams'], 2),
                    state['total_brew_cycles'],
                    last_error_code,
                    last_error_description,
                    power_watts,
                    cleaning_cycle_status
                ]
                writer.writerow(row)
                row_count += 1

            current_timestamp += timedelta(seconds=INTERVAL_SECONDS_READING)

            # Print progress update (e.g., every 100,000 rows)
            if row_count % 100000 == 0:
                print(f"Generated {row_count} rows up to {current_timestamp}...")

    print(f"\nCoffee machine telemetry generation complete! Total rows generated: {row_count}. File saved to: {OUTPUT_FILENAME_MACHINE}")

In [None]:
generate_telemetry_coffee_machine_data()

In [None]:
!head telemetry_coffee_machine_raw.csv

In [None]:
!gsutil cp telemetry_coffee_machine_raw.csv gs://gcs-bucket-namet/telemetry_coffee_machine_raw/telemetry_coffee_machine_raw.csv

In [None]:
%%bigquery

-- Loads the generated coffee machine telemetry CSV from Cloud Storage into the raw table.
-- The column list is in the same order as the CSV header written by the generator
-- (CSV_HEADER_MACHINE); the header row itself is skipped via skip_leading_rows.
LOAD DATA INTO `agentic_beans_raw.telemetry_coffee_machine`
(
    telemetry_coffee_machine_id     STRING,
    telemetry_load_id               STRING,
    machine_id                      INT64,
    truck_id                        INT64,
    telemetry_timestamp             TIMESTAMP,
    boiler_temperature_celsius      NUMERIC(5, 2),
    brew_pressure_bar               NUMERIC(4, 2),
    water_flow_rate_ml_per_sec      NUMERIC(5, 2),
    grinder_motor_rpm               INT64,
    grinder_motor_torque_nm         NUMERIC(5, 2),
    water_reservoir_level_percent   NUMERIC(5, 2),
    bean_hopper_level_grams         NUMERIC(8, 2),
    total_brew_cycles_counter       INT64,
    last_error_code                 STRING,
    last_error_description          STRING,
    power_consumption_watts         NUMERIC(8, 2),
    cleaning_cycle_status           STRING

)
FROM FILES (
 format = 'csv',
 skip_leading_rows = 1,
 uris = ['gs://gcs-bucket-namet/telemetry_coffee_machine_raw/telemetry_coffee_machine_raw.csv']);

#### Generate Inventory Telemetry

In [None]:
import csv
import uuid
from datetime import datetime, timedelta
import random

# --- Configuration ---
# NOTE: START_DATE, END_DATE, INTERVAL_MINUTES_BATCH and NUM_TRUCKS are also assigned
# by the other telemetry generator cells in this notebook. The generator functions
# read these module-level names when they are called, so the most recently executed
# configuration cell wins.
OUTPUT_FILENAME_INVENTORY = 'telemetry_inventory_raw.csv'
START_DATE = datetime(2020, 1, 1, 0, 0, 0)
END_DATE = datetime(2027, 1, 1, 0, 0, 0)
INTERVAL_MINUTES_READING = 1 # Data reading interval is now every 1 minute
INTERVAL_MINUTES_BATCH = 5   # Batch upload ID is still generated every 5 minutes
NUM_TRUCKS = 20

# --- Ingredient Definitions and Simulation Parameters ---
# Keyed by ingredient_id. How each field is used by the code in this cell:
#   'uom'              - written to the unit_of_measure column.
#   'sensor_type'      - written to the source_sensor_id column of sensor readings.
#   'consumption'      - (min, max); a uniform draw is subtracted at every reading.
#   'refill_threshold' - (min, max); a fresh uniform draw at every reading decides
#                        whether the current level triggers a replenishment.
#   'refill_qty'       - (min, max); uniform draw used for the replenished quantity.
#   'initial_qty'      - (min, max); only the max is used, as the upper bound of the
#                        starting level (the lower bound is the max refill threshold).
#   'name', 'category', 'perishable' - descriptive only; not read by the generator
#                        code in this cell.
INGREDIENT_PROPERTIES = {
    # Coffee Beans (grams) - consumption per minute
    1: {'name': 'Signature Espresso Blend', 'category': 'Coffee Beans', 'uom': 'grams', 'perishable': True, 'initial_qty': (2000, 5000), 'consumption': (2, 10), 'refill_threshold': (200, 500), 'refill_qty': (2000, 5000), 'sensor_type': 'weight_sensor_bean'},
    2: {'name': 'Single Origin Colombian Beans', 'category': 'Coffee Beans', 'uom': 'grams', 'perishable': True, 'initial_qty': (1000, 3000), 'consumption': (1, 6), 'refill_threshold': (100, 300), 'refill_qty': (1000, 3000), 'sensor_type': 'weight_sensor_bean'},
    3: {'name': 'Decaf House Blend Beans', 'category': 'Coffee Beans', 'uom': 'grams', 'perishable': True, 'initial_qty': (500, 1500), 'consumption': (0.5, 3), 'refill_threshold': (50, 150), 'refill_qty': (500, 1500), 'sensor_type': 'weight_sensor_bean'},
    4: {'name': 'Dark Roast French Press Beans', 'category': 'Coffee Beans', 'uom': 'grams', 'perishable': True, 'initial_qty': (1000, 2500), 'consumption': (1.5, 8), 'refill_threshold': (100, 250), 'refill_qty': (1000, 2500), 'sensor_type': 'weight_sensor_bean'},
    5: {'name': 'Light Roast Pour Over Beans', 'category': 'Coffee Beans', 'uom': 'grams', 'perishable': True, 'initial_qty': (700, 1800), 'consumption': (0.8, 4), 'refill_threshold': (70, 180), 'refill_qty': (700, 1800), 'sensor_type': 'weight_sensor_bean'},
    6: {'name': 'Nitro Cold Brew Beans', 'category': 'Coffee Beans', 'uom': 'grams', 'perishable': True, 'initial_qty': (1500, 4000), 'consumption': (1.4, 7), 'refill_threshold': (150, 400), 'refill_qty': (1500, 4000), 'sensor_type': 'weight_sensor_bean'},
    7: {'name': 'Seasonal Limited Edition Beans', 'category': 'Coffee Beans', 'uom': 'grams', 'perishable': True, 'initial_qty': (500, 1000), 'consumption': (0.4, 2), 'refill_threshold': (50, 100), 'refill_qty': (500, 1000), 'sensor_type': 'weight_sensor_bean'},
    8: {'name': 'Breakfast Blend Beans', 'category': 'Coffee Beans', 'uom': 'grams', 'perishable': True, 'initial_qty': (2000, 4500), 'consumption': (2, 9), 'refill_threshold': (200, 450), 'refill_qty': (2000, 4500), 'sensor_type': 'weight_sensor_bean'},

    # Coffee Liquids/Concentrates (liters) - consumption per minute
    9: {'name': 'Cold Brew Concentrate (Regular)', 'category': 'Coffee Liquid', 'uom': 'liters', 'perishable': True, 'initial_qty': (10, 20), 'consumption': (0.02, 0.1), 'refill_threshold': (1, 3), 'refill_qty': (10, 20), 'sensor_type': 'level_sensor_liquid'},
    10: {'name': 'Nitro Cold Brew Concentrate', 'category': 'Coffee Liquid', 'uom': 'liters', 'perishable': True, 'initial_qty': (5, 15), 'consumption': (0.01, 0.06), 'refill_threshold': (0.5, 2), 'refill_qty': (5, 15), 'sensor_type': 'level_sensor_liquid'},

    # Milk & Dairy Alternatives (liters) - consumption per minute
    11: {'name': 'Whole Dairy Milk', 'category': 'Milk', 'uom': 'liters', 'perishable': True, 'initial_qty': (8, 16), 'consumption': (0.02, 0.08), 'refill_threshold': (1, 3), 'refill_qty': (8, 16), 'sensor_type': 'level_sensor_liquid'},
    12: {'name': 'Skim Dairy Milk', 'category': 'Milk', 'uom': 'liters', 'perishable': True, 'initial_qty': (4, 8), 'consumption': (0.01, 0.04), 'refill_threshold': (0.5, 1.5), 'refill_qty': (4, 8), 'sensor_type': 'level_sensor_liquid'},
    13: {'name': 'Organic Oat Milk', 'category': 'Milk', 'uom': 'liters', 'perishable': True, 'initial_qty': (6, 12), 'consumption': (0.02, 0.06), 'refill_threshold': (0.8, 2), 'refill_qty': (6, 12), 'sensor_type': 'level_sensor_liquid'},
    14: {'name': 'Barista Blend Almond Milk', 'category': 'Milk', 'uom': 'liters', 'perishable': True, 'initial_qty': (4, 10), 'consumption': (0.01, 0.05), 'refill_threshold': (0.5, 1.5), 'refill_qty': (4, 10), 'sensor_type': 'level_sensor_liquid'},
    15: {'name': 'Coconut Milk', 'category': 'Milk', 'uom': 'liters', 'perishable': True, 'initial_qty': (2, 5), 'consumption': (0.004, 0.02), 'refill_threshold': (0.2, 0.5), 'refill_qty': (2, 5), 'sensor_type': 'level_sensor_liquid'},
    16: {'name': 'Soy Milk Unsweetened', 'category': 'Milk', 'uom': 'liters', 'perishable': True, 'initial_qty': (3, 7), 'consumption': (0.008, 0.03), 'refill_threshold': (0.3, 0.8), 'refill_qty': (3, 7), 'sensor_type': 'level_sensor_liquid'},

    # Syrups & Sauces (milliliters) - consumption per minute
    17: {'name': 'Classic Vanilla Syrup', 'category': 'Syrup', 'uom': 'milliliters', 'perishable': True, 'initial_qty': (750, 1500), 'consumption': (2, 6), 'refill_threshold': (75, 150), 'refill_qty': (750, 1500), 'sensor_type': 'weight_sensor_syrup'},
    18: {'name': 'Sugar-Free Vanilla Syrup', 'category': 'Syrup', 'uom': 'milliliters', 'perishable': True, 'initial_qty': (500, 1000), 'consumption': (1, 4), 'refill_threshold': (50, 100), 'refill_qty': (500, 1000), 'sensor_type': 'weight_sensor_syrup'},
    19: {'name': 'Caramel Sauce', 'category': 'Sauce', 'uom': 'milliliters', 'perishable': True, 'initial_qty': (500, 1200), 'consumption': (1.6, 5), 'refill_threshold': (50, 120), 'refill_qty': (500, 1200), 'sensor_type': 'weight_sensor_syrup'},
    20: {'name': 'Chocolate Mocha Sauce', 'category': 'Sauce', 'uom': 'milliliters', 'perishable': True, 'initial_qty': (750, 1800), 'consumption': (2.4, 8), 'refill_threshold': (75, 180), 'refill_qty': (750, 1800), 'sensor_type': 'weight_sensor_syrup'},
    21: {'name': 'White Chocolate Sauce', 'category': 'Sauce', 'uom': 'milliliters', 'perishable': True, 'initial_qty': (400, 800), 'consumption': (1, 3), 'refill_threshold': (40, 80), 'refill_qty': (400, 800), 'sensor_type': 'weight_sensor_syrup'},
    22: {'name': 'Hazelnut Syrup', 'category': 'Syrup', 'uom': 'milliliters', 'perishable': True, 'initial_qty': (300, 700), 'consumption': (0.6, 2), 'refill_threshold': (30, 70), 'refill_qty': (300, 700), 'sensor_type': 'weight_sensor_syrup'},
    23: {'name': 'Toasted Marshmallow Syrup', 'category': 'Syrup', 'uom': 'milliliters', 'perishable': True, 'initial_qty': (200, 500), 'consumption': (0.4, 1.6), 'refill_threshold': (20, 50), 'refill_qty': (200, 500), 'sensor_type': 'weight_sensor_syrup'},
    24: {'name': 'Spiced Chai Concentrate', 'category': 'Syrup', 'uom': 'liters', 'perishable': True, 'initial_qty': (3, 7), 'consumption': (0.01, 0.04), 'refill_threshold': (0.3, 0.7), 'refill_qty': (3, 7), 'sensor_type': 'level_sensor_liquid'},
    25: {'name': 'Pumpkin Spice Sauce', 'category': 'Sauce', 'uom': 'milliliters', 'perishable': True, 'initial_qty': (200, 600), 'consumption': (1, 4), 'refill_threshold': (20, 60), 'refill_qty': (200, 600), 'sensor_type': 'weight_sensor_syrup'},
    26: {'name': 'Peppermint Bark Syrup', 'category': 'Syrup', 'uom': 'milliliters', 'perishable': True, 'initial_qty': (200, 500), 'consumption': (0.8, 3), 'refill_threshold': (20, 50), 'refill_qty': (200, 500), 'sensor_type': 'weight_sensor_syrup'},
    27: {'name': 'Agave Nectar', 'category': 'Sweetener Liquid', 'uom': 'milliliters', 'perishable': True, 'initial_qty': (300, 800), 'consumption': (0.6, 2.4), 'refill_threshold': (30, 80), 'refill_qty': (300, 800), 'sensor_type': 'weight_sensor_syrup'},
    28: {'name': 'Brown Sugar Cinnamon Syrup', 'category': 'Syrup', 'uom': 'milliliters', 'perishable': True, 'initial_qty': (400, 900), 'consumption': (1.2, 5), 'refill_threshold': (40, 90), 'refill_qty': (400, 900), 'sensor_type': 'weight_sensor_syrup'},

    # Other Beverages / Bases (grams) - consumption per minute
    29: {'name': 'Hot Cocoa Mix', 'category': 'Beverage Powder', 'uom': 'grams', 'perishable': False, 'initial_qty': (500, 1500), 'consumption': (2, 10), 'refill_threshold': (50, 150), 'refill_qty': (500, 1500), 'sensor_type': 'weight_sensor_powder'},
    30: {'name': 'Matcha Green Tea Powder', 'category': 'Tea', 'uom': 'grams', 'perishable': False, 'initial_qty': (200, 600), 'consumption': (1, 4), 'refill_threshold': (20, 60), 'refill_qty': (200, 600), 'sensor_type': 'weight_sensor_powder'},

    # Consumables / Packaging (count) - consumption per minute
    31: {'name': 'Standard 12oz Hot Cup', 'category': 'Packaging', 'uom': 'count', 'perishable': False, 'initial_qty': (500, 1000), 'consumption': (1, 5), 'refill_threshold': (50, 100), 'refill_qty': (500, 1000), 'sensor_type': 'count_sensor_stack'},
    32: {'name': 'Standard 16oz Hot Cup', 'category': 'Packaging', 'uom': 'count', 'perishable': False, 'initial_qty': (400, 800), 'consumption': (0.8, 4), 'refill_threshold': (40, 80), 'refill_qty': (400, 800), 'sensor_type': 'count_sensor_stack'},
    33: {'name': 'Standard 20oz Cold Cup', 'category': 'Packaging', 'uom': 'count', 'perishable': False, 'initial_qty': (300, 600), 'consumption': (0.6, 3), 'refill_threshold': (30, 60), 'refill_qty': (300, 600), 'sensor_type': 'count_sensor_stack'},
    34: {'name': 'Hot Cup Lids (Mixed Size)', 'category': 'Packaging', 'uom': 'count', 'perishable': False, 'initial_qty': (700, 1400), 'consumption': (1.4, 7), 'refill_threshold': (70, 140), 'refill_qty': (700, 1400), 'sensor_type': 'count_sensor_stack'},
    35: {'name': 'Cold Cup Lids (Domed)', 'category': 'Packaging', 'uom': 'count', 'perishable': False, 'initial_qty': (250, 500), 'consumption': (0.5, 2), 'refill_threshold': (25, 50), 'refill_qty': (250, 500), 'sensor_type': 'count_sensor_stack'},
    36: {'name': 'Coffee Sleeves', 'category': 'Packaging', 'uom': 'count', 'perishable': False, 'initial_qty': (800, 1600), 'consumption': (1.6, 8), 'refill_threshold': (80, 160), 'refill_qty': (800, 1600), 'sensor_type': 'count_sensor_stack'},
    37: {'name': 'Stir Sticks (Wood)', 'category': 'Packaging', 'uom': 'count', 'perishable': False, 'initial_qty': (1000, 2000), 'consumption': (2, 10), 'refill_threshold': (100, 200), 'refill_qty': (1000, 2000), 'sensor_type': 'count_sensor_bin'},
    38: {'name': 'Sugar Packets', 'category': 'Sweetener', 'uom': 'count', 'perishable': False, 'initial_qty': (1000, 2500), 'consumption': (3, 15), 'refill_threshold': (100, 250), 'refill_qty': (1000, 2500), 'sensor_type': 'count_sensor_bin'},
    39: {'name': 'Splenda Sweetener Packets', 'category': 'Sweetener', 'uom': 'count', 'perishable': False, 'initial_qty': (500, 1000), 'consumption': (1, 5), 'refill_threshold': (50, 100), 'refill_qty': (500, 1000), 'sensor_type': 'count_sensor_bin'},
    40: {'name': 'Napkins (Recycled)', 'category': 'Packaging', 'uom': 'count', 'perishable': False, 'initial_qty': (1500, 3000), 'consumption': (4, 20), 'refill_threshold': (150, 300), 'refill_qty': (1500, 3000), 'sensor_type': 'count_sensor_bin'}
}

# --- Initial State for each truck's inventory ---
# Each ingredient starts at a uniform draw between its maximum refill threshold
# and its maximum initial quantity, so no refill can trigger on the first reading.
truck_inventory_states = {}
for truck_id in range(1, NUM_TRUCKS + 1):
    truck_inventory_states[truck_id] = {
        ing_id: random.uniform(props['refill_threshold'][1], props['initial_qty'][1])
        for ing_id, props in INGREDIENT_PROPERTIES.items()
    }

# --- CSV Header ---
# One column name per line; order must match the rows written by the generator.
CSV_HEADER_INVENTORY = """
    telemetry_inventory_id
    telemetry_load_id
    truck_id
    telemetry_timestamp
    ingredient_id
    current_quantity_value
    unit_of_measure
    event_type
    associated_transaction_id
    source_sensor_id
""".split()

# --- Main Data Generation Logic for Inventory Telemetry ---
def generate_inventory_data():
    """
    Writes synthetic inventory telemetry to OUTPUT_FILENAME_INVENTORY.

    For every reading step between START_DATE (inclusive) and END_DATE
    (exclusive), each truck/ingredient pair gets one 'sensor_reading' row with
    its level after a random consumption draw. When that level is at or below
    a freshly drawn refill threshold, an additional 'replenished' row is
    written and the stored level is reset to the replenished quantity.

    Levels are kept in the module-level `truck_inventory_states` dict and are
    updated in place. Returns None; progress is printed to stdout.
    """
    print(f"Starting inventory data generation for {NUM_TRUCKS} trucks from {START_DATE} to {END_DATE}...")
    print(f"Output file: {OUTPUT_FILENAME_INVENTORY}")
    print(f"Generating data every {INTERVAL_MINUTES_READING} minute(s), with batch IDs every {INTERVAL_MINUTES_BATCH} minutes.")

    with open(OUTPUT_FILENAME_INVENTORY, 'w', newline='') as out_file:
        csv_out = csv.writer(out_file)
        csv_out.writerow(CSV_HEADER_INVENTORY)

        row_count = 0
        load_id = None
        current_timestamp = START_DATE

        while current_timestamp < END_DATE:
            # A fresh batch ID is minted on every batch-interval minute boundary.
            starts_new_batch = (
                current_timestamp.minute % INTERVAL_MINUTES_BATCH == 0
                and current_timestamp.second == 0
            )
            if starts_new_batch:
                load_id = current_timestamp.strftime('%Y%m%d%H%M%S') + '_inventory_batch'

            for truck_id in range(1, NUM_TRUCKS + 1):
                for ing_id, props in INGREDIENT_PROPERTIES.items():
                    stock = truck_inventory_states[truck_id]

                    # Consume a random amount, never dropping below zero.
                    used = random.uniform(props['consumption'][0], props['consumption'][1])
                    remaining = max(0, stock[ing_id] - used)

                    # Sensor reading reflecting the level after consumption.
                    csv_out.writerow([
                        str(uuid.uuid4()),
                        load_id,
                        truck_id,
                        current_timestamp.isoformat(),
                        ing_id,
                        round(remaining, 3),
                        props['uom'],
                        'sensor_reading',
                        None,
                        props['sensor_type'],
                    ])
                    row_count += 1
                    stock[ing_id] = remaining

                    # Nothing more to do while the level is above the drawn threshold.
                    trigger_level = random.uniform(props['refill_threshold'][0], props['refill_threshold'][1])
                    if remaining > trigger_level:
                        continue

                    # Replenishment event: shares the timestamp and batch ID of the reading.
                    top_up = random.uniform(props['refill_qty'][0], props['refill_qty'][1])
                    restocked = top_up + random.uniform(0, used * 0.1)
                    csv_out.writerow([
                        str(uuid.uuid4()),
                        load_id,
                        truck_id,
                        current_timestamp.isoformat(),
                        ing_id,
                        round(restocked, 3),
                        props['uom'],
                        'replenished',
                        None,
                        'manual_entry',
                    ])
                    row_count += 1
                    stock[ing_id] = restocked

            current_timestamp += timedelta(minutes=INTERVAL_MINUTES_READING)

            # Progress line whenever the clock has just rolled over to midnight.
            if current_timestamp.hour == 0 and current_timestamp.minute == 0:
                print(f"Generated {row_count} rows up to {current_timestamp.strftime('%Y-%m-%d')}...")

    print(f"\nInventory telemetry generation complete! Total rows generated: {row_count}. File saved to: {OUTPUT_FILENAME_INVENTORY}")


In [None]:
generate_inventory_data()

In [None]:
!head telemetry_inventory_raw.csv

In [None]:
!gsutil cp telemetry_inventory_raw.csv gs://gcs-bucket-namet/telemetry_inventory_raw/telemetry_inventory_raw.csv

In [None]:
%%bigquery

-- Loads the generated inventory telemetry CSV from Cloud Storage into the raw table.
-- The column list is in the same order as the CSV header written by the generator
-- (CSV_HEADER_INVENTORY); the header row itself is skipped via skip_leading_rows.
LOAD DATA INTO `agentic_beans_raw.telemetry_inventory`
(
    telemetry_inventory_id          STRING,
    telemetry_load_id               STRING,
    truck_id                        INT64,
    telemetry_timestamp             TIMESTAMP,
    ingredient_id                   INT64,
    current_quantity_value          NUMERIC(10, 3),
    unit_of_measure                 STRING,
    event_type                      STRING,
    associated_transaction_id       STRING,
    source_sensor_id                STRING,
)
FROM FILES (
 format = 'csv',
 skip_leading_rows = 1,
 uris = ['gs://gcs-bucket-namet/telemetry_inventory_raw/telemetry_inventory_raw.csv']);


#### Generate Camera Telemetry

In [None]:
import csv
import uuid
from datetime import datetime, timedelta
import random

# --- Configuration ---
# NOTE: the generator function reads these module-level names when it is called.
OUTPUT_FILENAME_CAMERA = 'telemetry_camera_vision_raw.csv'
START_DATE = datetime(2020, 1, 1)   # first timestamp written (inclusive)
END_DATE = datetime(2027, 1, 1)     # generation stops before this timestamp
INTERVAL_SECONDS_READING = 30       # one reading per camera every 30 seconds
INTERVAL_MINUTES_BATCH = 5          # a new batch upload ID every 5 minutes
NUM_TRUCKS = 20

# --- Camera ID Mapping (based on prior camera_dim inserts) ---
# Each truck T (1-20) owns two consecutive camera IDs:
#   exterior queue camera -> (T * 2) - 1
#   interior cabin camera -> (T * 2)
CAMERA_TYPES_PER_TRUCK = dict(
    exterior_queue=dict(id_offset=-1),
    interior_cabin=dict(id_offset=0),
)

# --- Normal Operating Ranges for AI detections (for clean data) ---
# Bounds for the random draws that simulate natural fluctuation per camera type.
NORMAL_DETECTION_RANGES = dict(
    exterior_queue=dict(
        people_in_queue=dict(min=0, max=15),         # typical maximum queue length
        foot_traffic_nearby=dict(min=10, max=100),   # general foot traffic
        confidence=dict(min=0.90, max=0.99),         # high confidence for clean data
        model_version='AI_Vision_v2.5',
    ),
    interior_cabin=dict(
        people_in_queue=dict(min=0, max=5),          # staff/customers inside, not a real queue
        foot_traffic_nearby=dict(min=0, max=10),     # little movement expected inside
        confidence=dict(min=0.90, max=0.99),
        model_version='AI_Vision_v2.5_Interior',
    ),
)

# --- CSV Header ---
# One column name per line; order must match the row built by the generator.
CSV_HEADER_CAMERA = """
    telemetry_camera_vision_id
    telemetry_load_id
    truck_id
    telemetry_timestamp
    camera_id
    people_in_queue_count
    foot_traffic_count_nearby
    ai_detection_confidence_score
    image_reference_url
    detection_model_version
""".split()

# --- Main Data Generation Logic for Camera Vision Telemetry ---
def generate_telemetry_camera_vision_data():
    """
    Writes synthetic camera vision telemetry to OUTPUT_FILENAME_CAMERA.

    For every reading step between START_DATE (inclusive) and END_DATE
    (exclusive), one row is written for each camera type of each truck, with
    detection counts and a confidence score drawn from NORMAL_DETECTION_RANGES.
    The image reference is always None (written as an empty CSV field).

    Configuration is read from module-level names at call time.
    Returns None; progress is printed to stdout.
    """
    print(f"Starting camera vision telemetry generation for {NUM_TRUCKS} trucks from {START_DATE} to {END_DATE}...")
    print(f"Output file: {OUTPUT_FILENAME_CAMERA}")
    print(f"Generating data every {INTERVAL_SECONDS_READING} second(s), with batch IDs every {INTERVAL_MINUTES_BATCH} minutes.")

    with open(OUTPUT_FILENAME_CAMERA, 'w', newline='') as out_file:
        csv_out = csv.writer(out_file)
        csv_out.writerow(CSV_HEADER_CAMERA)

        row_count = 0
        load_id = None
        current_timestamp = START_DATE

        while current_timestamp < END_DATE:
            # A fresh batch ID is minted at second 0 of every batch-interval minute.
            starts_new_batch = (
                current_timestamp.minute % INTERVAL_MINUTES_BATCH == 0
                and current_timestamp.second == 0
            )
            if starts_new_batch:
                load_id = current_timestamp.strftime('%Y%m%d%H%M%S') + '_camera_batch'

            for truck_id in range(1, NUM_TRUCKS + 1):
                for camera_type, params in CAMERA_TYPES_PER_TRUCK.items():
                    # Camera IDs are derived from the truck ID plus a per-type offset.
                    camera_id = (truck_id * 2) + params['id_offset']
                    ranges = NORMAL_DETECTION_RANGES[camera_type]

                    queue_bounds = ranges['people_in_queue']
                    queue_len = random.randint(queue_bounds['min'], queue_bounds['max'])

                    traffic_bounds = ranges['foot_traffic_nearby']
                    passers_by = random.randint(traffic_bounds['min'], traffic_bounds['max'])

                    score_bounds = ranges['confidence']
                    score = round(random.uniform(score_bounds['min'], score_bounds['max']), 2)

                    # Field order must match CSV_HEADER_CAMERA.
                    csv_out.writerow([
                        str(uuid.uuid4()),               # unique per row
                        load_id,                         # shared by all rows in the batch window
                        truck_id,
                        current_timestamp.isoformat(),
                        camera_id,
                        queue_len,
                        passers_by,
                        score,
                        None,                            # no image reference for clean demo data
                        ranges['model_version'],
                    ])
                    row_count += 1

            current_timestamp += timedelta(seconds=INTERVAL_SECONDS_READING)

            # Progress line each time the running total is a multiple of 100,000.
            if row_count % 100000 == 0:
                print(f"Generated {row_count} rows up to {current_timestamp}...")

    print(f"\nCamera vision telemetry generation complete! Total rows generated: {row_count}. File saved to: {OUTPUT_FILENAME_CAMERA}")


In [None]:
generate_telemetry_camera_vision_data()

In [None]:
!head telemetry_camera_vision_raw.csv

In [None]:
!gsutil cp telemetry_camera_vision_raw.csv gs://gcs-bucket-namet/telemetry_camera_vision_raw/telemetry_camera_vision_raw.csv

In [None]:
%%bigquery

-- Loads the generated camera vision telemetry CSV from Cloud Storage into the raw table.
-- The column list is in the same order as the CSV header written by the generator
-- (CSV_HEADER_CAMERA); the header row itself is skipped via skip_leading_rows.
LOAD DATA INTO `agentic_beans_raw.telemetry_camera_vision`
(
    telemetry_camera_vision_id      STRING,
    telemetry_load_id               STRING,
    truck_id                        INT64,
    telemetry_timestamp             TIMESTAMP,
    camera_id                       INT64,
    people_in_queue_count           INT64,
    foot_traffic_count_nearby       INT64,
    ai_detection_confidence_score   NUMERIC(3, 2),
    image_reference_url             STRING,
    detection_model_version         STRING,
)
FROM FILES (
 format = 'csv',
 skip_leading_rows = 1,
 uris = ['gs://gcs-bucket-namet/telemetry_camera_vision_raw/telemetry_camera_vision_raw.csv']);