In [None]:
%load_ext autoreload
%autoreload 2

import datetime
import json

from logger_utils import (
    log_query,
    query_logger,
    generate_log_file,
    get_scene_log_stats,
    get_latest_revision,
)

# --- Configuration: which Cloud Run service / revision the queries below target ---
project_id = "cerulean-338116"
service_name = "cerulean-cloud-test-cr-orchestrator"
# service_name = "cerulean-cloud-test-cr-offset-tiles"

# Update to current revision name (or pin a specific one with the commented line)
revision_name = get_latest_revision(project_id, service_name)
# revision_name = "cerulean-cloud-test-cr-orchestrator-00072-9x7"

# set start time to 1 day ago (timezone-aware, UTC); passed as `start_time`
# to every log query in this notebook
utc = datetime.timezone.utc
start_time = datetime.datetime.now(utc) - datetime.timedelta(days=1)
# timezone used when converting log timestamps for display
your_timezone = "US/Eastern"  # or "US/Arizona"

print(f"\n{revision_name}")
# start_time is timezone-aware, so its text form already ends in "+00:00";
# appending a literal "Z" on top of that produced a malformed "...+00:00Z".
# Emit a single UTC designator instead.
print(start_time.isoformat().replace("+00:00", "Z"))


# Querying the Logger

## The `log_query` function 
- Creates the query string to query the CloudRun Logs
- Args:
  - service_name (str): The name of the Cloud Run service.
  - revision_name (str, optional): The revision name to filter logs for a specific service revision.
  - instance_id (str, optional): The instance ID to filter logs for a specific Cloud Run instance.
  - start_time (datetime, optional): The start timestamp for log filtering.
  - end_time (datetime, optional): The end timestamp for log filtering.
  - textPayload (str, optional): A string to match in the text payload of the logs.
  - not_textPayload (str, optional): A string to exclude from the text payload of the logs.
  - jsonPayload (dict, optional): A dictionary of key-value pairs to match in the JSON payload of the logs.
      - e.g. {"message": "Initiating Orchestrator"}
  - severity (str, optional): The exact severity level to filter logs (e.g., "ERROR").
  - min_severity (str, optional): The minimum severity level to filter logs (e.g., "WARNING").
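- Most frequently, queries combine `revision_name` and `start_time` with either a `jsonPayload` message or a `min_severity`, as in the cells below.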

## The `query_logger` function 
- Queries the CloudRun Logs and returns a dataframe
- Args:
  - project_id (str): The ID of the Google Cloud project.
  - query (str): The filter query string for retrieving logs.


# Get uncompleted scenes

In [None]:
def _first_log_per_scene(message):
    """Query logs whose jsonPayload message equals `message`, one row per scene.

    Uses the notebook-level `service_name`, `revision_name`, `start_time` and
    `project_id`. When the query returns rows, they are grouped by `scene_id`
    and only the first row of each group is kept; an empty result is returned
    unchanged.
    """
    matching_logs = query_logger(
        project_id,
        log_query(
            service_name,
            revision_name=revision_name,
            jsonPayload={"message": message},
            start_time=start_time,
        ),
    )
    if len(matching_logs) > 0:
        matching_logs = matching_logs.groupby("scene_id").first().reset_index()
    return matching_logs


def _scene_ids(scene_logs):
    """Return the set of `scene_id` values found in the rows' `json_payload`.

    An empty `scene_logs` yields an empty set.
    """
    if len(scene_logs) == 0:
        return set()
    return set(scene_logs["json_payload"].apply(lambda payload: payload["scene_id"]))


# Scenes that logged the orchestrator start message since start_time.
start_logs = _first_log_per_scene("Initiating Orchestrator")
scenes_started = _scene_ids(start_logs)

# Scenes that logged the completion message since start_time.
finish_logs = _first_log_per_scene("Orchestration complete!")
scenes_finished = _scene_ids(finish_logs)

# scenes_finished is always a set (possibly empty), so no None check is needed.
print("Incomplete scenes:")
display(scenes_started - scenes_finished)

# Get logs for one scene

In [None]:
# Fetch the logs for one scene (the scene ID is the last argument) from the
# configured service/revision, starting at start_time.
logs = get_scene_log_stats(
    project_id,
    service_name,
    revision_name,
    start_time,
    "S1A_IW_GRDH_1SDV_20230408T175150_20230408T175215_048008_05C52F_3F9F",
)
if not logs.empty:
    # Convert timestamps to the timezone chosen in the configuration cell
    # before displaying them.
    logs["timestamp"] = logs["timestamp"].dt.tz_convert(your_timezone)
    display(logs.head())

# Generate a log file
This saves the logs from the cell above to a file (`log.txt`), listing the most recent logs first.

In [None]:
# Write the current `logs` (defined by an earlier cell) to log.txt.
generate_log_file(logs, filename="log.txt")

# Get all Errors and Warnings

In [None]:
# Everything at WARNING severity or above for this revision since start_time.
query = log_query(
    service_name,
    revision_name=revision_name,
    start_time=start_time,
    min_severity="WARNING",
)
logs = query_logger(project_id, query)

if not logs.empty:
    # filter for only logs created with our manual logger (if the error isn't caught by our logging, this line should be removed)
    logs = logs[logs["instanceId"].notnull()]

    print(len(logs))
    display(logs.head())

In [None]:
# Write the current `logs` (defined by an earlier cell) to log.txt.
# NOTE(review): same filename as the earlier log-file cell, so this presumably
# replaces that file - confirm against generate_log_file.
generate_log_file(logs, filename="log.txt")

# Get all logs for revision 
(Only use this when testing a small number of tiles; otherwise the result will be very large and will probably crash.)

In [None]:
# Build a filter for every entry at INFO severity or above logged by this
# revision since start_time, then run it.
query = log_query(
    service_name,
    min_severity="INFO",
    start_time=start_time,
    revision_name=revision_name,
)
logs = query_logger(project_id, query)

# Report how many entries came back, then preview the first few.
print(len(logs))
display(logs.head())

# Get all logs for Instance 


In [None]:
# Instance ID
instance_id = "0066d92480ea2f10e387a3d4930777cd5ec7dac3849054a147e7012b81eb62e0cd49983e8196e4cc20b86712733793259e4a161a83b56714e6a8600effc6cb62a19fc3b560"

query = log_query(
    service_name,
    revision_name=revision_name,
    instance_id=instance_id,
    start_time=start_time,
    min_severity="INFO",
)
logs = query_logger(project_id, query)
print(len(logs))

display(logs.head())

# Get SIGTERM logs

#### Some likely SIGTERM reasons:
- CloudRun Instance Scale Down
    - This should happen every time an instance shuts down
    - example SIGTERM log:
```python
jsonPayload = {
    'message': 'SIGTERM signal received.',
    'line_number': 44.0,
    'file_name': '/usr/lib/python3.9/asyncio/runners.py',
    'scene_id': 'S1B_IW_GRDH_1SDV_20170110T061226_20170110T061251_003782_006809_D7FE'
}
```

- CloudRun Instance Times out
    - Happens when an instance takes longer to run than its timeout
    - example SIGTERM log:
```python
jsonPayload = {
    'message': 'SIGTERM signal received.',
    'line_number': 798.0,
    'file_name': '/app/site-packages/shapely/predicates.py',
    'scene_id': 'S1A_IW_GRDH_1SDV_20230408T175150_20230408T175215_048008_05C52F_3F9F'
}
```

In [None]:
# All entries whose jsonPayload message is the SIGTERM notice, for this
# revision since start_time.
query = log_query(
    service_name,
    revision_name=revision_name,
    jsonPayload={"message": "SIGTERM signal received."},
    start_time=start_time,
)
logs = query_logger(project_id, query)
if not logs.empty:
    # Show one payload per scene: the first row of each scene_id group.
    # A plain loop is used because display() is called only for its side
    # effect; the previous `_ = [...]` also shadowed IPython's `_` variable.
    sigterm_payloads = logs.groupby("scene_id").first().reset_index()["json_payload"]
    for payload in sigterm_payloads:
        display(payload)

# Get logs with specific message
The message must match exactly; a substring of the logged message will not match. For this reason, all log messages are fixed strings (not f-strings), with any variables captured by kwargs.

In [None]:
# Exact log message to search for.
message = "Generated image"

# Key/value pairs that must all match in the entry's JSON payload.
jsonPayload = {
    "message": message,
    "scene_id": "S1A_IW_GRDH_1SDV_20230219T043841_20230219T043910_047300_05AD37_C54C",
}
query = log_query(
    service_name,
    revision_name=revision_name,
    jsonPayload=jsonPayload,
    start_time=start_time,
)
logs = query_logger(project_id, query)
if not logs.empty:
    # Plain loop: display() is called only for its side effect, and the
    # previous `_ = [...]` shadowed IPython's `_` last-output variable.
    for payload in logs["json_payload"]:
        display(payload)
else:
    print(
        f"no instances of {message} in the logs for payload: {json.dumps(jsonPayload)}"
    )