# Download and analyse sbeacon log events

## Downloading log events

We are going to analyse the log events of user `admin@example.com` (an admin/manager user).

To perform this task, you must have AWS console access, because the access keys obtained there are needed to query AWS from the command line (AWS CLI).


In [18]:
# user:  e-mail address whose requests are reported further down
# scope: file-name prefix of the downloaded log files ("<scope>_*.json")
user, scope = "admin@example.com", "individuals"

In [17]:
%%file download_sbeacon_individuals_logs.sh

#!/bin/bash
#
# Download every log stream of one CloudWatch log group into the current
# directory, one JSON file per stream.
#
# Output:   ${NAME}_<stream name with "/" replaced by "_">.json
#           Each file holds ONE JSON array with all events of the stream.
#           Streams without any event produce no file.
# Requires: aws CLI (credentials available in the environment) and jq.

# Stop on failed commands, unset variables and failed pipeline stages, so that
# e.g. expired credentials abort the run instead of leaving empty files behind.
set -euo pipefail

readonly LOG_GROUP_NAME="/aws/lambda/sbeacon-backend-getIndividuals"
readonly REGION="ap-southeast-2"
readonly NAME="individuals"

for tool in aws jq; do
  if ! command -v "$tool" >/dev/null; then
    printf 'Required tool not found: %s\n' "$tool" >&2
    exit 1
  fi
done

# Scratch file collecting the pages of the stream currently being downloaded.
pages_file=$(mktemp)
trap 'rm -f -- "$pages_file"' EXIT

# Get all log stream names, one per line. JSON output + jq is used instead of
# text output so that names are never word-split or glob-expanded by the shell
# (stream names may contain characters such as "[", "]" and "$").
log_streams=$(aws logs describe-log-streams \
  --log-group-name "$LOG_GROUP_NAME" \
  --query 'logStreams[*].logStreamName' \
  --output json \
  --region "$REGION" \
  | jq -r '.[]?')

# The names are fed through file descriptor 3 so that commands inside the loop
# cannot accidentally consume them from stdin.
while IFS= read -r stream <&3; do
  [[ -n "$stream" ]] || continue

  echo "Downloading logs for stream: $stream"
  safe_stream_name=${stream//\//_}
  output_file="${NAME}_${safe_stream_name}.json"
  : > "$pages_file"

  next_token=""
  while :; do
    request=(aws logs get-log-events
      --log-group-name "$LOG_GROUP_NAME"
      --log-stream-name "$stream"
      --region "$REGION"
      --output json)
    if [[ -z "$next_token" ]]; then
      # First page: start reading at the oldest event.
      request+=(--start-from-head)
    else
      request+=(--next-token "$next_token")
    fi
    response=$("${request[@]}")

    # Keep only the "events" array of this page (one compact array per line).
    jq -c '.events' <<< "$response" >> "$pages_file"

    # Get the nextForwardToken for the next page ("" when it is missing).
    new_token=$(jq -r '.nextForwardToken // empty' <<< "$response")

    # The end of the stream is reached when the service hands back the same
    # token that was sent (or no token at all).
    if [[ -z "$new_token" || "$new_token" == "$next_token" ]]; then
      break
    fi
    next_token=$new_token
  done

  # Merge all pages into a single JSON array so the file is one valid document.
  event_count=$(jq -s 'add // [] | length' "$pages_file")
  if (( event_count == 0 )); then
    # Do not leave an empty (or stale) file behind for a stream without events.
    rm -f -- "$output_file"
    echo "Stream $stream has no events; no file written"
  else
    jq -s 'add' "$pages_file" > "$output_file"
  fi

  echo "Finished downloading $stream"
done 3<<< "$log_streams"

echo "All log streams downloaded."



Overwriting download_sbeacon_individuals_logs.sh


In [None]:
# Run the following command in a terminal where the AWS keys are set
# bash download_sbeacon_individuals_logs.sh

Alternatively, you can change the script to download logs related to other beacon endpoints. The relevant log groups are as follows.

- /aws/lambda/sbeacon-backend-admin
- /aws/lambda/sbeacon-backend-dataPortal
- /aws/lambda/sbeacon-backend-deidentifyFiles
- /aws/lambda/sbeacon-backend-generateCohortVCfs
- /aws/lambda/sbeacon-backend-generateReports
- /aws/lambda/sbeacon-backend-getAnalyses
- /aws/lambda/sbeacon-backend-getBiosamples
- /aws/lambda/sbeacon-backend-getConfiguration
- /aws/lambda/sbeacon-backend-getDatasets
- /aws/lambda/sbeacon-backend-getEntryTypes
- /aws/lambda/sbeacon-backend-getFilteringTerms
- /aws/lambda/sbeacon-backend-getGenomicVariants
- /aws/lambda/sbeacon-backend-getIndividuals
- /aws/lambda/sbeacon-backend-getInfo
- /aws/lambda/sbeacon-backend-getMap
- /aws/lambda/sbeacon-backend-getProjects
- /aws/lambda/sbeacon-backend-getRuns
- /aws/lambda/sbeacon-backend-indexer
- /aws/lambda/sbeacon-backend-performQuery
- /aws/lambda/sbeacon-backend-splitQuery
- /aws/lambda/sbeacon-backend-submitDataset
- /aws/lambda/sbeacon-backend-updateFiles


## Loading the events


In [19]:
from glob import glob
import json

def iterate_log_entries():
    """Yield the downloaded log events grouped into one list per invocation.

    Reads every file matching ``{scope}_*.json`` in the current directory
    (``scope`` is the module-level variable), in sorted file-name order.
    A file may contain a single JSON array or several arrays written one
    after another (one per downloaded page, possibly empty); all of them are
    read.

    Events are collected until one whose message starts with ``"REPORT"`` is
    seen; that list (including the REPORT event) is then yielded. Grouping is
    done per file, so events at the end of a file that are not followed by a
    REPORT event are dropped instead of leaking into the next file's first
    group.

    Yields:
        list[dict]: the consecutive log events of one group.

    Raises:
        json.JSONDecodeError: if a file contains text that is not JSON.
    """
    decoder = json.JSONDecoder()

    for file in sorted(glob(f"{scope}_*.json")):
        with open(file, "r") as f:
            data = f.read()

        # Decode every JSON value in the file, not just the first one.
        entries = []
        pos, end = 0, len(data)
        while True:
            # raw_decode does not accept leading whitespace, so skip it here.
            while pos < end and data[pos].isspace():
                pos += 1
            if pos == end:
                break
            page, pos = decoder.raw_decode(data, pos)
            entries.extend(page or [])  # tolerate "null" / empty pages

        log_entry = []
        for entry in entries:
            log_entry.append(entry)
            if entry["message"].startswith("REPORT"):
                yield log_entry
                log_entry = []


## sBeacon individuals scoped events for the user admin@example.com


In [38]:
from textwrap import indent
import re
from urllib.parse import unquote

# Path of the biosamples listing of one individual: /individuals/<id>/biosamples
re_individuals_id_biosamples = re.compile(r"^/individuals/.*/biosamples$")

# Print a human-readable line for every request of `user` found in the logs.
for log_entry in iterate_log_entries():
    # The request itself is logged in the message starting with "Event Received".
    # Groups without such a message carry no request to report, so skip them.
    log_event = next(
        (x for x in log_entry if x["message"].startswith("Event Received")), None
    )
    if log_event is None:
        continue

    # Strip only the leading label; the JSON payload itself stays untouched.
    event = json.loads(log_event["message"].replace("Event Received: ", "", 1))

    # Requests without authorizer claims cannot belong to `user`; skip them
    # instead of failing on the missing keys.
    request_context = event.get("requestContext") or {}
    claims = (request_context.get("authorizer") or {}).get("claims") or {}
    if claims.get("email") != user:
        continue

    method = event["httpMethod"]
    path = event["path"]
    timestamp = log_event["timestamp"]
    # A request may have no body (null); it is then printed as "null".
    body = json.loads(event["body"]) if event.get("body") else None

    if method == "GET" and path == "/individuals/filtering_terms":
        print(f"User {user} listed filtering terms at {timestamp}")
        print("\tQuery params:")
        print(indent(json.dumps(event["queryStringParameters"], indent=4), "\t"))

    # Both spellings of the path count, but only for POST requests.
    elif method == "POST" and path in ("/individuals", "/individuals/"):
        print(f"User {user} listed individuals at {timestamp}")
        print("\tQuery body:")
        print(indent(json.dumps(body, indent=4), "\t"))

    elif method == "POST" and re_individuals_id_biosamples.match(path):
        # The id is the path segment right before "biosamples".
        individual_id = path.split("/")[-2]
        print(f'User {user} listed individual: "{individual_id}" biosamples at {timestamp}')
        print("\tQuery body:")
        print(indent(json.dumps(body, indent=4), "\t"))

    else:
        print("MISSED EVENT", method, path)




User admin@example.com listed filtering terms at 1745366559418
	Query params:
	{
	    "limit": "100",
	    "projects": "Example Query Project",
	    "skip": "0"
	}
User admin@example.com listed individual: "1-0-0" biosamples at 1745477896996
	Query body:
	{
	    "projects": [
	        "Example Query Project"
	    ],
	    "query": {
	        "filters": [],
	        "requestedGranularity": "record",
	        "pagination": {
	            "skip": 0,
	            "limit": 100
	        }
	    },
	    "meta": {
	        "apiVersion": "v2.0"
	    }
	}
User admin@example.com listed individuals at 1741827872945
	Query body:
	{
	    "projects": [
	        "Example Query Project"
	    ],
	    "query": {
	        "filters": [],
	        "requestedGranularity": "count",
	        "pagination": {}
	    },
	    "meta": {
	        "apiVersion": "v2.0"
	    }
	}
User admin@example.com listed individuals at 1741827954793
	Query body:
	{
	    "projects": [
	        "Example Query Project"
	    ],
	    "que