In [1]:
from datetime import date, datetime
from typing import Dict, List

import numpy as np


# Size budget per stored item, in bytes (4 KB).
# NOTE(review): DynamoDB's hard per-item limit is 400 KB; 4 KB is the size covered
# by a single read capacity unit, which is presumably why it was chosen - confirm.
max_item_size = 4096

# Number of activity scores that fit in one item under that budget:
#   13 bytes are reserved for the other attributes (5 - "u", 5 - "t", 3 - "v"),
#   and each element of the "v" list is budgeted at 3 bytes.
max_elements = (max_item_size - 13) // 3

# Each activity score covers 30 seconds, so 2880 scores span one day (86400 s).
seconds_per_score = 30
scores_per_day = 2880

# Number of items needed for one full day of scores (ceiling division).
num_subsegments = -(-scores_per_day // max_elements)


def convert_to_dynamodb_documents(user_id: int, day: date, activity_scores: List[int]) -> List[Dict]:
    '''Split one day of activity scores into DynamoDB-sized documents.

    The scores are cut into consecutive chunks of at most ``max_elements``
    values; the number of chunks follows the actual input length, so no empty
    documents are produced and no scores are dropped.

    Args:
        user_id: user ID (from 1 to 1000000)
        day: calendar day the scores belong to; local midnight of that day is
            used as the timestamp of the first score
        activity_scores: activity scores in chronological order, one every
            30 seconds (normally 2880 per day)
    Returns:
        dynamodb_documents: List of DynamoDB documents (JSON dictionaries) with
            keys "u" (user ID), "t" (Unix timestamp of the first score in the
            document) and "v" (the scores of that chunk). Empty if
            activity_scores is empty.
    '''
    # Local midnight of `day` as a Unix timestamp. datetime.timestamp() is used
    # instead of strftime("%s"), which is a platform extension and not portable.
    day_start = int(datetime(day.year, day.month, day.day).timestamp())

    dynamodb_documents = []

    # Step through the scores one chunk at a time
    for start_idx in range(0, len(activity_scores), max_elements):
        dynamodb_documents.append({
            "u": user_id,
            # Each score is 30 s after the previous one, so the chunk starts
            # start_idx * 30 seconds after midnight.
            "t": day_start + start_idx * seconds_per_score,  # Unix timestamp
            "v": activity_scores[start_idx:start_idx + max_elements],
        })

    return dynamodb_documents


In [2]:
def unix_to_date(unix):
    """Render a Unix timestamp as a 'YYYY.MM.DD HH:MM:SS' string in local time."""
    moment = datetime.fromtimestamp(unix)
    return moment.strftime('%Y.%m.%d %H:%M:%S')

In [3]:
# Demo: build one day of random activity scores for a random user and split it.
rng = np.random.default_rng(42)  # fixed seed so a re-run prints the same values

# User IDs run from 1 to 1000000 inclusive; int() turns the numpy scalar into a plain int
user_id = int(rng.integers(1, 1_000_000, endpoint=True))
input_date_str = '2023.09.08'
input_date = datetime.strptime(input_date_str, '%Y.%m.%d').date()
# .tolist() yields plain Python ints (numpy integers are not JSON-serializable)
act_sc = rng.integers(0, 100, size=2880).tolist()    # activity scores in [0, 99]

dynamodb_documents = convert_to_dynamodb_documents(user_id=user_id, day=input_date, activity_scores=act_sc)
for dict_ in dynamodb_documents:
    print(dict_['u'], dict_['t'], len(dict_['v']))

153341 1694120400 1361
153341 1694161230 1361
153341 1694202060 158


In [4]:
# Show when each document starts (local time) and how many scores it holds
for document in dynamodb_documents:
    starts_at = unix_to_date(document['t'])
    score_count = len(document['v'])
    print(starts_at, score_count)

2023.09.08 00:00:00 1361
2023.09.08 11:20:30 1361
2023.09.08 22:41:00 158
