<a href="https://colab.research.google.com/github/Henriquestoli/Unbabel/blob/main/backendchalleng.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import json
from datetime import datetime, timedelta
from collections import deque

def read_events(input_file):
    """
    Reads translation events from a JSON Lines file and returns them as a list.

    Args:
        input_file: Path of a text file holding one JSON document per line.

    Returns:
        A list with one parsed JSON value per non-blank line, in file order.
        An empty file yields an empty list.

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If a non-blank line is not valid JSON.
    """
    # Read as UTF-8 explicitly so the result does not depend on the platform's
    # default locale encoding.
    with open(input_file, 'r', encoding='utf-8') as file:
        # Blank lines (typically a trailing empty line) are skipped because
        # json.loads would reject them.
        return [json.loads(line) for line in file if line.strip()]

def compute_moving_average(events, window_size):
    """
    Computes the moving average delivery time of translation events, one value per minute.

    The output covers every minute from the minute of the earliest event to the
    minute of the latest event, inclusive. The entry labelled with minute ``t``
    averages the 'duration' of every event whose timestamp lies in
    ``[t - window_size minutes, t + 1 minute)``.

    NOTE(review): that span is ``window_size + 1`` minutes wide because the
    events of the labelled minute itself are included. It is kept as-is so the
    existing output does not change - confirm against the intended specification.

    Args:
        events: Iterable of dictionaries, each with a 'timestamp' string in
            '%Y-%m-%d %H:%M:%S.%f' format and a numeric 'duration'. The events
            do not need to be sorted and the input is not modified.
        window_size: The size of the moving window in minutes.

    Returns:
        A list of dictionaries with the keys 'date' (the minute, formatted as
        '%Y-%m-%d %H:%M:%S') and 'average_delivery_time' (the mean duration
        rounded to 2 decimals, or 0 when no event is inside the window).
        An empty list is returned when there are no events.

    Raises:
        KeyError: If an event lacks the 'timestamp' or 'duration' key.
        ValueError: If a timestamp does not match the expected format.
    """
    # Loop-invariant time spans, built once instead of on every iteration
    one_minute = timedelta(minutes=1)
    window_span = timedelta(minutes=window_size)

    # Convert timestamp strings to datetime objects and order the events
    # chronologically; the sweep below relies on that order (the sort is stable,
    # so already-sorted input keeps its original order).
    parsed = sorted(
        (
            {
                'timestamp': datetime.strptime(e['timestamp'], '%Y-%m-%d %H:%M:%S.%f'),
                'duration': e['duration'],
            }
            for e in events
        ),
        key=lambda e: e['timestamp'],
    )

    # Nothing to report without events (also avoids indexing an empty list)
    if not parsed:
        return []

    # Start and end of the reported range, truncated down to the whole minute
    start_time = parsed[0]['timestamp'].replace(second=0, microsecond=0)
    end_time = parsed[-1]['timestamp'].replace(second=0, microsecond=0)

    pending = deque(parsed)   # Events not yet reached; deque gives O(1) removal from the front
    moving_window = deque()   # Events inside the current window, oldest first
    result = []

    # Iterate over each minute from start_time to end_time
    current_time = start_time
    while current_time <= end_time:
        # Remove events that are older than the window's lower bound
        oldest_allowed = current_time - window_span
        while moving_window and moving_window[0]['timestamp'] < oldest_allowed:
            moving_window.popleft()

        # Add events that occurred before the end of the current minute
        minute_end = current_time + one_minute
        while pending and pending[0]['timestamp'] < minute_end:
            moving_window.append(pending.popleft())

        # Calculate the average delivery time for events in the moving window
        if moving_window:
            average_duration = sum(event['duration'] for event in moving_window) / len(moving_window)
        else:
            average_duration = 0

        # Append the result for the current time
        result.append({
            'date': current_time.strftime('%Y-%m-%d %H:%M:%S'),
            'average_delivery_time': round(average_duration, 2)
        })

        # Move to the next minute
        current_time += one_minute

    return result

# Input/output locations and the width of the moving window (in minutes)
input_file = 'events.json'
output_file = 'output.json'
window_size = 10

# Load the events from the input file
events = read_events(input_file)

# Derive the per-minute moving average of the delivery time
moving_averages = compute_moving_average(events, window_size)

# Persist the results, one JSON object per line
with open(output_file, 'w') as file:
    for item in moving_averages:
        print(json.dumps(item), file=file)

# Tell the user where the results were written
print(f'Moving averages saved to {output_file}')
