In [1]:
import json
import random
import time
from collections import defaultdict

import pandas as pd
import requests
from fastapi.encoders import jsonable_encoder
from ranking_challenge.fake import fake_request
from ranking_challenge.request import ContentItem, RankingRequest
from tqdm import tqdm



In [2]:
# Load one sample feed per platform; the JSON files are looked up relative to
# the notebook's working directory.
facebook, reddit, twitter = (
    pd.read_json(feed_path)
    for feed_path in ('facebook_feed.json', 'reddit_feed.json', 'twitter_feed.json')
)

In [3]:
# Target latency in seconds (500ms p95).
TARGET_LATENCY = 0.5

# Number of requests for each platform to generate a statistically valid sample.
NUM_REQUESTS = 600

# Sample feed dataframe for each platform, keyed by platform display name.
SAMPLES = {
    'Facebook': facebook,
    'Reddit': reddit,
    'Twitter': twitter,
}

# Platforms under test, in the order they are exercised (same order as SAMPLES).
PLATFORMS = list(SAMPLES)

In [4]:
# Empty results table; one row is appended per issued request.
results_df = pd.DataFrame(
    columns=[
        'Platform',   # platform the request was issued for
        'Latency',    # measured request latency
        'Num_Items',  # number of content items sent in the request
    ],
)

In [5]:
# Row labels already handed out, per platform. This lives at module level so
# that the position in each platform's dataframe survives between calls.
selected_rows = {}


def generate_items(platform):
    """Return the content items of the next unused sample row for ``platform``.

    Each call walks one row further through the platform's dataframe in
    ``SAMPLES``. The labels of rows already handed out are remembered in the
    module-level ``selected_rows`` dict. Once every row has been used, the
    bookkeeping for that platform is reset and iteration starts again from the
    first row, so the function can be called more often than there are samples.

    Args:
        platform: Key into ``SAMPLES`` (e.g. ``'Facebook'``).

    Returns:
        A list with one validated ``ContentItem`` per non-null cell of the
        selected row.

    Raises:
        ValueError: If ``platform`` has no entry in ``SAMPLES`` or its
            dataframe is empty.
    """
    df = SAMPLES.get(platform)
    if df is None:
        raise ValueError(f"No samples configured for platform {platform!r}")
    if df.empty:
        raise ValueError(f"Sample dataframe for platform {platform!r} is empty")

    # Mutate the shared dict in place. Rebinding the name inside the function
    # would create a fresh local dict on every call and always yield row 0.
    selected_indices = selected_rows.setdefault(platform, [])
    filtered_df = df[~df.index.isin(selected_indices)]

    if filtered_df.empty:
        # Every sample has been used once: start over from the first row.
        selected_indices.clear()
        filtered_df = df

    next_row = filtered_df.iloc[0]
    selected_indices.append(next_row.name)

    # Null cells are skipped. Presumably rows are padded with None/NaN when
    # feeds have different lengths -- TODO confirm against the feed files.
    return [ContentItem.model_validate(item) for item in next_row.dropna()]


In [6]:
def issue_request(platform, url, timeout=30):
    """POST one ranking request for ``platform`` to ``url`` and record its latency.

    The request envelope is produced by ``fake_request`` with no generated
    posts or comments; its items are replaced by the next sample returned by
    ``generate_items``. On success a ``[platform, latency, number of items]``
    row is appended to the module-level ``results_df``.

    Args:
        platform: Platform name; passed as-is to ``generate_items`` and
            lower-cased for ``fake_request``.
        url: Endpoint that receives the POST request.
        timeout: Seconds passed to ``requests.post`` as its timeout, so a
            stalled service cannot block the test run forever.

    Returns:
        The measured latency in seconds.

    Raises:
        RuntimeError: If the response status code is not 200.
    """
    items = generate_items(platform)
    request = fake_request(n_posts=0, n_comments=0, platform=platform.lower())
    request.items = items

    # Encode before starting the clock so that client-side model serialization
    # is not counted as latency of the service under test.
    payload = jsonable_encoder(request)

    # perf_counter is monotonic and high resolution; time.time() can jump when
    # the system clock is adjusted, which would corrupt the measurement.
    start_time = time.perf_counter()
    # The application should be running on localhost:8000.
    response = requests.post(url, json=payload, timeout=timeout)
    latency = time.perf_counter() - start_time

    if response.status_code != 200:
        raise RuntimeError('Request failed with status code: {}, error: {}'.format(response.status_code, response.text))
    # NOTE(review): the response body is currently discarded; decide whether
    # it should be stored for later inspection.

    # Store latency, platform, and number of items in the results DataFrame.
    results_df.loc[len(results_df)] = [platform, latency, len(request.items)]
    return latency


In [7]:
# Latencies returned by issue_request, grouped by platform name.
latencies = defaultdict(list)


def run_test(url):
    """Issue ``NUM_REQUESTS`` requests per platform against ``url``.

    Platforms are processed in ``PLATFORMS`` order, each with its own progress
    bar. Every latency returned by ``issue_request`` is appended to
    ``latencies[platform]``.
    """
    for platform in PLATFORMS:
        progress = tqdm(range(NUM_REQUESTS), desc=f"Platform: {platform}")
        for _ in progress:
            latencies[platform].append(issue_request(platform, url))


In [8]:
# Run the full latency test against the ranking endpoint of the local service.
run_test(url='http://localhost:8000/rank')

Platform: Facebook:   0%|          | 0/600 [00:00<?, ?it/s]

Platform: Facebook: 100%|██████████| 600/600 [00:08<00:00, 72.07it/s]
Platform: Reddit: 100%|██████████| 600/600 [00:07<00:00, 79.95it/s]
Platform: Twitter: 100%|██████████| 600/600 [00:05<00:00, 105.12it/s]


In [9]:
# TODO: analyze the collected measurements here (`latencies` and `results_df`),
# e.g. compare each platform's p95 latency against TARGET_LATENCY.