In [1]:
import matplotlib.pyplot as plt
import os
import pandas as pd

In [2]:
## Open every "prices*.csv" file in the all_data_prices folder and save it into a
## dictionary (filename = key, df = value). The files are read with ";" as separator.
data: dict[str, pd.DataFrame] = {}
# os.listdir() returns entries in arbitrary order; sorting makes the insertion
# order of `data` (and therefore the order of everything that iterates over it
# later, e.g. printed file names and figure numbers) reproducible across runs.
for file in sorted(os.listdir("all_data_prices")):
    if file.endswith(".csv") and file.startswith("prices"):
        data[file] = pd.read_csv(os.path.join("all_data_prices", file), sep=";")

In [3]:
# Find the average timestamp at which the smoothed mid_price of BERRIES is the highest

# Number of consecutive BERRIES rows averaged by the rolling mean below.
# The window counts rows, not timestamp units.
BERRIES_ROLLING_WINDOW = 1000

list_timestamp_max = []
for file in data:
    # keep only the BERRIES rows (copy so the loaded frame is not modified)
    berries_data = data[file][data[file]["product"] == "BERRIES"].copy()
    if berries_data.empty:
        # this file has no BERRIES rows at all
        continue
    # replace mid_price by its moving average
    berries_data["mid_price"] = (
        berries_data["mid_price"].rolling(BERRIES_ROLLING_WINDOW).mean()
    )
    # change idx to timestamp column so that idxmax() returns a timestamp
    berries_data.index = berries_data["timestamp"]
    smoothed_mid_price = berries_data["mid_price"]
    if smoothed_mid_price.isna().all():
        # With fewer rows than the window (or only missing prices) the rolling
        # mean is NaN everywhere; idxmax() would then yield NaN or raise,
        # depending on the pandas version, and corrupt the average below.
        print("Skipping", file, "- not enough BERRIES rows for the rolling window")
        continue
    # find the timestamp of the highest smoothed mid_price
    list_timestamp_max.append(smoothed_mid_price.idxmax())

if list_timestamp_max:
    # calculate average time
    avg_time = sum(list_timestamp_max) / len(list_timestamp_max)
    print("Average timestamp when mid_price of BERRIES is highest:", avg_time)
else:
    print("No rows with product BERRIES found in the data.")


Average timestamp when mid_price of BERRIES is highest: 558150.0


In [4]:
# Find the total number of unique products across all files
unique_products = set()
for file in data:
    unique_products.update(data[file]["product"].unique())
num_products = len(unique_products)

# Calculate the number of rows and columns needed for the grid: roughly square,
# with enough cells for every product. Both values are clamped to at least 1 so
# that an empty product set does not cause a division by zero or a 0-sized grid.
num_rows = max(1, int(num_products ** 0.5))
num_cols = max(1, -(-num_products // num_rows))  # ceiling division

# Number of consecutive rows averaged when smoothing each product's mid_price.
PLOT_ROLLING_WINDOW = 100

# savefig() does not create missing directories, so make sure the target exists.
os.makedirs("plots", exist_ok=True)

## For all files in data, plot, for each product, midprice as a function of time
for file_idx, file in enumerate(data):
    print(file)
    prices = data[file]
    # Create a new figure for each file
    fig = plt.figure(file_idx, figsize=(15, 15))
    try:
        fig.suptitle(file)

        # Iterate through each unique product and plot the midprice as a function of time
        for idx, product in enumerate(prices["product"].unique()):
            # select this product's rows once and reuse them for both axes
            product_rows = prices[prices["product"] == product]
            x = product_rows["timestamp"]
            # replace y by a moving average of y
            y = product_rows["mid_price"].rolling(PLOT_ROLLING_WINDOW).mean()

            ax = fig.add_subplot(num_rows, num_cols, idx + 1)
            ax.plot(x, y, label=product)
            ax.set_xlabel("timestamp")
            ax.set_ylabel("mid_price (rolling mean)")
            ax.legend()

        # Save the figure for the current file (it is not displayed inline)
        fig.savefig("plots/" + file + ".png")
    finally:
        # Always release the figure, even if plotting or saving failed
        plt.close(fig)


prices_round_1_day_-2.csv
prices_round_2_day_-1.csv
prices_round_3_day_0.csv
prices_round_4_day_1.csv
prices_round_4_day_2.csv
prices_round_4_day_3.csv
