# In [None]:
# Load necessary libraries
library(jsonlite)
library(dplyr)
library(ggplot2)
library(quantmod)
library(lubridate)

# Load the raw race data, keep only the rows that carry trades, and derive a
# proper timestamp column.
race_df <- read.csv("race_data.csv") %>%
  # Drop rows whose trades field is exactly the string "[]" (an empty JSON array)
  filter(trades != "[]") %>%
  # timestamp_unix is divided by 1000 before conversion (presumably it holds
  # milliseconds -- confirm against the data source); the result is POSIXct in GMT
  mutate(
    timestamp = as.POSIXct(
      timestamp_unix / 1000,
      origin = "1970-01-01",
      tz = "GMT"
    )
  )

# Parse every trades JSON string into an R object, turning trades into a list
# column; each entry is coerced to a character string before parsing
race_df$trades <- lapply(
  race_df$trades,
  function(json_txt) fromJSON(as.character(json_txt))
)


# Compute the total size and the size-weighted average price of one trades entry.
#
# The input is split by position: the first ceiling(n / 2) elements are used as
# prices and the remaining elements as sizes (the weights).
# NOTE(review): presumably `lst` is the price/size matrix produced by fromJSON,
# read column by column -- confirm against the actual trades format.
#
# Args:
#   lst: Numeric vector (or matrix) holding the values of a single row.
#
# Returns:
#   A list with two elements:
#     size  - sum of the second half of `lst`.
#     price - average of the first half weighted by the second half.
#   Both are NA_real_ when `lst` has fewer than two elements. `price` is
#   NA_real_ when the two halves differ in length (odd n) or when the total
#   size is zero, since the weighted average is undefined in those cases.
calculate_size_and_price <- function(lst) {
  n <- length(lst)

  # Fewer than two values cannot be split into a price half and a size half
  if (n < 2) {
    return(list(size = NA_real_, price = NA_real_))
  }

  half_n <- ceiling(n / 2)

  # Elements in the first half (prices)
  first_half <- lst[seq_len(half_n)]

  # Elements in the last half (sizes); n >= 2 guarantees half_n + 1 <= n
  last_half <- lst[seq(half_n + 1, n)]

  # Total size is the sum of the last half
  size <- sum(last_half)

  halves_match <- length(first_half) == length(last_half)

  # Only divide when every price has a weight and the total weight is non-zero;
  # a zero total would otherwise yield NaN (0 / 0) instead of a missing value.
  # An NA total is left to propagate through the division as before.
  if (halves_match && !isTRUE(size == 0)) {
    weighted_avg <- sum(first_half * last_half) / size
  } else {
    weighted_avg <- NA_real_
  }

  list(size = size, price = weighted_avg)
}

# Run calculate_size_and_price on every entry of the trades column and stack the
# per-row lists with rbind, giving one row per entry with columns size and price
results <- do.call(
  what = rbind,
  args = lapply(X = race_df$trades, FUN = calculate_size_and_price)
)
