In [4]:
import pandas as pd
import matplotlib.pyplot as plt

# Load both price feeds, restricting each read to its timestamp column and its
# price column so the remaining CSV columns are never parsed.

# 1inch flow feed: timestamp column is 'block_time'.
flow_df = pd.read_csv('../csvs/marketData/market1InchFlowBNBUSDT.csv',
                      usecols=['block_time', 'price'])

# Binance aggregated-trades feed: timestamp column is 'datetime'.
agg_trades_df = pd.read_csv('../csvs/marketData/aggTradesBinance_BNBUSDT.csv',
                            usecols=['datetime', 'price'])

In [6]:
import pandas as pd
import matplotlib.pyplot as plt
import time

# Clean both price feeds and plot them against each other, one figure per UTC
# calendar day that has data in BOTH feeds.
#
# Reads:  flow_df ('block_time', 'price') and agg_trades_df ('datetime', 'price')
#         as produced by the loading cell.
# Writes: flow_df / agg_trades_df are cleaned in place (timestamps parsed as UTC,
#         non-numeric prices dropped); flow_df_indexed / agg_trades_df_indexed are
#         time-indexed copies of them.

# Seconds to wait after each figure is shown; set to 0 to disable the pause.
PLOT_PAUSE_SECONDS = 10

# Convert 'block_time' and 'datetime' columns to timezone-aware (UTC) datetimes
flow_df['block_time'] = pd.to_datetime(flow_df['block_time'], utc=True)
agg_trades_df['datetime'] = pd.to_datetime(agg_trades_df['datetime'], utc=True)

# Convert prices to numeric; anything unparseable becomes NaN
flow_df['price'] = pd.to_numeric(flow_df['price'], errors='coerce')
agg_trades_df['price'] = pd.to_numeric(agg_trades_df['price'], errors='coerce')

# Drop rows whose price could not be parsed. Done in place (as before) so the
# frames keep their identity and re-running this cell stays warning-free.
flow_df.dropna(subset=['price'], inplace=True)
agg_trades_df.dropna(subset=['price'], inplace=True)

# Time-indexed copies; the original DataFrames keep their timestamp columns
flow_df_indexed = flow_df.set_index('block_time')
agg_trades_df_indexed = agg_trades_df.set_index('datetime')

# Debugging: check the first few rows of each dataset to ensure data is loaded correctly
print("Flow data (first 5 rows):")
print(flow_df_indexed.head())

print("Agg Trades data (first 5 rows):")
print(agg_trades_df_indexed.head())

# Bucket every row by its UTC day ONCE. The previous loop walked
# `flow_df_indexed.index.date` row by row, so each day was re-plotted once per
# row and both frames were re-scanned on every iteration.
flow_day_keys = flow_df_indexed.index.normalize()
agg_day_keys = agg_trades_df_indexed.index.normalize()
agg_trades_by_day = agg_trades_df_indexed.groupby(agg_day_keys)
agg_days_available = set(agg_day_keys.unique())

# Plot the data day by day (no resampling). groupby yields each distinct day
# exactly once, in chronological order, with rows kept in their original order.
# Rows with a missing timestamp (NaT) are excluded by groupby.
for day_start, flow_day_data in flow_df_indexed.groupby(flow_day_keys):
    day = day_start.date()
    print(f"Processing day: {day}")

    # A day is only plotted when both feeds have data for it
    if day_start not in agg_days_available:
        print(f"Missing price data for {day}, skipping...")
        continue

    agg_trades_day_data = agg_trades_by_day.get_group(day_start)

    # Report sizes instead of dumping the frames themselves
    print(f"flow rows: {len(flow_day_data)}, agg trade rows: {len(agg_trades_day_data)}")

    # Create the plot for the day
    fig, ax = plt.subplots(figsize=(14, 7))

    # 1inch flow prices for the current day
    ax.plot(flow_day_data.index, flow_day_data['price'], label='market1InchFlowBNBUSDT', color='blue')

    # Binance aggregated-trade prices for the same day, on the same time axis
    ax.plot(agg_trades_day_data.index, agg_trades_day_data['price'], label='aggTradesBinance_BNBUSDT', color='red')

    # Add labels and legend
    ax.set_xlabel('Time')
    ax.set_ylabel('Price')
    ax.set_title(f'Price Comparison for {day.strftime("%B %d, %Y")}')
    ax.legend()

    # Show the plot, then release the figure so memory does not grow per day
    plt.show()
    plt.close(fig)

    # Optional pause between plots
    if PLOT_PAUSE_SECONDS > 0:
        time.sleep(PLOT_PAUSE_SECONDS)



Before printing lengths of DataFrames
1932778
14000000
Flow Data (first few rows):
                                price
block_time                           
2021-02-25 07:09:55+00:00  247.225685
2021-02-25 07:12:22+00:00  246.711004
2021-02-25 14:30:34+00:00  212.908969
2021-02-25 14:32:21+00:00  229.646102
2021-02-25 14:35:30+00:00  256.653251
Agg Trades Data (first few rows):
                                  price
datetime                               
2022-12-24 15:34:36.131000+00:00  244.2
2022-12-24 15:34:37.933000+00:00  244.1
2022-12-24 15:34:38.163000+00:00  244.2
2022-12-24 15:34:39.793000+00:00  244.2
2022-12-24 15:34:48.048000+00:00  244.1
Flow Data Range: 2021-02-25 to 2024-11-24
Agg Trades Data Range: 2022-12-24 to 2023-03-22
Missing data for 2022-12-24, skipping...
Missing data for 2022-12-25, skipping...
Missing data for 2022-12-26, skipping...
Missing data for 2022-12-27, skipping...
Missing data for 2022-12-28, skipping...


KeyboardInterrupt: 