In [None]:
import requests
import matplotlib.pyplot as plt
from datetime import datetime, timedelta

import os
import warnings

# Polygon.io credential, read from the POLYGON_API_KEY environment variable so
# the secret is not stored in the notebook itself.
# NOTE(review): the key that used to be hard-coded here has been exposed in the
# file history and should be rotated.
API_KEY = os.environ.get("POLYGON_API_KEY", "")
if not API_KEY:
    warnings.warn(
        "POLYGON_API_KEY is not set; Polygon.io requests will not be authenticated.",
        stacklevel=2,
    )

# Ticker analysed by the cells below.
symbol = "ROKU"

def get_trade_data(symbol, timespan="minute", multiplier=1, from_date=None, to_date=None):
    """Fetch per-bar trade counts for ``symbol`` from the Polygon.io aggregates endpoint.

    Args:
        symbol: Ticker placed in the request path.
        timespan: Bar unit placed in the request path (e.g. "minute", "day").
        multiplier: Number of ``timespan`` units per bar.
        from_date: Start date as "YYYY-MM-DD"; defaults to yesterday by the local clock.
        to_date: End date as "YYYY-MM-DD"; defaults to today by the local clock.

    Returns:
        ``(times, trade_counts)``: two parallel lists with one entry per bar.
        ``times`` holds naive ``datetime`` objects built with
        ``datetime.fromtimestamp`` (i.e. in the machine's local timezone) from
        each bar's millisecond "t" field; ``trade_counts`` holds each bar's "n"
        field, or 0 when it is absent. Both lists are empty when the request
        fails or the response status is not 200.
    """
    now = datetime.now()
    if from_date is None:
        from_date = (now - timedelta(days=1)).strftime("%Y-%m-%d")
    if to_date is None:
        to_date = now.strftime("%Y-%m-%d")

    url = f"https://api.polygon.io/v2/aggs/ticker/{symbol}/range/{multiplier}/{timespan}/{from_date}/{to_date}"
    # Let requests build and encode the query string instead of splicing it by hand.
    params = {"adjusted": "true", "sort": "asc", "limit": 50000, "apiKey": API_KEY}

    try:
        # A timeout keeps a stalled connection from hanging the notebook forever.
        response = requests.get(url, params=params, timeout=30)
    except requests.RequestException as exc:
        # Report only the exception type: the message can embed the full URL,
        # which includes the API key.
        print(f"Error: request failed ({type(exc).__name__})")
        return [], []

    if response.status_code != 200:
        print(f"Error: {response.status_code} - {response.text}")
        return [], []

    # "results" may be missing or null when there are no bars; treat both as empty.
    results = response.json().get("results") or []
    times = [datetime.fromtimestamp(item["t"] / 1000) for item in results]
    trade_counts = [item.get("n", 0) for item in results]
    return times, trade_counts

def plot_trade_counts(times, trade_counts, title):
    """Draw trade counts against time as a single line chart and show it.

    Args:
        times: x values (one per bar).
        trade_counts: y values, parallel to ``times``.
        title: Text used as the chart title.
    """
    fig, ax = plt.subplots(figsize=(12, 6))
    ax.plot(times, trade_counts, label="Trade Count")
    ax.set_title(title)
    ax.set_xlabel("Time")
    ax.set_ylabel("Number of Trades")
    ax.grid(True)
    fig.tight_layout()
    plt.show()

# Minute bars over get_trade_data's default window (yesterday through today).
minute_times, minute_trades = get_trade_data(symbol, timespan="minute")
plot_trade_counts(
    times=minute_times,
    trade_counts=minute_trades,
    title=f"{symbol} Trade Count Per Minute",
)

# Daily bars over the fixed window 2024-03-01 .. 2024-04-01.
daily_times, daily_trades = get_trade_data(
    symbol,
    timespan="day",
    from_date="2024-03-01",
    to_date="2024-04-01",
)
plot_trade_counts(
    times=daily_times,
    trade_counts=daily_trades,
    title=f"{symbol} Trade Count Per Day",
)


In [None]:
# Analysis parameters: the ticker and the number of trading days to sample.
symbol, num_days = "ROKU", 60

In [None]:
import requests
import pandas as pd
import random
import time
import pandas_market_calendars as mcal

symbol = "ROKU"
num_days = 60

# Valid NYSE sessions within the trailing 180 calendar days, as "YYYY-MM-DD" strings.
nyse = mcal.get_calendar('NYSE')
end = pd.Timestamp.today()
start = end - pd.Timedelta(days=180)
schedule = nyse.valid_days(start_date=start, end_date=end)
market_days = [d.strftime('%Y-%m-%d') for d in schedule]

# Draw the sample from a dedicated seeded generator so that re-running over the
# same calendar window selects the same days (42 is the seed this notebook
# already uses for its random_state sampling). The size is clamped because
# random.sample raises ValueError when asked for more items than exist.
sample_size = min(num_days, len(market_days))
random_days = random.Random(42).sample(market_days, sample_size)

# Accumulates one record per fetched minute bar.
all_data = []

def fetch_intraday_data(symbol, date):
    """Fetch one day of 1-minute bars for ``symbol`` and return their trade counts.

    Args:
        symbol: Ticker placed in the request path.
        date: Day to fetch as "YYYY-MM-DD"; used as both range start and end.

    Returns:
        A list of dicts, one per bar, with keys "timestamp" (``pd.Timestamp``
        parsed from the bar's millisecond "t" field), "trade_count" (the bar's
        "n" field, 0 when absent) and "date" (the ``date`` argument). The list
        is empty when the request fails or the response status is not 200.
    """
    url = f"https://api.polygon.io/v2/aggs/ticker/{symbol}/range/1/minute/{date}/{date}"
    params = {"adjusted": "true", "apiKey": API_KEY}

    try:
        # A timeout keeps one stalled request from blocking the whole fetch loop.
        resp = requests.get(url, params=params, timeout=30)
    except requests.RequestException as exc:
        # Report only the exception type: the message can embed the full URL,
        # which includes the API key.
        print(f"Error fetching data for {date}: {type(exc).__name__}")
        return []

    if resp.status_code != 200:
        print(f"Error fetching data for {date}: {resp.status_code}")
        return []

    # "results" may be missing or null when the day has no bars.
    bars = resp.json().get("results") or []
    return [
        {
            "timestamp": pd.to_datetime(bar["t"], unit='ms'),
            # .get mirrors get_trade_data: a bar without "n" counts as 0 trades.
            "trade_count": bar.get("n", 0),
            "date": date,
        }
        for bar in bars
    ]

print(f"Fetching 1-minute trade count data for {num_days} random days...")

for request_index, day in enumerate(random_days):
    if request_index:
        # Respect API rate limit: pause between consecutive requests only, so
        # no time is wasted sleeping after the final one.
        time.sleep(15)
    daily_data = fetch_intraday_data(symbol, day)
    all_data.extend(daily_data)
    print(f"Fetched {len(daily_data)} points for {day}")

# Store in DataFrame. Naming the columns explicitly keeps the CSV header
# present even when no bars were fetched at all.
df = pd.DataFrame(all_data, columns=["timestamp", "trade_count", "date"])

# Save to file for seasonal analysis
df.to_csv("roku_intraday_1min_trade_counts.csv", index=False)
print("Data saved to 'roku_intraday_1min_trade_counts.csv'")


In [None]:
### import saved data and clean it up###
import pandas as pd

# Read the saved minute bars back in, parsing the timestamp column.
df = pd.read_csv("roku_intraday_1min_trade_counts.csv", parse_dates=["timestamp"])

# Index by timestamp, then interpret the naive values as UTC and convert to New York time.
df = df.assign(timestamp=pd.to_datetime(df["timestamp"])).set_index("timestamp")
df = df.tz_localize("UTC").tz_convert("America/New_York")

# Put the counts on a regular 1-minute grid.
df_resampled = df["trade_count"].resample("1min").sum().fillna(0)

# Back to a frame, with the calendar date and the HH:MM label split out.
df_reset = df_resampled.reset_index()
df_reset = df_reset.assign(
    date=df_reset["timestamp"].dt.date,
    time_str=df_reset["timestamp"].dt.strftime("%H:%M"),
)

# Pivot to a date x time-of-day grid; cells with no data become 0.
pivot_table = df_reset.pivot_table(
    index="date",
    columns="time_str",
    values="trade_count",
    aggfunc="sum",
).fillna(0)

# Keep only dates whose total across all minutes is non-zero.
has_trades = pivot_table.sum(axis=1).ne(0)
heatmap_data_no_black = pivot_table.loc[has_trades]

# Melt the grid back to long form, rebuild a full timestamp from date + HH:MM,
# sort chronologically, index by it, and add time-of-day / hour helper columns.
flat_df = (
    heatmap_data_no_black.reset_index()
    .melt(id_vars="date", var_name="time_str", value_name="trade_count")
    .assign(timestamp=lambda d: pd.to_datetime(d["date"].astype(str) + " " + d["time_str"]))
    .sort_values("timestamp")
    .set_index("timestamp")
    .assign(time=lambda d: d.index.time, hour=lambda d: d.index.hour)
)

# Final time series
trade_series = flat_df["trade_count"]
trade_series

In [None]:
import seaborn as sns
import matplotlib.pyplot as plt

# Box plot of per-minute trade counts grouped by hour of day (outliers hidden).
fig, ax = plt.subplots(figsize=(14, 6))
sns.boxplot(data=flat_df, x="hour", y="trade_count", showfliers=False, ax=ax)
ax.set_title("Distribution of Trade Counts by Hour", fontsize=16)
ax.set_xlabel("Hour of Day")
ax.set_ylabel("Trade Count")
fig.tight_layout()


In [None]:

# Choose 9 distinct dates (fixed seed) and keep only their rows.
unique_dates = flat_df["date"].drop_duplicates()
sample_days = unique_dates.sample(n=9, random_state=42)
df_sample = flat_df.loc[flat_df["date"].isin(sample_days)]

# One small line chart per sampled date, three per row, each with its own y scale.
g = sns.FacetGrid(data=df_sample, col="date", col_wrap=3, height=3.5, sharey=False)
g.map_dataframe(sns.lineplot, x="time_str", y="trade_count")
g.set_titles(col_template="{col_name}")
for facet_ax in g.axes.flat:
    # Keep every 120th tick so the x axis stays readable.
    sparse_ticks = facet_ax.get_xticks()[::120]
    facet_ax.set_xticks(sparse_ticks)
    facet_ax.tick_params(axis="x", rotation=45)
g.set_axis_labels("Time of Day", "Trade Count")
plt.tight_layout()
plt.show()


In [None]:
import pandas as pd
import numpy as np
import plotly.express as px

# Log-compress the counts so large values do not wash out small ones.
# np.log1p is applied to the whole frame at once (vectorized) rather than
# element by element through DataFrame.map, which is slower and only exists
# in newer pandas releases.
log_trades = np.log1p(heatmap_data_no_black)

# Plot interactive heatmap
fig = px.imshow(
    log_trades,
    labels=dict(x="Time of Day", y="Date", color="log(1 + Trades)"),
    x=heatmap_data_no_black.columns,
    y=heatmap_data_no_black.index.astype(str),
    color_continuous_scale='Oranges',
    aspect='auto'
)

# Hover text shows the date, the time of day and the log-scaled value.
fig.update_traces(
    hovertemplate="Date: %{y}<br>Time: %{x}<br>log(1 + Trades): %{z:.2f}<extra></extra>"
)

fig.update_layout(
    title="Interactive Intraday Trade Activity Heatmap",
    xaxis_nticks=20,
    yaxis_nticks=20,
    height=800
)

fig.show()


In [None]:
# Display the date x time-of-day grid of trade counts (dates with a zero total removed).
heatmap_data_no_black

In [None]:
import matplotlib.pyplot as plt
from statsmodels.tsa.seasonal import STL

# STL decomposition with one day of 1-minute bars (24 * 60 = 1440) as the seasonal period.
minutes_per_day = 24 * 60
stl = STL(endog=trade_series, period=minutes_per_day)
result = stl.fit()

# Draw the decomposition panels.
result.plot()
plt.show()

In [None]:
from statsmodels.tsa.seasonal import seasonal_decompose
import matplotlib.pyplot as plt
import seaborn as sns

# Additive classical decomposition with a period of 1440 observations (one day of minutes).
result = seasonal_decompose(trade_series, model='additive', period=1440)

# Plot styling: Seaborn grid theme plus figure-wide font and size defaults.
sns.set_style("whitegrid")
plt.rcParams.update({
    "figure.figsize": (14, 10),
    "axes.titlesize": 16,
    "axes.labelsize": 12,
    "xtick.labelsize": 10,
    "ytick.labelsize": 10
})

# Pull the four component series out of the result object.
observed, trend, seasonal, resid = (
    result.observed,
    result.trend,
    result.seasonal,
    result.resid,
)

# One stacked panel per component, sharing the time axis.
fig, axes = plt.subplots(4, 1, sharex=True)
panels = [
    (observed, "steelblue", "Observed"),
    (trend, "orange", "Trend"),
    (seasonal, "seagreen", "Seasonality (per day)"),
    (resid, "grey", "Residual (noise)"),
]
for panel_ax, (component, line_color, panel_title) in zip(axes, panels):
    component.plot(ax=panel_ax, color=line_color, linewidth=1)
    panel_ax.set_title(panel_title)

plt.suptitle("Seasonal Decomposition of Trade Activity", fontsize=18)
plt.tight_layout(rect=[0, 0.03, 1, 0.97])
plt.show()


In [None]:
# Long-form frame of the seasonal component with time-of-day, date and HH:MM columns.
seasonal_df = seasonal.reset_index()
seasonal_df['time'] = seasonal_df['timestamp'].dt.time
seasonal_df['date'] = seasonal_df['timestamp'].dt.date
seasonal_df["time_str"] = seasonal_df["timestamp"].dt.strftime('%H:%M')

# Sample 9 days with a fixed seed so the same facets are drawn on every run.
sample_days = seasonal_df["date"].drop_duplicates().sample(9, random_state=42)
df_sample = seasonal_df[seasonal_df["date"].isin(sample_days)]

# Plot one panel per sampled date, three per row, each with its own y scale.
g = sns.FacetGrid(df_sample, col="date", col_wrap=3, height=3.5, sharey=False)
g.map_dataframe(sns.lineplot, x="time_str", y="seasonal")
g.set_titles(col_template="{col_name}")
for ax in g.axes.flatten():
    ax.set_xticks(ax.get_xticks()[::120])  # show fewer ticks
    ax.tick_params(axis='x', rotation=45)
g.set_axis_labels("Time of Day", "Trade Count")
plt.tight_layout()
plt.show()


In [None]:
# Group the seasonal component by time of day; the grouped object is reused below.
seasonal_profile = seasonal_df.groupby("time")["seasonal"]

# Plot the per-minute mean of the seasonal component.
mean_seasonal = seasonal_profile.mean()
ax = mean_seasonal.plot(figsize=(12, 4), title="Average Trade Count per Minute")
ax.set_xlabel("Time of Day")
ax.set_ylabel("Avg Trades")
plt.show()

In [None]:
# Long-form frame of the residual component with time-of-day, date and HH:MM columns.
resid_df = resid.reset_index()
resid_df['time'] = resid_df['timestamp'].dt.time
resid_df['date'] = resid_df['timestamp'].dt.date
resid_df["time_str"] = resid_df["timestamp"].dt.strftime('%H:%M')

# Sample 9 days with a fixed seed so the same facets are drawn on every run.
sample_days = resid_df["date"].drop_duplicates().sample(9, random_state=42)
df_sample = resid_df[resid_df["date"].isin(sample_days)]

# Plot one panel per sampled date, three per row, each with its own y scale.
g = sns.FacetGrid(df_sample, col="date", col_wrap=3, height=3.5, sharey=False)
g.map_dataframe(sns.lineplot, x="time_str", y="resid")
g.set_titles(col_template="{col_name}")
for ax in g.axes.flatten():
    ax.set_xticks(ax.get_xticks()[::120])  # show fewer ticks
    ax.tick_params(axis='x', rotation=45)
g.set_axis_labels("Time of Day", "Trade Count")
plt.tight_layout()
plt.show()

In [None]:
# Add the hour of day to the residual frame (later cells read this column too).
resid_df["hour"] = resid_df["timestamp"].dt.hour

# Box plot of residuals grouped by hour of day (outliers hidden).
fig, ax = plt.subplots(figsize=(14, 6))
sns.boxplot(data=resid_df, x="hour", y="resid", showfliers=False, ax=ax)
ax.set_title("Distribution of Residuals by Hours", fontsize=16)
ax.set_xlabel("Hour of Day")
ax.set_ylabel("Residuals")
fig.tight_layout()

In [None]:
# Copy of the residual frame with the residual replaced by its absolute value.
abs_resid_df = resid_df.copy()
abs_resid_df['resid'] = abs_resid_df['resid'].abs()

# Box plot of absolute residuals by hour. The title and y label name the
# absolute value so this figure is not confused with the signed-residual one.
plt.figure(figsize=(14, 6))
sns.boxplot(x="hour", y="resid", data=abs_resid_df, showfliers=False)
plt.title("Distribution of Absolute Residuals by Hour", fontsize=16)
plt.xlabel("Hour of Day")
plt.ylabel("Absolute Residual")
plt.tight_layout()

In [None]:
# Group absolute residuals by time of day; the grouped object is reused below.
abs_resid_profile = abs_resid_df.groupby('time')['resid']

# Plot the per-minute mean. The values are absolute residuals, so the labels say so.
abs_resid_profile.mean().plot(figsize=(12, 4), title="Average Absolute Residual per Minute")
plt.xlabel("Time of Day")
plt.ylabel("Avg Absolute Residual")
plt.show()

In [None]:
# Write both per-minute mean profiles to CSV, named after the ticker and sample size.
output_prefix = f"{symbol}_rand{num_days}"
abs_resid_profile.mean().to_csv(f"{output_prefix}_residuals.csv", index=True)
seasonal_profile.mean().to_csv(f"{output_prefix}_seasonal.csv", index=True)


In [None]:
# Display the mean seasonal component for each time of day.
seasonal_profile.mean()