In [None]:
import pandas as pd
from pathlib import Path

# ============================
# CONFIG â€” update if needed
# ============================
EVENTS = Path("kalshi_capopm_events.csv")
ORDERBOOK = Path("orderbook_data.csv")
TRADES = Path("trades_data.csv")

# ============================
# LOAD RAW FILES
# ============================
print("\n=== LOADING RAW FILES ===")

events_df = pd.read_csv(EVENTS)
orderbook_df = pd.read_csv(ORDERBOOK)
trades_df = pd.read_csv(TRADES)

print(f"events_df: {events_df.shape}")
print(f"orderbook_df: {orderbook_df.shape}")
print(f"trades_df: {trades_df.shape}")

# ============================
# MICROSTRUCTURE MERGE CHECK
# ============================

print("\n=== CHECKING orderbook_data.csv STRUCTURE ===")
print(orderbook_df.head(10))

print("\n=== CHECKING trades_data.csv STRUCTURE ===")
print(trades_df.head(10))

# ============================
# MERGE EXACTLY AS IN MAIN FILE
# ============================

# --- convert timestamp columns ---
for col in ["created_ts", "expiration_ts"]:
    if col in events_df.columns:
        events_df[col] = pd.to_datetime(events_df[col], utc=True, errors="coerce")

# --- microstructure (simple aggregation preview) ---
def _simple_microstructure_preview(orderbook_df, trades_df):
    """
    This is NOT a replacement for full microstructure_features().
    It just verifies whether each market has rows in orderbook/trades.
    """
    ob_counts = orderbook_df.groupby("market_ticker").size().rename("ob_count")
    tr_counts = trades_df.groupby("ticker").size().rename("trade_count")

    merged = pd.DataFrame({
        "orderbook_rows": ob_counts,
        "trade_rows": tr_counts
    }).fillna(0).astype(int)

    return merged

preview_df = _simple_microstructure_preview(orderbook_df, trades_df)

print("\n=== MICROSTRUCTURE PRESENCE CHECK ===")
print(preview_df.head(20))

# ============================
# MERGE WITH EVENTS (as in main script)
# ============================

# merges by market_ticker (trades use ticker == market_ticker)
data_df = events_df.merge(
    orderbook_df, on="market_ticker", how="left", suffixes=("", "_ob")
)

# Finally merge in trade counts summary only for visibility
tr_summary = trades_df.groupby("ticker").agg(
    total_trade_rows=("ticker", "count")
).reset_index().rename(columns={"ticker": "market_ticker"})

data_df = data_df.merge(tr_summary, on="market_ticker", how="left")

print("\n=== FINAL MERGED data_df (first 15 rows) ===")
print(data_df.head(15))

print("\n=== COLUMN SUMMARY ===")
print(data_df.dtypes)

print("\n=== NULL COUNTS ===")
print(data_df.isna().sum())

print("\n=== UNIQUE MARKET COUNT ===")
print("Number of events:", events_df["event_ticker"].nunique())
print("Number of markets:", events_df["market_ticker"].nunique())

print("\n=== DONE ===")





=== LOADING RAW FILES ===
events_df: (45, 30)
orderbook_df: (8, 14)
trades_df: (73116, 10)

=== CHECKING orderbook_data.csv STRUCTURE ===
  series_ticker    event_ticker         market_ticker  \
0        KXINXY  KXINXY-25DEC31  KXINXY-25DEC31-B7500   
1        KXINXY  KXINXY-25DEC31  KXINXY-25DEC31-B7300   
2        KXINXY  KXINXY-25DEC31  KXINXY-25DEC31-B7100   
3        KXINXY  KXINXY-25DEC31  KXINXY-25DEC31-B6900   
4        KXINXY  KXINXY-25DEC31  KXINXY-25DEC31-B6700   
5        KXINXY  KXINXY-25DEC31  KXINXY-25DEC31-B6500   
6        KXINXY  KXINXY-25DEC31  KXINXY-25DEC31-B6300   
7        KXINXY  KXINXY-25DEC31  KXINXY-25DEC31-B6100   

                 created_time       expiration_time  \
0  2024-11-05T14:41:06.87561Z  2026-01-07T18:00:00Z   
1  2024-11-05T14:41:06.87561Z  2026-01-07T18:00:00Z   
2  2024-11-05T14:41:06.87561Z  2026-01-07T18:00:00Z   
3  2024-11-05T14:41:06.87561Z  2026-01-07T18:00:00Z   
4  2024-11-05T14:41:06.87561Z  2026-01-07T18:00:00Z   
5  2024-11-05T14:

In [None]:
trades_df.taker_side

#tr_summary = trades_df.groupby("ticker").agg(total_trade_rows=("ticker", "count")).reset_index().rename(columns={"ticker": "market_ticker"})

def yes_one(x: str) -> int:
    if x == "yes":
        return 1
    else:
        return 0


trades_summary = trades_df.groupby("ticker").apply(yes_one)

TypeError: 'int' object is not callable

In [18]:

import pandas as pd

events = pd.read_csv("kalshi_capopm_events.csv")
ob = pd.read_csv("orderbook_data.csv")

print("Unique event tickers in events:", events["event_ticker"].unique()[:10])
print("Unique event tickers in orderbook:", ob["event_ticker"].unique()[:10])

print("\nSample overlap in market_ticker:")
overlap = set(events["market_ticker"]) & set(ob["market_ticker"])
print("Overlap count:", len(overlap))
print("Sample overlap tickers:", list(overlap)[:10])



Unique event tickers in events: ['INXD-24DEC31' 'INXY-23DEC29' 'INXY-22DEC30']
Unique event tickers in orderbook: ['KXINXY-25DEC31']

Sample overlap in market_ticker:
Overlap count: 0
Sample overlap tickers: []


In [19]:
import datetime

timestamp = str(int(datetime.datetime.now().timestamp() * 1000 ))
print(timestamp)

1764895489315
