In [3]:
# Imports for the IBKR download cell.
# NOTE(review): `asyncio` is not referenced in the cells visible here; the
# `await` calls below rely on the notebook's top-level await support.
import asyncio
import importlib

# Reload config and ibkr_data_async to pick up any changes (important if you modified config.py).
# Statement order matters here:
#   1. `import` only binds the (possibly already cached) module object,
#   2. `importlib.reload` re-executes that module's source in the running kernel,
#   3. the `from ... import` lines run last so the names bound in this notebook
#      come from the freshly reloaded modules rather than a stale copy.
# config is reloaded before ibkr_data_async — presumably because the fetcher
# module reads config at import time; confirm against ibkr_data_async.py.
import config
importlib.reload(config)
import ibkr_data_async
importlib.reload(ibkr_data_async)
from ibkr_data_async import DataFetcherAsync
from config import IBKR_HOST, IBKR_PORT, IBKR_CLIENT_ID

# --- Run parameters (edit these) ---
# Symbols to download and how many years of history to request for each.
tickers = ["SPY"]
years = 20
# Flip to True to bypass the cache and re-download everything.
force_refresh = False

# Echo the endpoint and client ID taken from config so a wrong port is
# noticed before any connection attempt is made.
print(
    f"Connecting to IBKR at {IBKR_HOST}:{IBKR_PORT}",
    f"Client ID: {IBKR_CLIENT_ID}",
    sep="\n",
)

# Build the fetcher. Host, port and client ID are passed explicitly so the
# values from the reloaded config are the ones used (avoids caching issues).
fetcher = DataFetcherAsync(host=IBKR_HOST, port=IBKR_PORT, client_id=IBKR_CLIENT_ID)

# Connect to IBKR (make sure TWS/Gateway is running!)
await fetcher.connect()
print("Connected to IBKR!")

# Fetch data for multiple tickers
all_data = await fetcher.fetch_all_tickers_data(
    tickers=tickers,
    years=years,
    force_refresh=force_refresh,
    max_concurrent=5
)

# Display results
for ticker, df in all_data.items():
    if df is not None:
        print(f"\n{ticker}:")
        print(f"  Shape: {df.shape}")
        print(f"  Date range: {df.index.min()} to {df.index.max()}")
        print(f"  First few rows:")
        print(df.head())

# Disconnect when done
fetcher.ib.disconnect()
print("\nDisconnected from IBKR")


Connecting to IBKR at 127.0.0.1:7496
Client ID: 1
Connected to IBKR!

SPY:
  Shape: (5028, 8)
  Date range: 0 to 5027
  First few rows:
         date   open   high    low  close      volume    average  barCount
0  2005-11-25  88.87  89.04  88.75  88.95  12874600.0  88.912389     26338
1  2005-11-28  89.06  90.77  87.88  88.34  48378500.0  88.535158     95504
2  2005-11-29  88.64  88.87  88.23  88.25  46506700.0  88.530959     88036
3  2005-11-30  88.30  88.55  87.49  87.80  46989000.0  88.123633     96874
4  2005-12-01  88.20  88.90  88.17  88.65  58830000.0  88.593947    107144

Disconnected from IBKR


In [6]:
from data_cleaning import Data

# Configure the cleaner with the same `tickers` and `years` that were used to
# download `all_data`, instead of repeating the literals here. This keeps the
# two cells from drifting apart when the run parameters are edited.
data_processor = Data(
    tickers=tickers,
    dfs=all_data,
    years=years,
)

# clean_data() returns a dictionary: {'SPY': DataFrame, ...}
cleaned_data = data_processor.clean_data()

# Access the DataFrame using the ticker as key
SPY_df = cleaned_data["SPY"]

# Or if you have multiple tickers, you can access each one:
# cleaned_data["AAPL"], cleaned_data["NVDA"], etc.

# Quick inspection of the cleaned SPY frame: shape, column names, first rows,
# and the 'returns' column on its own.
print(f"SPY DataFrame shape: {SPY_df.shape}")
print(f"\nColumns: {SPY_df.columns.tolist()}")
print("\nFirst few rows:")
print(SPY_df.head())
print("\nAccess returns column:")
print(SPY_df['returns'].head())

SPY DataFrame shape: (5028, 13)

Columns: ['open', 'high', 'low', 'close', 'volume', 'average', 'barCount', 'returns', 'log_returns', 'day_of_month', 'day_of_week', 'month', 'year']

First few rows:
                            open   high    low  close      volume    average  \
date                                                                           
2005-11-25 00:00:00+00:00  88.87  89.04  88.75  88.95  12874600.0  88.912389   
2005-11-28 00:00:00+00:00  89.06  90.77  87.88  88.34  48378500.0  88.535158   
2005-11-29 00:00:00+00:00  88.64  88.87  88.23  88.25  46506700.0  88.530959   
2005-11-30 00:00:00+00:00  88.30  88.55  87.49  87.80  46989000.0  88.123633   
2005-12-01 00:00:00+00:00  88.20  88.90  88.17  88.65  58830000.0  88.593947   

                           barCount   returns  log_returns  day_of_month  \
date                                                                       
2005-11-25 00:00:00+00:00     26338       NaN          NaN            25   
2005-11-28 0