In [2]:
# Data and analysis libraries
import polars as pl                         # Fast dataframes for financial data
import numpy as np                          # Numerical computing library
from datetime import datetime, timedelta    # Date and time operations
import random


# Machine learning libraries  
import torch                                # PyTorch framework
import torch.nn as nn                       # Neural network modules
import torch.optim as optim                 # Optimization algorithms
import research                             # Model building and training utilities


# Visualization and 
import altair as alt                        # Interactive visualization library

# data sources
import binance                              # Binance market data utilities                    

ModuleNotFoundError: No module named 'polars'

In [None]:
# Standard imports
import os
from dotenv import load_dotenv
from datetime import datetime
import polars as pl
import yfinance as yf
import finnhub

# Load environment variables from .env (no path needed if .env is in the same folder)
load_dotenv()

# Load API key from environment
FINNHUB_API_KEY = os.getenv("FINNHUB_API_KEY")
finnhub_client = finnhub.Client(api_key=FINNHUB_API_KEY)

# Define stock symbols
SYMBOLS_STOCKS = ["AAPL", "MSFT", "GOOG"]  # or whatever list


# Sotck Symbols
syms = SYMBOLS_STOCKS

# yfinance

start_date = "2024-10-29"
end_date = "2025-10-09"

data = yf.download(syms, start=start_date, end=end_date, interval="1h")
print(data.head())


# finnhub

all_data = []

start_date = datetime(2024, 10, 29)
end_date = datetime(2025, 10, 9)
chunk_days = 30  # 30-day chunks

for sym in SYMBOLS_STOCKS:
    current_start = start_date
    while current_start < end_date:
        current_end = min(current_start + timedelta(days=chunk_days), end_date)
        res = finnhub_client.stock_candles(
            sym, "60",
            int(current_start.timestamp()),
            int(current_end.timestamp())
        )
        if res['s'] == 'ok':
            df = pl.DataFrame({
                "symbol": [sym] * len(res["t"]),
                "timestamp": res["t"],
                "open": res["o"],
                "high": res["h"],
                "low": res["l"],
                "close": res["c"],
                "volume": res["v"]
            })
            all_data.append(df)
        current_start = current_end

# Combine all symbols into a single DataFrame
final_df = pl.concat(all_data)
print(final_df)


[*********************100%***********************]  3 of 3 completed

Price                           Close                                High  \
Ticker                           AAPL        GOOG        MSFT        AAPL   
Datetime                                                                    
2024-10-29 13:30:00+00:00  233.059998  169.365005  428.222290  234.054993   
2024-10-29 14:30:00+00:00  233.270004  170.520004  429.529999  233.690002   
2024-10-29 15:30:00+00:00  233.100006  170.080002  430.019989  233.589996   
2024-10-29 16:30:00+00:00  233.889999  171.040100  431.170013  234.210007   
2024-10-29 17:30:00+00:00  233.970001  171.380295  432.399994  234.139999   

Price                                                     Low              \
Ticker                           GOOG        MSFT        AAPL        GOOG   
Datetime                                                                    
2024-10-29 13:30:00+00:00  170.000000  429.279999  232.320007  168.660004   
2024-10-29 14:30:00+00:00  170.867004  430.179901  233.039993  169.369995  




FinnhubAPIException: FinnhubAPIException(status_code: 403): You don't have access to this resource.