In [1]:
import pandas as pd
import yfinance as yf
import os
import requests
from time import sleep
from io import StringIO
from matplotlib import pyplot as plt

In [2]:
# this is for training data
start_date = "1980-01-01"  
end_date   = "2022-12-31"   

## Fetching data for S&P 500

In [3]:
# adjust the path if needed
csv_path_sp500 = "../../sp500_tickers.csv"
sp500tickers = pd.read_csv(csv_path_sp500, header=None, usecols=[0])[0].tolist()
# replace dots with dashes (e.g. BRK.B → BRK-B)
sp500tickers = [t.strip().replace(".", "-") for t in sp500tickers]
interval="1d"
#interval = "1h"  # 1 minute interval
sp500tickers[:5]

['AAPL', 'MSFT', 'NVDA', 'AMZN', 'GOOGL']

##### daily data 

In [5]:
out_dir = "../../data/raw/s&p500/1D"
#out_dir = "../../data/raw/s&p500/1h"
os.makedirs(out_dir, exist_ok=True)
failed_tickers = []    
for symbol in sp500tickers:
    try:
        df = yf.Ticker(symbol).history(
            start=start_date,
            end=end_date,
            interval=interval,
            auto_adjust=True
        )
        if df.empty:
            raise ValueError("No data returned")
        df.to_csv(f"{out_dir}/{symbol}-{interval}.csv")
        print(f"Saved {symbol}-{interval} ({len(df)} rows)")
    except Exception as e:
        print(f"{symbol}: {e}")
        failed_tickers.append(symbol)
    sleep(0.1)  # throttle to avoid rate-limit


Saved A-1d (5817 rows)
Saved A-1d (5817 rows)
Saved P-1d (1917 rows)
Saved L-1d (10791 rows)


##### hourly data
it gives only last 730 days data

In [4]:
out_dir = "../../data/raw/s&p500/1h"
os.makedirs(out_dir, exist_ok=True)
failed_tickers = []    
for symbol in sp500tickers:
    try:
        df = yf.Ticker(symbol).history(
            interval="1h",
            period="730d",  # Yahoo's limit for intraday
            auto_adjust=True
        )
        if df.empty:
            raise ValueError("No data returned")
        df.to_csv(f"{out_dir}/{symbol}-{interval}.csv")
        print(f"Saved {symbol}-{interval} ({len(df)} rows)")
    except Exception as e:
        print(f"{symbol}: {e}")
        failed_tickers.append(symbol)
    sleep(0.1)  # throttle to avoid rate-limit



Saved AAPL-1d (5081 rows)
Saved MSFT-1d (5081 rows)
Saved NVDA-1d (5081 rows)
Saved AMZN-1d (5081 rows)
Saved GOOGL-1d (5081 rows)
Saved GOOG-1d (5081 rows)
Saved META-1d (5081 rows)
Saved BRK-B-1d (5081 rows)
Saved TSLA-1d (5081 rows)
Saved AVGO-1d (5081 rows)
Saved LLY-1d (5081 rows)
Saved WMT-1d (5081 rows)
Saved JPM-1d (5081 rows)
Saved V-1d (5081 rows)
Saved MA-1d (5081 rows)
Saved XOM-1d (5081 rows)
Saved NFLX-1d (5074 rows)
Saved COST-1d (5081 rows)
Saved ORCL-1d (5081 rows)
Saved UNH-1d (5081 rows)
Saved PG-1d (5081 rows)
Saved JNJ-1d (5081 rows)
Saved HD-1d (5081 rows)
Saved ABBV-1d (5081 rows)
Saved KO-1d (5081 rows)
Saved BAC-1d (5081 rows)
Saved PM-1d (5081 rows)
Saved PLTR-1d (5081 rows)
Saved TMUS-1d (5081 rows)
Saved CRM-1d (5081 rows)
Saved CVX-1d (5081 rows)
Saved WFC-1d (5080 rows)
Saved MCD-1d (5081 rows)
Saved CSCO-1d (5081 rows)
Saved ABT-1d (5081 rows)
Saved LIN-1d (5081 rows)
Saved GE-1d (5081 rows)
IBM	3B: HTTP Error 404: 
Saved MRK-1d (5081 rows)
Saved NOW-1d (

$KVUE: possibly delisted; no price data found  (period=730d) (Yahoo error = "1h data not available for startTime=1683207000 and endTime=1747061212. The requested range must be within the last 730 days.")


KVUE: No data returned
Saved TGT-1d (5081 rows)
Saved KMB-1d (5081 rows)
Saved CCI-1d (5081 rows)
Saved NDAQ-1d (5081 rows)
Saved VST-1d (5081 rows)
Saved TEL-1d (5081 rows)
Saved MPC-1d (5081 rows)
Saved PWR-1d (5081 rows)
Saved PSX-1d (5081 rows)
Saved FIS-1d (5081 rows)
Saved CTVA-1d (5081 rows)
Saved MSCI-1d (5081 rows)
Saved URI-1d (5081 rows)
Saved YUM-1d (5081 rows)
Saved HES-1d (5081 rows)
Saved LHX-1d (5081 rows)
Saved PEG-1d (5081 rows)
Saved CMI-1d (5081 rows)
Saved VRSK-1d (5081 rows)
Saved FANG-1d (5081 rows)
Saved F-1d (5081 rows)
Saved XEL-1d (5081 rows)
Saved ED-1d (5081 rows)
Saved TTWO-1d (5081 rows)
Saved OXY-1d (5081 rows)
Saved TRGP-1d (5075 rows)
Saved AME-1d (5081 rows)
Saved GRMN-1d (5081 rows)
Saved DHI-1d (5081 rows)
Saved EA-1d (5081 rows)
Saved GLW-1d (5081 rows)
Saved PCG-1d (5081 rows)
Saved OTIS-1d (5081 rows)
Saved ETR-1d (5081 rows)
Saved PRU-1d (5081 rows)
Saved BKR-1d (5081 rows)
Saved VLO-1d (5081 rows)
Saved CTSH-1d (5081 rows)
Saved CBRE-1d (5081 r

$GEHC: possibly delisted; no price data found  (period=730d) (Yahoo error = "1h data not available for startTime=1671114600 and endTime=1747061242. The requested range must be within the last 730 days.")


GEHC: No data returned
Saved EXR-1d (5081 rows)
Saved ODFL-1d (5081 rows)
Saved GIS-1d (5081 rows)
Saved LYV-1d (5080 rows)
Saved TPL-1d (4806 rows)
Saved MLM-1d (5081 rows)
Saved A-1d (5081 rows)
Saved EQT-1d (5081 rows)
Saved IR-1d (5081 rows)
Saved VTR-1d (5081 rows)
Saved WTW-1d (5081 rows)
Saved AVB-1d (5081 rows)
Saved CNC-1d (5081 rows)
Saved K-1d (5011 rows)
Saved LEN-1d (5081 rows)
Saved XYL-1d (5081 rows)
Saved RJF-1d (5081 rows)
Saved DXCM-1d (5081 rows)
Saved ANSS-1d (5081 rows)
Saved ROK-1d (5081 rows)
Saved AWK-1d (5081 rows)
Saved DTE-1d (5081 rows)
Saved MPWR-1d (5081 rows)
Saved BR-1d (5081 rows)
Saved DD-1d (5081 rows)
Saved MTB-1d (5081 rows)
Saved DAL-1d (5081 rows)
Saved TKO-1d (2905 rows)
Saved EQR-1d (5081 rows)
Saved ROL-1d (5081 rows)
Saved WRB-1d (5081 rows)
Saved NUE-1d (5081 rows)
Saved TSCO-1d (5081 rows)
Saved AEE-1d (5074 rows)
Saved PPL-1d (5081 rows)
Saved IQV-1d (5081 rows)
Saved STT-1d (5053 rows)
Saved GDDY-1d (5081 rows)
Saved IRM-1d (5081 rows)
Sav

## Fetching data for DAX 

In [4]:
url = "https://finance.yahoo.com/quote/%5EGDAXI/components?p=%5EGDAXI"
html = requests.get(url, headers={"User-Agent": "Mozilla/5.0"}).text
components = pd.read_html(html, header=0)[0]       # first table on the page
dax_tickers = components["Symbol"].tolist()
dax_tickers

  components = pd.read_html(html, header=0)[0]       # first table on the page


['AIR.DE',
 'BAYN.DE',
 'HEI.DE',
 'RWE.DE',
 'ZAL.DE',
 '1COV.DE',
 'FRE.DE',
 'HNR1.DE',
 'BEI.DE',
 'ENR.DE',
 'MTX.DE',
 'P911.DE',
 'VOW3.DE',
 'MRK.DE',
 'SY1.DE',
 'DBK.DE',
 'BMW.DE',
 'CON.DE',
 'DTE.DE',
 'ALV.DE',
 'BAS.DE',
 'DB1.DE',
 'VNA.DE',
 'SHL.DE',
 'SIE.DE',
 'ADS.DE',
 'EOAN.DE',
 'DHL.DE',
 'DTG.DE',
 'IFX.DE']

##### daily dax

In [11]:
out_dir = "../../data/raw/dax/1D"
os.makedirs(out_dir, exist_ok=True)

for symbol in dax_tickers:
    try:
        df = yf.Ticker(symbol).history(
            start=start_date,
            end=end_date,
            interval=interval,
            auto_adjust=True
        )
        if df.empty:
            raise ValueError("No data returned")
        df.to_csv(f"{out_dir}/{symbol}-{interval}.csv")
        print(f"Saved {symbol}-{interval} ({len(df)} rows)")
    except Exception as e:
        print(f"{symbol}: {e}")
    sleep(0.1)  # throttle to avoid rate-limit

Saved CON.DE-1d (6677 rows)
Saved SHL.DE-1d (1199 rows)
Saved VOW3.DE-1d (6260 rows)
Saved BMW.DE-1d (6703 rows)
Saved EOAN.DE-1d (5882 rows)
Saved ADS.DE-1d (6280 rows)
Saved RWE.DE-1d (6677 rows)
Saved IFX.DE-1d (5832 rows)
Saved HEI.DE-1d (6257 rows)
Saved DTG.DE-1d (271 rows)
Saved SIE.DE-1d (6703 rows)
Saved FRE.DE-1d (6182 rows)
Saved DTE.DE-1d (6697 rows)
Saved MRK.DE-1d (6278 rows)
Saved P911.DE-1d (65 rows)
Saved SY1.DE-1d (4075 rows)
Saved ENR.DE-1d (577 rows)
Saved VNA.DE-1d (2404 rows)
Saved DB1.DE-1d (5597 rows)
Saved BAYN.DE-1d (6677 rows)
Saved ALV.DE-1d (6677 rows)
Saved MTX.DE-1d (4467 rows)
Saved FME.DE-1d (6260 rows)
Saved ZAL.DE-1d (2094 rows)
Saved BEI.DE-1d (6260 rows)
Saved DBK.DE-1d (6697 rows)
Saved AIR.DE-1d (6181 rows)
Saved HNR1.DE-1d (5672 rows)
Saved DHL.DE-1d (5607 rows)
Saved BAS.DE-1d (6677 rows)


##### 1h dax data
latest 730 days 

In [21]:
out_dir = "../../data/raw/dax/1h"
interval = "1h"
os.makedirs(out_dir, exist_ok=True)
failed_tickers = []
for symbol in dax_tickers:
    try:
        df = yf.Ticker(symbol).history(
            interval="1h", period="730d", auto_adjust=True  # Yahoo's limit for intraday
        )
        if df.empty:
            raise ValueError("No data returned")
        df.to_csv(f"{out_dir}/{symbol}-{interval}.csv")
        print(f"Saved {symbol}-{interval} ({len(df)} rows)")
    except Exception as e:
        print(f"{symbol}: {e}")
        failed_tickers.append(symbol)
    sleep(0.1)  # throttle to avoid rate-limit

Saved AIR.DE-1h (6558 rows)
Saved BAYN.DE-1h (6558 rows)
Saved HEI.DE-1h (6558 rows)
Saved RWE.DE-1h (6558 rows)
Saved ZAL.DE-1h (6558 rows)
Saved 1COV.DE-1h (6552 rows)
Saved FRE.DE-1h (6559 rows)
Saved HNR1.DE-1h (6558 rows)
Saved BEI.DE-1h (6558 rows)
Saved ENR.DE-1h (6558 rows)
Saved MTX.DE-1h (6559 rows)


$P911.DE: possibly delisted; no price data found  (period=730d) (Yahoo error = "1h data not available for startTime=1664521200 and endTime=1747065618. The requested range must be within the last 730 days.")


P911.DE: No data returned
Saved VOW3.DE-1h (6558 rows)
Saved MRK.DE-1h (6558 rows)
Saved SY1.DE-1h (6558 rows)
Saved DBK.DE-1h (6559 rows)
Saved BMW.DE-1h (6558 rows)
Saved CON.DE-1h (6558 rows)
Saved DTE.DE-1h (6558 rows)
Saved ALV.DE-1h (6559 rows)
Saved BAS.DE-1h (6558 rows)
Saved DB1.DE-1h (6558 rows)
Saved VNA.DE-1h (6560 rows)
Saved SHL.DE-1h (6558 rows)
Saved SIE.DE-1h (6558 rows)
Saved ADS.DE-1h (6558 rows)
Saved EOAN.DE-1h (6559 rows)
Saved DHL.DE-1h (4240 rows)
Saved DTG.DE-1h (6558 rows)
Saved IFX.DE-1h (6558 rows)


## Fetching data for Commodities

In [22]:
url  = "https://finance.yahoo.com/commodities"
html = requests.get(url, headers={"User-Agent":"Mozilla/5.0"}).text

commodities_df = pd.read_html(StringIO(html))[0]
tickers_commodities = commodities_df["Symbol"].tolist()
print(f"Found {len(tickers_commodities)} commodity symbols")


Found 38 commodity symbols


In [13]:
out_dir = "../../data/raw/commodities/1D"
os.makedirs(out_dir, exist_ok=True)
failed_tickers = []
for symbol in tickers_commodities:
    try:
        df = yf.Ticker(symbol).history(
            start=start_date,
            end=end_date,
            interval=interval,
            auto_adjust=True
        )
        if df.empty:
            raise ValueError("No data returned")
        df.to_csv(f"{out_dir}/{symbol}-{interval}.csv")
        print(f"Saved {symbol}-{interval} ({len(df)} rows)")
    except Exception as e:
        print(f"{symbol}: {e}")
        failed_tickers.append(symbol)
    sleep(0.1)  # throttle to avoid rate-limit

Saved ES=F-1d (5631 rows)
Saved YM=F-1d (5222 rows)
Saved NQ=F-1d (5631 rows)
Saved RTY=F-1d (1381 rows)
Saved ZB=F-1d (5599 rows)
Saved ZN=F-1d (5593 rows)
Saved ZF=F-1d (5605 rows)
Saved ZT=F-1d (5659 rows)
Saved GC=F-1d (5604 rows)
Saved MGC=F-1d (3069 rows)
Saved SI=F-1d (5606 rows)
Saved SIL=F-1d (2402 rows)
Saved PL=F-1d (5632 rows)
Saved HG=F-1d (5608 rows)
Saved PA=F-1d (5644 rows)
Saved CL=F-1d (5613 rows)
Saved HO=F-1d (5606 rows)
Saved NG=F-1d (5609 rows)
Saved RB=F-1d (5567 rows)


$B0=F: possibly delisted; no price data found  (1d 1980-01-01 -> 2022-12-31)


Saved BZ=F-1d (3830 rows)
B0=F: No data returned
Saved ZC=F-1d (5617 rows)
Saved ZO=F-1d (5696 rows)
Saved KE=F-1d (5606 rows)
Saved ZR=F-1d (5839 rows)
Saved ZM=F-1d (5620 rows)
Saved ZL=F-1d (5658 rows)
Saved ZS=F-1d (5609 rows)
Saved GF=F-1d (5442 rows)
Saved HE=F-1d (5540 rows)
Saved LE=F-1d (5472 rows)
Saved CC=F-1d (5766 rows)
Saved KC=F-1d (5763 rows)
Saved CT=F-1d (5765 rows)
Saved LBS=F-1d (5631 rows)
Saved OJ=F-1d (5341 rows)
Saved SB=F-1d (5726 rows)
nan: 'float' object has no attribute 'upper'


In [23]:
out_dir = "../../data/raw/commodities/1h"
interval="1h"
os.makedirs(out_dir, exist_ok=True)
failed_tickers = []
for symbol in tickers_commodities:
    try:
        df = yf.Ticker(symbol).history(
            period="730d",  # Yahoo's limit for intraday
            interval=interval,
            auto_adjust=True
        )
        if df.empty:
            raise ValueError("No data returned")
        df.to_csv(f"{out_dir}/{symbol}-{interval}.csv")
        print(f"Saved {symbol}-{interval} ({len(df)} rows)")
    except Exception as e:
        print(f"{symbol}: {e}")
        failed_tickers.append(symbol)
    sleep(0.1)  # throttle to avoid rate-limit

Saved ES=F-1h (13713 rows)
Saved YM=F-1h (13704 rows)
Saved NQ=F-1h (13708 rows)
Saved RTY=F-1h (13766 rows)
Saved ZB=F-1h (13769 rows)
Saved ZN=F-1h (13769 rows)
Saved ZF=F-1h (13769 rows)
Saved ZT=F-1h (13768 rows)
Saved GC=F-1h (13767 rows)
Saved MGC=F-1h (13764 rows)
Saved SI=F-1h (13767 rows)
Saved SIL=F-1h (13767 rows)
Saved PL=F-1h (13746 rows)
Saved HG=F-1h (13767 rows)
Saved PA=F-1h (13693 rows)
Saved CL=F-1h (13426 rows)
Saved HO=F-1h (13234 rows)
Saved NG=F-1h (13471 rows)
Saved RB=F-1h (13125 rows)
Saved BZ=F-1h (13076 rows)
Saved B0=F-1h (10 rows)
Saved ZC=F-1h (11235 rows)
Saved ZO=F-1h (8007 rows)
Saved KE=F-1h (11233 rows)
Saved ZR=F-1h (4533 rows)
Saved ZM=F-1h (11235 rows)
Saved ZL=F-1h (11235 rows)
Saved ZS=F-1h (11235 rows)
Saved GF=F-1h (3605 rows)
Saved HE=F-1h (3604 rows)
Saved LE=F-1h (3605 rows)
Saved CC=F-1h (5913 rows)
Saved KC=F-1h (5913 rows)
Saved CT=F-1h (9311 rows)
Saved LBS=F-1h (524 rows)
Saved OJ=F-1h (3264 rows)
Saved SB=F-1h (5926 rows)
nan: 'float'

## Fetching data for Forex

In [24]:
# 1. Full list of pairs from your table (no spaces, use Yahoo tickers)
forex_pairs = [
    "EURUSD=X","GBPUSD=X","CHFJPY=X","USDJPY=X","CADJPY=X","AUDUSD=X","NZDUSD=X",
    "EURGBP=X","GBPCHF=X","USDCHF=X","CADCHF=X","AUDCHF=X","NZDCHF=X",
    "EURCHF=X","GBPJPY=X","USDCAD=X","AUDJPY=X","NZDJPY=X",
    "EURJPY=X","GBPCAD=X","AUDNZD=X","NZDCAD=X",
    "EURCAD=X","GBPAUD=X","AUDCAD=X",
    "EURAUD=X","GBPNZD=X",
    "EURNZD=X"
]

In [17]:

out_dir = "../../data/raw/forex/1D"
os.makedirs(out_dir, exist_ok=True)

failed_tickers = []
for symbol in forex_pairs:
    try:
        df = yf.Ticker(symbol).history(
            start=start_date,
            end=end_date,
            interval=interval,
            auto_adjust=True
        )
        if df.empty:
            raise ValueError("No data returned")
        df.to_csv(f"{out_dir}/{symbol}-{interval}.csv")
        print(f"Saved {symbol}-{interval} ({len(df)} rows)")
    except Exception as e:
        print(f"{symbol}: {e}")
        failed_tickers.append(symbol)
    sleep(0.1)  # throttle to avoid rate-limit

Saved EURUSD=X-1d (4951 rows)
Saved GBPUSD=X-1d (4963 rows)
Saved CHFJPY=X-1d (4969 rows)
Saved USDJPY=X-1d (6785 rows)
Saved CADJPY=X-1d (4780 rows)
Saved AUDUSD=X-1d (4327 rows)
Saved NZDUSD=X-1d (4952 rows)
Saved EURGBP=X-1d (6251 rows)
Saved GBPCHF=X-1d (5009 rows)
Saved USDCHF=X-1d (5017 rows)
Saved CADCHF=X-1d (4967 rows)
Saved AUDCHF=X-1d (4942 rows)
Saved NZDCHF=X-1d (4940 rows)
Saved EURCHF=X-1d (5174 rows)
Saved GBPJPY=X-1d (4967 rows)
Saved USDCAD=X-1d (5019 rows)
Saved AUDJPY=X-1d (4970 rows)
Saved NZDJPY=X-1d (4967 rows)
Saved EURJPY=X-1d (5176 rows)
Saved GBPCAD=X-1d (4968 rows)
Saved AUDNZD=X-1d (4970 rows)
Saved NZDCAD=X-1d (4967 rows)
Saved EURCAD=X-1d (4954 rows)
Saved GBPAUD=X-1d (5097 rows)
Saved AUDCAD=X-1d (4971 rows)
Saved EURAUD=X-1d (4968 rows)
Saved GBPNZD=X-1d (5096 rows)
Saved EURNZD=X-1d (4968 rows)


In [25]:
interval="1h"
out_dir = "../../data/raw/forex/1h"
os.makedirs(out_dir, exist_ok=True)

failed_tickers = []
for symbol in forex_pairs:
    try:
        df = yf.Ticker(symbol).history(
            period="730d",  # Yahoo's limit for intraday
            interval=interval,
            auto_adjust=True
        )
        if df.empty:
            raise ValueError("No data returned")
        df.to_csv(f"{out_dir}/{symbol}-{interval}.csv")
        print(f"Saved {symbol}-{interval} ({len(df)} rows)")
    except Exception as e:
        print(f"{symbol}: {e}")
        failed_tickers.append(symbol)
    sleep(0.1)  # throttle to avoid rate-limit

Saved EURUSD=X-1h (17299 rows)
Saved GBPUSD=X-1h (17299 rows)
Saved CHFJPY=X-1h (17298 rows)
Saved USDJPY=X-1h (17206 rows)
Saved CADJPY=X-1h (17308 rows)
Saved AUDUSD=X-1h (17370 rows)
Saved NZDUSD=X-1h (17344 rows)
Saved EURGBP=X-1h (17310 rows)
Saved GBPCHF=X-1h (17303 rows)
Saved USDCHF=X-1h (17238 rows)
Saved CADCHF=X-1h (17308 rows)
Saved AUDCHF=X-1h (17328 rows)
Saved NZDCHF=X-1h (17348 rows)
Saved EURCHF=X-1h (17305 rows)
Saved GBPJPY=X-1h (17305 rows)
Saved USDCAD=X-1h (17379 rows)
Saved AUDJPY=X-1h (17307 rows)
Saved NZDJPY=X-1h (17284 rows)
Saved EURJPY=X-1h (17305 rows)
Saved GBPCAD=X-1h (17362 rows)
Saved AUDNZD=X-1h (17366 rows)
Saved NZDCAD=X-1h (17354 rows)
Saved EURCAD=X-1h (17312 rows)
Saved GBPAUD=X-1h (17361 rows)
Saved AUDCAD=X-1h (17370 rows)
Saved EURAUD=X-1h (17311 rows)
Saved GBPNZD=X-1h (17350 rows)
Saved EURNZD=X-1h (17294 rows)


## Fetching Japan Investment Trust

In [18]:
out_dir = "../../data/raw/japan-investment-trust/1D"
os.makedirs(out_dir, exist_ok=True)

# 1. List of Japan investment-trust tickers on TSE
japan_trust_list = ["1306.T", "1321.T", "1343.T", "1557.T", "1475.T"]
for symbol in japan_trust_list:
    try:
        # 3. Download daily, auto_adjust=True gives you gap-free OHLC
        df = yf.download(
            symbol,
            start=start_date,
            end=end_date,
            interval=interval,
            auto_adjust=True,   
            progress=False
        )
        if df.empty:
            raise ValueError("No data returned")

        # 4. Save to CSV, using interval in the filename
        fname = f"{symbol.replace('.T','')}-{interval}.csv"
        path = os.path.join(out_dir, fname)
        df.to_csv(path, index_label="Date")
        print(f"✅ Saved {fname} ({len(df)} rows)")

    except Exception as e:
        print(f"❌ {symbol}: {e}")

    sleep(0.1) 

✅ Saved 1306-1d.csv (3686 rows)
✅ Saved 1321-1d.csv (3441 rows)
✅ Saved 1343-1d.csv (3512 rows)
✅ Saved 1557-1d.csv (3662 rows)
✅ Saved 1475-1d.csv (1779 rows)


In [26]:
out_dir = "../../data/raw/japan-investment-trust/1h"
os.makedirs(out_dir, exist_ok=True)
interval="1h"

# 1. List of Japan investment-trust tickers on TSE
japan_trust_list = ["1306.T", "1321.T", "1343.T", "1557.T", "1475.T"]
for symbol in japan_trust_list:
    try:
        # 3. Download daily, auto_adjust=True gives you gap-free OHLC
        df = yf.download(
            symbol,
            interval=interval,
            auto_adjust=True,   
            period="730d",  # Yahoo's limit for intraday
        )
        if df.empty:
            raise ValueError("No data returned")

        # 4. Save to CSV, using interval in the filename
        fname = f"{symbol.replace('.T','')}-{interval}.csv"
        path = os.path.join(out_dir, fname)
        df.to_csv(path, index_label="Date")
        print(f"✅ Saved {fname} ({len(df)} rows)")

    except Exception as e:
        print(f"❌ {symbol}: {e}")

    sleep(0.1) 

[*********************100%***********************]  1 of 1 completed


✅ Saved 1306-1h.csv (5098 rows)


[*********************100%***********************]  1 of 1 completed


✅ Saved 1321-1h.csv (5098 rows)


[*********************100%***********************]  1 of 1 completed


✅ Saved 1343-1h.csv (5097 rows)


[*********************100%***********************]  1 of 1 completed


✅ Saved 1557-1h.csv (5098 rows)


[*********************100%***********************]  1 of 1 completed

✅ Saved 1475-1h.csv (5098 rows)





## Fetching 1261 japan stocks

In [27]:
# adjust the path if needed
csv_path_jp = "../../japanstockstickers.csv"
jp_1261tickers = pd.read_csv(csv_path_jp, header=None,skiprows=1, usecols=[0])[0].tolist()
# replace dots with dashes (e.g. BRK.B → BRK-B)
interval="1d"
print(jp_1261tickers[:5])

['9997.T', '9989.T', '9987.T', '9984.T', '9983.T']


In [None]:
out_dir = "../../data/raw/japanstockes/1D"
os.makedirs(out_dir, exist_ok=True)

failed_tickers = []                # ← collect failures here

for symbol in jp_1261tickers:
    try:
        # 3. Download daily, auto_adjust=True gives you gap-free OHLC
        df = yf.download(
            symbol,
            start=start_date,
            end=end_date,
            interval=interval,
            auto_adjust=True,
            progress=False
        )
        if df.empty:
            raise ValueError("No data returned")

        # 4. Save to CSV, using interval in the filename
        fname = f"{symbol.replace('.T','')}-{interval}.csv"
        path = os.path.join(out_dir, fname)
        df.to_csv(path, index_label="Date")
        print(f"✅ Saved {fname} ({len(df)} rows)")

    except Exception as e:
        print(f"❌ {symbol}: {e}")
        failed_tickers.append(symbol)  # ← record the ticker

    sleep(0.1)

In [None]:
out_dir = "../../data/raw/japanstockes/1h"
os.makedirs(out_dir, exist_ok=True)
interval="1h"

failed_tickers = []                # ← collect failures here

for symbol in jp_1261tickers:
    try:
        # 3. Download daily, auto_adjust=True gives you gap-free OHLC
        df = yf.download(
            symbol,
            interval=interval,
            auto_adjust=True,
            period="730d",  # Yahoo's limit for intraday
        )
        if df.empty:
            raise ValueError("No data returned")

        # 4. Save to CSV, using interval in the filename
        fname = f"{symbol.replace('.T','')}-{interval}.csv"
        path = os.path.join(out_dir, fname)
        df.to_csv(path, index_label="Date")
        print(f"✅ Saved {fname} ({len(df)} rows)")

    except Exception as e:
        print(f"❌ {symbol}: {e}")
        failed_tickers.append(symbol)  # ← record the ticker

    sleep(0.1)

In [30]:
print(len(failed_tickers))
print(failed_tickers)
#these tickers are publicly offered after 2023-01-01

29
['9560.T', '9552.T', '9348.T', '9336.T', '9229.T', '7795.T', '7389.T', '7388.T', '7163.T', '6526.T', '5838.T', '5832.T', '5831.T', '5830.T', '5258.T', '5255.T', '5253.T', '5246.T', '5244.T', '5243.T', '5240.T', '5032.T', '5027.T', '4892.T', '4891.T', '4890.T', '4206.T', '2936.T', '165A.T']


In [None]:
out_dir = "../../data/raw/japanstockes/1D"
os.makedirs(out_dir, exist_ok=True)

for symbol in jp_1261tickers:
    try:
        # 3. Download daily, auto_adjust=True gives you gap-free OHLC
        df = yf.download(
            symbol,
            start=start_date,
            end=end_date,
            interval=interval,
            auto_adjust=True,
            progress=False
        )
        if df.empty:
            raise ValueError("No data returned")

        # 4. Save to CSV, using interval in the filename
        fname = f"{symbol.replace('.T','')}-{interval}.csv"
        path = os.path.join(out_dir, fname)
        df.to_csv(path, index_label="Date")
        print(f"✅ Saved {fname} ({len(df)} rows)")

    except Exception as e:
        print(f"❌ {symbol}: {e}")
        failed_tickers.append(symbol)  # ← record the ticker

    sleep(0.1)

## Fetching Indices

In [None]:
indices_tickers = [
    # --- North America
    "^GSPC",   # S&P 500
    "^OEX",    # S&P 100                         <-- US large mega cap :contentReference[oaicite:0]{index=0}
    "^MID",    # S&P 400 MidCap                  <-- US mid cap :contentReference[oaicite:1]{index=1}
    "^SP600",  # S&P 600 SmallCap
    "^W5000",  # Wilshire 5000 Total Market      :contentReference[oaicite:2]{index=2}
    "^DJI",    # Dow Jones Industrial Average
    "^IXIC",   # Nasdaq Composite
    "^RUT",    # Russell 2000
    "^GSPTSE", # S&P/TSX Composite (Canada)      :contentReference[oaicite:3]{index=3}
    "^BVSP",   # Bovespa (Brazil)                :contentReference[oaicite:4]{index=4}
    # --- Europe
    "^FTSE",   # FTSE 100 (UK)
    "^GDAXI",  # DAX PERF (🇩🇪)
    "^FCHI",   # CAC 40 (🇫🇷)
    "^STOXX50E", # Euro STOXX 50 (EMU)          :contentReference[oaicite:5]{index=5}
    "FTSEMIB.MI",# FTSE MIB (Italy)             :contentReference[oaicite:6]{index=6}
    "^AEX",      # AEX (Netherlands)            :contentReference[oaicite:7]{index=7}
    "^IBEX",     # IBEX 35 (Spain)
    "^SSMI",     # Swiss SMI
    "^OMXS30",   # OMX Stockholm 30
    # --- Middle East & Africa
    "^TASI.SR",  # Tadawul All Share (Saudi)    :contentReference[oaicite:8]{index=8}
    "JN0U.FGI",  # FTSE/JSE Top 40 (ZA)         :contentReference[oaicite:9]{index=9}
    # --- Asia-Pacific
    "^N225",   # Nikkei 225
    "^TOPX",   # TOPIX broad market
    "^AXJO",  # S&P/ASX 200 (Australia)         :contentReference[oaicite:10]{index=10}
    "^KS11",  # KOSPI Composite (Korea)         :contentReference[oaicite:11]{index=11}
    "000001.SS",# Shanghai Composite (China)    :contentReference[oaicite:12]{index=12}
    "^BSESN", # BSE Sensex (India)             :contentReference[oaicite:13]{index=13}
    "^NSEI",  # Nifty 50 (India)
    "^HSI",   # Hang Seng (Hong Kong)
    "^STI",   # Straits Times (Singapore)
    "^AXJO",  # ASX 200 already listed—duplicate OK if you want to overwrite
]

In [None]:


out_dir = "../../data/raw/indices/1D"
os.makedirs(out_dir, exist_ok=True)     

for symbol in indices_tickers:
    try:
        # 3. Download daily, auto_adjust=True gives you gap-free OHLC
        df = yf.download(
            symbol,
            start=start_date,
            end=end_date,
            interval=interval,
            auto_adjust=True,
            progress=False
        )
        if df.empty:
            raise ValueError("No data returned")

        # 4. Save to CSV, using interval in the filename
        fname = f"{symbol.replace('.T','')}-{interval}.csv"
        path = os.path.join(out_dir, fname)
        df.to_csv(path, index_label="Date")
        print(f"✅ Saved {fname} ({len(df)} rows)")

    except Exception as e:
        print(f"❌ {symbol}: {e}")
        failed_tickers.append(symbol)  # ← record the ticker

    sleep(0.1)


In [23]:
print(f"Failed tickers: {failed_tickers}")

Failed tickers: ['9348.T', '9235.T', '9168.T', '9166.T', '9164.T', '9158.T', '9023.T', '7163.T', '6525.T', '5870.T', '5842.T', '5838.T', '5595.T', '5590.T', '5574.T', '5258.T', '5255.T', '5253.T', '5027.T', '4896.T', '4894.T', '278A.T', '277A.T', '268A.T', '264A.T', '262A.T', '260A.T', '256A.T', '254A.T', '247A.T', '219A.T', '218A.T', '215A.T', '206A.T', '198A.T', '197A.T', '190A.T', '186A.T', '166A.T', '165A.T', '156A.T', '153A.T', '147A.T', '141A.T', '^OMXS30', 'JN0U.FGI']


##### France stocks

In [40]:
# adjust the path if needed
csv_path_fr = "../../france_31.csv"
france_31 = pd.read_csv(csv_path_fr, header=None,skiprows=1, usecols=[0])[0].tolist()
# replace dots with dashes (e.g. BRK.B → BRK-B)
interval="1d"
print(france_31[:5])
print(len(france_31))

['STLAP.PA', 'ENGI.PA', 'TTE.PA', 'CS.PA', 'SAN.PA']
31


In [None]:
out_dir = "../../data/raw/france/1D"
os.makedirs(out_dir, exist_ok=True)     

for symbol in france_31:
    try:
        # 3. Download daily, auto_adjust=True gives you gap-free OHLC
        df = yf.download(
            symbol,
            start=start_date,
            end=end_date,
            interval=interval,
            auto_adjust=True,
            progress=False
        )
        if df.empty:
            raise ValueError("No data returned")

        # 4. Save to CSV, using interval in the filename
        fname = f"{symbol.replace('.T','')}-{interval}.csv"
        path = os.path.join(out_dir, fname)
        df.to_csv(path, index_label="Date")
        print(f"✅ Saved {fname} ({len(df)} rows)")

    except Exception as e:
        print(f"❌ {symbol}: {e}")
        failed_tickers.append(symbol)  # ← record the ticker

    sleep(0.1)


In [None]:
out_dir = "../../data/raw/france/1h"
os.makedirs(out_dir, exist_ok=True)     
interval="1h"
for symbol in france_31:
    try:
        # 3. Download daily, auto_adjust=True gives you gap-free OHLC
        df = yf.download(
            symbol,
            period="730d",  # Yahoo's limit for intraday
            interval=interval,
            auto_adjust=True,
            progress=False
        )
        if df.empty:
            raise ValueError("No data returned")

        # 4. Save to CSV, using interval in the filename
        fname = f"{symbol.replace('.T','')}-{interval}.csv"
        path = os.path.join(out_dir, fname)
        df.to_csv(path, index_label="Date")
        print(f"✅ Saved {fname} ({len(df)} rows)")

    except Exception as e:
        print(f"❌ {symbol}: {e}")
        failed_tickers.append(symbol)  # ← record the ticker

    sleep(0.1)


### UK stocks

In [None]:
# adjust the path if needed
csv_path_uk = "../../uk.csv"
uk_ticker = pd.read_csv(csv_path_uk, header=None,skiprows=1, usecols=[0])[0].tolist()
# replace dots with dashes (e.g. BRK.B → BRK-B)

print(uk_ticker[:5])
print(len(uk_ticker))

['AZN.L', 'AZNL.XC', 'HSBA.L', 'HSBAL.XC', 'SHEL.L']
199


In [None]:
out_dir = "../../data/raw/uk/1D"
os.makedirs(out_dir, exist_ok=True)     
interval="1d"
failed_tickers = []                # ← collect failures here
for symbol in uk_ticker:
    try:
        # 3. Download daily, auto_adjust=True gives you gap-free OHLC
        df = yf.download(
            symbol,
            start=start_date,
            end=end_date,
            interval=interval,
            auto_adjust=True,
        )
        if df.empty:
            raise ValueError("No data returned")

        # 4. Save to CSV, using interval in the filename
        fname = f"{symbol.replace('.T','')}-{interval}.csv"
        path = os.path.join(out_dir, fname)
        df.to_csv(path, index_label="Date")
        print(f"✅ Saved {fname} ({len(df)} rows)")

    except Exception as e:
        print(f"❌ {symbol}: {e}")
        failed_tickers.append(symbol)  # ← record the ticker

    sleep(0.1)


In [None]:
out_dir = "../../data/raw/uk/1h"
os.makedirs(out_dir, exist_ok=True)     
interval="1h"
failed_tickers = []                # ← collect failures here
for symbol in uk_ticker:
    try:
        # 3. Download daily, auto_adjust=True gives you gap-free OHLC
        df = yf.download(
            symbol,
            period="730d",  # Yahoo's limit for intraday
            interval=interval,
            auto_adjust=True,
        )
        if df.empty:
            raise ValueError("No data returned")

        # 4. Save to CSV, using interval in the filename
        fname = f"{symbol.replace('.T','')}-{interval}.csv"
        path = os.path.join(out_dir, fname)
        df.to_csv(path, index_label="Date")
        print(f"✅ Saved {fname} ({len(df)} rows)")

    except Exception as e:
        print(f"❌ {symbol}: {e}")
        failed_tickers.append(symbol)  # ← record the ticker

    sleep(0.1)
