In [1]:
import pandas as pd
import datetime
import requests


In [2]:
# Yahoo Finance ticker symbols to download, one per line.
mystocks = [
    'BZ=F',      # Brent crude oil
    'SB=F',      # NOTE(review): previously labelled "WTI Crude Oil", but SB=F looks like Yahoo's Sugar No. 11 ticker (WTI would be CL=F) -- confirm the intended instrument
    'DX-Y.NYB',  # U.S. Dollar Index
]
# Filled by the download loop further down: symbol -> DataFrame of daily rows.
historical_data_dict = dict()


In [3]:
def get_unix_timestamp(years_ago=15):
    """Return ``(start, end)`` Unix timestamps for a look-back window ending now.

    The window is ``365 * years_ago`` days long, i.e. a year is treated as
    exactly 365 days and leap days are not added. Both values are truncated
    to whole seconds.

    Args:
        years_ago: Number of 365-day years to look back from the current time.

    Returns:
        tuple[int, int]: ``(window_start, window_end)`` in seconds since the epoch.
    """
    now = datetime.datetime.now()
    lookback = datetime.timedelta(days=365 * years_ago)
    window_start = now - lookback
    return int(window_start.timestamp()), int(now.timestamp())


In [4]:
def fetch_historical_data(symbol, period1, period2, frequency='1d', timeout=10):
    """Download OHLCV history for one symbol from the Yahoo Finance chart endpoint.

    Every failure mode (HTTP/connection error, non-JSON body, unexpected
    payload layout, quote series whose lengths do not match the timestamps)
    is reported with ``print`` and signalled by returning ``None``; the
    function does not raise for those cases.

    Parameters
    ----------
    symbol : str
        Ticker placed in the request URL path.
    period1, period2 : int
        Values sent as the ``period1`` / ``period2`` query parameters.
        The caller in this notebook passes Unix timestamps in seconds.
    frequency : str, default '1d'
        Value sent as the ``interval`` query parameter.
    timeout : float or tuple, default 10
        Forwarded to ``requests.get`` so a stalled connection cannot block
        the notebook indefinitely.

    Returns
    -------
    pandas.DataFrame or None
        Columns ``Date, Open, High, Low, Close, Volume`` with every row that
        contains a missing value removed, or ``None`` on failure.
    """
    url = f'https://query1.finance.yahoo.com/v8/finance/chart/{symbol}?period1={period1}&period2={period2}&interval={frequency}'
    headers = {'User-Agent': 'Mozilla/5.0'}

    try:
        # requests applies no timeout by default; without one the Timeout
        # handler below could never fire for a server that stops responding.
        response = requests.get(url, headers=headers, timeout=timeout)
        response.raise_for_status()
    except requests.exceptions.HTTPError as errh:
        print(f"HTTP Error for {symbol}: {errh}")
        return None
    except requests.exceptions.ConnectionError as errc:
        print(f"Connection Error for {symbol}: {errc}")
        return None
    except requests.exceptions.Timeout as errt:
        print(f"Timeout Error for {symbol}: {errt}")
        return None
    except requests.exceptions.RequestException as err:
        print(f"Request Exception for {symbol}: {err}")
        return None

    try:
        # A 200 response is not guaranteed to carry JSON (e.g. an HTML error
        # page); the decode error is a ValueError subclass.
        data = response.json()
    except ValueError as e:
        print(f"Error parsing data for {symbol}: {e}")
        return None

    try:
        result = data['chart']['result'][0]
        timestamps = result['timestamp']
        indicators = result['indicators']['quote'][0]
    except (KeyError, IndexError, TypeError) as e:
        print(f"Error parsing data for {symbol}: {e}")
        return None

    # Convert timestamps to datetime.
    # NOTE: fromtimestamp() yields naive datetimes in the machine's local
    # timezone, so the time-of-day in 'Date' depends on where this runs.
    dates = [datetime.datetime.fromtimestamp(ts) for ts in timestamps]

    # Extract OHLCV data; a series absent from the payload becomes [].
    historical_data = {'Date': dates}
    for column, key in (('Open', 'open'), ('High', 'high'), ('Low', 'low'),
                        ('Close', 'close'), ('Volume', 'volume')):
        historical_data[column] = indicators.get(key, [])

    try:
        df = pd.DataFrame(historical_data)
    except ValueError as e:
        # Raised when a series is missing/short and its length differs from
        # the timestamp list; treat it like any other malformed payload.
        print(f"Error parsing data for {symbol}: {e}")
        return None

    # Handle missing data by dropping rows with NaN values
    return df.dropna()


In [5]:
# Build the request window a single time and reuse it for every symbol
period1, period2 = get_unix_timestamp(years_ago=15)

for symbol in mystocks:
    historical_df = fetch_historical_data(symbol, period1, period2, frequency='1d')

    # fetch_historical_data signals any failure by returning None
    if historical_df is None:
        print(f"Failed to retrieve historical data for {symbol}")
        continue

    # Coerce 'Date' to datetime, order rows chronologically, renumber from 0
    historical_df = (
        historical_df
        .assign(Date=pd.to_datetime(historical_df['Date']))
        .sort_values('Date')
        .reset_index(drop=True)
    )

    # Keep the frame in memory for the cells that follow
    historical_data_dict[symbol] = historical_df

    # Write a per-symbol CSV into the current working directory
    csv_filename = f"{symbol}_15year_daily_data.csv"
    historical_df.to_csv(csv_filename, index=False)
    print(f"Data for {symbol} saved to {csv_filename}")


Data for BZ=F saved to BZ=F_15year_daily_data.csv
Data for SB=F saved to SB=F_15year_daily_data.csv
Data for DX-Y.NYB saved to DX-Y.NYB_15year_daily_data.csv


In [6]:
# Show the leading rows of every frame collected in historical_data_dict
for symbol in historical_data_dict:
    df = historical_data_dict[symbol]
    print(f"\nFirst 5 rows for {symbol}:")
    preview = df.head()
    display(preview)



First 5 rows for BZ=F:


Unnamed: 0,Date,Open,High,Low,Close,Volume
0,2010-05-12 12:00:00,81.349998,81.690002,80.519997,81.199997,57.0
1,2010-05-13 12:00:00,80.110001,80.110001,79.910004,80.110001,1.0
2,2010-05-14 12:00:00,77.18,77.18,77.18,77.18,230.0
3,2010-05-17 12:00:00,77.629997,77.629997,75.07,75.099998,20.0
4,2010-05-18 12:00:00,76.489998,76.489998,74.050003,74.43,32.0



First 5 rows for SB=F:


Unnamed: 0,Date,Open,High,Low,Close,Volume
0,2010-05-12 12:00:00,13.88,14.7,13.77,14.67,52821.0
1,2010-05-13 12:00:00,14.61,14.82,14.45,14.66,37461.0
2,2010-05-14 12:00:00,14.67,14.73,13.88,14.13,44535.0
3,2010-05-17 12:00:00,14.03,14.53,13.81,13.89,39107.0
4,2010-05-18 12:00:00,13.95,14.83,13.95,14.8,58211.0



First 5 rows for DX-Y.NYB:


Unnamed: 0,Date,Open,High,Low,Close,Volume
0,2010-05-12 12:00:00,84.709999,84.93,84.209999,84.830002,0.0
1,2010-05-13 12:00:00,84.800003,85.459999,84.540001,85.209999,0.0
2,2010-05-14 12:00:00,85.360001,86.290001,85.199997,86.099998,0.0
3,2010-05-17 12:00:00,86.209999,87.059998,86.050003,86.209999,0.0
4,2010-05-18 12:00:00,86.220001,87.379997,85.910004,87.160004,0.0
