In [28]:
!pip install yfinance




[notice] A new release of pip is available: 23.2.1 -> 24.0
[notice] To update, run: python.exe -m pip install --upgrade pip


In [29]:
import yfinance as yf
import pandas as pd
import os

### Define the Tickers and Period

In [30]:
# Dataset name -> Yahoo Finance symbol. Insertion order is the download order.
tickers = dict(
    SP500="^GSPC",
    DOW30="^DJI",
    EuroStoxx50="^STOXX50E",
    NASDAQ="^IXIC",
    CrudeOil="CL=F",      # crude oil futures contract
    SSE="000001.SS",      # Shanghai Composite Index
    Gold="GC=F",          # gold futures contract
    VIX="^VIX",
    Nikkei225="^N225",
    FTSE100="^FTSE",
    EURUSD="EURUSD=X",
)

### Fetch the Data

In [31]:
# Download two years of hourly bars for every ticker and save each result
# to "<name>_data.csv" in the working directory.
for name, ticker in tickers.items():
    print(f"Downloading {name} data...")
    data = yf.download(ticker, period="2y", interval="1h")
    # NOTE(review): yf.download appears to report failures (unknown symbol, no
    # data for the requested window) by returning an empty frame rather than
    # raising. Check before writing so an empty CSV is never reported as
    # "saved" and handed to the cells that read these files back.
    if data.empty:
        print(f"No data returned for {name} ({ticker}); nothing saved.")
        continue
    filename = f"{name}_data.csv"
    data.to_csv(filename)
    print(f"{name} data saved to {filename}")


Downloading SP500 data...


[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed

SP500 data saved to SP500_data.csv
Downloading DOW30 data...
DOW30 data saved to DOW30_data.csv
Downloading EuroStoxx50 data...



[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed


EuroStoxx50 data saved to EuroStoxx50_data.csv
Downloading NASDAQ data...
NASDAQ data saved to NASDAQ_data.csv
Downloading CrudeOil data...


[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed

CrudeOil data saved to CrudeOil_data.csv
Downloading SSE data...
SSE data saved to SSE_data.csv
Downloading Gold data...



[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed

Gold data saved to Gold_data.csv
Downloading VIX data...
VIX data saved to VIX_data.csv
Downloading Nikkei225 data...



[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed


Nikkei225 data saved to Nikkei225_data.csv
Downloading FTSE100 data...
FTSE100 data saved to FTSE100_data.csv
Downloading EURUSD data...


[*********************100%%**********************]  1 of 1 completed

EURUSD data saved to EURUSD_data.csv





In [32]:
# Dataset names; each one corresponds to a "<name>_data.csv" file
datasets = [
    "SP500", "DOW30", "EuroStoxx50", "NASDAQ",
    "CrudeOil", "SSE", "Gold", "VIX",
    "Nikkei225", "FTSE100", "EURUSD",
]

# Report the row count of every saved CSV; a missing file is reported
# with a message instead of stopping the loop.
for name in datasets:
    filename = f"{name}_data.csv"
    try:
        data = pd.read_csv(filename)
    except FileNotFoundError:
        print(f"File {filename} not found. Please ensure the data is downloaded and saved correctly.")
    else:
        print(f"{name} dataset contains {data.shape[0]} rows.")

SP500 dataset contains 3495 rows.
DOW30 dataset contains 3495 rows.
EuroStoxx50 dataset contains 4536 rows.
NASDAQ dataset contains 3495 rows.
CrudeOil dataset contains 11169 rows.
SSE dataset contains 2945 rows.
Gold dataset contains 11504 rows.
VIX dataset contains 6817 rows.
Nikkei225 dataset contains 2940 rows.
FTSE100 dataset contains 4500 rows.
EURUSD dataset contains 12372 rows.


### Process the Data

In [33]:
datasets = [
    "SP500",
    "DOW30",
    "EuroStoxx50",
    "NASDAQ",
    "CrudeOil",
    "SSE",
    "Gold",
    "VIX",
    "Nikkei225",
    "FTSE100",
    "EURUSD"
]


def load_close(filename, name):
    """Load one saved dataset and return its closing prices on a UTC index.

    Parameters
    ----------
    filename : str
        Path of a CSV file that has a 'Datetime' column and a 'Close' column.
    name : str
        Label given to the returned single column.

    Returns
    -------
    pandas.DataFrame
        One column called `name`, indexed by a timezone-aware (UTC)
        DatetimeIndex.

    Raises
    ------
    FileNotFoundError
        If `filename` does not exist (propagated from pandas.read_csv).
    """
    df = pd.read_csv(filename, index_col='Datetime')
    # Timestamps that carry different UTC offsets within one file (e.g. either
    # side of a DST change) cannot be parsed into a datetime index by
    # read_csv(parse_dates=True); pandas leaves such an index as object dtype.
    # Converting explicitly with utc=True yields a proper DatetimeIndex, and
    # puts every dataset on the same clock so rows align when merged on index.
    df.index = pd.to_datetime(df.index, utc=True)
    # Keep only the 'Close' column and rename it to the dataset name
    return df[['Close']].rename(columns={'Close': name})


dataframes = []  # List to store each dataframe

for name in datasets:
    filename = f"{name}_data.csv"
    try:
        df = load_close(filename, name)
        dataframes.append(df)
    except FileNotFoundError:
        print(f"File {filename} not found. Please ensure the data is downloaded and saved correctly.")

In [34]:
# Combine the per-dataset frames into one wide table keyed by the datetime index.
from functools import reduce

# reduce() without an initial value raises an opaque TypeError on an empty
# list; fail early with a message that names the actual cause instead.
if not dataframes:
    raise ValueError("No dataframes to merge. Please ensure the data is downloaded and saved correctly.")

# Pairwise outer merge on the index: a timestamp present in any dataset is
# kept, and datasets with no row at that timestamp get NaN in their column.
merged_df = reduce(
    lambda left, right: pd.merge(left, right, left_index=True, right_index=True, how='outer'),
    dataframes,
)

# Sort by the datetime index; reassigning (rather than inplace=True) means
# re-running the cell never mutates a frame another cell has already shown.
merged_df = merged_df.sort_index()

### Save or Use the Data

In [35]:
# Write the merged table to raw_data.csv (a relative path, so the file is
# created in the working directory).
merged_df.to_csv('raw_data.csv')

### Automating (incomplete)

In [36]:
# `schedule` is a third-party package, not part of the standard library: it
# has to be installed in the kernel's environment or this import raises
# ModuleNotFoundError.
import schedule
import time

def fetch_data():
    """Placeholder for the scheduled refresh job.

    Currently it only prints a message; the download / process / save steps
    of this notebook still have to be added here.
    """
    # Include the steps to fetch data here (Steps 3-6)
    print("Fetching data...")

# Schedule the task
# Jobs are stored on schedule's module-level scheduler, which outlives a
# single cell execution. Remove the job left by a previous run first, so that
# re-running this cell does not make fetch_data fire several times per day.
schedule.clear("fetch_data")
schedule.every().day.at("09:00").do(fetch_data).tag("fetch_data")  # Example: every day at 9:00 AM

# Run the scheduler
# This loop blocks the kernel until the cell is interrupted; an interrupt
# ends it with a short message instead of a traceback.
try:
    while True:
        schedule.run_pending()
        time.sleep(1)
except KeyboardInterrupt:
    print("Scheduler stopped.")

ModuleNotFoundError: No module named 'schedule'