In [24]:
#!pip install yfinance

In [25]:
import yfinance as yf
import pandas as pd
import os

### Define the Tickers and Period

In [26]:
tickers = {
    "SP500": "^GSPC",
    "DOW30": "^DJI",
    "EuroStoxx50": "^STOXX50E",
    "NASDAQ": "^IXIC",
    "CrudeOil": "CL=F",  # Futures contract for Crude Oil
    "SSE": "000001.SS",  # Shanghai Composite Index
    "Gold": "GC=F",  # Futures contract for Gold
    "VIX": "^VIX",
    "Nikkei225": "^N225",
    "FTSE100": "^FTSE",
    "EURUSD": "EURUSD=X",
    "CNYUSD": "CNYUSD=X",
    "GBPUSD": "GBPUSD=X",
    "JPYUSD": "JPYUSD=X",
    "CHFUSD": "CHFUSD=X"
    
}

### Fetch the Data

In [27]:
for name, ticker in tickers.items():
    print(f"Downloading {name} data...")
    data = yf.download(ticker, period="2y", interval="1h")
    filename = f"{name}_data.csv"
    data.to_csv(filename)
    print(f"{name} data saved to {filename}")


Downloading SP500 data...


[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed


SP500 data saved to SP500_data.csv
Downloading DOW30 data...
DOW30 data saved to DOW30_data.csv
Downloading EuroStoxx50 data...


[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed

EuroStoxx50 data saved to EuroStoxx50_data.csv
Downloading NASDAQ data...





NASDAQ data saved to NASDAQ_data.csv
Downloading CrudeOil data...


[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed

CrudeOil data saved to CrudeOil_data.csv
Downloading SSE data...
SSE data saved to SSE_data.csv
Downloading Gold data...



[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed

Gold data saved to Gold_data.csv
Downloading VIX data...



[*********************100%%**********************]  1 of 1 completed

VIX data saved to VIX_data.csv
Downloading Nikkei225 data...
Nikkei225 data saved to Nikkei225_data.csv
Downloading FTSE100 data...



[*********************100%%**********************]  1 of 1 completed


FTSE100 data saved to FTSE100_data.csv
Downloading EURUSD data...


[*********************100%%**********************]  1 of 1 completed


EURUSD data saved to EURUSD_data.csv
Downloading CNYUSD data...


[*********************100%%**********************]  1 of 1 completed


CNYUSD data saved to CNYUSD_data.csv
Downloading GBPUSD data...


[*********************100%%**********************]  1 of 1 completed


GBPUSD data saved to GBPUSD_data.csv
Downloading JPYUSD data...


[*********************100%%**********************]  1 of 1 completed


JPYUSD data saved to JPYUSD_data.csv
Downloading CHFUSD data...


[*********************100%%**********************]  1 of 1 completed

CHFUSD data saved to CHFUSD_data.csv





In [28]:
# List of dataset names as defined in the previous step
datasets = [
    "SP500",
    "DOW30",
    "EuroStoxx50",
    "NASDAQ",
    "CrudeOil",
    "SSE",
    "Gold",
    "VIX",
    "Nikkei225",
    "FTSE100",
    "EURUSD",
    "CNYUSD",
    "GBPUSD"
    "JPYUSD",
    "CHFUSD"
]

for name in datasets:
    filename = f"{name}_data.csv"
    try:
        data = pd.read_csv(filename)
        print(f"{name} dataset contains {len(data)} rows.")
    except FileNotFoundError:
        print(f"File {filename} not found. Please ensure the data is downloaded and saved correctly.")

SP500 dataset contains 3495 rows.
DOW30 dataset contains 3495 rows.
EuroStoxx50 dataset contains 4536 rows.
NASDAQ dataset contains 3495 rows.
CrudeOil dataset contains 11165 rows.
SSE dataset contains 2946 rows.
Gold dataset contains 11500 rows.
VIX dataset contains 6822 rows.
Nikkei225 dataset contains 2936 rows.
FTSE100 dataset contains 4500 rows.
EURUSD dataset contains 12369 rows.
CNYUSD dataset contains 10025 rows.
File GBPUSDJPYUSD_data.csv not found. Please ensure the data is downloaded and saved correctly.
CHFUSD dataset contains 12308 rows.


### Process the Data

In [29]:
datasets = [
    "SP500",
    "DOW30",
    "EuroStoxx50",
    "NASDAQ",
    "CrudeOil",
    "SSE",
    "Gold",
    "VIX",
    "Nikkei225",
    "FTSE100",
    "EURUSD",
    "CNYUSD",
    "GBPUSD",
    "JPYUSD",
    "CHFUSD"
]

dataframes = []  # List to store each dataframe

for name in datasets:
    filename = f"{name}_data.csv"
    try:
        # Load the dataset
        df = pd.read_csv(filename, index_col='Datetime', parse_dates=True)
        # Keep only the 'Close' column and rename it to the dataset name
        df = df[['Close']].rename(columns={'Close': name})
        dataframes.append(df)
    except FileNotFoundError:
        print(f"File {filename} not found. Please ensure the data is downloaded and saved correctly.")

In [30]:
# Use the functools.reduce function to iteratively merge dataframes on the index
from functools import reduce

# Merge all dataframes on the datetime index
merged_df = reduce(lambda left, right: pd.merge(left, right, left_index=True, right_index=True, how='outer'), dataframes)

# Sort the dataframe by datetime index just in case
merged_df.sort_index(inplace=True)

### Save or Use the Data

In [32]:
merged_df.to_csv('raw_data.csv')

### Automating(incomplete)

In [None]:
import schedule
import time

def fetch_data():
    # Include the steps to fetch data here (Steps 3-6)
    print("Fetching data...")

# Schedule the task
schedule.every().day.at("09:00").do(fetch_data)  # Example: every day at 9:00 AM

# Run the scheduler
while True:
    schedule.run_pending()
    time.sleep(1)