# Collect Data for Test 1
- YFinance Data
- Weekdays (int)
- Month (int)
- Trend (int)
- Buy or Sell

In [1]:
import pandas as pd
from datetime import datetime
import yfinance as yf
from functools import reduce
import os

In [2]:
def get_finance_data(symbols: list, period: str = "max", start: str = None, end: str = None, interval: str = "1d") -> pd.DataFrame:
    """
    Summary:
        Download price data via yfinance and append calendar columns.
    Args:
        symbols (list):
            List of symbols which we want to get data for.
            Passed unchanged to yf.download as `tickers`; callers in this
            file also pass a single ticker string.
        period (str, optional):
            Valid periods: 1d,5d,1mo,3mo,6mo,1y,2y,5y,10y,ytd,max
            Either use the period parameter or use start and end.
            Ignored when start or end is given. Defaults to "max".
        start (str, optional):
            Start date in YYYY-MM-DD format (callers in this file also pass
            datetime objects).
            Defaults to None.
        end (str, optional):
            End date in YYYY-MM-DD format (callers in this file also pass
            datetime objects).
            Defaults to None.
        interval (str, optional):
            Valid intervals: 1m,2m,5m,15m,30m,60m,90m,1h,1d,5d,1wk,1mo,3mo
            Intraday data cannot extend past the last 60 days.
            Defaults to "1d".

    Returns:
        pd.DataFrame: The downloaded data with the former index exposed as a
        "Date" column, plus the integer columns "month" (1-12) and
        "weekday" (pandas dayofweek, Monday=0).
    """
    # An explicit date range takes precedence: forwarding a period together
    # with start/end is contradictory, so only one of the two is passed on.
    if start is not None or end is not None:
        period = None

    # Fetch data
    df = yf.download(tickers=symbols, start=start, end=end, period=period, interval=interval)

    df = df.reset_index()
    # Intraday intervals name the timestamp index "Datetime" rather than
    # "Date"; normalise so the code below works for every interval.
    df = df.rename(columns={"Datetime": "Date"})
    df['Date'] = pd.to_datetime(df['Date'])

    # Add Month and Day information
    df["month"] = df['Date'].dt.month.astype(int)
    df['weekday'] = df['Date'].dt.dayofweek.astype(int)

    # NOTE(review): this directory is created but never written to in this
    # function; kept only to preserve existing side effects - confirm whether
    # it is still needed.
    os.makedirs("Test4-Raw Material/Data", exist_ok=True)
    return df

In [3]:
def determine_trend(row) -> int:
    """
    Summary:
        Classify a row as a down or up move by comparing the DAX open
        against the DAX close.
    Args:
        row: Mapping-like row (e.g. a pandas Series) providing the keys
            '^GDAXI_Open' and '^GDAXI_Close'

    Returns:
        int: -1 if the open is strictly above the close (trend going down),
        otherwise 1 (trend going up; an unchanged price also yields 1)
    """
    # The former debug print of type(row) was removed: it ran on every call
    # and flooded the output when applied row-wise.
    return -1 if row['^GDAXI_Open'] > row['^GDAXI_Close'] else 1

## Training Data

In [None]:
# Build the training set: per-symbol daily data merged on "Date", with the
# next row's DAX close as target column "Y", pickled to ../Data.
os.makedirs("../Data", exist_ok=True)
symbols = ["^GDAXI", "GC=F", "BZ=F"]
results = []

# Fetch financial data for each symbol and store in results
for symbol in symbols:
    df = get_finance_data(symbols=symbol, interval="1d", start=datetime(2015, 1, 1), end=datetime(2022, 12, 31))
    # Add a prefix based on the symbol for each column (except the date column)
    df = df.add_prefix(f"{symbol}_").rename(columns={f"{symbol}_Date": "Date"})
    df["Symbol"] = symbol
    results.append(df)

# Merge all DataFrames on the "Date" column using reduce to iterate over all results
df = reduce(lambda left, right: pd.merge(left, right, on="Date", how="inner"), results)

# Drop incomplete rows and renumber, so no index gaps remain after the drop
df = df.dropna().reset_index(drop=True)

# Target: the DAX close of the following row. A positional shift avoids
# label arithmetic (index-1), which appends a phantom row whenever the
# preceding label has been dropped.
df["Y"] = df["^GDAXI_Close"].shift(-1)
# The last row has no following close, so it cannot be labelled
df = df.iloc[:-1].copy()

df["^GDAXI_month"] = df["^GDAXI_month"].astype(int)
df.to_pickle("../Data/train_dax_data.pkl")
display(df)

## Test Data

In [None]:
# Build the test set: per-symbol daily data merged on "Date", with the
# next row's DAX close as target column "Y", pickled to ../Data.
# Make sure the output directory exists even if this cell is run on its own
os.makedirs("../Data", exist_ok=True)
symbols = ["^GDAXI", "GC=F", "BZ=F"]
results = []

# Fetch financial data for each symbol and store in results
for symbol in symbols:
    df = get_finance_data(symbols=symbol, interval="1d", start=datetime(2022, 10, 1), end=datetime(2023, 12, 31))
    # Add a prefix based on the symbol for each column (except the date column)
    df = df.add_prefix(f"{symbol}_").rename(columns={f"{symbol}_Date": "Date"})
    df["Symbol"] = symbol
    results.append(df)

# Merge all DataFrames on the "Date" column using reduce to iterate over all results
df = reduce(lambda left, right: pd.merge(left, right, on="Date", how="inner"), results)

# Drop incomplete rows and renumber, so no index gaps remain after the drop
df = df.dropna().reset_index(drop=True)

# Target: the DAX close of the following row. A positional shift avoids
# label arithmetic (index-1), which appends a phantom row whenever the
# preceding label has been dropped.
df["Y"] = df["^GDAXI_Close"].shift(-1)
# The last row has no following close, so it cannot be labelled
df = df.iloc[:-1].copy()

df["^GDAXI_month"] = df["^GDAXI_month"].astype(int)
df.to_pickle("../Data/test_dax_data.pkl")
display(df)