# Collect Data for Test 4
- YFinance Data
- Weekdays (int)
- Month (int)
- Trend (int)
- Buy or Sell

In [1]:
import pandas as pd
from datetime import datetime
import yfinance as yf
from functools import reduce
import os


In [2]:
def get_finance_data(symbols: list, period: str = "max", start: str = None, end: str = None, interval: str = "1d") -> pd.DataFrame:
    """
    Args:
        symbols (list): 
            List of Symbols, whitch we want to get data for
        period (str, optional): 
            Valid periods: 1d,5d,1mo,3mo,6mo,1y,2y,5y,10y,ytd,max
            Either Use period parameter or use start and end. Defaults to "max".
        start (str, optional): 
            start date in YYYY-MM-DD format. 
            Defaults to None.
        end (str, optional): 
            end date in YYYY-MM-DD format. 
            Defaults to None.
        interval (str, optional): 
            Valid intervals: 1m,2m,5m,15m,30m,60m,90m,1h,1d,5d,1wk,1mo,3mo
            Intraday data cannot extend last 60 days. 
            Defaults to "1d".

    Returns:
        pd.DataFrame: Contains the optimized finance data for the given symbols grouped by column
    """
    # Fetch data
    df = yf.download(tickers=symbols, start=start, end=end, period=period, interval=interval)
    
    df.reset_index(inplace=True)
    df['Date'] = pd.to_datetime(df['Date'])
    # Add Month and Day information
    df["month"] = df['Date'].dt.strftime('%m').astype(int)
    df['weekday'] = df['Date'].dt.dayofweek.astype(int)
    
    # return data
    os.makedirs("Test4-Raw Material/Data", exist_ok=True)
    return df

In [3]:
def determine_trend(row) -> int:
    """
    Summary:
        Determine if the Close is higher than the Open (last day Close)
    Args:
        row (_type_): Pandas Dataframe row

    Returns:
        int: -1 if Trend is going down and 1 if Trend is going up
    """
    print(type(row))
    return -1 if row['^GDAXI_Open'] > row['^GDAXI_Close'] else 1

## Training Data

In [4]:
os.makedirs('../Data', exist_ok=True)
symbols = ["^GDAXI", "GC=F", "BZ=F"]
results = []

# Fetch financial data for each symbol and store in results
for symbol in symbols:
    df = get_finance_data(symbols=symbol, interval="1d", start=datetime(2015, 1, 1), end=datetime(2022, 12, 31))
    # Add a prefix based on the symbol for each column (except the date column)
    df = df.add_prefix(f"{symbol}_").rename(columns={f"{symbol}_Date": "Date"})
    df["Symbol"] = symbol
    results.append(df)

# Merge all DataFrames on the "Date" column using reduce to iterate over all results
df = reduce(lambda left, right: pd.merge(left, right, on="Date", how="inner"), results)

df = df.dropna(subset=[col for col in df.columns])

df["^GDAXI_trend"] = df.apply(determine_trend, axis=1)

for index, row in df.iterrows():
    if index > 0: 
        if df.at[index, "^GDAXI_trend"] == 1:
            df.at[index-1, "Invest"] = True
        else:
            df.at[index-1, "Invest"] = False

df["Invest"] = df["Invest"].astype(bool)
df.to_pickle("../Data/train_dax_data.pkl")
display(df)


[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed


<class 'pandas.core.series.Series'>
<class 'pandas.core.series.Series'>
<class 'pandas.core.series.Series'>
<class 'pandas.core.series.Series'>
<class 'pandas.core.series.Series'>
<class 'pandas.core.series.Series'>
<class 'pandas.core.series.Series'>
<class 'pandas.core.series.Series'>
<class 'pandas.core.series.Series'>
<class 'pandas.core.series.Series'>
<class 'pandas.core.series.Series'>
<class 'pandas.core.series.Series'>
<class 'pandas.core.series.Series'>
<class 'pandas.core.series.Series'>
<class 'pandas.core.series.Series'>
<class 'pandas.core.series.Series'>
<class 'pandas.core.series.Series'>
<class 'pandas.core.series.Series'>
<class 'pandas.core.series.Series'>
<class 'pandas.core.series.Series'>
<class 'pandas.core.series.Series'>
<class 'pandas.core.series.Series'>
<class 'pandas.core.series.Series'>
<class 'pandas.core.series.Series'>
<class 'pandas.core.series.Series'>
<class 'pandas.core.series.Series'>
<class 'pandas.core.series.Series'>
<class 'pandas.core.series.S

Unnamed: 0,Date,^GDAXI_Open,^GDAXI_High,^GDAXI_Low,^GDAXI_Close,^GDAXI_Adj Close,^GDAXI_Volume,^GDAXI_month,^GDAXI_weekday,Symbol_x,...,BZ=F_High,BZ=F_Low,BZ=F_Close,BZ=F_Adj Close,BZ=F_Volume,BZ=F_month,BZ=F_weekday,Symbol,^GDAXI_trend,Invest
0,2015-01-02,9869.129883,9879.530273,9687.259766,9764.730469,9764.730469,67673900,1,4,^GDAXI,...,58.220001,55.520000,56.419998,56.419998,16707,1,4,BZ=F,-1,False
1,2015-01-05,9735.650391,9790.269531,9468.580078,9473.160156,9473.160156,105538300,1,0,^GDAXI,...,56.290001,52.669998,53.110001,53.110001,30065,1,0,BZ=F,-1,False
2,2015-01-06,9484.250000,9624.650391,9382.820312,9469.660156,9469.660156,96812300,1,1,^GDAXI,...,53.520000,50.529999,51.099998,51.099998,35494,1,1,BZ=F,-1,True
3,2015-01-07,9510.339844,9592.370117,9459.179688,9518.179688,9518.179688,82466600,1,2,^GDAXI,...,51.840000,49.680000,51.150002,51.150002,37082,1,2,BZ=F,1,True
4,2015-01-08,9643.769531,9855.429688,9607.900391,9837.610352,9837.610352,114825000,1,3,^GDAXI,...,51.889999,49.820000,50.959999,50.959999,29469,1,3,BZ=F,1,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1970,2022-12-23,13945.589844,14000.679688,13874.500000,13940.929688,13940.929688,28738700,12,4,^GDAXI,...,84.370003,81.339996,83.919998,83.919998,8621,12,4,BZ=F,-1,False
1971,2022-12-27,14047.419922,14063.139648,13966.349609,13995.099609,13995.099609,22975000,12,1,^GDAXI,...,85.669998,83.660004,84.330002,84.330002,7512,12,1,BZ=F,-1,False
1972,2022-12-28,14013.719727,14018.469727,13914.620117,13925.599609,13925.599609,27583800,12,2,^GDAXI,...,84.639999,81.949997,83.260002,83.260002,5384,12,2,BZ=F,-1,True
1973,2022-12-29,13890.809570,14071.719727,13871.320312,14071.719727,14071.719727,30727400,12,3,^GDAXI,...,82.910004,81.300003,82.260002,82.260002,20599,12,3,BZ=F,1,False


## Test Data

In [5]:
os.makedirs('../Data', exist_ok=True)
symbols = ["^GDAXI", "GC=F", "BZ=F"]
results = []

# Fetch financial data for each symbol and store in results
for symbol in symbols:
    df = get_finance_data(symbols=symbol, interval="1d", start=datetime(2023, 1, 1), end=datetime(2023, 12, 31))
    # Add a prefix based on the symbol for each column (except the date column)
    df = df.add_prefix(f"{symbol}_").rename(columns={f"{symbol}_Date": "Date"})
    df["Symbol"] = symbol
    results.append(df)

# Merge all DataFrames on the "Date" column using reduce to iterate over all results
df = reduce(lambda left, right: pd.merge(left, right, on="Date", how="inner"), results)

df = df.dropna(subset=[col for col in df.columns])

df["^GDAXI_trend"] = df.apply(determine_trend, axis=1)

for index, row in df.iterrows():
    if index > 0: 
        if df.at[index, "^GDAXI_trend"] == 1:
            df.at[index-1, "Invest"] = True
        else:
            df.at[index-1, "Invest"] = False
df["Invest"] = df["Invest"].astype(bool)           
df.to_pickle("../Data/test_dax_data.pklre")
display(df)


[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed


<class 'pandas.core.series.Series'>
<class 'pandas.core.series.Series'>
<class 'pandas.core.series.Series'>
<class 'pandas.core.series.Series'>
<class 'pandas.core.series.Series'>
<class 'pandas.core.series.Series'>
<class 'pandas.core.series.Series'>
<class 'pandas.core.series.Series'>
<class 'pandas.core.series.Series'>
<class 'pandas.core.series.Series'>
<class 'pandas.core.series.Series'>
<class 'pandas.core.series.Series'>
<class 'pandas.core.series.Series'>
<class 'pandas.core.series.Series'>
<class 'pandas.core.series.Series'>
<class 'pandas.core.series.Series'>
<class 'pandas.core.series.Series'>
<class 'pandas.core.series.Series'>
<class 'pandas.core.series.Series'>
<class 'pandas.core.series.Series'>
<class 'pandas.core.series.Series'>
<class 'pandas.core.series.Series'>
<class 'pandas.core.series.Series'>
<class 'pandas.core.series.Series'>
<class 'pandas.core.series.Series'>
<class 'pandas.core.series.Series'>
<class 'pandas.core.series.Series'>
<class 'pandas.core.series.S

Unnamed: 0,Date,^GDAXI_Open,^GDAXI_High,^GDAXI_Low,^GDAXI_Close,^GDAXI_Adj Close,^GDAXI_Volume,^GDAXI_month,^GDAXI_weekday,Symbol_x,...,BZ=F_High,BZ=F_Low,BZ=F_Close,BZ=F_Adj Close,BZ=F_Volume,BZ=F_month,BZ=F_weekday,Symbol,^GDAXI_trend,Invest
0,2023-01-03,14116.070312,14293.870117,14083.759766,14181.669922,14181.669922,57907500,1,1,^GDAXI,...,87.019997,81.769997,82.099998,82.099998,27559,1,1,BZ=F,1,True
1,2023-01-04,14266.980469,14492.780273,14264.599609,14490.780273,14490.780273,76221600,1,2,^GDAXI,...,82.669998,77.720001,77.839996,77.839996,24772,1,2,BZ=F,1,False
2,2023-01-05,14451.230469,14501.599609,14403.730469,14436.309570,14436.309570,60947500,1,3,^GDAXI,...,79.959999,77.610001,78.690002,78.690002,28051,1,3,BZ=F,-1,True
3,2023-01-06,14476.719727,14610.230469,14388.980469,14610.019531,14610.019531,55433700,1,4,^GDAXI,...,80.570000,78.050003,78.570000,78.570000,23767,1,4,BZ=F,1,True
4,2023-01-09,14650.730469,14832.889648,14628.610352,14792.830078,14792.830078,64976500,1,0,^GDAXI,...,81.370003,78.339996,79.650002,79.650002,29985,1,0,BZ=F,1,True
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
242,2023-12-21,16667.310547,16708.349609,16624.160156,16687.419922,16687.419922,57871300,12,3,^GDAXI,...,80.120003,77.849998,79.389999,79.389999,22237,12,3,BZ=F,1,True
243,2023-12-22,16673.300781,16735.320312,16651.779297,16706.179688,16706.179688,46295300,12,4,^GDAXI,...,80.370003,78.830002,79.070000,79.070000,12334,12,4,BZ=F,1,True
244,2023-12-27,16727.769531,16775.710938,16697.580078,16742.070312,16742.070312,37678900,12,2,^GDAXI,...,81.320000,79.489998,79.650002,79.650002,8282,12,2,BZ=F,1,False
245,2023-12-28,16780.949219,16783.789062,16688.519531,16701.550781,16701.550781,36091600,12,3,^GDAXI,...,79.959999,78.339996,78.389999,78.389999,24301,12,3,BZ=F,-1,True
