# Collect Data for Test 4
- YFinance Data
- Weekdays (int)
- Month (int)
- Trend (int)
- Buy or Sell

In [70]:
import pandas as pd
from datetime import datetime
import yfinance as yf
from functools import reduce
import os


In [71]:
def get_finance_data(symbols: list, period: str = "max", start: str = None, end: str = None, interval: str = "1d") -> pd.DataFrame:
    """
    Args:
        symbols (list): 
            List of Symbols, whitch we want to get data for
        period (str, optional): 
            Valid periods: 1d,5d,1mo,3mo,6mo,1y,2y,5y,10y,ytd,max
            Either Use period parameter or use start and end. Defaults to "max".
        start (str, optional): 
            start date in YYYY-MM-DD format. 
            Defaults to None.
        end (str, optional): 
            end date in YYYY-MM-DD format. 
            Defaults to None.
        interval (str, optional): 
            Valid intervals: 1m,2m,5m,15m,30m,60m,90m,1h,1d,5d,1wk,1mo,3mo
            Intraday data cannot extend last 60 days. 
            Defaults to "1d".

    Returns:
        pd.DataFrame: Contains the optimized finance data for the given symbols grouped by column
    """
    # Fetch data
    df = yf.download(tickers=symbols, start=start, end=end, period=period, interval=interval)
    
    df.reset_index(inplace=True)
    df['Date'] = pd.to_datetime(df['Date'])
    # Add Month and Day information
    df["month"] = df['Date'].dt.strftime('%m').astype(int)
    df['weekday'] = df['Date'].dt.dayofweek.astype(int)
    
    # return data
    if not os.path.exists("Test4-Raw Material/Data"):
        os.mkdir("Test4-Raw Material/Data")
    return df

In [72]:
def determine_trend(row) -> int:
    """
    Summary:
        Determine if the Close is higher than the Open (last day Close)
    Args:
        row (_type_): Pandas Dataframe row

    Returns:
        int: -1 if Trend is going down and 1 if Trend is going up
    """
    print(type(row))
    return -1 if row['^GDAXI_Open'] > row['^GDAXI_Close'] else 1

## Training Data

In [73]:
symbols = ["^GDAXI", "GC=F", "BZ=F"]
results = []

# Fetch financial data for each symbol and store in results
for symbol in symbols:
    df = get_finance_data(symbols=symbol, interval="1d", start=datetime(2015, 1, 1), end=datetime(2022, 12, 31))
    # Add a prefix based on the symbol for each column (except the date column)
    df = df.add_prefix(f"{symbol}_").rename(columns={f"{symbol}_Date": "Date"})
    df["Symbol"] = symbol
    results.append(df)

# Merge all DataFrames on the "Date" column using reduce to iterate over all results
df = reduce(lambda left, right: pd.merge(left, right, on="Date", how="outer"), results)

df = df.dropna(subset=[col for col in df.columns if "^GDAXI" in col])

df["^GDAXI_trend"] = df.apply(determine_trend, axis=1)

df.to_pickle("Test4-Raw Material/Data/train_dax_data.pkl")

display(df)


[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed

<class 'pandas.core.series.Series'>
<class 'pandas.core.series.Series'>
<class 'pandas.core.series.Series'>
<class 'pandas.core.series.Series'>
<class 'pandas.core.series.Series'>
<class 'pandas.core.series.Series'>
<class 'pandas.core.series.Series'>
<class 'pandas.core.series.Series'>
<class 'pandas.core.series.Series'>
<class 'pandas.core.series.Series'>
<class 'pandas.core.series.Series'>
<class 'pandas.core.series.Series'>
<class 'pandas.core.series.Series'>
<class 'pandas.core.series.Series'>
<class 'pandas.core.series.Series'>
<class 'pandas.core.series.Series'>
<class 'pandas.core.series.Series'>
<class 'pandas.core.series.Series'>
<class 'pandas.core.series.Series'>
<class 'pandas.core.series.Series'>
<class 'pandas.core.series.Series'>
<class 'pandas.core.series.Series'>
<class 'pandas.core.series.Series'>
<class 'pandas.core.series.Series'>
<class 'pandas.core.series.Series'>
<class 'pandas.core.series.Series'>
<class 'pandas.core.series.Series'>
<class 'pandas.core.series.S




Unnamed: 0,Date,^GDAXI_Open,^GDAXI_High,^GDAXI_Low,^GDAXI_Close,^GDAXI_Adj Close,^GDAXI_Volume,^GDAXI_month,^GDAXI_weekday,Symbol_x,...,BZ=F_Open,BZ=F_High,BZ=F_Low,BZ=F_Close,BZ=F_Adj Close,BZ=F_Volume,BZ=F_month,BZ=F_weekday,Symbol,^GDAXI_trend
0,2015-01-02,9869.129883,9879.530273,9687.259766,9764.730469,9764.730469,67673900.0,1.0,4.0,^GDAXI,...,57.630001,58.220001,55.520000,56.419998,56.419998,16707.0,1.0,4.0,BZ=F,-1
1,2015-01-05,9735.650391,9790.269531,9468.580078,9473.160156,9473.160156,105538300.0,1.0,0.0,^GDAXI,...,56.290001,56.290001,52.669998,53.110001,53.110001,30065.0,1.0,0.0,BZ=F,-1
2,2015-01-06,9484.250000,9624.650391,9382.820312,9469.660156,9469.660156,96812300.0,1.0,1.0,^GDAXI,...,53.230000,53.520000,50.529999,51.099998,51.099998,35494.0,1.0,1.0,BZ=F,-1
3,2015-01-07,9510.339844,9592.370117,9459.179688,9518.179688,9518.179688,82466600.0,1.0,2.0,^GDAXI,...,51.060001,51.840000,49.680000,51.150002,51.150002,37082.0,1.0,2.0,BZ=F,1
4,2015-01-08,9643.769531,9855.429688,9607.900391,9837.610352,9837.610352,114825000.0,1.0,3.0,^GDAXI,...,51.000000,51.889999,49.820000,50.959999,50.959999,29469.0,1.0,3.0,BZ=F,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2023,2022-12-23,13945.589844,14000.679688,13874.500000,13940.929688,13940.929688,28738700.0,12.0,4.0,^GDAXI,...,81.730003,84.370003,81.339996,83.919998,83.919998,8621.0,12.0,4.0,BZ=F,-1
2024,2022-12-27,14047.419922,14063.139648,13966.349609,13995.099609,13995.099609,22975000.0,12.0,1.0,^GDAXI,...,84.459999,85.669998,83.660004,84.330002,84.330002,7512.0,12.0,1.0,BZ=F,-1
2025,2022-12-28,14013.719727,14018.469727,13914.620117,13925.599609,13925.599609,27583800.0,12.0,2.0,^GDAXI,...,84.599998,84.639999,81.949997,83.260002,83.260002,5384.0,12.0,2.0,BZ=F,-1
2026,2022-12-29,13890.809570,14071.719727,13871.320312,14071.719727,14071.719727,30727400.0,12.0,3.0,^GDAXI,...,82.860001,82.910004,81.300003,82.260002,82.260002,20599.0,12.0,3.0,BZ=F,1


## Test Data

In [74]:
import pandas as pd
from datetime import datetime
from functools import reduce

symbols = ["^GDAXI", "GC=F", "BZ=F"]
results = []

# Fetch financial data for each symbol and store in results
for symbol in symbols:
    df = get_finance_data(symbols=symbol, interval="1d", start=datetime(2023, 1, 1), end=datetime(2023, 12, 31))
    # Add a prefix based on the symbol for each column (except the date column)
    df = df.add_prefix(f"{symbol}_").rename(columns={f"{symbol}_Date": "Date"})
    df["Symbol"] = symbol
    results.append(df)

# Merge all DataFrames on the "Date" column using reduce to iterate over all results
df = reduce(lambda left, right: pd.merge(left, right, on="Date", how="outer"), results)

df = df.dropna(subset=[col for col in df.columns if "^GDAXI" in col])

df["^GDAXI_trend"] = df.apply(determine_trend, axis=1)

df.to_pickle("Test4-Raw Material/Data/test_dax_data.pkl")

display(df)


[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed


<class 'pandas.core.series.Series'>
<class 'pandas.core.series.Series'>
<class 'pandas.core.series.Series'>
<class 'pandas.core.series.Series'>
<class 'pandas.core.series.Series'>
<class 'pandas.core.series.Series'>
<class 'pandas.core.series.Series'>
<class 'pandas.core.series.Series'>
<class 'pandas.core.series.Series'>
<class 'pandas.core.series.Series'>
<class 'pandas.core.series.Series'>
<class 'pandas.core.series.Series'>
<class 'pandas.core.series.Series'>
<class 'pandas.core.series.Series'>
<class 'pandas.core.series.Series'>
<class 'pandas.core.series.Series'>
<class 'pandas.core.series.Series'>
<class 'pandas.core.series.Series'>
<class 'pandas.core.series.Series'>
<class 'pandas.core.series.Series'>
<class 'pandas.core.series.Series'>
<class 'pandas.core.series.Series'>
<class 'pandas.core.series.Series'>
<class 'pandas.core.series.Series'>
<class 'pandas.core.series.Series'>
<class 'pandas.core.series.Series'>
<class 'pandas.core.series.Series'>
<class 'pandas.core.series.S

Unnamed: 0,Date,^GDAXI_Open,^GDAXI_High,^GDAXI_Low,^GDAXI_Close,^GDAXI_Adj Close,^GDAXI_Volume,^GDAXI_month,^GDAXI_weekday,Symbol_x,...,BZ=F_Open,BZ=F_High,BZ=F_Low,BZ=F_Close,BZ=F_Adj Close,BZ=F_Volume,BZ=F_month,BZ=F_weekday,Symbol,^GDAXI_trend
0,2023-01-02,13992.709961,14095.910156,13976.440430,14069.259766,14069.259766,34978500.0,1.0,0.0,^GDAXI,...,,,,,,,,,,1
1,2023-01-03,14116.070312,14293.870117,14083.759766,14181.669922,14181.669922,57907500.0,1.0,1.0,^GDAXI,...,86.040001,87.019997,81.769997,82.099998,82.099998,27559.0,1.0,1.0,BZ=F,1
2,2023-01-04,14266.980469,14492.780273,14264.599609,14490.780273,14490.780273,76221600.0,1.0,2.0,^GDAXI,...,82.230003,82.669998,77.720001,77.839996,77.839996,24772.0,1.0,2.0,BZ=F,1
3,2023-01-05,14451.230469,14501.599609,14403.730469,14436.309570,14436.309570,60947500.0,1.0,3.0,^GDAXI,...,78.089996,79.959999,77.610001,78.690002,78.690002,28051.0,1.0,3.0,BZ=F,-1
4,2023-01-06,14476.719727,14610.230469,14388.980469,14610.019531,14610.019531,55433700.0,1.0,4.0,^GDAXI,...,78.809998,80.570000,78.050003,78.570000,78.570000,23767.0,1.0,4.0,BZ=F,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
250,2023-12-21,16667.310547,16708.349609,16624.160156,16687.419922,16687.419922,57871300.0,12.0,3.0,^GDAXI,...,79.139999,80.120003,77.849998,79.389999,79.389999,22237.0,12.0,3.0,BZ=F,1
251,2023-12-22,16673.300781,16735.320312,16651.779297,16706.179688,16706.179688,46295300.0,12.0,4.0,^GDAXI,...,79.440002,80.370003,78.830002,79.070000,79.070000,12334.0,12.0,4.0,BZ=F,1
252,2023-12-27,16727.769531,16775.710938,16697.580078,16742.070312,16742.070312,37678900.0,12.0,2.0,^GDAXI,...,80.739998,81.320000,79.489998,79.650002,79.650002,8282.0,12.0,2.0,BZ=F,1
253,2023-12-28,16780.949219,16783.789062,16688.519531,16701.550781,16701.550781,36091600.0,12.0,3.0,^GDAXI,...,79.839996,79.959999,78.339996,78.389999,78.389999,24301.0,12.0,3.0,BZ=F,-1
