In [1]:
import pandas as pd
import numpy as np
import yfinance as yf


In [2]:
def _label_price_changes(close, num_of_labels, cutoff):
    """Label each row by the sign of the percent change in ``close`` vs. the previous row.

    Returns an int64 Series on the same index as ``close``. The first row
    has no predecessor and is always labelled 0.
    """
    # Same arithmetic order as the scalar form 100 * (today - yesterday) / yesterday,
    # so the floating-point results match a row-by-row computation.
    pct_change = 100 * close.diff() / close.shift(1)

    if num_of_labels == 3:
        # Up / down only when the move exceeds the cutoff; everything else
        # (including NaN comparisons, which are False) is "flat" = 0.
        raw = np.where(
            pct_change > cutoff,
            1,
            np.where(pct_change < -cutoff, -1, 0),
        )
    else:
        # Any value other than 3 selects binary mode. An unchanged (or NaN)
        # price is not "> 0", so it is labelled -1.
        raw = np.where(pct_change > 0, 1, -1)

    labels = pd.Series(raw, index=close.index).astype("int64")
    # The first row keeps the 0 placeholder, even in binary mode.
    labels.iloc[:1] = 0
    return labels


def financial_dataset(
    stock,
    num_of_labels=2,
    cutoff=0.25,
    start_date="2010-01-01",
    end_date="2021-01-01"
):
    """Download price history for ``stock`` and label day-over-day Close moves.

    Parameters
    ----------
    stock :
        Ticker passed straight to ``yf.download``.
    num_of_labels : int, default 2
        ``3`` produces labels in {1, 0, -1} using ``cutoff``; any other value
        produces binary labels {1, -1} (1 only for a strictly positive change).
    cutoff : float, default 0.25
        Percent-change threshold used only when ``num_of_labels == 3``:
        ``> cutoff`` -> 1, ``< -cutoff`` -> -1, otherwise 0.
    start_date, end_date : str
        Forwarded to ``yf.download`` as ``start`` / ``end``.

    Returns
    -------
    pandas.DataFrame
        The downloaded frame with its index reset, plus:
        ``Price_change`` (int64 label; the first row is always 0 because it
        has no previous close) and ``date`` (the ``Date`` column as strings).

    Notes
    -----
    Prints the shape of the downloaded data as a side effect. The frame must
    expose a ``Close`` column and, after ``reset_index``, a ``Date`` column.
    """
    fin_data = yf.download(
        stock,
        start=start_date,
        end=end_date,
        progress=False
    )

    # Some downloads come back with two-level columns; keep only the first
    # level so columns can be addressed by plain names such as "Close".
    if isinstance(fin_data.columns, pd.MultiIndex):
        fin_data.columns = fin_data.columns.get_level_values(0)

    print(f"{stock} data shape:", fin_data.shape)

    fin_data = fin_data.reset_index()
    # Vectorised labelling instead of a per-row .loc loop.
    fin_data["Price_change"] = _label_price_changes(
        fin_data["Close"], num_of_labels, cutoff
    )
    fin_data["date"] = fin_data["Date"].astype(str)

    return fin_data

In [3]:
# Tickers to download; one labelled frame is built per symbol.
stocks = ["AAPL", "MSFT", "GOOG", "AMZN", "ADBE", "INTC"]

In [4]:
# Collect one labelled frame per ticker and concatenate once at the end;
# calling pd.concat inside the loop re-copies all accumulated rows each time.
stock_frames = []

for stock in stocks:
    df = financial_dataset(stock)
    df["stock"] = stock  # tag every row with its ticker
    stock_frames.append(df)

# pd.concat raises on an empty list, so fall back to an empty frame
# when there are no tickers to process.
big_df = (
    pd.concat(stock_frames, ignore_index=True)
    if stock_frames
    else pd.DataFrame()
)

big_df.head()

AAPL data shape: (2769, 5)
MSFT data shape: (2769, 5)
GOOG data shape: (2769, 5)
AMZN data shape: (2769, 5)
ADBE data shape: (2769, 5)
INTC data shape: (2769, 5)


Price,Date,Close,High,Low,Open,Volume,Price_change,date,stock
0,2010-01-04,6.418384,6.43308,6.369499,6.400989,493729600,0,2010-01-04,AAPL
1,2010-01-05,6.42948,6.465769,6.39559,6.436078,601904800,1,2010-01-05,AAPL
2,2010-01-06,6.327211,6.454973,6.320613,6.42948,552160000,-1,2010-01-06,AAPL
3,2010-01-07,6.315515,6.358102,6.269628,6.350604,477131200,-1,2010-01-07,AAPL
4,2010-01-08,6.357499,6.358099,6.269926,6.307114,447610800,1,2010-01-08,AAPL


In [5]:
# Write the combined frame to disk without the positional index column.
output_path = "financial_dataset_all_stocks.csv"
big_df.to_csv(output_path, index=False)
print("Saved as", output_path)

Saved as financial_dataset_all_stocks.csv
