In [2]:
import numpy as np
import pandas as pd

In [11]:
#stocks = 'https://raw.githubusercontent.com/HSanaei/MachineLearing/main/19880101_20191231.csv'
stocks = 'https://raw.githubusercontent.com/Mr-SuSeL/NeuralNetworkPredictsDowJones/main/dji.csv'
data_raw = pd.read_csv(stocks, index_col='Date')

In [12]:
data_raw

Unnamed: 0_level_0,Open,High,Low,Close,Volume
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
1930-02-03,268.40,271.50,264.80,266.50,428572.0
1930-02-04,266.50,270.00,264.40,268.50,364286.0
1930-02-05,268.90,274.00,268.90,272.10,491730.0
1930-02-06,272.10,273.60,267.00,268.60,418421.0
1930-02-07,268.60,271.60,265.40,267.80,382331.0
...,...,...,...,...,...
2024-02-01,38175.34,38522.50,38106.84,38519.84,338731681.0
2024-02-02,38448.10,38783.62,38336.57,38654.42,387138485.0
2024-02-05,38546.77,38633.88,38220.40,38380.12,325369705.0
2024-02-06,38392.90,38545.28,38350.46,38521.36,280663184.0


In [13]:
def generate_features(df):
    """
    Generate features for a stock/index based on historical price and performance
    @param df: dataframe with columns "Open", "Close", "High", "Low", "Volume", "Adjusted Close"
    @return: dataframe, data set with new features
    """
    df_new = pd.DataFrame()
    # 6 original features
    df_new['open'] = df['Open']
    df_new['open_1'] = df['Open'].shift(1)
    df_new['close_1'] = df['Close'].shift(1)
    df_new['high_1'] = df['High'].shift(1)
    df_new['low_1'] = df['Low'].shift(1)
    df_new['volume_1'] = df['Volume'].shift(1)
    # 31 generated features
    # average price
    df_new['avg_price_5'] = df['Close'].rolling(5).mean().shift(1)
    df_new['avg_price_30'] = df['Close'].rolling(21).mean().shift(1)
    df_new['avg_price_365'] = df['Close'].rolling(252).mean().shift(1)
    df_new['ratio_avg_price_5_30'] = df_new['avg_price_5'] / df_new['avg_price_30']
    df_new['ratio_avg_price_5_365'] = df_new['avg_price_5'] / df_new['avg_price_365']
    df_new['ratio_avg_price_30_365'] = df_new['avg_price_30'] / df_new['avg_price_365']
    # average volume
    df_new['avg_volume_5'] = df['Volume'].rolling(5).mean().shift(1)
    df_new['avg_volume_30'] = df['Volume'].rolling(21).mean().shift(1)
    df_new['avg_volume_365'] = df['Volume'].rolling(252).mean().shift(1)
    df_new['ratio_avg_volume_5_30'] = df_new['avg_volume_5'] / df_new['avg_volume_30']
    df_new['ratio_avg_volume_5_365'] = df_new['avg_volume_5'] / df_new['avg_volume_365']
    df_new['ratio_avg_volume_30_365'] = df_new['avg_volume_30'] / df_new['avg_volume_365']
    # standard deviation of prices
    df_new['std_price_5'] = df['Close'].rolling(5).std().shift(1)
    df_new['std_price_30'] = df['Close'].rolling(21).std().shift(1)
    df_new['std_price_365'] = df['Close'].rolling(252).std().shift(1)
    df_new['ratio_std_price_5_30'] = df_new['std_price_5'] / df_new['std_price_30']
    df_new['ratio_std_price_5_365'] = df_new['std_price_5'] / df_new['std_price_365']
    df_new['ratio_std_price_30_365'] = df_new['std_price_30'] / df_new['std_price_365']
    # standard deviation of volumes
    df_new['std_volume_5'] = df['Volume'].rolling(5).std().shift(1)
    df_new['std_volume_30'] = df['Volume'].rolling(21).std().shift(1)
    df_new['std_volume_365'] = df['Volume'].rolling(252).std().shift(1)
    df_new['ratio_std_volume_5_30'] = df_new['std_volume_5'] / df_new['std_volume_30']
    df_new['ratio_std_volume_5_365'] = df_new['std_volume_5'] / df_new['std_volume_365']
    df_new['ratio_std_volume_30_365'] = df_new['std_volume_30'] / df_new['std_volume_365']
    # # return
    df_new['return_1'] = ((df['Close'] - df['Close'].shift(1)) / df['Close'].shift(1)).shift(1)
    df_new['return_5'] = ((df['Close'] - df['Close'].shift(5)) / df['Close'].shift(5)).shift(1)
    df_new['return_30'] = ((df['Close'] - df['Close'].shift(21)) / df['Close'].shift(21)).shift(1)
    df_new['return_365'] = ((df['Close'] - df['Close'].shift(252)) / df['Close'].shift(252)).shift(1)
    df_new['moving_avg_5'] = df_new['return_1'].rolling(5).mean().shift(1)
    df_new['moving_avg_30'] = df_new['return_1'].rolling(21).mean().shift(1)
    df_new['moving_avg_365'] = df_new['return_1'].rolling(252).mean().shift(1)
    # the target
    df_new['close'] = df['Close']
    df_new = df_new.dropna(axis=0)
    return df_new

In [14]:
generate_features(data_raw)

Unnamed: 0_level_0,open,open_1,close_1,high_1,low_1,volume_1,avg_price_5,avg_price_30,avg_price_365,ratio_avg_price_5_30,...,ratio_std_volume_5_365,ratio_std_volume_30_365,return_1,return_5,return_30,return_365,moving_avg_5,moving_avg_30,moving_avg_365,close
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1930-12-08,178.00,180.60,178.40,180.60,177.60,159699.0,182.280,184.582857,238.981508,0.987524,...,0.124114,0.401101,-0.014909,-0.038275,-0.030435,-0.335568,-0.002445,0.000288,-0.001390,176.10
1930-12-09,176.10,178.00,176.10,178.00,174.30,223309.0,180.140,184.078095,238.600556,0.978606,...,0.214120,0.307919,-0.012892,-0.057281,-0.056776,-0.352811,-0.007728,-0.001372,-0.001479,176.50
1930-12-10,176.50,176.10,176.50,178.20,173.90,239098.0,178.620,183.887619,238.235079,0.971354,...,0.226450,0.307424,0.002271,-0.041282,-0.022161,-0.342889,-0.011708,-0.002685,-0.001583,174.00
1930-12-11,173.00,176.50,174.00,178.10,170.20,355263.0,177.220,183.440000,237.862857,0.966092,...,0.516903,0.370770,-0.014164,-0.038674,-0.051254,-0.350261,-0.008363,-0.000995,-0.001523,170.30
1930-12-12,170.30,173.00,170.30,173.00,167.00,325940.0,175.060,182.616190,237.468016,0.958623,...,0.538349,0.393378,-0.021264,-0.059636,-0.092217,-0.368792,-0.007828,-0.002435,-0.001567,168.70
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2024-02-01,38175.34,38426.78,38150.30,38588.86,38139.66,457403550.0,38221.924,37763.954762,34461.878214,1.012127,...,0.762028,0.605380,-0.008241,0.009097,0.012225,0.131483,0.002952,0.000957,0.000516,38519.84
2024-02-02,38448.10,38175.34,38519.84,38522.50,38106.84,338731681.0,38316.066,37802.278571,34479.472659,1.013591,...,0.760494,0.605419,0.009686,0.012371,0.021339,0.130077,0.001827,0.000591,0.000514,38654.42
2024-02-05,38546.77,38448.10,38654.42,38783.62,38336.57,387138485.0,38425.064,37860.575238,34497.573690,1.014910,...,0.757191,0.615235,0.003494,0.014301,0.032707,0.133795,0.002480,0.001020,0.000509,38380.12
2024-02-06,38392.90,38546.77,38380.12,38633.88,38220.40,325369705.0,38434.398,37905.326667,34514.741071,1.013958,...,0.738646,0.613087,-0.007096,0.001217,0.025101,0.127039,0.002862,0.001546,0.000522,38521.36
