In [2]:
import numpy as np
import pandas as pd

In [3]:
stocks = 'https://raw.githubusercontent.com/HSanaei/MachineLearing/main/19880101_20191231.csv'
data_raw = pd.read_csv(stocks, index_col='Date')

In [4]:
data_raw

Unnamed: 0_level_0,Open,High,Low,Close,Adj Close,Volume
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
1992-01-02,3152,3173,3139,3172,3172,23550000
1992-01-03,3172,3211,3166,3202,3202,23620000
1992-01-06,3202,3213,3192,3200,3200,27280000
1992-01-07,3200,3210,3184,3205,3205,25510000
1992-01-08,3205,3229,3186,3204,3204,29040000
...,...,...,...,...,...,...
2019-12-23,28492,28582,28492,28552,28552,223530000
2019-12-24,28573,28577,28503,28515,28515,86150000
2019-12-26,28539,28624,28535,28621,28621,155970000
2019-12-27,28675,28702,28609,28645,28645,182280000


In [5]:
def generate_features(df):
    """
    Generate features for a stock/index based on historical price and performance
    @param df: dataframe with columns "Open", "Close", "High", "Low", "Volume", "Adjusted Close"
    @return: dataframe, data set with new features
    """
    df_new = pd.DataFrame()
    # 6 original features
    df_new['open'] = df['Open']
    df_new['open_1'] = df['Open'].shift(1)
    df_new['close_1'] = df['Close'].shift(1)
    df_new['high_1'] = df['High'].shift(1)
    df_new['low_1'] = df['Low'].shift(1)
    df_new['volume_1'] = df['Volume'].shift(1)
    # 31 generated features
    # average price
    df_new['avg_price_5'] = df['Close'].rolling(5).mean().shift(1)
    df_new['avg_price_30'] = df['Close'].rolling(21).mean().shift(1)
    df_new['avg_price_365'] = df['Close'].rolling(252).mean().shift(1)
    df_new['ratio_avg_price_5_30'] = df_new['avg_price_5'] / df_new['avg_price_30']
    df_new['ratio_avg_price_5_365'] = df_new['avg_price_5'] / df_new['avg_price_365']
    df_new['ratio_avg_price_30_365'] = df_new['avg_price_30'] / df_new['avg_price_365']
    # average volume
    df_new['avg_volume_5'] = df['Volume'].rolling(5).mean().shift(1)
    df_new['avg_volume_30'] = df['Volume'].rolling(21).mean().shift(1)
    df_new['avg_volume_365'] = df['Volume'].rolling(252).mean().shift(1)
    df_new['ratio_avg_volume_5_30'] = df_new['avg_volume_5'] / df_new['avg_volume_30']
    df_new['ratio_avg_volume_5_365'] = df_new['avg_volume_5'] / df_new['avg_volume_365']
    df_new['ratio_avg_volume_30_365'] = df_new['avg_volume_30'] / df_new['avg_volume_365']
    # standard deviation of prices
    df_new['std_price_5'] = df['Close'].rolling(5).std().shift(1)
    df_new['std_price_30'] = df['Close'].rolling(21).std().shift(1)
    df_new['std_price_365'] = df['Close'].rolling(252).std().shift(1)
    df_new['ratio_std_price_5_30'] = df_new['std_price_5'] / df_new['std_price_30']
    df_new['ratio_std_price_5_365'] = df_new['std_price_5'] / df_new['std_price_365']
    df_new['ratio_std_price_30_365'] = df_new['std_price_30'] / df_new['std_price_365']
    # standard deviation of volumes
    df_new['std_volume_5'] = df['Volume'].rolling(5).std().shift(1)
    df_new['std_volume_30'] = df['Volume'].rolling(21).std().shift(1)
    df_new['std_volume_365'] = df['Volume'].rolling(252).std().shift(1)
    df_new['ratio_std_volume_5_30'] = df_new['std_volume_5'] / df_new['std_volume_30']
    df_new['ratio_std_volume_5_365'] = df_new['std_volume_5'] / df_new['std_volume_365']
    df_new['ratio_std_volume_30_365'] = df_new['std_volume_30'] / df_new['std_volume_365']
    # # return
    df_new['return_1'] = ((df['Close'] - df['Close'].shift(1)) / df['Close'].shift(1)).shift(1)
    df_new['return_5'] = ((df['Close'] - df['Close'].shift(5)) / df['Close'].shift(5)).shift(1)
    df_new['return_30'] = ((df['Close'] - df['Close'].shift(21)) / df['Close'].shift(21)).shift(1)
    df_new['return_365'] = ((df['Close'] - df['Close'].shift(252)) / df['Close'].shift(252)).shift(1)
    df_new['moving_avg_5'] = df_new['return_1'].rolling(5).mean().shift(1)
    df_new['moving_avg_30'] = df_new['return_1'].rolling(21).mean().shift(1)
    df_new['moving_avg_365'] = df_new['return_1'].rolling(252).mean().shift(1)
    # the target
    df_new['close'] = df['Close']
    df_new = df_new.dropna(axis=0)
    return df_new

In [8]:
generate_features(data_raw)

Unnamed: 0_level_0,open,open_1,close_1,high_1,low_1,volume_1,avg_price_5,avg_price_30,avg_price_365,ratio_avg_price_5_30,...,ratio_std_volume_5_365,ratio_std_volume_30_365,return_1,return_5,return_30,return_365,moving_avg_5,moving_avg_30,moving_avg_365,close
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1993-01-04,3301,3321.0,3301.0,3328.0,3301.0,13860000.0,3318.4,3303.523810,3284.861111,1.004503,...,1.098370,1.558631,-0.006022,-0.003923,0.002125,0.030918,0.000007,0.000241,0.000203,3309
1993-01-05,3309,3301.0,3309.0,3319.0,3299.0,21400000.0,3315.0,3304.619048,3285.293651,1.003141,...,0.970157,1.562219,0.002424,-0.005111,0.006999,0.034063,-0.000775,0.000113,0.000142,3308
1993-01-06,3308,3309.0,3308.0,3321.0,3301.0,28060000.0,3310.0,3306.095238,3285.702381,1.001181,...,1.144806,1.571110,-0.000302,-0.007501,0.009460,0.032137,-0.001015,0.000344,0.000154,3305
1993-01-07,3305,3308.0,3305.0,3312.0,3291.0,35790000.0,3308.8,3306.857143,3286.103175,1.000588,...,1.692115,1.638949,-0.000907,-0.001812,0.004865,0.031523,-0.001496,0.000460,0.000147,3269
1993-01-08,3269,3305.0,3269.0,3313.0,3261.0,36820000.0,3298.4,3305.047619,3286.337302,0.997989,...,1.807453,1.704051,-0.010893,-0.015658,-0.011491,0.018380,-0.000358,0.000243,0.000144,3252
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2019-12-23,28492,28609.0,28455.0,28609.0,28446.0,603780000.0,28314.8,28010.380952,26241.869048,1.010868,...,1.889330,1.182806,0.002749,0.011374,0.022789,0.244751,0.001738,0.000763,0.000817,28552
2019-12-24,28573,28492.0,28552.0,28582.0,28492.0,223530000.0,28378.0,28047.809524,26266.103175,1.011772,...,2.332930,1.377335,0.003409,0.011191,0.028308,0.272087,0.002267,0.001087,0.000907,28515
2019-12-26,28539,28573.0,28515.0,28577.0,28503.0,86150000.0,28427.6,28078.238095,26292.781746,1.012442,...,2.848276,1.460719,-0.001296,0.008773,0.022923,0.308508,0.002230,0.001343,0.000993,28621
2019-12-27,28675,28539.0,28621.0,28624.0,28535.0,155970000.0,28504.0,28104.666667,26315.571429,1.014209,...,3.007175,1.499707,0.003717,0.013527,0.019775,0.251027,0.001752,0.001093,0.001103,28645
