In [77]:
import csv
import numpy as np

In [80]:
def readCSV(filename):
  with open(filename) as csv_file:
      csv_reader = csv.reader(csv_file, delimiter=',')
      line_count = 0
      closing_prices = []
      high_prices = []
      low_prices = []
      for row in csv_reader:
          if line_count == 0:
              print(f'Column names are {", ".join(row)}')
              line_count += 1
          else:
              closing_prices.append(float(row[1][2:]))
              high_prices.append(float(row[4][2:]))
              low_prices.append(float(row[5][2:]))
              line_count += 1
      return [closing_prices, high_prices, low_prices]


**Exponential Moving Average**

$EMA = price(today) * a + EMA(yesterday) * (1-a)$

$a = \frac{2}{N+1}$

first EMA equals SMA of time period N

In [49]:
def EMA(n, p):
  p_len = len(p)
  a = 2 / (n + 1)
  print(a)

  emas = []
  print(f'first indexes: {p_len-n} {p_len} {p[p_len-n:p_len]}')
  emas.append(np.average(p[p_len-n:p_len])) # SMA as first value
              
  for i in range(n, p_len):
    ema = a*p[p_len-i-1] + (1-a)*emas[0]
    emas.insert(0, ema)

  return emas



**RoR**

$RoR = \ln{\frac{p_t}{p_{t-n}}}$

In [50]:
def RoR (pt, pt_):
  return np.log(pt/pt_)

**Gradient a of the price trend**

$a= \frac{\sum_ {t=1}^n (t-\bar{t}) (p_{t}-\bar{p})}{\sum_ {t=1}^n (t-\bar{t})^2}$

where is the index of the time instant ,
are the corresponding stock prices and are average values of t and p, respectively

In [51]:
def grad_price_trend(p):
  n = len(p)
  n_avg = np.average(range(1, n+1))
  p_avg = np.average(p)
  numerator = 0
  denominator = 0
  for i in range(1, n+1):
    numerator += (i - n_avg)*(p[i-1] - p_avg)
    denominator += (i - n_avg)**2
  return numerator/denominator

**Relative Strength Index**

$RSI = 100 - \frac{100}{1 + RS}$

$RS = \frac{EMA(U)}{EMA(D)}$

In [63]:
def RSI(n, p):
  U = [] # gain
  D = [] # loss
  for i in range(len(p) - 1):
    if p[i] == p[i+1]:
      U.append(0); D.append(0)
    else:
      if p[i] > p[i+1]:
        U.append (p[i] - p[i+1]); D.append(0)
      else:
        U.append(0); D.append(p[i+1] - p[i])


  if not D:
    return 100;
  if not U:
    return 0;

  U_ema = EMA(n,U)
  D_ema = EMA(n,D)

  print(f'u_ema: {U_ema} , d_ema: {D_ema}')
  RS = np.divide(U_ema,D_ema)
  print(f'RS: {RS}')

  return 100 - 100/(1 + RS)

**MACD**

MACD = EMA for 12 days - EMA for 26 days

In [53]:
def MACD(p):
  ema26 = EMA(26, p)
  ema12 = EMA(12, p)[0:len(ema26)] 

  return np.subtract(ema12,ema26)


**Commodity Channel Index**

$CCI = \frac{Typical Price - MA}{0.015 * Mean Deviation}$

$Typical Price = \sum_{i=1}^N (High + Low + Close)/3$

N - number of periods (20)
$MA = Moving Average = (\sum_{i=1}^N Typical Price)/N$

$Mean Deviation = (\sum_{i=1}^N |Typical Price - MA|)/N$

In [65]:
def CCI(p, h, l):
  typical_price = np.add(np.add(p, h), l)/3
  MA = np.average(typical_price)
  mean_deviation = np.average(typical_price - MA)
  return (np.abs(typical_price[0] - MA))/(0.015 * mean_deviation)


**Calculate all features**

In [68]:
def calculate_features(closing_prices, high_prices, low_prices):
  features = []

  # Relative Strength Index for 14 days
  RSIs = RSI(14, closing_prices)
  MACDs = MACD(closing_prices)

  for i in range(len(closing_prices) - 26):
    f1 = RoR (closing_prices[i], closing_prices[i+1])
    f2 = RoR (closing_prices[i+1], closing_prices[i+2])
    f3 = RoR (closing_prices[i+2], closing_prices[i+3])
    f4 = RoR (closing_prices[i+3], closing_prices[i+4])

    f5 = RoR (closing_prices[i], closing_prices[i+2])
    f6 = RoR (closing_prices[i+1], closing_prices[i+3])

    # gradient of 5-day price trend
    f7 = grad_price_trend(closing_prices[i:i+5])
    f8 = grad_price_trend(closing_prices[i+5:i+10])
    # gradient of 10-day price trend
    f9 = grad_price_trend(closing_prices[i:i+10])

    f10 = f1 - f2
    f11 = f1 - f3

    # dynamics of change for gradient of 5-day logarithmic price trend
    f12 = RoR (f7, f8) # ???
    f13 = f9 - f7 # ???

    f14 = RSIs[i]
    f15 = MACDs[i]

    f16 = closing_prices[i] - np.average(closing_prices[i+1:i+13])

    # 14-day rate of change 
    f17 = (closing_prices[i] - closing_prices[i+14])/closing_prices[i+14]

    f18 = CCI(closing_prices[i:i+20], high_prices[i:i+20], low_prices[i:i+20])

    f = [f1, f2, f3, f4, f5, f6, f7, f8, f9, f10, f11, f12, f13, f14, f15, f16, f17, f18]
    features.append(f)
  return features
    
  print(f'Features: {features}')

In [83]:
def read_and_extract_features(input, output):
  csv_content = readCSV(input)
  features = calculate_features(csv_content[0], csv_content[1], csv_content[2])

  with open(output, mode='w') as result_file:
      writer = csv.writer(result_file, delimiter=',', quotechar='"', quoting=csv.QUOTE_MINIMAL)
      for arr in features:
        writer.writerow(arr)

read_and_extract_features('BIDU-5y.csv', 'features-BIDU-5y.csv')

Column names are Date,  Close/Last,  Volume,  Open,  High,  Low
0.13333333333333333
first indexes: 1244 1258 [0, 0, 0.4200000000000017, 0, 0, 0.379999999999999, 0.10999999999999943, 0, 0, 0, 0.08000000000000185, 0.08999999999999986, 0.28999999999999915, 0]
0.13333333333333333
first indexes: 1244 1258 [0.08999999999999986, 0.5, 0, 0.129999999999999, 0.3000000000000007, 0, 0, 0.16000000000000014, 0.019999999999999574, 0.030000000000001137, 0, 0, 0, 0.3200000000000003]
u_ema: [0.3444378599374739, 0.30665906915862373, 0.24306815672148854, 0.2804632577555637, 0.1820729897179584, 0.2039303727514906, 0.2353042762517199, 0.16996647259813832, 0.1961151606901596, 0.22628672387326107, 0.26110006600760893, 0.2505000761626254, 0.2875000878799527, 0.30557702447686824, 0.33566579747330955, 0.38730668939228025, 0.4468923339141695, 0.30949115451634945, 0.24941287059578796, 0.27086100453360157, 0.3125319283080018, 0.3067676095861557, 0.3539626264455642, 0.4084184151294971, 0.4050981713032659, 0.46742096

  
  
  """
  
