In [2]:
import numpy as np
import statistics
import matplotlib.pyplot as plt
import pandas as pd

In [33]:
def EMA(prices,N,smoothing):

    ema = np.zeros(shape=(prices.shape[0],1),dtype=None)

    ema[N-1,0]=sum(prices[0:N])/N

    for i in range(prices.shape[0]-N):
        ema[N+i,0] = ((prices[N+i]*(smoothing/(1+N)))+(ema[N+i-1,0]*(1-(smoothing/(1+N)))))

    return ema

def MACD(prices):
    MACD = np.zeros(shape=(prices.shape[0],1),dtype=None)
    ema12 = EMA(prices,12,2)
    ema26 = EMA(prices,26,2)

    #MACD LINE = 12EMA - 26EMA
    for i in range(25,prices.shape[0]):

            MACD[i,0] = ema12[i,0] - ema26[i,0]

    #MACD SIGNAL LINE = 9EMA OF MACD LINE
    signal = EMA(MACD[:,0],9,2)
    MACD = np.append(MACD,signal,axis=1)

    #MACD HISTOGRAM = MACD LINE - MACD SIGNAL LINE
    MACD = np.append(MACD,np.zeros(shape=(prices.shape[0],1),dtype=None),axis=1)
    for i in range(25,prices.shape[0]):
        MACD[i,2] = MACD[i,0]-MACD[i,1]

    #MACD is a 4241x3 np array. First column is MACD Line, second is MACD Signal Line, and third is MACD Histogram
    return MACD

def Momentum(prices,N):
    momentum = np.zeros(shape=(prices.shape[0],1),dtype=None)
    for i in range(N,prices.shape[0]):
        momentum[i] = prices[i]/prices[i-N]
    return momentum



def RateofChange(prices,N):
    roc = np.zeros(shape=(prices.shape[0],1),dtype=None)
    for i in range(N,prices.shape[0]):
        roc[i] = (prices[i]-prices[i-N])/prices[i-N]
    return roc

def RSI(window, data):
    # RSI = 100 - [100/(1+avg.gain/avg.loss)]
    # Standard is to use a 14-day period
    RSI_data = np.copy(data)
    upPrices = []
    downPrices = []
    for i in range(len(RSI_data)):
        if i == 0:
            upPrices.append(0)
            downPrices.append(0)
        elif float(RSI_data[i][1]) > float(RSI_data[i-1][1]):
            upPrices.append(float(RSI_data[i][1]) - float(RSI_data[i-1][1]))
            downPrices.append(0)
        elif float(RSI_data[i][1]) <= float(RSI_data[i-1][1]):
            downPrices.append(float(RSI_data[i-1][1]) - float(RSI_data[i][1]))
            upPrices.append(0)
    count = window-1
    RSI_value = np.zeros([window-1, 1], dtype=float)
    while(count <= len(RSI_data)-1):
        gain = 0
        loss = 0
        y = count - window + 1
        while y <= count:
            gain += upPrices[y]
            loss += downPrices[y]
            y += 1
        RSI_value = np.append(RSI_value, [[100 - (100/(1+gain/loss))]], axis=0)
        count += 1
    # RSI_data = np.append(RSI_data, RSI_value, axis=1)
    # for i in RSI_data:
    #     print(i)
    return RSI_value

def simple_moving_avg(window, data):
    avg_data = np.copy(data)
    avg_values = np.zeros([window-1, 1], dtype=float)
    i = window - 1
    while i < len(avg_data):
        avg_arr = avg_data[i+1-window:i+1, 1].astype(float)
        avg = avg_arr.sum() / window
        avg_values = np.append(avg_values, [[avg]], axis=0)
        i += 1
    # avg_data = np.append(avg_data, avg_values, axis=1)
    # for i in avg_data:
    #     print(i)
    return avg_values


def bollinger_bands(period, data):
    # Middle Band = 20-day simple moving average (SMA)
    # Upper Band = 20-day SMA + (20-day standard deviation of price x 2)
    # Lower Band = 20-day SMA - (20-day standard deviation of price x 2)
    bb_data = np.copy(data)
    sma_data = simple_moving_avg(period, bb_data)
    bb_values = np.zeros([period-1, 3], dtype=float)
    i = period - 1
    while i < len(bb_data):
        sample = bb_data[i+1-period:i+1, 1].astype(float)
        stdev2 = statistics.stdev(sample) * 2
        bb_middle = sma_data[i][0]
        bb_upper = bb_middle + stdev2
        bb_lower = bb_middle - stdev2
        bb_values = np.append(
            bb_values, [[bb_lower, bb_middle, bb_upper]], axis=0)
        i += 1
    # for i in bb_values:
    #     print(i)
    # plt.plot(bb_data[:, 0], bb_values[:, 0], 'g')
    # plt.plot(bb_data[:, 0], bb_values[:, 1], 'r')
    # plt.plot(bb_data[:, 0], bb_values[:, 2], 'g')
    plt.show()
    return bb_values

def profits(prices,N):
    labels = np.zeros(shape=(prices.shape[0],1),dtype=None)
    for i in range(prices.shape[0]-N):
        if prices[i] < prices[i+N]:
            labels[i] = 1
        elif prices[i] > prices[i+N]:
            labels[i] = 0
    return labels


In [39]:
#data is a 4250 x 2 np array. Rows are the days, column 0 is date, column 1 is price. No headers
data = np.genfromtxt('Coin_Metrics_Network_Data.csv',delimiter="\t",encoding="utf16",dtype=None,usecols=range(2))
data = np.delete(data,(0),axis=0)
time = data[0]


#Prices is a 4250 x 1 np array of floats which will be used for calculations
prices = data[:,1].astype(float)

#Create a new np array to hold prices and all the indicators as columns.
PricesAndIndicators = np.zeros(shape=(prices.shape[0],1),dtype=None)
PricesAndIndicators[:,0] = prices
print('Added prices')
print(PricesAndIndicators.shape)
#Call functions with predetermined periods, and append indicators as columns to the prices.

#Indicator: RSI
RSIColumn = RSI(14, data)
PricesAndIndicators = np.append(PricesAndIndicators, RSIColumn,axis=1)
print('Added RSI')
print(PricesAndIndicators.shape)


# Indicator: Exponential and Simple Moving Average
for period in [10,21,50]:
    emaColumn = EMA(prices,period,2)
    simpleMovingAverageColumn = simple_moving_avg(period, data)
    PricesAndIndicators = np.append(PricesAndIndicators,simpleMovingAverageColumn,axis=1)
    print('Added Moving Average')
    print(PricesAndIndicators.shape)

    PricesAndIndicators = np.append(PricesAndIndicators, emaColumn, axis = 1)
    print('Added EMA')
    print(PricesAndIndicators.shape)




# Indicator: Bollinger Bands
bollingerBandsColumn = bollinger_bands(20, data)
PricesAndIndicators = np.append(PricesAndIndicators,bollingerBandsColumn,axis=1)
print('Added Bollinger Bands')
print(PricesAndIndicators.shape)


#Indicator: MACD
MACDColumn = MACD(prices)
PricesAndIndicators = np.append(PricesAndIndicators, MACDColumn, axis=1)
print('Added MACD')
print(PricesAndIndicators.shape)


for period in [12,14,21]:
    MomentumColumn = Momentum(prices,period)
    RoCColumn = RateofChange(prices,period)
    PricesAndIndicators = np.append(PricesAndIndicators, MomentumColumn, axis = 1)
    print('Added Momentum')
    print(PricesAndIndicators.shape)

    PricesAndIndicators = np.append(PricesAndIndicators, RoCColumn, axis = 1)
    print('Added RoC')
    print(PricesAndIndicators.shape)

labels = profits(prices,30)
PricesAndIndicators = np.append(PricesAndIndicators,labels, axis=1)
# np.savetxt("Data.csv",PricesAndIndicators,delimiter=",")


Added prices
(4250, 1)
Added RSI
(4250, 2)
Added Moving Average
(4250, 3)
Added EMA
(4250, 4)
Added Moving Average
(4250, 5)
Added EMA
(4250, 6)
Added Moving Average
(4250, 7)
Added EMA
(4250, 8)
Added Bollinger Bands
(4250, 11)
Added MACD
(4250, 14)
Added Momentum
(4250, 15)
Added RoC
(4250, 16)
Added Momentum
(4250, 17)
Added RoC
(4250, 18)
Added Momentum
(4250, 19)
Added RoC
(4250, 20)


In [5]:
time

array(['"2010-07-18"', '0.08584'], dtype='<U37')

In [None]:
PricesAndIndicators

In [35]:
# data.shape
PricesAndIndicators.shape

(4250, 21)

In [41]:
columns = ['Price', 'RSI', 'MovingAverage10','EMA10', 'MovingAverage21','EMA21','MovingAverage50','EMA50','LowerBB','MidBB','UpperBB','MACD1','MACD2','MACD3','Momentum12','RoC12','Momentum14','RoC14','Momentum21','RoC21','signals_30d']

df = pd.DataFrame(PricesAndIndicators, columns = columns)
df

Unnamed: 0,Price,RSI,MovingAverage10,EMA10,MovingAverage21,EMA21,MovingAverage50,EMA50,LowerBB,MidBB,...,MACD1,MACD2,MACD3,Momentum12,RoC12,Momentum14,RoC14,Momentum21,RoC21,signals_30d
0,0.085840,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,...,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.0
1,0.080800,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,...,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.0
2,0.074736,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,...,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.0
3,0.079193,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,...,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.0
4,0.058470,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,...,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,1.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4245,43984.584927,49.794767,39843.611390,41185.309688,40919.160318,40787.168232,40405.481374,41670.678659,35704.369615,40784.401199,...,217.692932,-306.867403,524.560335,1.098400,0.098400,0.998159,-0.001841,0.990531,-0.009469,0.0
4246,42492.774455,55.288607,40383.085235,41423.030555,40865.752306,40942.223343,40376.116332,41702.917710,35701.959914,40791.173834,...,319.005882,-181.692746,500.698627,1.059779,0.059779,1.046362,0.046362,0.974284,-0.025716,0.0
4247,39106.405216,47.726026,40473.466348,41001.825948,40710.946757,40775.330786,40305.707128,41601.093691,35539.155502,40637.821125,...,124.609012,-120.432394,245.041407,1.014391,0.014391,0.976581,-0.023419,0.923250,-0.076750,0.0
4248,39383.884843,48.292268,40681.027959,40707.654838,40578.109874,40648.835700,40231.331873,41514.144324,35424.433647,40497.235630,...,-6.981146,-97.742145,90.760999,1.061616,0.061616,0.982243,-0.017757,0.933855,-0.066145,0.0


In [42]:
# df.insert (0, "Time", time)
df1 = pd.DataFrame(data, columns = ['Time','Price'])
Time = df1['Time']
# Time.shape
df.insert (0, "Time", Time)


In [43]:
df.to_csv('btc_30d.csv')  

In [21]:
df2 = pd.read_csv('data.csv', header=None)

df2[20]

0       0.0
1       0.0
2       0.0
3       0.0
4       1.0
       ... 
4245    0.0
4246    0.0
4247    0.0
4248    0.0
4249    0.0
Name: 20, Length: 4250, dtype: float64

In [32]:
# df = df.join(df2[20])
# df = df.rename(columns={20:'signal'})
df.to_csv('btc_data.csv')  