In [None]:
%matplotlib inline
import matplotlib
import matplotlib.pyplot as plt
import io, base64, os, json, re, csv
import pandas as pd
import numpy as np
import datetime
import warnings
warnings.filterwarnings('ignore')



In [None]:
# Directory holding the downloaded market CSV files. Set the MARKET_DATA_DIR
# environment variable to point somewhere else; the fallback is the original
# local folder. os.path.join(..., '') guarantees a trailing separator, which
# matters because the loading cells build file paths by string concatenation.
path_to_market_data = os.path.join(
    os.environ.get('MARKET_DATA_DIR', '/Users/illen/Documents/coinbase-data-downloads/'),
    '',
)

### Load Data

In [None]:
# Load the LBMA gold fixing history and reduce it to a tidy two-column frame.
gld_raw = pd.read_csv(path_to_market_data + 'LBMA-GOLD.csv')
gld_raw['Date'] = pd.to_datetime(gld_raw['Date'])
# Preview only; printing the whole frame floods the cell output.
print(gld_raw.head())

# .copy() makes gld_df an independent frame, so the column assignment below
# does not write into a slice of gld_raw (chained-assignment pitfall).
gld_df = gld_raw[['Date', 'USD (PM)']].copy()
gld_df.columns = ['Date', 'GLD']
# Non-numeric price entries become NaN and are dropped further down.
gld_df['GLD'] = pd.to_numeric(gld_df['GLD'], errors='coerce')

# Date range covered by the file (reported before incomplete rows are dropped).
print(np.min(gld_df['Date']), np.max(gld_df['Date']))
gld_df = gld_df.sort_values('Date', ascending=True).dropna(how='any')

gld_df.head()

In [None]:
# Line chart of the full gold price history, titled with the covered date range.
first_day = np.min(gld_df['Date'])
last_day = np.max(gld_df['Date'])

fig, ax = plt.subplots(figsize=(16, 8))
ax.plot(gld_df['Date'], gld_df['GLD'], label='GLD', color='gold')
ax.set_title('Gold ' + str(first_day) + ' - ' + str(last_day))
ax.legend(loc='upper left')
ax.grid()
plt.show()

In [None]:
def split_seq(seq, num_pieces):
    """Yield `num_pieces` consecutive slices of `seq` with near-equal lengths.

    The first ``len(seq) % num_pieces`` slices carry one extra element. When
    `num_pieces` exceeds ``len(seq)`` the trailing slices are empty, and a
    non-positive `num_pieces` yields nothing.

    Adapted from
    https://stackoverflow.com/questions/54915803/automatically-split-data-in-list-and-order-list-elements-and-send-to-function
    """
    total = len(seq)
    begin = 0
    for offset in range(num_pieces):
        # Size of the stride seq[offset::num_pieces], computed without slicing.
        size = len(range(offset, total, num_pieces))
        end = begin + size
        yield seq[begin:end]
        begin = end
        
def pearson(s1, s2):
    """Return the Pearson correlation coefficient of two equal-length sequences.

    Parameters
    ----------
    s1, s2 : sequence of numbers (list, np.ndarray or pd.Series)
        Values are compared position by position; a pandas index is ignored.

    Returns
    -------
    float
        Coefficient in [-1, 1]. ``nan`` when the inputs are empty or when
        either input has zero variance (the coefficient is undefined there).

    Raises
    ------
    ValueError
        If the two inputs do not have the same shape.
    """
    a = np.asarray(s1, dtype=float)
    b = np.asarray(s2, dtype=float)
    if a.shape != b.shape:
        raise ValueError(
            'pearson() needs inputs of equal shape, got %s and %s' % (a.shape, b.shape)
        )
    if a.size == 0:
        return np.nan

    a_c = a - a.mean()
    b_c = b - b.mean()
    denom = np.sqrt(np.sum(a_c ** 2) * np.sum(b_c ** 2))
    # A constant series has no direction to correlate with; avoid the 0/0 division.
    if denom == 0:
        return np.nan
    return np.sum(a_c * b_c) / denom


### 30 trading day series

In [None]:
# Pivot the price column into rows: one row per rolling window of `lookback`
# consecutive trading days.
lookback = 30
dates = gld_df['Date']
prices = list(gld_df['GLD'].values)

# Each window is prices[stop - lookback:stop]. `stop` ends at len(prices) - 1,
# so the most recent price is never part of a window.
price_series = [prices[stop - lookback:stop] for stop in range(lookback, len(prices))]

timeseries_df = pd.DataFrame(price_series)


### Look for the rise and build the outcome

In [None]:
# (rows, columns): one row per rolling window, one column per day in the window
timeseries_df.shape

In [None]:
# First few windows; consecutive rows are the same price list shifted by one day
timeseries_df.head()

In [None]:
# Eyeball the raw shape of the first windows, one figure per window.
# Six windows are drawn.
preview_rows = 6
for index, row in timeseries_df.head(preview_rows).iterrows():
    plt.plot(row.values)
    plt.grid()
    plt.show()

### Pattern Simplifier

Here we break each long list of prices into a smaller number of chunks, set by `complexity`, and then average each chunk.

In [None]:
# Same preview as for the raw windows, but each window is first reduced to
# `complexity` chunk means so only the coarse shape remains.
complexity = 5
preview_rows = 6
for index, row in timeseries_df.head(preview_rows).iterrows():
    chunk_means = [np.mean(chunk) for chunk in split_seq(list(row.values), complexity)]
    plt.plot(chunk_means)
    plt.grid()
    plt.show()

In [None]:
# Chunk means for `row`, i.e. whichever window the preceding preview loop visited last
[np.mean(r) for r in split_seq(list(row.values), complexity)]

In [None]:
# Bind `r` explicitly to the first window so this cell runs on a fresh kernel:
# a comprehension's loop variable does not leak into the notebook namespace.
r = timeseries_df.iloc[0]
r

In [None]:
# Capture the first window as `r` and print it
for index, row in timeseries_df.head(1).iterrows():
    r = row
    print(r)

In [None]:
# Reduce the window stored in `r` to `complexity` chunk means
[np.mean(t) for t in split_seq(list(r), complexity)]

### Create an ideal shape pattern

Play around with the shape: you can select ups, downs, U's or V's - anything goes.

In [None]:
# single out the shape we want
# Template pattern: flat for four steps, then rising over the last two.
# Its length (6) has to equal the number of chunk means each window is
# reduced to before the two are correlated.
correlate_against = [0,0,0,0,1,2]
plt.plot(correlate_against)
plt.grid()

### Using the pearson correlation function to find the best matching shape

The Pearson correlation compares two lists: if they move in the same direction it gives a value near 1, if they move in opposite directions it gives a value near -1, and if there is no linear relationship between them it gives a value near 0. Ideally we want values close to 1 (we are looking for market windows that match our defined target pattern).

In [None]:
# Label every window: 1 when its simplified shape correlates with the template
# above `match_threshold`, else 0.
complexity = 6  # more complexity for more granularity
match_threshold = 0.5

# The simplified window and the template are compared element by element.
assert complexity == len(correlate_against), 'complexity must equal len(correlate_against)'

# Ignore an 'outcome' column left over from an earlier run of the notebook so
# that re-running this cell never feeds the label back in as a price.
window_values = timeseries_df.drop(columns='outcome', errors='ignore').values

outcome_list = []
for window in window_values:
    simplified_values = [np.mean(chunk) for chunk in split_seq(list(window), complexity)]
    correz = pearson(simplified_values, correlate_against)
    # A nan correlation (flat window) compares False and is labelled 0.
    outcome_list.append(1 if correz > match_threshold else 0)
    

In [None]:
# Share of windows labelled 1 (outcome_list holds only 0s and 1s)
np.mean(outcome_list)

In [None]:
# Attach the 0/1 label as a new last column (this mutates timeseries_df in place)
timeseries_df['outcome'] = outcome_list
timeseries_df.head(25)

In [None]:
# Keep only the windows labelled as matching the template and show the last few
is_match = timeseries_df['outcome'].eq(1)
timeseries_df_tmp = timeseries_df.loc[is_match]
timeseries_df_tmp.tail()

In [None]:
# Narrow to the five last matching windows and use the first of those as the example
timeseries_df_tmp = timeseries_df_tmp.tail(5)
# Every column except the trailing 'outcome' label
example = timeseries_df_tmp.iloc[0, :-1].values
plt.plot(example)

In [None]:
# Reduce the example window to `complexity` chunk means and plot the simplified
# shape. Each chunk contributes its own mean; averaging the whole window for
# every chunk would produce identical points and a flat line.
simplified_values = [np.mean(chunk) for chunk in split_seq(list(example), complexity)]
plt.plot(simplified_values)

In [None]:
# Simplified example shifted so that its lowest chunk mean sits at zero,
# which puts it on the same baseline as the template pattern.
vals = [np.mean(chunk) for chunk in split_seq(list(example), complexity)]
baseline = np.min(vals)  # computed once rather than once per element
vals2 = [val - baseline for val in vals]
plt.plot(vals2)

# Bitcoin Section

In [None]:
# Load the Coinbase BTC/USD daily history and reduce it to a tidy two-column frame.
btc_raw = pd.read_csv(path_to_market_data + 'Coinbase_BTCUSD_d.csv')
btc_raw['Date'] = pd.to_datetime(btc_raw['Date'])
# Preview only; printing the whole frame floods the cell output.
print(btc_raw.head())

# .copy() makes btc_df an independent frame, so the column assignment below
# does not write into a slice of btc_raw (chained-assignment pitfall).
btc_df = btc_raw[['Date', 'Close']].copy()
btc_df.columns = ['Date', 'BTC']
# Non-numeric price entries become NaN and are dropped further down.
btc_df['BTC'] = pd.to_numeric(btc_df['BTC'], errors='coerce')

# Date range covered by the file (reported before incomplete rows are dropped).
print(np.min(btc_df['Date']), np.max(btc_df['Date']))
btc_df = btc_df.sort_values('Date', ascending=True).dropna(how='any')

btc_df.head()


In [None]:
# Line chart of the full Bitcoin price history, titled with the covered date range.
first_day = np.min(btc_df['Date'])
last_day = np.max(btc_df['Date'])

fig, ax = plt.subplots(figsize=(16, 8))
ax.plot(btc_df['Date'], btc_df['BTC'], label='BTC', color='blue')
ax.set_title('Bitcoin ' + str(first_day) + ' - ' + str(last_day))
ax.legend(loc='upper left')
ax.grid()
plt.show()

In [None]:
def split_seq(seq, num_pieces):
    """Yield `num_pieces` consecutive slices of `seq` with near-equal lengths.

    The first ``len(seq) % num_pieces`` slices carry one extra element. When
    `num_pieces` exceeds ``len(seq)`` the trailing slices are empty.

    Adapted from
    https://stackoverflow.com/questions/54915803/automatically-split-data-in-list-and-order-list-elements-and-send-to-function
    """
    start = 0
    for i in range(num_pieces):
        # The stride seq[i::num_pieces] has exactly the length piece i should get.
        stop = start + len(seq[i::num_pieces])
        yield seq[start:stop]
        start = stop


# Defined at module level: nested inside split_seq it would sit after the
# generator loop as dead code and never become callable.
def pearson(s1, s2):
    """Return the Pearson correlation coefficient of two equal-length sequences.

    Parameters
    ----------
    s1, s2 : sequence of numbers (list, np.ndarray or pd.Series)
        Values are compared position by position; a pandas index is ignored.

    Returns
    -------
    float
        Coefficient in [-1, 1]. ``nan`` when the inputs are empty or when
        either input has zero variance (the coefficient is undefined there).

    Raises
    ------
    ValueError
        If the two inputs do not have the same shape.
    """
    a = np.asarray(s1, dtype=float)
    b = np.asarray(s2, dtype=float)
    if a.shape != b.shape:
        raise ValueError(
            'pearson() needs inputs of equal shape, got %s and %s' % (a.shape, b.shape)
        )
    if a.size == 0:
        return np.nan

    a_c = a - a.mean()
    b_c = b - b.mean()
    denom = np.sqrt(np.sum(a_c ** 2) * np.sum(b_c ** 2))
    # A constant series has no direction to correlate with; avoid the 0/0 division.
    if denom == 0:
        return np.nan
    return np.sum(a_c * b_c) / denom


In [None]:
# Pivot the price column into rows: one row per rolling window of `lookback`
# consecutive days.
lookback = 30
dates = btc_df['Date']
prices = list(btc_df['BTC'].values)

# Each window is prices[stop - lookback:stop]. `stop` ends at len(prices) - 1,
# so the most recent price is never part of a window.
price_series = [prices[stop - lookback:stop] for stop in range(lookback, len(prices))]

timeseries2_df = pd.DataFrame(price_series)

In [None]:
# (rows, columns): one row per rolling window, one column per day in the window
timeseries2_df.shape

In [None]:
# First few windows; consecutive rows are the same price list shifted by one day
timeseries2_df.head()

In [None]:
# Eyeball the raw shape of the first Bitcoin windows, one figure per window.
# Six windows are drawn.
preview_rows = 6
for index, row in timeseries2_df.head(preview_rows).iterrows():
    plt.plot(row.values)
    plt.grid()
    plt.show()

In [None]:
# Same preview as for the raw Bitcoin windows, but each window is first reduced
# to `complexity` chunk means so only the coarse shape remains.
complexity = 5
preview_rows = 6
for index, row in timeseries2_df.head(preview_rows).iterrows():
    chunk_means = [np.mean(chunk) for chunk in split_seq(list(row.values), complexity)]
    plt.plot(chunk_means)
    plt.grid()
    plt.show()