In [20]:
import requests 
import json 
import pandas as pd
import numpy as np
from datetime import datetime, timedelta, time
import pickle
import matplotlib.pyplot as plt
import matplotlib.dates as mdates
from time import process_time 

def compute_HLVVW(df):
    High = df['Price'].max()
    Low = df['Price'].min()
    Volume = df.Volume.iloc[-1]
    Close = df.Price.iloc[-1]
    df["VWAP"] = (Volume * ((High + Low + Close) /3)) / Volume
    return df

def tickRule(df):
    df['TickRule'] = 0
    for i in range(0,len(df)):                          
        if df.Price.iloc[i] > df.Price.iloc[i-1]:
            df.TickRule.iloc[i] = 1 
        elif df.Price.iloc[i] < df.Price.iloc[i-1]:
            df.TickRule.iloc[i] = -1 
        else: 
            df.TickRule.iloc[i] = df.TickRule.iloc[i-1]
    df = df.assign(TickRule = df['TickRule'].cumsum())
    return df
                    
# Choose to calculate volume as the division of the 
    # multiplication of the average ticks per day and their
        # respective bid-ask volumes average.
        # Considering that 80 bars per day in a normal day are filled by 2.3 million shares 
            # every 5 minutes.

# Following the same path, tick bars equals the division of 80 and the
    # division of the length of the data and the length of each day's data (tick-by-tick)
def thresholds(df,threshold):
    df = df.set_index('timestamp').astype(
        {'bid_vol':'int64', 'ask_vol':'int64','date':'datetime64'}).reset_index()
    total_ticks = len(df)
    Day_grp = df.groupby(['date'])

    Tick_per_bar = round((total_ticks / len(Day_grp)) / threshold)
    Vol_per_bar = round(((total_ticks / len(Day_grp))* (
        Day_grp.mean()['ask_vol'] + Day_grp.mean()['bid_vol']  / 2).mean())/ threshold)
    return [Tick_per_bar, Vol_per_bar]

def Variables(df):
    df =  df.assign(Volume = df['bid_vol'].cumsum() + df['ask_vol'].cumsum(),
                        Price = ((df['ask_vol'] * df['ask'] + df['bid_vol'] * df['bid'])
                                / (df['ask_vol'] + df['bid_vol'])),
                         MidPrice = (df['bid'] + df['ask'])/ 2,
                        Spread  = df['ask'] - df['bid'],
                       SpreadCm = (df['ask'] - df['bid']).cumsum()) 
    df = tickRule(df)

    #  The observed prices are the result of sequential trading against the bid-ask spread
    df = df.assign(ObservedPrice = df['Price'] + df['TickRule']*df['Spread'])
    df = df.set_index('timestamp').astype(
        {'bid_vol':'int64', 'ask_vol':'int64',
         'Volume':'int64', 'Price':'float64', 'ObservedPrice':'float64'}).reset_index()
    return df


def Volume_Bars(df):
    New_bar = Variables(df)
    Volume_Bars = pd.DataFrame(columns = ['timestamp', 'bid', 'ask', 'bid_vol', 
                                         'ask_vol', 'Volume','Price', 'MidPrice', 
                                         'Spread', 'SpreadCm', 'TickRule', 'ObservedPrice'])

#     Vol_per_bar = thresholds(data2,80)[1]# from instrument info table pull volperbar 
    
    Vol_per_bar =5000000
    futureBar = pd.DataFrame(columns = ['timestamp','bid', 'ask', 'bid_vol','ask_vol'])
    
    while len(New_bar[New_bar['Volume'] >= Vol_per_bar]) > 1:
        i = New_bar[New_bar['Volume'] >= Vol_per_bar].index[0]
        Volume_Bars = Volume_Bars.append(compute_HLVVW(New_bar.iloc[:i]).iloc[-1])
        deprecatedBar = New_bar.iloc[:i]
        thirdQ = np.quantile(deprecatedBar['Volume'], .75).astype(int)
        futureBar = New_bar[New_bar['Volume'] >= thirdQ]
        futureBar = futureBar.drop([ 'Volume', 'Price', 'MidPrice',
                                  'Spread', 'SpreadCm', 'TickRule', 'ObservedPrice'],axis = 1)
        New_bar = Variables(futureBar)
    
    if (len(Volume_Bars) == 0) & (len(futureBar) == 0):
        futureBar = df
        
    return [Volume_Bars,futureBar]

def Tick_Bars(df):
    New_bar = Variables(df)
    Tick_Bars = pd.DataFrame(columns = ['timestamp', 'bid', 'ask', 'bid_vol', 
                                         'ask_vol', 'Volume','Price', 'MidPrice', 
                                         'Spread', 'SpreadCm', 'TickRule', 'ObservedPrice'])

#     Tick_per_bar = thresholds(data2,threshold)[1]# from instrument info table pull volperbar 
    Tick_per_bar = 629

    futureBar = pd.DataFrame(columns = ['timestamp','bid', 'ask', 'bid_vol','ask_vol'])
   
    while len(New_bar) > Tick_per_bar:
        i = New_bar[New_bar.index >= Tick_per_bar].index[0]
        Tick_Bars = Tick_Bars.append(compute_HLVVW(New_bar.iloc[:i]).iloc[-1])
        deprecatedBar = New_bar.iloc[:i]
        thirdQ = np.quantile(deprecatedBar.index[-1], .75).astype(int)
        futureBar = New_bar[New_bar.index >= thirdQ]
        futureBar = futureBar.drop([ 'Volume', 'Price', 'MidPrice',
                                  'Spread', 'SpreadCm', 'TickRule', 'ObservedPrice'],axis = 1)
        New_bar = Variables(futureBar)
        
    if (len(Tick_Bars) == 0) & (len(futureBar) == 0):
        futureBar = df
        
    return [Tick_Bars,futureBar]

In [21]:
# instruments ID 
# Api to get the list of instruments 
response2 = requests.get('https://freeserv.dukascopy.com/2.0/?path=api/instrumentList',
                        params= {'key' : 'rup1doorqo000000'})

# pd.DataFrame(response2.json()).iloc[0,1]
FAANG = {"Apple":"70002" ,"Facebook":"70094","Amazon":"70022","Google":"70118","Netflix":"70178"}



In [None]:
# for loop for each day of 2019 
# Start the stopwatch / counter  
t1_start = process_time()  

instruments = FAANG["Google"]
# # empty dataframe with columns: ['timestamp','bid', 'ask', 'bid_vol','ask_vol']
# la3 = pd.DataFrame(columns = ['timestamp','bid', 'ask', 'bid_vol','ask_vol'])

# vols =  pd.DataFrame(columns = ['timestamp', 'bid', 'ask', 'bid_vol', 
#                                      'ask_vol',  'Volume','Price', 'MidPrice', 
#                                      'Spread', 'SpreadCm', 'TickRule', 'ObservedPrice'])

# ticks =  pd.DataFrame(columns = ['timestamp', 'bid', 'ask', 'bid_vol', 
#                                      'ask_vol', 'Volume','Price', 'MidPrice', 
#                                      'Spread', 'SpreadCm', 'TickRule', 'ObservedPrice'])

# futureBarTick = pd.DataFrame(columns = ['timestamp','bid', 'ask', 'bid_vol','ask_vol'])
# futureBarVolume = pd.DataFrame(columns = ['timestamp','bid', 'ask', 'bid_vol','ask_vol'])

# for each number between 199 - 150 (part of the 365 days of the year) going in negative way
for i in list(range(200,0,-1)):
    # substract from the 01-01-2020 the days that i equals
    date = datetime(2020, 1, 1) - timedelta(days=i)

    # converts date into timestamp and adds 9 hs and 30 minutes, 
        # and multiplies * 1000 to get miliseconds, while using int() to convert to integer
    start = int(datetime.timestamp(date + timedelta(hours = 9, minutes = 30)) * 1000)
    end = int(datetime.timestamp(date + timedelta(hours = 9, minutes = 35)) * 1000)

#     if len(pd.DataFrame(pd.DataFrame(requests.get('https://freeserv.dukascopy.com/2.0/?path=api/historicalPrices', 
#                                 params={'key' : 'rup1doorqo000000','instrument':instruments,
#                                         'timeFrame':'tick','count':'5000',
#                                         'start':start, 'end':end}).json())['ticks'].tolist())) > 0:
#         # while the datetime is less than 15:56 hs
    while datetime.fromtimestamp(end/1000).time() < time(hour = 15, minute = 56):

        # set parameter conditions, the key here is that start and end are always different
        parameters = {'key' : 'rup1doorqo000000','instrument':instruments,'timeFrame':'tick','count':'5000',
           'start':start, 'end':end}

        # api pull request
        response = requests.get('https://freeserv.dukascopy.com/2.0/?path=api/historicalPrices', 
                                params=parameters) 

        # convert pull request into json file
        la = pd.DataFrame(response.json())

        # gets tick data from json file and converts it to list and then df
        pe = pd.DataFrame(la['ticks'].tolist())

        la3 = pd.concat([la3,pe])

        for k in range(len(pe)):
            pe['timestamp'].iloc[k] = datetime.fromtimestamp(pe['timestamp'].iloc[k]/1000)

#             pe = pe.assign(date = [d.date() for d in pe['timestamp']])

        vbars = Volume_Bars(pd.concat([futureBarVolume,pe]))
        tbars = Tick_Bars(pd.concat([futureBarTick,pe]))

        futureBarVolume = vbars[1]
        futureBarTick = tbars[1]

        vols = pd.concat([vols, vbars[0]])
        ticks = pd.concat([ticks, tbars[0]])

        # adds five minutes to both start and end of the pull request
        start = int(datetime.timestamp(datetime.fromtimestamp(start/1000) + timedelta(minutes = 5)) * 1000)
        end = int(datetime.timestamp(datetime.fromtimestamp(end/1000) + timedelta(minutes = 5)) * 1000)

# converts timetstamp to datetime and substracts 5 vs
la3['timestamp'] = pd.to_datetime(la3['timestamp'], unit='ms') - timedelta(hours= 5)
la3 = la3.assign(date = [d.date() for d in la3['timestamp']])
la3['Instrument'] = "Google"

# Stop the stopwatch / counter 
t1_stop = process_time() 

print("Elapsed time:", t1_stop, t1_start)  
print("Elapsed time during the whole program in seconds:", 
                                     t1_stop-t1_start)



A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self._setitem_with_indexer(indexer, value)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  app.launch_new_instance()


In [82]:
la3.to_csv("Google" + "MainData.csv")
ticks['Instrument'] = "Google"
vols['Instrument'] = "Google"
ticks.to_csv("Google" + "ticks.csv")
vols.to_csv("Google"+ "vols.csv")
