In [582]:
import requests 
import json 
import pandas as pd
import numpy as np
from datetime import datetime, timedelta, time
import pickle
import matplotlib.pyplot as plt
import matplotlib.dates as mdates

def compute_HLVVW(df):
    High = df['Price'].max()
    Low = df['Price'].min()
    Volume = df.Volume.iloc[-1]
    Close = df.Price.iloc[-1]
    df["VWAP"] = (Volume * ((High + Low + Close) /3)) / Volume
    return df

def tickRule(df):
    df['TickRule'] = 0
    for i in range(0,len(df)):                          
        if df.Price.iloc[i] > df.Price.iloc[i-1]:
            df.TickRule.iloc[i] = 1 
        elif df.Price.iloc[i] < df.Price.iloc[i-1]:
            df.TickRule.iloc[i] = -1 
        else: 
            df.TickRule.iloc[i] = df.TickRule.iloc[i-1]
    df = df.assign(TickRule = df['TickRule'].cumsum())
    return df
                    
# Choose to calculate volume as the division of the 
    # multiplication of the average ticks per day and their
        # respective bid-ask volumes average.
        # Considering that 80 bars per day in a normal day are filled by 2.3 million shares 
            # every 5 minutes.

# Following the same path, tick bars equals the division of 80 and the
    # division of the length of the data and the length of each day's data (tick-by-tick)
def thresholds(df,threshold):
    df = df.set_index('timestamp').astype(
        {'bid_vol':'int64', 'ask_vol':'int64', 'date':'datetime64'}).reset_index()
    total_ticks = len(df)
    Day_grp = df.groupby(['date'])

    Tick_per_bar = round((total_ticks / len(Day_grp)) / threshold)
    Vol_per_bar = round(((total_ticks / len(Day_grp))* (
        Day_grp.mean()['ask_vol'] + Day_grp.mean()['bid_vol']  / 2).mean())/ threshold)
    return [Tick_per_bar, Vol_per_bar]

def Variables(data):
    data =  data.assign(Volume = data['bid_vol'].cumsum() + data['ask_vol'].cumsum(),
                        Price = ((data['ask_vol'] * data['ask'] + data['bid_vol'] * data['bid'])
                                / (data['ask_vol'] + data['bid_vol'])),
                         MidPrice = (data['bid'] + data['ask'])/ 2,
                        Spread  = data['ask'] - data['bid'],
                       SpreadCm = (data['ask'] - data['bid']).cumsum()) 
    data = tickRule(data)

    #  The observed prices are the result of sequential trading against the bid-ask spread
    data = data.assign(ObservedPrice = data['Price'] + data['TickRule']*data['Spread'])
    data = data.set_index('timestamp').astype(
        {'bid_vol':'int64', 'ask_vol':'int64', 'date':'datetime64',
         'Volume':'int64', 'Price':'float64', 'ObservedPrice':'float64'}).reset_index()
    return data

def Volume_Bars(New_bar):
    Volume_Bars = pd.DataFrame(columns = ['timestamp', 'bid', 'ask', 'bid_vol', 
                                         'ask_vol', 'date', 'Volume','Price', 'MidPrice', 
                                         'Spread', 'SpreadCm', 'TickRule', 'ObservedPrice'])

#     Vol_per_bar = thresholds(data2,80)[1]# from instrument info table pull volperbar 
    
    Vol_per_bar = 5225269
    
    while len(New_bar[New_bar['Volume'] >= Vol_per_bar]) > 1:
        i = New_bar[New_bar['Volume'] >= Vol_per_bar].index[0]
        Volume_Bars = Volume_Bars.append(compute_HLVVW(New_bar.iloc[:i]).iloc[-1])
        deprecatedBar = New_bar.iloc[:i]
        thirdQ = np.quantile(deprecatedBar['Volume'], .75).astype(int)
        futureBar = New_bar[New_bar['Volume'] >= thirdQ]
        futureBar = futureBar.drop([ 'Volume', 'Price', 'MidPrice',
                                  'Spread', 'SpreadCm', 'TickRule', 'ObservedPrice'],axis = 1)
        New_bar = Variables(futureBar)

        
    return Volume_Bars

def Tick_Bars(New_bar):
    Tick_Bars = pd.DataFrame(columns = ['timestamp', 'bid', 'ask', 'bid_vol', 
                                         'ask_vol', 'date', 'Volume','Price', 'MidPrice', 
                                         'Spread', 'SpreadCm', 'TickRule', 'ObservedPrice'])

#     Tick_per_bar = thresholds(data2,threshold)[1]# from instrument info table pull volperbar 
    Tick_per_bar = 1258

    x = 1
    
   
    while len(New_bar) >= Tick_per_bar:
        i = New_bar[New_bar.index >= Tick_per_bar].index[0]
        Tick_Bars = Tick_Bars.append(compute_HLVVW(New_bar.iloc[:i]).iloc[-1])
        deprecatedBar = New_bar.iloc[:i]
        thirdQ = np.quantile(deprecatedBar.index[-1], .75).astype(int)
        futureBar = New_bar[New_bar.index >= thirdQ]
        futureBar = futureBar.drop([ 'Volume', 'Price', 'MidPrice',
                                  'Spread', 'SpreadCm', 'TickRule', 'ObservedPrice'],axis = 1)
        New_bar = Variables(futureBar)

        
    return Tick_Bars

In [372]:
data = pd.read_pickle("Exploratory/Apple/MainData")
data = data.assign(date = [d.date() for d in data['timestamp']])
# data.head()


In [606]:
ticks = Tick_Bars(Variables(data.iloc[:]))
ticks.to_csv("ticks.csv")

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self._setitem_with_indexer(indexer, value)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  from ipykernel import kernelapp as app


KeyboardInterrupt: 

In [None]:
vols = Volume_Bars(Variables(data.iloc[:]))
vols.to_csv("volss.csv")