# Check Aggregations

Goals:
* Check subgraph hourly and daily aggregations


In [34]:
import os
import pandas as pd
import matplotlib.pyplot as plt
import json
from gql import Client, gql
from gql.transport.aiohttp import AIOHTTPTransport
from decimal import Decimal


In [35]:
## constants
INFURA_KEY = os.getenv('INFURA_KEY')

# mainnet
# PROD_ENDPOINT = 'https://api.thegraph.com/subgraphs/name/kwenta/optimism-futures'
# DEV_ENDPOINT = 'https://api.thegraph.com/subgraphs/name/tburm/optimism-futures'

# testnet
PROD_ENDPOINT = 'https://api.thegraph.com/subgraphs/name/kwenta/optimism-goerli-futures'
DEV_ENDPOINT = 'https://api.thegraph.com/subgraphs/name/tburm/optimism-goerli-futures'


In [36]:
# functions
convertDecimals = lambda x: Decimal(x) / Decimal(10**18)

def clean_df(df, decimal_cols, int_cols):
    for col in decimal_cols:
        if col in df.columns:
            df[col] = df[col].apply(convertDecimals)
            df[col] = df[col].astype(float)
        else:
            print(f"{col} not in DataFrame")

    for col in int_cols:
        if col in df.columns:
            df[col] = df[col].astype(int)
        else:
            print(f"{col} not in DataFrame")
    return df

async def run_query(query, params, accessor, endpoint=PROD_ENDPOINT):
    transport = AIOHTTPTransport(url=endpoint)

    async with Client(
        transport=transport,
        fetch_schema_from_transport=True,
    ) as session:

        # Execute single query
        query = query

        result = await session.execute(query, variable_values=params)
        return result[accessor]


async def run_recursive_query(query, params, accessor, endpoint=PROD_ENDPOINT):
  transport = AIOHTTPTransport(url=endpoint)

  async with Client(
      transport=transport,
      fetch_schema_from_transport=True,
  ) as session:
    done_fetching = False
    all_results = []
    while not done_fetching:
      result = await session.execute(query, variable_values=params)
      if len(result[accessor]) > 0:
        all_results.extend(result[accessor])
        params['last_id'] = all_results[-1]['id']
      else:
        done_fetching = True

    return all_results


In [37]:
# queries
hourlyStatsOld = gql("""
{
  futuresHourlyStats(
    first: 1000
    orderBy: timestamp
    orderDirection: desc
  ) {
    id
    asset
    timestamp
    trades
    volume
  }
}
""")


hourlyStats = gql("""
{
  futuresAggregateStats(
    first: 1000
    where: {
      period: 3600
    }
    orderBy: timestamp
    orderDirection: desc
  ) {
    id
    asset
    timestamp
    trades
    volume
    feesSynthetix
    feesKwenta
  }
}
""")

dailyStats = gql("""
{
  futuresAggregateStats(
    first: 1000
    where: {
      period: 86400
    }
    orderBy: timestamp
    orderDirection: desc
  ) {
    id
    asset
    timestamp
    trades
    volume
    feesSynthetix
    feesKwenta
  }
}
""")


### Query hourly

In [38]:
decimal_cols = [
    'volume'
]

int_cols = [
    'trades'
]

stats_response = await run_query(hourlyStatsOld, {}, 'futuresHourlyStats', endpoint=DEV_ENDPOINT)
df_compare = pd.DataFrame(stats_response)
df_compare = clean_df(df_compare, decimal_cols, int_cols)

# calculated fields
df_compare['date'] = pd.to_datetime(df_compare['timestamp'], unit='s')
df_compare['day'] = df_compare['date'].apply(lambda x: x.date())

df_compare


Unnamed: 0,id,asset,timestamp,trades,volume,date,day
0,1668657600-0x734554480000000000000000000000000...,0x73455448000000000000000000000000000000000000...,1668657600,1,333.000000,2022-11-17 04:00:00,2022-11-17
1,1668592800-0x734554480000000000000000000000000...,0x73455448000000000000000000000000000000000000...,1668592800,1,2670.260905,2022-11-16 10:00:00,2022-11-16
2,1668441600-0x734554480000000000000000000000000...,0x73455448000000000000000000000000000000000000...,1668441600,2,28368.000000,2022-11-14 16:00:00,2022-11-14
3,1668412800-0x734554480000000000000000000000000...,0x73455448000000000000000000000000000000000000...,1668412800,1,3006.189873,2022-11-14 08:00:00,2022-11-14
4,1668117600-0x734554480000000000000000000000000...,0x73455448000000000000000000000000000000000000...,1668117600,2,695.488241,2022-11-10 22:00:00,2022-11-10
...,...,...,...,...,...,...,...
226,1661914800-0x734554480000000000000000000000000...,0x73455448000000000000000000000000000000000000...,1661914800,1,24837.500000,2022-08-31 03:00:00,2022-08-31
227,1661889600-0x734554480000000000000000000000000...,0x73455448000000000000000000000000000000000000...,1661889600,1,24837.500000,2022-08-30 20:00:00,2022-08-30
228,1661875200-0x734554480000000000000000000000000...,0x73455448000000000000000000000000000000000000...,1661875200,1,625000.000000,2022-08-30 16:00:00,2022-08-30
229,1661828400-0x734554480000000000000000000000000...,0x73455448000000000000000000000000000000000000...,1661828400,2,2000.000000,2022-08-30 03:00:00,2022-08-30


In [39]:
df_compare_daily = df_compare.groupby(['day', 'asset'])[['volume', 'trades']].sum().reset_index()
df_compare_daily

Unnamed: 0,day,asset,volume,trades
0,2022-08-30,0x73425443000000000000000000000000000000000000...,10000.000000,2
1,2022-08-30,0x73455448000000000000000000000000000000000000...,651837.500000,4
2,2022-08-31,0x73455448000000000000000000000000000000000000...,25337.500000,2
3,2022-09-01,0x73425443000000000000000000000000000000000000...,89633.000000,4
4,2022-09-01,0x73455448000000000000000000000000000000000000...,251140.546411,22
...,...,...,...,...
83,2022-11-08,0x73455448000000000000000000000000000000000000...,51.297314,1
84,2022-11-10,0x73455448000000000000000000000000000000000000...,11472.643232,13
85,2022-11-14,0x73455448000000000000000000000000000000000000...,31374.189873,3
86,2022-11-16,0x73455448000000000000000000000000000000000000...,2670.260905,1


In [40]:
decimal_cols = [
    'volume',
    'feesSynthetix',
    'feesKwenta'
]

stats_response = await run_query(dailyStats, {}, 'futuresAggregateStats', endpoint=DEV_ENDPOINT)
df_daily = pd.DataFrame(stats_response)
df_daily = clean_df(df_daily, decimal_cols, int_cols)

# calculated fields
df_daily['feesTotal'] = df_daily['feesSynthetix'] + df_daily['feesKwenta']
df_daily['date'] = pd.to_datetime(df_daily['timestamp'], unit='s')
df_daily['day'] = df_daily['date'].apply(lambda x: x.date())

df_daily


Unnamed: 0,id,asset,timestamp,trades,volume,feesSynthetix,feesKwenta,feesTotal,date,day
0,1668643200-86400-0x734554480000000000000000000...,0x73455448000000000000000000000000000000000000...,1668643200,1,333.000000,0.999000,0.066600,1.065600,2022-11-17,2022-11-17
1,1668556800-86400-0x734554480000000000000000000...,0x73455448000000000000000000000000000000000000...,1668556800,1,2670.260905,8.010783,0.534052,8.544835,2022-11-16,2022-11-16
2,1668384000-86400-0x734554480000000000000000000...,0x73455448000000000000000000000000000000000000...,1668384000,3,31374.189873,190.866387,6.274838,197.141225,2022-11-14,2022-11-14
3,1668038400-86400-0x734554480000000000000000000...,0x73455448000000000000000000000000000000000000...,1668038400,13,11472.643232,52.087867,2.700598,54.788465,2022-11-10,2022-11-10
4,1667865600-86400-0x734554480000000000000000000...,0x73455448000000000000000000000000000000000000...,1667865600,1,51.297314,0.462676,0.000000,0.462676,2022-11-08,2022-11-08
...,...,...,...,...,...,...,...,...,...,...
83,1661990400-86400-0x734554480000000000000000000...,0x73455448000000000000000000000000000000000000...,1661990400,22,251140.546411,1103.335398,0.000000,1103.335398,2022-09-01,2022-09-01
84,1661990400-86400-0x734254430000000000000000000...,0x73425443000000000000000000000000000000000000...,1661990400,4,89633.000000,308.758756,0.000000,308.758756,2022-09-01,2022-09-01
85,1661904000-86400-0x734554480000000000000000000...,0x73455448000000000000000000000000000000000000...,1661904000,2,25337.500000,88.681250,0.000000,88.681250,2022-08-31,2022-08-31
86,1661817600-86400-0x734554480000000000000000000...,0x73455448000000000000000000000000000000000000...,1661817600,4,651837.500000,1956.012500,0.000000,1956.012500,2022-08-30,2022-08-30


In [41]:
decimal_cols = [
    'volume',
    'feesSynthetix',
    'feesKwenta'
]

stats_response = await run_query(hourlyStats, {}, 'futuresAggregateStats', endpoint=DEV_ENDPOINT)
df_hourly = pd.DataFrame(stats_response)
df_hourly = clean_df(df_hourly, decimal_cols, int_cols)

# calculated fields
df_hourly['feesTotal'] = df_hourly['feesSynthetix'] + df_hourly['feesKwenta']
df_hourly['date'] = pd.to_datetime(df_hourly['timestamp'], unit='s')
df_hourly['day'] = df_hourly['date'].apply(lambda x: x.date())

df_hourly


Unnamed: 0,id,asset,timestamp,trades,volume,feesSynthetix,feesKwenta,feesTotal,date,day
0,1668657600-3600-0x7345544800000000000000000000...,0x73455448000000000000000000000000000000000000...,1668657600,1,333.000000,0.999000,0.066600,1.065600,2022-11-17 04:00:00,2022-11-17
1,1668592800-3600-0x7345544800000000000000000000...,0x73455448000000000000000000000000000000000000...,1668592800,1,2670.260905,8.010783,0.534052,8.544835,2022-11-16 10:00:00,2022-11-16
2,1668441600-3600-0x7345544800000000000000000000...,0x73455448000000000000000000000000000000000000...,1668441600,2,28368.000000,176.809458,5.673600,182.483058,2022-11-14 16:00:00,2022-11-14
3,1668412800-3600-0x7345544800000000000000000000...,0x73455448000000000000000000000000000000000000...,1668412800,1,3006.189873,14.056929,0.601238,14.658167,2022-11-14 08:00:00,2022-11-14
4,1668117600-3600-0x7345544800000000000000000000...,0x73455448000000000000000000000000000000000000...,1668117600,2,695.488241,2.376635,0.139098,2.515733,2022-11-10 22:00:00,2022-11-10
...,...,...,...,...,...,...,...,...,...,...
226,1661914800-3600-0x7345544800000000000000000000...,0x73455448000000000000000000000000000000000000...,1661914800,1,24837.500000,86.931250,0.000000,86.931250,2022-08-31 03:00:00,2022-08-31
227,1661889600-3600-0x7345544800000000000000000000...,0x73455448000000000000000000000000000000000000...,1661889600,1,24837.500000,74.512500,0.000000,74.512500,2022-08-30 20:00:00,2022-08-30
228,1661875200-3600-0x7345544800000000000000000000...,0x73455448000000000000000000000000000000000000...,1661875200,1,625000.000000,1875.000000,0.000000,1875.000000,2022-08-30 16:00:00,2022-08-30
229,1661828400-3600-0x7345544800000000000000000000...,0x73455448000000000000000000000000000000000000...,1661828400,2,2000.000000,6.500000,0.000000,6.500000,2022-08-30 03:00:00,2022-08-30


### Check the hourly aggregations

In [42]:
df_hourly_check = df_hourly.merge(
    df_compare,
    on=['asset', 'timestamp'],
    suffixes=['_agg', '_compare']
)

print((df_hourly_check['volume_agg'] == df_hourly_check['volume_compare']).value_counts())
print((df_hourly_check['trades_agg'] == df_hourly_check['trades_compare']).value_counts())


True    231
dtype: int64
True    231
dtype: int64


### Check the daily aggregations

In [47]:
df_daily_check = df_daily.merge(
    df_compare_daily,
    on=['asset', 'day'],
    suffixes=['_agg', '_compare']
)

print((df_daily_check['volume_agg'].apply(round, 4) == df_daily_check['volume_compare'].apply(round, 4)).value_counts())
print((df_daily_check['trades_agg'] == df_daily_check['trades_compare']).value_counts())


True    88
dtype: int64
True    88
dtype: int64
