# Get Deposits

In this notebook, we compile the data for deposit transactions from the API.

In [1]:
import requests
import json
import pandas as pd
import seaborn as sns
import cryptocompare
import matplotlib.pyplot as plt
from sklearn.cluster import KMeans
from datetime import datetime
import numpy as np
from sklearn.decomposition import PCA
from sklearn.preprocessing import StandardScaler
import missingno as msno
from IPython.display import HTML
import matplotlib.dates as md
import math
import time

### Fetch Deposit Transaction Data

Here, we write a query to fetch transaction information from the API.

In [2]:
lastId='""'
deposit_data=[]
while(1):
    try:
        #set query
        query="""
        {
          deposits(first: 1000 orderBy: id where:{id_gt:"""+lastId+"""}) {
            id
            pool{
                id
            }
            user{
                id
            }
            onBehalfOf{
                id
            }
            reserve{
                symbol
            }
            amount
            timestamp
          }
        }
        """
        #make request
        url = 'https://api.thegraph.com/subgraphs/name/aave/protocol-v2'
        request = requests.post(url,json={'query':query})
        #store data
        deposit_data.extend(request.json()['data']['deposits'])
        lastId = "\""+request.json()['data']['deposits'][-1]['id']+"\""
    except:
        #exit when no more data left to get
        break

#create borrows data frame
df_deposits = pd.DataFrame(deposit_data)
df_deposits['type']='deposit'
df_deposits.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 192006 entries, 0 to 192005
Data columns (total 8 columns):
 #   Column      Non-Null Count   Dtype 
---  ------      --------------   ----- 
 0   amount      192006 non-null  object
 1   id          192006 non-null  object
 2   onBehalfOf  192006 non-null  object
 3   pool        192006 non-null  object
 4   reserve     192006 non-null  object
 5   timestamp   192006 non-null  int64 
 6   user        192006 non-null  object
 7   type        192006 non-null  object
dtypes: int64(1), object(7)
memory usage: 11.7+ MB


### Re-Format Deposit Transaction Data

Some of the values in the dataset are dictionaries, so we must re-format the data to get the wanted field from those dictionaries.

In [3]:
#get id's
def getUser(row):
    """Return the ``id`` field of the nested ``user`` mapping held in ``row``."""
    user_record = row['user']
    return user_record['id']
#flatten the nested user record of every row down to its id
df_deposits['user'] = df_deposits.apply(getUser, axis=1)

def getOnBehalfOf(row):
    """Return the ``id`` field of the row's nested ``onBehalfOf`` mapping.

    Parameters
    ----------
    row : mapping-like
        Must provide an ``'onBehalfOf'`` entry.

    Returns
    -------
    The ``'id'`` value of the nested mapping, or ``np.nan`` when the entry is
    missing. Both a float (NaN) and ``None`` (a JSON ``null``) count as
    missing; previously ``None`` raised ``TypeError``.
    """
    beneficiary = row['onBehalfOf']
    if beneficiary is None or isinstance(beneficiary, float):
        return np.nan
    return beneficiary['id']
#flatten the nested onBehalfOf record of every row down to its id (NaN when absent)
df_deposits['onBehalfOf'] = df_deposits.apply(getOnBehalfOf, axis=1)

def getPool(row):
    """Return the ``id`` field of the nested ``pool`` mapping held in ``row``."""
    pool_record = row['pool']
    return pool_record['id']
#flatten the nested pool record of every row down to its id
df_deposits['pool'] = df_deposits.apply(getPool, axis=1)

#get symbols
def getReserve(row):
    """Return the ``symbol`` field of the row's nested ``reserve`` mapping.

    Parameters
    ----------
    row : mapping-like
        Must provide a ``'reserve'`` entry.

    Returns
    -------
    The ``'symbol'`` value of the nested mapping, or ``np.nan`` when the entry
    is missing. Both a float (NaN) and ``None`` (a JSON ``null``) count as
    missing; previously ``None`` raised ``TypeError``.
    """
    reserve = row['reserve']
    if reserve is None or isinstance(reserve, float):
        return np.nan
    return reserve['symbol']
#flatten the nested reserve record of every row down to its symbol (NaN when absent)
df_deposits['reserve'] = df_deposits.apply(getReserve, axis=1)

### Fetch Price Data

The API does not list the price of the asset for each transaction, so we must gather this information another way. For each transaction, we look up the most recent price of the asset recorded at or before the time of the transaction. Prices are reported in Ether, not USD, so we also fetch the price of Tether (USDT) at the same time. Tether is a stablecoin whose price should always be close to one dollar, so dividing the asset's price in Ether by Tether's price in Ether gives the asset's approximate price in USD.

In [4]:
#accumulators filled by getPrice: exactly one entry per processed row, in call order
pricesSym=[]
pricesUSDT=[]
#number of rows processed so far (used only for progress output)
i=0

def _latestPrice(reserve):
    """Return the first ``priceHistory`` price of ``reserve``, or ``None`` if the history is empty."""
    history = reserve['price']['priceHistory']
    return history[0]['price'] if history else None

#get prices for each asset at time of transaction, and price for USDT at time of transaction
def getPrice(row, sym):
    """Look up the price of the row's asset and of USDT and record both.

    One value is appended to ``pricesSym`` and one to ``pricesUSDT`` on every
    call, so both lists stay the same length and aligned with the rows
    processed. ``np.nan`` is recorded whenever a price cannot be determined.

    Parameters
    ----------
    row : mapping-like
        Must provide ``row[sym]`` (the asset symbol) and ``row['timestamp']``.
    sym : str
        Key of ``row`` that holds the asset symbol.

    Returns
    -------
    None. Results are delivered through the module-level lists.

    Notes
    -----
    * Reserves in the response are matched by their ``symbol`` field rather
      than by position, because the query does not request an ordering.
    * If the asset or USDT is absent from the response, both prices are NaN.
    * ``WETH``/``AmmWETH`` with an empty price history are recorded as ``1``.
    * A non-string symbol (e.g. NaN) is recorded as NaN without querying.
    * A failed request is retried every 10 seconds without limit.
    """
    global i

    #get symbol and time
    symbol = row[sym]
    timestamp = row['timestamp']

    #defaults recorded when no price can be determined
    priceSym = np.nan
    priceUSDT = np.nan

    #a missing symbol cannot be spliced into the query text, so skip the lookup
    if isinstance(symbol, str):
        #get query
        query="""
    {
    reserves(where: { symbol_in:["USDT",\""""+symbol+"""\"] }){
        symbol,
        price{
            priceInEth,
            priceHistory(where:{timestamp_lte: """+str(timestamp)+"""} orderBy: timestamp orderDirection: desc first:1){
                price,
                timestamp
              }
            }
          }
        }    
    """
        url = 'https://api.thegraph.com/subgraphs/name/aave/protocol-v2'
        #keep trying request until it is successful
        while True:
            try:
                #get json
                request = requests.post(url,json={'query':query})
                req_json = request.json()
                break
            except Exception:
                #if request unsuccessful, try again in 10 seconds
                #(Exception, not a bare except, so the cell can still be interrupted)
                print('stalling')
                time.sleep(10)

        try:
            #index the returned reserves by symbol; keep the first entry per symbol
            bySymbol = {}
            for reserve in req_json['data']['reserves']:
                bySymbol.setdefault(reserve['symbol'], reserve)

            symReserve = bySymbol.get(symbol)
            usdtReserve = bySymbol.get('USDT')

            #both reserves are needed; when symbol is "USDT" they are the same entry
            if symReserve is not None and usdtReserve is not None:
                latestUSDT = _latestPrice(usdtReserve)
                latestSym = _latestPrice(symReserve)
                if latestUSDT is not None:
                    priceUSDT = latestUSDT
                if latestSym is not None:
                    priceSym = latestSym
                elif symbol=='WETH' or symbol=='AmmWETH':
                    #no history for wrapped ether: prices are quoted in ether, so use 1
                    priceSym = 1
        except (KeyError, IndexError, TypeError):
            #unexpected response shape: report it and record NaN for this row
            print('ERROR')
            print(req_json)
            priceSym = np.nan
            priceUSDT = np.nan

    #append both values together so the two lists can never drift apart
    pricesSym.append(priceSym)
    pricesUSDT.append(priceUSDT)

    #update progress
    i+=1
    if i%5000==0:
        print(i)

#get deposit prices: getPrice is called once per row with the 'reserve' column as the symbol source
print('getting deposit prices...')
df_deposits.apply(getPrice, axis=1, args=('reserve',))

#attach the collected price lists as new columns
df_deposits['priceSym'] = pricesSym
df_deposits['priceUSDT'] = pricesUSDT

getting deposit prices...
5000
10000
15000
20000
25000
30000
35000
40000
45000
50000
55000
60000
65000
70000
75000
80000
85000
90000
95000
100000
105000
110000
115000
120000
125000
130000
135000
140000
145000
150000
155000
160000
165000
170000
175000
180000
185000
190000


### Get Decimal Data

The amount value is reported in units of the lowest denomination for each currency. To standardize these values, we must know the number of decimals each currency is reported in. The following query creates a dictionary holding the number of decimals for each currency.

In [5]:
#query asking for the symbol and decimal count of up to 1000 reserves
query="""
        {
  reserves(first:1000){
    symbol
    decimals
  }
}
        """
#post the query to the subgraph and pull the reserve list out of the response
url = 'https://api.thegraph.com/subgraphs/name/aave/protocol-v2'
request = requests.post(url,json={'query':query})
jsondata=request.json()['data']['reserves']

#map each asset symbol to its number of decimals (as an int)
decimals = {entry['symbol']: int(entry['decimals']) for entry in jsondata}

decimals

{'TUSD': 18,
 'AmmUniWBTCUSDC': 18,
 'RAI': 18,
 'GUSD': 2,
 'YFI': 18,
 'BAT': 18,
 'MANA': 18,
 'AmmBptWBTCWETH': 18,
 'UNI': 18,
 'AmmWBTC': 8,
 'WBTC': 8,
 'AmmUniYFIWETH': 18,
 'AmmUniCRVWETH': 18,
 'REN': 18,
 'AmmUniSNXWETH': 18,
 'BUSD': 18,
 'LINK': 18,
 'SUSD': 18,
 'AmmBptBALWETH': 18,
 'AmmDAI': 18,
 'DAI': 18,
 'AAVE': 18,
 'XSUSHI': 18,
 'AmmUniRENWETH': 18,
 'PAX': 18,
 'MKR': 18,
 'AmmUSDC': 6,
 'USDC': 6,
 'AmmUniLINKWETH': 18,
 'AmmUniDAIWETH': 18,
 'AmmUniDAIUSDC': 18,
 'AmmUniUSDCWETH': 18,
 'AmmUniBATWETH': 18,
 'BAL': 18,
 'AmmUniWBTCWETH': 18,
 'SNX': 18,
 'AmmWETH': 18,
 'WETH': 18,
 'AmmUniMKRWETH': 18,
 'AmmUniUNIWETH': 18,
 'AMPL': 9,
 'RENFIL': 18,
 'CRV': 18,
 'AmmUSDT': 6,
 'USDT': 6,
 'KNC': 18,
 'AmmUniAAVEWETH': 18,
 'ZRX': 18,
 'ENJ': 18}

### Adjust Amount

Next, we convert the amount column to type float. Then, with the information we gathered above, we write a function to adjust the transaction amounts. 

In [6]:
#amounts arrive as an object column; cast them to 64-bit floats
df_deposits['amount'] = df_deposits['amount'].astype('float64')

In [7]:
#function to divide each amount based on the reserve
def adjustAmount(row):
    """Return ``row['amount']`` divided by ten to the power of the reserve's decimals.

    The decimal count is read from the module-level ``decimals`` dictionary
    using ``row['reserve']`` as the key.
    """
    scale = 10 ** decimals[row['reserve']]
    return row['amount'] / scale

#rescale every amount by its reserve's decimals, then summarise the result
df_deposits['amount'] = df_deposits.apply(adjustAmount, axis=1)
df_deposits['amount'].describe()

count    1.920060e+05
mean     1.615326e+05
std      3.082114e+06
min      1.000000e-18
25%      6.518791e+00
50%      1.780000e+02
75%      7.190732e+03
max      6.000000e+08
Name: amount, dtype: float64

### Re-Format Price Data

We format the raw price data so that the prices are reported in USD. We will also determine the total amount of the transaction in USD.

In [8]:
#derive the price columns, drop the raw price columns, and index the frame by id
df_deposits = (
    df_deposits
    #asset price in ether, cast to float
    .assign(reservePriceETH=lambda d: d['priceSym'].astype(float))
    #asset price in USD: ether price times the reciprocal of the USDT price
    .assign(reservePriceUSD=lambda d: d['reservePriceETH']*(1/(d['priceUSDT'].astype(float))))
    #transaction amount in USD
    .assign(amountUSD=lambda d: d['amount'].astype(float)*d['reservePriceUSD'])
    #drop redundant columns
    .drop(columns=['priceSym','priceUSDT'])
    #use the id column as the index
    .set_index('id')
)

### Save Final Data Frame

Finally, we save the dataframe to a csv file.

In [9]:
#the frame is indexed by id, so the index must be written or the ids are lost from the file
df_deposits.to_csv("deposits.csv",index=True)