# Get Borrows

In this notebook, we compile borrow-transaction data from the Aave protocol v2 subgraph API hosted on The Graph.

In [2]:
import requests
import json
import pandas as pd
import seaborn as sns
import cryptocompare
import matplotlib.pyplot as plt
from sklearn.cluster import KMeans
from datetime import datetime
import numpy as np
from sklearn.decomposition import PCA
from sklearn.preprocessing import StandardScaler
import missingno as msno
from IPython.display import HTML
import matplotlib.dates as md
import time
import math

### Fetch Borrow Transaction Data

Here, we write a query to fetch transaction information from the API.

In [3]:
#subgraph endpoint queried for every page
url = 'https://api.thegraph.com/subgraphs/name/aave/protocol-v2'
#how often a failed page request is retried, and how long to wait in between (seconds)
MAX_FETCH_ATTEMPTS = 5
FETCH_RETRY_DELAY = 10

lastId='""'
borrow_data=[]
#page through the borrows 1000 at a time, ordered by id, until an empty page comes back
while True:
    #set query: only borrows whose id is greater than the last one already stored
    query="""
    {
        borrows (first: 1000 orderBy: id where:{id_gt:"""+lastId+"""}) {
        id,
        user{
            id
        }
        onBehalfOf{
            id
        }
        pool{
            id
        }
        amount,
        reserve {
          id,
          symbol
        },
        borrowRate,
        borrowRateMode,
        timestamp
        }
    }
    """
    #make request; retry failures a few times, then fail loudly instead of
    #silently treating a truncated download as the complete dataset
    page = None
    for attempt in range(1, MAX_FETCH_ATTEMPTS+1):
        try:
            request = requests.post(url,json={'query':query},timeout=60)
            request.raise_for_status()
            #parse the body once; a payload without data/borrows counts as a failure
            page = request.json()['data']['borrows']
            break
        except (requests.exceptions.RequestException, ValueError, KeyError, TypeError) as err:
            print('request failed (attempt '+str(attempt)+' of '+str(MAX_FETCH_ATTEMPTS)+'): '+repr(err))
            if attempt < MAX_FETCH_ATTEMPTS:
                time.sleep(FETCH_RETRY_DELAY)
    if page is None:
        raise RuntimeError('could not fetch borrows after id '+lastId+'; borrow_data is incomplete')

    #exit when no more data left to get
    if not page:
        break

    #store data and remember the last id (quoted, ready to splice into the next query)
    borrow_data.extend(page)
    lastId = "\""+page[-1]['id']+"\""

#build the borrows frame and tag every row with its transaction type
df_borrows = pd.DataFrame(borrow_data).assign(type='borrows')
df_borrows.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 85391 entries, 0 to 85390
Data columns (total 10 columns):
 #   Column          Non-Null Count  Dtype 
---  ------          --------------  ----- 
 0   amount          85391 non-null  object
 1   borrowRate      85391 non-null  object
 2   borrowRateMode  85391 non-null  object
 3   id              85391 non-null  object
 4   onBehalfOf      85391 non-null  object
 5   pool            85391 non-null  object
 6   reserve         85391 non-null  object
 7   timestamp       85391 non-null  int64 
 8   user            85391 non-null  object
 9   type            85391 non-null  object
dtypes: int64(1), object(9)
memory usage: 6.5+ MB


### Re-Format Borrow Transaction Data

Some of the values in the dataset are dictionaries, so we must re-format the data to extract the desired field from each of those dictionaries.

In [4]:
#get id's
def getUser(row):
    """Return the 'id' entry of the nested record stored under the row's 'user' key."""
    user_record = row['user']
    return user_record['id']
#replace each nested user record with its id (row-wise)
df_borrows['user'] = df_borrows.apply(getUser, axis=1)

def getOnBehalfOf(row):
    """Return the 'id' of the row's nested 'onBehalfOf' record, or NaN when it is missing.

    Any value that is not a dictionary is treated as missing. This covers the
    float NaN that pandas uses for absent values as well as None, which is how
    a JSON null is represented after parsing.
    """
    on_behalf_of = row['onBehalfOf']
    if isinstance(on_behalf_of, dict):
        return on_behalf_of['id']
    return np.nan
#replace each nested onBehalfOf record with its id (row-wise)
df_borrows['onBehalfOf'] = df_borrows.apply(getOnBehalfOf, axis=1)

def getPool(row):
    """Return the 'id' entry of the nested record stored under the row's 'pool' key."""
    pool_record = row['pool']
    return pool_record['id']
#replace each nested pool record with its id (row-wise)
df_borrows['pool'] = df_borrows.apply(getPool, axis=1)

#get symbols
def getReserve(row):
    """Return the 'symbol' of the row's nested 'reserve' record, or NaN when it is missing.

    Any value that is not a dictionary is treated as missing. This covers the
    float NaN that pandas uses for absent values as well as None, which is how
    a JSON null is represented after parsing.
    """
    reserve = row['reserve']
    if isinstance(reserve, dict):
        return reserve['symbol']
    return np.nan
#replace each nested reserve record with its symbol (row-wise)
df_borrows['reserve'] = df_borrows.apply(getReserve, axis=1)

### Fetch Price Data

The API does not list the price of the asset for each transaction, so we must gather this information separately. For each transaction, we look up the most recent recorded price of the asset at or before the transaction's timestamp. Prices are reported in Ether, not USD, so we also fetch the price of Tether (USDT) for the same timestamp. Tether is a stablecoin, so its price should always be close to one dollar. Dividing the asset's price by the price of Tether therefore gives the approximate price of the asset in USD.

In [5]:
#one entry is appended to each list per processed row, so both stay aligned with the frame
pricesSym=[]
pricesUSDT=[]
#number of rows processed so far (used for progress output)
i=0

def _latest_price(reserves, symbol):
    """Return the first available priceHistory price among reserves with this symbol, else NaN."""
    for reserve in reserves:
        if reserve.get('symbol') != symbol:
            continue
        history = (reserve.get('price') or {}).get('priceHistory') or []
        if history:
            return history[0]['price']
    return np.nan

#get prices for each asset at time of transaction, and price for USDT at time of transaction
def getPrice(row, sym):
    """Look up the price of the row's asset and of USDT at the row's timestamp.

    Parameters:
        row: mapping-like record; row[sym] holds the asset symbol and
            row['timestamp'] the transaction time.
        sym: key under which the asset symbol is stored in `row`.

    Returns nothing. Exactly one value is appended to the module-level lists
    `pricesSym` (asset price) and `pricesUSDT` (USDT price) on every call;
    a price that cannot be determined is recorded as NaN. Prices are stored
    exactly as they appear in the response. The module-level counter `i` is
    incremented and printed every 5000 calls.
    """
    global i

    #get symbol and time
    symbol = row[sym]
    timestamp = row['timestamp']

    #defaults used whenever a price cannot be determined
    priceSym = np.nan
    priceUSDT = np.nan

    #a missing symbol (e.g. NaN) cannot be queried; leave both prices as NaN
    if isinstance(symbol, str):
        #get query: newest price at or before the timestamp, for USDT and for the asset
        query="""
        {
        reserves(where: { symbol_in:["USDT",\""""+symbol+"""\"] }){
            symbol,
            price{
                priceInEth,
                priceHistory(where:{timestamp_lte: """+str(timestamp)+"""} orderBy: timestamp orderDirection: desc first:1){
                    price,
                    timestamp
                  }
                }
              }
            }
        """
        #keep trying request until it is successful
        while True:
            try:
                #get json
                url = 'https://api.thegraph.com/subgraphs/name/aave/protocol-v2'
                request = requests.post(url,json={'query':query},timeout=30)
                req_json = request.json()
                break
            except (requests.exceptions.RequestException, ValueError):
                #only request/decoding failures are retried, so the loop can still be interrupted
                #if request unsuccessful, try again in 10 seconds
                print('stalling')
                time.sleep(10)

        try:
            #match reserves on their symbol: the query does not fix the order of the results
            reserves = req_json['data']['reserves']
            priceSym = _latest_price(reserves, symbol)
            priceUSDT = _latest_price(reserves, 'USDT')
        except (KeyError, TypeError, IndexError, AttributeError):
            #unexpected response shape: report it and record NaN for this row
            print('ERROR')
            print(req_json)
            priceSym = np.nan
            priceUSDT = np.nan

    #always record one pair so the lists never drift out of step with the rows
    pricesSym.append(priceSym)
    pricesUSDT.append(priceUSDT)

    #update progress
    i+=1
    if i%5000==0:
        print(i)

#get borrow prices
print('getting borrow prices...')
#getPrice is called only for its side effects (it appends to pricesSym / pricesUSDT),
#so use a plain loop that invokes it exactly once per row
for _, borrow_row in df_borrows.iterrows():
    getPrice(borrow_row,'reserve')

#one price pair is expected per transaction; stop with a clear message otherwise
if len(pricesSym)!=len(df_borrows) or len(pricesUSDT)!=len(df_borrows):
    raise RuntimeError(
        'collected '+str(len(pricesSym))+' asset prices and '+str(len(pricesUSDT))
        +' USDT prices for '+str(len(df_borrows))+' borrows'
    )

df_borrows['priceSym']=pricesSym
df_borrows['priceUSDT']=pricesUSDT

getting borrow prices...
5000
10000
15000
20000
25000
30000
35000
40000
45000
50000
55000
60000
65000
70000
75000
80000
85000


### Re-Format Price Data

We format the raw price data so that the prices are reported in USD. We will also determine the total amount of the transaction in USD.

In [7]:
#parse the raw price values; both are reported in ETH
reserve_price_eth = df_borrows['priceSym'].astype(float)
usdt_price_eth = df_borrows['priceUSDT'].astype(float)

#asset price in ETH, and in USD (asset price divided by the USDT price)
df_borrows['reservePriceETH'] = reserve_price_eth
df_borrows['reservePriceUSD'] = reserve_price_eth*(1/usdt_price_eth)

#get amount in USD
#NOTE(review): amount is used exactly as returned by the API - confirm whether it needs scaling by the reserve's decimals
amount_raw = df_borrows['amount'].astype(float)
df_borrows['amountUSD'] = amount_raw*df_borrows['reservePriceUSD']

#reduce borrow rate to percent (divide by 1e27, then multiply by 100)
df_borrows['borrowRate'] = df_borrows['borrowRate'].astype(float)/1e27*100

#drop the redundant raw price columns and index the frame by transaction id
df_borrows = df_borrows.drop(columns=['priceSym','priceUSDT']).set_index('id')

### Save Final Data Frame

Finally, we save the dataframe to a csv file.

In [8]:
#the frame is indexed by transaction id (set_index('id') above), so the index
#must be written out; index=False would drop the id from the saved file
df_borrows.to_csv("borrows.csv")