# Get Repays

In this notebook, we compile the data for repay transactions from the API.

In [1]:
import requests
import json
import pandas as pd
import seaborn as sns
import cryptocompare
import matplotlib.pyplot as plt
from sklearn.cluster import KMeans
from datetime import datetime
import numpy as np
from sklearn.decomposition import PCA
from sklearn.preprocessing import StandardScaler
import missingno as msno
from IPython.display import HTML
import matplotlib.dates as md
import math
import time

### Fetch Repay Transaction Data

Here, we write a query to fetch transaction information from the API.

In [2]:
lastId='""'
repay_data = []
url = 'https://api.thegraph.com/subgraphs/name/aave/protocol-v2'
#how many times a failing page request is attempted before giving up
max_attempts = 5
#page through the repays 1000 at a time, using the last id seen as the cursor
while True:
    #set query
    query="""
    {
        repays (first: 1000 orderBy: id where:{id_gt:"""+lastId+"""}) {
        id,
        user{
            id
        }
        onBehalfOf{
            id
        }
        pool{
            id
        }
        amount,
        reserve {
          id,
          symbol
        },
        timestamp
        }
    }
    """
    #make request; a failure is retried and finally raised rather than being
    #mistaken for "no more data", which would silently truncate the dataset
    for attempt in range(1, max_attempts + 1):
        try:
            response = requests.post(url,json={'query':query})
            response.raise_for_status()
            page = response.json()['data']['repays']
            break
        except Exception as err:
            if attempt == max_attempts:
                raise RuntimeError('could not fetch repays after id '+lastId) from err
            #if request unsuccessful, try again in 10 seconds
            print('stalling')
            time.sleep(10)
    #exit when no more data left to get
    if not page:
        break
    #store data and move the cursor to the last id received
    repay_data.extend(page)
    lastId = "\""+page[-1]['id']+"\""

#create repays data frame
df_repays = pd.DataFrame(repay_data)
df_repays['type']='repay'
df_repays.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 56019 entries, 0 to 56018
Data columns (total 8 columns):
 #   Column      Non-Null Count  Dtype 
---  ------      --------------  ----- 
 0   amount      56019 non-null  object
 1   id          56019 non-null  object
 2   onBehalfOf  56019 non-null  object
 3   pool        56019 non-null  object
 4   reserve     56019 non-null  object
 5   timestamp   56019 non-null  int64 
 6   user        56019 non-null  object
 7   type        56019 non-null  object
dtypes: int64(1), object(7)
memory usage: 3.4+ MB


### Re-Format Repay Transaction Data

Some of the values in the dataset are dictionaries, so we must re-format the data to extract the field we need from each of them.

In [3]:
#get id's
def getUser(row):
    """Return the 'id' entry of the nested record stored under row['user']."""
    user_record = row['user']
    return user_record['id']
#replace each nested user record with its id (getUser is called once per row)
df_repays['user']=df_repays.apply(getUser, axis=1)

def getOnBehalfOf(row):
    """Return the 'id' of the record under row['onBehalfOf'], or NaN if absent.

    Anything that is not a dict is treated as missing. That covers the float
    NaN the original guard checked for and also None, which previously raised
    a TypeError when it was subscripted.
    """
    on_behalf_of = row['onBehalfOf']
    if isinstance(on_behalf_of, dict):
        return on_behalf_of['id']
    return np.nan
#replace each nested onBehalfOf record with its id (NaN where there is none)
df_repays['onBehalfOf']=df_repays.apply(getOnBehalfOf, axis=1)

def getPool(row):
    """Return the 'id' entry of the nested record stored under row['pool']."""
    pool_record = row['pool']
    return pool_record['id']
#replace each nested pool record with its id
df_repays['pool']=df_repays.apply(getPool, axis=1)

#get symbols
def getReserve(row):
    """Return the 'symbol' of the record under row['reserve'], or NaN if absent.

    Anything that is not a dict is treated as missing. That covers the float
    NaN the original guard checked for and also None, which previously raised
    a TypeError when it was subscripted.
    """
    reserve = row['reserve']
    if isinstance(reserve, dict):
        return reserve['symbol']
    return np.nan
#replace each nested reserve record with its symbol (NaN where there is none)
df_repays['reserve']=df_repays.apply(getReserve, axis=1)

### Fetch Price Data

The API does not list the price of the asset for each transaction, so we must gather this information in another way. We can search for the most recent price of the desired asset before the transaction occurred. Prices are reported in Ether, not USD. Because of this, we also get the price of Tether at the time of the transaction. Tether is a stable coin, so its price should always be close to one dollar. We will divide the asset price by the price of Tether to determine the price of the asset in USD. 

In [4]:
#accumulators filled by getPrice: exactly one entry per processed row, in row order
pricesSym=[]
pricesUSDT=[]
#number of rows processed so far (used only for progress output)
i=0

def _latest_price(reserves, symbol):
    """Return the first available history price for `symbol`, or NaN.

    `reserves` is the list returned by the price query. Entries are matched on
    their 'symbol' field rather than on their position, because the query does
    not request any ordering. Reserves whose 'priceHistory' is empty are
    skipped; NaN is returned when no matching reserve has a price.
    """
    for reserve in reserves:
        if reserve['symbol'] != symbol:
            continue
        history = reserve['price']['priceHistory']
        if history:
            return history[0]['price']
    return np.nan

#get prices for each asset at time of transaction, and price for USDT at time of transaction
def getPrice(row, sym):
    """Look up the price of row[sym] and of USDT at or before row['timestamp'].

    The results are not returned. One value is appended to `pricesSym` and one
    to `pricesUSDT` on every call, so both lists stay aligned with the rows
    they were built from. NaN is appended when a price cannot be determined,
    either because the symbol is missing or because the response is unusable.

    Parameters:
        row: mapping-like record that provides row[sym] and row['timestamp'].
        sym: key of `row` that holds the asset symbol.
    """
    global i
    #get symbol and time
    symbol = row[sym]
    timestamp = row['timestamp']

    price_sym = np.nan
    price_usdt = np.nan
    #a missing symbol (NaN) cannot be queried; it simply gets NaN prices
    if isinstance(symbol, str):
        #get query
        query="""
        {
        reserves(where: { symbol_in:["USDT",\""""+symbol+"""\"] }){
            symbol,
            price{
                priceInEth,
                priceHistory(where:{timestamp_lte: """+str(timestamp)+"""} orderBy: timestamp orderDirection: desc first:1){
                    price,
                    timestamp
                  }
                }
              }
            }
        """
        #keep trying request until it is successful
        while True:
            try:
                #get json
                url = 'https://api.thegraph.com/subgraphs/name/aave/protocol-v2'
                request = requests.post(url,json={'query':query})
                req_json = request.json()
                break
            except Exception:
                #Exception (not a bare except) so the loop can still be interrupted
                #if request unsuccessful, try again in 10 seconds
                print('stalling')
                time.sleep(10)

        try:
            reserves = req_json['data']['reserves']
            price_sym = _latest_price(reserves, symbol)
            price_usdt = _latest_price(reserves, 'USDT')
        except (KeyError, TypeError, IndexError):
            #unexpected response shape: report it and fall through with NaN
            print('ERROR')
            print(req_json)

    #append both values together so the two lists can never get out of step
    pricesSym.append(price_sym)
    pricesUSDT.append(price_usdt)

    #update progress
    i+=1
    if i%5000==0:
        print(i)

#get repay prices: getPrice is run once per row and records its results
#in the pricesSym / pricesUSDT lists rather than returning them
print('getting repay prices...')
df_repays.apply(getPrice, axis=1, args=('reserve',))

#attach the collected raw prices as new columns
df_repays['priceSym']=pricesSym
df_repays['priceUSDT']=pricesUSDT

getting repay prices...
5000
10000
15000
20000
25000
30000
40000
45000
50000
55000


### Re-Format Price Data

We format the raw price data so that the prices are reported in USD. We will also determine the total amount of the transaction in USD.

In [6]:
#cast the raw price columns to float
reserve_price_eth = df_repays['priceSym'].astype(float)
usdt_price_eth = df_repays['priceUSDT'].astype(float)

#asset price in ETH, and in USD using the USDT price as the exchange rate
df_repays['reservePriceETH'] = reserve_price_eth
df_repays['reservePriceUSD'] = reserve_price_eth*(1/usdt_price_eth)

#get amount in USD
amount = df_repays['amount'].astype(float)
df_repays['amountUSD'] = amount*df_repays['reservePriceUSD']

#drop the redundant raw price columns and make 'id' the index
df_repays = df_repays.drop(columns=['priceSym','priceUSDT']).set_index('id')

### Save Final Data Frame

Finally, we save the dataframe to a csv file.

In [7]:
#the frame is indexed by 'id' (set_index('id') above), so the index has to be
#written out; with index=False the ids were dropped from the file
df_repays.to_csv("repays.csv",index=True)