In [1]:
import re
import os
import pandas as pd
import numpy as np
import traceback2

where_map = {}
# pairs regexp: "\[([a-zA-z0-9$\-+τ\.%]*)\]-", "-\[([a-zA-z0-9$\-+τ]*)\]"

In [2]:
sm = {
    "WBTC" : "BTC",
    "WETH" : "ETH",
    "WBNB" : "BNB",
    "WFTM" : "FTM",
    "WAVAX" : "AVAX",
}


In [3]:
def pair1(line):
    try:
        is_3_pairs = line.split("]-[")
        if len(is_3_pairs) > 2:
            return None
        s_index = line.index("[") + 1
        s = line[s_index: line.index("]", s_index)]
        if all(ord(c) < 128 for c in s):
            return s
        else:
            print(line)
            return None
    except:
        return None

def pair2(line):
    s_index = line.index("-[") + 3
    s = line[s_index: line.index("]", s_index)]
    if all(ord(c) < 128 for c in s):
        return s
    else:
        print(line)
        return None

In [4]:
def apr(line):
    apr = "Year [0-9\,\.]+\%"
    x = re.findall(apr, line)
    return x[0].replace("Year ", "").replace("%", "").strip() if len(x) else 0

In [5]:
def tvl(line):
    tvl = "TVL: \$[0-9\.\,]+"
    x = re.findall(tvl, line)
    return x[0].replace("TVL: $", "").replace("%", "").replace(",","").strip()

In [6]:
def where(line):
    if 'INFO' in line:
        return line.replace("INFO", "").replace('\n', '').replace(":", "").replace("//", "://").strip()
    return None

In [7]:
def get_single_pair(line):
    exp = "\d* *.*Price:"
    x = re.findall(exp, line)
    if not len(x):
#         print(">>>>>>>>>>>",line)
        return
    pair = x[0].replace(" Price:", "")
    return pair[pair.rindex(" "):].strip() if " " in pair else pair

def get_single_tvl(line):
    exp = "\([0-9\,\.$]+\)"
    x = re.findall(exp, line)
    return x[0].replace("(", "").replace(")", "").replace("$", "").replace(",","").strip()


In [8]:
def read_blocks_from_files(file):
    f = open(file)
    lines = f.readlines()
    blocks = []
    block = []
    for line in lines:
        line = line.strip()
        if line == "" or len(re.findall("INFO[ ]+: ", line)):
            blocks.append(block)
            if len(line):
                blocks.append([line])
            block = []
        else:
            block.append(line)
    blocks.append(block)
    return blocks

def get_pair_info_from_block(block, wh, network):
    if len(block) == 1:
        if 'INFO' in block[0]:
            return [where(block[0])]
        else:
            return [wh]
    if len(block) and pair1(block[0]):
        apr_index = -1
        for index in range(1, len(block)):
            if 'APR: ' in block[index]:
                apr_index = index
                break
        return [pair1(block[0]), pair2(block[0]), tvl(block[0]), apr(block[apr_index]) if apr_index > 1 else 0, where_map[wh] if wh in where_map else wh, network]
    elif len(block) > 2 and get_single_pair(block[0]):
        apr_index = -1
        for index in range(1, len(block)):
            if 'APR: ' in block[index]:
                apr_index = index
                break
        return [get_single_pair(block[0]), "", get_single_tvl(block[1]), apr(block[apr_index]) if apr_index > 1 else 0, where_map[wh] if wh in where_map else wh, network]
        
    return None


def get_pairs(blocks, network):
    where = ""
    pairs = []
    for block in blocks:
        try:
            pair = get_pair_info_from_block(block, where, network)
            if len(where) == 0 and pair and len(pair) > 2:
                print(block, network)
            if pair:
                if len(pair) == 1 and pair[0] and len(pair[0]):
                    where = pair[0]
                else:
                    pairs.append(pair)
        except Exception:
            print(block[0])
    return pairs
        

In [9]:
data_dir = 'vfat_tool_data/2021-06-07_set/'
ignorable_files = ['.ipynb_checkpoints', "Yield Farm Manual 6:7 New .xlsx"]
def conver_files_to_one_df(data_dir):
    all_pairs = []
    files = os.listdir(data_dir)
    for file_name in files:
        if file_name in ignorable_files:
            continue
        network = file_name[0:file_name.index(" ")]
        blocks = read_blocks_from_files(f"{data_dir}{file_name}")
        pairs = get_pairs(blocks, network)
        all_pairs.extend(pairs)
    return all_pairs

In [10]:
pairs = conver_files_to_one_df(data_dir)

UnicodeDecodeError: 'utf-8' codec can't decode byte 0x9e in position 14: invalid start byte

In [None]:
pair_df = pd.DataFrame(pairs, columns=['pair1', 'pair2', 'tvl', 'apr', 'where', 'network'])
pair_df['apy'] = np.nan

In [None]:
pairs

In [None]:
pair_df

In [None]:
pair_df[(pair_df['pair1'] == 'uUNICLY') | (pair_df['pair2'] == 'uARTBLOCKS')]

In [None]:
sheet_pairs_df = pd.read_csv('MATIC 6_1 New.csv')
eth_sheet_pairs_df = pd.read_csv('eth_net_pairs.csv')
eth_sheet_pairs_df.columns=['pair1', 'pair2', 'tvl', 'where', 'apr']
eth_sheet_pairs_df['apy'] = np.nan
eth_sheet_pairs_df['network'] = 'ETH'
sheet_pairs_df = sheet_pairs_df.append(eth_sheet_pairs_df)

In [None]:
# sheet_pairs_df

In [None]:
def update_apr(apr):
    apr = f"{apr}".replace("%", "")
    if len(apr) == 0 or "nan" == apr:
        return np.nan
    else:
        return apr

In [None]:
pair_df = pair_df.append(sheet_pairs_df).reset_index(drop=True)
pair_df['apr'] = pair_df['apr'].apply(lambda x: update_apr(x))
pair_df['pair1'] = pair_df['pair1'].apply(lambda x: sm[x.strip()] if x.strip() in sm else x.strip())
pair_df['pair2'] = pair_df['pair2'].apply(lambda x: sm[x.strip()] if x.strip() in sm else x.strip())

In [None]:
pair_df

In [None]:
# pair_df['apr'].unique().tolist()

In [None]:
def convert_apr_to_apy(pair_df):
    tmp_df = pair_df[pair_df['apy'].isna()]
    print(tmp_df.shape)
    for index, row in tmp_df.iterrows():
        apr = row['apr']
        if apr:
            apr = float(apr.replace(',',''))
        else:
            apr = 0
        try:
            pair_df.loc[index, 'apy'] = ((1+(apr/100)/365)**365-1)*100
        except:
            pair_df.loc[index, 'apy'] = np.nan
    return
            
def convert_apy_to_apr(pair_df):
    tmp_df = pair_df[(pair_df['apr'].isnull())]
    print(tmp_df.shape)
    for index, row in tmp_df.iterrows():
        apy = row['apy']
        if apy:
            apy = float(apy.replace(',',''))
        else:
            apy = 0
        try:
            pair_df.loc[index, 'apr'] = (((1+(apy/100))**(1/365)-1)*365)*100
        except:
            pair_df.loc[index, 'apr'] = np.nan
    return


convert_apr_to_apy(pair_df)
convert_apy_to_apr(pair_df)

In [None]:
pair_df[pair_df['apy'].isna()]

In [None]:
pair_df[pair_df['pair1'] == 'AZUKI']

In [None]:
single_piar_df = pair_df[pair_df['pair2'] == ""].copy()
double_coin_pair_df = pair_df[pair_df['pair2'] != ""].copy()
tmp_df = double_coin_pair_df.copy()

In [None]:
single_piar_df

In [None]:
double_coin_pair_df

In [None]:
tmp_df

In [None]:
tmp_df['tmp'] = tmp_df['pair1']
tmp_df['pair1'] = tmp_df['pair2']
tmp_df['pair2'] = tmp_df['tmp']
del tmp_df['tmp']

In [None]:
double_coin_pair_df.append(tmp_df).sort_values(['pair1', 'pair2']).reset_index(drop=True)

In [None]:
pair_df = double_coin_pair_df.append(tmp_df).sort_values(['pair1', 'pair2']).reset_index(drop=True)
pair_df = single_piar_df.append(pair_df).sort_values(['pair1', 'pair2']).reset_index(drop=True)

In [None]:
pair_df

In [None]:
pair_df = pair_df.drop_duplicates()

In [None]:
pair_df['dex'] = pair_df['where']
del pair_df['where']

In [None]:
pair_df.to_csv('vfat_tools_data.csv')

In [None]:
pair_df['dex'].unique().tolist()

In [None]:
pair_df[pair_df['pair1'] == 'ETH']

In [None]:
pair_df['apr'].unique().tolist()

In [None]:
pair_df