# Loading Necessary Libraries and Data

In [1]:
import pandas as pd
# Currency table; the first CSV column is used as the row index.
df = pd.read_csv('../data/all_currencies_table.csv', index_col=0)

# Second data set; only its `currency` column is used further down.
# Presumably per-date price records, going by the file name -- confirm.
trend = pd.read_csv(filepath_or_buffer='../data/cryptocurrency_prices_by_date.csv')

## Mapping through prefix and replace functions

In [2]:
def rip(l: str):
    """Normalise a name so that differently formatted spellings compare equal.

    Every occurrence of the characters ``( ) [ ] . / + -`` and of the space
    character is deleted from ``l``; the remainder is returned lower-cased.
    Any other character (apostrophes, digits, ...) is kept as is.
    """
    without_punctuation = l.translate(str.maketrans('', '', '()[]./+- '))
    return without_punctuation.lower()

# Normalised names from the currency table (duplicates kept), sorted.
l1 = sorted(df.name.apply(rip))

# Distinct normalised names from the trend data, sorted.
l2 = sorted(trend.currency.apply(rip).unique())

# Names present on one side only. Membership is tested against sets so the
# comparison stays linear instead of scanning a list for every element;
# iterating the sorted lists keeps the original ordering of t1 / t2.
l1_lookup, l2_lookup = set(l1), set(l2)
t1 = [i for i in l1 if i not in l2_lookup]
t2 = [i for i in l2 if i not in l1_lookup]

# Pair an unmatched table name with an unmatched trend name whenever one is a
# prefix of the other. NOTE: if a table name matches several trend names, only
# the last match (in sorted order of t2) survives in the dict.
modified_replace = {i: j for i in t1 for j in t2 if i.startswith(j) or j.startswith(i)}

## Mapping through observation

In [3]:
# Hand-picked pairs added on top of the automatically derived prefix matches.
# Entries here overwrite any automatic entry that has the same key.
# Keys are looked up in `df` when the mapping is applied later; values are
# presumably the matching normalised names from `trend` -- confirm against data.
modified_replace.update(
    {
        'adex': 'adxnet',
        'ambrosus': 'amber',
        'ammoreloaded': 'ammorewards',
        'atmchain': 'attentiontokenofmedia',
        'crypto20': 'c20',
        'cryptobullion': 'cryptogenicbullion',
        'ebitcoin': 'ebtcnew',
        'escroco': 'escoro',
        'farstcoin': 'firstbitcoincapital',
        'futurxe': 'futurexe',
        'g3n': 'genstake',
        'gaymoney': 'gaycoin',
        'kickcoin': 'kickico',
        'lbrycredits': 'librarycredit',
        "miners'rewar": 'minersrewardtoken',
        'monoeci': 'monacocoin',
        'russianminer': 'russianminingcoin',
        'spectreaidi': 'spectredividend',
        'spectreaiut': 'spectreutility',
        'unitedtrader': 'uttoken',
        'wetrust': 'trust',
        'zlancer': 'zcashgold'
    }
)

## Data to be removed

In [4]:
# Normalise the names, then apply the name mapping to the `name` column only.
# (A frame-wide `df.replace(mapping)` would also rewrite any other column that
# happens to contain one of the keys, e.g. `symbol`.)
df['name'] = df['name'].apply(rip).replace(modified_replace)

# Recompute both name lists now that the table names have been mapped.
l1 = sorted(df.name.apply(rip))
l2 = sorted(trend.currency.apply(rip).unique())

# Names present on one side only; sets keep the membership tests linear while
# the sorted lists preserve the ordering of t1 / t2.
l1_lookup, l2_lookup = set(l1), set(l2)
t1 = [i for i in l1 if i not in l2_lookup]
t2 = [i for i in l2 if i not in l1_lookup]

# Table names that still have no counterpart in the trend data.
# t1 and t2 are disjoint by construction (t1 holds nothing from l2, t2 is a
# subset of l2), so filtering t1 against t2 would be a no-op: copy t1 directly.
remove = list(t1)

## Duplicates

In [5]:
# keep=False marks every row whose name occurs more than once,
# i.e. all members of each duplicate group rather than only the repeats.
has_shared_name = df.duplicated(subset='name', keep=False)
d = df[has_shared_name]
display(d)

Unnamed: 0_level_0,name,symbol,market_cap,price,circulating_supply,volume_24hr,1h,24h,7d
number,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
83,enigma,ENG,242161600.0,3.23589,74836170.0,21605500.0,-3.12,36.52,19.37
242,hempcoin,THC,41746160.0,0.181128,230478800.0,377391.0,-1.27,18.91,0.5
493,encryptotel,ETT,7652512.0,0.123167,62131190.0,863.626,-1.53,56.02,38.85
860,hempcoin,HMP,240320.2,0.000177,1356645000.0,574.643,-1.0,11.42,105.11
1054,enigma,XNG,197992.8,0.343037,577176.3,202.097,-0.99,18.05,2.58
1325,firstbitcoin,BIT,,0.041009,,7879.95,-1.84,9.63,111.89
1383,firstbitcoin,BITCF,,0.240117,,1186.05,-0.99,-32.45,8.73
1389,encryptotel,ETT,,0.079232,,949.529,-1.23,6.45,-2.17


Of the 8 rows above, we know that we'll be getting rid of the bottom 3.  
Let's check whether the remaining ones have any more information (such as the symbol) that could help map them.

In [6]:
# Names of the first two rows that share a name with another row.
# Derived from `df` rather than from the previous value of `d`, so re-running
# this cell keeps working after `d` has been rebound from a frame to a list.
d = list(df.loc[df.duplicated(subset='name', keep=False), 'name'])[:2]
display(d)

['enigma', 'hempcoin']

In [7]:
# For each duplicated name, list every trend name that contains it as a substring.
for duplicated_name in d:
    for trend_name in l2:
        if duplicated_name not in trend_name:
            continue
        print(duplicated_name, trend_name)

enigma enigma
enigma enigmaproject
hempcoin hempcoin
hempcoin hempcoinhmp


In hempcoin, we can clearly make out that the 2nd one maps to hempcoinhmp through the symbol.  
In enigma, we'll have to make an assumption that the 2nd one maps to enigmaproject.

In [8]:
# Target trend names for the second occurrence of each duplicated table name.
# NOTE(review): the key is the bare name, so this mapping alone cannot tell the
# first occurrence from the second -- confirm how the consumer applies it.
d_replace = dict(
    enigma='enigmaproject',
    hempcoin='hempcoinhmp',
)

In [9]:
# Persist the three results as one list: [name mapping, names to drop, duplicate mapping].
# NOTE(review): the directory is spelled '../Data' here but '../data' where the
# CSVs are read -- confirm this is intended on case-sensitive filesystems.
pd.to_pickle(
    obj=[modified_replace, remove, d_replace],
    filepath_or_buffer='../Data/Replacements.pkl',
)