In [1]:
import numpy as np
import pandas as pd
import glob, os
import warnings
warnings.filterwarnings("ignore")

In [2]:
# Read every currency file in a folder into a dict of DataFrames
def read_currencies(folder_path, _type='conventional'):
    """Load every currency file found in ``folder_path`` into a dict of DataFrames.

    Parameters
    ----------
    folder_path : str
        Directory searched (non-recursively) for data files.
    _type : str, default 'conventional'
        ``'conventional'`` reads ``*.csv`` files with ``pd.read_csv`` and keys
        each frame by the file name up to its first dot. Any other value reads
        ``*.xlsx`` files with ``pd.read_excel`` and keys each frame by the
        first space-separated token of the file name with ``'USD'`` appended.

    Returns
    -------
    dict
        Dataset name -> DataFrame, inserted in sorted file-path order.
    """
    conventional = _type == 'conventional'
    if conventional:
        pattern, reader = "*.csv", pd.read_csv
    else:
        pattern, reader = "*.xlsx", pd.read_excel

    datasets = {}
    # glob yields files in arbitrary, OS-dependent order; sorting keeps the
    # dict order (and anything derived from it) reproducible between runs.
    for file in sorted(glob.glob(os.path.join(folder_path, pattern))):
        # basename honours the platform's path separator, unlike split('/').
        base = os.path.basename(file)
        if conventional:
            name = base.split('.')[0]
        else:
            name = base.split(' ')[0] + 'USD'
        datasets[name] = reader(file)
    return datasets

In [3]:
# Load the raw tables: one DataFrame per file, keyed by a name derived from
# the file name (see read_currencies).
conven = read_currencies(
    folder_path="/Users/shiyang/Desktop/cryptocurrency/Conven Data",
    _type='conventional',
)
crypto = read_currencies(
    folder_path="/Users/shiyang/Desktop/cryptocurrency/Crypto Data",
    _type='crypto',
)

In [4]:
# Expand a dataset to one row per calendar day and fill the gaps (bfill, then ffill)
def fill_dataset(dataset, start_date, end_date):
    """Return ``dataset`` expanded to one row per calendar day in the given range.

    Rows whose ``Date`` lies in ``[start_date, end_date]`` are placed on a
    daily calendar. Missing days are filled backwards first (so leading gaps
    take the first available observation) and then forwards (so trailing gaps
    take the last one). The input frame is not modified.

    Parameters
    ----------
    dataset : pandas.DataFrame
        Must contain a ``Date`` column that ``pd.to_datetime`` can parse.
    start_date, end_date
        Inclusive bounds; anything ``pd.Timestamp`` accepts.

    Returns
    -------
    pandas.DataFrame
        ``Date`` as the first column followed by the remaining input columns,
        with a fresh 0..n-1 index and one row per day.

    Notes
    -----
    Matching is by exact timestamp, so rows whose ``Date`` is not at midnight
    do not land on the daily calendar. When a date occurs more than once the
    last occurrence is kept.
    """
    start, end = pd.Timestamp(start_date), pd.Timestamp(end_date)

    # assign() builds a new frame, leaving the caller's Date column untouched.
    parsed = dataset.assign(Date=pd.to_datetime(dataset['Date']))
    in_range = parsed[(parsed['Date'] >= start) & (parsed['Date'] <= end)]
    # reindex() requires unique labels.
    in_range = in_range.drop_duplicates(subset='Date', keep='last')

    calendar = pd.date_range(start, end)
    filled = in_range.set_index('Date').reindex(calendar).bfill().ffill()
    # reindex() may drop the index name; restore it so reset_index() yields 'Date'.
    return filled.rename_axis('Date').reset_index()

In [5]:
# Re-quote pairs named "...USD" as "USD...": invert the OHLC prices and rename the pair
def unify_unit(dataset, set_name):
    """Re-quote a ``...USD`` dataset as ``USD...`` by inverting its prices.

    Parameters
    ----------
    dataset : pandas.DataFrame
        Must contain ``Open``, ``Close``, ``High`` and ``Low`` columns when
        ``set_name`` ends in ``'USD'`` (a ``KeyError`` is raised otherwise).
    set_name : str
        Pair name. Only names whose last three characters are ``'USD'`` are
        converted.

    Returns
    -------
    (pandas.DataFrame, str)
        For a ``...USD`` name: a converted copy and the name with ``'USD'``
        moved to the front. For any other name: the inputs, unchanged.
    """
    if set_name[-3:] != 'USD':
        return dataset, set_name

    # Work on a copy so the caller's frame keeps its original quotes.
    inverted = dataset.copy()
    inverted['Open'] = 1 / dataset['Open']
    inverted['Close'] = 1 / dataset['Close']
    # Taking reciprocals reverses ordering: the old low becomes the new high
    # and vice versa, so the two columns must trade places.
    inverted['High'] = 1 / dataset['Low']
    inverted['Low'] = 1 / dataset['High']
    return inverted, 'USD' + set_name[:-3]

In [6]:
# Fill every conventional dataset over 2009-01-01..2022-01-01, then let
# unify_unit re-quote it. Iterate over a snapshot of the keys because entries
# are renamed while looping.
for pair in tuple(conven):
    conven[pair] = fill_dataset(conven[pair], '2009-01-01', '2022-01-01')
    converted, renamed = unify_unit(conven[pair], pair)
    if renamed == pair:
        continue
    # The pair was re-quoted: drop the old key and store it under the new name.
    conven.pop(pair)
    conven[renamed] = converted

In [7]:
# Fill every crypto dataset over 2009-01-01..2022-01-01, then let unify_unit
# re-quote it. Iterate over a snapshot of the keys because entries are
# renamed while looping.
for pair in tuple(crypto):
    crypto[pair] = fill_dataset(crypto[pair], '2009-01-01', '2022-01-01')
    converted, renamed = unify_unit(crypto[pair], pair)
    if renamed == pair:
        continue
    # The pair was re-quoted: drop the old key and store it under the new name.
    crypto.pop(pair)
    crypto[renamed] = converted

In [12]:
# calculate the returns
# r(t) = ln(p(t)) - ln(p(t-1))
def calculate_returns(dataset, _by='Close', how='mixed'):
    """Daily log returns ``r(t) = ln(p(t)) - ln(p(t-1))`` of one price column.

    Parameters
    ----------
    dataset : pandas.DataFrame
        Must contain a ``Date`` column and the ``_by`` column. Rows are taken
        in their existing order; the frame's index labels are irrelevant.
    _by : str, default 'Close'
        Name of the price column.
    how : {'mixed', 'positive', 'negative'}, default 'mixed'
        ``'mixed'`` returns the raw log returns. ``'positive'`` replaces every
        return that is not ``> 0`` with 0; ``'negative'`` replaces every
        return that is not ``< 0`` with 0 (NaN therefore becomes 0 in both).

    Returns
    -------
    pandas.Series
        One value per row from the second row onward, indexed by that row's
        ``Date``.

    Raises
    ------
    ValueError
        If ``how`` is not one of the three supported values.
    """
    # Validate before doing any work so a typo fails immediately.
    if how not in ('mixed', 'positive', 'negative'):
        raise ValueError(
            "how must be 'mixed', 'positive' or 'negative', got {!r}".format(how))

    # Cast to float so object-dtype price columns still go through np.log.
    log_prices = np.log(dataset[_by].to_numpy(dtype=float))
    # The first row has no predecessor, hence the [1:] on the dates.
    dates = pd.Index(dataset['Date'].to_numpy()[1:])
    returns = pd.Series(np.diff(log_prices), index=dates)

    if how == 'positive':
        return returns.where(returns > 0, 0.0)
    if how == 'negative':
        return returns.where(returns < 0, 0.0)
    return returns

In [13]:
# conventional currency returns: one CSV per flavour of calculate_returns
# (raw log returns, non-positive values zeroed, non-negative values zeroed)
conv_return_targets = (
    ('mixed', '/Users/shiyang/Desktop/cryptocurrency/Exp Data/v2/conv_returns.csv'),
    ('positive', '/Users/shiyang/Desktop/cryptocurrency/Exp Data/v2/conv_preturns.csv'),
    ('negative', '/Users/shiyang/Desktop/cryptocurrency/Exp Data/v2/conv_nreturns.csv'),
)
for how, csv_path in conv_return_targets:
    # One column per dataset, in the dict's key order.
    conv_returns = pd.concat(
        [calculate_returns(conven[name], how=how) for name in conven.keys()],
        axis=1,
    )
    conv_returns.columns = conven.keys()
    conv_returns.to_csv(csv_path)

In [14]:
# cryptocurrency returns: one CSV per flavour of calculate_returns
# (raw log returns, non-positive values zeroed, non-negative values zeroed)
cryp_return_targets = (
    ('mixed', '/Users/shiyang/Desktop/cryptocurrency/Exp Data/v2/crypto_returns.csv'),
    ('positive', '/Users/shiyang/Desktop/cryptocurrency/Exp Data/v2/crypto_preturns.csv'),
    ('negative', '/Users/shiyang/Desktop/cryptocurrency/Exp Data/v2/crypto_nreturns.csv'),
)
for how, csv_path in cryp_return_targets:
    # One column per dataset, in the dict's key order.
    cryp_returns = pd.concat(
        [calculate_returns(crypto[name], how=how) for name in crypto.keys()],
        axis=1,
    )
    cryp_returns.columns = crypto.keys()
    cryp_returns.to_csv(csv_path)

In [16]:
# calculate volatility
def calculate_volatility(dataset):
    """Per-row volatility estimate from open/high/low/close prices.

    With ``o, h, l, c`` the natural logs of Open, High, Low and Close, each
    row's value is::

        0.511*(h-l)**2 - 0.019*((c-o)*(h+l-2*o) - 2*(h-o)*(l-o)) - 0.383*(c-o)**2

    (the coefficients are those of the Garman-Klass range-based estimator).

    Parameters
    ----------
    dataset : pandas.DataFrame
        Must contain ``Date``, ``Open``, ``High``, ``Low`` and ``Close``.

    Returns
    -------
    pandas.Series
        One float per input row, in row order, indexed by the row's ``Date``.
    """
    # Cast to float so object-dtype price columns still go through np.log.
    prices = dataset[['Open', 'High', 'Low', 'Close']].to_numpy(dtype=float)
    o, h, l, c = np.log(prices).T

    # Computed column-wise so every row keeps its own value, even when two
    # rows share a Date.
    v = (
        0.511 * (h - l) ** 2
        - 0.019 * ((c - o) * (h + l - 2 * o) - 2 * (h - o) * (l - o))
        - 0.383 * (c - o) ** 2
    )
    return pd.Series(v, index=dataset['Date'].to_numpy())

In [25]:
# Volatility of every conventional dataset, one column per dataset.
conv_volat = pd.concat(
    [calculate_volatility(frame) for frame in conven.values()],
    axis=1,
)
conv_volat.columns = conven.keys()
conv_volat.to_csv('/Users/shiyang/Desktop/cryptocurrency/Exp Data/v2/conv_volatility.csv')

# Same for the crypto datasets.
cryp_volat = pd.concat(
    [calculate_volatility(frame) for frame in crypto.values()],
    axis=1,
)
cryp_volat.columns = crypto.keys()
cryp_volat.to_csv('/Users/shiyang/Desktop/cryptocurrency/Exp Data/v2/crypto_volatility.csv')

In [36]:
# Finally, save the Close column of every dataset as it currently stands in
# `conven` / `crypto`, one column per dataset.
conv_ex = pd.concat([frame.Close for frame in conven.values()], axis=1)
conv_ex.columns = conven.keys()
# The Date column of the last dataset supplies the row labels for the table.
conv_ex.index = conven[list(conven.keys())[-1]].Date
conv_ex.to_csv('/Users/shiyang/Desktop/cryptocurrency/Exp Data/v2/conv_exchange.csv')

cryp_ex = pd.concat([frame.Close for frame in crypto.values()], axis=1)
cryp_ex.columns = crypto.keys()
# The Date column of the last dataset supplies the row labels for the table.
cryp_ex.index = crypto[list(crypto.keys())[-1]].Date
cryp_ex.to_csv('/Users/shiyang/Desktop/cryptocurrency/Exp Data/v2/crypto_exchange.csv')

In [37]:
# Display the combined crypto Close table (index: Date, one column per dataset).
cryp_ex

Unnamed: 0_level_0,USDBCH,USDDOGE,USDBTC,USDLTC,USDUSDT,USDETH,USDLINK,USDBNB,USDADA,USDBUSD,USDXRP
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
2009-01-01,0.002421,3356.831151,0.007451,0.229885,0.826446,0.361011,5.285412,9.442871,40.048058,9.442871,170.010201
2009-01-02,0.002421,3356.831151,0.007451,0.229885,0.826446,0.361011,5.285412,9.442871,40.048058,9.442871,170.010201
2009-01-03,0.002421,3356.831151,0.007451,0.229885,0.826446,0.361011,5.285412,9.442871,40.048058,9.442871,170.010201
2009-01-04,0.002421,3356.831151,0.007451,0.229885,0.826446,0.361011,5.285412,9.442871,40.048058,9.442871,170.010201
2009-01-05,0.002421,3356.831151,0.007451,0.229885,0.826446,0.361011,5.285412,9.442871,40.048058,9.442871,170.010201
...,...,...,...,...,...,...,...,...,...,...,...
2021-12-28,0.002275,5.743825,0.000021,0.006854,1.000000,0.000263,0.049044,0.001869,0.714286,0.001869,1.172333
2021-12-29,0.002325,5.959476,0.000022,0.006872,1.000000,0.000276,0.050761,0.001946,0.751880,0.001946,1.224440
2021-12-30,0.002317,5.837712,0.000021,0.006744,1.000000,0.000269,0.050075,0.001927,0.735294,0.001927,1.191043
2021-12-31,0.002322,5.865103,0.000022,0.006825,1.000000,0.000272,0.051046,0.001954,0.763359,0.001954,1.203080
