# Importing and starting the work on HW2

In [1]:
import pandas as pd
import numpy as np
import requests

In [2]:
def get_data_coin(token):
    res = requests.get(
        f'https://api.cryptowat.ch/markets/coinbase-pro/{token}usd/ohlc',
        params={
            'periods': '3600',
            'after': str(int(pd.Timestamp('2021-11-22').timestamp()))
        }
    )

    df = pd.DataFrame(
        res.json()['result']['3600'],
        columns=['ts', 'open', 'high', 'low', 'close', 'volume', 'volumeUSD']
    )
    df['ts'] = pd.to_datetime(df.ts, unit='s')
    df['token'] = token
    
    return df


In [3]:
tokens = ['ETH', 'SOL', 'AVAX', 'USDT', 'FLOW']

In [4]:
df_eth = get_data_coin('ETH')
df_sol = get_data_coin('SOL')
df_avax = get_data_coin('AVAX')
df_usdt = get_data_coin('USDT')

In [5]:
df_usdt.head()

Unnamed: 0,ts,open,high,low,close,volume,volumeUSD,token
0,2021-11-22 00:00:00,1.001,1.0017,1.0009,1.0012,9987681.09,9999090.0,USDT
1,2021-11-22 01:00:00,1.0012,1.0014,1.0012,1.0013,7766318.34,7775823.0,USDT
2,2021-11-22 02:00:00,1.0012,1.0013,1.0011,1.0012,7979323.48,7988793.0,USDT
3,2021-11-22 03:00:00,1.0012,1.0014,1.001,1.0011,10681505.84,10694580.0,USDT
4,2021-11-22 04:00:00,1.0011,1.0011,1.0008,1.0009,3567405.96,3570749.0,USDT


In [6]:
def get_data_kraken(token):
    res = requests.get(
        f'https://api.cryptowat.ch/markets/kraken/{token}usd/ohlc',
        params={
            'periods': '3600',
            'after': str(int(pd.Timestamp('2021-11-22').timestamp()))
        }
    )

    df = pd.DataFrame(
        res.json()['result']['3600'],
        columns=['ts', 'open', 'high', 'low', 'close', 'volume', 'volumeUSD']
    )
    df['ts'] = pd.to_datetime(df.ts, unit='s')
    df['token'] = token
    
    return df


In [7]:
df_flow = get_data_kraken('FLOW')

In [8]:
df_flow.head()

Unnamed: 0,ts,open,high,low,close,volume,volumeUSD,token
0,2021-11-22 00:00:00,13.887,14.169,13.887,14.15,23261.381369,327475.506965,FLOW
1,2021-11-22 01:00:00,14.053,14.453,13.543,13.621,29108.788932,411767.484368,FLOW
2,2021-11-22 02:00:00,13.534,13.862,13.387,13.753,24737.688935,338660.037818,FLOW
3,2021-11-22 03:00:00,13.753,13.813,13.435,13.652,10224.587462,139852.146013,FLOW
4,2021-11-22 04:00:00,13.652,13.652,13.382,13.581,5998.121286,81263.038401,FLOW


In [9]:
df_list = [df_eth, df_sol, df_avax, df_usdt, df_flow]
df_base = pd.concat(df_list)


In [10]:
df = df_base.set_index('ts')

In [11]:
df.head()

Unnamed: 0_level_0,open,high,low,close,volume,volumeUSD,token
ts,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
2021-11-22 00:00:00,4317.98,4342.24,4246.07,4262.99,7262.562789,31230770.0,ETH
2021-11-22 01:00:00,4263.04,4270.34,4212.45,4234.37,8437.946084,35745860.0,ETH
2021-11-22 02:00:00,4234.87,4246.72,4171.17,4217.89,9259.72537,38956390.0,ETH
2021-11-22 03:00:00,4217.88,4223.48,4163.58,4193.47,9259.899519,38820320.0,ETH
2021-11-22 04:00:00,4192.95,4213.59,4147.0,4168.35,7934.546906,33154760.0,ETH


## Getting total USD volume traded for each token in a dataframe, sorted from highest volume to lowest volume.

In [12]:
df.volumeUSD.describe()

count    3.285000e+03
mean     1.209168e+07
std      2.052207e+07
min      1.970443e+01
25%      1.562095e+06
50%      5.397404e+06
75%      1.370922e+07
max      3.702769e+08
Name: volumeUSD, dtype: float64

In [13]:
total_usd_vol_per_token = df.groupby('token').sum().volumeUSD.sort_values(ascending=False)

In [14]:
total_usd_vol_per_token

token
ETH     2.484148e+10
SOL     8.138634e+09
AVAX    4.398566e+09
USDT    2.245864e+09
FLOW    9.661679e+07
Name: volumeUSD, dtype: float64

## Adding a column that calculates the close price ratio between ETH and SOL for each house (i.e. close price of ETH / close price of SOL for each period)

In [22]:
df

Unnamed: 0_level_0,open,high,low,close,volume,volumeUSD,token
ts,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
2021-11-22 00:00:00,4317.980,4342.240,4246.070,4262.990,7262.562789,3.123077e+07,ETH
2021-11-22 01:00:00,4263.040,4270.340,4212.450,4234.370,8437.946084,3.574586e+07,ETH
2021-11-22 02:00:00,4234.870,4246.720,4171.170,4217.890,9259.725370,3.895639e+07,ETH
2021-11-22 03:00:00,4217.880,4223.480,4163.580,4193.470,9259.899519,3.882032e+07,ETH
2021-11-22 04:00:00,4192.950,4213.590,4147.000,4168.350,7934.546906,3.315476e+07,ETH
...,...,...,...,...,...,...,...
2021-12-19 04:00:00,8.565,8.610,8.565,8.600,1797.236578,1.544555e+04,FLOW
2021-12-19 05:00:00,8.609,8.630,8.609,8.628,2454.143371,2.116960e+04,FLOW
2021-12-19 06:00:00,8.617,8.628,8.582,8.593,878.909600,7.555117e+03,FLOW
2021-12-19 07:00:00,8.580,8.580,8.557,8.566,3754.982212,3.217920e+04,FLOW


### Tried working with this but didn't work for some reason

In [23]:
df_token = df.groupby('token')
df[df_token]

  result = np.asarray(values, dtype=dtype)


TypeError: unhashable type: 'DataFrame'

In [17]:
df[(df['token'] == 'ETH') | (df['token'] == 'SOL')]['close', 'token']

KeyError: ('close', 'token')

In [24]:
df[df['token'] == ('ETH', 'SOL')]

Unnamed: 0_level_0,open,high,low,close,volume,volumeUSD,token
ts,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1


### Will be trying a new approach for the task 

In [29]:
df_eth_token = df[df['token'] == ('ETH')]
df_eth_token

Unnamed: 0_level_0,open,high,low,close,volume,volumeUSD,token
ts,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
2021-11-22 00:00:00,4317.98,4342.24,4246.07,4262.99,7262.562789,3.123077e+07,ETH
2021-11-22 01:00:00,4263.04,4270.34,4212.45,4234.37,8437.946084,3.574586e+07,ETH
2021-11-22 02:00:00,4234.87,4246.72,4171.17,4217.89,9259.725370,3.895639e+07,ETH
2021-11-22 03:00:00,4217.88,4223.48,4163.58,4193.47,9259.899519,3.882032e+07,ETH
2021-11-22 04:00:00,4192.95,4213.59,4147.00,4168.35,7934.546906,3.315476e+07,ETH
...,...,...,...,...,...,...,...
2021-12-19 04:00:00,3945.33,4031.71,3945.00,3990.64,10713.408764,4.277069e+07,ETH
2021-12-19 05:00:00,3990.63,4039.73,3990.63,4010.91,4960.046194,1.989931e+07,ETH
2021-12-19 06:00:00,4010.91,4020.38,3985.13,3990.43,2848.402892,1.139783e+07,ETH
2021-12-19 07:00:00,3989.72,3993.15,3970.01,3972.45,2478.724828,9.875826e+06,ETH


In [30]:
df_sol_token = df[df['token'] == ('SOL')]
df_sol_token

Unnamed: 0_level_0,open,high,low,close,volume,volumeUSD,token
ts,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
2021-11-22 00:00:00,230.483,234.688,228.805,230.785,93531.805,2.172284e+07,SOL
2021-11-22 01:00:00,230.752,232.269,225.052,226.000,88840.999,2.017188e+07,SOL
2021-11-22 02:00:00,225.952,227.996,223.812,227.037,95224.398,2.153562e+07,SOL
2021-11-22 03:00:00,227.012,227.485,221.900,223.117,73456.342,1.646345e+07,SOL
2021-11-22 04:00:00,223.129,224.545,220.080,220.652,78399.621,1.742352e+07,SOL
...,...,...,...,...,...,...,...
2021-12-19 04:00:00,181.370,184.970,180.690,184.630,55973.706,1.026771e+07,SOL
2021-12-19 05:00:00,184.660,186.960,184.270,186.740,42932.460,7.967289e+06,SOL
2021-12-19 06:00:00,186.740,187.910,185.450,185.800,41907.455,7.829859e+06,SOL
2021-12-19 07:00:00,185.780,187.490,185.580,187.360,22181.303,4.144035e+06,SOL


In [37]:
df_close_eth_sol = (df_eth['close'] / df_sol['close'])
df_close_eth_sol

0      18.471694
1      18.736150
2      18.577985
3      18.794937
4      18.891059
         ...    
652    21.614256
653    21.478580
654    21.477018
655    21.202231
656    21.205261
Name: close, Length: 657, dtype: float64

In [44]:
df_eth[['ts', 'close']]

Unnamed: 0,ts,close
0,2021-11-22 00:00:00,4262.99
1,2021-11-22 01:00:00,4234.37
2,2021-11-22 02:00:00,4217.89
3,2021-11-22 03:00:00,4193.47
4,2021-11-22 04:00:00,4168.35
...,...,...
652,2021-12-19 04:00:00,3990.64
653,2021-12-19 05:00:00,4010.91
654,2021-12-19 06:00:00,3990.43
655,2021-12-19 07:00:00,3972.45


In [56]:
df_eth_close = df_eth[['ts', 'close']].set_index('ts')
df_sol_close = df_sol[['ts', 'close']].set_index('ts')

df_close_eth_sol = df_eth_close.div(df_sol_close)
df_close_eth_sol

Unnamed: 0_level_0,close
ts,Unnamed: 1_level_1
2021-11-22 00:00:00,18.471694
2021-11-22 01:00:00,18.736150
2021-11-22 02:00:00,18.577985
2021-11-22 03:00:00,18.794937
2021-11-22 04:00:00,18.891059
...,...
2021-12-19 04:00:00,21.614256
2021-12-19 05:00:00,21.478580
2021-12-19 06:00:00,21.477018
2021-12-19 07:00:00,21.202231


### Not really sure from the assignment whenever this is required

In [64]:
df_with_eth_sol = df

In [66]:
df_close_eth_sol['token'] = 'ETH/SOL'

In [68]:
df_with_eth_sol.append(df_close_eth_sol)

Unnamed: 0_level_0,open,high,low,close,volume,volumeUSD,token
ts,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
2021-11-22 00:00:00,4317.98,4342.24,4246.07,4262.990000,7262.562789,3.123077e+07,ETH
2021-11-22 01:00:00,4263.04,4270.34,4212.45,4234.370000,8437.946084,3.574586e+07,ETH
2021-11-22 02:00:00,4234.87,4246.72,4171.17,4217.890000,9259.725370,3.895639e+07,ETH
2021-11-22 03:00:00,4217.88,4223.48,4163.58,4193.470000,9259.899519,3.882032e+07,ETH
2021-11-22 04:00:00,4192.95,4213.59,4147.00,4168.350000,7934.546906,3.315476e+07,ETH
...,...,...,...,...,...,...,...
2021-12-19 04:00:00,,,,21.614256,,,ETH/SOL
2021-12-19 05:00:00,,,,21.478580,,,ETH/SOL
2021-12-19 06:00:00,,,,21.477018,,,ETH/SOL
2021-12-19 07:00:00,,,,21.202231,,,ETH/SOL


## Changing the name of the `volume` and `volumeUSD` columns to `volumeBase` and `volumeTerm`

In [57]:
df.head()

Unnamed: 0_level_0,open,high,low,close,volume,volumeUSD,token
ts,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
2021-11-22 00:00:00,4317.98,4342.24,4246.07,4262.99,7262.562789,31230770.0,ETH
2021-11-22 01:00:00,4263.04,4270.34,4212.45,4234.37,8437.946084,35745860.0,ETH
2021-11-22 02:00:00,4234.87,4246.72,4171.17,4217.89,9259.72537,38956390.0,ETH
2021-11-22 03:00:00,4217.88,4223.48,4163.58,4193.47,9259.899519,38820320.0,ETH
2021-11-22 04:00:00,4192.95,4213.59,4147.0,4168.35,7934.546906,33154760.0,ETH


In [61]:
df_renamed = df.rename(
    columns={
        'volume':'volumeBase',
        'volumeUSD':'volumeTerm'
    }
)

In [62]:
df_renamed.head()

Unnamed: 0_level_0,open,high,low,close,volumeBase,volumeTerm,token
ts,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
2021-11-22 00:00:00,4317.98,4342.24,4246.07,4262.99,7262.562789,31230770.0,ETH
2021-11-22 01:00:00,4263.04,4270.34,4212.45,4234.37,8437.946084,35745860.0,ETH
2021-11-22 02:00:00,4234.87,4246.72,4171.17,4217.89,9259.72537,38956390.0,ETH
2021-11-22 03:00:00,4217.88,4223.48,4163.58,4193.47,9259.899519,38820320.0,ETH
2021-11-22 04:00:00,4192.95,4213.59,4147.0,4168.35,7934.546906,33154760.0,ETH


## Creating a fat table indexed by the timestamp, and each column is the close price of each token (i.e. this should be a table of  200 rows and 5 columns)

In [69]:
df.head()

Unnamed: 0_level_0,open,high,low,close,volume,volumeUSD,token
ts,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
2021-11-22 00:00:00,4317.98,4342.24,4246.07,4262.99,7262.562789,31230770.0,ETH
2021-11-22 01:00:00,4263.04,4270.34,4212.45,4234.37,8437.946084,35745860.0,ETH
2021-11-22 02:00:00,4234.87,4246.72,4171.17,4217.89,9259.72537,38956390.0,ETH
2021-11-22 03:00:00,4217.88,4223.48,4163.58,4193.47,9259.899519,38820320.0,ETH
2021-11-22 04:00:00,4192.95,4213.59,4147.0,4168.35,7934.546906,33154760.0,ETH


In [77]:
df_close_tokens = df

In [78]:
df_close_tokens[['close', 'token']]

Unnamed: 0_level_0,close,token
ts,Unnamed: 1_level_1,Unnamed: 2_level_1
2021-11-22 00:00:00,4262.990,ETH
2021-11-22 01:00:00,4234.370,ETH
2021-11-22 02:00:00,4217.890,ETH
2021-11-22 03:00:00,4193.470,ETH
2021-11-22 04:00:00,4168.350,ETH
...,...,...
2021-12-19 04:00:00,8.600,FLOW
2021-12-19 05:00:00,8.628,FLOW
2021-12-19 06:00:00,8.593,FLOW
2021-12-19 07:00:00,8.566,FLOW


In [222]:
df_close_tokens_seperate = pd.DataFrame()

In [223]:
df_close_tokens_seperate['ETH'] = df_close_tokens['close'][df_close_tokens['token']=='ETH']
df_close_tokens_seperate['SOL'] = df_close_tokens['close'][df_close_tokens['token']=='SOL']
df_close_tokens_seperate['AVAX'] = df_close_tokens['close'][df_close_tokens['token']=='AVAX']
df_close_tokens_seperate['USDT'] = df_close_tokens['close'][df_close_tokens['token']=='USDT']
df_close_tokens_seperate['FLOW'] = df_close_tokens['close'][df_close_tokens['token']=='FLOW']

In [224]:
df_close_tokens_seperate

Unnamed: 0_level_0,ETH,SOL,AVAX,USDT,FLOW
ts,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2021-11-22 00:00:00,4262.99,230.785,128.67,1.0012,14.150
2021-11-22 01:00:00,4234.37,226.000,134.85,1.0013,13.621
2021-11-22 02:00:00,4217.89,227.037,132.54,1.0012,13.753
2021-11-22 03:00:00,4193.47,223.117,133.38,1.0011,13.652
2021-11-22 04:00:00,4168.35,220.652,136.29,1.0009,13.581
...,...,...,...,...,...
2021-12-19 04:00:00,3990.64,184.630,115.09,1.0004,8.600
2021-12-19 05:00:00,4010.91,186.740,114.73,1.0004,8.628
2021-12-19 06:00:00,3990.43,185.800,113.62,1.0004,8.593
2021-12-19 07:00:00,3972.45,187.360,113.60,1.0002,8.566


In [225]:
df_log_return = df_close_tokens_seperate

In [226]:
df_log_return['ETH'][0]

4262.99

In [227]:
df_log_return

Unnamed: 0_level_0,ETH,SOL,AVAX,USDT,FLOW
ts,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2021-11-22 00:00:00,4262.99,230.785,128.67,1.0012,14.150
2021-11-22 01:00:00,4234.37,226.000,134.85,1.0013,13.621
2021-11-22 02:00:00,4217.89,227.037,132.54,1.0012,13.753
2021-11-22 03:00:00,4193.47,223.117,133.38,1.0011,13.652
2021-11-22 04:00:00,4168.35,220.652,136.29,1.0009,13.581
...,...,...,...,...,...
2021-12-19 04:00:00,3990.64,184.630,115.09,1.0004,8.600
2021-12-19 05:00:00,4010.91,186.740,114.73,1.0004,8.628
2021-12-19 06:00:00,3990.43,185.800,113.62,1.0004,8.593
2021-12-19 07:00:00,3972.45,187.360,113.60,1.0002,8.566


In [228]:
for token in df_log_return.columns:
    df_log_return[token + '_log_return'] = np.nan
    for i in range(1,len(df_log_return)):
        df_log_return[token + '_log_return'][i] = np.log(df_log_return[token][i] / [df_log_return[token][i-1]])


In [229]:
df_log_return

Unnamed: 0_level_0,ETH,SOL,AVAX,USDT,FLOW,ETH_log_return,SOL_log_return,AVAX_log_return,USDT_log_return,FLOW_log_return
ts,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
2021-11-22 00:00:00,4262.99,230.785,128.67,1.0012,14.150,,,,,
2021-11-22 01:00:00,4234.37,226.000,134.85,1.0013,13.621,-0.006736,-0.020952,0.046912,0.0001,-0.038102
2021-11-22 02:00:00,4217.89,227.037,132.54,1.0012,13.753,-0.003900,0.004578,-0.017279,-0.0001,0.009644
2021-11-22 03:00:00,4193.47,223.117,133.38,1.0011,13.652,-0.005806,-0.017417,0.006318,-0.0001,-0.007371
2021-11-22 04:00:00,4168.35,220.652,136.29,1.0009,13.581,-0.006008,-0.011109,0.021583,-0.0002,-0.005214
...,...,...,...,...,...,...,...,...,...,...
2021-12-19 04:00:00,3990.64,184.630,115.09,1.0004,8.600,0.011467,0.017649,0.002871,0.0000,0.005480
2021-12-19 05:00:00,4010.91,186.740,114.73,1.0004,8.628,0.005067,0.011363,-0.003133,0.0000,0.003251
2021-12-19 06:00:00,3990.43,185.800,113.62,1.0004,8.593,-0.005119,-0.005046,-0.009722,0.0000,-0.004065
2021-12-19 07:00:00,3972.45,187.360,113.60,1.0002,8.566,-0.004516,0.008361,-0.000176,-0.0002,-0.003147


In [236]:
for token in df_log_return.columns:
    [token +'_mean'] = df_log_return[token + '_log_return'].mean()

SyntaxError: cannot assign to operator (1554505083.py, line 2)

In [231]:
df_log_return.fillna(
    for token in df_log_return.columns:
        df_log_return[token + '_log_return'].mean())

SyntaxError: invalid syntax (1335092893.py, line 1)

In [246]:
df_log_return.columns[5:].tolist()

['ETH_log_return',
 'SOL_log_return',
 'AVAX_log_return',
 'USDT_log_return',
 'FLOW_log_return']

In [250]:
df_log_return[df_log_return.columns[5:].tolist()].corr

<bound method DataFrame.corr of                      ETH_log_return  SOL_log_return  AVAX_log_return  \
ts                                                                     
2021-11-22 00:00:00             NaN             NaN              NaN   
2021-11-22 01:00:00       -0.006736       -0.020952         0.046912   
2021-11-22 02:00:00       -0.003900        0.004578        -0.017279   
2021-11-22 03:00:00       -0.005806       -0.017417         0.006318   
2021-11-22 04:00:00       -0.006008       -0.011109         0.021583   
...                             ...             ...              ...   
2021-12-19 04:00:00        0.011467        0.017649         0.002871   
2021-12-19 05:00:00        0.005067        0.011363        -0.003133   
2021-12-19 06:00:00       -0.005119       -0.005046        -0.009722   
2021-12-19 07:00:00       -0.004516        0.008361        -0.000176   
2021-12-19 08:00:00        0.002542        0.002399         0.004830   

                     USDT_log_r