# Importing Necessary Libraries and Data

In [None]:
import pandas as pd
import plotly.io as pio
import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots
# Make 'plotly_dark' the default template for the figures created below.
pio.templates.default = 'plotly_dark'
# Left disabled: uncommenting this line would set the default renderer to 'png'.
# pio.renderers.default = 'png'
def custom(t, x, y, figure=None):
    """Apply the notebook's shared title, axis labels and font colours.

    Args:
        t: Text for the figure title.
        x: Text for the x-axis title.
        y: Text for the y-axis title.
        figure: Object exposing ``update_layout`` that should be restyled.
            When omitted, the module-level ``fig`` is used, so existing
            ``custom(t, x, y)`` calls keep working unchanged.

    Raises:
        NameError: If ``figure`` is omitted and no global ``fig`` exists yet.
    """
    # The global is only looked up when no explicit figure is supplied.
    target = fig if figure is None else figure
    target.update_layout(
        title=t,
        xaxis_title=x,
        yaxis_title=y,
        font_color='#ff8c00',
        title_font_color='#00fe35',
    )

In [None]:
# Snapshot table of all currencies; the CSV's 'Name' and 'Symbol' columns
# together form the row index.
df = pd.read_csv(
    '../Data/all_currencies_clean_table.csv',
    index_col=['Name', 'Symbol'],
)

In [None]:
# Preview the first rows of the snapshot table.
df.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,MarketCap,Price,Circulate,Volume,Hourly,Daily,Weekly
Name,Symbol,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
bitcoin,BTC,150703000000.0,8940.74,16855760.0,8103300000.0,-0.63,11.22,3.58
ethereum,ETH,87546970000.0,897.729,97520490.0,3062570000.0,-0.41,11.99,-0.25
ripple,XRP,40627710000.0,1.04149,39009220000.0,2504810000.0,-0.37,32.86,23.94
bitcoincash,BCH,22946390000.0,1353.04,16959140.0,1149550000.0,0.11,7.89,18.32
cardano,ADA,11637290000.0,0.448847,25927070000.0,760372000.0,-0.71,27.62,23.61


In [None]:
# Summarise the index, column dtypes, non-null counts and memory usage.
df.info()

<class 'pandas.core.frame.DataFrame'>
MultiIndex: 1142 entries, ('bitcoin', 'BTC') to ('enigmaproject', 'XNG')
Data columns (total 7 columns):
 #   Column     Non-Null Count  Dtype  
---  ------     --------------  -----  
 0   MarketCap  1142 non-null   float64
 1   Price      1142 non-null   float64
 2   Circulate  1142 non-null   float64
 3   Volume     1142 non-null   float64
 4   Hourly     1142 non-null   float64
 5   Daily      1142 non-null   float64
 6   Weekly     1142 non-null   float64
dtypes: float64(7)
memory usage: 149.3+ KB


In the above dataset, we can see data about 1142 different cryptocurrencies at a single point in time (i.e. 02/09/2018 12:10pm).  
This makes the datapoints unrelated to one another, except for the fact that each of them represents a certain cryptocurrency.  
Thus one could say that all this data, picked feature-wise, would be discrete data.

In [None]:
# Load the per-coin price history. Column 0 of the file becomes the index and
# column 2 is parsed as datetimes.
tr = pd.read_csv(
    filepath_or_buffer = '../Data/clean_cryptocurrency_prices_by_date.csv',
    index_col = 0,
    parse_dates = [2],
    # pandas documents ``infer_datetime_format`` as a boolean switch, not a
    # format string; the previous '%d-%m-%Y %H:%M:%S' value was a misuse of it.
    # NOTE(review): pandas >= 2.0 deprecates this flag in favour of
    # ``date_format`` - switch once the environment is upgraded.
    infer_datetime_format = True,
    dayfirst = True
)

In [None]:
# Preview the first rows of the price-history table.
tr.head()

Unnamed: 0,Coin,Date,Price
0,0x,2017-08-16 14:09:21,0.111725
1,0x,2017-08-17 15:05:05,0.211486
2,0x,2017-08-18 15:04:52,0.283789
3,0x,2017-08-19 15:05:08,0.511434
4,0x,2017-08-20 15:05:03,0.429522


In [None]:
# Summarise the index, column dtypes, non-null counts and memory usage.
tr.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 570927 entries, 0 to 570926
Data columns (total 3 columns):
 #   Column  Non-Null Count   Dtype         
---  ------  --------------   -----         
 0   Coin    570927 non-null  object        
 1   Date    570927 non-null  datetime64[ns]
 2   Price   570927 non-null  float64       
dtypes: datetime64[ns](1), float64(1), object(1)
memory usage: 17.4+ MB


In [None]:
# NOTE(review): this repeats the tr.head() preview shown two cells earlier.
tr.head()

Unnamed: 0,Coin,Date,Price
0,0x,2017-08-16 14:09:21,0.111725
1,0x,2017-08-17 15:05:05,0.211486
2,0x,2017-08-18 15:04:52,0.283789
3,0x,2017-08-19 15:05:08,0.511434
4,0x,2017-08-20 15:05:03,0.429522


In this dataset, we can see how the price of each cryptocurrency changes over the period of time for which it has existed.  
Our time period of interest is 2014-2018, as that is when the cryptocurrency market boomed.  

In [None]:
# Coins excluded from the analysis that follows.
outliers = [
    'bitcoin', 'ethereum', 'ripple', 'bitcoincash',
    'bit20', 'projectx', '42coin', 'sprouts',
    'paccoin', 'kin', 'dimecoin', 'fedoracoin',
    # Flagged from the trend visualizations rather than the feature plots.
    'unityingot', 'ecoin',
]

These are all the observed outliers based on the first three features.  
In addition, there are two observational ones from the trend visualizations, unityingot and ecoin, which are included because of their enormous change percentages.

In [None]:
# Drop every coin listed in ``outliers``. A boolean mask on the first index
# level (the coin name) keeps each remaining row exactly once and in its
# original order, even if a name occurs under more than one symbol.
# ``.copy()`` detaches the result so columns can be added to it later without
# assigning into a slice of the original frame.
df = df[~df.index.get_level_values(0).isin(outliers)].copy()
# Coin names as a positionally indexed Series, reused for hover and bar labels.
name = pd.Series(df.index.get_level_values(0))
df.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,MarketCap,Price,Circulate,Volume,Hourly,Daily,Weekly
Name,Symbol,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
cardano,ADA,11637290000.0,0.448847,25927070000.0,760372000.0,-0.71,27.62,23.61
litecoin,LTC,9109557000.0,165.179,55149610.0,769065000.0,-1.62,13.37,28.81
stellar,XLM,8047186000.0,0.436583,18432200000.0,212032000.0,0.05,21.88,14.83
neo,NEO,7740330000.0,119.082,65000000.0,304126000.0,-1.08,6.72,2.78
eos,EOS,6547181000.0,9.92927,659381900.0,606048000.0,-1.05,19.34,6.47


## The Appropriate Features

### MarketCap

In [None]:
# Distribution of market capitalization across the remaining coins.
# NOTE(review): ``hover_data=df`` hands Plotly the frame itself; presumably it
# is iterated as column labels so that every column shows up on hover.
fig = px.violin(df, x='MarketCap', hover_name=name, hover_data=df)
custom(
    t='Violin Plot of Market Capitalization for all the Cryptocurrencies',
    x='Market Capitalization',
    y='Density of Cryptocurrencies',
)
# Initial view is limited to [-3e8, 2e9], with a range slider under the axis.
fig.update_xaxes(
    range=[-3e8, 2e9],
    rangeslider={'visible': True, 'bgcolor': '#202020'},
)
fig.show()

### Price

In [None]:
# Distribution of the snapshot price across the remaining coins.
# NOTE(review): ``hover_data=df`` hands Plotly the frame itself; presumably it
# is iterated as column labels so that every column shows up on hover.
fig = px.violin(df, x='Price', hover_name=name, hover_data=df)
custom(
    y='Density of Cryptocurrencies',
    x='Price (in USD)',
    t='Violin Plot of the Prices of all the Cryptocurrencies',
)
# Initial view is limited to [-3e2, 2e3], with a range slider under the axis.
fig.update_xaxes(
    range=[-3e2, 2e3],
    rangeslider={'visible': True, 'bgcolor': '#202020'},
)
fig.show()

### Circulating Supply

In [None]:
# Distribution of the circulating supply across the remaining coins.
fig = px.violin(
    data_frame = df,
    x = 'Circulate',
    hover_data = df,
    hover_name = name
)
# The figure is a violin plot, so the title and y label now say so, in line
# with the MarketCap and Price cells (they previously described a histogram).
custom(
    t = 'Violin Plot of the Circulating Supply of all the Cryptocurrencies',
    x = 'Circulating Supply',
    y = 'Density of Cryptocurrencies'
)
# Initial view is limited to [-7e9, 38e9], with a range slider under the axis.
fig.update_xaxes(
    rangeslider_visible = True,
    rangeslider_bgcolor = '#202020',
    range = [-7e9, 38e9]
)
fig.show()

After the removal of the 14 outliers (12 feature-based and 2 trend-based), our data looks quite similar, even though it is a bit more spread out.  
This is because there will always be values that easily dominate the rest because of the Pareto principle.

## Price Percentage Changes (Hourly, Daily and Weekly trend)

In [None]:
# One horizontal bar chart per trend period, side by side, sharing the y axis
# that carries the coin names.
trends = ['Hourly', 'Daily', 'Weekly']
fig = make_subplots(
    cols = len(trends),
    shared_yaxes = True,
    horizontal_spacing = 0,
    subplot_titles = trends
)
for column, period in enumerate(trends, start = 1):
    trend = df[period]
    fig.add_bar(
        x = trend,
        y = name,
        # Name the trace after its period instead of a bare loop index.
        name = period,
        orientation = 'h',
        showlegend = False,
        # Green only for strictly positive changes; zero or negative is red.
        marker_color = ['green' if change > 0 else 'red' for change in trend],
        row = 1,
        col = column
    )
fig.update_layout(
    height = 4000,
    width = 1369,
    title = 'Percentage Change in the Price of Coin',
    font_color = '#ff8c00',
    title_font_color = '#00fe35'
)
fig.show()

From this graph, we can see that on an hourly basis the market is very bearish, since we have more red bars than green.  
However, on a daily basis the market is extremely bullish, since we have far more greens than reds and there is a lot of strength in the trend.  
On a weekly basis, we have an almost equal number of reds and greens, but the greens are much longer than the reds.  
This implies that the market is following a slightly weak bullish trend.  

In [None]:
def pieplot(name, data=None):
    """Count profitable and loss-making rows for one percentage-change column.

    Args:
        name: Label of the column to inspect.
        data: DataFrame holding that column. Defaults to the module-level
            ``df`` so existing ``pieplot(name)`` calls keep working.

    Returns:
        ``[profit, loss]``, where ``profit`` is the number of rows whose value
        is strictly greater than zero and ``loss`` is the number of all other
        rows (zero, negative or missing values).
    """
    frame = df if data is None else data
    in_profit = frame[name] > 0
    # Summing the mask avoids the KeyError that indexing ``value_counts()``
    # with True/False raises when every row falls on the same side.
    profit = int(in_profit.sum())
    return [profit, len(in_profit) - profit]
# Three pie charts in a single row, one per entry of ``trends``.
fig = make_subplots(
    cols=3,
    specs=[[{'type': 'domain'} for _ in range(3)]],
    subplot_titles=['Hourly', 'Daily', 'Weekly'],
)
# Pair each of the three subplot columns (1-based) with its trend period.
for column, period in zip(range(1, 4), trends):
    fig.add_pie(
        row=1,
        col=column,
        name=period,
        labels=['Profit', 'Loss'],
        values=pieplot(period),
        marker_colors=['green', 'red'],
    )
fig.update_layout(
    title_font_color='#00fe35',
    font_color='#ff8c00',
    title='Profit-Loss Pie Plots',
)
fig.show()

## Volatility

In [None]:
# Volatility = standard deviation of each coin's recorded prices in ``tr``.
# A single groupby pass replaces re-filtering the whole price table once per
# coin. Mapping ``name`` through the per-coin result keeps the coin order of
# ``df``; coins without any price history map to NaN.
volatility = name.map(tr.groupby('Coin')['Price'].std())
# Assign by position: ``volatility`` is indexed 0..n-1 while ``df`` is indexed
# by (Name, Symbol).
df['Volatility'] = volatility.to_numpy()
df.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,MarketCap,Price,Circulate,Volume,Hourly,Daily,Weekly,Volatility
Name,Symbol,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
cardano,ADA,11637290000.0,0.448847,25927070000.0,760372000.0,-0.71,27.62,23.61,0.310058
litecoin,LTC,9109557000.0,165.179,55149610.0,769065000.0,-1.62,13.37,28.81,45.002213
stellar,XLM,8047186000.0,0.436583,18432200000.0,212032000.0,0.05,21.88,14.83,0.100708
neo,NEO,7740330000.0,119.082,65000000.0,304126000.0,-1.08,6.72,2.78,34.8425
eos,EOS,6547181000.0,9.92927,659381900.0,606048000.0,-1.05,19.34,6.47,4.345754


In the above code cell, we calculate the volatility of each cryptocurrency.  
We calculate it by taking the standard deviation of its price over its respective active time period.

In [None]:
# Distribution of the per-coin volatility computed in the previous cell.
# NOTE(review): ``hover_data=df`` hands Plotly the frame itself; presumably it
# is iterated as column labels so that every column shows up on hover.
fig = px.violin(df, x='Volatility', hover_name=name, hover_data=df)
custom(
    y='Density of Cryptocurrencies',
    x='Volatility (in USD)',
    t='Violin Plot of the Volatility of all the Cryptocurrencies',
)
# Initial view is limited to [-150, 500], with a range slider under the axis.
fig.update_xaxes(
    range=[-150, 500],
    rangeslider={'visible': True, 'bgcolor': '#202020'},
)
fig.show()

In [None]:
# Heatmap of the pairwise correlations returned by ``df.corr()``.
fig = px.imshow(df.corr(), color_continuous_scale='algae')
# Same font colours as the other figures; both axes list the features.
fig.update_layout(
    font_color='#ff8c00',
    title_font_color='#00fe35',
    title='Heatmap of the Correlation Matrix of our dataset',
    yaxis_title='Features',
    xaxis_title='Features',
)
fig.show()

- From the above _Heatmap_ we can observe 3 pairs with high correlation. The highest correlation is between the features __Price__ and __Volatility__, followed by the correlation between __Volume__ and __Market Capitalization__, and lastly the one between __Daily Price Change__ and __Weekly Price Change__.  
- We observe very high correlation between __Price__ and __Volatility__. A possible reason could be that popular coins generally have a high __Volatility__, since people from all corners of the world trade them at all times of the day. And we can expect a higher __Price__ as a direct consequence of high popularity.  
- High correlation between __Volume__ and __Market Capitalization__ stems from the fact that __Market Capitalization__ is the product of the total number of coins in __Circulation__ and the __Price__ of the coin in the market, and coins with a larger __Market Capitalization__ tend to be traded in larger __Volume__.  
- Correlation between __Daily Price Change__ and __Weekly Price Change__ is relatively low but substantial enough. The most likely reason is that our dataset encompasses the late 2017 period, which was the time when cryptocurrencies became very popular. And since that period was very bullish, a coin which experienced a rising trend on a __Weekly__ basis was likely to experience a rising trend on a __Daily__ basis as well. Hence we have a slight correlation between the two features.

## Price vs Volatility

In [None]:
# Scatter of price against volatility with an 'ols' trendline.
# NOTE(review): Plotly's 'ols' trendline presumably needs statsmodels to be
# installed - confirm for the target environment.
fig = px.scatter(
    data_frame=df,
    x='Price',
    y='Volatility',
    trendline='ols',
    hover_name=name,
    hover_data=df,
)
custom(
    y='Volatility of cryptocurrencies (in USD)',
    x='Price of Cryptocurrencies (in USD)',
    t='Price vs Volatility analysis of all the Cryptocurrencies',
)
# Restrict the visible window to prices 0-1200 and volatilities 0-500.
fig.update_yaxes(range=[0, 500])
fig.update_xaxes(range=[0, 1200])
fig.show()

Here, as we can observe from the pattern between the price and volatility, the majority of the data is linearly related, with the representation roughly showing (2y = x). But there are exceptions, mostly the outliers with a price value greater than 4k, which don't fit into this linear pattern. There are many possible factors why volatility is proportional to price. Popularity is a highly possible reason. Cryptocurrency markets are open 24-7. Therefore there are people all across the globe trading cryptocurrencies at almost all times of the day, depending on their time-zones. So the more popular a cryptocurrency is, the more it will be traded by people. This means we have more investors in the popular currencies (like BTC and ETH), making them more expensive.

## Market Capitalization vs Volume

In [None]:
# Scatter of market capitalization (x) against volume (y).
# ``color_continuous_scale`` was dropped: no ``color`` column is mapped, so it
# had nothing to apply to.
fig = px.scatter(df, x = 'MarketCap', y = 'Volume')
# Axis titles now match the plotted columns (they were swapped before) and the
# 'Maket' typo in the title is fixed.
custom(
    t = 'Market Cap. vs Volume of all the Cryptocurrencies',
    x = 'Market Cap. of Cryptocurrencies (in USD)',
    y = 'Volume of Cryptocurrencies (in USD)'
)
fig.show()

In this graph of Market Cap. and Volume, we can observe the datapoints grouped towards the origin of the graph. On taking an overall look including the outliers, this looks like a linear pattern with a very low slope. But if we observe the grouped data points, the pattern is visible as a line running parallel to the x-axis.

The reason for this pattern is that most of our cryptocurrencies lie between 0 and 0.5 billion and contain a variety of market caps ranging from some thousands to millions, thus showing a straight line parallel to the x-axis.

## Daily vs Weekly

In [None]:
# Scatter of daily against weekly percentage change, titled and styled like
# the other figures in this notebook.
fig = px.scatter(df, x = 'Daily', y = 'Weekly')
custom(
    t = 'Daily vs Weekly Price Change of all the Cryptocurrencies',
    x = 'Daily Price Change (in %)',
    y = 'Weekly Price Change (in %)'
)
fig.show()

Daily and Weekly have a low correlation. This is visible in the scatter plot above. The only reason that we see a slight correlation is that our data covers the time period when cryptocurrencies became very popular and the markets became very bullish. Hence both weekly trends and daily trends are bullish in nature, which explains the slight correlation.

# Summary

1. Volatility and price of a cryptocurrency are highly correlated because both independently depend on the popularity of the coin.

2. Volume and Market Cap. have a high correlation: Market Cap. is (Circulating Supply * Price), and coins with a larger Market Cap. tend to be traded in larger volumes.

3. At the time stamp at which the data was collected, the market shows a bearish performance on the hourly rate, a very bullish performance on the daily rate and an almost balanced performance on the weekly rate.

4. Weekly trends and daily trends are weakly correlated, because the overall market trend is bullish.

<a style='text-decoration:none;line-height:16px;display:flex;color:#5B5B62;padding:10px;justify-content:end;' href='https://deepnote.com?utm_source=created-in-deepnote-cell&projectId=c7c21189-3596-4bb0-b26b-62d9c9762408' target="_blank">
Created in <span style='font-weight:600;margin-left:4px;'>Deepnote</span></a>