# The social aspect of cryptocurrencies
This app analyzes the connections between cryptocurrencies' market capitalization and social activity. Specifically, it explores the relationship between market cap, Reddit subscribers, Twitter followers, and Alexa rank for the top 100 cryptocurrencies.


### How does exchange listing affect the price of a coin?
#### Will coins listed in more exchanges have a higher appreciation in price?


### From the top 100 coins, how many coins have appreciated double digits? triple? quadruple?

### Does country of origin affect price of coin?


### What is the relationship between market cap and percentage gains in the last 3 years?

### How have internet interactions changed over the last 3 years for cryptocurrencies?

#### Look at twitter followers for accounts, re

In [1]:
%matplotlib notebook

In [2]:
#import dependencies
import numpy as np
import pandas as pd
import requests
import time
from scipy import stats
from pprint import pprint
from matplotlib import pyplot as plt
import os
import seaborn as sns

In [3]:
# Locate the three CSV inputs (all under the local "data" folder) and load the first one

# Build the three paths in one pass
crypto_path, social_2018_path, social_2021_path = (
    os.path.join("data", file_name)
    for file_name in (
        "cryptocurrencies_data_2021.csv",
        "2018_social.csv",
        "2021_social.csv",
    )
)

# Read the 2021 market data, discard the stray "Unnamed: 0" column
# (presumably an index written out with the CSV), and count distinct values per column
crypto_data_2021_df = pd.read_csv(crypto_path).drop(columns=['Unnamed: 0'])
crypto_data_2021_df.nunique()

id                          2250
symbol                      2182
price_04-22-21              2116
market_cap_billion(2021)    2250
market_cap_rank             2250
ath                         2003
ath_change_percentage       2245
ath_date                    1980
atl                         2151
atl_change_percentage       2247
atl_date                    2198
dtype: int64

In [99]:
# Read the 2018 social metrics CSV into a data frame, discarding the stray "Unnamed: 0" column
social_2018_df = pd.read_csv(social_2018_path).drop(columns=['Unnamed: 0'])

In [96]:
social_2018_df.nunique()

id                               328
price 01-04-2018                 104
market_cap_2018(millions)        100
reddit_subscribers 01-04-2018     92
twitter_followers 01-04-2018     103
alexa_rank 01-04-2018            100
dtype: int64

In [6]:
# Read the 2021 social metrics CSV into a data frame, discard the stray "Unnamed: 0" column,
# and count distinct values per column
social_2021_df = pd.read_csv(social_2021_path).drop(columns=['Unnamed: 0'])
social_2021_df.nunique()

id                               204
price 01-04-2021                 200
market_cap_2021(millions)        195
reddit_subscribers 01-04-2021    128
twitter_followers 01-04-2021     171
alexa_rank 01-04-2021            185
dtype: int64

In [97]:
# Keep only the 2018 rows in which every column is populated (any NaN drops the row),
# then count distinct values per column of what is left
clean_2018_df = social_2018_df.dropna(axis="index", how="any")
clean_2018_df.nunique()

id                               89
price 01-04-2018                 89
market_cap_2018(millions)        86
reddit_subscribers 01-04-2018    89
twitter_followers 01-04-2018     89
alexa_rank 01-04-2018            88
dtype: int64

In [100]:
social_2018_df.head()

Unnamed: 0,id,price 01-04-2018,market_cap_2018(millions),reddit_subscribers 01-04-2018,twitter_followers 01-04-2018,alexa_rank 01-04-2018
0,bitcoin,6975.275314,118234.055447,785619.0,819312.0,5574.0
1,ethereum,395.796381,39000.868214,348116.0,382159.0,8204.0
2,binancecoin,11.115511,1100.591194,40605.0,176.0,230.0
3,ripple,0.51199,20016.003614,184063.0,843082.0,5224.0
4,tether,1.00069,2288.718941,,23302.0,34929.0


In [101]:
# Market cap stays in millions here: an earlier experiment that divided the column by 1000
# (and renamed it to the same name, i.e. a no-op rename) is intentionally left disabled.
clean_2018_df.head()

Unnamed: 0,id,price 01-04-2018,market_cap_2018(millions),reddit_subscribers 01-04-2018,twitter_followers 01-04-2018,alexa_rank 01-04-2018
0,bitcoin,6975.275314,118234.055447,785619.0,819312.0,5574.0
1,ethereum,395.796381,39000.868214,348116.0,382159.0,8204.0
2,binancecoin,11.115511,1100.591194,40605.0,176.0,230.0
3,ripple,0.51199,20016.003614,184063.0,843082.0,5224.0
5,cardano,0.155532,4032.488129,60486.0,124345.0,13650.0


In [92]:
# Keep only the 2021 rows in which every column is populated (any NaN drops the row)
clean_2021_df = social_2021_df.dropna(axis="index", how="any")
clean_2021_df

Unnamed: 0,id,price 01-04-2021,market_cap_2021(millions),reddit_subscribers 01-04-2021,twitter_followers 01-04-2021,alexa_rank 01-04-2021
0,bitcoin,58817.373888,1.098127e+06,2648989.0,83714.0,5018.0
1,ethereum,1915.832536,2.212107e+05,753614.0,852223.0,6152.0
3,ripple,0.573818,2.634585e+04,278986.0,1310270.0,11412.0
5,cardano,1.192192,3.826120e+04,302631.0,297688.0,8118.0
6,dogecoin,0.054382,7.014690e+03,1229059.0,686374.0,11467.0
...,...,...,...,...,...,...
195,aragon,10.851830,3.801019e+02,2912.0,81289.0,113796.0
198,metadium,0.354968,5.731558e+02,206.0,826.0,586913.0
199,bitshares,0.098297,2.686574e+02,7029.0,3523.0,111487.0
202,velas,0.161867,2.221146e+02,93.0,18798.0,168994.0


In [10]:
def bar_plot(data_frame, x_values, y_column, color='b', fig_size=[10, 5]):
    """Draw a vertical bar chart with one bar per row of ``data_frame`` and show it.

    Parameters
    ----------
    data_frame : table indexed by column label (a pandas DataFrame in this notebook)
        Rows are plotted in their existing order; no sorting is done here.
    x_values : column label
        Column whose values are used as the x tick labels.
    y_column : column label
        Column whose values are the bar heights; also used as the y-axis label.
    color : matplotlib color, default 'b'
        Passed straight to ``plt.bar``.
    fig_size : sequence of two numbers, default [10, 5]
        Figure size handed to ``plt.figure(figsize=...)``. The default list is never mutated.

    Returns
    -------
    None
        A new figure is opened on every call and displayed with ``plt.show()``.
    """
    # Define size of figure
    plt.figure(figsize=fig_size)
    # One bar per row, positioned at 0..n-1; the same positions serve as tick locations
    x_axis = np.arange(len(data_frame))
    # Bar heights
    y_values = data_frame[y_column]
    plt.bar(x_axis, y_values, alpha=0.5, color=color)
    # Label each bar with its x value; rotate and shrink so long names stay legible
    plt.xticks(x_axis, data_frame[x_values], rotation='vertical', size=9)
    # Label graph
    plt.xlabel("Cryptocurrency")
    plt.ylabel(f"{y_column}")
    plt.title(f"{y_column} vs {x_values}")
    # Watermark the figure with the author's Twitter handle. It is anchored in axes-fraction
    # coordinates so it stays visible whatever the data range is; a data-coordinate anchor
    # is not drawn when it falls outside the axes.
    plt.annotate(
        "@johannvillalvir",
        (0.5, 0.85),
        xycoords="axes fraction",
        ha="center",
        color="b",
        size=24,
    )
    # Make the layout fit in the figure
    plt.tight_layout()
    # Show the graph
    plt.show()



In [11]:
# What is the relationship between social media activity and market cap in the last 3 years?

# Bar chart of 2018 Twitter followers, one bar per coin.
# Bars follow the row order of clean_2018_df, which is NOT sorted by 2018 market cap
# (e.g. binancecoin comes before ripple even though ripple's 2018 market cap is larger).
# bar_plot returns nothing, so twitter_metrics is None.
twitter_metrics = bar_plot(clean_2018_df, 'id', 'twitter_followers 01-04-2018', fig_size =[25, 10])
# Create the output folder if needed so savefig does not fail on a fresh checkout
os.makedirs("images", exist_ok=True)
plt.savefig("images/2018_twitter.png")


<IPython.core.display.Javascript object>

In [12]:
# Bar chart of 2018 Reddit subscribers, one bar per coin, in the row order of clean_2018_df.
# bar_plot returns nothing, so reddit_metrics is None.
reddit_metrics = bar_plot(clean_2018_df,'id', 'reddit_subscribers 01-04-2018', 'red', [25, 10])
# Create the output folder if needed so savefig does not fail on a fresh checkout
os.makedirs("images", exist_ok=True)
plt.savefig("images/2018_reddit.png")


<IPython.core.display.Javascript object>

In [13]:
# 2018 Alexa rank per coin, drawn in green; saving to disk is currently disabled
alexa_metrics = bar_plot(
    clean_2018_df,
    x_values='id',
    y_column='alexa_rank 01-04-2018',
    color='green',
    fig_size=[25, 10],
)
#plt.savefig("images/2018_alexa_rank.png")

<IPython.core.display.Javascript object>

In [15]:
# 2021 Reddit subscribers per coin, drawn in red; saving to disk is currently disabled
bar_plot(
    clean_2021_df,
    x_values='id',
    y_column='reddit_subscribers 01-04-2021',
    color='r',
    fig_size=[25, 10],
)
#plt.savefig("images/2021_reddit.png")

<IPython.core.display.Javascript object>

In [16]:
# 2021 Twitter followers per coin, in bar_plot's default color; saving to disk is currently disabled
bar_plot(
    clean_2021_df,
    x_values='id',
    y_column='twitter_followers 01-04-2021',
    fig_size=[25, 10],
)
#plt.savefig("images/2021_twitter.png")

<IPython.core.display.Javascript object>

In [17]:
# 2021 Alexa rank per coin, drawn in green; saving to disk is currently disabled
bar_plot(
    clean_2021_df,
    x_values='id',
    y_column='alexa_rank 01-04-2021',
    color='g',
    fig_size=[25, 10],
)
#plt.savefig("images/2021_alexa.png")

<IPython.core.display.Javascript object>

In [103]:
# Join the cleaned 2018 and 2021 social metrics on coin id to compare the two years.
# how="right" keeps every row of clean_2021_df; a coin with no 2018 match gets NaN
# in the 2018 columns.
social_metrics_merged_df = clean_2018_df.merge(clean_2021_df, how="right", on="id")
social_metrics_merged_df.head()

Unnamed: 0,id,price 01-04-2018,market_cap_2018(millions),reddit_subscribers 01-04-2018,twitter_followers 01-04-2018,alexa_rank 01-04-2018,price 01-04-2021,market_cap_2021(millions),reddit_subscribers 01-04-2021,twitter_followers 01-04-2021,alexa_rank 01-04-2021
0,bitcoin,6975.275314,118234.055447,785619.0,819312.0,5574.0,58817.373888,1098127.0,2648989.0,83714.0,5018.0
1,ethereum,395.796381,39000.868214,348116.0,382159.0,8204.0,1915.832536,221210.7,753614.0,852223.0,6152.0
2,ripple,0.51199,20016.003614,184063.0,843082.0,5224.0,0.573818,26345.85,278986.0,1310270.0,11412.0
3,cardano,0.155532,4032.488129,60486.0,124345.0,13650.0,1.192192,38261.2,302631.0,297688.0,8118.0
4,dogecoin,0.002799,318.539152,115819.0,208970.0,31077.0,0.054382,7014.69,1229059.0,686374.0,11467.0


In [104]:
# Keep only the merged rows in which every column is populated (any NaN drops the row)
clean_social_metrics_merged_df = social_metrics_merged_df.dropna(axis="index", how="any")
clean_social_metrics_merged_df.head()

Unnamed: 0,id,price 01-04-2018,market_cap_2018(millions),reddit_subscribers 01-04-2018,twitter_followers 01-04-2018,alexa_rank 01-04-2018,price 01-04-2021,market_cap_2021(millions),reddit_subscribers 01-04-2021,twitter_followers 01-04-2021,alexa_rank 01-04-2021
0,bitcoin,6975.275314,118234.055447,785619.0,819312.0,5574.0,58817.373888,1098127.0,2648989.0,83714.0,5018.0
1,ethereum,395.796381,39000.868214,348116.0,382159.0,8204.0,1915.832536,221210.7,753614.0,852223.0,6152.0
2,ripple,0.51199,20016.003614,184063.0,843082.0,5224.0,0.573818,26345.85,278986.0,1310270.0,11412.0
3,cardano,0.155532,4032.488129,60486.0,124345.0,13650.0,1.192192,38261.2,302631.0,297688.0,8118.0
4,dogecoin,0.002799,318.539152,115819.0,208970.0,31077.0,0.054382,7014.69,1229059.0,686374.0,11467.0


In [90]:
# create a function to do a scatter plot
def scatter_plot(dataframe, x_values, y_values, fig_size=[9,5], color='red', edgecolor='black', marker='o'):
    """Open a new figure, scatter one column of ``dataframe`` against another, and show it.

    ``x_values`` and ``y_values`` are column labels. The names of the selected columns
    are used for the axis labels and the title. ``fig_size`` goes to
    ``plt.figure(figsize=...)``; ``color``, ``edgecolor`` and ``marker`` are converted to
    strings and forwarded to ``plt.scatter``. Returns None.
    """
    plt.figure(figsize=fig_size)

    # Pull both columns; their Series names double as the axis labels below
    xs, ys = (dataframe[f"{label}"] for label in (x_values, y_values))

    plt.scatter(
        xs,
        ys,
        color=f"{color}",
        edgecolor=f"{edgecolor}",
        marker=f"{marker}",
    )

    plt.xlabel(f"{xs.name}")
    plt.ylabel(f"{ys.name}")
    plt.title(f"{xs.name} vs {ys.name}")

    plt.tight_layout()
    plt.show()
    


In [115]:
# Create linear regression function to be used with data frames
def linear_regression (dataframe, x_axis, y_axis):
    """Fit a least-squares line to two columns and draw it on the current figure.

    Parameters
    ----------
    dataframe : table indexed by column label (a pandas DataFrame in this notebook)
    x_axis : column label of the independent variable
    y_axis : column label of the dependent variable

    Returns
    -------
    None
        The fitted line and its equation are drawn with pyplot's implicit "current
        figure" (this function opens no figure of its own, so it is meant to be called
        right after a plot of the same data), the figure is shown, and R^2 is printed.
    """
    # Get x and y values
    x_values = dataframe[x_axis]
    y_values = dataframe[y_axis]

    # Least-squares fit; only slope, intercept and the correlation coefficient are needed
    fit = stats.linregress(x_values, y_values)

    # Points on the fitted line at the observed x positions
    line_fit = fit.slope * x_values + fit.intercept

    # Human-readable equation of the line
    line_eq = "y = " + str(round(fit.slope, 6)) + "x + " + str(round(fit.intercept, 2))

    # Draw the line and put the equation in the upper-left corner of the axes.
    # Axes-fraction coordinates keep the text visible for any data range; a fixed
    # data-coordinate anchor is not drawn when it falls outside the plotted range.
    plt.plot(x_values, line_fit)
    plt.annotate(line_eq, (0.05, 0.9), xycoords="axes fraction", fontsize=15, color="red")
    plt.show()
    print(f"The value of R^2 is {fit.rvalue**2}")

In [105]:
# Pairwise correlation of the 2018 metrics. Restrict to numeric columns first:
# the frame also holds the string column "id", and recent pandas versions raise
# instead of silently skipping non-numeric columns in corr().
clean_2018_df.select_dtypes(include="number").corr()

Unnamed: 0,price 01-04-2018,market_cap_2018(millions),reddit_subscribers 01-04-2018,twitter_followers 01-04-2018,alexa_rank 01-04-2018
price 01-04-2018,1.0,0.948117,0.863016,0.549476,-0.086068
market_cap_2018(millions),0.948117,1.0,0.951993,0.683847,-0.116877
reddit_subscribers 01-04-2018,0.863016,0.951993,1.0,0.788793,-0.160846
twitter_followers 01-04-2018,0.549476,0.683847,0.788793,1.0,-0.193261
alexa_rank 01-04-2018,-0.086068,-0.116877,-0.160846,-0.193261,1.0


In [106]:
# Order the cleaned 2018 rows from largest to smallest market cap
sorted_2018_df = clean_2018_df.sort_values(by='market_cap_2018(millions)', ascending=False)

In [122]:
# Smaller-cap subset: skip the first five rows of the market-cap-sorted frame
# (i.e. the five largest 2018 market caps) and keep everything else
y2018_small_caps = sorted_2018_df.iloc[5:]
y2018_small_caps.head(30)

Unnamed: 0,id,price 01-04-2018,market_cap_2018(millions),reddit_subscribers 01-04-2018,twitter_followers 01-04-2018,alexa_rank 01-04-2018
26,eos,5.883528,4490.618497,42951.0,128624.0,22055.0
5,cardano,0.155532,4032.488129,60486.0,124345.0,13650.0
15,stellar,0.207794,3854.703383,76111.0,224029.0,12412.0
23,neo,50.351917,3272.874575,88358.0,299298.0,23186.0
25,iota,1.080227,3002.523973,108796.0,108410.0,25994.0
22,monero,180.1852,2862.251991,124683.0,283411.0,25712.0
52,dash,307.396965,2452.568223,20672.0,291818.0,35871.0
18,tron,0.034138,2244.484867,62267.0,278918.0,754627.0
50,nem,0.221839,1996.550352,16950.0,195953.0,34800.0
36,ethereum-classic,14.452266,1457.934051,18346.0,198009.0,199911.0


In [113]:
# 2018 small caps only (all but the five largest coins, i.e. market caps below
# roughly 4.5 billion): Reddit subscribers vs market cap, with a fitted line on top
scatter_plot(
    y2018_small_caps,
    x_values='reddit_subscribers 01-04-2018',
    y_values='market_cap_2018(millions)',
)
linear_regression(
    y2018_small_caps,
    x_axis='reddit_subscribers 01-04-2018',
    y_axis='market_cap_2018(millions)',
)
#plt.savefig("images/2018_small_caps_reddit_vs_market_cap.png")

<IPython.core.display.Javascript object>

The value of R^2 is 0.43228949094892444


In [117]:
def prediction_machine (value, slope=0.023758, intercept=144.79):
    """Evaluate the straight line ``slope * value + intercept``.

    Parameters
    ----------
    value : number
        Input to the line. NOTE(review): presumably a 2018 Reddit subscriber count,
        with the result a market cap in millions - confirm against the fit used.
    slope, intercept : number, optional
        Coefficients of the line. The defaults are the constants originally hard-coded
        here; they look like a rounded fit printed by linear_regression, but which fit
        they came from is not recorded - TODO confirm. Pass other coefficients to reuse
        this helper for a different fit.

    Returns
    -------
    number
        The predicted value on the line.
    """
    return slope * value + intercept

In [123]:
prediction_machine(40605.)

1109.48359

In [116]:
# All coins with complete 2018 and 2021 metrics: 2018 Reddit subscribers vs 2018 market cap,
# with a fitted line on top
scatter_plot(
    clean_social_metrics_merged_df,
    x_values='reddit_subscribers 01-04-2018',
    y_values='market_cap_2018(millions)',
)
linear_regression(
    clean_social_metrics_merged_df,
    x_axis='reddit_subscribers 01-04-2018',
    y_axis='market_cap_2018(millions)',
)
#plt.savefig("images/2018_reddit_vs_market_cap.png")

<IPython.core.display.Javascript object>

The value of R^2 is 0.9279083270354638


In [128]:
scatter_plot(clean_social_metrics_merged_df, "twitter_followers 01-04-2018", "market_cap_2018(millions)", fig_size = [15,10], color='b')

<IPython.core.display.Javascript object>

In [26]:
social_metrics_merged_df.columns

Index(['id', 'price 01-04-2018', 'market_cap_2018(millions)',
       'reddit_subscribers 01-04-2018', 'twitter_followers 01-04-2018',
       'alexa_rank 01-04-2018', 'price 01-04-2021',
       'market_cap_2021(millions)', 'reddit_subscribers 01-04-2021',
       'twitter_followers 01-04-2021', 'alexa_rank 01-04-2021'],
      dtype='object')

In [27]:
scatter_plot(clean_social_metrics_merged_df,'market_cap_2018(millions)','alexa_rank 01-04-2018', color='g')

<IPython.core.display.Javascript object>

In [28]:
clean_2018_df.columns

Index(['id', 'price 01-04-2018', 'market_cap_2018(millions)',
       'reddit_subscribers 01-04-2018', 'twitter_followers 01-04-2018',
       'alexa_rank 01-04-2018'],
      dtype='object')

In [29]:
scatter_plot(clean_2018_df, 'market_cap_2018(millions)', 'reddit_subscribers 01-04-2018')

<IPython.core.display.Javascript object>