# This app focuses on extracting cryptocurrencies data from coingecko's API

## APIs used:

    - Get/coins/markets to get a list of all supported coins, ath, price, volume and other info
    - Get/coins/{id}/history to get community and developer data


In [55]:
import requests
import pandas as pd
import time

In [2]:
# Create an empty list to hold the coins information
crypto_list = []

In [3]:
# Find an API that has information on coin and make several requests

# Goal is 2,500 entries, each page has 300. Make a loop to get 10 pages
for i in range(1,10,1):
    page_number = i
    base_url = f"https://api.coingecko.com/api/v3/coins/markets?vs_currency=usd&order=market_cap_desc&per_page=300&page={page_number}&sparkline=false"
    # Test out the URL
    #print(base_url)

    # Request json file from the url
    response_json = requests.get(base_url).json()

    # append response to empty list
    crypto_list.append(response_json)

In [4]:
# Make each page entry into a datafrme and concat dataframes together
# Set up the a value for the previous dataframe
previous_df = pd.DataFrame(crypto_list[0])
# Loop through the entries in the list
for i in range(1, len(crypto_list)):
    next_df = pd.DataFrame(crypto_list[i])
    complete_df = pd.concat([previous_df, next_df])
    previous_df = complete_df



In [5]:
# reset the index
clean_complete_df = complete_df.reset_index(drop=True)
#clean_complete_df

In [6]:
# See the name of the columns
#clean_complete_df.columns

In [59]:
# Only get specific columns
#crypto_df = clean_complete_df[['id', 'symbol', 'current_price','market_cap', 'market_cap_rank', 'ath', 'ath_change_percentage', 'ath_date','atl', 'atl_change_percentage', 'atl_date']]
#crypto_df

In [8]:
# Only run this cell if the data hasn't been renamed and cleaned
if crypto_df.columns[3] == "market_cap":
    # Clean data by dropping columns, ONLY RUN IT ONCE, since columns will change name
    #crypto_df = crypto_df[['id', 'symbol', 'current_price','market_cap', 'market_cap_rank', 'ath', 'ath_change_percentage', 'ath_date','atl', 'atl_change_percentage', 'atl_date']]
    crypto_df['market_cap'] = crypto_df['market_cap']/1000000000

    # Rename columns to see easier
    crypto_df.rename(columns={'market_cap':'market_cap_billion(2021)',
                              'current_price' : 'price_04-22-21'

                             }, inplace=True)


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  crypto_df['market_cap'] = crypto_df['market_cap']/1000000000
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  return super().rename(


In [9]:
# Verify data drame
#crypto_df.head()

In [10]:
# Save data as csv
# crypto_df.to_csv("data/cryptocurrencies_data_2021.csv")

In [69]:
# Get each coin's price, and socialdata on 01-04-2018 
# Make empty list to hold the value of coins

coin_data_2018 = []

target_date = "01-04-2018"

In [12]:
# Set up timer to slow down requests speed
def timer_milliseconds(milliseconds = 3):
    for i in range(0, milliseconds):
        print(f"Counting... {i +1}")
        time.sleep(1)
        

In [66]:
response_json =requests.get("https://api.coingecko.com/api/v3/coins/bitcoin/history?date=04-01-2018localization=False").json()
response_json['market_data']['market_cap']['usd']

268124738814.99704

In [67]:
# Make function to perform request based on specific data, use dataframe above's id to search that specific coin
def obtain_metrics_by_date (target_df = crypto_df, target_list = coin_data_2018, date='01-04-2018'):
    # Set up error counter to break out if there are too many errors
    key_error_counter = 0
    timeout_error_counter = 0
    #iterate through dataframe getting index and row
    for index, row in target_df.iterrows():
        
        #get coin name through dictionary syntax
        target_coin = row['id']
        
        # print the current row's coin
        print(f"Processing {target_coin}")
        
        # If there are less than the number of errors allowed, try doing a response
        if (key_error_counter < 150) and (timeout_error_counter < 2):
            try:
                # Build url
                price_url = f"https://api.coingecko.com/api/v3/coins/{target_coin}/history?date={date}localization=False"
                # Print message to keep user informed
                print(f"Requesting from URL: {price_url}")
                print(f"At index: {index}")
                
                # save json respons
                response_json = requests.get(price_url).json()
                
                # traverse json getting specific data
                coin_id = response_json['id']
                coin_price = response_json['market_data']['current_price']['usd']
                market_cap = response_json['market_data']['market_cap']['usd']
                coin_reddit_subscribers = response_json['community_data']['reddit_subscribers']
                coin_twitter_followers = response_json['community_data']['twitter_followers']
                coin_alexa_rank = response_json['public_interest_stats']['alexa_rank']
                
                # Add data to empty list in dictionary style
                target_list.append({
                    "id" : coin_id,
                    f"price {date}" : coin_price,
                    f"market_cap" : market_cap,
                    f"reddit_subscribers {date}" : coin_reddit_subscribers,
                    f"twitter_followers {date}" : coin_twitter_followers,
                    f"alexa_rank {date}" : coin_alexa_rank
                       })
                
            # The first error is a no market_data for that coin
            except :
                try:
                    # The first error will be a key error, since young coins dont have 'market_data'
                    # add one to the key_error counter
                    key_error_counter+=1
                    
                    #print message
                    print("Key error. Coin too young to find all data")   
                    print(f"Retrying {target_coin} to make sure we are not timedout")
                    print(f"Retrying URL: {price_url}")
                    
                    # make a request again, in case we got timedout
                    response_json = requests.get(price_url).json()
                    #still add the coin to the list
                    target_list.append({'id': coin_id})
                 
                # the next error would be a timedout error
                except :
                    print('Ruh Roh! Looks like you have been timed out')
                    print(f"The last coin to be pulled was {target_coin} at index {index}")
                    
                    # If we got timed out, we want to exit the function
                    timeout_error_counter += 1
                    timer_milliseconds(30)
                    #break
        # print message for too many requests errors
        else:
            print(f"You have reached {key_error_counter} key errors and {timeout_error_counter} timeouts.")
            print(f"Last data pulled at position index:{index}, id: {target_coin}")
            break
    print("Done with Data Retieval")
    print(f"We found a total of {len(target_list)} entries")
  



In [14]:
obtain_metrics_by_date()

Processing bitcoin
Requesting from URL: https://api.coingecko.com/api/v3/coins/bitcoin/history?date=01-04-2018localization=False
At index: 0
Processing ethereum
Requesting from URL: https://api.coingecko.com/api/v3/coins/ethereum/history?date=01-04-2018localization=False
At index: 1
Processing binancecoin
Requesting from URL: https://api.coingecko.com/api/v3/coins/binancecoin/history?date=01-04-2018localization=False
At index: 2
Processing ripple
Requesting from URL: https://api.coingecko.com/api/v3/coins/ripple/history?date=01-04-2018localization=False
At index: 3
Processing tether
Requesting from URL: https://api.coingecko.com/api/v3/coins/tether/history?date=01-04-2018localization=False
At index: 4
Processing cardano
Requesting from URL: https://api.coingecko.com/api/v3/coins/cardano/history?date=01-04-2018localization=False
At index: 5
Processing dogecoin
Requesting from URL: https://api.coingecko.com/api/v3/coins/dogecoin/history?date=01-04-2018localization=False
At index: 6
Proce

Key error. Coin too young to find all data
Retrying okb to make sure we are not timedout
Retrying URL: https://api.coingecko.com/api/v3/coins/okb/history?date=01-04-2018localization=False
Processing compound-usd-coin
Requesting from URL: https://api.coingecko.com/api/v3/coins/compound-usd-coin/history?date=01-04-2018localization=False
At index: 35
Key error. Coin too young to find all data
Retrying compound-usd-coin to make sure we are not timedout
Retrying URL: https://api.coingecko.com/api/v3/coins/compound-usd-coin/history?date=01-04-2018localization=False
Processing ethereum-classic
Requesting from URL: https://api.coingecko.com/api/v3/coins/ethereum-classic/history?date=01-04-2018localization=False
At index: 36
Processing compound-ether
Requesting from URL: https://api.coingecko.com/api/v3/coins/compound-ether/history?date=01-04-2018localization=False
At index: 37
Key error. Coin too young to find all data
Retrying compound-ether to make sure we are not timedout
Retrying URL: http

In [15]:
len(coin_data_2018)

45

In [25]:
# If we got timed out, make a DF starting from the last token proccesed to run the function again
# last_coin_proccesed = len(coin_data_2018)
# left_over_df = crypto_df.iloc[last_coin_proccesed:]
# left_over_df



In [32]:
# Keep obtaining data until our df is at least 500 short from the original list
def persistent_scrapping(list_to_append_entries = coin_data_2018, number_entries_desired = len(crypto_df) -500, date = '01-04-2018'):
    
    # Set up a leftover dataframe based on the original dataframe we were trying to use for scrapping
    last_coin_proccesed = len(list_to_append_entries)
    left_over_df = crypto_df.iloc[last_coin_proccesed:]
    
    # Keep obtaining data until our desired threshold 
    while len(list_to_append_entries) < number_entries_desired:
        
        # Call function above
        obtain_metrics_by_date(left_over_df, list_to_append_entries, date)
        # Get the index for the last coin pricesses
        last_coin_proccesed = len(list_to_append_entries)
        # Update the leftovers data frame starting from the last coin_processed
        left_over_df = crypto_df.iloc[last_coin_proccesed:]



In [86]:
persistent_scrapping(coin_data_2018, 300)

Processing titanswap
Requesting from URL: https://api.coingecko.com/api/v3/coins/titanswap/history?date=01-04-2018localization=False
At index: 219
Key error. Coin too young to find all data
Retrying titanswap to make sure we are not timedout
Retrying URL: https://api.coingecko.com/api/v3/coins/titanswap/history?date=01-04-2018localization=False
Processing hathor
Requesting from URL: https://api.coingecko.com/api/v3/coins/hathor/history?date=01-04-2018localization=False
At index: 220
Key error. Coin too young to find all data
Retrying hathor to make sure we are not timedout
Retrying URL: https://api.coingecko.com/api/v3/coins/hathor/history?date=01-04-2018localization=False
Processing badger-dao
Requesting from URL: https://api.coingecko.com/api/v3/coins/badger-dao/history?date=01-04-2018localization=False
At index: 221
Key error. Coin too young to find all data
Retrying badger-dao to make sure we are not timedout
Retrying URL: https://api.coingecko.com/api/v3/coins/badger-dao/history?d

Key error. Coin too young to find all data
Retrying keep-network to make sure we are not timedout
Retrying URL: https://api.coingecko.com/api/v3/coins/keep-network/history?date=01-04-2018localization=False
Processing compound-0x
Requesting from URL: https://api.coingecko.com/api/v3/coins/compound-0x/history?date=01-04-2018localization=False
At index: 247
Key error. Coin too young to find all data
Retrying compound-0x to make sure we are not timedout
Retrying URL: https://api.coingecko.com/api/v3/coins/compound-0x/history?date=01-04-2018localization=False
Processing olympus
Requesting from URL: https://api.coingecko.com/api/v3/coins/olympus/history?date=01-04-2018localization=False
At index: 248
Key error. Coin too young to find all data
Retrying olympus to make sure we are not timedout
Retrying URL: https://api.coingecko.com/api/v3/coins/olympus/history?date=01-04-2018localization=False
Processing monacoin
Requesting from URL: https://api.coingecko.com/api/v3/coins/monacoin/history?dat

Key error. Coin too young to find all data
Retrying fx-coin to make sure we are not timedout
Retrying URL: https://api.coingecko.com/api/v3/coins/fx-coin/history?date=01-04-2018localization=False
Ruh Roh! Looks like you have been timed out
The last coin to be pulled was fx-coin at index 275
Counting... 1
Counting... 2
Counting... 3
Counting... 4
Counting... 5
Counting... 6
Counting... 7
Counting... 8
Counting... 9
Counting... 10
Counting... 11
Counting... 12
Counting... 13
Counting... 14
Counting... 15
Counting... 16
Counting... 17
Counting... 18
Counting... 19
Counting... 20
Counting... 21
Counting... 22
Counting... 23
Counting... 24
Counting... 25
Counting... 26
Counting... 27
Counting... 28
Counting... 29
Counting... 30
Processing ethernity-chain
Requesting from URL: https://api.coingecko.com/api/v3/coins/ethernity-chain/history?date=01-04-2018localization=False
At index: 276
Key error. Coin too young to find all data
Retrying ethernity-chain to make sure we are not timedout
Retryin

Key error. Coin too young to find all data
Retrying render-token to make sure we are not timedout
Retrying URL: https://api.coingecko.com/api/v3/coins/render-token/history?date=01-04-2018localization=False
Processing sharering
Requesting from URL: https://api.coingecko.com/api/v3/coins/sharering/history?date=01-04-2018localization=False
At index: 295
Key error. Coin too young to find all data
Retrying sharering to make sure we are not timedout
Retrying URL: https://api.coingecko.com/api/v3/coins/sharering/history?date=01-04-2018localization=False
Processing compound-basic-attention-token
Requesting from URL: https://api.coingecko.com/api/v3/coins/compound-basic-attention-token/history?date=01-04-2018localization=False
At index: 296
Key error. Coin too young to find all data
Retrying compound-basic-attention-token to make sure we are not timedout
Retrying URL: https://api.coingecko.com/api/v3/coins/compound-basic-attention-token/history?date=01-04-2018localization=False
Processing deriv

Key error. Coin too young to find all data
Retrying auction to make sure we are not timedout
Retrying URL: https://api.coingecko.com/api/v3/coins/auction/history?date=01-04-2018localization=False
Processing divi
Requesting from URL: https://api.coingecko.com/api/v3/coins/divi/history?date=01-04-2018localization=False
At index: 321
Key error. Coin too young to find all data
Retrying divi to make sure we are not timedout
Retrying URL: https://api.coingecko.com/api/v3/coins/divi/history?date=01-04-2018localization=False
Processing zcoin
Requesting from URL: https://api.coingecko.com/api/v3/coins/zcoin/history?date=01-04-2018localization=False
At index: 322
Processing zenon
Requesting from URL: https://api.coingecko.com/api/v3/coins/zenon/history?date=01-04-2018localization=False
At index: 323
Key error. Coin too young to find all data
Retrying zenon to make sure we are not timedout
Retrying URL: https://api.coingecko.com/api/v3/coins/zenon/history?date=01-04-2018localization=False
Process

In [88]:
pd.DataFrame(coin_data_2018)

Unnamed: 0,id,price 01-04-2018,market_cap,reddit_subscribers 01-04-2018,twitter_followers 01-04-2018,alexa_rank 01-04-2018
0,bitcoin,6975.275314,1.182341e+11,785619.0,819312.0,5574.0
1,ethereum,395.796381,3.900087e+10,348116.0,382159.0,8204.0
2,binancecoin,11.115511,1.100591e+09,40605.0,176.0,230.0
3,ripple,0.511990,2.001600e+10,184063.0,843082.0,5224.0
4,tether,1.000690,2.288719e+09,,23302.0,34929.0
...,...,...,...,...,...,...
324,marlin,,,,,
325,nominex,,,,,
326,mx-token,,,,,
327,sparkpoint,,,,,


In [93]:
coin_data_2018

[{'id': 'bitcoin',
  'price 01-04-2018': 6975.27531402076,
  'market_cap': 118234055446.5432,
  'reddit_subscribers 01-04-2018': 785619,
  'twitter_followers 01-04-2018': 819312,
  'alexa_rank 01-04-2018': 5574},
 {'id': 'ethereum',
  'price 01-04-2018': 395.79638095782116,
  'market_cap': 39000868214.27854,
  'reddit_subscribers 01-04-2018': 348116,
  'twitter_followers 01-04-2018': 382159,
  'alexa_rank 01-04-2018': 8204},
 {'id': 'binancecoin',
  'price 01-04-2018': 11.115510878254739,
  'market_cap': 1100591194.0995147,
  'reddit_subscribers 01-04-2018': 40605,
  'twitter_followers 01-04-2018': 176,
  'alexa_rank 01-04-2018': 230},
 {'id': 'ripple',
  'price 01-04-2018': 0.51199,
  'market_cap': 20016003613.76977,
  'reddit_subscribers 01-04-2018': 184063,
  'twitter_followers 01-04-2018': 843082,
  'alexa_rank 01-04-2018': 5224},
 {'id': 'tether',
  'price 01-04-2018': 1.00069,
  'market_cap': 2288718941.16166,
  'reddit_subscribers 01-04-2018': None,
  'twitter_followers 01-04-20

In [95]:
# Data cleanup
coins_2018_df = pd.DataFrame(coin_data_2018)
coins_2018_df['market_cap'] = coins_2018_df['market_cap'] / 1000000
coins_2018_df.rename(columns={
    'market_cap' : 'market_cap_2018(millions)'
},inplace=True)
coins_2018_df


Unnamed: 0,id,price 01-04-2018,market_cap_2018(millions),reddit_subscribers 01-04-2018,twitter_followers 01-04-2018,alexa_rank 01-04-2018
0,bitcoin,6975.275314,118234.055447,785619.0,819312.0,5574.0
1,ethereum,395.796381,39000.868214,348116.0,382159.0,8204.0
2,binancecoin,11.115511,1100.591194,40605.0,176.0,230.0
3,ripple,0.511990,20016.003614,184063.0,843082.0,5224.0
4,tether,1.000690,2288.718941,,23302.0,34929.0
...,...,...,...,...,...,...
324,marlin,,,,,
325,nominex,,,,,
326,mx-token,,,,,
327,sparkpoint,,,,,


In [96]:
coins_2018_df.nunique()

id                               328
price 01-04-2018                 104
market_cap_2018(millions)        100
reddit_subscribers 01-04-2018     92
twitter_followers 01-04-2018     103
alexa_rank 01-04-2018            100
dtype: int64

In [110]:
coins_2018_df.to_csv("data/2018_social.csv")



In [97]:
coins_2021_df = []

In [98]:
persistent_scrapping(coins_2021_df, 200, '01-04-2021')

Processing bitcoin
Requesting from URL: https://api.coingecko.com/api/v3/coins/bitcoin/history?date=01-04-2021localization=False
At index: 0
Processing ethereum
Requesting from URL: https://api.coingecko.com/api/v3/coins/ethereum/history?date=01-04-2021localization=False
At index: 1
Processing binancecoin
Requesting from URL: https://api.coingecko.com/api/v3/coins/binancecoin/history?date=01-04-2021localization=False
At index: 2
Processing ripple
Requesting from URL: https://api.coingecko.com/api/v3/coins/ripple/history?date=01-04-2021localization=False
At index: 3
Processing tether
Requesting from URL: https://api.coingecko.com/api/v3/coins/tether/history?date=01-04-2021localization=False
At index: 4
Processing cardano
Requesting from URL: https://api.coingecko.com/api/v3/coins/cardano/history?date=01-04-2021localization=False
At index: 5
Processing dogecoin
Requesting from URL: https://api.coingecko.com/api/v3/coins/dogecoin/history?date=01-04-2021localization=False
At index: 6
Proce

Processing hedera-hashgraph
Requesting from URL: https://api.coingecko.com/api/v3/coins/hedera-hashgraph/history?date=01-04-2021localization=False
At index: 57
Processing zilliqa
Requesting from URL: https://api.coingecko.com/api/v3/coins/zilliqa/history?date=01-04-2021localization=False
At index: 58
Processing celsius-degree-token
Requesting from URL: https://api.coingecko.com/api/v3/coins/celsius-degree-token/history?date=01-04-2021localization=False
At index: 59
Processing enjincoin
Requesting from URL: https://api.coingecko.com/api/v3/coins/enjincoin/history?date=01-04-2021localization=False
At index: 60
Processing blockstack
Requesting from URL: https://api.coingecko.com/api/v3/coins/blockstack/history?date=01-04-2021localization=False
At index: 61
Processing leo-token
Requesting from URL: https://api.coingecko.com/api/v3/coins/leo-token/history?date=01-04-2021localization=False
At index: 62
Processing amp-token
Requesting from URL: https://api.coingecko.com/api/v3/coins/amp-token

Processing fantom
Requesting from URL: https://api.coingecko.com/api/v3/coins/fantom/history?date=01-04-2021localization=False
At index: 93
Processing xdce-crowd-sale
Requesting from URL: https://api.coingecko.com/api/v3/coins/xdce-crowd-sale/history?date=01-04-2021localization=False
At index: 94
Processing reserve-rights-token
Requesting from URL: https://api.coingecko.com/api/v3/coins/reserve-rights-token/history?date=01-04-2021localization=False
At index: 95
Processing ecomi
Requesting from URL: https://api.coingecko.com/api/v3/coins/ecomi/history?date=01-04-2021localization=False
At index: 96
Processing iostoken
Requesting from URL: https://api.coingecko.com/api/v3/coins/iostoken/history?date=01-04-2021localization=False
At index: 97
Processing dent
Requesting from URL: https://api.coingecko.com/api/v3/coins/dent/history?date=01-04-2021localization=False
At index: 98
Processing paxos-standard
Requesting from URL: https://api.coingecko.com/api/v3/coins/paxos-standard/history?date=01

Counting... 27
Counting... 28
Counting... 29
Counting... 30
Processing bounce-token
You have reached 2 key errors and 2 timeouts.
Last data pulled at position index:131, id: bounce-token
Done with Data Retieval
We found a total of 129 entries
Processing mirror-protocol
Requesting from URL: https://api.coingecko.com/api/v3/coins/mirror-protocol/history?date=01-04-2021localization=False
At index: 129
Processing balancer
Requesting from URL: https://api.coingecko.com/api/v3/coins/balancer/history?date=01-04-2021localization=False
At index: 130
Processing bounce-token
Requesting from URL: https://api.coingecko.com/api/v3/coins/bounce-token/history?date=01-04-2021localization=False
At index: 131
Processing gatechain-token
Requesting from URL: https://api.coingecko.com/api/v3/coins/gatechain-token/history?date=01-04-2021localization=False
At index: 132
Processing celo
Requesting from URL: https://api.coingecko.com/api/v3/coins/celo/history?date=01-04-2021localization=False
At index: 133
Proc

Processing orion-protocol
Requesting from URL: https://api.coingecko.com/api/v3/coins/orion-protocol/history?date=01-04-2021localization=False
At index: 180
Processing true-usd
Requesting from URL: https://api.coingecko.com/api/v3/coins/true-usd/history?date=01-04-2021localization=False
At index: 181
Processing swipe
Requesting from URL: https://api.coingecko.com/api/v3/coins/swipe/history?date=01-04-2021localization=False
At index: 182
Processing bakerytoken
Requesting from URL: https://api.coingecko.com/api/v3/coins/bakerytoken/history?date=01-04-2021localization=False
At index: 183
Processing ark
Requesting from URL: https://api.coingecko.com/api/v3/coins/ark/history?date=01-04-2021localization=False
At index: 184
Processing celer-network
Requesting from URL: https://api.coingecko.com/api/v3/coins/celer-network/history?date=01-04-2021localization=False
At index: 185
Processing rocket-pool
Requesting from URL: https://api.coingecko.com/api/v3/coins/rocket-pool/history?date=01-04-2021

In [99]:
coins_2021_df = pd.DataFrame(coins_2021_df)
coins_2021_df['market_cap'] = coins_2021_df['market_cap'] / 1000000
coins_2021_df.rename(columns={
    'market_cap' : 'market_cap_2021(millions)'
},inplace=True)
coins_2021_df


Unnamed: 0,id,price 01-04-2021,market_cap_2021(millions),reddit_subscribers 01-04-2021,twitter_followers 01-04-2021,alexa_rank 01-04-2021
0,bitcoin,58817.373888,1.098127e+06,2648989.0,83714.0,5018.0
1,ethereum,1915.832536,2.212107e+05,753614.0,852223.0,6152.0
2,binancecoin,302.745193,4.684157e+04,215806.0,,154.0
3,ripple,0.573818,2.634585e+04,278986.0,1310270.0,11412.0
4,tether,1.000288,4.081691e+04,,80505.0,38477.0
...,...,...,...,...,...,...
200,anchor-protocol,4.648715,2.198307e+02,,,53718.0
201,klever,0.087603,2.940475e+02,,158334.0,59129.0
202,velas,0.161867,2.221146e+02,93.0,18798.0,168994.0
203,trustswap,4.428006,2.875174e+02,,37260.0,26668.0


In [109]:
coins_2021_df.to_csv("data/2021_social.csv")

In [100]:
coins_2018_stats = pd.DataFrame(coins_2018_df.describe())
coins_2018_stats

Unnamed: 0,price 01-04-2018,market_cap_2018(millions),reddit_subscribers 01-04-2018,twitter_followers 01-04-2018,alexa_rank 01-04-2018
count,104.0,104.0,92.0,103.0,101.0
mean,95.100871,2399.536375,33098.945652,99108.737864,125221.1
std,688.380391,12305.968917,93679.675577,136612.392029,198965.8
min,0.000867,0.0,7.0,160.0,230.0
25%,0.115632,68.516749,2859.25,20587.5,36137.0
50%,0.75563,189.210337,6814.5,56561.0,81892.0
75%,6.308137,601.12102,20366.0,110823.5,122366.0
max,6975.275314,118234.055447,785619.0,843082.0,1476215.0


In [101]:
coins_2021_stats = pd.DataFrame(coins_2021_df.describe())
coins_2021_stats

Unnamed: 0,price 01-04-2021,market_cap_2021(millions),reddit_subscribers 01-04-2021,twitter_followers 01-04-2021,alexa_rank 01-04-2021
count,201.0,201.0,130.0,173.0,197.0
mean,1437.322981,9247.779,67109.14,117714.6,150032.9
std,8617.567475,78958.44,267478.7,161527.1,246621.6
min,0.000294,0.0,93.0,40.0,154.0
25%,0.407197,401.308,3407.25,28942.0,26281.0
50%,2.713329,814.2892,8854.5,69213.0,63219.0
75%,17.64601,2301.335,28119.75,128391.0,142460.0
max,59657.00314,1098127.0,2648989.0,1310270.0,1640689.0


In [102]:
# merge both data frames on the index
merged_stats = pd.merge(coins_2018_stats, coins_2021_stats, left_index=True, right_index=True)
merged_stats

Unnamed: 0,price 01-04-2018,market_cap_2018(millions),reddit_subscribers 01-04-2018,twitter_followers 01-04-2018,alexa_rank 01-04-2018,price 01-04-2021,market_cap_2021(millions),reddit_subscribers 01-04-2021,twitter_followers 01-04-2021,alexa_rank 01-04-2021
count,104.0,104.0,92.0,103.0,101.0,201.0,201.0,130.0,173.0,197.0
mean,95.100871,2399.536375,33098.945652,99108.737864,125221.1,1437.322981,9247.779,67109.14,117714.6,150032.9
std,688.380391,12305.968917,93679.675577,136612.392029,198965.8,8617.567475,78958.44,267478.7,161527.1,246621.6
min,0.000867,0.0,7.0,160.0,230.0,0.000294,0.0,93.0,40.0,154.0
25%,0.115632,68.516749,2859.25,20587.5,36137.0,0.407197,401.308,3407.25,28942.0,26281.0
50%,0.75563,189.210337,6814.5,56561.0,81892.0,2.713329,814.2892,8854.5,69213.0,63219.0
75%,6.308137,601.12102,20366.0,110823.5,122366.0,17.64601,2301.335,28119.75,128391.0,142460.0
max,6975.275314,118234.055447,785619.0,843082.0,1476215.0,59657.00314,1098127.0,2648989.0,1310270.0,1640689.0


In [105]:
# Add columns to calculate differences in yeard
merged_stats['price_difference'] = merged_stats['price 01-04-2021'] - merged_stats['price 01-04-2018']
merged_stats['market_cap_difference'] = merged_stats['market_cap_2021(millions)'] - merged_stats['market_cap_2018(millions)']
merged_stats['reddit_difference'] = merged_stats['reddit_subscribers 01-04-2021'] - merged_stats['reddit_subscribers 01-04-2018']
merged_stats['twitter_difference'] = merged_stats['twitter_followers 01-04-2021'] - merged_stats['twitter_followers 01-04-2018']
merged_stats['alexa_difference'] = merged_stats['alexa_rank 01-04-2021'] - merged_stats['alexa_rank 01-04-2018']

merged_stats

Unnamed: 0,price 01-04-2018,market_cap_2018(millions),reddit_subscribers 01-04-2018,twitter_followers 01-04-2018,alexa_rank 01-04-2018,price 01-04-2021,market_cap_2021(millions),reddit_subscribers 01-04-2021,twitter_followers 01-04-2021,alexa_rank 01-04-2021,price_difference,market_cap_difference,reddit_difference,twitter_difference,alexa_difference
count,104.0,104.0,92.0,103.0,101.0,201.0,201.0,130.0,173.0,197.0,97.0,97.0,38.0,70.0,96.0
mean,95.100871,2399.536375,33098.945652,99108.737864,125221.1,1437.322981,9247.779,67109.14,117714.6,150032.9,1342.22211,6848.242171,34010.19,18605.869072,24811.714429
std,688.380391,12305.968917,93679.675577,136612.392029,198965.8,8617.567475,78958.44,267478.7,161527.1,246621.6,7929.187084,66652.472506,173799.0,24914.747731,47655.766999
min,0.000867,0.0,7.0,160.0,230.0,0.000294,0.0,93.0,40.0,154.0,-0.000572,0.0,86.0,-120.0,-76.0
25%,0.115632,68.516749,2859.25,20587.5,36137.0,0.407197,401.308,3407.25,28942.0,26281.0,0.291566,332.791249,548.0,8354.5,-9856.0
50%,0.75563,189.210337,6814.5,56561.0,81892.0,2.713329,814.2892,8854.5,69213.0,63219.0,1.957699,625.078876,2040.0,12652.0,-18673.0
75%,6.308137,601.12102,20366.0,110823.5,122366.0,17.64601,2301.335,28119.75,128391.0,142460.0,11.337873,1700.214415,7753.75,17567.5,20094.0
max,6975.275314,118234.055447,785619.0,843082.0,1476215.0,59657.00314,1098127.0,2648989.0,1310270.0,1640689.0,52681.727826,979893.2555,1863370.0,467188.0,164474.0


In [106]:
difference_stats = merged_stats[['price_difference', 'market_cap_difference','reddit_difference','twitter_difference','alexa_difference']]
difference_stats

Unnamed: 0,price_difference,market_cap_difference,reddit_difference,twitter_difference,alexa_difference
count,97.0,97.0,38.0,70.0,96.0
mean,1342.22211,6848.242171,34010.19,18605.869072,24811.714429
std,7929.187084,66652.472506,173799.0,24914.747731,47655.766999
min,-0.000572,0.0,86.0,-120.0,-76.0
25%,0.291566,332.791249,548.0,8354.5,-9856.0
50%,1.957699,625.078876,2040.0,12652.0,-18673.0
75%,11.337873,1700.214415,7753.75,17567.5,20094.0
max,52681.727826,979893.2555,1863370.0,467188.0,164474.0
