### load required packages 

In [None]:
import pandas as pd
import numpy as np

import os
import glob
from functools import reduce

import statistics 
import matplotlib.pyplot as plt

import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots

### load and clean data

In [None]:
# Read every column as text; the numeric columns are cast explicitly further down.
df = pd.read_csv('./nft_data.csv', low_memory=False, dtype='unicode')

# Re-render timestamps as 'YYYY-MM-DD HH:MM:SS' text. Values that cannot be
# parsed are coerced to missing instead of raising.
parsed_times = pd.to_datetime(df['transaction_time'], errors='coerce')
df['transaction_time'] = parsed_times.dt.strftime('%Y-%m-%d %H:%M:%S')

# Drop the 'Unnamed: *' columns (presumably leftover saved indexes -- confirm)
# and correct the misspelled payment-token column name.
df = df.drop(columns=['Unnamed: 0', 'Unnamed: 0.1', 'Unnamed: 0.1.1', 'Unnamed: 0.1.1.1'])
df = df.rename(columns={'payment_tocken': 'payment_token'})

# Keep only the columns used by the analysis, in a fixed order, and strip the
# '_x' suffixes from the two suffixed column names.
ordered_columns = [
    'id', 'name', 'asset_name', 'collection_slug_x', 'asset_contract_type',
    'schema_name', 'event_type', 'description', 'traits', 'trait_type',
    'trait_value', 'num_sales', 'creator_adress', 'created_date_x',
    'transaction_time', 'total_price', 'payment_token', 'token_usd_price',
    'total_usd_price', 'to_account', 'seller_account', 'seller_address',
    'winner_account', 'winner_address', 'owner_address',
    'image_url', 'image_original_url',
]
df = df[ordered_columns].rename(columns={
    'collection_slug_x': 'collection_slug',
    'created_date_x': 'created_date',
})

# Cast the text columns that are used numerically later on.
df['id'] = df['id'].astype(float).astype(int)
df['total_usd_price'] = df['total_usd_price'].astype(float)
df['num_sales'] = df['num_sales'].astype(int)

# Set the collection slug from the asset name for every known asset name.
# Matching rows are overwritten whether or not they already had a slug.
slug_by_asset_name = {
    'Art Blocks': 'art-blocks',
    'BoredApeYachtClub': 'boredapeyachtclub',
    'Rarible V1': 'rarible',
    'SuperRare': 'superrare',
    'Beeple Round 2': 'beeple-everydays',
    'Hashmasks': 'hashmasks',
    'Beeple Round 2 Open Edition': 'beeple-everydays',
    'Makersplace V2': 'makersplace',
    'Wrapped Cryptopunks': 'wrapped-cryptopunks',
    'CryptoPunks': 'cryptopunks',
    'MakersPlace V3': 'makersplace',
    'MakersPlace': 'makersplace',
    'Autoglyphs': 'autoglyphs',
}
for asset_name, slug in slug_by_asset_name.items():
    df.loc[df['asset_name'] == asset_name, 'collection_slug'] = slug

### get data for each collection

In [None]:
def get_collection(df, slug):
    """Return the rows of ``df`` whose ``collection_slug`` equals ``slug``.

    Rows keep their original index labels and column order. Rows with a
    missing ``collection_slug`` never match.
    """
    in_collection = df['collection_slug'].eq(slug)
    return df[in_collection]

# One frame per collection, holding every event recorded for that slug.
# The targets on the left line up one-to-one with the slugs on the right.
(
    beeple_df,
    artblocks_df,
    hashmasks_df,
    superrare_df,
    boredapeyachtclub_df,
    rarible_df,
    makersplace_df,
    wrappedcryptopunks_df,
    cryptopunks_df,
    autoglyphs_df,
) = (
    get_collection(df, slug)
    for slug in (
        'beeple-everydays',
        'art-blocks',
        'hashmasks',
        'superrare',
        'boredapeyachtclub',
        'rarible',
        'makersplace',
        'wrapped-cryptopunks',
        'cryptopunks',
        'autoglyphs',
    )
)


### price box plot

In [None]:
# Keep only events whose event_type is 'successful', then discard the ones
# that have no USD price.
is_successful = df['event_type'] == 'successful'
suc_df = df[is_successful].dropna(subset=['total_usd_price'])

# Unused alternative, kept for reference:
# suc_df_gp = suc_df.groupby(by=['collection_slug', 'id']).sum()

def get_last_price(df, slug):
    """Return one row per asset ``id`` for a collection: its latest event.

    The rows of ``df`` belonging to ``slug`` (selected with
    ``get_collection``) are sorted by ``transaction_time`` in ascending
    order, and only the last row for each ``id`` is kept.

    Parameters
    ----------
    df : pandas.DataFrame
        Event table with at least the ``collection_slug``, ``id`` and
        ``transaction_time`` columns.
    slug : str
        Collection slug to select.

    Returns
    -------
    pandas.DataFrame
        The selected rows, sorted by ``transaction_time``, with a unique
        ``id`` per row.
    """
    collection_df = get_collection(df, slug)
    # Put missing timestamps first. The default (NaN last) would let an
    # undated row win the ``keep="last"`` de-duplication below and be reported
    # as the most recent price even when dated rows exist for the same id.
    collection_df = collection_df.sort_values(
        'transaction_time', ascending=True, na_position='first'
    )
    # Keep the last (most recent) row per id and drop the historical ones.
    collection_df = collection_df.drop_duplicates(subset='id', keep="last")
    return collection_df

# Latest successful sale per asset, one frame per collection.
# The targets on the left line up one-to-one with the slugs on the right.
(
    beeple_suc_df,
    artblocks_suc_df,
    hashmasks_suc_df,
    superrare_suc_df,
    boredapeyachtclub_suc_df,
    rarible_suc_df,
    makersplace_suc_df,
    wrappedcryptopunks_suc_df,
    cryptopunks_suc_df,
    autoglyphs_suc_df,
) = (
    get_last_price(suc_df, slug)
    for slug in (
        'beeple-everydays',
        'art-blocks',
        'hashmasks',
        'superrare',
        'boredapeyachtclub',
        'rarible',
        'makersplace',
        'wrapped-cryptopunks',
        'cryptopunks',
        'autoglyphs',
    )
)

In [None]:
# Per-collection frames holding the latest successful sale of each asset.
suc_dfs = [beeple_suc_df,
           artblocks_suc_df,
           hashmasks_suc_df,
           superrare_suc_df,
           boredapeyachtclub_suc_df,
           rarible_suc_df,
           makersplace_suc_df,
           wrappedcryptopunks_suc_df,
           cryptopunks_suc_df,
           autoglyphs_suc_df]

# Column labels, listed in exactly the same order as ``suc_dfs``.
# They must not be taken from df['collection_slug'].unique(): that follows the
# order of first appearance in the raw data (and may include extra or missing
# slugs), so the statistics would be attached to the wrong collection or the
# assignment would fail on a length mismatch.
suc_df_names = ['beeple-everydays',
                'art-blocks',
                'hashmasks',
                'superrare',
                'boredapeyachtclub',
                'rarible',
                'makersplace',
                'wrapped-cryptopunks',
                'cryptopunks',
                'autoglyphs']

# Show floats with two decimals (this is a global pandas display setting).
pd.options.display.float_format = "{:.2f}".format

# Summary statistics of the USD price, one column per collection.
price_stat = pd.concat(
    [frame['total_usd_price'].describe() for frame in suc_dfs],
    axis=1,
)
price_stat.columns = suc_df_names

price_stat

In [None]:
# price box plot: one box per collection showing the natural log of the
# latest USD sale price of each asset.
price_boxes = [
    ('beeple-everydays', beeple_suc_df),
    ('art-blocks', artblocks_suc_df),
    ('hashmasks', hashmasks_suc_df),
    ('superrare', superrare_suc_df),
    ('boredapeyachtclub', boredapeyachtclub_suc_df),
    ('rarible', rarible_suc_df),
    ('makersplace', makersplace_suc_df),
    ('wrapped-cryptopunks', wrappedcryptopunks_suc_df),
    ('cryptopunks', cryptopunks_suc_df),
    ('autoglyphs', autoglyphs_suc_df),
]

fig = go.Figure()

fig.layout.update(
    height=800,
    width=1000,
    title='',
    showlegend=False,
    font={'family': 'Times New Roman', 'size': 20},
)

# Traces are added in list order, which fixes the left-to-right box order.
for box_label, box_frame in price_boxes:
    fig.add_trace(
        go.Box(
            y=np.log(box_frame['total_usd_price']),
            name=box_label,
            marker_color='blue',
        )
    )

fig.layout.xaxis.update(title='Collection')
# Optional fixed y-range, currently disabled:
# range=[-20, 25], dtick=10, autorange=False
fig.layout.yaxis.update(title='Log price in USD')

# Black frame on all four sides, no grid lines.
frame_style = dict(showline=True, linewidth=1, linecolor='black', mirror=True, showgrid=False)
fig.update_xaxes(tickangle=15, **frame_style)
fig.update_yaxes(**frame_style)

# Transparent plot and paper backgrounds, black text.
fig.update_layout(
    plot_bgcolor='rgba(0,0,0,0)',
    paper_bgcolor='rgba(0,0,0,0)',
    font_color='black',
    bargap=0.01,
)

fig.show()
# fig.write_image('./box_prices.pdf')

In [None]:
# num_sales box plot: one box per collection showing the natural log of the
# num_sales value on each asset's latest successful-sale row.
sales_boxes = [
    ('beeple-everydays', beeple_suc_df),
    ('art-blocks', artblocks_suc_df),
    ('hashmasks', hashmasks_suc_df),
    ('superrare', superrare_suc_df),
    ('boredapeyachtclub', boredapeyachtclub_suc_df),
    ('rarible', rarible_suc_df),
    ('makersplace', makersplace_suc_df),
    ('wrapped-cryptopunks', wrappedcryptopunks_suc_df),
    ('cryptopunks', cryptopunks_suc_df),
    ('autoglyphs', autoglyphs_suc_df),
]

fig = go.Figure()

fig.layout.update(
    height=800,
    width=1000,
    title='',
    showlegend=False,
    font={'family': 'Times New Roman', 'size': 20},
)

# Traces are added in list order, which fixes the left-to-right box order.
for box_label, box_frame in sales_boxes:
    fig.add_trace(
        go.Box(
            y=np.log(box_frame['num_sales']),
            name=box_label,
            marker_color='blue',
        )
    )

fig.layout.xaxis.update(title='Collection')
# Optional fixed y-range, currently disabled:
# range=[-20, 25], dtick=10, autorange=False
fig.layout.yaxis.update(title='Number of sales (log)')

# Black frame on all four sides, no grid lines.
frame_style = dict(showline=True, linewidth=1, linecolor='black', mirror=True, showgrid=False)
fig.update_xaxes(tickangle=15, **frame_style)
fig.update_yaxes(**frame_style)

# Transparent plot and paper backgrounds, black text.
fig.update_layout(
    plot_bgcolor='rgba(0,0,0,0)',
    paper_bgcolor='rgba(0,0,0,0)',
    font_color='black',
    bargap=0.01,
)

fig.show()
# fig.write_image('./box_num_sales.pdf')



### avg. price vs. crix

In [None]:
# Daily USD volume of successful sales (left axis) against the CRIX index
# (right axis), both on a natural-log scale.
start_date = pd.to_datetime('2019-03-01')
end_date = pd.to_datetime('2022-03-09')

# CRIX series: parse dates, keep the open interval (start_date, end_date)
# and keep the first row for any repeated date.
crix_df = pd.read_csv('./crix.csv')
crix_df['date'] = pd.to_datetime(crix_df['date'])
crix_df = crix_df[(crix_df['date'] > start_date) & (crix_df['date'] < end_date)]
crix_df = crix_df.drop_duplicates(subset=['date'], keep='first')

# Successful events that have a USD price. Take an independent copy so the
# column assignment below does not overwrite transaction_time in the shared
# ``df`` (the earlier ``adjusted_df = df`` was only an alias of it).
adjusted_df = (
    df.loc[df['event_type'] == 'successful']
    .dropna(subset=['total_usd_price'])
    .copy()
)

# Truncate timestamps to midnight but stay in datetime64. ``.dt.date`` would
# produce datetime.date objects, and comparing those with a Timestamp (as in
# the filter below) is deprecated in pandas 1.x and raises in pandas >= 2.0.
adjusted_df['transaction_time'] = pd.to_datetime(adjusted_df['transaction_time']).dt.normalize()

# Total USD volume per calendar day, restricted to days after start_date.
vol_df = adjusted_df.groupby(['transaction_time'], as_index=False)['total_usd_price'].sum()
vol_df = vol_df[vol_df['transaction_time'] > start_date]

fig = make_subplots(specs=[[{"secondary_y": True}]])

fig['layout'].update(height=800, width=1200,
                     title='',
                     showlegend=False)

# Left axis: log of the daily transaction volume.
fig.add_trace(go.Scatter(
    y=np.log(vol_df['total_usd_price']),
    x=vol_df['transaction_time'],
    line=dict(color='blue', dash='solid')
    ), secondary_y=False)

# Right axis: log of the CRIX price.
fig.add_trace(go.Scatter(
    y=np.log(crix_df['price']),
    x=crix_df['date'],
    line=dict(color='#FF7F00', dash='dot')
    ), secondary_y=True)

fig['layout']['xaxis'].update(title='Date')
fig['layout']['yaxis1'].update(title='Total transaction volume in USD (log)')
fig['layout']['yaxis2'].update(title='CRIX (log)')

# Place a tick at every 40th CRIX date.
fig.update_xaxes(showline=True, linewidth=1, linecolor='black', mirror=True, tickformat="%b\n%Y", showgrid=False, tickvals = crix_df['date'][0::40])

fig.update_yaxes(showline=True, linewidth=1, linecolor='black', mirror=True, showgrid=False)

# Transparent plot and paper backgrounds, black text.
fig.update_layout({'plot_bgcolor': 'rgba(0,0,0,0)',
                   'paper_bgcolor': 'rgba(0,0,0,0)'},
                  font_color='black')

fig.update_layout( font=dict(family='Times New Roman', size=20))

fig.show()
# fig.write_image('./vol_vs_crix.pdf')

