# Rational Bubbles in NFTs

## Packages and Data

In [2]:
import pandas as pd
import numpy as np
import seaborn as sns
import warnings
warnings.filterwarnings("ignore", category=UserWarning)
from statsmodels.tsa.stattools import adfuller, kpss

In [3]:
bcoin = pd.read_csv('bcoin.csv')
cake = pd.read_csv('cake.csv')
brl = pd.read_csv('brl.csv')
eur = pd.read_csv('eur.csv')
jpy = pd.read_csv('jpy.csv')
cny = pd.read_csv('cny.csv')

## Oversampling

The Brl, Eur, Jpy and Cny data will have a smaller number of observations since they are not quoted on weekends or specific holidays, so I'm going to carry out the oversampling process, adjusting the smaller bases to contain NA on non-closing dates, and then carry out a missing imputation with a moving average. 
The idea of oversampling comes from the fact that the matrices need to be the same size, which is not the case, and undersampling can lead to the loss of important data for the test. The literature supports oversampling because it is easy to impute missing in these cases without biasing the data

In [4]:
def overfit(df, data_col='Date', valor_col='Close', formato='%d/%m/%Y'):

    df[data_col] = pd.to_datetime(df[data_col], format=formato)
    
    min_date = df[data_col].min()
    max_date = df[data_col].max()
    all_dates = pd.date_range(start=min_date, end=max_date)

    df_c = pd.DataFrame({data_col: all_dates})

    df = df_c.merge(df, on=data_col, how='left')

    df = df.sort_values(data_col).reset_index(drop=True)
    
    return df

brl = overfit(brl)
cny = overfit(cny)
jpy = overfit(jpy)
eur = overfit(eur)

for df in [brl, cny, jpy, eur]:
    df['Date_str'] = df['Date'].dt.strftime('%d/%m/%Y')

## Missing Imputation

As the dates placed through oversampling have no entries for the currencies, I'm going to perform an imputation process by local average, thus guaranteeing a “smoothness” in the imputation

In [6]:
def loc_mean(df, value_col='Close'):

    std_before = df[value_col].std()

    prev = df[value_col].shift(1)
    next_ = df[value_col].shift(-1)
    
    df[value_col] = df[value_col].combine_first(
        ((prev + next_) / 2)
    )
    
    std_after = df[value_col].std()

    return df, std_before, std_after

currencies = {'CNY': cny, 'BRL': brl, 'JPY': jpy, 'EUR': eur}

results = {}

for name, df in currencies.items():
    df_imputed, std_before, std_after = loc_mean(df.copy())
    results[name] = {
        'std_before': std_before,
        'std_after': std_after
    }

    currencies[name] = df_imputed

for name, res in results.items():
    print(f'{name}:')
    print(f'  std_before : {res["std_before"]:.4f}')
    print(f'  std_after  : {res["std_after"]:.4f}\n')

CNY:
  std_before : 0.0066
  std_after  : 0.0066

BRL:
  std_before : 0.0095
  std_after  : 0.0095

JPY:
  std_before : 0.0007
  std_after  : 0.0007

EUR:
  std_before : 0.0492
  std_after  : 0.0492



We can notice that the change in standard deviation was very low, which according to the literature is a good indicator when performing imputation

## Unit Root Tests (URT)

In [9]:
adf_lags = {
    'BRL': {'level': 3,  'diff': 2},
    'EUR': {'level': 34, 'diff': 33},
    'JPY': {'level': 22, 'diff': 21},
    'CNY': {'level': 32, 'diff': 33}
}

kpss_lags = {
    'BRL': {'level': 21, 'diff': 4},
    'EUR': {'level': 21, 'diff': 37},
    'JPY': {'level': 21, 'diff': 66},
    'CNY': {'level': 21, 'diff': 17}
}

def run_lag(series, name, level, adf_lag, kpss_lag):
    data = series.dropna()
    if level == 'diff':
        data = data.diff().dropna()
    
    adf_result = adfuller(data, maxlag=adf_lag, autolag=None)
    adf_stat = adf_result[0]
    adf_pvalue = adf_result[1]

    kpss_result = kpss(data, regression='c', nlags=kpss_lag)
    kpss_stat = kpss_result[0]
    kpss_pvalue = kpss_result[1]

    return {
        'name': name,
        'level': 'First Difference' if level == 'diff' else 'Level',
        'ADF': {'stat': adf_stat, 'lag': adf_lag, 'p': adf_pvalue},
        'KPSS': {'stat': kpss_stat, 'lag': kpss_lag, 'p': kpss_pvalue}
    }

currencies = {'BRL': brl, 'EUR': eur, 'JPY': jpy, 'CNY': cny}
results = []

for name, df in currencies.items():
    for level in ['level', 'diff']:
        result = run_lag(
            series=df['Close'],
            name=name,
            level=level,
            adf_lag=adf_lags[name][level],
            kpss_lag=kpss_lags[name][level]
        )
        results.append(result)

for res in results:
    print(f"{res['name']} - {res['level']}")
    print(f"  ADF  : stat={res['ADF']['stat']:.2f}, lag={res['ADF']['lag']}, p={res['ADF']['p']:.4f}")
    print(f"  KPSS: stat={res['KPSS']['stat']:.2f}, lag={res['KPSS']['lag']}, p={res['KPSS']['p']:.4f}\n")

BRL - Level
  ADF  : stat=-1.91, lag=3, p=0.3281
  KPSS: stat=0.58, lag=21, p=0.0246

BRL - First Difference
  ADF  : stat=-13.57, lag=2, p=0.0000
  KPSS: stat=0.06, lag=4, p=0.1000

EUR - Level
  ADF  : stat=-1.49, lag=34, p=0.5383
  KPSS: stat=0.79, lag=21, p=0.0100

EUR - First Difference
  ADF  : stat=-3.12, lag=33, p=0.0253
  KPSS: stat=0.43, lag=37, p=0.0655

JPY - Level
  ADF  : stat=-1.66, lag=22, p=0.4506
  KPSS: stat=1.56, lag=21, p=0.0100

JPY - First Difference
  ADF  : stat=-4.09, lag=21, p=0.0010
  KPSS: stat=0.20, lag=66, p=0.1000

CNY - Level
  ADF  : stat=-1.44, lag=32, p=0.5617
  KPSS: stat=1.72, lag=21, p=0.0100

CNY - First Difference
  ADF  : stat=-3.12, lag=33, p=0.0248
  KPSS: stat=0.10, lag=17, p=0.1000



The results remain similar to those originally obtained in the article, indicating that the bubbles observed in NFTs are more speculative than rational