# 2 - Token Volume Analysis
In this section, we focus on the tokens' 24H volume trend. Both time and frequency domain techniques will be used. Hourly volume analysis may be added in the future.

In [None]:
# Standard Library
import math
import os

# Third Party Library
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
from matplotlib.pyplot import MaxNLocator

# Local Folder Library
from pyammanalysis.util import read_yaml

In [None]:
# Notebook-wide settings come from the shared YAML config.
config = read_yaml("../config.yaml")
SUBPLOTS_PER_ROW = config["SUBPLOTS_PER_ROW"]  # columns in every subplot grid

# Folder that holds the CSV inputs (relative to the working directory).
DATA_PATH = "data"

# Token listing; only the "symbol" column is used by the cells below.
tokens_csv_path = os.path.join(DATA_PATH, "tokens_df.csv")
tokens_df = pd.read_csv(tokens_csv_path)
token_names = tokens_df["symbol"]

# Per-day token data, with the "date" column parsed into datetimes.
token_day_csv_path = os.path.join(DATA_PATH, "token_day_df.csv")
token_day_df = pd.read_csv(token_day_csv_path, parse_dates=["date"])

In [None]:
# TODO: cleanse the gaps and erroneous entries.
# also, avoid the first few days of token launch

# for now, do naive plots and correlations

## Time Domain
### Time Trend

In [None]:
# Plot every token's 24H volume against time on one shared axis.
# Tokens without a "<symbol>_volumeUSD" column are collected in
# `missing_tokens` instead of being plotted.
missing_tokens = []

plt.figure(figsize=(19, 9))
for token_name in token_names:
    volume_column = f"{token_name}_volumeUSD"
    # Check for the column explicitly rather than swallowing every exception,
    # so that genuine plotting errors are not silently hidden.
    if volume_column not in token_day_df.columns:
        missing_tokens.append(token_name)
        continue
    # Label each line as it is drawn so the legend stays aligned with the
    # plotted tokens even when some tokens are skipped.
    plt.plot(token_day_df["date"], token_day_df[volume_column], label=str(token_name))
plt.title("24H Volume over Time")
plt.xlabel("date")
plt.ylabel("24H Volume (in USD)")
# Only draw a legend when at least one line was plotted.
if plt.gca().get_legend_handles_labels()[0]:
    plt.legend(loc="upper left")
plt.show()

In [None]:
# List the tokens that were skipped while plotting the time trend.
print(missing_tokens)

### Histogram
Observe the distribution of each token's 24H volume.

In [None]:
# Draw one histogram of 24H volume per token on a grid of subplots.
# The row count follows the number of tokens, so a longer token list cannot
# index past the grid; 4 rows reproduce the original 24x16 figure.
n_rows = max(1, math.ceil(len(token_names) / SUBPLOTS_PER_ROW))
fig = plt.figure(figsize=(24, 4 * n_rows))
axes = fig.subplots(n_rows, SUBPLOTS_PER_ROW, squeeze=False)
for i, token_name in enumerate(token_names):
    ax = axes[i // SUBPLOTS_PER_ROW, i % SUBPLOTS_PER_ROW]
    ax.title.set_text(token_name)

    # Tokens without a volume column (or without any observation) keep an
    # empty, titled subplot instead of aborting the whole figure.
    volume_column = f"{token_name}_volumeUSD"
    if volume_column not in token_day_df.columns:
        continue
    volumeUSD_series = token_day_df[volume_column].dropna()
    if volumeUSD_series.empty:
        continue

    # Clip the x-range at the 99th percentile so that a few extreme days do
    # not squash the bulk of the distribution into the first bins.
    ax.hist(volumeUSD_series, bins=100, range=(0, volumeUSD_series.quantile(0.99)))

    # force y-axis ticks to use integers
    ax.get_yaxis().set_major_locator(MaxNLocator(integer=True))

    # highlight the interquartile range (25th to 75th percentile)
    lq = volumeUSD_series.quantile(0.25)
    uq = volumeUSD_series.quantile(0.75)
    ax.axvspan(lq, uq, color="green", alpha=0.25)

fig.suptitle("24H Volume Distributions")
fig.supxlabel("24H Volume (in USD)")
plt.show()

In [None]:
# Per-column summary statistics of the daily token data, excluding the raw
# "timestamp" column.
token_df = token_day_df.drop("timestamp", axis=1)

column_means = token_df.mean()
# note: df.std() is the sample standard deviation (normalized by N-1)
column_stdevs = token_df.std()

token_metrics_df = pd.DataFrame(
    data=[column_means, column_stdevs],
    index=["mean", "stdev"],
)
token_metrics_df

## Correlation
### TVL Correlation

In [None]:
# Keep only the columns whose name ends with "totalValueLockedUSD" and relabel
# each one with the text before its first underscore.
# NOTE(review): presumably that prefix is the token symbol; a symbol that
# itself contains "_" would be truncated - confirm against the data.
is_tvl_column = token_day_df.columns.str.endswith("totalValueLockedUSD")
token_day_tvl_df = token_day_df.loc[:, is_tvl_column]
token_day_tvl_df = token_day_tvl_df.rename(columns=lambda name: name.split("_")[0])

# Pairwise Pearson correlation between the TVL columns.
corr_df = token_day_tvl_df.corr(method="pearson")
corr_df = corr_df.rename_axis("symbol", axis=1)
corr_df

In [None]:
# The correlation matrix is symmetric, so hide the upper triangle (diagonal
# included) and show only the lower triangle.
mask = np.triu(np.ones_like(corr_df))

# Heatmap on a fixed [-1, 1] colour scale.
plt.figure(figsize=(18, 18))
plt.title("Token TVL Pearson Correlation")
sns.heatmap(
    corr_df,
    mask=mask,
    cmap="RdYlGn",
    vmin=-1.0,
    vmax=1.0,
    linewidths=2.5,
)
plt.xticks(rotation=90)
plt.yticks(rotation=0)
plt.show()

## Frequency Domain
### Fast Fourier Transform (FFT)
FFT computes the frequency content of each token's 24H volume series, treated as a signal.

In [None]:
# Plot the FFT magnitude of each token's 24H volume on a grid of subplots.
# The row count follows the number of tokens, so a longer token list cannot
# index past the grid; 4 rows reproduce the original 24x16 figure.
n_rows = max(1, math.ceil(len(token_names) / SUBPLOTS_PER_ROW))
fig = plt.figure(figsize=(24, 4 * n_rows))
axes = fig.subplots(n_rows, SUBPLOTS_PER_ROW, squeeze=False)
for i, token_name in enumerate(token_names):
    ax = axes[i // SUBPLOTS_PER_ROW, i % SUBPLOTS_PER_ROW]
    ax.title.set_text(token_name)

    # Tokens without a volume column keep an empty, titled subplot instead of
    # aborting the whole figure with a KeyError.
    volume_column = f"{token_name}_volumeUSD"
    if volume_column not in token_day_df.columns:
        continue
    date_volume_df = token_day_df[["date", volume_column]].dropna()
    volumeUSD_series = date_volume_df[volume_column]

    # Fewer than 3 samples leave no strictly positive frequency bin, and an
    # empty input makes np.fft.fft raise, so there is nothing to plot.
    n_samples = date_volume_df.shape[0]
    if n_samples < 3:
        continue

    # reference for zero-mean signal:
    # https://dsp.stackexchange.com/questions/46950/removing-mean-from-signal-massively-distorts-fft
    # only keep those with freq STRICTLY > 0 (bin 0 and the negative half of
    # the spectrum are dropped)
    # NOTE(review): rows removed by dropna() are treated as if the remaining
    # samples were evenly spaced one day apart - confirm this is acceptable.
    f_max = math.ceil(n_samples / 2)
    centered = (volumeUSD_series - volumeUSD_series.mean()).to_numpy()
    Y = np.abs(np.fft.fft(centered))[1:f_max]
    freq = np.fft.fftfreq(n_samples, 1)[1:f_max]

    ax.plot(freq, Y)

fig.suptitle("24H Volume FFT")
fig.supxlabel("freq (in /day)")
fig.supylabel("24H Volume (in USD)")
plt.show()

In [None]:
# Plot the squared FFT magnitude of each token's 24H volume on a log y-axis,
# one subplot per token.
# The row count follows the number of tokens, so a longer token list cannot
# index past the grid; 4 rows reproduce the original 24x16 figure.
n_rows = max(1, math.ceil(len(token_names) / SUBPLOTS_PER_ROW))
fig = plt.figure(figsize=(24, 4 * n_rows))
axes = fig.subplots(n_rows, SUBPLOTS_PER_ROW, squeeze=False)
for i, token_name in enumerate(token_names):
    ax = axes[i // SUBPLOTS_PER_ROW, i % SUBPLOTS_PER_ROW]
    ax.title.set_text(token_name)

    # Tokens without a volume column keep an empty, titled subplot instead of
    # aborting the whole figure with a KeyError.
    volume_column = f"{token_name}_volumeUSD"
    if volume_column not in token_day_df.columns:
        continue
    date_volume_df = token_day_df[["date", volume_column]].dropna()
    volumeUSD_series = date_volume_df[volume_column]

    # Fewer than 3 samples leave no strictly positive frequency bin, and an
    # empty input makes np.fft.fft raise, so there is nothing to plot.
    n_samples = date_volume_df.shape[0]
    if n_samples < 3:
        continue

    # reference for zero-mean signal:
    # https://dsp.stackexchange.com/questions/46950/removing-mean-from-signal-massively-distorts-fft
    # only keep those with freq STRICTLY > 0 (bin 0 and the negative half of
    # the spectrum are dropped)
    f_max = math.ceil(n_samples / 2)
    centered = (volumeUSD_series - volumeUSD_series.mean()).to_numpy()
    Y = np.abs(np.fft.fft(centered))[1:f_max]
    freq = np.fft.fftfreq(n_samples, 1)[1:f_max]

    # c.f. power spectral density in signal processing.
    # Y already holds real magnitudes, so the power is simply its square.
    spectrum = Y**2

    # Show every retained bin: the x-axis starts at the lowest positive
    # frequency (freq[0]); freq[1] would hide that bin and fail on short series.
    if freq.size > 1:
        ax.set_xlim(left=freq[0], right=freq[-1])

    # The squared magnitudes are non-negative, so they can be drawn on a
    # logarithmic y-axis (exact zeros have no logarithm and are not shown).
    ax.semilogy(freq, spectrum)

fig.suptitle("Semilog Plot of 24H Volume FFT")
fig.supxlabel("freq (in /day)")
fig.supylabel("Magnitude")
plt.show()