In [3]:
import pandas as pd
from plotnine import *
from mizani.formatters import date_format

# Root folder of this task, relative to the working directory the notebook is
# run from. The cells below build their input (data) and output (img) paths
# by string-concatenating onto this value.
base_path = './Task1'

In [None]:
# Bitcoin monthly volume plot from 2013-04 to 2025-11, data source: CoinMarketCap
# The file is read as semicolon-separated. Forward slashes keep the path
# portable: a backslash is only a path separator on Windows.
df = pd.read_csv(
    base_path + "/data/Bitcoin_monthly_010413-011125.csv",
    sep=";"
)
# Parse as UTC, then drop the timezone so the x axis receives naive timestamps.
df['timeOpen'] = pd.to_datetime(df['timeOpen'], utc=True).dt.tz_convert(None)
# Chronological order so the line is drawn left to right.
df = df.sort_values('timeOpen', ascending=True).reset_index(drop=True)

p = (
    ggplot(df, aes(x='timeOpen', y='volume'))
    + geom_line()
    + geom_point()
    + scale_x_datetime(
        date_breaks="6 months",            # one tick every 6 months
        labels=date_format("%Y-%m")        # show YYYY-MM
    )
    # Render the y tick labels in scientific notation with two decimals.
    + scale_y_continuous(labels=lambda l: [f"{v:.2e}" for v in l])
    + labs(title="Bitcoin Monthly Volume - CoinMarketCap", x="Month", y="Volume")
    + theme(axis_text_x=element_text(rotation=45, ha='right'))
)
p.save(base_path + "/img/bitcoin_monthly_coinmarketcap.png", dpi=300)



In [None]:
# Bitcoin daily volume from 2013-04-28 to 2025-12-26, data source: CoinGecko
# Daily rows are summed into calendar months before plotting.
# Forward slashes keep the path portable: a backslash is only a path
# separator on Windows.
df = pd.read_csv(
    base_path + "/data/btc-usd-daily_coingecko.csv"
)
# Parse as UTC, then drop the timezone so the x axis receives naive timestamps.
df['snapped_at'] = pd.to_datetime(df['snapped_at'], utc=True).dt.tz_convert(None)
df = df.sort_values('snapped_at', ascending=True).reset_index(drop=True)

# Month-end bins. An offset object is used instead of the 'M' string alias,
# which newer pandas releases deprecate in favour of 'ME'; the object form
# works on both old and new versions and yields the same bins.
month_end = pd.offsets.MonthEnd()
df_monthly = (
    df.groupby(pd.Grouper(key='snapped_at', freq=month_end))['total_volume']
    .sum()
    .reset_index()
)
# Drop the last bin: per the header comment the data stops on 2025-12-26, so
# the final month is incomplete. NOTE(review): the first month (data starts
# 2013-04-28) is also partial but is kept, as in the original.
df_monthly = df_monthly.iloc[:-1]

p = (
    ggplot(df_monthly, aes(x='snapped_at', y='total_volume'))
    + geom_line()
    + geom_point()
    + scale_x_datetime(
        date_breaks="6 months",            # one tick every 6 months
        labels=date_format("%Y-%m")        # show YYYY-MM
    )
    # Render the y tick labels in scientific notation with two decimals.
    + scale_y_continuous(labels=lambda l: [f"{v:.2e}" for v in l])
    + labs(title="Bitcoin Monthly Volume - CoinGecko", x="Month", y="Volume")
    + theme(axis_text_x=element_text(rotation=45, ha='right'))
)
p.save(base_path + "/img/bitcoin_monthly_coingecko.png", dpi=300)



In [3]:
# Ethereum monthly volume plot from 2015-09 to 2025-11, data source: CoinMarketCap
# The file is read as semicolon-separated. Forward slashes keep the path
# portable: a backslash is only a path separator on Windows.
df = pd.read_csv(
    base_path + "/data/Ethereum_monthly_010915-011125.csv",
    sep=";"
)
# Parse as UTC, then drop the timezone so the x axis receives naive timestamps.
df['timeOpen'] = pd.to_datetime(df['timeOpen'], utc=True).dt.tz_convert(None)
# Chronological order so the line is drawn left to right.
df = df.sort_values('timeOpen', ascending=True).reset_index(drop=True)

p = (
    ggplot(df, aes(x='timeOpen', y='volume'))
    + geom_line()
    + geom_point()
    + scale_x_datetime(
        date_breaks="6 months",            # one tick every 6 months
        labels=date_format("%Y-%m")        # show YYYY-MM
    )
    # Render the y tick labels in scientific notation with two decimals.
    + scale_y_continuous(labels=lambda l: [f"{v:.2e}" for v in l])
    + labs(title="Ethereum Monthly Volume - CoinMarketCap", x="Month", y="Volume")
    + theme(axis_text_x=element_text(rotation=45, ha='right'))
)
p.save(base_path + "/img/ethereum_monthly_coinmarketcap.png", dpi=300)



In [None]:
# Ethereum daily volume from 2015-08-07 to 2025-12-26, data source: CoinGecko
# Daily rows are summed into calendar months before plotting.
# Forward slashes keep the path portable: a backslash is only a path
# separator on Windows.
df = pd.read_csv(
    base_path + "/data/eth-usd-daily_coingecko.csv"
)
# Parse as UTC, then drop the timezone so the x axis receives naive timestamps.
df['snapped_at'] = pd.to_datetime(df['snapped_at'], utc=True).dt.tz_convert(None)
df = df.sort_values('snapped_at', ascending=True).reset_index(drop=True)

# Month-end bins. An offset object is used instead of the 'M' string alias,
# which newer pandas releases deprecate in favour of 'ME'; the object form
# works on both old and new versions and yields the same bins.
month_end = pd.offsets.MonthEnd()
df_monthly = (
    df.groupby(pd.Grouper(key='snapped_at', freq=month_end))['total_volume']
    .sum()
    .reset_index()
)
# Drop the last bin: per the header comment the data stops on 2025-12-26, so
# the final month is incomplete. NOTE(review): the first month (data starts
# 2015-08-07) is also partial but is kept, as in the original.
df_monthly = df_monthly.iloc[:-1]

p = (
    ggplot(df_monthly, aes(x='snapped_at', y='total_volume'))
    + geom_line()
    + geom_point()
    + scale_x_datetime(
        date_breaks="6 months",            # one tick every 6 months
        labels=date_format("%Y-%m")        # show YYYY-MM
    )
    # Render the y tick labels in scientific notation with two decimals.
    + scale_y_continuous(labels=lambda l: [f"{v:.2e}" for v in l])
    + labs(title="Ethereum Monthly Volume - CoinGecko", x="Month", y="Volume")
    + theme(axis_text_x=element_text(rotation=45, ha='right'))
)
p.save(base_path + "/img/ethereum_monthly_coingecko.png", dpi=300)



In [None]:
# Tether monthly volume plot from 2015-03 to 2025-11, data source: CoinMarketCap
# (start date taken from the file name, 010315 -> 2015-03-01)
# The file is read as semicolon-separated. Forward slashes keep the path
# portable: a backslash is only a path separator on Windows.
df = pd.read_csv(
    base_path + "/data/Tether_monthly_010315-011125.csv",
    sep=";"
)
# Parse as UTC, then drop the timezone so the x axis receives naive timestamps.
df['timeOpen'] = pd.to_datetime(df['timeOpen'], utc=True).dt.tz_convert(None)
# Chronological order so the line is drawn left to right.
df = df.sort_values('timeOpen', ascending=True).reset_index(drop=True)

p = (
    ggplot(df, aes(x='timeOpen', y='volume'))
    + geom_line()
    + geom_point()
    + scale_x_datetime(
        date_breaks="6 months",            # one tick every 6 months
        labels=date_format("%Y-%m")        # show YYYY-MM
    )
    # Render the y tick labels in scientific notation with two decimals.
    + scale_y_continuous(labels=lambda l: [f"{v:.2e}" for v in l])
    + labs(title="Tether Monthly Volume - CoinMarketCap", x="Month", y="Volume")
    + theme(axis_text_x=element_text(rotation=45, ha='right'))
)
p.save(base_path + "/img/tether_monthly_coinmarketcap.png", dpi=300)



In [6]:
# Tether daily volume from 2015-02-25 to 2025-12-26, data source: CoinGecko
# Daily rows are summed into calendar months before plotting.
# Forward slashes keep the path portable: a backslash is only a path
# separator on Windows.
df = pd.read_csv(
    base_path + "/data/usdt-usd-daily_coingecko.csv"
)
# Parse as UTC, then drop the timezone so the x axis receives naive timestamps.
df['snapped_at'] = pd.to_datetime(df['snapped_at'], utc=True).dt.tz_convert(None)
df = df.sort_values('snapped_at', ascending=True).reset_index(drop=True)

# Month-end bins. An offset object is used instead of the 'M' string alias,
# which newer pandas releases deprecate in favour of 'ME'; the object form
# works on both old and new versions and yields the same bins.
month_end = pd.offsets.MonthEnd()
df_monthly = (
    df.groupby(pd.Grouper(key='snapped_at', freq=month_end))['total_volume']
    .sum()
    .reset_index()
)
# Drop the last bin: per the header comment the data stops on 2025-12-26, so
# the final month is incomplete. NOTE(review): the first month (data starts
# 2015-02-25) is also partial but is kept, as in the original.
df_monthly = df_monthly.iloc[:-1]

p = (
    ggplot(df_monthly, aes(x='snapped_at', y='total_volume'))
    + geom_line()
    + geom_point()
    + scale_x_datetime(
        date_breaks="6 months",            # one tick every 6 months
        labels=date_format("%Y-%m")        # show YYYY-MM
    )
    # Render the y tick labels in scientific notation with two decimals.
    + scale_y_continuous(labels=lambda l: [f"{v:.2e}" for v in l])
    + labs(title="Tether Monthly Volume - CoinGecko", x="Month", y="Volume")
    + theme(axis_text_x=element_text(rotation=45, ha='right'))
)
p.save(base_path + "/img/tether_monthly_coingecko.png", dpi=300)



In [4]:
# Count, per workbook sheet, how many rows fall into each World Bank region.
#
# The 7 economic regions defined by the World Bank and their codes:
#   'North America': 'NAC'
#   'East Asia & Pacific': 'EAS'
#   'Europe & Central Asia': 'ECS'
#   'Latin America & Caribbean': 'LCN'
#   'Sub-Saharan Africa': 'SSF'
#   'South Asia': 'SAS'
#   'Middle East, North Africa, Afghanistan & Pakistan': 'MEA'

# Forward slashes keep the path portable: a backslash is only a path
# separator on Windows.
chainalysis_data = base_path + "/data/cha_report_summary.xlsx"

regions = ['NAC', 'EAS', 'ECS', 'LCN', 'SSF', 'SAS', 'MEA']

# sheet name -> Series of row counts indexed by region code
region_counts = {}

# The context manager closes the workbook even if reading a sheet raises,
# which the manual close() at the end of the cell did not guarantee.
with pd.ExcelFile(chainalysis_data) as cha_xls:
    for sheet_yr in cha_xls.sheet_names:
        df_cha = pd.read_excel(cha_xls, sheet_name=sheet_yr)
        # Keep only rows whose 'Region' value is one of the known codes.
        df_cha = df_cha[df_cha['Region'].isin(regions)]
        counts = df_cha['Region'].value_counts()
        # Fixed row order; regions absent from a sheet get a count of 0.
        counts = counts.reindex(regions, fill_value=0)
        region_counts[sheet_yr] = counts

# Name the index explicitly: whether value_counts() carries the column name
# over to the index depends on the pandas version, and the plotting cell
# needs a 'Region' column after reset_index().
region_counts_df = pd.DataFrame(region_counts).rename_axis('Region')
print(region_counts_df)

        2020  2021  2022  2023  2024  2025
Region                                    
NAC        1     1     1     2     2     1
EAS        6     4     5     6     7     6
ECS        4     2     4     4     4     4
LCN        4     4     4     3     4     3
SSF        3     6     2     1     1     2
SAS        1     3     3     3     2     3
MEA        1     0     1     1     0     1


In [7]:
# Categorical palette for the stacked bar chart; only the first
# <number of regions> entries are used below.
colors = [
    "#1f77b4", "#ff7f0e", "#2ca02c", "#d62728",
    "#9467bd", "#8c564b", "#e377c2", "#7f7f7f",
    "#bcbd22", "#17becf", "#aec7e8", "#ffbb78",
    "#98df8a", "#ff9896", "#c5b0d5", "#c49c94",
    "#f7b6d2", "#c7c7c7", "#dbdb8d", "#9edae5"
]

# Reshape wide (one column per sheet) to long (Region, Year, Count).
# rename_axis makes sure the index becomes a column called 'Region' after
# reset_index(); without it melt() raises a KeyError whenever the index of
# region_counts_df is unnamed.
df_long = (
    region_counts_df
    .rename_axis('Region')
    .reset_index()
    .melt(id_vars='Region', var_name='Year', value_name='Count')
)

# Heatmap: one tile per (Year, Region), shaded white -> red by count and
# annotated with the count itself.
heatmap = (
    ggplot(df_long, aes(x='Year', y='Region', fill='Count'))
    + geom_tile(color='white')
    + geom_text(aes(label='Count'), color='black')
    + scale_fill_gradient(low='white', high='red')
    # Same title/labels as the stacked bar so the figure stands on its own.
    + labs(title='Counts by Region per Year',
           x='Year',
           y='Region')
    + theme_minimal()
)
heatmap.save(base_path + "/img/region_report_heatmap.png", dpi=300)

# Stacked bar: one bar per year, split by region.
n_regions = df_long['Region'].nunique()
stacked_bar = (
    ggplot(df_long, aes(x='Year', y='Count', fill='Region'))
    + geom_col()  # geom_col automatically stacks values
    + scale_fill_manual(values=colors[:n_regions])
    + labs(title='Counts by Region per Year',
           x='Year',
           y='Count')
    + theme_minimal()
    + theme(
        axis_text_x=element_text(rotation=0, hjust=0.5),
        figure_size=(8, 6)
    )
)
stacked_bar.save(base_path + "/img/region_report_stacked_bar.png", dpi=300)

