In [1]:
from bokeh.plotting import figure,show
from bokeh.io import output_notebook
from bokeh.models import ColumnDataSource, HoverTool, FactorRange, NumeralTickFormatter, Select, Div, LegendItem, DatetimeTickFormatter
from bokeh.layouts import column, row
from bokeh.models.annotations import Label
from bokeh.palettes import d3, Turbo256, tol

import pandas as pd
import numpy as np

In [2]:
# Activate Bokeh output in the Jupyter notebook; this must run before any
# `show(...)` call further down.
output_notebook()

In [3]:
# Read all pickle files containing the cleaned data and the derived dataframes.
# Paths are relative to the notebook's working directory.
# NOTE(review): pd.read_pickle can execute arbitrary code while unpickling;
# only load files that come from a trusted source.

df_asset = pd.read_pickle("data/asset_subset.pkl")
df_inflation = pd.read_pickle("data/inflation_subset.pkl")
pct_asset = pd.read_pickle("data/pct_asset.pkl")
pct_inflation = pd.read_pickle("data/pct_inflation.pkl")
# NOTE(review): "data/data_only" has no .pkl extension, unlike the other files,
# and no visible code reads the module-level `data` — confirm it is still needed.
data = pd.read_pickle("data/data_only")

## **Original Value Trends Over Time**
- The following figure gives an overview of the trend of each asset and inflation indicator over time.
- Because the series differ in scale, the figure is split into four panels, with the assets on the first row and the inflation indicators on the second row.

In [4]:
# Full palette: the 16-color variant of d3 Category20
full_palette = list(d3['Category20'][16])

# Divide the palette into four equally sized segments, one per figure
palette_segment_size = len(full_palette) // 4
palettes = [full_palette[i:i + palette_segment_size] for i in range(0, len(full_palette), palette_segment_size)]

# Assign each segment to a figure
p1_colors, p2_colors, p3_colors, p4_colors = palettes[:4]

# Initialize four figures; p2-p4 reuse p1's x range so panning/zooming one moves all of them
p1 = figure(width=500, height=400, x_axis_type="datetime", title="Asset Prices 1")
p2 = figure(width=500, height=400, x_axis_type="datetime", title="Asset Prices 2", x_range=p1.x_range)
p3 = figure(width=500, height=400, x_axis_type="datetime", title="Inflation", x_range=p1.x_range)
p4 = figure(width=500, height=400, x_axis_type="datetime", title="CPI", x_range=p1.x_range)


def _add_series_lines(fig, frame, columns, colors, color_offset=0):
    """Draw one line per entry of `columns` on `fig`, plotted against `frame['Time']`.

    Parameters
    ----------
    fig : bokeh figure that receives the lines.
    frame : DataFrame holding a 'Time' column plus every column named in `columns`.
    columns : list of column names to draw; each name is also used as the legend label.
    colors : list of colors to cycle through.
    color_offset : index of the color used for the first column (wraps around `colors`).
    """
    for i, col in enumerate(columns):
        source = ColumnDataSource(data={
            'x': frame['Time'],
            'y': frame[col],
            'label': [col] * len(frame),  # Repeat the column name to match the length of the data
        })
        # Parenthesised so the offset is applied BEFORE the modulo; the previous
        # `i + 1 % len(...)` evaluated as `i + (1 % len(...))` and never wrapped.
        color = colors[(i + color_offset) % len(colors)]
        fig.line('x', 'y', source=source, color=color, legend_label=col)


# Add lines for assets
_add_series_lines(p1, df_asset, ['House_price', 'Shanghai_stock_index', 'Gold_price', 'Fixed_deposit_rate'], p1_colors)
_add_series_lines(p2, df_asset, ['government_bond_yield_sixm', 'government_bond_yield_fivey', 'government_bond_yield_teny'], p2_colors)

# Add lines for inflation indicators (the inflation panel starts at the second color of its segment)
_add_series_lines(p3, df_inflation, ['core_inflation', 'headline_inflation'], p3_colors, color_offset=1)
_add_series_lines(p4, df_inflation, ['core_CPI', 'headline_CPI'], p4_colors)

# Add HoverTool; '@x{%F}' together with the 'datetime' formatter prints the date as YYYY-MM-DD
hover = HoverTool(tooltips=[("Label", "@label"), ("Time", "@x{%F}"), ("Value", "@y")], formatters={'@x': 'datetime'})

# Adjust the legends (p2's location is intentionally left unset, as before)
p1.legend.location = "top_right"
p3.legend.location = "top_center"
p4.legend.location = "top_left"
for fig in (p1, p2, p3, p4):
    fig.add_tools(hover)
    fig.legend.background_fill_alpha = 0
    fig.legend.label_text_font_size = "8pt"

# Give a markup instruction
instruction = Div(text="""
    <p style='background-color: #ADD8E6; padding: 10px; font-size: 16px;'><b>Instructions:</b> Hover over the plot to see data values. Use the toolbar to pan, zoom, and reset.<br>The four charts are connected, zoom in on one, the others will adjust accordingly.</p>
""", width=800, align="center", margin=10, )

# Layout: instructions on top, assets on the first row, inflation indicators on the second
p = column(instruction, row(p1, p2), row(p3, p4))

show(p)

## **Percentage Change in All Assets And Inflation Indicators**
- With the "All asset" / "All inflation" option, you can browse all percentage changes in a bar chart; the nested coordinates let you compare all assets / inflation indicators within a specific quarter of a year.
- With the "All assets in lines" / "All inflations in lines" option, the series are drawn together as line charts, so you can follow the trend of the percentage changes over time.
- With the option for a specific asset / inflation indicator, a bar chart of that single series is shown.

In [5]:
# Distinct years and quarters present in the asset data, each in ascending order.
years = sorted(df_asset['Year'].unique().tolist())
quarters = sorted(df_asset['Quarter'].unique().tolist())

In [6]:
# Default dropdown selections.
# NOTE(review): no visible code reads these two module-level values — confirm
# they are still needed.
select_pct_tag = 'All asset'
select_inflation_tag = 'All inflation'
# Names of the percentage-change series: every column except the Time/Year/Quarter keys.
asset_pct = pct_asset.drop(columns=['Time','Year','Quarter']).columns.tolist()
inflation_pct = pct_inflation.drop(columns=['Time','Year','Quarter']).columns.tolist()

In [7]:
def select_n_colors(palette, n):
    """Pick `n` colors spread evenly across `palette`.

    The sampled positions run from the first to the last entry of the
    palette (both included) and are converted to integer indices.

    Parameters
    ----------
    palette : sequence of colors, indexable by integer.
    n : number of colors to return.

    Returns
    -------
    list
        The `n` selected palette entries, in palette order.
    """
    last_index = len(palette) - 1
    positions = np.linspace(0, last_index, n, dtype=int)

    picked = []
    for position in positions:
        picked.append(palette[position])
    return picked

# Fixed color per series name, so a series keeps its color in every chart that
# looks it up in `color_mapping`.
# (An earlier variant sampled 10 colors from Turbo256 via select_n_colors.)
palette = list(tol['TolRainbow'][14])

# Union of asset and inflation series names, sorted for a stable assignment
all_labels = sorted(set(asset_pct).union(inflation_pct))

color_mapping = {}
for position, series_name in enumerate(all_labels):
    # Wrap around the palette if there are more series than colors
    color_mapping[series_name] = palette[position % len(palette)]

In [8]:
def data_prepare_asset(select_tag):
    """Pivot the asset percentage changes and build the matching bar-chart factors.

    Parameters
    ----------
    select_tag : str
        'All asset' selects every column listed in `asset_pct`; any other
        value is treated as the name of a single column of `pct_asset`.

    Returns
    -------
    tuple
        `(data, x_labels, x_range)` where `data` is `pct_asset` pivoted on
        (Year, Quarter), `x_labels` is the list of
        `(year, quarter, series)` factor tuples in the same row-major order
        as `data.values`, and `x_range` is a `FactorRange` over those factors.
    """
    asset = asset_pct if select_tag == 'All asset' else [select_tag]

    pivoted = pct_asset.pivot_table(
        values=asset,
        index=['Year', 'Quarter']
    )

    # Build the factors from the pivoted frame itself instead of a separate
    # year x quarter x series grid: pivot_table only contains the
    # (Year, Quarter) groups that actually have data and may reorder the value
    # columns, so an independently built grid can disagree with the values in
    # both length and order.
    x_labels = [
        (str(year), str(quarter), str(item))
        for year, quarter in pivoted.index
        for item in pivoted.columns
    ]

    # Pass a copy so the range and the data source never share one list object
    x_range = FactorRange(factors=list(x_labels))

    return pivoted, x_labels, x_range

In [9]:
def data_prepare_inflation(select_tag):
    """Pivot the inflation percentage changes and build the matching bar-chart factors.

    Parameters
    ----------
    select_tag : str
        'All inflation' selects every column listed in `inflation_pct`; any
        other value is treated as the name of a single column of
        `pct_inflation`.

    Returns
    -------
    tuple
        `(data, x_labels, x_range)` where `data` is `pct_inflation` pivoted on
        (Year, Quarter), `x_labels` is the list of
        `(year, quarter, series)` factor tuples in the same row-major order
        as `data.values`, and `x_range` is a `FactorRange` over those factors.
    """
    indicators = inflation_pct if select_tag == 'All inflation' else [select_tag]

    pivoted = pct_inflation.pivot_table(
        values=indicators,
        index=['Year', 'Quarter']
    )

    # Build the factors from the pivoted frame itself instead of a separate
    # year x quarter x series grid: pivot_table only contains the
    # (Year, Quarter) groups that actually have data and may reorder the value
    # columns, so an independently built grid can disagree with the values in
    # both length and order.
    x_labels = [
        (str(year), str(quarter), str(item))
        for year, quarter in pivoted.index
        for item in pivoted.columns
    ]

    # Pass a copy so the range and the data source never share one list object
    x_range = FactorRange(factors=list(x_labels))

    return pivoted, x_labels, x_range

In [10]:
def create_source(data, x_labels):
    """Assemble the ColumnDataSource for a percentage-change bar chart.

    Parameters
    ----------
    data : DataFrame whose cells are the bar heights and whose column names
        are the series names (each must be a key of `color_mapping`).
    x_labels : list of factor tuples, one per cell of `data`; element 0 is
        exposed as 'Year' and element 1 as 'Quarter'.

    Returns
    -------
    ColumnDataSource with columns x_labels, y, label, colors, Year, Quarter.
    """
    series_names = data.columns.tolist()

    # The values are flattened row by row, so the series names repeat once per row
    bar_heights = data.values.flatten().tolist()
    repeated_names = series_names * len(data)

    columns = {
        'x_labels': x_labels,
        'y': bar_heights,
        'label': repeated_names,
        'colors': [color_mapping[name] for name in repeated_names],
        'Year': [factor[0] for factor in x_labels],
        'Quarter': [factor[1] for factor in x_labels],
    }

    return ColumnDataSource(columns)

In [11]:
def draw_bar_chart(source, x_range):
    """Create the percentage-change bar chart.

    Parameters
    ----------
    source : ColumnDataSource providing the columns 'x_labels', 'y', 'label',
        'colors', 'Year' and 'Quarter'.
    x_range : range object used for the categorical x axis.

    Returns
    -------
    The configured figure, rendered with the SVG backend.
    """
    chart = figure(
        title='Percentage Change',
        x_range=x_range,
        width=1000,
        height=500,
        tools='pan,wheel_zoom,box_zoom,reset,save',
    )
    chart.output_backend = "svg"

    # X axis: no vertical grid lines, no ticks, and the innermost labels hidden
    # by giving them a zero font size.
    chart.x_range.range_padding = 0.1
    chart.xgrid.grid_line_color = None
    chart.xaxis.axis_label = 'Datetime in year and quarter'
    chart.xaxis.major_tick_line_color = None
    chart.xaxis.major_label_text_font_size = '0px'

    # Y axis
    chart.yaxis.axis_label = 'Percentage Change (%)'
    chart.yaxis.formatter = NumeralTickFormatter(format='0,0')

    # One bar per source row; the legend is grouped by the 'label' column
    chart.vbar(
        x='x_labels',
        top='y',
        width=0.9,
        source=source,
        fill_color='colors',
        line_color=None,
        legend_group='label',
    )

    tooltips = [
        ('Year', '@Year'),
        ('Quarter', '@Quarter'),
        ('Asset', '@label'),
        ('pct value', '@y{0,0.00}'),
    ]
    chart.add_tools(HoverTool(tooltips=tooltips))

    # The legend only exists once the glyph above has been added
    legend = chart.legend
    legend.location = 'top_right'
    legend.orientation = 'vertical'
    legend.background_fill_alpha = 0
    legend.label_text_font_size = '8pt'
    legend.label_height = 15
    legend.glyph_height = 10
    legend.glyph_width = 10

    return chart

In [12]:
def draw_line_chart(tag):
    """Create a line chart of all percentage-change series in one group.

    Parameters
    ----------
    tag : str
        'All assets in lines' draws every series in `asset_pct` from
        `pct_asset`; 'All inflations in lines' draws every series in
        `inflation_pct` from `pct_inflation`. Any other value yields a figure
        without lines.

    Returns
    -------
    The configured figure.
    """
    # Choose the frame and the series to draw
    if tag == 'All assets in lines':
        frame = pct_asset.drop(columns=['Year', 'Quarter']).copy()
        series_names = asset_pct
    elif tag == 'All inflations in lines':
        frame = pct_inflation.drop(columns=['Year', 'Quarter']).copy()
        series_names = inflation_pct
    else:
        frame = pd.DataFrame()
        series_names = []

    chart = figure(
        title='Percentage Change in line charts',
        x_axis_type="datetime",
        width=1000,
        height=500,
        tools='pan,wheel_zoom,box_zoom,reset,save',
    )

    chart.x_range.range_padding = 0.1
    chart.xgrid.grid_line_color = None

    chart.xaxis.axis_label = 'Date'
    chart.xaxis.formatter = DatetimeTickFormatter(months="%m/%Y")
    chart.yaxis.axis_label = 'Percentage Change (%)'
    chart.yaxis.formatter = NumeralTickFormatter(format='0,0')

    line_colors = list(tol['Bright'][7])
    n_colors = len(line_colors)

    for position, name in enumerate(series_names):
        line_source = ColumnDataSource(data={
            'x': frame['Time'],
            'y': frame[name],
            'label': [name] * len(frame),  # lets the hover tool show the series name
        })
        chart.line(
            'x', 'y',
            source=line_source,
            color=line_colors[position % n_colors],
            legend_label=name,
            line_width=1.5,
        )

    hover = HoverTool(
        tooltips=[
            ('Asset', '@label'),
            ('Time', '@x{%F}'),
            ('percentage', '@y{0,0.00}'),
        ],
        formatters={'@x': 'datetime'},
    )
    chart.add_tools(hover)

    chart.legend.background_fill_alpha = 0
    chart.legend.label_text_font_size = "8pt"

    return chart

In [13]:
def plot(doc):
    """Bokeh application with two dropdown-driven percentage-change charts.

    The first dropdown controls the asset chart and the second the inflation
    chart. Each dropdown offers an "all series" bar chart, an "all series in
    lines" line chart, and one bar chart per individual series. Selecting an
    option replaces the corresponding chart in the layout.

    Parameters
    ----------
    doc : the Bokeh document the layout is added to.
    """
    asset_line_tag = 'All assets in lines'
    inflation_line_tag = 'All inflations in lines'

    select_opt = ['All asset', asset_line_tag] + asset_pct
    inflation_opt = ['All inflation', inflation_line_tag] + inflation_pct
    select_asset = Select(title="Select Asset:", value=select_opt[0], options=select_opt)
    select_inflation = Select(title="Select Inflation:", value=inflation_opt[0], options=inflation_opt)

    def build_chart(tag, line_tag, prepare):
        """Return the line chart when `tag` is `line_tag`, otherwise the bar chart for `tag`."""
        if tag == line_tag:
            return draw_line_chart(tag)
        pivoted, labels, factor_range = prepare(tag)
        return draw_bar_chart(create_source(pivoted, labels), factor_range)

    # Each chart lives in its own single-child column so a callback can swap
    # it without indexing through the nested layout.
    asset_panel = column(build_chart(select_asset.value, asset_line_tag, data_prepare_asset))
    inflation_panel = column(build_chart(select_inflation.value, inflation_line_tag, data_prepare_inflation))

    def update_asset_chart(attr, old, new):
        asset_panel.children[0] = build_chart(new, asset_line_tag, data_prepare_asset)

    def update_inflation_chart(attr, old, new):
        inflation_panel.children[0] = build_chart(new, inflation_line_tag, data_prepare_inflation)

    select_asset.on_change('value', update_asset_chart)
    select_inflation.on_change('value', update_inflation_chart)

    layout1 = column(column(select_asset), asset_panel)
    layout2 = column(column(select_inflation), inflation_panel)

    doc.add_root(column(layout1, layout2))

In [15]:
# Run `plot` as an embedded Bokeh application.
# NOTE(review): notebook_url is hard-coded to http://localhost:8888 — presumably
# it has to match the address this notebook is served from; confirm.
show(plot, notebook_url="http://localhost:8888")

## **Proportion of Assets vs. Inflation Indicators Over Time**
- With the "ALL" option combined with any inflation indicator, a set of line charts is shown so that the different assets can be compared.
- You can also select any single asset and pair it with any inflation indicator to get a single line chart.

In [16]:
def create_source_proportion(asset_tag, inflation_tag):
    """Build the data sources for the asset / inflation proportion chart.

    `pct_asset` and `pct_inflation` are inner-joined on (Year, Quarter) and
    each selected asset column is divided by the chosen inflation column.

    Parameters
    ----------
    asset_tag : str
        "ALL" for every column in `asset_pct`, otherwise the name of one
        asset column.
    inflation_tag : str
        Name of the inflation column used as the denominator.

    Returns
    -------
    list of ColumnDataSource
        One source per selected asset, each with the columns 'Datetime'
        (year followed by quarter), 'Proportion', 'text' (chart caption) and
        'label' (asset name).
    """
    merged_df = pd.merge(pct_asset, pct_inflation.drop(columns=['Time']), on=['Year', 'Quarter'], how='inner')
    merged_df['Datetime'] = merged_df['Year'].astype(str) + merged_df['Quarter']

    # A zero denominator would produce +/-inf; mask zeros to NaN instead.
    # Previously only the single-asset branch did this, so the "ALL" view
    # could contain infinite values.
    denominator = merged_df[inflation_tag].replace({0: np.nan})

    if asset_tag == "ALL":
        selected_assets = asset_pct
        caption = 'The proportion of all assets vs. ' + inflation_tag
    else:
        selected_assets = [asset_tag]
        caption = 'The proportion of ' + asset_tag + ' and ' + inflation_tag

    datas = []
    for asset in selected_assets:
        prop = merged_df[['Datetime']].copy()
        prop['Proportion'] = merged_df[asset] / denominator
        prop['text'] = caption
        prop['label'] = asset
        datas.append(ColumnDataSource(prop))

    return datas

In [17]:
def draw_line_chart_prop(source):
    """Create the line chart of asset / inflation proportions.

    Parameters
    ----------
    source : non-empty list of ColumnDataSource
        Each source must provide the columns 'Datetime', 'Proportion',
        'label' and 'text'. The 'Datetime' values of the first source define
        the categorical x range.

    Returns
    -------
    The configured figure, rendered with the SVG backend.
    """
    p = figure(
        width = 1000,
        height = 600,
        title = 'Proportion of Assets vs. Inflation Indicators',
        x_range = source[0].data['Datetime'],
        tools = "pan,wheel_zoom,box_zoom,reset,save",
        toolbar_location = 'right'
    )

    p.xgrid.grid_line_color = None
    p.xaxis.major_label_orientation = 'vertical'

    palettes = list(tol['Light'][7])

    for i, src in enumerate(source):
        p.line(
            x = 'Datetime',
            y = 'Proportion',
            source = src,
            color=palettes[i % len(palettes)],
            legend_label = src.data['label'][0],
            line_width=1.5
        )

    # Only one caption is ever added to the figure, so build it once (from the
    # last source, which is the one the per-series loop used to end up with)
    # instead of constructing and discarding a Label for every series.
    # NOTE(review): y=650 screen units exceeds the figure height of 600, so the
    # caption may fall outside the visible area — confirm the intended position.
    label = Label(
        x=700,
        y=650,
        x_units = 'screen',
        y_units = 'screen',
        text = source[-1].data['text'][0],
        text_font_size = '10pt',
        text_font_style = 'bold',
        text_color = 'black',
        text_align = 'center'
    )

    p.add_tools(HoverTool(tooltips=[('Label', '@label'), ('Time', '@Datetime'), ('Proportion', '@Proportion')]))

    p.xaxis.axis_label = 'Datetime'
    p.yaxis.axis_label = 'Proportion'
    p.legend.label_text_font_size = "10pt"
    p.legend.location = "bottom_left"

    p.add_layout(label)

    p.output_backend = 'svg'

    return p

In [18]:
def plot_proportion(doc):
    """Bokeh application showing asset / inflation proportions.

    Two dropdowns choose the asset ("ALL" or a single asset) and the
    inflation indicator; changing either one redraws the chart from the
    current value of BOTH dropdowns.

    Parameters
    ----------
    doc : the Bokeh document the layout is added to.
    """
    select_asset2 = Select(title="Select Asset:", value="ALL", options=asset_pct+["ALL"])
    select_inflation2 = Select(title="Select Inflation:", value=inflation_pct[0], options=inflation_pct)

    def build_chart():
        """Draw the chart for whatever the two dropdowns currently show."""
        # Reading the widget values keeps chart and dropdowns in sync. The
        # previous callbacks assigned `asset_tag` / `inflation_tag` as locals,
        # so each one always combined its new value with the OTHER selector's
        # initial value.
        source = create_source_proportion(select_asset2.value, select_inflation2.value)
        return draw_line_chart_prop(source)

    # The initial chart uses the dropdowns' initial values (previously the
    # inflation indicator was hard-coded and could differ from the widget).
    layout2 = row(build_chart())

    def refresh_chart(attr, old, new):
        layout2.children[0] = build_chart()

    select_asset2.on_change('value', refresh_chart)
    select_inflation2.on_change('value', refresh_chart)

    layout1 = row(select_asset2, select_inflation2)

    doc.add_root(column(layout1, layout2))

In [20]:
# Run `plot_proportion` as an embedded Bokeh application.
# NOTE(review): notebook_url is hard-coded to http://localhost:8888 — presumably
# it has to match the address this notebook is served from; confirm.
show(plot_proportion, notebook_url="http://localhost:8888")