In [2]:
import pandas as pd 
import numpy as np
import requests
import boto3
import os
import json
from io import BytesIO
import base64
from bs4 import BeautifulSoup
import json5

# Directory that receives the chart spec JSON files; it is passed to
# prepare_spec() as `parent_path` in the cells below.
# NOTE(review): this is an absolute, machine-specific path, so the notebook
# will only run as-is on the original author's machine.
charts_path = "/Users/finn/Documents/GitHub/GrowthDiagnostics/charts"

In [10]:
def upload_series(df, name, bucket="eco-temp-cache"):
    """Upload a DataFrame to S3 as CSV and return the object's URL.

    The frame is serialised without its index, with dates written as
    ``YYYY-MM-DD``, and stored under the key ``gd/{name}.csv``.

    Args:
        df: DataFrame to serialise.
        name: Base name of the object (no extension).
        bucket: Name of the S3 bucket to write to.

    Returns:
        The virtual-hosted-style URL of the uploaded object.
    """
    key = f"gd/{name}.csv"
    csv = df.to_csv(index=False, date_format='%Y-%m-%d')
    s3 = boto3.client('s3')
    s3.put_object(Bucket=bucket, Key=key, Body=csv)
    # Build the URL from the bucket that was actually written to, so that a
    # non-default `bucket` does not return a link into the default bucket.
    # The region is still fixed to eu-west-2, as before.
    return f"https://{bucket}.s3.eu-west-2.amazonaws.com/{key}"

def prepare_spec(data_url, path, title, filter=None, parent_path="charts", multi_series=False, yAxisExpr=None):
    """Write a Vega-Lite v5 line-chart spec to ``{parent_path}/{path}.json``.

    The chart plots the ``value`` field (quantitative) against the ``date``
    field (temporal) of the data found at ``data_url``.

    Args:
        data_url: URL of the data file referenced by the spec.
        path: File name (without ``.json``) of the spec to write.
        title: Chart title text.
        filter: Optional Vega-Lite filter predicate; when given it is added
            to the spec as a ``filter`` transform.
        parent_path: Directory the spec file is written into.
        multi_series: When true, colour lines by the nominal ``series`` field.
        yAxisExpr: Optional Vega expression used as the y-axis ``labelExpr``.

    Returns:
        None. The spec is written to disk as JSON.
    """
    spec = {
        "$schema": "https://vega.github.io/schema/vega-lite/v5.json",
        "title": {"text": title},
        "data": {
            "url": data_url,
        },
        "mark": {"type": "line"},
        "encoding": {
            "x": {
                "field": "date",
                "type": "temporal",
                "axis": {"title": "", "titleColor": "#122B39"}
            },
            "y": {
                "field": "value",
                "type": "quantitative",
                "axis": {"title": "", "titleColor": "#122B39"}
            }
        }
    }
    if filter:
        # The base spec has no "transform" list, so create it on demand
        # instead of indexing a key that does not exist (was a KeyError).
        spec.setdefault('transform', []).append({"filter": filter})
    if multi_series:
        spec['encoding']['color'] = {"field": "series", "type": "nominal"}
    if yAxisExpr:
        spec['encoding']['y']['axis']['labelExpr'] = yAxisExpr
    with open(f"{parent_path}/{path}.json", "w") as f:
        f.write(json.dumps(spec))

def get_stats(df):
    """Summarise a time series held in ``date`` and ``value`` columns.

    Args:
        df: DataFrame with a datetime ``date`` column and a ``value`` column.

    Returns:
        A dict with:
          * ``latest`` / ``latest_date``: the most recent observation.
          * ``year_ago`` / ``year_ago_date``: the last observation dated at
            least one year before the most recent date.
          * ``avg``: mean of ``value`` over observations dated within the
            ten years up to the most recent date.
          * ``avg_year_count``: calendar-year difference between the most
            recent observation and the first one in that ten-year window
            (so a full window can be reported as 9 rather than 10).

    Raises:
        ValueError: if ``df`` is empty or has no observation at least one
            year older than the most recent one.
    """
    if len(df) == 0:
        raise ValueError("get_stats() needs at least one observation")

    df = df.sort_values('date')
    max_date = df['date'].max()

    # Most recent value and its date.
    latest = df.iloc[-1]
    latest_date = latest['date']
    latest_value = latest['value']

    # Observation as close to one year ago as possible (never newer than that).
    older = df[df['date'] <= max_date - pd.DateOffset(years=1)]
    if older.empty:
        raise ValueError("get_stats() needs at least one year of history")
    year_ago = older.iloc[-1]
    year_ago_date = year_ago['date']
    year_ago_value = year_ago['value']

    # Average over the last 10 years, or as many as we have. The window is
    # computed once and reused for both the mean and the year count.
    window = df[df['date'] > max_date - pd.DateOffset(years=10)]
    avg = window['value'].mean()
    avg_year_count = latest_date.year - window.iloc[0]['date'].year

    return {
        'latest': {'date': latest_date, 'value': latest_value},
        'latest_date': latest_date,
        'year_ago': {'date': year_ago_date, 'value': year_ago_value},
        'year_ago_date': year_ago_date,
        'avg': avg,
        'avg_year_count': avg_year_count
    }

def human_change(val1, val2, date1, date2, date_format="%b %Y", val_format=".1%", change_format=".3f", change_multiplier=1, change_suffix="", val_suffix="", val_multiplier=1):
    """Describe in words how ``val1`` compares with the earlier ``val2``.

    Args:
        val1: Current value.
        val2: Comparison (earlier) value.
        date1: Date of ``val1``. Accepted for symmetry; not used in the text.
        date2: Date of ``val2``; formatted with ``date_format``.
        date_format: ``strftime`` format for ``date2``.
        val_format: Format spec applied to ``val2 * val_multiplier``.
        change_format: Format spec applied to the size of the change.
        change_multiplier: Scale factor applied to the size of the change.
        change_suffix: Text appended after the size of the change.
        val_suffix: Text appended after the formatted ``val2``.
        val_multiplier: Scale factor applied to ``val2`` before formatting.

    Returns:
        ``"unchanged from ..."`` when the values agree to 3 decimal places,
        otherwise ``"up ..."`` or ``"down ..."`` followed by the size of the
        change and the earlier value. Returns ``None`` when the values cannot
        be ordered (for example when either is NaN).
    """
    when = date2.strftime(date_format)
    if np.round(val1, 3) == np.round(val2, 3):
        return f"unchanged from {when}"

    if val1 > val2:
        direction = "up"
    elif val1 < val2:
        direction = "down"
    else:
        return None

    # The direction word already carries the sign, so report the magnitude;
    # previously the "down" branch printed a negative number ("down -0.010").
    change = abs(val1 - val2) * change_multiplier
    return f"{direction} {change:{change_format}}{change_suffix} from {when} when it was {val2*val_multiplier:{val_format}}{val_suffix}"

def temporal_description(data, name="it", preamble="", val_format=".1%", change_format=".3f", val_multiplier=1, change_multiplier=1, change_suffix="", date_format="%b %Y", val_suffix=""):
    """Build a three-sentence description of a time series.

    The text states the latest value, how it compares with the value about
    a year earlier, and the multi-year average, using the figures returned
    by ``get_stats(data)`` and the wording from ``human_change``.

    Args:
        data: Series data passed straight to ``get_stats``.
        name: How the series is referred to in the first sentence.
        preamble: Optional leading text; a separating space is added if the
            preamble does not already end with one.
        val_format: Format spec for the latest and average values.
        change_format: Format spec for the size of the change.
        val_multiplier: Scale factor applied to values before formatting.
        change_multiplier: Scale factor applied to the size of the change.
        change_suffix: Text appended after the size of the change.
        date_format: ``strftime`` format for the comparison date.
        val_suffix: Text appended after each formatted value.

    Returns:
        The description as a single string.
    """
    if len(preamble) > 0 and preamble[-1] != " ":
        preamble += " "
    stats = get_stats(data)
    latest = stats['latest']
    year_ago = stats['year_ago']
    change = human_change(
        latest['value'], year_ago['value'], latest['date'], year_ago['date'],
        date_format, val_format, change_format, change_multiplier, change_suffix,
        val_suffix=val_suffix, val_multiplier=val_multiplier,
    )
    # The first sentence previously ended with " ." (stray space before the
    # full stop); the sentences are now joined with normal punctuation.
    return (
        f"{preamble}In {latest['date'].strftime('%B %Y')}, {name} was "
        f"{latest['value']*val_multiplier:{val_format}}{val_suffix}. "
        f"This is {change}. "
        f"The average for the last {stats['avg_year_count']} years is "
        f"{stats['avg']*val_multiplier:{val_format}}{val_suffix}."
    )

def upload_description(description, name, bucket="eco-temp-cache"):
    """Upload a text description to S3 and return the object's URL.

    The description is JSON-encoded (so a plain string is stored with its
    surrounding quotes) and written to ``gd/descriptions/{name}.txt``.

    Args:
        description: JSON-serialisable value, normally the description text.
        name: Base name of the object (no extension).
        bucket: Name of the S3 bucket to write to.

    Returns:
        The virtual-hosted-style URL of the uploaded object.
    """
    key = f"gd/descriptions/{name}.txt"
    s3 = boto3.client('s3')
    s3.put_object(Bucket=bucket, Key=key, Body=json.dumps(description))
    # Build the URL from the bucket that was actually written to, so that a
    # non-default `bucket` does not return a link into the default bucket.
    # The region is still fixed to eu-west-2, as before.
    return f"https://{bucket}.s3.eu-west-2.amazonaws.com/{key}"

def short_description(stats, var_name, date_format="%b %Y", val_format=".1%", val_multiplier=1, val_suffix="", suffix=""):
    """Render a short textual summary from a ``get_stats``-style dict.

    Args:
        stats: Dict with ``latest``, ``year_ago``, ``year_ago_date``,
            ``avg`` and ``avg_year_count`` entries.
        var_name: Subject of the first sentence.
        date_format: ``strftime`` format for the comparison date.
        val_format: Format spec applied to every value.
        val_multiplier: Scale factor applied to values before formatting.
        val_suffix: Text appended after every formatted value.
        suffix: Extra text appended after the final sentence.

    Returns:
        The summary string. It always ends with a space followed by
        ``suffix``, so an empty suffix leaves a trailing space.
    """
    def _shown(value):
        # Shared formatting for the latest, year-ago and average figures.
        return f"{value*val_multiplier:{val_format}}{val_suffix}"

    current = stats['latest']['value']
    previous = stats['year_ago']['value']

    # Direction is decided on values rounded to three decimal places.
    current_rounded = np.round(current, 3)
    previous_rounded = np.round(previous, 3)
    if current_rounded == previous_rounded:
        direction = 'unchanged'
    elif current_rounded > previous_rounded:
        direction = "up"
    else:
        direction = "down"

    sentences = [
        f"{var_name} is {_shown(current)}.",
        f"It is {direction} from {stats['year_ago_date'].strftime(date_format)} ({_shown(previous)}).",
        f"The UK historical average (past {stats['avg_year_count']} years) is {_shown(stats['avg'])}.",
        f"{suffix}",
    ]
    return " ".join(sentences)


In [4]:
def parse_quarters(df):
    """Add a ``date`` column set to the first day of each row's quarter.

    Expects a ``year`` column and a ``quarter`` column holding strings such
    as ``"Q3"``. The frame is modified in place: ``quarter`` is replaced by
    its number, and ``month`` (first month of the quarter) and ``date`` are
    added. The same frame is returned.
    """
    quarter_number = pd.to_numeric(df['quarter'].str.replace('Q', ''))
    df['quarter'] = quarter_number
    # Quarters 1..4 start in months 1, 4, 7 and 10.
    df['month'] = 3 * (df['quarter'] - 1) + 1
    date_text = df['year'].astype(str) + '-' + df['month'].astype(str) + '-01'
    df['date'] = pd.to_datetime(date_text)
    return df

def parse_years(df):
    """Add a ``date`` column set to 1 January of each row's ``year``.

    The frame is modified in place and the same frame is returned.
    """
    year_text = df['year'].astype(str)
    df['date'] = pd.to_datetime(year_text + '-01-01')
    return df

def clean_ons_series(dataset_id, series_id, date_parser, key, timeout=30):
    """Download one ONS time series and return it as a date/value frame.

    Args:
        dataset_id: ONS dataset identifier used in the request URL.
        series_id: ONS series identifier used in the request URL.
        date_parser: Callable that takes the raw DataFrame and returns it
            with a ``date`` column added (e.g. ``parse_quarters``).
        key: Top-level key of the JSON response holding the observations
            (the cells below use ``'quarters'`` and ``'years'``).
        timeout: Seconds to wait for the HTTP request before giving up.

    Returns:
        DataFrame with ``date`` and ``value`` columns, rows containing
        missing values removed. ``value`` is left as returned by the API.

    Raises:
        requests.HTTPError: if the API answers with an error status.
        KeyError: if ``key`` is not present in the response.
    """
    url = f'https://api.ons.gov.uk/timeseries/{series_id}/dataset/{dataset_id}/data'
    # A timeout stops the notebook from hanging on a stalled connection, and
    # the status check surfaces HTTP errors directly instead of as a confusing
    # JSON/KeyError failure further down.
    req = requests.get(url, timeout=timeout)
    req.raise_for_status()
    data = req.json()
    data = data[key]
    df = pd.DataFrame(data)
    df = date_parser(df)
    df = df[['date', 'value']]
    df = df.dropna()
    return df

# Current Account Balance

In [5]:
curr_acc_ids = {
    # ONS identifiers for the series charted below as "Current Account, % of GDP".
    # NOTE(review): the comment previously here described "Total Gross Fixed
    # Capital Formation CVM SA £m" from the business investment series (CXNV),
    # which does not match how this series is used — confirm the label for
    # UKEA/AA6H against ONS.
    "dataset_id": 'UKEA',
    "series_id" : 'AA6H'
}

# Fetch the quarterly observations and scale them by 1/100 so they are
# fractions; the chart axis and the description format them with '%' specs.
curr_act = clean_ons_series(curr_acc_ids['dataset_id'], curr_acc_ids['series_id'], parse_quarters, 'quarters')
curr_act['value'] = curr_act['value'].astype(float)/100

# Publish the data to S3 and write the chart spec that points at it.
curr_act_url = upload_series(curr_act, 'macro_curr_act')
prepare_spec(curr_act_url, "macro_curr_act", "Current Account, % of GDP", parent_path=charts_path, yAxisExpr="format(datum.value, '.0%')")

In [6]:
# Build the short text summary for the current account series, upload it
# next to the data, and echo it for a visual check.
stats = get_stats(curr_act)
description = short_description(stats, "The current account balance", val_format=".1%", val_multiplier=1, val_suffix=" of GDP")
description_url = upload_description(description, "macro_curr_act")
print(description)

The current account balance is -3.1% of GDP. It is down from Oct 2022 (0.6% of GDP). The UK historical average (past 9 years) is -3.5% of GDP. 


# Fiscal Balance

In [8]:
# Scratch calculation: mean of six hand-entered figures. The magnitude of the
# result (4.13) is quoted as "The G7 average" in the fiscal balance description.
# NOTE(review): the source and date of these figures are not recorded here.
np.mean([-0.8, -4.8, -2.5, -4.7, -5.58, -6.4])

-4.13

In [9]:
fiscal_bal_ids = {
    # PS: Current Budget Deficit (exc PS Banks) as a % of GDP: CPNSA
    "dataset_id": 'PUSF',
    "series_id" : 'JW2V'
}

# Fetch the quarterly observations and scale them by 1/100 so they are
# fractions; the chart axis and the description format them with '%' specs.
fiscal_bal = clean_ons_series(fiscal_bal_ids['dataset_id'], fiscal_bal_ids['series_id'], parse_quarters, 'quarters')
fiscal_bal['value'] = fiscal_bal['value'].astype(float)/100
fiscal_bal_url = upload_series(fiscal_bal, 'macro_fiscal_bal')

prepare_spec(fiscal_bal_url, "macro_fiscal_bal", "Fiscal Balance, % of GDP", parent_path=charts_path, yAxisExpr="format(datum.value, '.0%')")
stats = get_stats(fiscal_bal)
# The G7 figure is typed in by hand from the scratch mean computed in an
# earlier cell; it is not recomputed here and must be updated manually.
suffix="The G7 average is 4.13%."
description = short_description(stats, "The deficit", val_format=".1%", val_multiplier=1, val_suffix=" of GDP", suffix=suffix)
description_url = upload_description(description, "macro_fiscal_bal")
print(description)


The deficit is 2.5% of GDP. It is down from Oct 2022 (2.9% of GDP). The UK historical average (past 9 years) is 3.0% of GDP. The G7 average is 4.13%.


# GDP Data (for IIP)

In [45]:
gdp_cp_nsa_ids = {
    # Gross Domestic Product at market prices: CP: NSA £m
    "dataset_id": 'QNA',
    "series_id" : 'BKTL'
}

# Annual GDP series; used as the denominator for the International Investment
# Position ratio computed in the next section.
gdp_cp_nsa = clean_ons_series(gdp_cp_nsa_ids['dataset_id'], gdp_cp_nsa_ids['series_id'], parse_years, 'years')


# International Investment Position

In [46]:
iip_ids = {
    # BoP IIP Net NSA £m
    "dataset_id": 'PNBP',
    "series_id": "HBQC"
}

iip = clean_ons_series(iip_ids['dataset_id'], iip_ids['series_id'], parse_years, 'years')
# Left-join annual GDP onto the IIP rows by date. After the merge `value_x`
# is the IIP figure and `value_y` the GDP figure, so `value` becomes the
# IIP/GDP ratio as a fraction. Years without a GDP match produce NaN and are
# removed by the dropna() below.
iip = iip.merge(gdp_cp_nsa, on='date', how='left')
iip['value'] = iip['value_x'].astype(float)/iip['value_y'].astype(float)
iip = iip[['date', 'value']]
iip = iip.dropna()
iip_url = upload_series(iip, 'macro_iip')
prepare_spec(iip_url, "macro_iip", "International Investment Position, % of GDP", parent_path=charts_path, yAxisExpr="format(datum.value, '.0%')")

# Annual data, so the comparison date in the description is shown as a year only.
stats = get_stats(iip)
description = short_description(stats, "The International Investment Position", val_format=".1%", val_multiplier=1, val_suffix=" of GDP", date_format="%Y")
description_url = upload_description(description, "macro_iip")
print(description)


The International Investment Position is -30.7% of GDP. It is down from 2022 (-14.3% of GDP). The UK historical average (past 9 years) is -11.2% of GDP.


# Debt/GDP Ratio

In [47]:
debt_ids = {
    # PS: Net Debt (excluding public sector banks) as a % of GDP: NSA
    "dataset_id": 'PUSF',
    "series_id": 'HF6X'
}

debt = clean_ons_series(debt_ids['dataset_id'], debt_ids['series_id'], parse_quarters, 'quarters')
# Unlike the other series this one is parsed with errors='coerce': values
# that are not numeric become NaN and are removed by the dropna() below.
# The /100 turns the figures into fractions for the '%' formats used later.
debt['value'] = pd.to_numeric(debt['value'], errors='coerce')/100
debt = debt.dropna()

debt_url = upload_series(debt, 'macro_debt')
prepare_spec(debt_url, "macro_debt", "Net Debt, % of GDP", parent_path=charts_path, yAxisExpr="format(datum.value, '.0%')")

stats = get_stats(debt)
description = short_description(stats, "The debt ratio", val_format=".1%", val_multiplier=1, val_suffix="")
description_url = upload_description(description, "macro_debt")
print(description)

The debt ratio is 98.4%. It is up from Oct 2022 (95.7%). The UK historical average (past 9 years) is 87.6%.


In [48]:
# Scratch check: a hand-entered ratio whose result (about -0.307) matches the
# -30.7% shown in the International Investment Position description.
# NOTE(review): presumably latest IIP (£m) over latest GDP (£m) — confirm.
-824993/2_687_186

-0.3070100097276482