In [29]:
import pandas as pd 
import numpy as np
import requests
import boto3
import os
import json
from io import BytesIO
import base64
from bs4 import BeautifulSoup
import json5

charts_path = "/Users/finn/Documents/GitHub/GrowthDiagnostics/charts"

In [30]:
def upload_series(df, name, bucket="eco-temp-cache"):
    csv = df.to_csv(index=False, date_format='%Y-%m-%d')
    s3 = boto3.client('s3')
    res = s3.put_object(Bucket=bucket, Key=f"gd/{name}.csv", Body=csv)
    return f"https://eco-temp-cache.s3.eu-west-2.amazonaws.com/gd/{name}.csv"

def prepare_spec(data_url, path, title, filter=None, parent_path="charts", multi_series=False, yAxisExpr=None):
    spec = {
        "$schema": "https://vega.github.io/schema/vega-lite/v5.json",
        "title": {"text": title},
        "data": {
            "url": data_url,
        },
        "mark": {"type": "line"},
        "encoding": {
            "x": {
                "field": "date",
                "type": "temporal",
                "axis": {"title": "", "titleColor": "#122B39"}
            },
            "y": {
                "field": "value",
                "type": "quantitative",
                "axis": {"title": "", "titleColor": "#122B39"}
            }
        }
    }
    if filter:
        spec['transform'].append({"filter": filter})
    if multi_series:
        spec['encoding']['color'] = {"field": "series", "type": "nominal"}
    if yAxisExpr:
        spec['encoding']['y']['axis']['labelExpr'] = yAxisExpr
    with open(f"{parent_path}/{path}.json", "w") as f:
        f.write(json.dumps(spec))

def get_stats(df):
    df = df.sort_values('date')
    # get the most recent value and it's date
    latest = df.iloc[-1]
    latest_date = latest.date
    latest_value = latest.value
    # get the as close to 1 year ago as possible
    year_ago = df[df.date<=df.date.max()-pd.DateOffset(years=1)].iloc[-1]
    year_ago_date = year_ago.date
    year_ago_value = year_ago.value
    # get the average for the last 10 years (or as many as we have)
    avg = df[df.date>df.date.max()-pd.DateOffset(years=10)].value.mean()
    avg_year_count = df.iloc[-1].date.year-df[df.date>df.date.max()-pd.DateOffset(years=10)].iloc[0].date.year
    return {
        'latest': {'date': latest_date, 'value': latest_value},
        'latest_date': latest_date,
        'year_ago': {'date': year_ago_date, 'value': year_ago_value},
        'year_ago_date': year_ago_date,
        'avg': avg,
        'avg_year_count': avg_year_count
    }

def human_change(val1, val2, date1, date2, date_format="%b %Y", val_format=".1%", change_format=".3f", change_multiplier=1, change_suffix="", val_suffix="", val_multiplier=1):
    print(f"val1: {val1}, val2: {val2}, date1: {date1}, date2: {date2}")
    if np.round(val1, 3) == np.round(val2, 3):
        return f"unchanged from {date2.strftime(date_format)}"
    if val1 > val2:
        return f"up {(val1-val2)*change_multiplier:{change_format}}{change_suffix} from {date2.strftime(date_format)} when it was {val2*val_multiplier:{val_format}}{val_suffix}"
    if val1 < val2:
        return f"down {(val1-val2)*change_multiplier:{change_format}}{change_suffix} from {date2.strftime(date_format)} when it was {val2*val_multiplier:{val_format}}{val_suffix}"  

def temporal_description(data, name="it", preamble="", val_format=".1%", change_format=".3f", val_multiplier=1, change_multiplier=1, change_suffix="", date_format="%b %Y", val_suffix=""):
    if len(preamble)>0 and preamble[-1]!=" ":
        preamble += " "
    stats = get_stats(data)
    return f"{preamble}In {stats['latest']['date'].strftime('%B %Y')}, {name} was {stats['latest']['value']*val_multiplier:{val_format}}{val_suffix} . This is {human_change(stats['latest']['value'], stats['year_ago']['value'], stats['latest']['date'], stats['year_ago']['date'], date_format, val_format, change_format, change_multiplier, change_suffix, val_suffix=val_suffix, val_multiplier=val_multiplier)}. The average for the last {stats['avg_year_count']} years is {stats['avg']*val_multiplier:{val_format}}{val_suffix}."

def upload_description(description, name, bucket="eco-temp-cache"):
    s3 = boto3.client('s3')
    res = s3.put_object(Bucket=bucket, Key=f"gd/descriptions/{name}.txt", Body=json.dumps(description))
    return f"https://eco-temp-cache.s3.eu-west-2.amazonaws.com/gd/descriptions/{name}.txt"

def short_description(stats, var_name, date_format="%b %Y", val_format=".1%", val_multiplier=1, val_suffix=""):
    direction = 'unchanged' if np.round(stats['latest']['value'], 3) == np.round(stats['year_ago']['value'], 3) else "up" if np.round(stats['latest']['value'], 3) > np.round(stats['year_ago']['value'], 3) else "down"
    desc = f"{var_name} is {stats['latest']['value']*val_multiplier:{val_format}}{val_suffix}. It is {direction} from {stats['year_ago_date'].strftime(date_format)} ({stats['year_ago']['value']*val_multiplier:{val_format}}{val_suffix}). The UK historical average (past {stats['avg_year_count']} years) is {stats['avg']*val_multiplier:{val_format}}{val_suffix}."
    return desc


In [31]:
def parse_quarters(df):
    df['quarter'] = pd.to_numeric(df.quarter.str.replace('Q', ''))
    df['month'] = (df.quarter-1)*3+1
    df['date'] = pd.to_datetime(df.year.astype(str) + '-' + df.month.astype(str) + '-01')
    return df

def parse_years(df):
    df['date'] = pd.to_datetime(df.year.astype(str) + '-01-01')
    return df

def parse_months(df):
    df['date'] = pd.to_datetime(df.date, format='%Y %b', errors='coerce')
    return df

def clean_ons_series(dataset_id, series_id, date_parser, key):
    url = f'https://api.ons.gov.uk/timeseries/{series_id}/dataset/{dataset_id}/data'
    req = requests.get(url)
    data = req.json()
    data = data[key]
    df = pd.DataFrame(data)
    df = date_parser(df)
    df = df[['date', 'value']]
    df = df.dropna()
    return df

# 16-64 Employment

In [32]:
curr_acc_ids = {
    # Total Gross Fixed Capital Formation CVM SA £m
    # from Business investment time series (CXNV)
    "dataset_id": 'UKEA',
    "series_id" : 'AA6H'
}

curr_act = clean_ons_series(curr_acc_ids['dataset_id'], curr_acc_ids['series_id'], parse_quarters, 'quarters')
curr_act['value'] = curr_act['value'].astype(float)/100

curr_act_url = upload_series(curr_act, 'macro_curr_act')
prepare_spec(curr_act_url, "macro_curr_act", "Current Account, % of GDP", parent_path=charts_path, yAxisExpr="format(datum.value, '.0%')")

In [33]:
employment_ids = {
    #Employment rate (aged 16 to 64, seasonally adjusted): %
    "dataset_id": 'LMS',
    "series_id" : 'LF24'
}

employment = clean_ons_series(employment_ids['dataset_id'], employment_ids['series_id'], parse_quarters, 'quarters')
employment['value'] = employment['value'].astype(float)/100
employment_url = upload_series(employment, 'labour_employment')
prepare_spec(employment_url, "labour_employment", "Employment Rate, 16-64, SA, %", parent_path=charts_path, yAxisExpr="format(datum.value, '.0%')")

stats = get_stats(employment)
description = short_description(stats, "The employment rate", val_format=".1%", val_multiplier=1, val_suffix="")
description_url = upload_description(description, "labour_employment")
print(description)

The employment rate is 75.0%. It is down from Oct 2022 (75.2%). The UK historical average (past 9 years) is 74.6%.


# Unemployment

In [34]:
unemp_ids = {
    "dataset_id": 'LMS',
    "series_id" : 'MGSX'
}

unemp = clean_ons_series(unemp_ids['dataset_id'], unemp_ids['series_id'], parse_months, 'months')
unemp['value'] = unemp['value'].astype(float)/100

unemp_url = upload_series(unemp, 'labour_unemployment')
prepare_spec(unemp_url, "labour_unemployment", "Unemployment Rate, 16+, SA, %", parent_path=charts_path, yAxisExpr="format(datum.value, '.0%')")
stats = get_stats(unemp)
description = short_description(stats, "The unemployment rate", val_format=".1%", val_multiplier=1, val_suffix="")
description_url = upload_description(description, "labour_unemployment")
print(description)

The unemployment rate is 3.9%. It is up from Dec 2022 (3.8%). The UK historical average (past 9 years) is 4.6%.


# Inactivity

In [35]:
inac_ids = {
    "dataset_id": 'LMS',
    "series_id" : 'LF2S'
}

inac = clean_ons_series(inac_ids['dataset_id'], inac_ids['series_id'], parse_quarters, 'quarters')
inac['value'] = inac['value'].astype(float)/100
inac_url = upload_series(inac, 'labour_inactivity')

prepare_spec(inac_url, "labour_inactivity", "Inactivity Rate, 16-64, SA, %", parent_path=charts_path, yAxisExpr="format(datum.value, '.0%')")

stats = get_stats(inac)

description = short_description(stats, "The inactivity rate", val_format=".1%", val_multiplier=1, val_suffix="")
description_url = upload_description(description, "labour_inactivity")
print(description)


The inactivity rate is 21.9%. It is up from Oct 2022 (21.7%). The UK historical average (past 9 years) is 21.6%.


# Pay Growth

In [36]:
pay_ids = {
    #AWE: Whole Economy Real Terms Year on Year Three Month Growth (%): Seasonally Adjusted Total Pay
    "dataset_id": 'LMS',
    "series_id" : 'A3WW'
}

pay = clean_ons_series(pay_ids['dataset_id'], pay_ids['series_id'], parse_months, 'months')
pay['value'] = pay['value'].astype(float)/100
pay_url = upload_series(pay, 'labour_pay')
prepare_spec(pay_url, "labour_pay", "Real Pay Growth, 3m yoy, SA, %", parent_path=charts_path, yAxisExpr="format(datum.value, '.0%')")

stats = get_stats(pay)
description = short_description(stats, "Real pay growth", val_format=".1%", val_multiplier=1, val_suffix="") 
description_url = upload_description(description, "labour_pay")
print(description)


Real pay growth is 1.4%. It is up from Jan 2023 (-2.8%). The UK historical average (past 10 years) is 0.8%.


# Productivity

In [37]:
prod_ids = {
    #UK Whole Economy: Output per hour worked SA: Index 2019 = 100
    "dataset_id": 'PRDY',
    "series_id" : 'LZVB'
}

prod = clean_ons_series(prod_ids['dataset_id'], prod_ids['series_id'], parse_quarters, 'quarters')
prod['value'] = prod['value'].astype(float)
prod_url = upload_series(prod, 'labour_prod')
prepare_spec(prod_url, "labour_prod", "Labour Productivity, Index 2019=100", parent_path=charts_path)

stats = get_stats(prod)
description = short_description(stats, "The productivity index", val_format=".0f", val_multiplier=1)
description_url = upload_description(description, "labour_prod")
print(description)




The productivity index is 103. It is up from Jul 2022 (102). The UK historical average (past 10 years) is 100.


In [38]:
prod_url

'https://eco-temp-cache.s3.eu-west-2.amazonaws.com/gd/labour_prod.csv'

In [39]:

df = date_parser(df)
df = df[['date', 'value']]
df = df.dropna()



NameError: name 'date_parser' is not defined