In [42]:
import pandas as pd 
import numpy as np
import requests
import boto3
import os
import json

charts_path = "/Users/finn/Documents/GitHub/GrowthDiagnostics/charts"

In [36]:
def parse_quarters(df):
    df['quarter'] = pd.to_numeric(df.quarter.str.replace('Q', ''))
    df['month'] = (df.quarter-1)*3+1
    df['date'] = pd.to_datetime(df.year.astype(str) + '-' + df.month.astype(str) + '-01')
    return df

def parse_years(df):
    df['date'] = pd.to_datetime(df.year.astype(str) + '-01-01')
    return df

def clean_ons_series(dataset_id, series_id, date_parser, key):
    url = f'https://api.ons.gov.uk/timeseries/{series_id}/dataset/{dataset_id}/data'
    req = requests.get(url)
    data = req.json()
    data = data[key]
    df = pd.DataFrame(data)
    df = date_parser(df)
    df = df[['date', 'value']]
    df = df.dropna()
    return df

In [125]:
def upload_series(df, name, bucket="eco-temp-cache"):
    csv = df.to_csv(index=False, date_format='%Y-%m-%d')
    s3 = boto3.client('s3')
    res = s3.put_object(Bucket=bucket, Key=f"gd/{name}.csv", Body=csv)
    return f"https://eco-temp-cache.s3.eu-west-2.amazonaws.com/gd/{name}.csv"

def upload_description(description, name, bucket="eco-temp-cache"):
    s3 = boto3.client('s3')
    res = s3.put_object(Bucket=bucket, Key=f"gd/descriptions/{name}.txt", Body=json.dumps(description))
    return f"https://eco-temp-cache.s3.eu-west-2.amazonaws.com/gd/descriptions/{name}.txt"


In [48]:
def prepare_spec(data_url, path, title, filter=None, parent_path="charts"):
    spec = {
        "$schema": "https://vega.github.io/schema/vega-lite/v5.json",
        "title": {"text": title},
        "transform": [{"filter": "datum.value>0"}],
        "data": {
            "url": data_url,
        },
        "mark": {"type": "line"},
        "encoding": {
            "x": {
                "field": "date",
                "type": "temporal",
                "axis": {"title": "", "titleColor": "#122B39"}
            },
            "y": {
                "field": "value",
                "type": "quantitative",
                "axis": {"title": "", "titleColor": "#122B39"}
            }
        }
    }
    if filter:
        spec['transform'].append({"filter": filter})
    with open(f"{parent_path}/{path}.json", "w") as f:
        f.write(json.dumps(spec))

In [187]:
def get_stats(df):
    # get the most recent value and it's date
    latest = df.iloc[-1]
    latest_date = latest.date
    latest_value = latest.value
    # get the as close to 1 year ago as possible
    year_ago = df[df.date<=df.date.max()-pd.DateOffset(years=1)].iloc[-1]
    year_ago_date = year_ago.date
    year_ago_value = year_ago.value
    # get the average for the last 10 years (or as many as we have)
    avg = df[df.date>df.date.max()-pd.DateOffset(years=10)].value.mean()
    avg_year_count = df.iloc[-1].date.year-df[df.date>df.date.max()-pd.DateOffset(years=10)].iloc[0].date.year
    return {
        'latest': {'date': latest_date, 'value': latest_value},
        'latest_date': latest_date,
        'year_ago': {'date': year_ago_date, 'value': year_ago_value},
        'year_ago_date': year_ago_date,
        'avg': avg,
        'avg_year_count': avg_year_count
    }

def human_change(val1, val2, date1, date2, date_format="%b %Y", val_format=".1%", change_format=".3f", change_multiplier=1, change_units=""):
    print(f"val1: {val1}, val2: {val2}, date1: {date1}, date2: {date2}")
    if np.round(val1, 3) == np.round(val2, 3):
        return f"unchanged from {date2.strftime(date_format)}"
    if val1 > val2:
        return f"up {(val1-val2)*change_multiplier:{change_format}}{change_units} from {date2.strftime(date_format)} when it was {val2:{val_format}}"
    if val1 < val2:
        return f"down {(val1-val2)*change_multiplier:{change_format}}{change_units} from {date2.strftime(date_format)} when it was {val2:{val_format}}"  

def temporal_description(data, name="it", preamble="", val_format=".1%", change_format=".3f", change_multiplier=1, change_units="", date_format="%b %Y"):
    if len(preamble)>0 and preamble[-1]!=" ":
        preamble += " "
    stats = get_stats(data)
    return f"{preamble}In {stats['latest']['date'].strftime('%B %Y')}, {name} was {stats['latest']['value']:.1%}. This is {human_change(stats['latest']['value'], stats['year_ago']['value'], stats['latest']['date'], stats['year_ago']['date'], date_format, val_format, change_format, change_multiplier, change_units)}. The average for the last {stats['avg_year_count']} years is {stats['avg']:.1%}."
    

# GDP Data

In [182]:
gdp_cvm_sa_ids = {
    # Gross Domestic Product: chained volume measures: Seasonally adjusted £m
    # from Quarterly National Accounts
    "dataset_id" : "qna",
    "series_id" : "ABMI",
}

gdp_cvm_sa = clean_ons_series(**gdp_cvm_sa_ids, date_parser=parse_quarters, key='quarters')

gdp_sa_cp_ids = {
    # Gross Domestic Product: chained volume measures: Seasonally adjusted £m
    # from Quarterly National Accounts
    "dataset_id" : "qna",
    "series_id" : "YBHA",
}

gdp_sa_cp = clean_ons_series(**gdp_sa_cp_ids, date_parser=parse_quarters, key='quarters')

# GFCF (% of GDP)

In [183]:
# Then get GFCF data
gfcf_ids = {
    # Total Gross Fixed Capital Formation CVM SA £m
    # from Business investment time series (CXNV)
    "dataset_id": 'cxnv',
    "series_id" : 'npqt'
}

gfcf = clean_ons_series(gfcf_ids['dataset_id'], gfcf_ids['series_id'], parse_quarters, 'quarters')

In [174]:
gfcf_gdp_ratio = gfcf.merge(gdp_cvm_sa, on='date', suffixes=('_gfcf', '_gdp'))
# force numerics
gfcf_gdp_ratio['value_gfcf'] = pd.to_numeric(gfcf_gdp_ratio.value_gfcf, errors='coerce')
gfcf_gdp_ratio['value_gdp'] = pd.to_numeric(gfcf_gdp_ratio.value_gdp, errors='coerce')
gfcf_gdp_ratio = gfcf_gdp_ratio.dropna()
gfcf_gdp_ratio['value'] = gfcf_gdp_ratio.value_gfcf / gfcf_gdp_ratio.value_gdp
gfcf_gdp_ratio = gfcf_gdp_ratio[['date', 'value']]
gfcf_gdp_url = upload_series(gfcf_gdp_ratio, 'gfcf_gdp_ratio')

# prepare the spec
prepare_spec(gfcf_gdp_url, "investment_gfcf_gdp", "GFCF, % of GDP, SA", parent_path=charts_path)

In [189]:
stats = get_stats(gfcf_gdp_ratio)
description = temporal_description(gfcf_gdp_ratio, 
                                   "the ratio of Gross Fixed Capital Formation to GDP", 
                                   "Gross Fixed Capital Formation as a percentage of GDP measures the proportion of GDP that is invested in fixed assets and is a primary indicator of investment.", 
                                   val_format=".1%",
                                   change_format=".2f",
                                   change_multiplier=100,
                                   change_units="pp")
description_url = upload_description(description, "investment_gfcf_gdp")
print(description)

val1: 0.18798565083583355, val2: 0.18695380169003165, date1: 2023-07-01 00:00:00, date2: 2022-07-01 00:00:00
Gross Fixed Capital Formation as a percentage of GDP measures the proportion of GDP that is invested in fixed assets and is a primary indicator of investment. In July 2023, the ratio of Gross Fixed Capital Formation to GDP was 18.8%. This is up 0.10pp from Jul 2022 when it was 18.7%. The average for the last 10 years is 18.1%.


# Gov GFCF (% of GDP)

In [176]:
central_gov_gfcf_ids = {
    # Central Government: Total gross fixed capital formation CP SA £m
    # UK Economic Accounts time series (UKEA)
    "dataset_id": 'ukea',
    "series_id" : 'RNCZ'
}

central_gov_gfcf = clean_ons_series(central_gov_gfcf_ids['dataset_id'], central_gov_gfcf_ids['series_id'], parse_quarters, 'quarters')
gov_gfcf_gdp_ratio = central_gov_gfcf.merge(gdp_sa_cp, on='date', suffixes=('_gfcf', '_gdp'))
# force numerics
gov_gfcf_gdp_ratio['value_gfcf'] = pd.to_numeric(gov_gfcf_gdp_ratio.value_gfcf, errors='coerce')
gov_gfcf_gdp_ratio['value_gdp'] = pd.to_numeric(gov_gfcf_gdp_ratio.value_gdp, errors='coerce')
gov_gfcf_gdp_ratio = gov_gfcf_gdp_ratio.dropna()
gov_gfcf_gdp_ratio['value'] = gov_gfcf_gdp_ratio.value_gfcf / gov_gfcf_gdp_ratio.value_gdp
gov_gfcf_gdp_ratio = gov_gfcf_gdp_ratio[['date', 'value']]
gov_gfcf_gdp_url = upload_series(gov_gfcf_gdp_ratio, 'gov_gfcf_gdp_ratio')

# prepare the spec
prepare_spec(gov_gfcf_gdp_url, "investment_gov_gfcf_gdp", "Central Government GFCF, % of GDP, SA", 
             parent_path=charts_path)


In [191]:
stats = get_stats(gov_gfcf_gdp_ratio)
description = temporal_description(gov_gfcf_gdp_ratio,
                                   "the ratio of Central Government Gross Fixed Capital Formation to GDP",
                                   "",
                                   val_format=".1%",
                                   change_format=".2f",
                                   change_multiplier=100,
                                   change_units="pp")
# upload_description(description, "investment_gov_gfcf_gdp")
print(description)

val1: 0.02404393436115376, val2: 0.02196725211116838, date1: 2023-07-01 00:00:00, date2: 2022-07-01 00:00:00
In July 2023, the ratio of Central Government Gross Fixed Capital Formation to GDP was 2.4%. This is up 0.21pp from Jul 2022 when it was 2.2%. The average for the last 10 years is 1.9%.


# Business GFCF (% of GDP)

In [61]:
bus_gfcf_ids = {
    # Business Investment: CP SA: £m
    # UK Economic Accounts time series (UKEA)
    "dataset_id": 'ukea',
    "series_id" : 'npek'
}

bus_gfcf = clean_ons_series(bus_gfcf_ids['dataset_id'], bus_gfcf_ids['series_id'], parse_quarters, 'quarters')
bus_gfcf_gdp_ratio = bus_gfcf.merge(gdp_sa_cp, on='date', suffixes=('_gfcf', '_gdp'))
# force numerics
bus_gfcf_gdp_ratio['value_gfcf'] = pd.to_numeric(bus_gfcf_gdp_ratio.value_gfcf, errors='coerce')
bus_gfcf_gdp_ratio['value_gdp'] = pd.to_numeric(bus_gfcf_gdp_ratio.value_gdp, errors='coerce')
bus_gfcf_gdp_ratio = bus_gfcf_gdp_ratio.dropna()
bus_gfcf_gdp_ratio['value'] = bus_gfcf_gdp_ratio.value_gfcf / bus_gfcf_gdp_ratio.value_gdp
bus_gfcf_gdp_ratio = bus_gfcf_gdp_ratio[['date', 'value']]
bus_gfcf_gdp_url = upload_series(bus_gfcf_gdp_ratio, 'bus_gfcf_gdp_ratio')

# prepare the spec
prepare_spec(bus_gfcf_gdp_url, "investment_bus_gfcf_gdp", "Business GFCF, % of GDP, SA", parent_path=charts_path)

In [192]:
stats = get_stats(bus_gfcf_gdp_ratio)
description = temporal_description(bus_gfcf_gdp_ratio, "the ratio of Business Gross Fixed Capital Formation to GDP", 
                                   "",
                                                                      val_format=".1%",
                                   change_format=".2f",
                                   change_multiplier=100,
                                   change_units="pp")

upload_description(description, "investment_bus_gfcf_gdp")
print(description)

val1: 0.09603709260307879, val2: 0.09911271458589382, date1: 2023-07-01 00:00:00, date2: 2022-07-01 00:00:00
In July 2023, the ratio of Business Gross Fixed Capital Formation to GDP was 9.6%. This is down -0.31pp from Jul 2022 when it was 9.9%. The average for the last 10 years is 10.0%.


# R&D (% of GDP)

In [201]:
rd_gdp_ids = {
    # Gross Expenditure on R&D (GERD): Total as a percentage of GDP
    # from GERD
    "dataset_id": 'gerd',
    "series_id" : 'GLBH'
}

rd_gdp = clean_ons_series(rd_gdp_ids['dataset_id'], rd_gdp_ids['series_id'], parse_years, 'years')
rd_gdp['value'] = pd.to_numeric(rd_gdp.value, errors='coerce')/100
rd_gdp_url = upload_series(rd_gdp, 'rd_gdp')


# prepare the spec
prepare_spec(rd_gdp_url, "investment_rd_gdp", "R&D, % of GDP", parent_path=charts_path)


In [202]:
stats = get_stats(rd_gdp)
description = temporal_description(rd_gdp, "the ratio of Gross Expenditure on R&D to GDP", 
                                   "", 
                                   val_format=".1%",
                                   change_format=".2f",
                                   change_multiplier=100,
                                   change_units="pp")
upload_description(description, "investment_rd_gdp")
print(description)

val1: 0.0174, val2: 0.0172, date1: 2019-01-01 00:00:00, date2: 2018-01-01 00:00:00
In January 2019, the ratio of Gross Expenditure on R&D to GDP was 1.7%. This is unchanged from Jan 2018. The average for the last 9 years is 1.6%.


In [22]:
gfcf_ids.keys()

dict_keys(['\n    Total Gross Fixed Capital Formation CVM SA £m\n    from Business investment time series (CXNV)\n    dataset_id', 'series_id'])

In [19]:
gfcf

Unnamed: 0,date,value
0,1955-01-01,14969
1,1955-04-01,15324
2,1955-07-01,15861
3,1955-10-01,16205
4,1956-01-01,16461
...,...,...
271,2022-10-01,106689
272,2023-01-01,109400
273,2023-04-01,108454
274,2023-07-01,106955
