In [101]:
import pandas as pd
import numpy as np

import sys
import eco_style
import json

import altair as alt
import importlib
# Re-execute the already-imported eco_style module so that edits made to it
# are picked up without restarting the kernel.
importlib.reload(eco_style)

# Activate the Altair theme registered under the name 'report'.
# NOTE(review): presumably eco_style registers this theme on import — confirm.
alt.themes.enable('report')

ThemeRegistry.enable('report')

# Investment by Sector

In [102]:
# Columns of data/GFCF_raw.csv, kept for reference (this looks like pasted
# `df.columns` output; it used to sit in the cell as a no-op list literal):
#   STRUCTURE, STRUCTURE_ID, STRUCTURE_NAME, ACTION, LOCATION, Country,
#   INDICATOR, Indicator, SUBJECT, Subject, MEASURE, Measure, FREQUENCY,
#   Frequency, TIME_PERIOD, Time, OBS_VALUE, Observation Value, OBS_STATUS,
#   Observation Status, UNIT_MEASURE, Unit of Measures, UNIT_MULT, Multiplier,
#   BASE_PER, Base reference period

# ISO3 codes of the countries kept for the comparison.
# NOTE(review): despite the name this is the G7 *plus Spain* ('ESP').
g7_iso3 = ['CAN', 'FRA', 'DEU', 'ITA', 'JPN', 'GBR', 'USA', 'ESP']

df = pd.read_csv("data/GFCF_raw.csv")

# OBS_VALUE becomes the working "Value" column and TIME_PERIOD becomes "Time";
# the file's original "Time" column is renamed out of the way first so the two
# do not collide.
df = df.rename(columns={"OBS_VALUE": "Value", "Time" : "unused", "TIME_PERIOD": "Time"})

df = df[['LOCATION', 'Country', 'Time',  'Indicator', 'Value', 'MEASURE', 'SUBJECT']]

# .copy() gives an independent frame, so the column assignment below does not
# write into a slice of the unfiltered data (avoids SettingWithCopyWarning).
df = df[df.LOCATION.isin(g7_iso3)].copy()
#df = df[df.Time <= 2021]

# Every value is divided by 100.
# NOTE(review): presumably this turns percentage-valued indicators into
# fractions; the MLN_USD levels are scaled as well, which cancels out in the
# GFCF / GDP ratio computed further down.
df['Value'] = df['Value'].astype(float) / 100

# Step 1: total investment (GFCF) relative to GDP, both from the MLN_USD rows.

# GDP level per country and year, with the value column renamed to 'GDP'.
gdp_df = (
    df.query("Indicator == 'Gross domestic product (GDP)' & MEASURE == 'MLN_USD'")
    .loc[:, ['LOCATION', 'Country', 'Time', 'Value']]
    .rename(columns={'Value': 'GDP'})
)

# GFCF level joined to the matching GDP row; the ratio of the two is stored
# in a new 'GFCF_GDP_Share' column appended after the merged columns.
gfcf_df = (
    df.query("Indicator == 'Investment (GFCF)' & MEASURE == 'MLN_USD'")
    .merge(gdp_df, on=['LOCATION', 'Country', 'Time'])
    .assign(GFCF_GDP_Share=lambda merged: merged['Value'] / merged['GDP'])
)

def _with_peer_range(frame):
    """Keep the four core columns and add the per-year cross-country range.

    Parameters
    ----------
    frame : pandas.DataFrame
        Must contain 'LOCATION', 'Country', 'Time' and 'Value'.

    Returns
    -------
    pandas.DataFrame
        A new, independent frame with columns
        LOCATION, Country, Time, Value, low, high where, for each 'Time',
        ``low`` is ``quantile(0)`` (the minimum) and ``high`` is
        ``quantile(1)`` (the maximum) of 'Value' across all rows of that year.

    NOTE(review): an earlier comment described low/high as the 10th and 90th
    percentiles, but the code has always used quantiles 0 and 1. Switch to
    0.1 / 0.9 here if percentiles were the real intent.
    """
    core = frame[['LOCATION', 'Country', 'Time', 'Value']]
    by_year = core.groupby(by=["Time"])['Value']
    # assign() returns a fresh frame, so callers can add columns to the result
    # later without writing into a slice of `frame`.
    return core.assign(
        low=by_year.transform(lambda x: x.quantile(0)),
        high=by_year.transform(lambda x: x.quantile(1)),
    )


def _sector_share_of_gdp(data, gfcf, subject):
    """Express one sector's investment as a share of GDP.

    Parameters
    ----------
    data : pandas.DataFrame
        The filtered raw frame with 'Indicator', 'SUBJECT' and 'Value' columns.
    gfcf : pandas.DataFrame
        Frame carrying 'GFCF_GDP_Share' per LOCATION / Country / Time.
    subject : str
        SUBJECT code of the sector to select ('CORP', 'GG', 'HH').

    Returns
    -------
    pandas.DataFrame
        LOCATION, Country, Time, Value, low, high, where Value is the sector's
        'Investment by sector' value (its share of total GFCF) multiplied by
        total GFCF as a share of GDP.
    """
    is_sector = (data['Indicator'] == 'Investment by sector') & (data['SUBJECT'] == subject)
    # Rename before merging so the sector share does not collide with the
    # 'Value' column that `gfcf` brings along.
    sector = data.loc[is_sector].rename(columns={'Value': 'Sector_GFCF_Share'})
    merged = pd.merge(sector, gfcf, on=['LOCATION', 'Country', 'Time'])
    merged['Value'] = merged['Sector_GFCF_Share'] * merged['GFCF_GDP_Share']
    return _with_peer_range(merged)


# Corporate GFCF as a share of GDP.
corp_df = _sector_share_of_gdp(df, gfcf_df, 'CORP')

# Government GFCF as a share of GDP (SUBJECT code 'GG').
gov_df = _sector_share_of_gdp(df, gfcf_df, 'GG')

# Household GFCF as a share of GDP.
household_df = _sector_share_of_gdp(df, gfcf_df, 'HH')

# Total GFCF as a share of GDP: the ratio column replaces the level column.
total_df = _with_peer_range(
    gfcf_df.drop(columns=['Value']).rename(columns={'GFCF_GDP_Share': 'Value'})
)


# Investment by Sector vs Peers

In [109]:
# Per-country vertical nudges for the end-of-line labels, one dict per chart
# in the same order as `titles`. Despite the name `dx`, the values are fed to
# the text mark's `dy`. Countries missing from a dict get no explicit offset.
dx = [
    {
        "Italy": -3,
        "United States":-10,
        "Canada": -5,
        "United Kingdom": 7,
        "Spain": -3,
    },
    {
        "Canada": -5,
        "Spain": -6,
        "United States": -5,
        "Germany": 5
    },{
        "Germany": 5,
        "France": 5,
        "United States": -2,
        "Spain": -2
    },
    {
        "Japan": -2,
        "Canada": -5,
        "Germany": -5,
        "United States": 7,
        "Spain": 5,
        "Italy": 5
    }
]

titles = ["corp", "gov", "household", "total"]

# Folder the chart files are written to.
# NOTE(review): absolute, machine-specific path — consider deriving it from the
# repository root so the notebook runs on other machines.
CHART_DIR = "/Users/finn/Documents/GitHub/GrowthDiagnostics/report_figures/charts"

# The labels are pinned to x == chart width, i.e. the right edge of the plot,
# so both uses must stay in sync.
CHART_WIDTH = 450

for title, label_dy, df in zip(titles, dx, [corp_df, gov_df, household_df, total_df]):
    # NOTE: the next three assignments write into corp_df / gov_df /
    # household_df / total_df themselves (df is the same object), so those
    # frames keep the extra columns and the datetime 'Time' after this cell.

    # Only the latest observation of each country carries its name as a label.
    df['label'] = np.where(
        df.Time == df.groupby('Country')['Time'].transform('max'),
        df.Country,
        '',
    )
    # UK rows get order 1 and sort last.
    # NOTE(review): presumably so the UK line is drawn on top — confirm.
    df['order'] = np.where(df.Country == 'United Kingdom', 1, 0)
    df['Time'] = pd.to_datetime(df.Time, format="%Y")
    # sort_values returns a new frame; from here on `df` is that sorted copy.
    df = df.sort_values(by="order", ascending=True)

    base = alt.Chart(df).encode(
        x=alt.X("Time:T", title=""),
        y=alt.Y("Value:Q", title="", axis=alt.Axis(format="%")),
        # The range lists two more colours than the domain has countries.
        color=alt.Color("Country:N", scale={
            "domain": ["United Kingdom", "France", "Italy", "Spain", "Germany", "Japan", "Canada", "United States"],
            "range": ["#001f3f", "#4269d0", "#efb118", "#ff725c", "#6cc5b0", "#3ca951", "#ff8ab7", "#a463f2", "#97bbf5", "#9c6b4e"]
        }, legend=None)
    )

    # UK: dash pattern [0,0] and stroke width 2; every other country: dash
    # pattern [5,2] and stroke width 1.5.
    line = base.mark_line().encode(
        strokeDash=alt.condition(alt.datum.LOCATION == "GBR", alt.value([0,0]), alt.value([5,2])),
        size=alt.condition(alt.datum.LOCATION == "GBR",
                        alt.value(2), alt.value(1.5)),
    )

    # The offset dict is serialised into the expression and indexed by the
    # row's Country when the chart is rendered.
    labels = base.mark_text(
        align='left',
        dx=5,
        dy=alt.expr(f"{json.dumps(label_dy)}[datum.Country]")
    ).encode(
        text=alt.Text('label:N'),
        x=alt.value(CHART_WIDTH),
        size=alt.condition(alt.datum.LOCATION == "GBR", alt.value(14), alt.value(12)),
    )

    print(title)
    chart = (line + labels).properties(
        width=CHART_WIDTH,
        height=300
    )
    chart.display()
    chart.save(f"{CHART_DIR}/gfcf_{title}.json")
    chart.save(f"{CHART_DIR}/gfcf_{title}.png", scale_factor=3)

corp


  col = df[col_name].apply(to_list_if_array, convert_dtype=False)
  col = df[col_name].apply(to_list_if_array, convert_dtype=False)


  col = df[col_name].apply(to_list_if_array, convert_dtype=False)
  col = df[col_name].apply(to_list_if_array, convert_dtype=False)
  col = df[col_name].apply(to_list_if_array, convert_dtype=False)
  col = df[col_name].apply(to_list_if_array, convert_dtype=False)


gov


  col = df[col_name].apply(to_list_if_array, convert_dtype=False)
  col = df[col_name].apply(to_list_if_array, convert_dtype=False)


  col = df[col_name].apply(to_list_if_array, convert_dtype=False)
  col = df[col_name].apply(to_list_if_array, convert_dtype=False)
  col = df[col_name].apply(to_list_if_array, convert_dtype=False)
  col = df[col_name].apply(to_list_if_array, convert_dtype=False)


household


  col = df[col_name].apply(to_list_if_array, convert_dtype=False)
  col = df[col_name].apply(to_list_if_array, convert_dtype=False)


  col = df[col_name].apply(to_list_if_array, convert_dtype=False)
  col = df[col_name].apply(to_list_if_array, convert_dtype=False)
  col = df[col_name].apply(to_list_if_array, convert_dtype=False)
  col = df[col_name].apply(to_list_if_array, convert_dtype=False)


total


  col = df[col_name].apply(to_list_if_array, convert_dtype=False)
  col = df[col_name].apply(to_list_if_array, convert_dtype=False)


  col = df[col_name].apply(to_list_if_array, convert_dtype=False)
  col = df[col_name].apply(to_list_if_array, convert_dtype=False)
  col = df[col_name].apply(to_list_if_array, convert_dtype=False)
  col = df[col_name].apply(to_list_if_array, convert_dtype=False)


In [65]:
# Scratch cell: display the frame currently bound to `df`. After the chart loop
# above, that is the sorted copy of the last frame it iterated over.
df

Unnamed: 0,LOCATION,Country,Time,Value,low,high,label,order
0,FRA,France,1990-01-01,0.120354,0.104003,0.120354,,1
1,CAN,Canada,1990-01-01,0.104003,0.104003,0.120354,,1
2,USA,United States,1992-01-01,0.097791,0.089180,0.115718,,1
3,CAN,Canada,1991-01-01,0.098176,0.098176,0.121987,,1
4,USA,United States,1993-01-01,0.099762,0.087399,0.106227,,1
...,...,...,...,...,...,...,...,...
205,ITA,Italy,2014-01-01,0.090859,0.090859,0.166137,,1
206,JPN,Japan,2014-01-01,0.166137,0.090859,0.166137,,1
207,FRA,France,2021-01-01,0.142732,0.101384,0.172112,,1
208,DEU,Germany,2020-01-01,0.122562,0.100204,0.171512,,1


In [51]:
# Scratch cell: preview the expression string built from the label offsets.
# Note this serialises the whole `dx` list, whereas the chart loop serialises
# a single entry of it (dx[i]).
f"{json.dumps(dx)}[datum.Country]"

'[{"United States": -10, "Canada": -5, "United Kingdom": 5}][datum.Country]'

In [None]:
for i, df in enumerate([corp_df, gov_df, household_df, total_df]):
