In [1]:
import pandas as pd
import numpy as np

import sys
import eco_style

import altair as alt
import importlib
importlib.reload(eco_style)

alt.themes.enable('report')

ThemeRegistry.enable('report')

# Investment by Sector

gfcf from: https://www.oecd-ilibrary.org/economics/investment-by-sector/indicator/english_abd72f11-en

gdp from: https://stats.oecd.org/Index.aspx?DataSetCode=NAAG

In [2]:
df = pd.read_csv("data/GFCF_raw.csv")
['STRUCTURE', 'STRUCTURE_ID', 'STRUCTURE_NAME', 'ACTION', 'LOCATION',
       'Country', 'INDICATOR', 'Indicator', 'SUBJECT', 'Subject', 'MEASURE',
       'Measure', 'FREQUENCY', 'Frequency', 'TIME_PERIOD', 'Time', 'OBS_VALUE',
       'Observation Value', 'OBS_STATUS', 'Observation Status', 'UNIT_MEASURE',
       'Unit of Measures', 'UNIT_MULT', 'Multiplier', 'BASE_PER',
       'Base reference period']

df = df.rename(columns={"OBS_VALUE": "Value", "Time" : "unused", "TIME_PERIOD": "Time"})

df = df[['LOCATION', 'Country', 'Time',  'Indicator', 'Value']]

g7_iso3 = ['CAN', 'FRA', 'DEU', 'ITA', 'JPN', 'GBR', 'USA']

df = df[df.LOCATION.isin(g7_iso3)]
#df = df[df.Time <= 2021]
df['Value'] = df['Value'].astype(float)/100

# first get GFCF as a share of GDP
gfcc_df = df.query("Indicator == 'Gross fixed capital formation, percentage of GDP'")
gfcc_df = gfcc_df.rename(columns={'Value': 'GFCF_GDP_Share'})

# then get corporate GFCF as a share of total GFCF
corp_df = df.query("Indicator == 'Gross fixed capital formation, Corporations, percentage of total GFCF'")
corp_df = corp_df.rename(columns={'Value': 'Corp_GFCF_Share'})
corp_df = pd.merge(corp_df, gfcc_df, on=['LOCATION', 'Country', 'Time'])
corp_df['Value'] = corp_df['Corp_GFCF_Share'] * corp_df['GFCF_GDP_Share'] # Corporate GFCF as a share of GDP
corp_df = corp_df[['LOCATION', 'Country', 'Time', 'Value']]

## get the 90% and 10% percentile
corp_df['low'] = corp_df.groupby(by=["Time"])['Value'].transform(lambda x: x.quantile(0.1))
corp_df['high'] = corp_df.groupby(by=["Time"])['Value'].transform(lambda x: x.quantile(0.9))

# then get government
gov_df = df.query("Indicator == 'Gross fixed capital formation, General government, percentage of total GFCF'")
gov_df = gov_df.rename(columns={'Value': 'Gov_GFCF_Share'})
gov_df = pd.merge(gov_df, gfcc_df, on=['LOCATION', 'Country', 'Time'])
gov_df['Value'] = gov_df['Gov_GFCF_Share'] * gov_df['GFCF_GDP_Share'] # Corporate GFCF as a share of GDP
gov_df = gov_df[['LOCATION', 'Country', 'Time', 'Value']]

# get the 90% and 10% percentile
gov_df['low'] = gov_df.groupby(by=["Time"])['Value'].transform(lambda x: x.quantile(0.1))
gov_df['high'] = gov_df.groupby(by=["Time"])['Value'].transform(lambda x: x.quantile(0.9))

# then get household
household_df = df.query("Indicator == 'Gross fixed capital formation, Households and NPISHs, percentage of total GFCF'")
household_df = household_df.rename(columns={'Value': 'Household_GFCF_Share'})
household_df = pd.merge(household_df, gfcc_df, on=['LOCATION', 'Country', 'Time'])
household_df['Value'] = household_df['Household_GFCF_Share'] * household_df['GFCF_GDP_Share'] # Corporate GFCF as a share of GDP
household_df = household_df[['LOCATION', 'Country', 'Time', 'Value']]

# get the 90% and 10% percentile
household_df['low'] = household_df.groupby(by=["Time"])['Value'].transform(lambda x: x.quantile(0.1))
household_df['high'] = household_df.groupby(by=["Time"])['Value'].transform(lambda x: x.quantile(0.9))

# and a total df
total_df = df.query("Indicator == 'Gross fixed capital formation, percentage of GDP'")
total_df = total_df[['LOCATION', 'Country', 'Time', 'Value']]

# get the 90% and 10% percentile
total_df['low'] = total_df.groupby(by=["Time"])['Value'].transform(lambda x: x.quantile(0.1))
total_df['high'] = total_df.groupby(by=["Time"])['Value'].transform(lambda x: x.quantile(0.9))

# corp_df.to_csv("data/institutional_shares/corporate_gfcf_pct_gdp.csv", index=False)
# gov_df.to_csv("data/institutional_shares/government_gfcf_pct_gdp.csv", index=False)
# household_df.to_csv("data/institutional_shares/household_gfcf_pct_gdp.csv", index=False)
# total_df.to_csv("data/institutional_shares/total_gfcf_pct_gdp.csv", index=False)

In [3]:
df = pd.read_csv("data/GFCF_raw.csv")
['STRUCTURE', 'STRUCTURE_ID', 'STRUCTURE_NAME', 'ACTION', 'LOCATION',
       'Country', 'INDICATOR', 'Indicator', 'SUBJECT', 'Subject', 'MEASURE',
       'Measure', 'FREQUENCY', 'Frequency', 'TIME_PERIOD', 'Time', 'OBS_VALUE',
       'Observation Value', 'OBS_STATUS', 'Observation Status', 'UNIT_MEASURE',
       'Unit of Measures', 'UNIT_MULT', 'Multiplier', 'BASE_PER',
       'Base reference period']

df = df.rename(columns={"OBS_VALUE": "Value", "Time" : "unused", "TIME_PERIOD": "Time"})

df = df[['LOCATION', 'Country', 'Time',  'Indicator', 'Value']]

g7_iso3 = ['CAN', 'FRA', 'DEU', 'ITA', 'JPN', 'GBR', 'USA']

df = df[df.LOCATION.isin(g7_iso3)]

df

Unnamed: 0,LOCATION,Country,Time,Indicator,Value
0,FRA,France,1990,Investment by sector,51.471705
7,GBR,United Kingdom,1990,Gross domestic product (GDP),976326.049000
9,CAN,Canada,1990,Gross domestic product (GDP),560184.877000
10,CAN,Canada,1990,Investment by sector,47.917274
11,CAN,Canada,1990,Investment by sector,34.776359
...,...,...,...,...,...
7981,GBR,United Kingdom,2001,Investment (GFCF),1.987057
7983,GBR,United Kingdom,2020,Investment (GFCF),-10.768583
7984,GBR,United Kingdom,2012,Investment (GFCF),1.594215
7985,GBR,United Kingdom,2004,Investment (GFCF),2.660605


In [4]:


df = df[df.LOCATION.isin(g7_iso3)]
df = df[df.Time <= 2021]
df['Value'] = df['Value'].astype(float)/100

# first get GFCF as a share of GDP
gfcc_df = df.query("Indicator == 'Gross fixed capital formation, percentage of GDP'")
gfcc_df = gfcc_df.rename(columns={'Value': 'GFCF_GDP_Share'})
gfcc_df

Unnamed: 0,LOCATION,Country,Time,Indicator,GFCF_GDP_Share


In [5]:
df = pd.read_csv("data/GFCF_raw.csv")
['STRUCTURE', 'STRUCTURE_ID', 'STRUCTURE_NAME', 'ACTION', 'LOCATION',
       'Country', 'INDICATOR', 'Indicator', 'SUBJECT', 'Subject', 'MEASURE',
       'Measure', 'FREQUENCY', 'Frequency', 'TIME_PERIOD', 'Time', 'OBS_VALUE',
       'Observation Value', 'OBS_STATUS', 'Observation Status', 'UNIT_MEASURE',
       'Unit of Measures', 'UNIT_MULT', 'Multiplier', 'BASE_PER',
       'Base reference period']

df = df.rename(columns={"OBS_VALUE": "Value", "Time" : "unused", "TIME_PERIOD": "Time"})

df = df[['LOCATION', 'Country', 'Time',  'Indicator', 'Value', 'MEASURE', 'SUBJECT']]

g7_iso3 = ['CAN', 'FRA', 'DEU', 'ITA', 'JPN', 'GBR', 'USA']

df = df[df.LOCATION.isin(g7_iso3)]
#df = df[df.Time <= 2021]
df['Value'] = df['Value'].astype(float)/100

# first get GFCF as a share of GDP
gdp_df = df.query("Indicator == 'Gross domestic product (GDP)' & MEASURE == 'MLN_USD'")[['LOCATION', 'Country', 'Time', 'Value']].rename(columns={'Value': 'GDP'})
gfcf_df = df.query("Indicator == 'Investment (GFCF)' & MEASURE == 'MLN_USD'")
gfcf_df = pd.merge(gfcf_df, gdp_df, on=['LOCATION', 'Country', 'Time'])
gfcf_df['GFCF_GDP_Share'] = gfcf_df['Value'] / gfcf_df['GDP']

# then get corporate GFCF as a share of total GFCF
corp_df = df.query("Indicator == 'Investment by sector' & SUBJECT == 'CORP'")
corp_df = corp_df.rename(columns={'Value': 'Corp_GFCF_Share'})
corp_df = pd.merge(corp_df, gfcf_df, on=['LOCATION', 'Country', 'Time'])
corp_df['Value'] = corp_df['Corp_GFCF_Share'] * corp_df['GFCF_GDP_Share'] # Corporate GFCF as a share of GDP
corp_df = corp_df[['LOCATION', 'Country', 'Time', 'Value']]

## get the 90% and 10% percentile
corp_df['low'] = corp_df.groupby(by=["Time"])['Value'].transform(lambda x: x.quantile(0))
corp_df['high'] = corp_df.groupby(by=["Time"])['Value'].transform(lambda x: x.quantile(1))


# then get government
gov_df = df.query("Indicator == 'Investment by sector' & SUBJECT == 'GG'")
gov_df = gov_df.rename(columns={'Value': 'Gov_GFCF_Share'})
gov_df = pd.merge(gov_df, gfcf_df, on=['LOCATION', 'Country', 'Time'])
gov_df['Value'] = gov_df['Gov_GFCF_Share'] * gov_df['GFCF_GDP_Share'] # Corporate GFCF as a share of GDP
gov_df = gov_df[['LOCATION', 'Country', 'Time', 'Value']]

# get the 90% and 10% percentile
gov_df['low'] = gov_df.groupby(by=["Time"])['Value'].transform(lambda x: x.quantile(0))
gov_df['high'] = gov_df.groupby(by=["Time"])['Value'].transform(lambda x: x.quantile(1))

# then get household
household_df = df.query("Indicator == 'Investment by sector' & SUBJECT == 'HH'")
household_df = household_df.rename(columns={'Value': 'Household_GFCF_Share'})
household_df = pd.merge(household_df, gfcf_df, on=['LOCATION', 'Country', 'Time'])
household_df['Value'] = household_df['Household_GFCF_Share'] * household_df['GFCF_GDP_Share'] # Corporate GFCF as a share of GDP
household_df = household_df[['LOCATION', 'Country', 'Time', 'Value']]

# get the 90% and 10% percentile
household_df['low'] = household_df.groupby(by=["Time"])['Value'].transform(lambda x: x.quantile(0))
household_df['high'] = household_df.groupby(by=["Time"])['Value'].transform(lambda x: x.quantile(1))

# and a total df
total_df = gfcf_df.drop(columns=['Value']).rename(columns={'GFCF_GDP_Share': 'Value'})
total_df = total_df[['LOCATION', 'Country', 'Time', 'Value']]

# get the 90% and 10% percentile
total_df['low'] = total_df.groupby(by=["Time"])['Value'].transform(lambda x: x.quantile(0))
total_df['high'] = total_df.groupby(by=["Time"])['Value'].transform(lambda x: x.quantile(1))


### G7 Stats

In [6]:
g7 = ['CAN', 'FRA', 'DEU', 'ITA', 'JPN', 'GBR', 'USA']


g7_total_median = total_df[total_df.LOCATION.isin(g7) & (total_df.Time == 2021)]['Value'].median()
g7_corp_median = corp_df[corp_df.LOCATION.isin(g7) & (corp_df.Time == 2021)]['Value'].median()
g7_gov_median = gov_df[gov_df.LOCATION.isin(g7) & (gov_df.Time == 2021)]['Value'].median()
g7_household_median = household_df[household_df.LOCATION.isin(g7) & (household_df.Time == 2021)]['Value'].median()

oecd_total_median = total_df[total_df.LOCATION.isin(eco_style.OECD_iso3) & (total_df.Time == 2021)]['Value'].median()
oecd_corp_median = corp_df[corp_df.LOCATION.isin(eco_style.OECD_iso3) & (corp_df.Time == 2021)]['Value'].median()
oecd_gov_median = gov_df[gov_df.LOCATION.isin(eco_style.OECD_iso3) & (gov_df.Time == 2021)]['Value'].median()
oecd_household_median = household_df[household_df.LOCATION.isin(eco_style.OECD_iso3) & (household_df.Time == 2021)]['Value'].median()

print(g7_total_median, g7_corp_median, g7_gov_median, g7_household_median)

# get the gap
uk_gdp = 2198.473 # ONS GDP Tables, CVM GDP 2021, billions of pounds

uk_total = total_df[(total_df.LOCATION == 'GBR') & (total_df.Time == 2021)]['Value'].iloc[0]
uk_corp = corp_df[(corp_df.LOCATION == 'GBR') & (corp_df.Time == 2021)]['Value'].iloc[0]
uk_gov = gov_df[(gov_df.LOCATION == 'GBR') & (gov_df.Time == 2021)]['Value'].iloc[0]
uk_household = household_df[(household_df.LOCATION == 'GBR') & (household_df.Time == 2021)]['Value'].iloc[0]

print("Total")
print(f"    UK: {np.round(uk_total, 2)*100}% of GDP, {np.round(uk_total*uk_gdp, 1)} billion pounds")
print(f"    G7: {np.round(g7_total_median, 2)*100}% of GDP")
print(f"    OECD: {np.round(oecd_total_median, 2)*100}% of GDP")

print("Corp")
print(f"    UK: {np.round(uk_corp, 2)*100}% of GDP, {np.round(uk_corp*uk_gdp, 1)} billion pounds")
print(f"    G7: {np.round(g7_corp_median, 2)*100}% of GDP")
print(f"    OECD: {np.round(oecd_corp_median, 2)*100}% of GDP")

print("Gov")
print(f"    UK: {np.round(uk_gov, 2)*100}% of GDP, {np.round(uk_gov*uk_gdp, 1)} billion pounds")
print(f"    G7: {np.round(g7_gov_median, 2)*100}% of GDP")
print(f"    OECD: {np.round(oecd_gov_median, 2)*100}% of GDP")

print("Household")
print(f"    UK: {np.round(uk_household, 2)*100}% of GDP, {np.round(uk_household*uk_gdp, 0)} billion pounds")
print(f"    G7: {np.round(g7_household_median, 2)*100}% of GDP")
print(f"    OECD: {np.round(oecd_household_median, 2)*100}% of GDP")


print(f"Total Gap: {np.round((uk_total - g7_total_median), 2)}% of GDP, {np.round((uk_total - g7_total_median)*uk_gdp, 0)} billion pounds")
print(f"Corp Gap: {np.round((uk_corp - g7_corp_median), 2)}% of GDP, {np.round((uk_corp - g7_corp_median)*uk_gdp, 0)} billion pounds")
print(f"Gov Gap: {np.round((uk_gov - g7_gov_median), 2)}% of GDP, {np.round((uk_gov - g7_gov_median)*uk_gdp, 0)} billion pounds")
print(f"Household Gap: {np.round((uk_household - g7_household_median), 2)}% of GDP, {np.round((uk_household - g7_household_median)*uk_gdp, 0)} billion pounds")

print("OECD Gaps")
print(f"Total Gap: {np.round((uk_total - oecd_total_median), 2)}% of GDP, {np.round((uk_total - oecd_total_median)*uk_gdp, 0)} billion pounds")
print(f"Corp Gap: {np.round((uk_corp - oecd_corp_median), 2)}% of GDP, {np.round((uk_corp - oecd_corp_median)*uk_gdp, 0)} billion pounds")
print(f"Gov Gap: {np.round((uk_gov - oecd_gov_median), 2)}% of GDP, {np.round((uk_gov - oecd_gov_median)*uk_gdp, 0)} billion pounds")
print(f"Household Gap: {np.round((uk_household - oecd_household_median), 2)}% of GDP, {np.round((uk_household - oecd_household_median)*uk_gdp, 0)} billion pounds")

0.21299451274607414 0.10756140240400064 0.032749216072470895 0.06720065240488526
Total
    UK: 18.0% of GDP, 388.7 billion pounds
    G7: 21.0% of GDP
    OECD: 21.0% of GDP
Corp
    UK: 10.0% of GDP, 222.9 billion pounds
    G7: 11.0% of GDP
    OECD: 11.0% of GDP
Gov
    UK: 3.0% of GDP, 69.0 billion pounds
    G7: 3.0% of GDP
    OECD: 3.0% of GDP
Household
    UK: 4.0% of GDP, 97.0 billion pounds
    G7: 7.000000000000001% of GDP
    OECD: 7.000000000000001% of GDP
Total Gap: -0.04% of GDP, -80.0 billion pounds
Corp Gap: -0.01% of GDP, -14.0 billion pounds
Gov Gap: -0.0% of GDP, -3.0 billion pounds
Household Gap: -0.02% of GDP, -51.0 billion pounds
OECD Gaps
Total Gap: -0.04% of GDP, -80.0 billion pounds
Corp Gap: -0.01% of GDP, -14.0 billion pounds
Gov Gap: -0.0% of GDP, -3.0 billion pounds
Household Gap: -0.02% of GDP, -51.0 billion pounds


In [7]:
# get median row of the corp_df
corp_df = corp_df[corp_df.LOCATION.isin(eco_style.OECD_iso3)]
corp_df = corp_df[corp_df.Time == 2021]
corp_df = corp_df.sort_values(by=['Value'])
corp_df = corp_df.reset_index(drop=True)
corp_df

Unnamed: 0,LOCATION,Country,Time,Value,low,high
0,GBR,United Kingdom,2021,0.101384,0.101384,0.172112
1,CAN,Canada,2021,0.106302,0.101384,0.172112
2,ITA,Italy,2021,0.106632,0.101384,0.172112
3,USA,United States,2021,0.107561,0.101384,0.172112
4,DEU,Germany,2021,0.120107,0.101384,0.172112
5,FRA,France,2021,0.142732,0.101384,0.172112
6,JPN,Japan,2021,0.172112,0.101384,0.172112


### R&D for Table

In [8]:
df = pd.read_csv("/Users/finn/Documents/GitHub/Growth/R&D/data/rd_gdp.csv")
df = df.query("TIME == 2021")
df.Value = df.Value.astype(float)/100
df = df[df.LOCATION.isin(eco_style.OECD_iso3)]
oecd_median = df.Value.median()
gbr_val = df[df.LOCATION == 'GBR'].Value.iloc[0]

print(f"R&D Gap: {np.round((gbr_val - oecd_median), 2)}% of GDP, {np.round((gbr_val - oecd_median)*uk_gdp, 0)} billion pounds")

R&D Gap: 0.01% of GDP, 21.0 billion pounds


In [9]:
uk_gdp*0.02914

64.06350322

In [10]:
uk_total

0.17680955853581726

### Chart: Panel

In [16]:
importlib.reload(eco_style)

label_dy = [
    {
    "GBR": 2,  
    },
    {
        "GBR": 5
    },
    {
        "FRA": 7
    },
    {
        "FRA": 0,
        "DEU": -10,
        "POL" : 5
    }
]

for i, df in enumerate([corp_df, gov_df, household_df, total_df]):
    if i!=3 or i==0:
        df = df.copy()
        df = df[df.Time <= 2021]
        df = df[(df.Value <= (df.high + 0.01)) | df.LOCATION.isin(["GBR", "FRA", "USA", "DEU"])]
        df = df[(df.Value >= (df.low - 0.01)) | df.LOCATION.isin(["GBR", "FRA", "USA", "DEU", "POL"])]
    else:
        df = df.copy()
        df = df[df.Time <= 2021]
        df = df[(df.Value <= (df.high + 0.01)) | df.LOCATION.isin(["GBR", "FRA", "USA", "DEU"])]
        df = df[(df.Value >= (df.low - 0.01)) | df.LOCATION.isin(["GBR", "FRA", "USA", "DEU", "POL"])]

    other_1 = "POL" if i==3 or i==0 else ""
    base = alt.Chart(df).encode(
        x=alt.X('Time:Q', axis=alt.Axis(format="d"), scale=alt.Scale(domainMax= 2021)),
        y=alt.Y('Value:Q', axis=alt.Axis(format="%"), scale=alt.Scale(zero=True)),
        color=alt.Color('LOCATION:N', legend=None, scale=alt.Scale()),
        opacity=alt.condition(alt.datum.LOCATION == "GBR", alt.value(1), alt.value(0.8)),
    ).properties()

    line = base.mark_line(clip=True).encode(
            size=alt.condition(alt.datum.LOCATION == "GBR", alt.value(3), alt.value(1.5)),
    )


    end_base = base.transform_filter(
        f"datum.Time == 2021"
    )

    end_circles = end_base.mark_circle()

    print(f"{label_dy[i]}[datum.LOCATION]")
    
    end_labels = end_base.mark_text().encode(
        text=alt.Text('Country:N'),
    ).encode(
        x=alt.value(401)
    )

    # add an area chart for the highest and lowest values
    area = base.mark_area().encode(
        y=alt.Y('low:Q'),
        y2=alt.Y2('high:Q'),
        color=alt.value(eco_style.pallete["shadow"]),
    )

    chart = area + line + end_circles + end_labels

    chart = chart.configure_view(
        strokeWidth=0
    )

    type = ["Corporate", "Government", "Household", "Total"][i]
    print(type)
    chart.display()

    chart.save(f"json/{type}_investment_panel.json")
    chart.save(f"png/{type}_investment_panel.png", scale_factor=3)
    chart.save(f"svg/{type}_investment_panel.svg")


{'GBR': 2}[datum.LOCATION]
Corporate


  col = df[col_name].apply(to_list_if_array, convert_dtype=False)


  col = df[col_name].apply(to_list_if_array, convert_dtype=False)
  col = df[col_name].apply(to_list_if_array, convert_dtype=False)


{'GBR': 5}[datum.LOCATION]
Government


  col = df[col_name].apply(to_list_if_array, convert_dtype=False)
  col = df[col_name].apply(to_list_if_array, convert_dtype=False)


  col = df[col_name].apply(to_list_if_array, convert_dtype=False)
  col = df[col_name].apply(to_list_if_array, convert_dtype=False)


{'FRA': 7}[datum.LOCATION]
Household


  col = df[col_name].apply(to_list_if_array, convert_dtype=False)
  col = df[col_name].apply(to_list_if_array, convert_dtype=False)


  col = df[col_name].apply(to_list_if_array, convert_dtype=False)
  col = df[col_name].apply(to_list_if_array, convert_dtype=False)


{'FRA': 0, 'DEU': -10, 'POL': 5}[datum.LOCATION]
Total


  col = df[col_name].apply(to_list_if_array, convert_dtype=False)
  col = df[col_name].apply(to_list_if_array, convert_dtype=False)


  col = df[col_name].apply(to_list_if_array, convert_dtype=False)
  col = df[col_name].apply(to_list_if_array, convert_dtype=False)
  col = df[col_name].apply(to_list_if_array, convert_dtype=False)


# Full Charts

In [13]:
corp_df.LOCATION.unique()

array(['FRA', 'CAN', 'USA', 'JPN', 'GBR', 'DEU', 'ITA'], dtype=object)

In [14]:
temp = corp_df.copy()
# sort GBR into the first position
temp['is_gbr'] = temp.LOCATION == "GBR"
temp = temp.sort_values(by='is_gbr', ascending=True)

# add an end label to the final value for each country
temp['label'] = np.where(temp.Time == temp.groupby(by="LOCATION")['Time'].transform('max'), temp.Country, "")

base = alt.Chart(temp).encode(
    x=alt.X('Time:Q', axis=eco_style.x_axis({'format': 'd'}), scale=alt.Scale(domainMax= 2022)),
    y=alt.Y('Value:Q', axis=eco_style.y_axis({'format': '%'}), scale=alt.Scale(zero=True)),
    #color=alt.Color('LOCATION:N', legend=None, scale=alt.Scale(range=["#b30000", "#7c1158", "#4421af", "#1a53ff", "#0d88e6", "#00b7c7", "#5ad45a", "#8be04e", "#ebdc78"])),
    color=alt.Color('LOCATION:N', legend=None, scale=alt.Scale(scheme="observable10", reverse=False)),
    opacity=alt.condition(alt.datum.LOCATION == "GBR", alt.value(1), alt.value(1)),
    strokeDash=alt.condition(alt.datum.LOCATION == "GBR", alt.value([0,0]), alt.value([4,3])),
)

line = base.mark_line(clip=True).encode(
        size=alt.condition(alt.datum.LOCATION == "GBR", alt.value(3), alt.value(1.5)),
)

end_labels = base.mark_text(
    dx=5,
    dy=alt.expr("datum.LOCATION == 'USA' ? -6 : datum.LOCATION == 'CAN' ? 10 : 0"),
    align='left',
    baseline='middle',
).encode(
    text=alt.Text('label:N'),
    x=alt.value(400),
    size=alt.condition(alt.datum.LOCATION == "GBR", alt.value(14), alt.value(12)),
    opacity=alt.condition(alt.datum.LOCATION == "GBR", alt.value(1), alt.value(0.7)),
)

line + end_labels


AttributeError: module 'eco_style' has no attribute 'x_axis'

In [15]:
eco_style.pallete["shadow"]

'rgba(24, 42, 56, 0.1)'

In [16]:
corp_df.query("Time == 2021").sort_values(by="Value", ascending=False)

Unnamed: 0,LOCATION,Country,Time,Value,low,high
98,JPN,Japan,2021,0.172112,0.101384,0.172112
207,FRA,France,2021,0.142732,0.101384,0.172112
132,DEU,Germany,2021,0.120107,0.101384,0.172112
27,USA,United States,2021,0.107561,0.101384,0.172112
28,ITA,Italy,2021,0.106632,0.101384,0.172112
180,CAN,Canada,2021,0.106302,0.101384,0.172112
29,GBR,United Kingdom,2021,0.101384,0.101384,0.172112


In [17]:
total_df.query("Time == 2021").sort_values(by="Value", ascending=False)

Unnamed: 0,LOCATION,Country,Time,Value,low,high
104,JPN,Japan,2021,0.25594,0.17681,0.25594
143,FRA,France,2021,0.244672,0.17681,0.25594
103,CAN,Canada,2021,0.240091,0.17681,0.25594
142,DEU,Germany,2021,0.212995,0.17681,0.25594
30,USA,United States,2021,0.211862,0.17681,0.25594
189,ITA,Italy,2021,0.204912,0.17681,0.25594
31,GBR,United Kingdom,2021,0.17681,0.17681,0.25594


## Chart: 2021 Cross Section

In [18]:
importlib.reload(eco_style)

for i, df in enumerate([corp_df, gov_df, household_df]):

    df = df[df.Time == 2021]
    df['color'] = df.LOCATION.apply(lambda x: eco_style.pallete['bar']['accent_1'] if x == "GBR" else eco_style.pallete['bar']['other'])

    base = alt.Chart(df).encode(
        x=alt.Y('LOCATION:N', sort='-y', axis=eco_style.x_axis({'labelAlign': 'right', 'labelAngle' : 270, 'labelPadding': 5, 'labelFontSize':11})),
        y=alt.X('Value:Q', axis=eco_style.y_axis({'format': '%'})),
        color=alt.Color('color:N', scale=None),
    )


    bars = base.mark_bar()

    labels = base.mark_text(**eco_style.line_label({
        "dy": -8,
        "align": "center",
        "dx": 4 if i == 0 else 0,
    })).encode(
        text=alt.Text('Value:Q', format=".1%")
    ).encode(
        opacity=alt.condition(alt.datum.LOCATION == "GBR", alt.value(1), alt.value(0))
    )
    
    
    chart = bars + labels

    chart = chart.properties(**eco_style.properties({
        "width": 600,
        "height": 300,
    })).configure_view(
        strokeWidth=0
    )

    type = ["Corporate", "Government", "Household"][i]
    print(type)
    chart.display()

    chart.save(f"json/{type}_investment_crosssection.json")
    chart.save(f"png/{type}_investment_crosssection.png", scale_factor=3)
    chart.save(f"svg/{type}_investment_crosssection.svg")


#chart.save(f"png/corporate_investment_{approach}.png", scale_factor=3)
#chart.save(f"json/corporate_investment_{approach}.json")
#chart.save(f"svg/corporate_investment_{approach}.svg")


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['color'] = df.LOCATION.apply(lambda x: eco_style.pallete['bar']['accent_1'] if x == "GBR" else eco_style.pallete['bar']['other'])


AttributeError: module 'eco_style' has no attribute 'x_axis'

In [None]:
labels

In [50]:
household_df.query("Time == 2021").sort_values(by="Value", ascending=False)

Unnamed: 0,LOCATION,Country,Time,Value,low,high,color
87,CAN,Canada,2021,0.09901,0.033664,0.069139,#a8c0de
21,AUS,Australia,2021,0.08234,0.033664,0.069139,#a8c0de
623,USA,United States,2021,0.071551,0.033664,0.069139,#a8c0de
156,FIN,Finland,2021,0.06922,0.033664,0.069139,#a8c0de
398,NLD,Netherlands,2021,0.068811,0.033664,0.069139,#a8c0de
201,DEU,Germany,2021,0.065948,0.033664,0.069139,#a8c0de
179,FRA,France,2021,0.064739,0.033664,0.069139,#a8c0de
290,ITA,Italy,2021,0.064195,0.033664,0.069139,#a8c0de
421,NZL,New Zealand,2021,0.059567,0.033664,0.069139,#a8c0de
43,AUT,Austria,2021,0.058955,0.033664,0.069139,#a8c0de


In [4]:
pd.read_csv("data/NAAG_20102023110801481.csv").Indicator.value_counts()

Gross fixed capital formation, percentage of GDP                                   1043
Gross fixed capital formation by institutional sector, percentage of total GFCF     881
Gross fixed capital formation, General government, percentage of total GFCF         880
Gross fixed capital formation, Households and NPISHs, percentage of total GFCF      858
Gross fixed capital formation, Corporations, percentage of total GFCF               850
Name: Indicator, dtype: int64

In [33]:
df = pd.read_csv("/Users/finn/Desktop/equity.csv")

df = df[['Country', 'COU', 'Value']]

df = df.sort_values(by="Value", ascending=False)
df = df.reset_index(drop=True)

df

Unnamed: 0,Country,COU,Value
0,Colombia,COL,12.831
1,Iceland,ISL,8.97
2,Netherlands,NLD,5.277
3,Austria,AUT,3.81
4,Israel,ISR,3.525
5,Switzerland,CHE,2.322
6,Spain,ESP,1.214
7,United Kingdom,GBR,1.135
8,Germany,DEU,0.892
9,Hungary,HUN,0.0
