In [1]:
import json
import altair as alt
from altair import expr, datum
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import requests

In [75]:
# Switch for local previews: when True, output file names get a "_local"
# suffix and later cells hand the DataFrame itself to Altair instead of the
# remote CSV URL.
LOCAL = False

local_suffix = '_local' if LOCAL else ''

In [96]:
# Article identifier and the repository locations derived from it.
uid = "2021-05-05-which-firms-and-industries-have-been-most-affected-by-covid-update"  # article unique ID
eco_git_home = "https://raw.githubusercontent.com/EconomicsObservatory/ECOvisualisations/main/"
eco_git_path = eco_git_home + "articles/" + uid + "/data/"


def _fetch_guideline(relative_path, timeout=30):
    """Download one file from the ECOvisualisations repository.

    Parameters
    ----------
    relative_path : str
        Path of the file relative to ``eco_git_home``.
    timeout : float, optional
        Seconds to wait for the server before giving up, so a stalled
        connection cannot hang the kernel indefinitely.

    Returns
    -------
    requests.Response
        The successful response.

    Raises
    ------
    requests.HTTPError
        If the server answers with an error status. Failing here prevents an
        error page from being silently used as a template or colour palette.
    """
    response = requests.get(eco_git_home + relative_path, timeout=timeout)
    response.raise_for_status()
    return response


def _fetch_guideline_json(relative_path):
    """Download a JSON guideline file and return the decoded Python object."""
    return json.loads(_fetch_guideline(relative_path).content)


# HTML template containing a JSON_PATH placeholder that later cells fill in.
vega_embed = _fetch_guideline("guidelines/html/vega-embed.html").text

# Shared colour palettes.
colors = _fetch_guideline_json("guidelines/colors/eco-colors.json")
category_color = _fetch_guideline_json("guidelines/colors/eco-category-color.json")
hue_color = _fetch_guideline_json("guidelines/colors/eco-single-hue-color.json")
mhue_color = _fetch_guideline_json("guidelines/colors/eco-multi-hue-color.json")
div_color = _fetch_guideline_json("guidelines/colors/eco-diverging-color.json")

# Global chart configuration; only the default height and width are used below.
config = _fetch_guideline_json("guidelines/charts/eco-global-config.json")
height = config["height"]
width = config["width"]
height, width

(300, 500)

# Fig 1

In [97]:
# Fig 1 data: read the "Sales & emp by industry_Update" sheet and reshape it
# into one row per (Industry, Category) pair.
fig1_sheet = pd.read_excel(
    "raw/Charts for May 2020 ESRC blog Update.xlsx",
    sheet_name="Sales & emp by industry_Update",
    skiprows=1,
)
# Discard columns, then rows, that contain no values at all.
fig1_sheet = fig1_sheet.dropna(how="all", axis=1).dropna(how="all", axis=0)

# Keep only the three columns used by the chart and give them readable names.
fig1_wide = fig1_sheet[['Unnamed: 16', 'Sales.6', 'Employment.6']]
fig1_wide.columns = ["Industry", "Sales", "Employment"]

# Wide -> long: the Sales/Employment column labels become the Category values.
df = fig1_wide.set_index('Industry').stack().reset_index()
df.columns = ["Industry", "Category", "Value"]

In [98]:
# Export the Fig 1 data as CSV and write the HTML page that embeds the chart.
f = "fig1_sales_employment_by_industry"
f1 = eco_git_path + f + ".csv"  # remote CSV URL handed to alt.Chart below
df.to_csv("data/" + f + ".csv")
f += local_suffix  # output names get "_local" appended when LOCAL is True

# The embed page points at the chart spec: same URL as the CSV, but in the
# /visualisation/ folder and with a .json extension.
spec_url = f1.replace("/data/", "/visualisation/").replace(".csv", ".json")

# Context manager so the handle is flushed and closed as soon as the page is
# written (the bare open(...).write(...) left it open until garbage collection).
with open("visualisation/" + f + ".html", "w") as html_file:
    html_file.write(vega_embed.replace("JSON_PATH", spec_url))

if LOCAL:
    f1 = df  # local preview: give the chart the DataFrame instead of the URL
df.head()

Unnamed: 0,Industry,Category,Value
0,Other Production,Sales,-8.265892
1,Other Production,Employment,-2.625413
2,Info & Comms,Sales,-10.744785
3,Info & Comms,Employment,-1.41899
4,Real Estate,Sales,-12.639382


In [99]:
# Fig 1: for every industry, one bar for Sales and one for Employment,
# each annotated with its value, plus two hand-placed series labels.
gray = colors['eco-gray']
pink = colors['eco-pink']
blue = colors['eco-mid-blue']
# Every axis element (labels, title, ticks, domain line) uses the same gray.
axis_gray = dict(labelColor=gray, titleColor=gray, tickColor=gray, domainColor=gray)

value_axis = alt.Axis(
    grid=False,
    title='Percentage impact of Covid-19 from 2020 Q2 to 2021 Q1',
    titleAnchor='end',
    titleFontSize=10,
    titleFontWeight="normal",
    tickCount=6,
    **axis_gray,
)
industry_axis = alt.Axis(orient='right', **axis_gray)

# Shared encoding; the mark and the Category filter are added per series.
bars = alt.Chart(f1).encode(
    x=alt.X('Value:Q', stack=False, title='', axis=value_axis),
    y=alt.Y('Industry:N', title='', axis=industry_axis),
)

# The two series use opposite yOffsets so the bars do not overlap.
bars1 = (
    bars
    .mark_bar(size=8, yOffset=4, color=pink, opacity=0.8)
    .transform_filter("datum.Category=='Sales'")
)
bars2 = (
    bars
    .mark_bar(size=8, yOffset=-4, color=blue, opacity=0.8)
    .transform_filter("datum.Category=='Employment'")
)

# Value annotations, formatted to one decimal place.
text1 = (
    bars1
    .mark_text(yOffset=5, xOffset=-3, size=10, align='right', color=pink)
    .encode(text=alt.Text('Value:Q', format='.1f'))
)
text2 = (
    bars2
    .mark_text(yOffset=-4, xOffset=-3, size=10, align='right', color=blue)
    .encode(text=alt.Text('Value:Q', format='.1f'))
)

# Series labels positioned manually in data coordinates; scale=None makes
# Altair use the colour stored in column "c" directly.
sales_label = pd.DataFrame(
    [{'x': -29.5, 'y': 'Admin & Support', 't': 'Sales', 'c': pink}]
)
label1 = (
    alt.Chart(sales_label)
    .mark_text(yOffset=5, size=10, align='right')
    .encode(text='t', x='x:Q', y='y:N', color=alt.Color('c:N', scale=None))
)
employment_label = pd.DataFrame(
    [{'x': -23.5, 'y': 'Accom & Food', 't': 'Employment', 'c': blue}]
)
label2 = (
    alt.Chart(employment_label)
    .mark_text(yOffset=-5, size=10, align='right')
    .encode(text='t', x='x:Q', y='y:N', color=alt.Color('c:N', scale=None))
)

# Layers are combined with "+" in this exact order.
layer = bars1 + bars2 + text1 + text2 + label1 + label2
layer = layer.configure_view(stroke=None).properties(title="", height=300, width=400)
layer.save("visualisation/" + f + ".json")
layer

# Fig 2

In [100]:
# Fig 2 data: read the "Employment vs prod_Update" sheet, then drop columns
# and rows that contain no values at all.
fig2_sheet = pd.read_excel(
    "raw/Charts for May 2020 ESRC blog Update.xlsx",
    sheet_name="Employment vs prod_Update",
)
df = fig2_sheet.dropna(how="all", axis=1).dropna(how="all", axis=0)

In [101]:
# Export the Fig 2 data as CSV and write the HTML page that embeds the chart.
f = "fig2_labour_productivity"
f2 = eco_git_path + f + ".csv"  # remote CSV URL handed to alt.Chart below
df.to_csv("data/" + f + ".csv")
f += local_suffix  # output names get "_local" appended when LOCAL is True

# The embed page points at the chart spec: same URL as the CSV, but in the
# /visualisation/ folder and with a .json extension.
spec_url = f2.replace("/data/", "/visualisation/").replace(".csv", ".json")

# Context manager so the handle is flushed and closed as soon as the page is
# written (the bare open(...).write(...) left it open until garbage collection).
with open("visualisation/" + f + ".html", "w") as html_file:
    html_file.write(vega_embed.replace("JSON_PATH", spec_url))

if LOCAL:
    f2 = df  # local preview: give the chart the DataFrame instead of the URL
df.head()

Unnamed: 0,lnprod_last,covid_emp21q2
0,2.575868,-8.39726
1,2.849422,-8.040956
2,3.102955,-4.755137
3,3.274105,-5.693772
4,3.393059,-4.946575


In [102]:
# Fig 2: scatter of covid_emp21q2 against lnprod_last with a fitted line.
gray = colors['eco-gray']
# Every axis element (labels, title, ticks, domain line) uses the same gray.
axis_gray = dict(labelColor=gray, titleColor=gray, tickColor=gray, domainColor=gray)

productivity_axis = alt.Axis(
    grid=False,
    title='Log labour productivity (last set of accounts)',
    titleAnchor='end',
    titleY=-15,
    titleFontSize=10,
    titleFontWeight="normal",
    **axis_gray,
)
# Y axis: no tick marks, left-aligned labels with negative padding, and a
# horizontal title positioned via titleX/titleY.
employment_axis = alt.Axis(
    grid=True,
    title='Expected impact of COVID-19 on employment in 2021 Q2 (%)',
    ticks=False,
    labelAlign="left",
    labelBaseline="middle",
    labelPadding=-5,
    labelOffset=-10,
    titleX=0,
    titleY=-5,
    titleAngle=0,
    titleFontSize=10,
    titleFontWeight="normal",
    titleAlign="left",
    tickCount=4,
    format=".0f",
    **axis_gray,
)

base = (
    alt.Chart(f2)
    .mark_circle(color=colors['eco-pink'], size=50, opacity=0.9)
    .encode(
        x=alt.X("lnprod_last:Q", scale=alt.Scale(domain=(2, 5.5)), axis=productivity_axis),
        y=alt.Y("covid_emp21q2:Q", scale=alt.Scale(domain=(-10, -1)), axis=employment_axis),
    )
)

# Order-1 polynomial regression of covid_emp21q2 on lnprod_last, drawn as a line.
polynomial_fit = base.transform_regression(
    on="lnprod_last", regression="covid_emp21q2", method="poly", order=1
).mark_line(color=colors['eco-mid-blue'], opacity=1)

layer = base + polynomial_fit
layer = layer.configure_view(stroke=None).properties(title="", height=300, width=400)
layer.save("visualisation/" + f + ".json")
layer

# Fig 3

In [103]:
# Fig 3 data: read the "Employees by industry_Update" sheet and reshape it
# into one row per (Industry, Employee status) pair.
df = (
    pd.read_excel("raw/Charts for May 2020 ESRC blog Update.xlsx",
                  sheet_name="Employees by industry_Update", skiprows=3)
    .dropna(how="all", axis=1)  # drop columns with no values
    .dropna(how="all", axis=0)  # drop rows with no values
)
# Keep the industry label plus the four ".1"-suffixed status columns, then
# go wide -> long so each status becomes a row.
df = df[['Sorted',
         'Furloughed.1', 'Unable to work (eg sick, self isolating).1',
         'Working on business premises.1', 'Working from home.1']].set_index('Sorted').stack().reset_index()
df.columns = ['Industry', 'Employee', 'Value']
# Strip the ".1" suffix as a literal string. regex=False is explicit because
# under regex matching "." matches any character, and because the implicit
# default is what raises pandas' FutureWarning for this call.
df['Employee'] = df['Employee'].str.replace('.1', '', regex=False)
df = df.sort_values(by=['Industry', 'Employee'])

  df['Employee']=df['Employee'].str.replace('.1','')


In [104]:
# Export the Fig 3 data as CSV and write the HTML page that embeds the chart.
f = "fig3_employment_industry"
f3 = eco_git_path + f + ".csv"  # remote CSV URL handed to alt.Chart below
df.to_csv("data/" + f + ".csv")
f += local_suffix  # output names get "_local" appended when LOCAL is True

# The embed page points at the chart spec: same URL as the CSV, but in the
# /visualisation/ folder and with a .json extension.
spec_url = f3.replace("/data/", "/visualisation/").replace(".csv", ".json")

# Context manager so the handle is flushed and closed as soon as the page is
# written (the bare open(...).write(...) left it open until garbage collection).
with open("visualisation/" + f + ".html", "w") as html_file:
    html_file.write(vega_embed.replace("JSON_PATH", spec_url))

if LOCAL:
    f3 = df  # local preview: give the chart the DataFrame instead of the URL
df.head()

Unnamed: 0,Industry,Employee,Value
0,Accom & Food,Furloughed,52.374547
1,Accom & Food,"Unable to work (eg sick, self isolating)",2.168636
3,Accom & Food,Working from home,9.689855
2,Accom & Food,Working on business premises,35.766962
24,Admin & Support,Furloughed,15.962058


In [105]:
# Fig 3: stacked horizontal bars of employee status per industry, with the
# 'All firms ' row split into its own panel underneath.
gray = colors['eco-gray']
# Every axis element (labels, title, ticks, domain line) uses the same gray.
axis_gray = dict(labelColor=gray, titleColor=gray, tickColor=gray, domainColor=gray)

# Colour scale: one entry per employee status found in df, coloured from the
# reversed multi-hue palette followed by part of the reversed diverging one.
status_scale = alt.Scale(
    domain=df["Employee"].unique(),
    range=mhue_color[::-1] + div_color[::-1][2:],
)

bars = (
    alt.Chart(f3)
    .mark_bar(size=14, opacity=0.9)
    .encode(
        y=alt.Y("Industry:N", sort=[], axis=alt.Axis(grid=False, title="", **axis_gray)),
        color=alt.Color("Employee:N", scale=status_scale),
        order="Industry:N",
    )
)

# Segment labels: the colour is set to None when the aggregated share is
# below 8, and to white otherwise.
text = bars.mark_text(dx=-15, dy=1, color="white").encode(
    text=alt.Text("SAmmount:Q", format=".0f"),
    color=alt.condition(
        datum.SAmmount < 8,
        alt.ColorValue(None),
        alt.ColorValue("white"),
    ),
)

# Layer bars and labels, then aggregate Value per (Industry, Employee) into
# the SAmmount field that both panels plot on x.
layer1 = (bars + text).properties(height=alt.Step(20), width=250)
layer1 = layer1.transform_aggregate(
    SAmmount="sum(Value)", groupby=["Industry", "Employee"]
)


def _share_x(axis):
    """Build the stacked x encoding on a fixed 0-100 domain with the given axis."""
    return alt.X(
        "SAmmount:Q",
        stack="zero",
        axis=axis,
        scale=alt.Scale(domain=[0, 100]),
    )


# Upper panel: every industry except 'All firms ', drawn without an x axis.
industry_rows = layer1.transform_filter("datum.Industry!='All firms '").encode(
    x=_share_x(None)
)
# Lower panel: the 'All firms ' row, which carries the x axis.
all_firms_row = layer1.transform_filter("datum.Industry=='All firms '").encode(
    x=_share_x(alt.Axis(grid=False, title="", **axis_gray))
)

layer2 = (
    alt.vconcat(industry_rows, all_firms_row, spacing=10)
    .configure_view(height=height, width=width - 150)
    .configure_legend(titleColor=gray, labelColor=gray)
)
layer2.save("visualisation/" + f + ".json")
layer2

# Fig 4

In [106]:
# Fig 4 data: read the "Sales & supply_Update" sheet, drop columns and rows
# that contain no values, and name the remaining three columns.
fig4_sheet = pd.read_excel(
    "raw/Charts for May 2020 ESRC blog Update.xlsx",
    sheet_name="Sales & supply_Update",
    skiprows=3,
)
df = fig4_sheet.dropna(how="all", axis=1).dropna(how="all", axis=0)
df.columns = ['Industry', 'Sales', 'Disruption']

In [107]:
# Export the Fig 4 data as CSV and write the HTML page that embeds the chart.
f = "fig4_sales_disruption"
f4 = eco_git_path + f + ".csv"  # remote CSV URL handed to alt.Chart below
df.to_csv("data/" + f + ".csv")
f += local_suffix  # output names get "_local" appended when LOCAL is True

# The embed page points at the chart spec: same URL as the CSV, but in the
# /visualisation/ folder and with a .json extension.
spec_url = f4.replace("/data/", "/visualisation/").replace(".csv", ".json")

# Context manager so the handle is flushed and closed as soon as the page is
# written (the bare open(...).write(...) left it open until garbage collection).
with open("visualisation/" + f + ".html", "w") as html_file:
    html_file.write(vega_embed.replace("JSON_PATH", spec_url))

if LOCAL:
    f4 = df  # local preview: give the chart the DataFrame instead of the URL
df.head()

Unnamed: 0,Industry,Sales,Disruption
0,Manufacturing,-32.343627,10.4
1,Other Production,-16.102013,8.86
2,Construction,-40.108311,23.3
3,Wholesale & Retail,-25.346815,18.69
4,Transport & Storage,-21.021598,5.17


In [108]:
# Fig 4: scatter of Disruption against Sales per industry, with a fitted line
# and manually offset industry labels.
gray = colors['eco-gray']
pink = colors['eco-pink']
# Every axis element (labels, title, ticks, domain line) uses the same gray.
axis_gray = dict(labelColor=gray, titleColor=gray, tickColor=gray, domainColor=gray)

sales_axis = alt.Axis(
    grid=False,
    title='Percentage impact of COVID-19 on sales in 2020 Q2',
    titleAnchor='end',
    titleY=-15,
    titleFontSize=10,
    titleFontWeight="normal",
    **axis_gray,
)
# Y axis: no tick marks, left-aligned labels with negative padding, and a
# horizontal title positioned via titleX/titleY.
disruption_axis = alt.Axis(
    grid=True,
    title='Percentage of non-labour inputs disrupted',
    ticks=False,
    labelAlign="left",
    labelBaseline="middle",
    labelPadding=-5,
    labelOffset=-10,
    titleX=0,
    titleY=-5,
    titleAngle=0,
    titleFontSize=10,
    titleFontWeight="normal",
    titleAlign="left",
    tickCount=4,
    format=".0f",
    **axis_gray,
)

base = (
    alt.Chart(f4)
    .mark_circle(color=pink, size=50, opacity=0.9)
    .encode(
        x=alt.X("Sales:Q", axis=sales_axis),
        y=alt.Y("Disruption:Q", axis=disruption_axis),
    )
)


def _industry_labels(chart, industries, dx, dy, align):
    """Return a text layer naming the listed industries, shifted by (dx, dy)."""
    return (
        chart.mark_text(dx=dx, dy=dy, size=10, color=pink, align=align)
        .encode(text=alt.Text("Industry:N"))
        .transform_filter(alt.FieldOneOfPredicate(field="Industry", oneOf=industries))
    )


# Industries are grouped by the label offset and alignment applied to them.
text1 = _industry_labels(
    base,
    ['Other Production', 'Construction', 'Wholesale & Retail',
     'Real Estate', 'Recreational Services', 'Other Services'],
    dx=6, dy=4, align='left',
)
text2 = _industry_labels(
    base,
    ['Accom & Food', 'Health', 'Transport & Storage', 'Prof & Scientific'],
    dx=7, dy=-2, align='left',
)
text3 = _industry_labels(
    base, ['Manufacturing', 'Finance & Insurance'], dx=-6, dy=4, align='right'
)
text4 = _industry_labels(base, ['Admin & Support'], dx=-13, dy=-13, align='left')
text5 = _industry_labels(base, ['Info & Comms'], dx=3, dy=13, align='center')

# Order-1 polynomial regression of Disruption on Sales, drawn as a line.
polynomial_fit = base.transform_regression(
    on="Sales", regression="Disruption", method="poly", order=1
).mark_line(color=colors['eco-mid-blue'], opacity=1)

layer = base + polynomial_fit + text1 + text2 + text3 + text4 + text5
layer = layer.configure_view(stroke=None).properties(title="", height=300, width=400)
layer.save("visualisation/" + f + ".json")
layer

# Fig 5

In [109]:
# Fig 5 data: read the "Uncertainty_Update" sheet and keep the date column
# plus the single-month and 3-month-rolling series.
fig5_sheet = pd.read_excel(
    "raw/Charts for May 2020 ESRC blog Update.xlsx",
    sheet_name="Uncertainty_Update",
    skiprows=2,
)
# Discard columns, then rows, that contain no values at all.
fig5_sheet = fig5_sheet.dropna(how="all", axis=1).dropna(how="all", axis=0)

# Keep only rows where all three selected columns have a value.
df = fig5_sheet[['Unnamed: 0', 'Single month', '3 month rolling average']].dropna()
df.columns = ['date', 'single', 'rolling3']

In [110]:
# Export the Fig 5 data as CSV and write the HTML page that embeds the chart.
f = "fig5_uncertainty"
f5 = eco_git_path + f + ".csv"  # remote CSV URL handed to alt.Chart below
df.to_csv("data/" + f + ".csv")
f += local_suffix  # output names get "_local" appended when LOCAL is True

# The embed page points at the chart spec: same URL as the CSV, but in the
# /visualisation/ folder and with a .json extension.
spec_url = f5.replace("/data/", "/visualisation/").replace(".csv", ".json")

# Context manager so the handle is flushed and closed as soon as the page is
# written (the bare open(...).write(...) left it open until garbage collection).
with open("visualisation/" + f + ".html", "w") as html_file:
    html_file.write(vega_embed.replace("JSON_PATH", spec_url))

if LOCAL:
    f5 = df  # local preview: give the chart the DataFrame instead of the URL
df.head()

Unnamed: 0,date,single,rolling3
2,2017-01-31,4.33918,4.414233
3,2017-02-28,4.012655,4.380962
4,2017-03-31,4.376527,4.242787
5,2017-04-30,4.603159,4.33078
6,2017-05-31,4.770117,4.583268


In [111]:
# Fig 5: single-month series as a gradient-filled area, the 3-month rolling
# average as a line on top, and two hand-placed series labels.
gray = colors['eco-gray']
pink = colors['eco-pink']
blue = colors['eco-mid-blue']
# Every axis element (labels, title, ticks, domain line) uses the same gray.
axis_gray = dict(labelColor=gray, titleColor=gray, tickColor=gray, domainColor=gray)

# Shared time axis, labelled as full month name plus year.
date_axis = alt.Axis(grid=False, title="", format='%B %Y', **axis_gray)
base = alt.Chart(f5).encode(x=alt.X("date:T", axis=date_axis))

# Linear gradient running from white to the house pink.
area_fill = alt.Gradient(
    gradient="linear",
    stops=[
        alt.GradientStop(color="white", offset=0),
        alt.GradientStop(color=pink, offset=0.9),
    ],
    x1=0.8,
    x2=1,
    y1=1,
    y2=0,
)
# Y axis: no tick marks, left-aligned labels with negative padding, and a
# horizontal title positioned via titleX/titleY.
percent_axis = alt.Axis(
    grid=True,
    title="%",
    ticks=False,
    labelAlign="left",
    labelBaseline="middle",
    labelPadding=-5,
    labelOffset=-10,
    titleX=0,
    titleY=-5,
    titleAngle=0,
    titleFontSize=10,
    titleFontWeight="normal",
    titleAlign="left",
    tickCount=4,
    format=".0f",
    **axis_gray,
)
area = base.mark_area(
    interpolate="monotone",
    fillOpacity=0.7,
    stroke=gray,
    strokeWidth=0.5,
    color=area_fill,
).encode(y=alt.Y("single:Q", axis=percent_axis))

line = base.mark_line(color=blue).encode(
    y=alt.Y("rolling3:Q", axis=alt.Axis(grid=False, title=""))
)

# Series labels positioned manually in data coordinates; scale=None makes
# Altair use the colour stored in column "c" directly.
single_label = pd.DataFrame(
    [{'x': '2020-03-15', 'y': 7, 't': 'single month', 'c': pink}]
)
label1 = (
    alt.Chart(single_label)
    .mark_text(yOffset=5, size=10, align='right')
    .encode(text='t', x='x:T', y='y:Q', color=alt.Color('c:N', scale=None))
)
rolling_label = pd.DataFrame(
    [{'x': '2020-03-01', 'y': 5.5, 't': '3 month rolling average', 'c': blue}]
)
label2 = (
    alt.Chart(rolling_label)
    .mark_text(yOffset=-5, size=10, align='right')
    .encode(text='t', x='x:T', y='y:Q', color=alt.Color('c:N', scale=None))
)

layer = area + line + label1 + label2
layer = layer.configure_view(stroke=None).properties(title="", height=300, width=400)
layer.save("visualisation/" + f + ".json")
layer

# Fig 6

In [112]:
# Fig 6 data: read the "Uncertainty by industry_Update" sheet and keep the
# industry label with the 2019 and April 2020 - March 2021 columns.
fig6_sheet = pd.read_excel(
    "raw/Charts for May 2020 ESRC blog Update.xlsx",
    sheet_name="Uncertainty by industry_Update",
    skiprows=1,
)
# Discard columns, then rows, that contain no values at all.
fig6_sheet = fig6_sheet.dropna(how="all", axis=1).dropna(how="all", axis=0)

# The third source header starts with a space; it is selected exactly as
# spelled in the sheet and renamed below.
df = fig6_sheet[['Unnamed: 16', '2019.1', ' April 2020 - March 2021 (avg)']]
df.columns = ['Industry', '2019', 'April 2020 - March 2021']

In [113]:
# Export the Fig 6 data as CSV and write the HTML page that embeds the chart.
f = "fig6_uncertainty_industry"
f6 = eco_git_path + f + ".csv"  # remote CSV URL handed to alt.Chart below
df.to_csv("data/" + f + ".csv")
f += local_suffix  # output names get "_local" appended when LOCAL is True

# The embed page points at the chart spec: same URL as the CSV, but in the
# /visualisation/ folder and with a .json extension.
spec_url = f6.replace("/data/", "/visualisation/").replace(".csv", ".json")

# Context manager so the handle is flushed and closed as soon as the page is
# written (the bare open(...).write(...) left it open until garbage collection).
with open("visualisation/" + f + ".html", "w") as html_file:
    html_file.write(vega_embed.replace("JSON_PATH", spec_url))

if LOCAL:
    f6 = df  # local preview: give the chart the DataFrame instead of the URL
df.head()

Unnamed: 0,Industry,2019,April 2020 - March 2021
0,Other Services,3.934506,6.196518
1,Health,4.11016,6.729009
2,Other Production,6.264915,6.736793
3,Wholesale & Retail,3.941647,7.119036
4,Real Estate,4.349479,7.374896


In [114]:
# Fig 6 (range version): per industry, a point for 2019 and a point for
# April 2020 - March 2021, joined by an error-bar mark, with the industry
# name written next to the 2019 point.
gray = colors['eco-gray']
pink = colors['eco-pink']
blue = colors['eco-mid-blue']

percent_axis = alt.Axis(
    grid=False,
    title='%',
    titleAnchor='end',
    titleY=-15,
    labelColor=gray,
    titleColor=gray,
    tickColor=gray,
    domainColor=gray,
)
# Shared encoding: x defaults to the 2020-21 value; the y axis is hidden
# because the industry names are drawn as text marks instead.
base = alt.Chart(f6).encode(
    x=alt.X('April 2020 - March 2021:Q', title='', axis=percent_axis),
    y=alt.Y('Industry:N', axis=None),
)

# NOTE: despite its name, "bars" holds the pink 2020-21 point marks.
bars = base.mark_point(size=40, color=pink, fill=pink, opacity=0.8)
points = base.mark_point(size=40, color=blue, fill=blue, opacity=0.8).encode(
    x=alt.X('2019:Q')
)
text = base.mark_text(color=gray, size=10, align='right', xOffset=-10).encode(
    x=alt.X('2019:Q'),
    text='Industry:N',
)
# Connector spanning from the 2019 value to the 2020-21 value.
lines = base.mark_errorbar(color=pink).encode(
    x=alt.X('2019:Q', title='%'),
    x2=alt.X2('April 2020 - March 2021:Q'),
    y=alt.Y('Industry:N', title=''),
)

# Legend-like labels positioned manually in data coordinates; scale=None
# makes Altair use the colour stored in column "c" directly.
legend_rows = pd.DataFrame([
    {'x': 8.1, 'y': 'Other Production', 't': '— April 2020 - March 2021', 'c': pink},
    {'x': 7.3, 'y': 'Other Production', 't': '2019', 'c': blue},
])
label = (
    alt.Chart(legend_rows)
    .mark_text(yOffset=1, align='left', size=10, baseline='middle')
    .encode(text='t', x='x:Q', y='y:N', color=alt.Color('c:N', scale=None))
)

layer = lines + bars + points + label + text
layer = layer.configure_view(stroke=None).properties(title="", height=300, width=400)
layer.save("visualisation/" + f + ".json")
layer

In [115]:
# Fig 6 (bar version): a bar for April 2020 - March 2021 with the 2019 value
# overlaid as a point; saved next to the range version with a "_b" suffix.
gray = colors['eco-gray']
pink = colors['eco-pink']
blue = colors['eco-mid-blue']
# Every axis element (labels, title, ticks, domain line) uses the same gray.
axis_gray = dict(labelColor=gray, titleColor=gray, tickColor=gray, domainColor=gray)

percent_axis = alt.Axis(
    grid=False,
    title='%',
    titleAnchor='end',
    titleY=-15,
    **axis_gray,
)
base = alt.Chart(f6).encode(
    x=alt.X('April 2020 - March 2021:Q', title='', axis=percent_axis),
    y=alt.Y('Industry:N', title='', axis=alt.Axis(**axis_gray)),
)

bars = base.mark_bar(size=14, color=pink, opacity=0.8)
points = base.mark_point(size=80, color=blue, fill=blue, opacity=0.8).encode(
    x=alt.X('2019:Q')
)

# Legend-like labels positioned manually in data coordinates; scale=None
# makes Altair use the colour stored in column "c" directly.
legend_rows = pd.DataFrame([
    {'x': 6.3, 'y': 'Other Services', 't': 'April 2020 - March 2021', 'c': pink},
    {'x': 6.9, 'y': 'Other Production', 't': '2019', 'c': blue},
])
label = (
    alt.Chart(legend_rows)
    .mark_text(yOffset=1, align='left', size=10, baseline='middle')
    .encode(text='t', x='x:Q', y='y:N', color=alt.Color('c:N', scale=None))
)

layer = bars + points + label
layer = layer.configure_view(stroke=None).properties(title="", height=300, width=400)
layer.save("visualisation/" + f + "_b.json")
layer