In [21]:
import altair as alt
import pandas as pd
import eco_style

# Switch Altair to the 'report' theme so every chart built below picks it up.
# NOTE(review): the theme is presumably registered as a side effect of
# `import eco_style` above — confirm.
alt.themes.enable('report')

ThemeRegistry.enable('report')

In [22]:
# Load the first LPI score table from the working directory and preview it.
lpi_csv = 'LPI_data.csv'
df = pd.read_csv(lpi_csv)
df

Unnamed: 0,Country,Code,Score,Year
0,Singapore,SGP,4.27,2007
1,Netherlands,NLD,4.29,2007
2,Germany,DEU,4.19,2007
3,Sweden,SWE,4.11,2007
4,Austria,AUT,4.06,2007
...,...,...,...,...
1074,Cameroon,,2.10,2023
1075,Haiti,,1.80,2023
1076,Somalia,,1.90,2023
1077,Afghanistan,,1.70,2023


In [23]:
# Turn the year into an ISO date string (1 January of that year) so the charts
# can read it as a temporal field ('Year:T').
# Only the first four characters (the year itself) are kept before the suffix
# is appended, so re-running this cell does not keep stacking '-01-01' onto
# values that were already converted.
df['Year'] = df['Year'].astype(str).str[:4] + '-01-01'
df.head()

Unnamed: 0,Country,Code,Score,Year
0,Singapore,SGP,4.27,2007-01-01
1,Netherlands,NLD,4.29,2007-01-01
2,Germany,DEU,4.19,2007-01-01
3,Sweden,SWE,4.11,2007-01-01
4,Austria,AUT,4.06,2007-01-01


In [24]:
# Order rows by country, then chronologically, so the fills below walk each
# country's history in time order.
df = df.sort_values(by=['Country', 'Year'])

# Fill missing Code values by forward and backward fill within each Country group.
# Both fills are grouped on purpose: a plain .bfill() chained after the grouped
# .ffill() runs over the whole column, so a country that has no Code in any year
# would silently take the code of the next country in sort order.
code_ffilled = df.groupby('Country')['Code'].ffill()
df['Code'] = code_ffilled.groupby(df['Country']).bfill()

# Display the result
df

Unnamed: 0,Country,Code,Score,Year
149,Afghanistan,AFG,1.10,2007-01-01
292,Afghanistan,AFG,1.87,2010-01-01
439,Afghanistan,AFG,2.00,2012-01-01
617,Afghanistan,AFG,1.82,2014-01-01
769,Afghanistan,AFG,1.84,2016-01-01
...,...,...,...,...
407,Zimbabwe,ZWE,2.20,2012-01-01
596,Zimbabwe,ZWE,2.25,2014-01-01
770,Zimbabwe,ZWE,2.21,2016-01-01
931,Zimbabwe,ZWE,1.83,2018-01-01


In [25]:
# Country codes to keep for the comparison chart.
to_filter = ['GBR', 'FRA', 'DEU', 'USA', 'ITA', 'JPN', 'ESP', 'CAN']

# Boolean mask of rows whose Code is one of the selected countries.
is_selected = df['Code'].isin(to_filter)
df2 = df.loc[is_selected]
df2

Unnamed: 0,Country,Code,Score,Year
9,Canada,CAN,3.95,2007-01-01
163,Canada,CAN,4.03,2010-01-01
318,Canada,CAN,3.99,2012-01-01
471,Canada,CAN,4.05,2014-01-01
633,Canada,CAN,4.14,2016-01-01
799,Canada,CAN,3.75,2018-01-01
948,Canada,CAN,4.3,2023-01-01
17,France,FRA,3.82,2007-01-01
166,France,FRA,4.0,2010-01-01
316,France,FRA,3.96,2012-01-01


In [26]:
# Line layer: one line per country on a fixed y-range, with an explicit
# country -> colour mapping and no legend (lines are labelled directly by the
# text layer below). The colour range lists 10 colours for an 8-entry domain,
# so the last two are not assigned to any country.
chart = alt.Chart(df2).mark_line(point=True).encode(
    x=alt.X('Year:T', title=''),
    y=alt.Y('Score:Q', title='', scale=alt.Scale(domain=[3.5, 4.5])),
    color=alt.Color('Country:N', legend=None, scale=alt.Scale(domain=["United Kingdom", "France", "Italy", "Spain", "Germany", "Japan", "Canada", "United States"], range=["#001f3f", "#4269d0", "#efb118", "#ff725c", "#6cc5b0", "#3ca951", "#ff8ab7", "#a463f2", "#97bbf5", "#9c6b4e"]
    )),
    # Hooks for emphasising the United Kingdom line. The comparison string must
    # match the Country value exactly (no leading space), otherwise the test is
    # never true. Both branches currently hold the same value, so the UK line is
    # drawn like the others; change the first alt.value to make it stand out.
    size=alt.condition(
        alt.datum.Country == "United Kingdom",
        alt.value(1.5),
        alt.value(1.5)
    ),
    strokeDash=alt.condition(
        alt.datum.Country == "United Kingdom",
        alt.value([0]),
        alt.value([0])
    )
).properties(
    width=500,
    height=300,
    title={
        "text": "Infrastructure Score",
        "anchor": "start",
        "subtitle": [ "Source: Logistics Performance Index (LPI), World Bank", ""],
        "subtitleColor": "#676A86"})

# Point layer: a filled marker at every observation, coloured by country.
points = alt.Chart(df2).mark_point(filled=True, size=30).encode(
    x=alt.X('Year:T'),
    y=alt.Y('Score:Q'),
    color=alt.Color('Country:N', legend=None)
)

# Label layer: the country name placed at each country's latest observation
# (x = max Year, y = Score on the row with the greatest Year).
# dy nudges selected labels up or down by a few pixels — presumably to keep
# neighbouring labels from overlapping.
text = alt.Chart(df2).mark_text(
    align='left',
    dx=5,
    dy={
            "expr": "datum.Country == 'Germany' ? -4 : datum.Country == 'Canada' ? 4 : datum.Country == 'Spain' ? -8 : datum.Country == 'Italy' ? 8 : 0"
          }
).encode(
    alt.X('Year:T', aggregate='max'),
    alt.Y('Score:Q', aggregate={'argmax': 'Year'}),
    text='Country:N',
    color = 'Country:N'
)


# Layer the three charts: lines first, then labels, then points on top.
chart2 = chart + text + points
chart2

In [27]:
# Export the layered infrastructure chart as a PNG, passing scale_factor=2.0
# to the exporter.
chart2.save('LPI_infrastructure.png', scale_factor=2.0)

In [28]:
# Also save the same layered chart to a .json file next to the PNG.
chart2.save("LPI_infrastructure.json")

In [29]:
# Load the second LPI score table from the working directory and preview it.
lpi_score_csv = 'LPI_score_data.csv'
df0 = pd.read_csv(lpi_score_csv)
df0

Unnamed: 0,Country,Code,Score,Year
0,Singapore,SGP,4.19,2007
1,Netherlands,NLD,4.18,2007
2,Germany,DEU,4.10,2007
3,Sweden,SWE,4.08,2007
4,Austria,AUT,4.06,2007
...,...,...,...,...
1074,Cameroon,,2.10,2023
1075,Haiti,,2.10,2023
1076,Somalia,,2.00,2023
1077,Afghanistan,,1.90,2023


In [30]:
# Turn the year into an ISO date string (1 January of that year) so the charts
# can read it as a temporal field ('Year:T').
# Only the first four characters (the year itself) are kept before the suffix
# is appended, so re-running this cell does not keep stacking '-01-01' onto
# values that were already converted.
df0['Year'] = df0['Year'].astype(str).str[:4] + '-01-01'
df0.head()

Unnamed: 0,Country,Code,Score,Year
0,Singapore,SGP,4.19,2007-01-01
1,Netherlands,NLD,4.18,2007-01-01
2,Germany,DEU,4.1,2007-01-01
3,Sweden,SWE,4.08,2007-01-01
4,Austria,AUT,4.06,2007-01-01


In [31]:
# Order rows by country, then chronologically, so the fills below walk each
# country's history in time order.
df0 = df0.sort_values(by=['Country', 'Year'])

# Fill missing Code values by forward and backward fill within each Country group.
# Both fills are grouped on purpose: a plain .bfill() chained after the grouped
# .ffill() runs over the whole column, so a country that has no Code in any year
# would silently take the code of the next country in sort order.
score_code_ffilled = df0.groupby('Country')['Code'].ffill()
df0['Code'] = score_code_ffilled.groupby(df0['Country']).bfill()

# Display the result
df0

Unnamed: 0,Country,Code,Score,Year
149,Afghanistan,AFG,1.21,2007-01-01
292,Afghanistan,AFG,2.24,2010-01-01
439,Afghanistan,AFG,2.30,2012-01-01
617,Afghanistan,AFG,2.07,2014-01-01
769,Afghanistan,AFG,2.14,2016-01-01
...,...,...,...,...
407,Zimbabwe,ZWE,2.55,2012-01-01
596,Zimbabwe,ZWE,2.34,2014-01-01
770,Zimbabwe,ZWE,2.08,2016-01-01
931,Zimbabwe,ZWE,2.12,2018-01-01


In [32]:
# Country codes to keep for the comparison chart.
to_filter = ['GBR', 'FRA', 'DEU', 'USA', 'ITA', 'JPN', 'ESP', 'CAN']

# Boolean mask of rows whose Code is one of the selected countries.
is_selected_score = df0['Code'].isin(to_filter)
df20 = df0.loc[is_selected_score]
df20

Unnamed: 0,Country,Code,Score,Year
9,Canada,CAN,3.92,2007-01-01
163,Canada,CAN,3.87,2010-01-01
318,Canada,CAN,3.85,2012-01-01
471,Canada,CAN,3.86,2014-01-01
633,Canada,CAN,3.93,2016-01-01
799,Canada,CAN,3.73,2018-01-01
948,Canada,CAN,4.0,2023-01-01
17,France,FRA,3.76,2007-01-01
166,France,FRA,3.84,2010-01-01
316,France,FRA,3.85,2012-01-01


In [33]:
# Line layer: one line per country on a fixed y-range, with an explicit
# country -> colour mapping and no legend (lines are labelled directly by the
# text layer below). The colour range lists 10 colours for an 8-entry domain,
# so the last two are not assigned to any country.
chart = alt.Chart(df20).mark_line().encode(
    x=alt.X('Year:T', title=''),
    y=alt.Y('Score:Q', title='', scale=alt.Scale(domain=[3.5, 4.3])),
    color=alt.Color('Country:N', legend=None, scale=alt.Scale(domain=["United Kingdom", "France", "Italy", "Spain", "Germany", "Japan", "Canada", "United States"], range=["#001f3f", "#4269d0", "#efb118", "#ff725c", "#6cc5b0", "#3ca951", "#ff8ab7", "#a463f2", "#97bbf5", "#9c6b4e"]
    )),
    # Hooks for emphasising the United Kingdom line. The comparison string must
    # match the Country value exactly (no leading space), otherwise the test is
    # never true. The size branches are identical, so line width is the same for
    # every country.
    size=alt.condition(
        alt.datum.Country == "United Kingdom",
        alt.value(1.5),
        alt.value(1.5)
    ),
    # NOTE(review): both dash arrays contain only zeros, which should render as
    # a solid stroke for every line — confirm before relying on it.
    strokeDash=alt.condition(
        alt.datum.Country == "United Kingdom",
        alt.value([0, 0]),
        alt.value([0])
    )
).properties(
    width=500,
    height=300,
    title={
        "text": "Logistics Performance Index Score",
        "anchor": "start",
        "subtitle": [ "Source: Logistics Performance Index (LPI), World Bank", ""],
        "subtitleColor": "#676A86"})

# Point layer: a filled marker at every observation, coloured by country.
points = alt.Chart(df20).mark_point(filled=True, size=30).encode(
    x=alt.X('Year:T'),
    y=alt.Y('Score:Q'),
    color=alt.Color('Country:N', legend=None)
)
# Label layer: the country name placed at each country's latest observation
# (x = max Year, y = Score on the row with the greatest Year).
# dy nudges selected labels up or down by a few pixels — presumably to keep
# neighbouring labels from overlapping.
text = alt.Chart(df20).mark_text(
    align='left',
    dx=5,
    dy={
            "expr": "datum.Country == 'Japan' ? -7 : datum.Country == 'France' ? 7 : datum.Country == 'Spain' ? 0 : datum.Country == 'Italy' ? 8 : 0"
          }
).encode(
    alt.X('Year:T', aggregate='max'),
    alt.Y('Score:Q', aggregate={'argmax': 'Year'}),
    text='Country:N',
    color = 'Country:N'
)


# Layer the three charts: lines first, then labels, then points on top.
chart3 = chart + text + points
chart3

In [34]:
# Export the layered LPI score chart as a PNG, passing scale_factor=2.0 to the
# exporter.
chart3.save('LPI.png', scale_factor=2.0)

In [35]:
# Also save the same layered chart to a .json file next to the PNG.
chart3.save("LPI.json")

In [36]:
# Load the participation-by-country table from the working directory and preview it.
participation_csv = 'non_formal_education_participation.csv'
train_df = pd.read_csv(participation_csv)
train_df

Unnamed: 0,Country,participation
0,Austria,13
1,Belgium,8
2,Czech Republic,8
3,Denmark,23
4,Estonia,17
5,Finland,17
6,France,11
7,Germany,5
8,Greece,2
9,Hungary,7


In [37]:
# Country names to keep (this table is keyed by name rather than by code).
to_filter = ['United Kingdom', 'France', 'Germany', 'United States', 'Italy', 'Japan', 'Spain', 'Canada']

# Boolean mask of rows whose Country is one of the selected names.
# NOTE(review): train_df2 is not referenced by the chart cell that follows,
# which plots the unfiltered train_df — confirm which frame is intended.
is_selected_country = train_df['Country'].isin(to_filter)
train_df2 = train_df.loc[is_selected_country]
train_df2


Unnamed: 0,Country,participation
6,France,11
7,Germany,5
12,Italy,7
22,Spain,12
26,United Kingdom,14


In [38]:


# Bar colour: the United Kingdom gets the light blue, every other country the navy.
bar_colour = alt.condition(
    alt.datum.Country == "United Kingdom",
    alt.value('#A8C0DE'),
    alt.value('#001f3f')
)

# x-axis tick labels: the formatted value followed by a percent sign.
percent_axis = alt.Axis(labelExpr="format(datum.value,',') + '%'")

# Left-anchored title with the data source as a subtitle.
chart_title = {
    "text": "Participation in non-formal education and training",
    "anchor": "start",
    "subtitle": ["Source: EU Labour Force Survey (OECD)", ""],
    "subtitleColor": "#676A86"
}

# Horizontal bars of participation per country, ordered by descending value.
chart4 = (
    alt.Chart(train_df)
    .mark_bar(width=20)
    .encode(
        x=alt.X('participation:Q', title='', axis=percent_axis),
        y=alt.Y('Country:N', title='', sort='-x'),
        tooltip=['Country:N', 'participation:Q'],
        color=bar_colour
    )
    .properties(
        title=chart_title,
        width=450,  # Adjust width as needed
        height=300  # Adjust height as needed
    )
)

chart4



In [39]:
# Export the participation bar chart as a PNG, passing scale_factor=2.0 to the
# exporter.
chart4.save('part_train_eu.png', scale_factor=2.0)

In [40]:
# Also save the same chart to a .json file next to the PNG.
chart4.save('part_train_eu.json')