In [1]:
import json 
import vega_datasets

import pandas as pd
import numpy as np
import geopandas as gpd
import altair as alt

from pathlib import Path
from collections import Counter
from functools import partial

## Data loading and preprocessing

In [2]:
# All input files are read from a "data" directory relative to the working directory.
data_folder = Path("data")

# Raw religious-composition table; openpyxl is the engine used to parse the .xlsx file.
df = pd.read_excel(
    data_folder / "Religious_Composition_by_Country_2010-2050.xlsx",
    engine="openpyxl",
)


# Spreadsheet columns that hold one head count per religion. Further down they
# are converted to floats and melted into a single "religion" column.
religious_cols = [
    "Christians",
    "Muslims",
    "Unaffiliated",
    "Hindus",
    "Buddhists",
    "Folk Religions",
    "Other Religions",
    "Jews",
]


# Build a lookup table: lower-cased official country name -> ISO 3166-1 numeric code.
codes_df = pd.read_csv(data_folder / "country-codes_csv.csv")
codes_df = (
    codes_df[["official_name_en", "ISO3166-1-numeric"]]
    # Rows without an official name cannot be used as a join key, so drop them.
    .dropna(subset=["official_name_en"])
    .reset_index(drop=True)
)

# Manual name overrides, keyed by the lower-cased official name.
# JSON is UTF-8 by specification; passing the encoding explicitly keeps
# non-ASCII country names from being decoded with a platform-dependent default.
with open(data_folder / "country_fix_name.json", "r", encoding="utf-8") as f:
    fix_name_dict = json.load(f)

# Lower-case every name, then substitute the override where one exists
# (names without an override are kept unchanged).
codes_df["official_name_en"] = (
    codes_df["official_name_en"]
    .str.lower()
    .map(lambda name: fix_name_dict.get(name, name))
)


# Attach ISO 3166-1 numeric codes to the religion table by joining on the
# lower-cased country name. Work on a copy so the raw frame `df` stays intact.
df_with_codes = df.copy()
# The .str accessor passes missing names through as NaN, whereas calling
# str.lower() element by element would raise on them.
df_with_codes["Country"] = df_with_codes["Country"].str.lower()
# NOTE(review): the inner join silently discards every row whose country name
# has no counterpart in codes_df -- confirm that the omitted rows are intended.
df_with_codes = df_with_codes.merge(
    codes_df, how="inner", left_on="Country", right_on="official_name_en"
).drop(columns="official_name_en")
# Restore a display-friendly capitalisation once the join is done.
df_with_codes["Country"] = df_with_codes["Country"].str.title()

# The count columns may contain text with "<" and "," characters; strip both
# before converting to float.
count_cols = religious_cols + ["All Religions"]
for col in count_cols:
    df_with_codes[col] = (
        df_with_codes[col]
        .astype(str)
        .str.replace("<", "", regex=False)
        .str.replace(",", "", regex=False)
        .astype(float)
    )

# Wide -> long: every non-religion column is kept as an identifier, and each
# religion column becomes rows of (religion, num_of_parishioners).
id_cols = [col for col in df_with_codes.columns if col not in religious_cols]
df_with_codes = df_with_codes.melt(
    id_vars=id_cols, var_name="religion", value_name="num_of_parishioners"
)

# Despite its name this is a ratio (count / "All Religions" total), not
# multiplied by 100.
df_with_codes["percent"] = (
    df_with_codes["num_of_parishioners"] / df_with_codes["All Religions"]
)

  warn(msg)


## Chart

In [3]:
# Lift Altair's row limit so the whole long-format table can be embedded in the chart.
alt.data_transformers.disable_max_rows()

# Country outlines: the "countries" feature of the world_110m TopoJSON, referenced by URL.
map_source = alt.topo_feature(vega_datasets.data.world_110m.url, "countries")

# Dropdown listing every distinct value of the "religion" column.
religion_dropdown = alt.binding_select(
    name="Choose religion: ",
    options=df_with_codes["religion"].unique(),
)

# Single-value selection on the "religion" field, driven by the dropdown and
# initialised to "Christians". Both charts filter on it.
religion_select = alt.selection_single(
    bind=religion_dropdown,
    fields=["religion"],
    init={"religion": "Christians"},
)

# Year displayed on the map. Used for both the filter and the title so the two
# cannot drift apart.
MAP_YEAR = 2010

# Choropleth: each country is shaded by the share of the selected religion.
mapchart = alt.Chart(df_with_codes).mark_geoshape(
    stroke='black'
).encode(
    color=alt.Color(
        "percent:Q",
        legend=alt.Legend(title="Share of people", orient="left"),
        scale=alt.Scale(scheme="blues")
    ),
    tooltip=[
        alt.Tooltip("Country:N", title="Country"),
        alt.Tooltip("percent:Q", title="Share of parishioners",),
        alt.Tooltip("num_of_parishioners:Q", title="Num of parishioners",),
    ],
).add_selection(
    religion_select
).transform_filter(
    # Keep only the rows of the religion picked in the dropdown ...
    religion_select
).transform_filter(
    # ... and of the displayed year.
    alt.FieldEqualPredicate(field='Year', equal=MAP_YEAR)
).transform_lookup(
    # Transforms run in the order they are declared: joining the country shapes
    # after the filters means geometry is attached only to the rows that are
    # drawn, not to every (country, year, religion) combination.
    lookup="ISO3166-1-numeric",
    from_=alt.LookupData(map_source, "id", ["type", "properties", "geometry"]),
).project(
    'naturalEarth1'
).properties(
    width=800, height=300,
    title=f"Share of people for selected religion in {MAP_YEAR}",
)

# Line chart: head count summed over all rows of the selected religion, per year.
linechart = (
    alt.Chart(df_with_codes)
    .mark_line(point=True)
    # Restrict the data to the religion chosen in the dropdown.
    .transform_filter(religion_select)
    .add_selection(religion_select)
    .encode(
        x=alt.X("Year:O"),
        y=alt.Y(
            "num_of_parishioners:Q",
            aggregate="sum",
            title="overall people",
            # Do not force the y axis to include zero.
            scale=alt.Scale(zero=False),
        ),
        color=alt.value("steelblue"),
        tooltip=[
            alt.Tooltip("Year:O", title="Year"),
            alt.Tooltip(
                "num_of_parishioners:Q",
                aggregate="sum",
                title="Sum of people",
            ),
        ],
    )
    .properties(
        title="Change in time of number of people for selected religion from 2010 to 2050",
        width=800,
        height=300,
    )
)

# Stack the map on top of the line chart.
chart = alt.vconcat(mapchart, linechart)

# Last expression of the cell: render the combined chart without the view border.
chart.configure_view(stroke=None)