In [1]:
import json 
import vega_datasets

import pandas as pd
import numpy as np
import geopandas as gpd
import altair as alt

from pathlib import Path
from collections import Counter
from functools import partial


## Data loading and preprocessing

In [2]:
# Load the religious-composition workbook and keep only the rows whose
# `level` is 1 (presumably the per-country rows -- confirm against the workbook).
data_folder = Path("data")
composition_path = data_folder / "Religious_Composition_by_Country_2010-2050.xlsx"
df = pd.read_excel(composition_path, engine="openpyxl")
df = df.loc[df["level"] == 1].reset_index(drop=True).copy()

# Columns holding the per-religion counts.
religious_cols = [
    "Christians",
    "Muslims",
    "Unaffiliated",
    "Hindus",
    "Buddhists",
    "Folk Religions",
    "Other Religions",
    "Jews",
]


def _to_count(raw):
    """Convert a raw cell to float after removing any "<" and "," characters."""
    return float(str(raw).replace("<", "").replace(",", ""))


# Every count column (plus the overall total) is converted to float.
for count_col in religious_cols + ["All Religions"]:
    df[count_col] = df[count_col].apply(_to_count)

# Name of the religion column with the largest count in each row.
df["top_religion"] = df[religious_cols].idxmax(axis=1)


# Load ISO country codes and attach them to the religion table by country name.
codes_df = pd.read_csv(data_folder / "country-codes_csv.csv")
codes_df = codes_df[["official_name_en", "ISO3166-1-numeric", "ISO3166-1-Alpha-2"]]
codes_df = codes_df[codes_df["official_name_en"].notna()].reset_index(drop=True)

# Names are matched case-insensitively. The JSON file maps a lowercased official
# name to a replacement spelling (presumably the one used in the religion
# table -- verify against the file); names without an entry are kept as they are.
# JSON is read as UTF-8 explicitly so the result does not depend on the
# platform's default encoding.
with open(data_folder / "country_fix_name.json", "r", encoding="utf-8") as f:
    fix_name_dict = json.load(f)
codes_df["official_name_en"] = (
    codes_df["official_name_en"]
    .str.lower()
    .map(lambda name: fix_name_dict.get(name, name))
)

# Join on a temporary lowercase key instead of lowercasing `Country` in place and
# re-capitalising it with str.title() afterwards: title() does not restore the
# original spelling (it yields e.g. "Isle Of Man"), so the source names are
# left untouched here.
df = (
    df.assign(_country_key=df["Country"].str.lower())
    .merge(codes_df, how="inner", left_on="_country_key", right_on="official_name_en")
    .drop(columns=["_country_key", "official_name_en"])
)



# Attach the per-country columns of the centroid table (presumably the
# Latitude/Longitude used by the bubble layer later -- confirm) via the alpha-2 code.
# NOTE(review): pandas.read_csv treats the string "NA" as missing by default, and
# "NA" is also Namibia's alpha-2 code -- confirm Namibia survives this inner join.
centroids_df = pd.read_csv(data_folder / "average-latitude-longitude-countries.csv")
centroid_coords = centroids_df.drop(columns="Country")
df = (
    df.merge(
        centroid_coords,
        how="inner",
        left_on="ISO3166-1-Alpha-2",
        right_on="ISO 3166 Country Code",
    )
    .drop(columns="ISO 3166 Country Code")
)

  warn(msg)


## Dominant religion in each country in 2010

In [3]:
# Country outlines from the world_110m TopoJSON; features are matched to rows
# of `df` through the numeric ISO code.
map_source = alt.topo_feature(vega_datasets.data.world_110m.url, 'countries')

# NOTE(review): no explicit scale domain is set, so the seven colors are assigned
# to whichever `top_religion` values occur in the filtered data -- confirm that
# the resulting color-to-religion mapping is the intended one.
religion_color = alt.Color(
    "top_religion:N",
    legend=alt.Legend(title="Religions"),
    scale=alt.Scale(range=[
        "#33a02c", "#1f78b4", "#ff7f00", "#6a3d9a",
        "#b15928", "#e31a1c", "#d8b5a5"
    ])
)

chart = (
    alt.Chart(df)
    .mark_geoshape(stroke='black')
    # Filter to 2010 *before* the lookup so geometry is only joined to the rows
    # that are actually drawn, instead of to every year and then discarded.
    .transform_filter(alt.FieldEqualPredicate(field='Year', equal=2010))
    .transform_lookup(
        lookup="ISO3166-1-numeric",
        from_=alt.LookupData(map_source, "id", ["type", "properties", "geometry"]),
    )
    .encode(
        color=religion_color,
        tooltip=[
            alt.Tooltip("Country:N", title="Country"),
            alt.Tooltip("top_religion:N", title="Religion"),
        ],
    )
    .project('naturalEarth1')
    .properties(
        width=800, height=600,
        title="Dominant religion in each country in 2010"
    )
    .configure_view(stroke=None)
)

chart

On this chart, I chose color to encode the most popular religion in each country.

From this chart, it is clear that two major religions (Christianity and Islam) split most of the world into two parts.

Several other religions (Buddhism, Hinduism, etc.) are more popular in Asia.

## Difference between number of Christians by country in 2010 and 2050

In [4]:
# Pair each country's 2050 Christian count with its 2010 count.
christians_by_year = df[["Year", "Country", "ISO3166-1-numeric", "Christians"]]
christians_2050 = christians_by_year[christians_by_year["Year"] == 2050].drop(columns="Year")
christians_2010 = christians_by_year[christians_by_year["Year"] == 2010].drop(columns="Year")
christ_diff_df = christians_2050.merge(
    christians_2010,
    on=["Country", "ISO3166-1-numeric"],
    suffixes=("_2050", "_2010"),
)

# Absolute change, and the change relative to the 2010 count (rounded to 2 decimals).
absolute_change = christ_diff_df["Christians_2050"] - christ_diff_df["Christians_2010"]
relative_change = absolute_change / christ_diff_df["Christians_2010"]
christ_diff_df["Christians_diff"] = absolute_change
christ_diff_df["Christians_increase"] = relative_change.apply(lambda ratio: round(ratio, 2))

# Country outlines, matched to the rows above through the numeric ISO code.
map_source = alt.topo_feature(vega_datasets.data.world_110m.url, 'countries')

increase_color = alt.Color(
    "Christians_increase:Q",
    legend=alt.Legend(title="Magnification"),
    scale=alt.Scale(scheme="pinkyellowgreen", domain=[-2, 2]),
)

# (field shorthand, tooltip label) pairs, in display order.
tooltip_fields = [
    ("Country:N", "Country"),
    ("Christians_increase:Q", "Increase since 2010, times"),
    ("Christians_2010:Q", "Num of Christians in 2010"),
    ("Christians_2050:Q", "Num of Christians in 2050"),
    ("Christians_diff:Q", "Diff from 2010 till 2050"),
]
increase_tooltip = [alt.Tooltip(field, title=label) for field, label in tooltip_fields]

chart = (
    alt.Chart(christ_diff_df)
    .mark_geoshape(stroke='black')
    .encode(color=increase_color, tooltip=increase_tooltip)
    .transform_lookup(
        lookup="ISO3166-1-numeric",
        from_=alt.LookupData(map_source, "id", ["type", "properties", "geometry"]),
    )
    .project('naturalEarth1')
    .properties(
        width=800,
        height=600,
        title="Difference between number of Christians by country in 2010 and 2050",
    )
    .configure_view(stroke=None)
)

chart

To show the difference in the number of Christians between 2010 and 2050, I chose to plot the relative change since 2010 (the 2050 count minus the 2010 count, divided by the 2010 count).
Countries where the number of Christians is increasing are shown in green, while countries where it is decreasing are in the pink/red range.

It can be concluded that the number of Christians is decreasing almost everywhere in Europe (including Russia), while it is growing in Central Africa, possibly because of the region's rapid pace of development.

## Number of people unaffiliated with any religion by country in 2010

In [5]:
# Country outlines, matched to rows of `df` through the numeric ISO code.
map_source = alt.topo_feature(vega_datasets.data.world_110m.url, 'countries')

# Both layers show the same tooltip.
unaffiliated_tooltip = [
    alt.Tooltip("Country:N", title="Country"),
    alt.Tooltip("Unaffiliated:O", title="Number of unaffiliated"),
]

# Bottom layer: white country shapes with black borders.
country_outlines = (
    alt.Chart(df)
    .mark_geoshape(stroke='black')
    .encode(color=alt.value("white"), tooltip=unaffiliated_tooltip)
    .transform_lookup(
        lookup="ISO3166-1-numeric",
        from_=alt.LookupData(map_source, "id", ["type", "properties", "geometry"]),
    )
)

# Top layer: one red circle per row, placed at the row's Latitude/Longitude and
# sized by the `Unaffiliated` count (scale domain [10e6, 7e8] -> range (14, 1000)).
bubble_size = alt.Size(
    'Unaffiliated:Q',
    scale=alt.Scale(range=(14, 1000), domain=[10e6, 7e8]),
)
unaffiliated_bubbles = (
    alt.Chart(df)
    .mark_circle()
    .encode(
        longitude=alt.Longitude('Longitude:Q'),
        latitude=alt.Latitude('Latitude:Q'),
        color=alt.value("red"),
        size=bubble_size,
        tooltip=unaffiliated_tooltip,
    )
)

# The Year filter, projection and sizing are applied to the layered chart as a whole.
chart = (
    alt.layer(country_outlines, unaffiliated_bubbles)
    .transform_filter(alt.FieldEqualPredicate(field='Year', equal=2010))
    .project('naturalEarth1')
    .properties(
        width=800,
        height=600,
        title="Number of people unaffiliated with any religion by country in 2010",
    )
    .configure_view(stroke=None)
)

chart

This is a bubble chart that represents the number of people unaffiliated with any religion. This approach has some disadvantages: small values are not visible, and some points overlap. But I think this type of chart is better suited for comparing the number of people in different regions.

It can be concluded that the largest share of religiously unaffiliated people live in China, possibly because the main political party there, the Communist Party, is officially atheist.

## Share of Muslims by country in 2010

In [6]:
# Fraction (0-1) of each row's total population that is Muslim, rounded to two
# decimals. The column name's spelling is kept as-is so any existing reference
# to it keeps working.
df["share_of_mulsims"] = (df["Muslims"] / df["All Religions"]).round(2)

# Country outlines, matched to rows of `df` through the numeric ISO code.
map_source = alt.topo_feature(vega_datasets.data.world_110m.url, 'countries')

chart = (
    alt.Chart(df)
    .mark_geoshape(stroke='black')
    # Filter to 2010 before the lookup so geometry is only joined to drawn rows.
    .transform_filter(alt.FieldEqualPredicate(field='Year', equal=2010))
    .transform_lookup(
        lookup="ISO3166-1-numeric",
        from_=alt.LookupData(map_source, "id", ["type", "properties", "geometry"]),
    )
    .encode(
        color=alt.Color(
            "share_of_mulsims:Q",
            # The legend is titled "Percent" but the field is a 0-1 fraction;
            # the ".0%" format renders it as an actual percentage.
            legend=alt.Legend(title="Percent", format=".0%"),
            scale=alt.Scale(scheme="lightgreyred"),
        ),
        tooltip=[
            alt.Tooltip("Country:N", title="Country"),
            alt.Tooltip("share_of_mulsims:Q", title="Share of muslim peoples", format=".0%"),
            alt.Tooltip("Muslims:O", title="Number of muslim people"),
        ],
    )
    .project('naturalEarth1')
    .properties(
        width=800, height=600,
        title="Share of Muslims by country in 2010"
    )
    .configure_view(stroke=None)
)

chart

The last chart shows the share of Muslims in each country as of 2010.
Islam is a major religion in the Near and Middle East and in North Africa.