In [1]:
import altair as alt
import numpy as np
import pandas as pd

In [2]:
# Load the per-country population, birth-rate and death-rate data.
data = pd.read_csv('birth-rate-vs-death-rate.csv')

# Years of interest: 1920 through 2016 inclusive.
years = list(range(1920, 2017))

# Keep only the rows whose Year falls inside that window.
in_year_window = data['Year'].isin(years)
years_filtered = data.loc[in_year_window]

# Restrict the rows to the five countries the author picked as the most
# powerful / largest economies, and keep only the columns the visualization
# needs: Year, Country (Entity) and Total Population.
is_charted_country = years_filtered['Entity'].isin(
    ['United States', 'China', 'United Kingdom', 'Germany', 'India']
)
all_countries = years_filtered.loc[
    is_charted_country,
    ['Year', 'Entity', 'Total population (Gapminder, HYDE & UN)'],
]





In [3]:


# Selection that follows the cursor and picks the nearest data point by Year.
# `alt.selection_point` is the Altair 5 replacement for the deprecated
# `alt.selection(type='single', ...)`; `empty=False` is the current spelling
# of the old `empty='none'` (nothing counts as selected until the pointer
# hovers over the chart).
nearest = alt.selection_point(nearest=True, on='mouseover',
                              fields=['Year'], empty=False)

# Base layer: one population line per country.
line = alt.Chart(all_countries).mark_line(interpolate='basis').encode(
    x='Year:O',
    y='Total population (Gapminder, HYDE & UN):Q',
    color='Entity:N'
)

# Fully transparent points along the x axis; they exist only to carry the
# selection so the cursor can pick a year. `add_params` replaces the
# deprecated `add_selection`.
selectors = alt.Chart(all_countries).mark_point().encode(
    x='Year:O',
    opacity=alt.value(0),
).add_params(
    nearest
)

# Point markers on each line, visible only for the selected year.
points = line.mark_point().encode(
    opacity=alt.condition(nearest, alt.value(1), alt.value(0))
)

# Text labels next to the markers showing the population for the selected year;
# a single blank is rendered for every unselected point.
text = line.mark_text(align='left', dx=5, dy=-5).encode(
    text=alt.condition(nearest, 'Total population (Gapminder, HYDE & UN):Q', alt.value(' '))
)

# Vertical rule drawn at the selected year (rows are filtered to the selection).
rules = alt.Chart(all_countries).mark_rule(color='lightblue').encode(
    x='Year:O',
).transform_filter(
    nearest
)

# Combine all layers into one chart; as the last expression of the cell it is
# what the notebook displays.
alt.layer(
    line, selectors, points, rules, text
).properties(
    width=500, height=300
)

   Use 'selection_point()' or 'selection_interval()' instead; these functions also include more helpful docstrings.
        combined and should be specified using "selection_point()".
  col = df[col_name].apply(to_list_if_array, convert_dtype=False)


In [4]:
# Sample years: the first year of each decade from 1920 to 2010, plus 2016 as
# the most recent sample.
decade_lst = [1920, 1930, 1940, 1950, 1960, 1970, 1980, 1990, 2000, 2010, 2016]

# Countries to compare in the stacked bars.
countries = ['China','India','United States','United Kingdom','Germany','Japan','Indonesia','Brazil']

# Both row filters are built from `data` and applied in a single .loc call, so
# the boolean masks share the index of the frame they select from (the
# previous chained .loc applied a mask built on `data` to an already-filtered
# frame and relied on implicit index alignment). The same call keeps only the
# columns the chart needs.
sample_year_mask = data['Year'].isin(decade_lst)
country_mask = data['Entity'].isin(countries)
by_country = data.loc[
    sample_year_mask & country_mask,
    ['Year', 'Entity', 'Total population (Gapminder, HYDE & UN)'],
]

# Stacked bar chart: one bar per sample year, split by country, showing how the
# combined population of the selected countries is distributed among them.
alt.Chart(by_country).mark_bar().encode(
    x='Year:O',
    y='Total population (Gapminder, HYDE & UN):Q',
    color='Entity:N'
).properties(
    width=500, height=300
)

  col = df[col_name].apply(to_list_if_array, convert_dtype=False)
