# Submission

Put the ipynb file and html file in the github branch you created in the last assignment and submit the link to the commit in brightspace

In [1]:
from plotly.offline import init_notebook_mode
import plotly.io as pio
import plotly.express as px

init_notebook_mode(connected=True)
pio.renderers.default = "plotly_mimetype+notebook"

In [2]:
#load data
df = px.data.gapminder()
df.head()

Unnamed: 0,country,continent,year,lifeExp,pop,gdpPercap,iso_alpha,iso_num
0,Afghanistan,Asia,1952,28.801,8425333,779.445314,AFG,4
1,Afghanistan,Asia,1957,30.332,9240934,820.85303,AFG,4
2,Afghanistan,Asia,1962,31.997,10267083,853.10071,AFG,4
3,Afghanistan,Asia,1967,34.02,11537966,836.197138,AFG,4
4,Afghanistan,Asia,1972,36.088,13079460,739.981106,AFG,4


## Question 1:

Recreate the barplot below that shows the population of different continents for the year 2007. 

*Hints:*

- Extract the 2007 year data from the dataframe. You have to process the data accordingly
- use [plotly bar](https://plotly.com/python-api-reference/generated/plotly.express.bar)
- Add different colors for different continents
- Sort the order of the continent for the visualisation. Use [axis layout setting](https://plotly.com/python/reference/layout/xaxis/)
- Add text to each bar that represents the population 

In [10]:
# YOUR CODE HERE
df = df[df["year"]==2007]

grouped_df = df.groupby("continent").agg({"pop": "sum"}).reset_index()

colors = px.colors.qualitative.Set1

fig = px.bar(grouped_df, x="pop", y="continent",
             color="continent", color_discrete_sequence=colors)

fig.update_traces(showlegend=False)

fig.show()

## Question 2:

Sort the order of the continent for the visualisation

Hint: Use [axis layout setting](https://plotly.com/python/reference/layout/xaxis/)

In [16]:
# YOUR CODE HERE
fig = px.bar(grouped_df, x="pop", y="continent",
             color="continent", color_discrete_sequence=colors)

fig.update_traces(selector=dict(type='bar'))
fig.update_yaxes(categoryorder='total ascending')
fig.update_traces(showlegend=False)

fig.show()

## Question 3:

Add text to each bar that represents the population 

In [33]:
# YOUR CODE HERE
def format_population(value):
    if value >= 1_000_000_000:
        return f'{value / 1_000_000_000:.1f}B'
    elif value >= 1_000_000:
        return f'{value / 1_000_000:.0f}M'
    else:
        return f'{value}'
    
grouped_df['Population_Text'] = grouped_df['pop'].apply(format_population)

fig = px.bar(grouped_df, x="pop", y="continent", text="Population_Text",
             color="continent", color_discrete_sequence=colors)

fig.update_traces(selector=dict(type='bar'))
fig.update_yaxes(categoryorder='total ascending')
fig.update_traces(showlegend=False)

fig.update_traces(textposition='outside')
fig.show()





## Question 4:

Thus far we looked at data from one year (2007). Lets create an animation to see the population growth of the continents through the years


In [85]:
# YOUR CODE HERE
import plotly.graph_objects as go
import numpy as np


fig = go.Figure()


continent_colors = {
    "Asia": "rgb(44, 160, 44)",    
    "Americas": "rgb(255, 0, 0)",   
    "Africa": "rgb(31, 119, 180)",  
    "Europe": "rgb(148, 0, 211)",   
    "Oceania": "rgb(255, 165, 0)"   
}

for year in np.arange(1952, 2008, 5):
    df = px.data.gapminder()
    df = df[df["year"] == year]
    grouped_df = df.groupby("continent").agg({"pop": "sum"}).reset_index()
    
    sorted_df = grouped_df.sort_values(by="pop", ascending=True)
    colors = [continent_colors[continent] for continent in sorted_df["continent"]]

    fig.add_trace(
        go.Bar(
            x=sorted_df["pop"],
            y=sorted_df["continent"],
            orientation='h',
            name=str(year),
            marker_color=colors
        ) 
    )
    

fig.data[10].visible = True

steps = []
for i in range(len(fig.data)):
    step = dict(
        method="restyle",
        args=["visible", [False] * len(fig.data)],
        label=str(1952 + 5*i)
    )
    step["args"][1][i] = True 
    steps.append(step)

sliders = [dict(
    active=10,
    steps=steps,
    pad={"t": 50},
    currentvalue={"prefix": "Year: "}
)]

xaxis_range = [0, sorted_df["pop"].max() * 1.1] 

fig.update_layout(
    sliders=sliders,
    xaxis_title="Population",
    yaxis_title="Continent",
    xaxis=dict(range=xaxis_range)
)


fig.show()

## Question 5:

Instead of the continents, lets look at individual countries. Create an animation that shows the population growth of the countries through the years

In [105]:
# YOUR CODE HERE
import plotly.graph_objects as go
import numpy as np
import random as random

fig = go.Figure()

df = px.data.gapminder()
country_colors = {}
 
for i, country in enumerate(df["country"]):
    if country not in country_colors:
        country_colors[country] = f'rgb({random.randint(0, 255)}, {random.randint(0, 255)}, {random.randint(0, 255)})'

print(country_colors)

for year in np.arange(1952, 2008, 5):
    df = px.data.gapminder()
    df = df[df["year"] == year]
    
    sorted_df = df.sort_values(by="pop", ascending=True)

    colors = [country_colors[country] for country in sorted_df["country"]]

    fig.add_trace(
        go.Bar(
            x=sorted_df["pop"],
            y=sorted_df["country"],
            orientation='h',
            name=str(year),
            marker_color=colors
        ) 
    )
    

fig.data[10].visible = True

steps = []
for i in range(len(fig.data)):
    step = dict(
        method="restyle",
        args=["visible", [False] * len(fig.data)],
        label=str(1952 + 5*i)
    )
    step["args"][1][i] = True 
    steps.append(step)

sliders = [dict(
    active=10,
    steps=steps,
    pad={"t": 50},
    currentvalue={"prefix": "Year: "}
)]

xaxis_range = [0, sorted_df["pop"].max() * 1.1] 

fig.update_layout(
    sliders=sliders,
    xaxis_title="Population",
    yaxis_title="Continent",
    xaxis=dict(range=xaxis_range)
)


fig.show()

{'Afghanistan': 'rgb(95, 88, 46)', 'Albania': 'rgb(23, 10, 213)', 'Algeria': 'rgb(63, 73, 236)', 'Angola': 'rgb(44, 17, 37)', 'Argentina': 'rgb(160, 119, 17)', 'Australia': 'rgb(175, 41, 81)', 'Austria': 'rgb(216, 159, 197)', 'Bahrain': 'rgb(228, 43, 212)', 'Bangladesh': 'rgb(25, 165, 29)', 'Belgium': 'rgb(190, 42, 67)', 'Benin': 'rgb(119, 0, 143)', 'Bolivia': 'rgb(176, 146, 85)', 'Bosnia and Herzegovina': 'rgb(39, 86, 255)', 'Botswana': 'rgb(88, 51, 168)', 'Brazil': 'rgb(235, 51, 192)', 'Bulgaria': 'rgb(189, 229, 65)', 'Burkina Faso': 'rgb(32, 83, 38)', 'Burundi': 'rgb(115, 219, 219)', 'Cambodia': 'rgb(3, 145, 133)', 'Cameroon': 'rgb(73, 8, 231)', 'Canada': 'rgb(8, 13, 247)', 'Central African Republic': 'rgb(243, 5, 191)', 'Chad': 'rgb(177, 155, 212)', 'Chile': 'rgb(146, 7, 173)', 'China': 'rgb(60, 70, 145)', 'Colombia': 'rgb(253, 64, 109)', 'Comoros': 'rgb(20, 245, 226)', 'Congo, Dem. Rep.': 'rgb(44, 66, 213)', 'Congo, Rep.': 'rgb(219, 29, 56)', 'Costa Rica': 'rgb(120, 242, 206)', "C

## Question 6:

Clean up the country animation. Set the height size of the figure to 1000 to have a better view of the animation

In [106]:
# YOUR CODE HERE
import plotly.graph_objects as go
import numpy as np
import random as random

fig = go.Figure()

df = px.data.gapminder()
country_colors = {}


for i, country in enumerate(df["country"]):
    if country not in country_colors:
        country_colors[country] = f'rgb({random.randint(0, 255)}, {random.randint(0, 255)}, {random.randint(0, 255)})'

for year in np.arange(1952, 2008, 5):
    df = px.data.gapminder()
    df = df[df["year"] == year]
    
    sorted_df = df.sort_values(by="pop", ascending=True)

    colors = [country_colors[country] for country in sorted_df["country"]]

    fig.add_trace(
        go.Bar(
            x=sorted_df["pop"],
            y=sorted_df["country"],
            orientation='h',
            name=str(year),
            marker_color=colors
        ) 
    )
    

fig.data[10].visible = True

steps = []
for i in range(len(fig.data)):
    step = dict(
        method="restyle",
        args=["visible", [False] * len(fig.data)],
        label=str(1952 + 5*i)
    )
    step["args"][1][i] = True  
    steps.append(step)

sliders = [dict(
    active=10,
    steps=steps,
    pad={"t": 50},
    currentvalue={"prefix": "Year: "}
)]

xaxis_range = [0, sorted_df["pop"].max() * 1.1]  

fig.update_layout(
    sliders=sliders,
    xaxis_title="Population",
    yaxis_title="Continent",
    xaxis=dict(range=xaxis_range),
    height = 1000
)


fig.show()

## Question 7:

Show only the top 10 countries in the animation

Hint: Use the axis limit to set this. 

In [110]:
# YOUR CODE HERE
import plotly.graph_objects as go
import numpy as np
import random as random

fig = go.Figure()

df = px.data.gapminder()
country_colors = {}

for i, country in enumerate(df["country"]):
    if country not in country_colors:
        country_colors[country] = f'rgb({random.randint(0, 255)}, {random.randint(0, 255)}, {random.randint(0, 255)})'

for year in np.arange(1952, 2008, 5):
    df = px.data.gapminder()
    df = df[df["year"] == year]
    
    sorted_df = df.sort_values(by="pop", ascending=False)
    top_sorted_df = sorted_df.head(10)
    double_sorted_df = top_sorted_df.sort_values(by="pop", ascending=True)

    colors = [country_colors[country] for country in double_sorted_df["country"]]


    fig.add_trace(
        go.Bar(
            x=double_sorted_df["pop"],
            y=double_sorted_df["country"],
            orientation='h',
            name=str(year),
            marker_color=colors
        ) 
    )
    

fig.data[10].visible = True

steps = []
for i in range(len(fig.data)):
    step = dict(
        method="restyle",
        args=["visible", [False] * len(fig.data)],
        label=str(1952 + 5*i)
    )
    step["args"][1][i] = True
    steps.append(step)

sliders = [dict(
    active=10,
    steps=steps,
    pad={"t": 50},
    currentvalue={"prefix": "Year: "}
)]

xaxis_range = [0, sorted_df["pop"].max() * 1.1]

fig.update_layout(
    sliders=sliders,
    xaxis_title="Population",
    yaxis_title="Continent",
    xaxis=dict(range=xaxis_range),
)


fig.show()