<a href="https://colab.research.google.com/github/FintradeLab/Data_Viz_Class/blob/main/06_Animation.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# **1. Libraries**

In [None]:
import numpy as np
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go

# **2. Textbook example**

In [None]:
# Gapminder sample dataset shipped with plotly express
tmp = px.data.gapminder()

# Animated bubble chart: GDP per capita (log scale) vs. life expectancy,
# one animation frame per year, bubbles matched across frames by country.
px.scatter(
    tmp,
    x='gdpPercap',
    y='lifeExp',
    size='pop',
    size_max=40,
    color='continent',
    hover_name='country',
    log_x=True,
    range_x=[200, 100000],
    range_y=[30, 100],
    animation_frame='year',
    animation_group='country',
)

# **3. Real World**

## Load Data

In [None]:
# Read the file

## Path of the Excel workbook
p = '/content/data.xlsx'

## Read it and create the dataframe
d = pd.read_excel(io=p)

## Column names, non-null counts and dtypes
d.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1085 entries, 0 to 1084
Data columns (total 8 columns):
 #   Column                                                    Non-Null Count  Dtype  
---  ------                                                    --------------  -----  
 0   Country Name                                              1085 non-null   object 
 1   Country Code                                              1085 non-null   object 
 2   Year                                                      1085 non-null   int64  
 3   Population, total [SP.POP.TOTL]                           1085 non-null   int64  
 4   Population growth (annual %) [SP.POP.GROW]                1085 non-null   float64
 5   Surface area (sq. km) [AG.SRF.TOTL.K2]                    1077 non-null   object 
 6   Life expectancy at birth, total (years) [SP.DYN.LE00.IN]  1046 non-null   float64
 7   Net migration [SM.POP.NETM]                               1085 non-null   int64  
dtypes: float64(2), int6

In [None]:
# Short, code-friendly names for the source columns.
# A single mapping and a single rename call replace eight separate in-place
# renames; reassigning the result (instead of inplace=True) keeps the step
# chainable and easy to re-run.
d = d.rename(columns={
    'Country Name': 'country',
    'Country Code': 'code',
    'Year': 'year',
    'Population, total [SP.POP.TOTL]': 'pop',
    'Population growth (annual %) [SP.POP.GROW]': 'pop_g',
    'Surface area (sq. km) [AG.SRF.TOTL.K2]': 'sur',
    'Life expectancy at birth, total (years) [SP.DYN.LE00.IN]': 'lexp',
    'Net migration [SM.POP.NETM]': 'net',
})

d.head()

Unnamed: 0,country,code,year,pop,pop_g,sur,lexp,net
0,Afghanistan,AFG,2017,35643418,2.866492,652860,63.016,-47090
1,Albania,ALB,2017,2873457,-0.091972,28750,79.047,-9768
2,Algeria,DZA,2017,41136546,1.957002,2381741,75.743,-36448
3,American Samoa,ASM,2017,49463,-1.971819,200,,-1652
4,Andorra,AND,2017,73837,1.772183,470,,887


In [None]:
# Drop every row that has at least one missing value.
# .copy() makes `d` an independent frame, so adding a column to it later
# does not raise pandas' SettingWithCopyWarning.
d = d.dropna().copy()
d.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 1038 entries, 0 to 1084
Data columns (total 8 columns):
 #   Column   Non-Null Count  Dtype  
---  ------   --------------  -----  
 0   country  1038 non-null   object 
 1   code     1038 non-null   object 
 2   year     1038 non-null   int64  
 3   pop      1038 non-null   int64  
 4   pop_g    1038 non-null   float64
 5   sur      1038 non-null   object 
 6   lexp     1038 non-null   float64
 7   net      1038 non-null   int64  
dtypes: float64(2), int64(3), object(3)
memory usage: 73.0+ KB


In [None]:
# Duplicate year as string / How to create a column
# .assign returns a new frame that includes the extra column, which avoids
# the SettingWithCopyWarning that direct column assignment can raise on a
# frame derived from another one.
d = d.assign(year_str=d['year'].astype(str))
d.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 1038 entries, 0 to 1084
Data columns (total 9 columns):
 #   Column    Non-Null Count  Dtype  
---  ------    --------------  -----  
 0   country   1038 non-null   object 
 1   code      1038 non-null   object 
 2   year      1038 non-null   int64  
 3   pop       1038 non-null   int64  
 4   pop_g     1038 non-null   float64
 5   sur       1038 non-null   object 
 6   lexp      1038 non-null   float64
 7   net       1038 non-null   int64  
 8   year_str  1038 non-null   object 
dtypes: float64(2), int64(3), object(4)
memory usage: 81.1+ KB




A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



## Replicating the textbook example

In [None]:
# Animated bubble chart on the loaded data: surface area (log scale) vs.
# net migration, bubbles sized by population, one frame per year.
# NOTE(review): d.info() reports `sur` as object dtype - confirm it is
# plotted as numeric values.
px.scatter(
    d,
    x='sur',
    y='net',
    size='pop',
    size_max=40,
    hover_name='country',
    log_x=True,
    range_x=[10000, 12000000],
    animation_frame='year',
    animation_group='country',
)

## Select a subsample

In [None]:
## Create a list to select countries
c_list = ['Colombia','Chile','Peru','Mexico','Brazil','Argentina',
          'Venezuela', 'Ecuador', 'Uruguay','Paraguay']

## Select from the dataframe (unindexed)
latam = d[d['country'].isin(c_list)]

latam.head()



Unnamed: 0,country,code,year,pop,pop_g,sur,lexp,net,year_str
7,Argentina,ARG,2017,44044811,1.037134,2780400,76.833,4926,2017
26,Brazil,BRA,2017,208504960,0.792263,8515770,74.827,56362,2017
40,Chile,CHL,2017,18368577,1.562055,756700,80.35,214697,2017
42,Colombia,COL,2017,48351671,1.51229,1141749,76.646,415618,2017
57,Ecuador,ECU,2017,16696944,1.553356,256370,76.972,49359,2017


In [None]:
# Animated bubble chart restricted to the selected countries,
# with one colour per country.
px.scatter(
    latam,
    x='sur',
    y='net',
    size='pop',
    size_max=40,
    color='country',
    hover_name='country',
    log_x=True,
    range_x=[10000, 12000000],
    animation_frame='year',
    animation_group='country',
)

## Problem

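The same animation does not work with `px.line`: each frame contains only one point per country, so there is nothing to connect into a line.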

In [None]:
# Same animation settings as the scatter version, but drawn with px.line.
px.line(
    latam,
    x='sur',
    y='net',
    color='country',
    hover_name='country',
    log_x=True,
    range_x=[10000, 12000000],
    animation_frame='year',
    animation_group='country',
)

# **4. Animation from Scratch**

## 4.1. Adjust the data frame

In [None]:
### Reshape to wide format:
###   rows    = years
###   columns = country
###   values  = net
l2 = (
    latam
    .pivot(index="year", columns="country", values="net")
    .reset_index()                                           # year back as a regular column
    .assign(year_str=lambda wide: wide["year"].astype(str))  # string copy of year
)
l2.head()

country,year,Argentina,Brazil,Chile,Colombia,Ecuador,Mexico,Paraguay,Peru,Uruguay,year_str
0,2017,4926,56362,214697,415618,49359,-12438,-16628,104242,-3090,2017
1,2018,4690,67934,237358,494364,153372,-99430,-16470,326831,-2963,2018
2,2019,4478,69186,230162,395803,71801,-47764,-16270,136558,-2902,2019
3,2020,2344,56880,98439,229437,35894,-9949,-8230,79456,-1486,2020
4,2021,2344,20376,113709,211978,35894,-52649,-8230,68012,-1486,2021


## 4.2. Create the frame

In [None]:
# The base figure: layout only (button placement, fixed axes, title);
# no traces are added here.
fig = go.Figure(
    layout=dict(
        updatemenus=[
            dict(type='buttons', direction='right', x=0.9, y=1.16),
        ],
        xaxis=dict(
            range=['2017', '2021'],
            autorange=False,   # keep the axis fixed
            tickwidth=2,
            dtick=1,
            title=dict(text='Year'),
        ),
        yaxis=dict(
            range=[-1000000, 1000000],
            autorange=False,
            title=dict(text=''),
        ),
        title=dict(
            text='Net Migration to principal LATAM countries',
            font=dict(size=30),
            x=0.5,
        ),
    )
)
fig.show()

In [None]:
## Add Traces
init = 1  # number of rows of l2 drawn by the initial traces

# Start from an empty trace list so that re-running this cell does not
# stack duplicate traces on the figure.
fig.data = []

# One line trace per country, built from a single template instead of
# copy-pasted add_trace calls. Order is Colombia first, then Chile.
fig.add_traces([
    go.Scatter(
        x=l2['year'][:init],
        y=l2[country][:init],
        name=country,
        line=dict(color=color),
        mode='lines',
    )
    for country, color in [('Colombia', 'black'), ('Chile', 'red')]
])

In [None]:
## Frames
# One frame per k = init .. len(l2); frame k holds the first k rows of l2
# for Colombia and Chile, in that order.
frames = [
    go.Frame(
        data=[
            go.Scatter(x=l2['year'].iloc[:k], y=l2[country].iloc[:k])
            for country in ('Colombia', 'Chile')
        ]
    )
    for k in range(init, len(l2) + 1)
]

## Animation
fig.update(frames=frames)

In [None]:
## Play button
# A single 'Play' button that triggers the 'animate' method with a
# frame duration of 800.
fig.update_layout(
    updatemenus=[
        {
            'buttons': [
                {
                    'label': 'Play',
                    'method': 'animate',
                    'args': [None, {'frame': {'duration': 800}}],
                },
            ],
        },
    ]
)