In [2]:
import pandas as pd
from raceplotly.plots import barplot

In [4]:
data = pd.read_csv('https://raw.githubusercontent.com/lc5415/raceplotly/main/example/FAOSTAT_data.csv')

my_raceplot = barplot(data,  item_column='Item', value_column='Value', time_column='Year')

my_raceplot.plot(item_label = 'Top 10 crops',
                 value_label = 'Production quantity (tonnes)',
                 time_label = 'Year: ', ## overwrites default `Date: `
                 frame_duration = 300)

In [7]:
df = pd.read_csv('25_noms_padro_any_sexe_1996_2019.csv')



In [8]:
df.head()

Unnamed: 0,Ordre,Nom,Sexe,Any,Nombre
0,1,LAURA,Dona,1996,237
1,2,MARIA,Dona,1996,219
2,3,MARTA,Dona,1996,206
3,4,ANDREA,Dona,1996,199
4,5,ANNA,Dona,1996,195


In [9]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1200 entries, 0 to 1199
Data columns (total 5 columns):
 #   Column  Non-Null Count  Dtype 
---  ------  --------------  ----- 
 0   Ordre   1200 non-null   int64 
 1   Nom     1200 non-null   object
 2   Sexe    1200 non-null   object
 3   Any     1200 non-null   int64 
 4   Nombre  1200 non-null   int64 
dtypes: int64(3), object(2)
memory usage: 47.0+ KB


In [10]:

cat_to_eng = {'Ordre': 'Order', 'Nom': 'Name', 'Sexe': 'Sex', 'Any': 'Year', 'Nombre': 'Number'}


df.rename(columns=cat_to_eng, inplace=True)

In [11]:
df.Sex.unique()

array(['Dona', 'Home'], dtype=object)

In [12]:




df.replace({'Dona': 'Woman', 'Home': 'Man'}, inplace=True)

df.head()

Unnamed: 0,Order,Name,Sex,Year,Number
0,1,LAURA,Woman,1996,237
1,2,MARIA,Woman,1996,219
2,3,MARTA,Woman,1996,206
3,4,ANDREA,Woman,1996,199
4,5,ANNA,Woman,1996,195


In [13]:
df.Name.unique()

array(['LAURA', 'MARIA', 'MARTA', 'ANDREA', 'ANNA', 'PAULA', 'ALBA',
       'SARA', 'LAIA', 'CLAUDIA', 'JULIA', 'CARLA', 'MIREIA', 'MARINA',
       'CRISTINA', 'CLARA', 'JUDITH', 'ARIADNA', 'NURIA', 'PATRICIA',
       'RAQUEL', 'ANA', 'IRENE', 'JUDIT', 'SANDRA', 'BERTA', 'MAR',
       'AINA', 'HELENA', 'NATALIA', 'CARLOTA', 'EMMA', 'NEREA', 'LUCIA',
       'MARTINA', 'AINHOA', 'SOFIA', 'NOA', 'JANA', 'ABRIL', 'DANIELA',
       'INES', 'ONA', 'NORA', 'OLIVIA', 'VALENTINA', 'ARLET', 'MIA',
       'CHLOE', 'VALERIA', 'GALA', 'BRUNA', 'LIA', 'MARC', 'DAVID',
       'DANIEL', 'ALBERT', 'VICTOR', 'JORDI', 'SERGI', 'ALEJANDRO', 'POL',
       'ALEX', 'PAU', 'GUILLEM', 'CARLOS', 'ORIOL', 'ADRIA', 'ROGER',
       'OSCAR', 'JAVIER', 'GERARD', 'ARNAU', 'IVAN', 'XAVIER', 'JOAN',
       'ERIC', 'PABLO', 'ADRIAN', 'SERGIO', 'JOEL', 'MARTI', 'RAUL',
       'JAN', 'ALVARO', 'HUGO', 'IKER', 'BIEL', 'NICOLAS', 'BRUNO',
       'LUCAS', 'MAX', 'NIL', 'ALEIX', 'LEO', 'MATEO', 'MARIO', 'ROC',
       'GAEL', 

In [14]:
df.Name.nunique()

106

In [15]:
df['Name'] = df['Name'].apply(lambda x: x.strip())

In [16]:
df.Name.nunique()

106

In [21]:
df.count()

Order     1200
Name      1200
Sex       1200
Year      1200
Number    1200
dtype: int64

In [20]:
df_woman = df[(df['Order'] <= 10) & (df['Sex'] == 'Woman')]

df_woman.count()

Order     240
Name      240
Sex       240
Year      240
Number    240
dtype: int64

In [23]:
df.Order.unique()

array([ 1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16, 17,
       18, 19, 20, 21, 22, 23, 24, 25], dtype=int64)

In [24]:
df_man = df[(df['Order'] <= 10) & (df['Sex'] == 'Man')]

df_man.head()

Unnamed: 0,Order,Name,Sex,Year,Number
600,1,MARC,Man,1996,295
601,2,DAVID,Man,1996,198
602,3,DANIEL,Man,1996,174
603,4,ALBERT,Man,1996,151
604,5,VICTOR,Man,1996,145


In [25]:
import random

def name_to_color(names, r_min=0, r_max=255, g_min=0, g_max=255, b_min=0, b_max=255):
    """Mapping of names to random rgb colors.
    Parameters:
    df (Series): Pandas Series containing names.
    r_min (int): Mininum intensity of the red channel (default 0).
    r_max (int): Maximum intensity of the red channel (default 255).
    g_min (int): Mininum intensity of the green channel (default 0).
    g_max (int): Maximum intensity of the green channel (default 255).
    b_min (int): Mininum intensity of the blue channel (default 0).
    b_max (int): Maximum intensity of the blue channel (default 255).

    Returns:
    dictionary: Mapping of names (keys) to random rgb colors (values)

    """
    mapping_colors = dict()
    
    for name in names.unique():
        red = random.randint(r_min, r_max)
        green = random.randint(g_min, g_max)
        blue = random.randint(b_min, b_max)
        rgb_string = 'rgb({}, {}, {})'.format(red, green, blue)
    
        mapping_colors[name] = rgb_string
    
    return mapping_colors

In [26]:
# dictionary for female names
mapping_colors_woman = name_to_color(df_woman.Name, 125, 255, 0, 185, 0, 185)

# show the mapping
print(mapping_colors_woman)

{'LAURA': 'rgb(223, 4, 38)', 'MARIA': 'rgb(177, 34, 36)', 'MARTA': 'rgb(132, 136, 19)', 'ANDREA': 'rgb(205, 40, 126)', 'ANNA': 'rgb(152, 133, 135)', 'PAULA': 'rgb(133, 75, 35)', 'ALBA': 'rgb(217, 141, 4)', 'SARA': 'rgb(158, 151, 172)', 'LAIA': 'rgb(165, 10, 135)', 'CLAUDIA': 'rgb(247, 126, 0)', 'JULIA': 'rgb(235, 1, 55)', 'CARLA': 'rgb(238, 131, 95)', 'MARINA': 'rgb(253, 99, 98)', 'LUCIA': 'rgb(145, 34, 77)', 'MARTINA': 'rgb(181, 133, 117)', 'JANA': 'rgb(236, 45, 102)', 'AINA': 'rgb(161, 131, 14)', 'SOFIA': 'rgb(182, 156, 178)', 'NOA': 'rgb(229, 93, 148)', 'EMMA': 'rgb(138, 28, 180)', 'VALENTINA': 'rgb(190, 32, 109)', 'CHLOE': 'rgb(239, 180, 145)', 'ONA': 'rgb(234, 161, 43)', 'MIA': 'rgb(125, 57, 145)', 'OLIVIA': 'rgb(175, 131, 105)'}


In [27]:
# dictionary for males names
mapping_colors_man = name_to_color(df_man.Name, 0, 185, 0, 185, 125, 255)
# show the mapping
print(mapping_colors_man)

{'MARC': 'rgb(80, 7, 230)', 'DAVID': 'rgb(163, 37, 158)', 'DANIEL': 'rgb(28, 95, 195)', 'ALBERT': 'rgb(135, 100, 197)', 'VICTOR': 'rgb(121, 182, 151)', 'JORDI': 'rgb(104, 159, 125)', 'SERGI': 'rgb(168, 109, 230)', 'ALEJANDRO': 'rgb(71, 100, 182)', 'POL': 'rgb(101, 48, 131)', 'ALEX': 'rgb(125, 55, 229)', 'PAU': 'rgb(149, 97, 174)', 'ARNAU': 'rgb(165, 83, 178)', 'GERARD': 'rgb(73, 170, 221)', 'JOAN': 'rgb(31, 50, 161)', 'PABLO': 'rgb(82, 140, 155)', 'JAN': 'rgb(169, 40, 253)', 'MARTI': 'rgb(73, 64, 185)', 'ERIC': 'rgb(71, 113, 158)', 'HUGO': 'rgb(9, 174, 250)', 'BIEL': 'rgb(81, 69, 154)', 'NIL': 'rgb(46, 116, 136)', 'BRUNO': 'rgb(63, 74, 158)', 'LUCAS': 'rgb(71, 16, 248)', 'LEO': 'rgb(4, 105, 181)'}


In [28]:
# create a column Color - matching colors with names (according to the mapping_colors_woman dictionary)
df_woman['Color'] = df_woman['Name'].map(mapping_colors_woman)

# create a column Color - matching colors with names (according to the mapping_colors_man dictionary)
df_man['Color'] = df_man['Name'].map(mapping_colors_man)

# first five rows of the df_man data frame
df_man.head()



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



Unnamed: 0,Order,Name,Sex,Year,Number,Color
600,1,MARC,Man,1996,295,"rgb(80, 7, 230)"
601,2,DAVID,Man,1996,198,"rgb(163, 37, 158)"
602,3,DANIEL,Man,1996,174,"rgb(28, 95, 195)"
603,4,ALBERT,Man,1996,151,"rgb(135, 100, 197)"
604,5,VICTOR,Man,1996,145,"rgb(121, 182, 151)"


In [29]:
import plotly.graph_objects as go


def frames_animation(df, title):
    """Creation of a sequence of frames.
    Parameters:
    df (DataFrame): Pandas data frame containing the categorical variable ['Name'],
    the count ['Number'], the year ['Year'], and the color['Color'] (separated columns).
    title (string): Title of each frame.
    Returns:
    list_of_frames (list): List of frames. Each frame contains a bar plot of a year.
    """
    
    list_of_frames = []
    initial_year = df['Year'].min()
    final_year = df['Year'].max()

    for year in range(initial_year, final_year):
            fdata = df[df['Year'] == year]
            list_of_frames.append(go.Frame(data=[go.Bar(x=fdata['Name'], y=fdata['Number'],
                                                        marker_color=fdata['Color'], hoverinfo='none',
                                                        textposition='outside', texttemplate='%{x}<br>%{y}',
                                                        cliponaxis=False)],
                                           layout=go.Layout(font={'size': 14},
                                                            plot_bgcolor = '#FFFFFF',
                                                            xaxis={'showline': False, 'visible': False},
                                                            yaxis={'showline': False, 'visible': False},
                                                            bargap=0.15,
                                                            title=title + str(year))))
    return list_of_frames 

In [30]:
def bar_race_plot (df, title, list_of_frames):
    """Creation of the bar chart race figure.
    Parameters:
    df (DataFrame): Pandas data frame containing the categorical variable ['Name'],
    the count ['Number'], the year ['Year'], and the color ['Color'] (separated columns).
    title (string): Title of the initial bar plot.
    list_of_frames (list): List of frames. Each frame contains a bar plot of a year.
    Returns:
    fig (figure instance): Bar chart race
    """
    
    # initial year - names (categorical variable), number of babies (numerical variable), and color
    initial_year = df['Year'].min()
    initial_names = df[df['Year'] == initial_year].Name
    initial_numbers = df[df['Year'] == initial_year].Number
    initial_color = df[df['Year'] == initial_year].Color
    range_max = df['Number'].max()
    
    fig = go.Figure(
        data=[go.Bar(x=initial_names, y=initial_numbers,
                       marker_color=initial_color, hoverinfo='none',
                       textposition='outside', texttemplate='%{x}<br>%{y}',
                       cliponaxis=False)],
        layout=go.Layout(font={'size': 14}, plot_bgcolor = '#FFFFFF',
                         xaxis={'showline': False, 'visible': False},
                         yaxis={'showline': False, 'visible': False, 'range': (0, range_max)},
                         bargap=0.15, title=title + str(initial_year),
                         updatemenus=[dict(type="buttons",
                                           buttons=[dict(label="Play",
                                                         method="animate",
                                                         args=[None,{"frame": {"duration": 2000, "redraw": True}, "fromcurrent": True}]),
                                                    dict(label="Stop",
                                                         method="animate",
                                                         args=[[None],{"frame": {"duration": 0, "redraw": False}, "mode": "immediate","transition": {"duration": 0}}])])]),
        frames=list(list_of_frames))
    
    return fig

In [31]:
title = 'Most popular baby names of '
list_of_frames = frames_animation(df_woman, title)
fig = bar_race_plot(df_woman, title, list_of_frames)
fig.show()

In [32]:
title = 'Most popular baby names of '
list_of_frames = frames_animation(df_man, title)
fig = bar_race_plot(df_man, title, list_of_frames)
fig.show()

In [33]:
df_man

Unnamed: 0,Order,Name,Sex,Year,Number,Color
600,1,MARC,Man,1996,295,"rgb(80, 7, 230)"
601,2,DAVID,Man,1996,198,"rgb(163, 37, 158)"
602,3,DANIEL,Man,1996,174,"rgb(28, 95, 195)"
603,4,ALBERT,Man,1996,151,"rgb(135, 100, 197)"
604,5,VICTOR,Man,1996,145,"rgb(121, 182, 151)"
...,...,...,...,...,...,...
1180,6,POL,Man,2019,81,"rgb(101, 48, 131)"
1181,7,ERIC,Man,2019,79,"rgb(71, 113, 158)"
1182,8,NIL,Man,2019,77,"rgb(46, 116, 136)"
1183,9,PAU,Man,2019,77,"rgb(149, 97, 174)"


In [34]:
my_raceplot = barplot(df_man,  item_column='Name', value_column='Number', time_column='Year')

my_raceplot.plot(item_label = 'Top 10 crops',
                 value_label = 'Production quantity (tonnes)',
                 time_label = 'Year: ', ## overwrites default `Date: `
                 frame_duration = 300)



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



In [35]:
import pandas as pd
import numpy as np
import chart_studio.plotly as py
import cufflinks as cf
import seaborn as sns
import plotly.express as px

In [36]:
from plotly.offline import download_plotlyjs, init_notebook_mode, plot, iplot
init_notebook_mode(connected=True)
# Use Plotly locally
cf.go_offline()

In [40]:
px.bar(df_man, x="Name", y="Number", color="Name",
 animation_frame="Year" )