In [83]:
# Load libraries
import pandas as pd
import plotly.express as px

import warnings
warnings.filterwarnings('ignore')

In [84]:
# Load the Gapminder example dataset that ships with plotly express
data = px.data.gapminder()

# Preview the first five rows
data.head(5)

Unnamed: 0,country,continent,year,lifeExp,pop,gdpPercap,iso_alpha,iso_num
0,Afghanistan,Asia,1952,28.801,8425333,779.445314,AFG,4
1,Afghanistan,Asia,1957,30.332,9240934,820.85303,AFG,4
2,Afghanistan,Asia,1962,31.997,10267083,853.10071,AFG,4
3,Afghanistan,Asia,1967,34.02,11537966,836.197138,AFG,4
4,Afghanistan,Asia,1972,36.088,13079460,739.981106,AFG,4


In [85]:
# Total number of missing cells across the whole frame
# (per-column counts first, then summed into a single number)
data.isna().sum().sum()

np.int64(0)

In [86]:
# Data information: column names, non-null counts, dtypes and memory usage
data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1704 entries, 0 to 1703
Data columns (total 8 columns):
 #   Column     Non-Null Count  Dtype  
---  ------     --------------  -----  
 0   country    1704 non-null   object 
 1   continent  1704 non-null   object 
 2   year       1704 non-null   int64  
 3   lifeExp    1704 non-null   float64
 4   pop        1704 non-null   int64  
 5   gdpPercap  1704 non-null   float64
 6   iso_alpha  1704 non-null   object 
 7   iso_num    1704 non-null   int64  
dtypes: float64(2), int64(3), object(3)
memory usage: 106.6+ KB


In [87]:
# Store the two label columns (country, continent) as pandas categoricals.
# Both casts are applied in a single astype call via a column -> dtype mapping.
data = data.astype({'country': 'category', 'continent': 'category'})

In [88]:
# Summary statistics (count, mean, std, min, quartiles, max) of the numeric columns
data.describe()

Unnamed: 0,year,lifeExp,pop,gdpPercap,iso_num
count,1704.0,1704.0,1704.0,1704.0,1704.0
mean,1979.5,59.474439,29601210.0,7215.327081,425.880282
std,17.26533,12.917107,106157900.0,9857.454543,248.305709
min,1952.0,23.599,60011.0,241.165876,4.0
25%,1965.75,48.198,2793664.0,1202.060309,208.0
50%,1979.5,60.7125,7023596.0,3531.846989,410.0
75%,1993.25,70.8455,19585220.0,9325.462346,638.0
max,2007.0,82.603,1318683000.0,113523.1329,894.0


Population by Continent

In [89]:
# Bar chart of population per continent.
# NOTE(review): the raw frame is passed in, so every row (one country in one
# year) contributes a stacked segment to its continent's bar - confirm that
# summing over all years is intended.
fig = px.bar(
    data_frame=data,
    x='continent',
    y='pop',
    title='Population by continent',
)
# Remove the outline around each segment and keep the bars fully opaque
fig.update_traces(marker_line_width=0, opacity=1)

fig.show()

__Univariate Visualization__

A univariate plot displays only one variable. Common examples are:
- Bar chart
- Histogram
- Box plot
- Density plots

In [90]:
# Histogram of life expectancy, requesting at most 20 bins
fig = px.histogram(
    data_frame=data,
    x='lifeExp',
    nbins=20,
    title='Life Expectancy Distribution',
)
fig.show()

Box plot

In [91]:
# Box plot of the iso_num column.
# NOTE(review): iso_num looks like a numeric country code rather than a
# measurement, so this plot only demonstrates the px.box API - confirm.
fig = px.box(
    data_frame=data,
    y='iso_num',
    title='ISO num Distribution',
)
fig.show()

Customizing Colours

Use the _color_ argument to set the color of each bar automatically

In [92]:
# Plot population by continent, letting plotly pick one colour per continent.
# The x axis groups by continent, so the title says "Continent"
# (it previously read "Country", which did not match the chart).
fig = px.bar(
    data,
    x='continent',
    y='pop',
    color='continent',
    title='Population by Continent',
)
# Remove the outline around each stacked segment and keep the bars opaque
fig.update_traces(opacity=1, marker_line_width=0)

fig.show()

Using *color_discrete_map* to define a specific color for each member in a column

In [93]:
# Plot population by continent with an explicit colour for each continent
continent_colors = {
    'Asia': 'blue',
    'Europe': 'red',
    'Africa': 'green',
    'Americas': 'black',
    'Oceania': 'purple',
}
fig = px.bar(
    data_frame=data,
    x='continent',
    y='pop',
    color='continent',
    color_discrete_map=continent_colors,
    title='Population by continent',
)
# Remove the outline around each stacked segment and keep the bars opaque
fig.update_traces(marker_line_width=0, opacity=1)
fig.show()

Using *color_continuous_scale* for color gradient

In [94]:
# Population by continent, with each segment coloured by its population value
# on the built-in 'inferno' gradient.
# The title now describes the plotted data; the old one
# ('Sleep Hours by Stress Level') was left over from an unrelated example.
fig = px.bar(
    data,
    x='continent',
    y='pop',
    title='Population by continent',
    color='pop',
    color_continuous_scale='inferno',
)
# Remove the outline around each stacked segment and keep the bars opaque
fig.update_traces(opacity=1, marker_line_width=0)
fig.show()

Constructing color range

In [95]:
# Custom continuous colour scale: red -> yellow -> green -> blue.
# The name `scale` is reused by the correlation heatmap further down.
scale = ['rgb(255,0,0)', 'rgb(255,255,0)', 'rgb(0,255,0)', 'rgb(0,0,255)']

# Population by continent, with each segment coloured by its population value
# on the custom gradient.
# The title now describes the plotted data; the old one
# ('Sleep Hours by Stress Level') was left over from an unrelated example.
fig = px.bar(
    data,
    x='continent',
    y='pop',
    title='Population by continent',
    color='pop',
    color_continuous_scale=scale,
)
# Remove the outline around each stacked segment and keep the bars opaque
fig.update_traces(opacity=1, marker_line_width=0)
fig.show()

__Bivariate Visualization__

A bivariate plot displays the relationship between two variables. Common examples are:
- scatter plots
- Line plot
- Correlation plot

Scatter Plot

In [96]:
# Scatter plot: life expectancy on the x axis, GDP per capita on the y axis
fig = px.scatter(
    data_frame=data,
    x='lifeExp',
    y='gdpPercap',
    title="Scatter plot of GDP per cap Versus Life Expectancy",
)
fig.show()

Line plot

In [97]:
# Plot the population of a single country (Nigeria) over time.
# Keep only the rows whose country is Nigeria.
is_nigeria = data['country'].eq('Nigeria')
nigeria_df = data[is_nigeria]

fig = px.line(
    data_frame=nigeria_df,
    x='year',
    y='pop',
    title='Nigerian Population Growth',
)
fig.show()

Correlation plot

In [98]:
# Keep only the numeric columns. select_dtypes already returns the sub-frame,
# so there is no need to index `data` again with its column names, and
# 'number' is the documented selector for every numeric dtype.
numerical_column = data.select_dtypes(include='number')

# Pairwise Pearson correlation between the numeric columns
data_corr = numerical_column.corr(method='pearson')

# Display the matrix rounded to three decimals (data_corr itself is unrounded)
data_corr.round(3)

Unnamed: 0,year,lifeExp,pop,gdpPercap,iso_num
year,1.0,0.436,0.082,0.227,0.0
lifeExp,0.436,1.0,0.065,0.584,-0.007
pop,0.082,0.065,1.0,-0.026,-0.06
gdpPercap,0.227,0.584,-0.026,1.0,0.008
iso_num,0.0,-0.007,-0.06,0.008,1.0


In [99]:
# Heatmap of the correlation matrix.
# The colour range is pinned to [-1, 1], each cell is labelled with its value,
# and the custom `scale` defined in an earlier cell supplies the colours.
fig = px.imshow(
    data_corr,
    zmin=-1,
    zmax=1,
    color_continuous_scale=scale,
    text_auto=True,
)
fig.show()

Customizing hover information and legends

*Hover information* is the text that appears when the mouse pointer moves over a data point. The default hovermode is *closest*; however, this can be changed.

In [100]:
# Nigerian population over time, with customised hover text
nigeria_df = data.loc[data['country'] == 'Nigeria']

fig = px.line(
    data_frame=nigeria_df,
    x='year',
    y='pop',
    title='Nigerian Population Growth',
    # Country name shown in bold at the top of the hover box
    hover_name='country',
    # Extra column listed in the hover box
    hover_data=['pop'],
)
fig.show()

In [101]:
# Scatter plot of GDP per capita against life expectancy, one colour per
# continent, with extra columns in the hover box
hover_columns = ["continent", "lifeExp", "gdpPercap"]

fig = px.scatter(
    data,
    x="lifeExp",
    y="gdpPercap",
    color="continent",
    # Bold heading of the hover box
    hover_name="country",
    # Columns listed underneath the heading
    hover_data=hover_columns,
)

# Render the figure
fig.show()

In [102]:
# Population by continent with explicit colours and a customised legend
continent_colors = {
    'Asia': 'blue',
    'Europe': 'red',
    'Africa': 'green',
    'Americas': 'black',
    'Oceania': 'purple',
}
fig = px.bar(
    data_frame=data,
    x='continent',
    y='pop',
    color='continent',
    color_discrete_map=continent_colors,
    title='Population by continent',
)
# Remove the outline around each stacked segment and keep the bars opaque
fig.update_traces(marker_line_width=0, opacity=1)

# Legend: position given by x/y, orange background, titled "continent"
legend_style = {'x': 0.9, 'y': 1.0, 'bgcolor': "orange"}
fig.update_layout(
    showlegend=True,
    legend_title_text="continent",
    legend=legend_style,
)
fig.show()

In [103]:
# Scatter plot of GDP per capita against life expectancy, coloured by continent
fig = px.scatter(
    data,
    x="lifeExp",
    y="gdpPercap",
    color="continent",
)

# Legend position passed to the layout
my_legend = {'x': 0.2, 'y': 0.95}

# Show the legend at that position
fig.update_layout(legend=my_legend, showlegend=True)

# Render the figure
fig.show()

Adding annotations

In [104]:
# Create the scatterplot
fig = px.scatter(
  data_frame=data,
  x="lifeExp",
  y="gdpPercap",
  color="continent",
  # Add columns to the hover information
  hover_data=["continent", "lifeExp", "gdpPercap"],
  # Add a bold variable in hover information
  hover_name="country"
)

# Look up the row with the highest life expectancy so the arrow is anchored to
# the actual data point instead of hand-copied coordinates.
top_life_row = data.loc[data["lifeExp"].idxmax()]
annot = dict(
    # Plain Python floats keep the layout independent of numpy scalar types
    x=float(top_life_row["lifeExp"]),
    y=float(top_life_row["gdpPercap"]),
    showarrow=True, arrowhead=3,
    text='Highest Life Expectancy',
    font=dict(size=10, color='red'),
)
fig.update_layout(annotations=[annot])
# Show the plot
fig.show()

In [105]:
# Create the scatterplot
fig = px.scatter(
  data_frame=data,
  x="lifeExp",
  y="gdpPercap",
  color="continent",
  # Add columns to the hover information
  hover_data=["continent", "lifeExp", "gdpPercap"],
  # Add a bold variable in hover information
  hover_name="country"
)

# Look up the row with the largest population so the arrow is anchored to the
# actual data point instead of hand-copied coordinates.
largest_pop_row = data.loc[data["pop"].idxmax()]
annot = dict(
    # Plain Python floats keep the layout independent of numpy scalar types
    x=float(largest_pop_row["lifeExp"]),
    y=float(largest_pop_row["gdpPercap"]),
    showarrow=True, arrowhead=5,
    text='Largest Population',
    font=dict(size=15, color='red'),
)
fig.update_layout(annotations=[annot])
# Show the plot
fig.show()

Floating annotation

In [106]:
# Scatter plot of GDP per capita against life expectancy, coloured by continent
hover_columns = ["continent", "lifeExp", "gdpPercap"]
fig = px.scatter(
    data,
    x="lifeExp",
    y="gdpPercap",
    color="continent",
    # Bold heading of the hover box
    hover_name="country",
    # Columns listed underneath the heading
    hover_data=hover_columns,
)

# Floating annotation: x/y are given in 'paper' coordinates rather than data
# coordinates, and no arrow is drawn.
float_annotation = {
    'xref': 'paper',
    'yref': 'paper',
    'x': 0.5,
    'y': 0.8,
    'showarrow': False,
    'text': "Largest Population",
    'font': {'size': 15, 'color': 'black'},
    'bgcolor': "rgb(250,0,0)",
}
fig.update_layout(annotations=[float_annotation])

# Render the figure
fig.show()

In [107]:
# Row(s) whose population equals the maximum population in the dataset
max_pop = data['pop'].max()
data[data['pop'] == max_pop]

Unnamed: 0,country,continent,year,lifeExp,pop,gdpPercap,iso_alpha,iso_num
299,China,Asia,2007,72.961,1318683096,4959.114854,CHN,156


In [108]:
# Number of rows per continent (each row is one country in one year)
data['continent'].value_counts()

continent
Africa      624
Asia        396
Europe      360
Americas    300
Oceania      24
Name: count, dtype: int64

In [109]:
# Count rows per continent once, so the bars and the annotation heights come
# from the same source instead of hand-copied numbers.
# NOTE(review): these are row counts (one row per country per year), not the
# number of distinct countries - the annotation texts may overstate this.
continent_counts = data['continent'].value_counts()
fig = px.bar(continent_counts)

# Create the first annotation, anchored to the top of the Africa bar
high_annotation = dict(
    x='Africa', y=int(continent_counts['Africa']), text="African Countries",
    font=dict(color="black"), showarrow=True, arrowhead=4
)

# Create the second annotation, anchored to the top of the Oceania bar
low_annotation = dict(
    x='Oceania', y=int(continent_counts['Oceania']), text="Oceanic countries",
    font=dict(color="black"), showarrow=True, arrowhead=4,
)

# Add annotations to the figure
fig.update_layout(annotations=[high_annotation, low_annotation])

# Show the plot
fig.show()

Editing plot axes

In [110]:
# Redraw the population-by-continent bar chart with a logarithmic y axis
fig = px.bar(
    data_frame=data,
    x='continent',
    y='pop',
    title='Population by continent',
    log_y=True,
)
# Remove the outline around each stacked segment and keep the bars opaque
fig.update_traces(marker_line_width=0, opacity=1)
fig.show()

In [111]:
# Scatter plot of GDP per capita against life expectancy with readable axis titles
hover_columns = ["continent", "lifeExp", "gdpPercap"]
fig = px.scatter(
    data,
    x="lifeExp",
    y="gdpPercap",
    color="continent",
    title="Scatter plot of GDP per cap Versus Life Expectancy",
    # Bold heading of the hover box
    hover_name="country",
    # Columns listed underneath the heading
    hover_data=hover_columns,
)

# Replace the raw column names on the axes with descriptive titles
axis_titles = {"xaxis_title": "Life Expectancy", "yaxis_title": "GDP per Capita"}
fig.update_layout(**axis_titles)
fig.show()