# Plotly

Plotly is a data-visualization library developed by the company Plotly (based in Canada), with support for languages such as Python, JavaScript, Julia, etc.

- reference : https://plotly.com/python/plotly-express/

## Advantages

- Multi-language support
- Lots of graphs
- Interactive plots that can be embedded in web pages.
- Beautiful plots

**Limitation:** Plotly on its own does not work with live data streams. Dash can be explored for that.

## The Plotly Roadmap

- Plotly Go (Graph Objects, i.e. `plotly.graph_objects`)
- Plotly Express
- Dash

---

## Working with Plotly Go

In [2]:
# pip install plotly

In [3]:
import pandas as pd
import numpy as np
import plotly.graph_objs as go
import plotly.express as px

In [4]:
# Load the sample datasets that ship with Plotly Express
gap = px.data.gapminder()   # country / continent / year indicators
iris = px.data.iris()       # iris flower measurements
tip = px.data.tips()        # restaurant tips

In [5]:
# Preview the first five rows of the gapminder data
gap.head(n=5)

Unnamed: 0,country,continent,year,lifeExp,pop,gdpPercap,iso_alpha,iso_num
0,Afghanistan,Asia,1952,28.801,8425333,779.445314,AFG,4
1,Afghanistan,Asia,1957,30.332,9240934,820.85303,AFG,4
2,Afghanistan,Asia,1962,31.997,10267083,853.10071,AFG,4
3,Afghanistan,Asia,1967,34.02,11537966,836.197138,AFG,4
4,Afghanistan,Asia,1972,36.088,13079460,739.981106,AFG,4


In [6]:
# Keep only the 2007 rows; the Plotly Go scatter plots below are drawn from this subset
temp = gap.loc[gap['year'] == 2007]
temp

Unnamed: 0,country,continent,year,lifeExp,pop,gdpPercap,iso_alpha,iso_num
11,Afghanistan,Asia,2007,43.828,31889923,974.580338,AFG,4
23,Albania,Europe,2007,76.423,3600523,5937.029526,ALB,8
35,Algeria,Africa,2007,72.301,33333216,6223.367465,DZA,12
47,Angola,Africa,2007,42.731,12420476,4797.231267,AGO,24
59,Argentina,Americas,2007,75.320,40301927,12779.379640,ARG,32
...,...,...,...,...,...,...,...,...
1655,Vietnam,Asia,2007,74.249,85262356,2441.576404,VNM,704
1667,West Bank and Gaza,Asia,2007,73.422,4018332,3025.349798,PSE,275
1679,"Yemen, Rep.",Asia,2007,62.698,22211743,2280.769906,YEM,887
1691,Zambia,Africa,2007,42.384,11746035,1271.211593,ZMB,894


In [7]:
# Assemble the figure from explicit pieces: one marker trace plus a layout
trace1 = go.Scatter(
    x=temp['lifeExp'],
    y=temp['gdpPercap'],
    mode='markers',
)
data = [trace1]
layout = go.Layout(
    title='Life Expectancy vs GDP per Capita for 2007',
    xaxis=dict(title='Life Expectancy'),
    yaxis=dict(title='GDP per Capita'),
)
fig = go.Figure(data=data, layout=layout)
fig.show()

- we can add more traces to the same plot
- we can add lines, shapes, annotations etc.

In [8]:
# Same marker trace as before ...
trace1 = go.Scatter(
    x=temp['lifeExp'],
    y=temp['gdpPercap'],
    mode='markers',
)
# ... plus a hand-written line trace drawn on the same axes
trace2 = go.Scatter(
    x=[0, 25, 40, 55, 70, 78],
    y=[0, 15000, 18000, 33000, 12000, 48000],
    mode='lines',
)
data = [trace1, trace2]
layout = go.Layout(
    title='Life Expectancy vs GDP per Capita for 2007',
    xaxis=dict(title='Life Expectancy'),
    yaxis=dict(title='GDP per Capita'),
)
fig = go.Figure(data=data, layout=layout)
fig.show()

## Plotly Express
- Plotly Express is a high level wrapper around Plotly Go
- Less code
- Less flexibility
- Easier to use and good for quick visualisations.

In [9]:
# Life expectancy vs GDP per capita for 2007 with Plotly Express:
# colour by continent, bubble size by population, country name on hover,
# and fixed x / y axis ranges
fig = px.scatter(
    temp,
    x='lifeExp',
    y='gdpPercap',
    color='continent',
    size='pop',
    size_max=80,
    hover_name='country',
    range_x=[35, 90],
    range_y=[-20000, 60000],
    title='Life Expectancy vs GDP per Capita for 2007',
)
fig.show()

In [10]:
# Animated version of the bubble chart above, covering every year in `gap`:
# one animation frame per year, with bubbles matched across frames by country.
# The title no longer carries the stray words "quick visualisations".
fig = px.scatter(
    gap,
    x='lifeExp',
    y='gdpPercap',
    color='continent',
    size='pop',
    size_max=80,
    hover_name='country',
    range_x=[35, 90],
    range_y=[-20000, 60000],
    title='Life Expectancy vs GDP per Capita for all years',
    animation_frame='year',
    animation_group='country',
)
fig.show()

**`line plot`**

In [11]:
 # plot a line plot for india pop growth over the years
# India-only rows, then population against year as a line
ind = gap.loc[gap['country'] == 'India']
fig = px.line(
    ind,
    x='year',
    y='pop',
    title='Population Growth of India over the years',
)
fig.show()

In [12]:
# temp = gap[gap['country'].isin(['India' , 'China' , 'Pakistan'])]
# px.line(temp , x = temp.index , y = temp.columns , title = 'Life Expectancy of India , China and Pakistan over the years')

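`ValueError`: Plotly Express cannot process wide-form data with columns of different type. (This is the error raised by the commented-out call above, which passes every column of the long-form frame as `y`.)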

In [13]:
# plot life expectancy of india , china and pakistan over the years using line plot
# we can do it directly using px.line but here we will do it using pivot table
# the long-form frame keeps every country's values in a single lifeExp column ,
# so pivot it to wide form : one row per year and one lifeExp column per country
temp = gap[gap['country'].isin(['India' , 'China' , 'Pakistan'])].pivot(index='year' , columns='country' , values='lifeExp')
temp

country,China,India,Pakistan
year,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
1952,44.0,37.373,43.436
1957,50.54896,40.249,45.557
1962,44.50136,43.605,47.67
1967,58.38112,47.193,49.8
1972,63.11888,50.651,51.929
1977,63.96736,54.208,54.043
1982,65.525,56.596,56.158
1987,67.274,58.553,58.245
1992,68.69,60.223,60.838
1997,70.426,61.765,61.818


In [14]:
# Wide-form line plot: the index (year) on x and one line per country column
px.line(
    temp,
    x=temp.index,
    y=temp.columns,
    title='Life Expectancy of India , China and Pakistan over the years',
)

**`Bar chart`**

In [15]:
# India's population per year as a bar chart
fig = px.bar(
    ind,
    x='year',
    y='pop',
    title='Population Growth of India over the years',
)

# White plot area and page background, with black text
fig.update_layout(plot_bgcolor='white', paper_bgcolor='white', font_color='black')

fig.show()

In [16]:
# Grouped bars: population of India, China and Pakistan side by side for each year
# (log-scaled y axis, bar values printed on the bars)
temp = gap.loc[gap['country'].isin(['India', 'China', 'Pakistan'])]
fig = px.bar(
    temp,
    x='year',
    y='pop',
    color='country',
    barmode='group',
    title='Population Growth of India , China and Pakistan over the years',
    log_y=True,
    text_auto=True,
)
fig.show()

In [17]:
# Stacked bars: same three countries, no barmode given, so the default bar mode applies
temp = gap.loc[gap['country'].isin(['India', 'China', 'Pakistan'])]
fig = px.bar(
    temp,
    x='year',
    y='pop',
    color='country',
    title='Population Growth of India , China and Pakistan over the years',
)
fig.show()

In [18]:
# gdp per capita comparison of 3 countries
# wide form : one row per year , one gdpPercap column per country
temp_df = gap[gap['country'].isin(['India','China','Pakistan'])].pivot(index='year',columns='country',values='gdpPercap')
temp_df

country,China,India,Pakistan
year,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
1952,400.448611,546.565749,684.597144
1957,575.987001,590.061996,747.083529
1962,487.674018,658.347151,803.342742
1967,612.705693,700.770611,942.408259
1972,676.900092,724.032527,1049.938981
1977,741.23747,813.337323,1175.921193
1982,962.421381,855.723538,1443.429832
1987,1378.904018,976.512676,1704.686583
1992,1655.784158,1164.406809,1971.829464
1997,2289.234136,1458.817442,2049.350521


In [19]:
# Grouped bars from the wide-form GDP-per-capita table
# (log-scaled y axis, values printed on the bars)
px.bar(
    temp_df,
    x=temp_df.index,
    y=temp_df.columns,
    title='GDP per Capita of India , China and Pakistan over the years',
    barmode='group',
    text_auto=True,
    log_y=True,
)

In [20]:
# Stacked bars from the same wide-form GDP-per-capita table
px.bar(
    temp_df,
    x=temp_df.index,
    y=temp_df.columns,
    title='GDP per Capita of India , China and Pakistan over the years',
    text_auto=True,
)

In [21]:
# Stacked bars for 2007: one bar per continent, one coloured segment per country,
# segment height given by population (log-scaled y axis)
temp_df = gap.loc[gap['year'] == 2007]
px.bar(
    temp_df,
    x='continent',
    y='pop',
    color='country',
    log_y=True,
    title='Population contribution of countries to a continent for the year 2007',
)

In [22]:
# Animated bar chart: one frame per year, bars matched across frames by country,
# with a fixed y range
px.bar(
    gap,
    x='continent',
    y='pop',
    color='continent',
    title='Population contribution of countries to a continent for all years',
    animation_frame='year',
    animation_group='country',
    range_y=[0, 4_500_000_000],
)

In [23]:
# Histogram of 2007 life expectancy across all countries
# nbins sets the number of bins; text_auto prints the count on each bar
temp_df = gap.loc[gap['year'] == 2007]
px.histogram(
    temp_df,
    x='lifeExp',
    nbins=10,
    text_auto=True,
    color_discrete_sequence=['indianred'],
    title='Life Expectancy of all countries in 2007',
)

In [24]:
# Histogram of iris sepal length, coloured by species
px.histogram(
    iris,
    x='sepal_length',
    color='species',
    nbins=30,
    title='Sepal Length of all Iris Species',
    text_auto=True,
)

In [25]:
# Pie chart: `values` sets the slice sizes, `names` sets the slice labels
# Population share of each European country in 2007
temp = gap.loc[gap['continent'].eq('Europe') & gap['year'].eq(2007)]
px.pie(
    temp,
    values='pop',
    names='country',
    title='Population of European countries in 2007',
)

In [26]:
# plot pie chart of world pop in 1952 continent wise -> explode(pull) asia
# Aggregate first so there is exactly one row (and therefore one slice) per continent
temp = gap[gap['year'] == 1952].groupby('continent')['pop'].sum().reset_index()
fig = px.pie(temp , values = 'pop' , names = 'continent' , title ='Population of World in 1952 continent wise' )

# px.pie has no explode argument of its own, but the pie trace it builds accepts `pull`:
# one fraction per label, so only the Asia slice is offset from the centre.
# The list is built from the trace's own labels so it always lines up with the slices.
fig.update_traces(pull=[0.2 if label == 'Asia' else 0 for label in fig.data[0].labels])
fig.show()

`Sunburst Plot`
- Sunburst plots visualize hierarchical data spanning outwards radially from root to leaves.
- The hierarchy is represented by a sequence of rings, where the root node is the innermost circle and the leaves are the outermost rings.
- here we can only use categorical variables in path

In [27]:
# Sunburst plot: `path` lists the hierarchy levels from the centre outwards,
# `values` sizes the sectors and `color` shades them
temp_df = gap.loc[gap['year'] == 2007]
px.sunburst(
    temp_df,
    path=['continent', 'country'],
    values='pop',
    color='lifeExp',
    title='Population of countries in each continent for the year 2007',
)

# For more levels, add a constant root column and put it first in the path:
# temp_df['planet'] = 'earth'
# px.sunburst(temp_df, path=['planet','continent','country'], values='pop')

In [28]:
# Load the 'tips' dataset through seaborn and display it
# NOTE(review): px.data.tips() was already loaded into `tip` in an earlier cell -- confirm whether the seaborn copy is needed
import seaborn as sns
tips = sns.load_dataset('tips')
tips

Unnamed: 0,total_bill,tip,sex,smoker,day,time,size
0,16.99,1.01,Female,No,Sun,Dinner,2
1,10.34,1.66,Male,No,Sun,Dinner,3
2,21.01,3.50,Male,No,Sun,Dinner,3
3,23.68,3.31,Male,No,Sun,Dinner,2
4,24.59,3.61,Female,No,Sun,Dinner,4
...,...,...,...,...,...,...,...
239,29.03,5.92,Male,No,Sat,Dinner,3
240,27.18,2.00,Female,Yes,Sat,Dinner,2
241,22.67,2.00,Male,Yes,Sat,Dinner,2
242,17.82,1.75,Male,No,Sat,Dinner,2


In [29]:
# Four-level sunburst of the tips data: sex -> smoker -> day -> time,
# sectors sized by total bill and coloured by tip
px.sunburst(
    tips,
    path=['sex', 'smoker', 'day', 'time'],
    values='total_bill',
    color='tip',
    title='Sunburst plot of tips dataset',
)

`treemap`
- Treemaps display hierarchical data as a set of nested rectangles.
- The area of each rectangle is proportional to a specified dimension of the data.

In [30]:
# Treemap for 2007 with a constant 'World' root above continent and country;
# rectangles sized by population and coloured by life expectancy
temp_df = gap.loc[gap['year'] == 2007]
px.treemap(
    temp_df,
    path=[px.Constant('World'), 'continent', 'country'],
    values='pop',
    color='lifeExp',
    title='Population of countries in each continent for the year 2007',
)

- here color is just like hue in seaborn

In [31]:
# Heatmap -> total bill summed per day (rows) and sex (columns) from the tips dataset
temp_df = tips.pivot_table(index='day', columns='sex', values='total_bill', aggfunc='sum')

px.imshow(temp_df)





In [32]:
# Heatmap of mean life expectancy: years as rows, continents as columns
temp_df = gap.pivot_table(
    index='year',
    columns='continent',
    values='lifeExp',
    aggfunc='mean',
)
px.imshow(
    temp_df,
    title='Heatmap of avg life expectancy of continents over the years',
)

`3D scatter plot`
- A three-dimensional scatter plot is a graphical representation of data points in a three-dimensional space, where each point is defined by three coordinates (x, y, z).
- It is used to visualize the relationship between three continuous variables and can help identify patterns, trends, and clusters within the data.
- generally used with numeric data

In [33]:
# plot a 3D scatter plot of all countries in 2007 with life exp on x , pop on y (log scale) and gdp per cap on z axis
# NOTE(review): size_max is passed without a size column -- confirm whether size = 'pop' was intended
temp_df = gap[gap['year'] == 2007]
px.scatter_3d(temp_df , x = 'lifeExp' , y = 'pop' , z = 'gdpPercap' , color = 'continent' , size_max = 60 , hover_name = 'country' , title = '3D scatter plot of all countries in 2007' , log_y = True)

In [34]:
# 3D scatter of three iris measurements, coloured by species
px.scatter_3d(
    iris,
    x='sepal_length',
    y='sepal_width',
    z='petal_width',
    color='species',
    hover_name='species',
    title='3D scatter plot of all iris species',
)

In [35]:
# Scatter matrix: `dimensions` lists the columns plotted against each other.
# `symbol` gives each species its own marker shape and `height` enlarges the figure.
px.scatter_matrix(
    iris,
    dimensions=['sepal_length', 'sepal_width', 'petal_length', 'petal_width'],
    color='species',
    title='Scatter matrix of all iris species',
    symbol='species',
    height=700,
)

In [41]:
# Facet plot: one panel column per smoker value, one panel row per sex value
px.scatter(
    tips,
    x='total_bill',
    y='tip',
    facet_col='smoker',
    facet_row='sex',
)

In [42]:
# Total-bill histogram split into one panel per sex
px.histogram(
    tips,
    x='total_bill',
    facet_col='sex',
)

In [47]:
# Bubble chart faceted by year: one panel per year, wrapping after four columns
px.scatter(
    gap,
    x='lifeExp',
    y='gdpPercap',
    color='continent',
    size='pop',
    size_max=80,
    hover_name='country',
    range_x=[35, 90],
    range_y=[-20000, 60000],
    title='Life Expectancy vs GDP per Capita ',
    facet_col='year',
    facet_col_wrap=4,
)

- This is different from subplots, since here all the plots need to be of the same type.
- Subplots can have different types of plots.

In [53]:
# 3D surface plot of z = x^2 + y^2, built with graph objects (go.Surface)
import plotly.graph_objects as go

# 100 evenly spaced samples on [-5, 5] per axis, expanded to a grid
x = np.linspace(-5, 5, num=100)
y = np.linspace(-5, 5, num=100)
xx, yy = np.meshgrid(x, y)
z = xx**2 + yy**2

trace = go.Surface(x=x, y=y, z=z)
data = [trace]
layout = go.Layout(
    title='3D Surface Plot',
    autosize=True,
    width=800,
    height=800,
)
fig = go.Figure(data=data, layout=layout)
fig.show()

In [55]:
# 3D surface plot of z = sin(x) + cos(y)
import plotly.graph_objects as go

# 100 evenly spaced samples on [-5, 5] per axis, expanded to a grid
x = np.linspace(-5, 5, num=100)
y = np.linspace(-5, 5, num=100)
xx, yy = np.meshgrid(x, y)
z = np.sin(xx) + np.cos(yy)

trace = go.Surface(x=x, y=y, z=z)
data = [trace]
layout = go.Layout(
    title='3D Surface Plot',
    autosize=True,
    width=800,
    height=800,
)
fig = go.Figure(data=data, layout=layout)
fig.show()

In [None]:
# Contour plot of z = sin(x) + cos(y), built with graph objects (go.Contour)
import plotly.graph_objects as go

# 100 evenly spaced samples on [-5, 5] per axis, expanded to a grid
x = np.linspace(-5, 5, num=100)
y = np.linspace(-5, 5, num=100)
xx, yy = np.meshgrid(x, y)
z = np.sin(xx) + np.cos(yy)

trace = go.Contour(x=x, y=y, z=z)
data = [trace]
layout = go.Layout(
    title='Contour Plot',
    autosize=True,
    width=800,
    height=800,
)
fig = go.Figure(data=data, layout=layout)
fig.show()

In [None]:
# Contour plot of z = x^2 + y^2
import plotly.graph_objects as go

# 100 evenly spaced samples on [-5, 5] per axis, expanded to a grid
x = np.linspace(-5, 5, num=100)
y = np.linspace(-5, 5, num=100)
xx, yy = np.meshgrid(x, y)
z = xx**2 + yy**2

trace = go.Contour(x=x, y=y, z=z)
data = [trace]
layout = go.Layout(
    title='Contour Plot',
    autosize=True,
    width=800,
    height=800,
)
fig = go.Figure(data=data, layout=layout)
fig.show()

In [96]:
# subplots
# sub plots can have different types of plots
from plotly.subplots import make_subplots

In [97]:
# Empty 2 x 2 grid of subplots; traces are added to it in the next cell
fig = make_subplots(
    rows=2,
    cols=2,
)

In [98]:
# Fill the grid cell by cell. add_trace and update_layout each return the figure,
# so the calls are chained in the same order as before.
(
    fig
    # top-left: line trace
    .add_trace(go.Scatter(x=[1, 9, 5], y=[2, 10, 1], mode='lines'), row=1, col=1)
    # top-right: histogram with bins of width 10
    .add_trace(
        go.Histogram(x=[111, 99, 86, 86, 109, 134, 86, 78, 112, 134, 187], xbins=dict(size=10)),
        row=1,
        col=2,
    )
    # bottom-left: the same points drawn as markers
    .add_trace(go.Scatter(x=[1, 9, 5], y=[2, 10, 1], mode='markers'), row=2, col=1)
    # bottom-right: the same histogram again
    .add_trace(
        go.Histogram(x=[111, 99, 86, 86, 109, 134, 86, 78, 112, 134, 187], xbins=dict(size=10)),
        row=2,
        col=2,
    )
    .update_layout(title='Subplot Demo')
)

fig.show()
