# Plotly

Plotly is a data-visualization library developed by Plotly, a company based in Canada, with support for languages such as Python, JavaScript, Julia, etc.

- reference : https://plotly.com/python/plotly-express/

## Advantages

- Multi-language support
- Lots of graphs
- Interactive plots that can be embedded in web pages.
- Beautiful plots

**Limitation:** Plotly on its own does not work with live data streams; Dash can be explored for that.

## The Plotly Roadmap

- Plotly Go
- Plotly Express
- Dash

---

## Working with Plotly Go

In [None]:
# Install Plotly if it is not already available in this environment:
# pip install plotly

In [None]:
import pandas as pd
import numpy as np
import plotly.graph_objs as go
import plotly.express as px

In [None]:
# Load the sample datasets that ship with Plotly Express.
# NOTE(review): `tip` is not referenced again in the visible cells; a
# separate `tips` frame is loaded later through seaborn.
tip = px.data.tips()
iris = px.data.iris()
gap = px.data.gapminder()

In [None]:
# Preview the first rows of the gapminder data
gap.head()

In [None]:
# Scatter plot using Plotly Go: start from the gapminder rows for 2007 only.
temp = gap.query("year == 2007")
temp

In [None]:
# Graph Objects workflow: build the trace, describe the layout, then combine
# both into a Figure.
trace1 = go.Scatter(
    x=temp["lifeExp"],
    y=temp["gdpPercap"],
    mode="markers",
)
data = [trace1]
layout = go.Layout(
    title="Life Expectancy vs GDP per Capita for 2007",
    xaxis=dict(title="Life Expectancy"),
    yaxis=dict(title="GDP per Capita"),
)
fig = go.Figure(data=data, layout=layout)
fig.show()

- we can add more traces to the same plot
- we can add lines, shapes, annotations etc.

In [None]:
# Same scatter as before, plus a second trace that draws a line through a
# handful of hard-coded points on the same axes.
trace1 = go.Scatter(
    x=temp["lifeExp"],
    y=temp["gdpPercap"],
    mode="markers",
)
trace2 = go.Scatter(
    x=[0, 25, 40, 55, 70, 78],
    y=[0, 15000, 18000, 33000, 12000, 48000],
    mode="lines",
)
data = [trace1, trace2]
layout = go.Layout(
    title="Life Expectancy vs GDP per Capita for 2007",
    xaxis=dict(title="Life Expectancy"),
    yaxis=dict(title="GDP per Capita"),
)
fig = go.Figure(data=data, layout=layout)
fig.show()

## Plotly Express
- Plotly Express is a high level wrapper around Plotly Go
- Less code
- Less flexibility
- Easier to use and good for quick visualisations.

In [None]:
# Life expectancy vs GDP per capita for 2007 with Plotly Express:
# colour by continent, bubble size by population, country name on hover,
# and fixed axis ranges via range_x / range_y.
fig = px.scatter(
    temp,
    x="lifeExp",
    y="gdpPercap",
    color="continent",
    size="pop",
    size_max=80,
    hover_name="country",
    range_x=[35, 90],
    range_y=[-20000, 60000],
    title="Life Expectancy vs GDP per Capita for 2007",
)
fig.show()

In [None]:
# Animated version of the bubble chart above: one frame per year, with each
# country's bubble tracked across frames via animation_group.
#
# The axis ranges are derived from the whole dataset instead of reusing the
# limits picked for 2007, so that no year's points fall outside the fixed view.
life_exp_range = [gap['lifeExp'].min() - 5, gap['lifeExp'].max() + 5]
# Keep the original negative lower bound and extend the top to the data maximum.
gdp_range = [-20000, gap['gdpPercap'].max() * 1.1]

fig = px.scatter(
    gap,
    x='lifeExp',
    y='gdpPercap',
    color='continent',
    size='pop',
    size_max=80,
    hover_name='country',
    range_x=life_exp_range,
    range_y=gdp_range,
    title='Life Expectancy vs GDP per Capita for all years',
    animation_frame='year',
    animation_group='country',
)
fig.show()

**`line plot`**

In [None]:
# Line plot of India's population growth over the years
ind = gap.query("country == 'India'")
fig = px.line(
    ind,
    x="year",
    y="pop",
    title="Population Growth of India over the years",
)
fig.show()

In [None]:
# Passing every column of the un-pivoted frame to px.line fails (see the error below):
# temp = gap[gap['country'].isin(['India' , 'China' , 'Pakistan'])]
# px.line(temp , x = temp.index , y = temp.columns , title = 'Life Expectancy of India , China and Pakistan over the years')

The call above raises `ValueError: Plotly Express cannot process wide-form data with columns of different type.` because the un-pivoted frame mixes text and numeric columns.

In [None]:
# Life expectancy of India, China and Pakistan over the years.
# Reshape to wide form (one row per year, one column per country, values are
# lifeExp) so the frame can be handed to px.line column-by-column.
temp = gap.loc[gap["country"].isin(["India", "China", "Pakistan"])]
temp = temp.pivot(index="year", columns="country", values="lifeExp")
temp

In [None]:
# One line per country column of the pivoted frame, with the year index on x
px.line(
    temp,
    x=temp.index,
    y=temp.columns,
    title="Life Expectancy of India , China and Pakistan over the years",
)

**`Bar chart`**

In [None]:
# Bar chart of India's population over the years
fig = px.bar(
    ind,
    x="year",
    y="pop",
    title="Population Growth of India over the years",
)

# Switch the plot and paper backgrounds to white, with black text
white_theme = dict(
    plot_bgcolor="white",
    paper_bgcolor="white",
    font_color="black",
)
fig.update_layout(**white_theme)

fig.show()

In [None]:
# Grouped bar chart: population of India, China and Pakistan per year,
# one bar per country side by side, on a log y-axis with value labels.
temp = gap.loc[gap["country"].isin(["India", "China", "Pakistan"])]
fig = px.bar(
    temp,
    x="year",
    y="pop",
    color="country",
    barmode="group",
    title="Population Growth of India , China and Pakistan over the years",
    log_y=True,
    text_auto=True,
)
fig.show()

In [None]:
# Stacked bar chart: same three countries, no barmode given, so the
# per-country bars for a year are drawn with Plotly Express's default mode.
temp = gap.loc[gap["country"].isin(["India", "China", "Pakistan"])]
fig = px.bar(
    temp,
    x="year",
    y="pop",
    color="country",
    title="Population Growth of India , China and Pakistan over the years",
)
fig.show()

In [None]:
# GDP per capita of the three countries in wide form:
# one row per year, one column per country.
temp_df = gap.loc[gap["country"].isin(["India", "China", "Pakistan"])]
temp_df = temp_df.pivot(index="year", columns="country", values="gdpPercap")
temp_df

In [None]:
# Grouped bar chart of GDP per capita (wide-form input), log y-axis, value labels
px.bar(
    temp_df,
    x=temp_df.index,
    y=temp_df.columns,
    title="GDP per Capita of India , China and Pakistan over the years",
    barmode="group",
    text_auto=True,
    log_y=True,
)

In [None]:
# Stacked bar chart of GDP per capita (wide-form input) with value labels
px.bar(
    temp_df,
    x=temp_df.index,
    y=temp_df.columns,
    title="GDP per Capita of India , China and Pakistan over the years",
    text_auto=True,
)

In [None]:
# Stacked bar chart: each country's population within its continent's bar
# for the year 2007.
# NOTE(review): log_y=True is combined with stacking here; segment heights on
# a log axis are presumably not visually additive -- confirm this is intended.
temp_df = gap.query("year == 2007")
px.bar(
    temp_df,
    x="continent",
    y="pop",
    color="country",
    log_y=True,
    title="Population contribution of countries to a continent for the year 2007",
)

In [None]:
# Animated bar chart: population per continent, one frame per year, with a
# fixed y-range so the axis does not change between frames.
px.bar(
    gap,
    x="continent",
    y="pop",
    color="continent",
    title="Population contribution of countries to a continent for all years",
    animation_frame="year",
    animation_group="country",
    range_y=[0, 4_500_000_000],
)

In [None]:
# Histogram of life expectancy across all countries in 2007,
# using nbins for the bin count and text_auto for count labels.
temp_df = gap.query("year == 2007")
px.histogram(
    temp_df,
    x="lifeExp",
    nbins=10,
    text_auto=True,
    color_discrete_sequence=["indianred"],
    title="Life Expectancy of all countries in 2007",
)

In [None]:
# Histogram of sepal length, coloured by iris species
px.histogram(
    iris,
    x="sepal_length",
    color="species",
    nbins=30,
    title="Sepal Length of all Iris Species",
    text_auto=True,
)

In [None]:
# Pie chart (values -> slice size, names -> slice label):
# population of European countries in 2007.
temp = gap.query("continent == 'Europe' and year == 2007")
px.pie(
    temp,
    values="pop",
    names="country",
    title="Population of European countries in 2007",
)

In [None]:
# Pie chart of world population in 1952 by continent, with the Asia slice
# pulled out ("exploded") from the centre.
#
# Aggregate to one row per continent first so that each entry of `pull`
# lines up with exactly one slice.
temp = gap[gap['year'] == 1952].groupby('continent', as_index=False)['pop'].sum()

# Fraction of the radius by which each slice is pulled out; only Asia is offset.
pull = [0.2 if continent == 'Asia' else 0 for continent in temp['continent']]

fig = px.pie(temp, values='pop', names='continent', title='Population of World in 1952 continent wise')

# px.pie returns a regular Plotly figure, so the pie trace's `pull` attribute
# can be set here without rebuilding the chart in graph objects.
fig.update_traces(pull=pull)
fig.show()

`Sunburst Plot`
- Sunburst plots visualize hierarchical data spanning outwards radially from root to leaves.
- The hierarchy is represented by a sequence of rings, where the root node is the innermost circle and the leaves are the outermost rings.
- here we can only use categorical variables in path

In [None]:
# Sunburst of 2007 population: continent ring, then country ring.
# path   -> list of columns, outermost level last
# values -> column that sizes each sector
# color  -> column that colours each sector
temp_df = gap.query("year == 2007")
px.sunburst(
    temp_df,
    path=["continent", "country"],
    values="pop",
    color="lifeExp",
    title="Population of countries in each continent for the year 2007",
)

# more levels
# temp_df['planet'] = 'earth'
# px.sunburst(temp_df, path=['planet','continent','country'], values='pop')

In [None]:
# Load the tips dataset through seaborn and display it.
# NOTE(review): px.data.tips() was already loaded as `tip` near the top of
# the notebook; confirm whether the seaborn copy is needed as well.
import seaborn as sns
tips = sns.load_dataset('tips')
tips

In [None]:
# Four-level sunburst of the tips data: sex -> smoker -> day -> time,
# sized by total bill and coloured by tip.
px.sunburst(
    tips,
    path=["sex", "smoker", "day", "time"],
    values="total_bill",
    color="tip",
    title="Sunburst plot of tips dataset",
)

`treemap`
- Treemaps display hierarchical data as a set of nested rectangles.
- The area of each rectangle is proportional to a specified dimension of the data.

In [None]:
# Treemap of 2007 population with a constant 'World' root above the
# continent and country levels; rectangles are coloured by life expectancy.
temp_df = gap.query("year == 2007")
px.treemap(
    temp_df,
    path=[px.Constant("World"), "continent", "country"],
    values="pop",
    color="lifeExp",
    title="Population of countries in each continent for the year 2007",
)

- here color is just like hue in seaborn

In [None]:
# Heatmap of the tips data: total bill summed per day (rows) and sex (columns)
temp_df = tips.pivot_table(
    values="total_bill",
    index="day",
    columns="sex",
    aggfunc="sum",
)

px.imshow(temp_df)

In [None]:
# Heatmap of mean life expectancy: one row per year, one column per continent
temp_df = gap.pivot_table(
    values="lifeExp",
    index="year",
    columns="continent",
    aggfunc="mean",
)
px.imshow(
    temp_df,
    title="Heatmap of avg life expectancy of continents over the years",
)

`3D scatter plot`
- A three-dimensional scatter plot is a graphical representation of data points in a three-dimensional space, where each point is defined by three coordinates (x, y, z).
- It is used to visualize the relationship between three continuous variables and can help identify patterns, trends, and clusters within the data.
- generally used with numeric data

In [None]:
# 3D scatter of all countries in 2007:
# life expectancy on x, population on y (log scale), GDP per capita on z.
# NOTE(review): size_max is passed without a `size` column, so it presumably
# has no visible effect -- confirm whether size='pop' was intended.
temp_df = gap.query("year == 2007")
px.scatter_3d(
    temp_df,
    x="lifeExp",
    y="pop",
    z="gdpPercap",
    color="continent",
    size_max=60,
    hover_name="country",
    title="3D scatter plot of all countries in 2007",
    log_y=True,
)

In [None]:
# 3D scatter of the iris measurements, coloured by species
px.scatter_3d(
    iris,
    x="sepal_length",
    y="sepal_width",
    z="petal_width",
    color="species",
    hover_name="species",
    title="3D scatter plot of all iris species",
)

In [None]:
# Scatter matrix over the four iris measurements (dimensions -> columns to pair up).
# symbol gives each species its own marker shape; height enlarges the figure.
px.scatter_matrix(
    iris,
    dimensions=["sepal_length", "sepal_width", "petal_length", "petal_width"],
    color="species",
    title="Scatter matrix of all iris species",
    symbol="species",
    height=700,
)

In [None]:
# facet plot

In [None]:
# 3d surface plot

In [None]:
# subplots
from plotly.subplots import make_subplots