In [1]:
import pandas as pd
import numpy as np
import chart_studio.plotly as py
import seaborn as sns
import plotly.express as px
%matplotlib inline

import cufflinks as cf
from plotly.offline import download_plotlyjs, init_notebook_mode, plot, iplot
# Initialise plotly's offline notebook mode so figures can render inside the notebook.
init_notebook_mode(connected=True)
# Put cufflinks in offline mode; presumably required by the DataFrame.iplot() call used below.
cf.go_offline()

Useful reference (Plotly built-in colorscales): https://plotly.com/python/builtin-colorscales/


In [2]:
# Creating random demo data: 150 rows x 4 columns of normal noise scaled by 100.
# A seeded generator is used so that "Restart Kernel -> Run All" always
# produces the same frame (and therefore the same plots).
rng = np.random.default_rng(42)
array_ = rng.standard_normal((150, 4)) * 100
df = pd.DataFrame(array_, columns=['A','B','C','D'])
df.head()

Unnamed: 0,A,B,C,D
0,53.294306,60.813584,-79.472159,60.575085
1,-92.916425,-5.94235,-8.684463,14.961498
2,58.363906,101.573513,-289.643434,0.50923
3,59.133892,-137.726696,122.034519,-31.194468
4,1.398666,124.37324,161.563873,48.111084


**Basic plotting**

In [3]:
# Quick interactive line plot of columns A and B through cufflinks
df.loc[:, ['A', 'B']].iplot()

*1. Line plot*:

In [4]:
## Graph objects let us customize figures trace by trace
import plotly.graph_objects as go

# Use one of plotly's bundled datasets so the example looks realistic
df_stocks = px.data.stocks()
df_stocks.head(n=3)

Unnamed: 0,date,GOOG,AAPL,AMZN,FB,NFLX,MSFT
0,2018-01-01,1.0,1.0,1.0,1.0,1.0,1.0
1,2018-01-08,1.018172,1.011943,1.061881,0.959968,1.053526,1.015988
2,2018-01-15,1.032008,1.019771,1.05324,0.970243,1.04986,1.020524


In [5]:
# Line chart of Apple's (normalized) price over time.
# `labels` must be keyed by the *column names* being plotted, not by the
# axis letters 'x'/'y'; with 'x'/'y' keys the renaming is silently ignored.
px.line(df_stocks,
        x="date", y="AAPL",
        labels={'date': 'day', 'AAPL': 'Price'}  ## renaming the axis labels
)

In [6]:
## Let's say we want to see Apple vs Google

# Passing a list to `y` is wide-form input: plotly express names the y axis
# 'value' and the legend 'variable', so those are the keys `labels` must use
# (keys 'x'/'y' match no column and are silently ignored).
px.line(df_stocks,
        x="date", y=["AAPL","GOOG"],
        labels={'date': 'day', 'value': 'Price', 'variable': 'Stock'},  ## renaming the labels
        title="Apple vs Google"
)


In [7]:
## A more involved example: build the figure trace by trace with graph objects

# One entry per line: (column in df_stocks, keyword arguments for its Scatter trace)
stock_traces = [
    ('AAPL', dict(mode='lines', name='Apple')),
    ('GOOG', dict(mode='lines+markers', name='Google')),
    ('AMZN', dict(mode='lines+markers', name='Amazon',
                  line=dict(color='firebrick', width=2, dash='dashdot'))),
]

# Start from an empty figure and add the traces in order
fig = go.Figure()
for column, trace_kwargs in stock_traces:
    fig.add_trace(go.Scatter(x=df_stocks['date'], y=df_stocks[column], **trace_kwargs))

# Optional layout polish: titles plus a styled x axis
x_axis_style = dict(showline=True, showgrid=True, showticklabels=True,
                    linecolor='rgb(204,204,205)', linewidth=2, ticks='outside')
fig.update_layout(title="3 stocks together wow!",
                  xaxis_title="dates baby",
                  yaxis_title="money money money",
                  xaxis=x_axis_style)

*2. Bar chart*:

In [8]:
## List the names exposed by px.data (dunder attributes are skipped)
for entry in filter(lambda attr: '__' not in attr, dir(px.data)):
    print(entry)

absolute_import
carshare
election
election_geojson
experiment
gapminder
iris
medals_long
medals_wide
stocks
tips
wind


In [9]:
# Gapminder rows for the United Kingdom only
df_uk_gap = (
    px.data.gapminder()
    .query("country=='United Kingdom'")
)
df_uk_gap.head(n=5)

Unnamed: 0,country,continent,year,lifeExp,pop,gdpPercap,iso_alpha,iso_num
1596,United Kingdom,Europe,1952,69.18,50430000,9979.508487,GBR,826
1597,United Kingdom,Europe,1957,70.42,51430000,11283.17795,GBR,826
1598,United Kingdom,Europe,1962,70.76,53292000,12477.17707,GBR,826
1599,United Kingdom,Europe,1967,71.36,54959000,14142.85089,GBR,826
1600,United Kingdom,Europe,1972,72.01,56079000,15895.11641,GBR,826


In [10]:
# One bar per survey year showing the UK population
px.bar(
    df_uk_gap,
    x="year",
    y="pop",
    title="UK population over years",
)

In [11]:
# Gapminder rows for every European country
df_eu_gap = (
    px.data.gapminder()
    .query("continent=='Europe'")
)
df_eu_gap.head(n=5)

Unnamed: 0,country,continent,year,lifeExp,pop,gdpPercap,iso_alpha,iso_num
12,Albania,Europe,1952,55.23,1282697,1601.056136,ALB,8
13,Albania,Europe,1957,59.28,1476505,1942.284244,ALB,8
14,Albania,Europe,1962,64.82,1728137,2312.888958,ALB,8
15,Albania,Europe,1967,66.22,1984060,2760.196931,ALB,8
16,Albania,Europe,1972,67.69,2263554,3313.422188,ALB,8


In [12]:
# Stacked bars: one segment per country, one bar per survey year
axis_labels = {'pop':'Population', 'year':'Timeline'}
px.bar(
    df_eu_gap,
    x="year",
    y="pop",
    color="country",
    labels=axis_labels,
    title="EU Population by country",
)

In [13]:
## To get bars next to each other rather than stacked, use barmode='group'
countries = ['Sweden','Norway','Denmark']
df_countries = df_eu_gap[df_eu_gap.country.isin(countries)]

# The title names the three countries actually plotted (the frame is a
# subset of Europe, not the whole continent).
fig = px.bar(df_countries, x="year", y="pop", color="country",
             title="Population of Sweden, Norway and Denmark",
             labels={'pop':'Population', 'year':'Timeline'}, barmode='group', text='pop')
## since numbers are in millions, let's make them easy to read
fig.update_traces(texttemplate='%{text:.2s}', textposition='outside')

# uniformtext_minsize sets the minimum size for the bar text labels, but it
# only takes effect when uniformtext_mode is set; 'hide' drops labels that
# would have to shrink below that size.
fig.update_layout(uniformtext_minsize=10, uniformtext_mode='hide')
fig.update_layout(xaxis_tickangle=-45) ## rotate the x tick labels

*3. Scatter plot*:

In [14]:
# Iris measurements bundled with plotly express
df_iris = px.data.iris()
df_iris.head(n=5)

Unnamed: 0,sepal_length,sepal_width,petal_length,petal_width,species,species_id
0,5.1,3.5,1.4,0.2,setosa,1
1,4.9,3.0,1.4,0.2,setosa,1
2,4.7,3.2,1.3,0.2,setosa,1
3,4.6,3.1,1.5,0.2,setosa,1
4,5.0,3.6,1.4,0.2,setosa,1


In [15]:
# Sepal length vs width; colour encodes species, marker size encodes petal length.
# hover_data adds petal_width to the tooltip so every feature is visible.
px.scatter(
    df_iris,
    x='sepal_length',
    y='sepal_width',
    size='petal_length',
    color='species',
    hover_data=['petal_width'],
)

In [16]:
# Markers are coloured by sepal length and a colour bar is shown next to the plot
iris_markers = dict(showscale=True, color=df_iris['sepal_length'])
iris_points = go.Scatter(x=df_iris['sepal_length'], y=df_iris['sepal_width'],
                         mode='markers', text=df_iris['species'],
                         marker=iris_markers)

fig = go.Figure()
fig.add_trace(iris_points)
fig.update_layout(title="Iris datasset with scale measure")

In [17]:
## Scatter can be pretty handy with big data.
# We create a larger frame on the fly: 50,000 rows x 2 columns of normal
# noise scaled by 100. The generator is seeded so a fresh run of the
# notebook reproduces the same points.
big = np.random.default_rng(7).standard_normal((50000, 2)) * 100
df_big = pd.DataFrame(big, columns=['A','B'])
df_big.head(3)

Unnamed: 0,A,B
0,160.11198,93.699924
1,24.436127,20.154007
2,3.312913,-155.490373


In [18]:
# Scattergl trace for the 50k points; markers are coloured by their A value
gl_markers = dict(color=df_big['A'], colorscale='Viridis', line_width=3)
gl_trace = go.Scattergl(x=df_big['A'], y=df_big['B'], mode='markers', marker=gl_markers)
fig = go.Figure(data=gl_trace)
fig

## The customizations shown earlier apply to this graph-objects figure as well

*3. Pie chart*:

In [19]:
# European subset of gapminder, used by the pie-chart cells
df_eu = (
    px.data.gapminder()
    .query("continent== 'Europe'")
)
df_eu.head(n=5)

Unnamed: 0,country,continent,year,lifeExp,pop,gdpPercap,iso_alpha,iso_num
12,Albania,Europe,1952,55.23,1282697,1601.056136,ALB,8
13,Albania,Europe,1957,59.28,1476505,1942.284244,ALB,8
14,Albania,Europe,1962,64.82,1728137,2312.888958,ALB,8
15,Albania,Europe,1967,66.22,1984060,2760.196931,ALB,8
16,Albania,Europe,1972,67.69,2263554,3313.422188,ALB,8


In [20]:
# Keep only the 2002 survey, then show each country's share of the population
df_eu_2002 = df_eu[df_eu['year'] == 2002]
px.pie(
    df_eu_2002,
    names='country',
    values='pop',
    color_discrete_sequence=px.colors.sequential.Cividis,
    title='Europ continent by country in 2002',
)

In [21]:
## another way to manipulate the pie chart with more flexibility
# df_scand keeps every survey year for the four countries.
df_scand = df_eu[df_eu.country.isin(['Sweden','Norway','Denmark','Finland'])]
# The pie is titled "in 2002", so restrict it to the 2002 rows; feeding it all
# years would add up each country's population across every survey.
df_scand_2002 = df_scand[df_scand.year == 2002]

fig = px.pie(df_scand_2002, values='pop', names='country',
             title='Scandinavia in 2002',
             hover_data=['lifeExp'], labels={'lifeExp':'life expectancy'},
             color_discrete_sequence = px.colors.sequential.Cividis)
# Put the percentage and the country name inside each slice and pull the
# first slice slightly out of the pie.
fig.update_traces(textposition='inside',textinfo='percent+label',
                 pull=[0.1,0,0,0])
fig.show()

*4. Boxplot*

In [26]:
# Population spread per country; points='all' draws every observation beside its box
fig = px.box(
    df_scand,
    x="country",
    y="pop",
    points='all',
)
fig.show()

In [48]:
df_tips = px.data.tips()
# Only the rows where the customer is a smoker
smokers = df_tips[df_tips['smoker'] == 'Yes']

# (column, marker colour, legend name, whisker width) for each box
box_specs = [
    ('tip', 'royalblue', "Tips", 0.2),
    ('total_bill', 'indianred', "Total bills", 0.15),
]

fig = go.Figure()
fig.add_traces([
    go.Box(y=smokers[column], marker_color=colour, name=label,
           boxmean='sd',      # show mean and standard deviation on the box
           boxpoints='all',   # draw every observation
           jitter=0.5,
           whiskerwidth=whisker)
    for column, colour, label, whisker in box_specs
])

# Light grey canvas with white grid lines
background = 'rgb(243,243,243)'
fig.update_layout(title="Smoker's Tips vs total bills",
                  yaxis=dict(gridcolor='rgb(255,255,255)', gridwidth=3),
                  paper_bgcolor=background,
                  plot_bgcolor=background)

*5. Histogram*:

In [45]:
# Distribution of 2002 populations across European countries, with a box plot on the margin
population_2002 = df_eu_2002["pop"]
fig = px.histogram(
    population_2002,
    nbins=12,
    marginal='box',
    labels={'value':'Population'},
    color_discrete_sequence=["royalblue"],
    title="Dist of EU in 2002",
)
fig.update_layout(showlegend=False, bargap=0.1,
                  yaxis_title_text="Country's count")
fig

In [24]:
# Tip distribution split by the `sex` column, with a box plot on the margin
df_tips = px.data.tips()
fig = px.histogram(
    df_tips,
    x='tip',
    color='sex',
    marginal='box',
    labels={'sex':'Gender'},
    title="Tips dist by gender",
)
fig.update_layout(yaxis_title_text='Tips count')

In [36]:
# Peek at the first rows of the tips data
df_tips.head(n=5)

Unnamed: 0,total_bill,tip,sex,smoker,day,time,size
0,16.99,1.01,Female,No,Sun,Dinner,2
1,10.34,1.66,Male,No,Sun,Dinner,3
2,21.01,3.5,Male,No,Sun,Dinner,3
3,23.68,3.31,Male,No,Sun,Dinner,2
4,24.59,3.61,Female,No,Sun,Dinner,4


*5. Density Heatmap*:

In [50]:
# Day x time grid coloured from the total_bill values in each cell
fig = px.density_heatmap(
    df_tips,
    x="day",
    y="time",
    z="total_bill",
    color_continuous_scale='Viridis',
)
fig

In [69]:
# Same heatmap, now with a histogram on each margin
marginals = dict(marginal_x='histogram', marginal_y='histogram')
fig = px.density_heatmap(df_tips, x="day", y="time", z="total_bill", **marginals)
fig.update_layout(title="Dist of total bills in days and time")

*6. 3D viz*:

In [85]:
# 3D scatter of three iris measurements, coloured by species
iris = px.data.iris()
fig = px.scatter_3d(
    iris,
    x="sepal_width",
    y="petal_length",
    z="sepal_length",
    color='species',
)
fig

*7. Scatter Matrix*:

In [94]:
# Pairwise scatter plots of the tips columns, coloured by day
fig = px.scatter_matrix(
    df_tips,
    color='day',
    title="Tips by day and time",
)
fig


*8. Map scatter*:

In [101]:
# 2002 gapminder rows: one bubble per country on an orthographic globe
df_map = px.data.gapminder().query("year==2002")

geo_options = dict(
    locations="iso_alpha",   # column holding the country codes
    color="continent",
    size="pop",
    projection="orthographic",
)
fig = px.scatter_geo(df_map, **geo_options)
fig

In [100]:
# The country codes passed to `locations` in the map above
df_map['iso_alpha'].head(n=5)

10    AFG
22    ALB
34    DZA
46    AGO
58    ARG
Name: iso_alpha, dtype: object

*8. Polar chart*:

In [103]:
# Reminder of the tips columns before plotting them on polar axes
df_tips.head(n=5)

Unnamed: 0,total_bill,tip,sex,smoker,day,time,size
0,16.99,1.01,Female,No,Sun,Dinner,2
1,10.34,1.66,Male,No,Sun,Dinner,3
2,21.01,3.5,Male,No,Sun,Dinner,3
3,23.68,3.31,Male,No,Sun,Dinner,2
4,24.59,3.61,Female,No,Sun,Dinner,4


In [110]:
# Polar line chart: radius = total bill, angle = day, one closed line per sex
px.line_polar(
    df_tips,
    r="total_bill",
    theta="day",
    color="sex",
    line_close=True,
)

*9. Ternary plot*:

In [111]:
# Reminder of the iris columns before the ternary plot
iris.head(n=5)

Unnamed: 0,sepal_length,sepal_width,petal_length,petal_width,species,species_id
0,5.1,3.5,1.4,0.2,setosa,1
1,4.9,3.0,1.4,0.2,setosa,1
2,4.7,3.2,1.3,0.2,setosa,1
3,4.6,3.1,1.5,0.2,setosa,1
4,5.0,3.6,1.4,0.2,setosa,1


In [113]:
# Ternary scatter: each corner is one of three iris measurements
px.scatter_ternary(
    iris,
    a="sepal_length",
    b="sepal_width",
    c="petal_length",
    color="species",
    hover_name="species",
)

*10. Breaking a plot down into facets*:

In [114]:
# facet_col splits the chart into one column of panels per value of `smoker`
px.scatter(
    df_tips,
    x="total_bill",
    y='tip',
    color="sex",
    facet_col="smoker",
)

In [115]:
import seaborn as sns

In [116]:
# seaborn's "attention" example dataset
df_att = sns.load_dataset("attention")
df_att.head(n=5)

Unnamed: 0.1,Unnamed: 0,subject,attention,solutions,score
0,0,1,divided,1,2.0
1,1,2,divided,1,3.0
2,2,3,divided,1,3.0
3,3,4,divided,1,5.0
4,4,5,divided,1,4.0


In [120]:
# One small panel per subject, wrapped onto a new row after every 5 panels
fig = px.line(
    df_att,
    x='solutions',
    y='score',
    facet_col='subject',
    facet_col_wrap=5,
    title="Scores vs solution per subject",
)
fig