<h1>Table of Contents<span class="tocSkip"></span></h1>
<div class="toc"><ul class="toc-item"><li><span><a href="#Plotly-exercises-from--https://plot.ly/python/user-guide/" data-toc-modified-id="Plotly-exercises-from--https://plot.ly/python/user-guide/-1"><span class="toc-item-num">1&nbsp;&nbsp;</span>Plotly exercises from  <a href="https://plot.ly/python/user-guide/" target="_blank">https://plot.ly/python/user-guide/</a></a></span></li><li><span><a href="#The-end" data-toc-modified-id="The-end-2"><span class="toc-item-num">2&nbsp;&nbsp;</span>The end</a></span></li></ul></div>

#### Plotly exercises from  https://plot.ly/python/user-guide/

In [1]:
import pandas as pd
import plotly
import plotly.graph_objs as go
from plotly.offline import iplot
import cufflinks as cf
# Put cufflinks into offline mode before any DataFrame.iplot() call further down
# (presumably so the charts render in the notebook without a Plotly cloud account — confirm).
cf.go_offline()

In [2]:
# Gapminder five-year table (tab-separated), narrowed down to the rows for 2007.
GAPMINDER_URL = 'http://www.stat.ubc.ca/~jenny/notOcto/STAT545A/examples/gapminder/data/gapminderDataFiveYear.txt'
gapminder_all_years = pd.read_csv(GAPMINDER_URL, sep='\t')
df = gapminder_all_years[gapminder_all_years.year == 2007]

In [3]:
# Distinct continent labels present in the 2007 slice.
df['continent'].unique()

array(['Asia', 'Europe', 'Africa', 'Americas', 'Oceania'], dtype=object)

In [4]:
# One sub-frame per continent; these names are reused by the trace cells below.
americas, europe, asia, africa, oceania = (
    df[df.continent == continent_name]
    for continent_name in ('Americas', 'Europe', 'Asia', 'Africa', 'Oceania')
)

In [5]:
# A trace bundles one collection of data with the specification of how that data
# should be plotted. Traces are dictionary-like objects, and the trace class chosen
# (go.Scatter here) determines how the data is displayed on the plotting surface.
def _continent_trace(frame, label, fill, outline_width):
    """Return a markers-only scatter of life expectancy against GDP per capita for one continent."""
    return go.Scatter(
        x=frame.gdpPercap,
        y=frame.lifeExp,
        mode='markers',
        marker=dict(size=12, line=dict(width=outline_width), color=fill),
        name=label,
        text=frame.country,
    )


# NOTE(review): Africa and Oceania use outline width 4 while the others use 1 —
# confirm whether that difference is intentional.
trace_comp0 = _continent_trace(americas, 'Americas', "navy", 1)
trace_comp1 = _continent_trace(europe, 'Europe', "red", 1)
trace_comp2 = _continent_trace(asia, 'Asia', "yellow", 1)
trace_comp3 = _continent_trace(africa, 'Africa', "black", 4)
trace_comp4 = _continent_trace(oceania, 'Oceania', "green", 4)

In [6]:
# Every trace that should appear on the figure is collected in one list.
data_comp = [trace_comp0, trace_comp1, trace_comp2, trace_comp3, trace_comp4]

# Axis settings are spelled out separately, then handed to the layout.
gdp_axis = dict(title='GDP per capita (2000 dollars)', ticklen=5, zeroline=False, gridwidth=2)
life_axis = dict(title='Life Expectancy (years)', ticklen=5, gridwidth=2)
layout_comp = go.Layout(
    title='Life Expectancy v. Per Capita GDP, 2007',
    hovermode='closest',
    xaxis=gdp_axis,
    yaxis=life_axis,
)

In [7]:
# The figure pairs the traces with the layout; note that `data` is given as a
# plain Python list of traces (data_comp), not a single trace.
fig_comp = go.Figure(data=data_comp, layout=layout_comp)

In [8]:
# Render the five-continent scatter figure built in the previous cell.
iplot(fig_comp)

In [9]:
# Improved version: the five per-continent traces are generated in one pass instead
# of being written out by hand. The figure is built from scratch; the previous
# version created a figure that was immediately overwritten and then restyled
# fig_comp in place, which changed the earlier figure as a side effect.
from plotly.graph_objs.scatter import Marker

# Legend labels, sub-frames and colours are matched by position; the order follows
# the trace order used for fig_comp.
continent_names = ['Americas', 'Europe', 'Asia', 'Africa', 'Oceania']
continents = [americas, europe, asia, africa, oceania]
colors = ['red', 'blue', 'yellow', 'black', 'green']

traces = [
    go.Scatter(
        x=frame.gdpPercap,
        y=frame.lifeExp,
        mode='markers',
        # same marker styling for every continent; only the fill colour varies
        marker=Marker(color=color, size=14, line=dict(width=1, color='black')),
        name=label,
        text=frame.country,
    )
    for label, frame, color in zip(continent_names, continents, colors)
]

fig = go.Figure(data=traces, layout=layout_comp)
iplot(fig)

In [10]:
# The same chart through cufflinks: a single DataFrame.iplot() call, with the
# continent column passed as `categories`.
scatter_columns = ['gdpPercap', 'lifeExp', 'country', 'continent']
scatter_options = dict(
    kind='scatter',
    x='gdpPercap',
    y='lifeExp',
    mode='markers',
    categories='continent',
    name='country',
    xTitle='GDP per capita(2000 dollars)',
    yTitle='Life Expectancy (years)',
    title='Life Expectancy v. Per Capita GDP,2007',
)
df[scatter_columns].iplot(**scatter_options)

In [11]:
# Minimal scatter example that supplies only the data argument. Remember that data
# must be a list, even when it holds a single trace.
red_marker = {'color': 'red', 'symbol': 104, 'size': 10}
single_trace = go.Scatter(x=[1, 2, 3], y=[4, 5, 6], marker=red_marker,
                          mode="markers+lines", text=["one", "two", "three"])
iplot([single_trace])
# Plotting specification for this data: points drawn as hollow x's, joined by lines, all in red.

In [12]:
# Adding a second series only takes another Scatter object in the data list.
import numpy as np

# First series: the same trace as in the previous single-trace example.
trace1 = go.Scatter(
    x=[1, 2, 3],
    y=[4, 5, 6],
    mode="markers+lines",
    marker={'color': 'red', 'symbol': 104, 'size': 10},
    text=["one", "two", "three"],
)

# Second series: 6*sin(x) sampled on a 0.2-spaced grid starting at 1.
x = np.arange(1, 3.2, 0.2)
y = 6 * np.sin(x)
trace2 = go.Scatter(
    x=x,
    y=y,
    mode='markers',
    marker={'color': 'blue', 'symbol': 'star', 'size': 10},
    name='second_trace',  # per the original note: a label mainly useful when editing in plot.ly
)

data = [trace1, trace2]

In [13]:
# The layout is a dictionary of settings shared by the whole figure (title, axes, legend, ...).
layout = dict(title='First Plot', xaxis=dict(title='x'), yaxis=dict(title='y'))
first_plot = go.Figure(data=data, layout=layout)
iplot(first_plot)

In [14]:
# Text annotations can be placed on the plotting surface; each needs its text and
# x, y coordinates. This one labels the maximum point plotted so far.
layout['annotations'] = [go.layout.Annotation(x=3, y=6, text="Highest Point")]
annotated_plot = go.Figure(data=data, layout=layout)
iplot(annotated_plot)

In [15]:
# Shapes: the next layout upgrade adds a translucent grey rectangle that highlights
# the x-range from 1 to 2.
# NOTE(review): the earlier comment said this marks where trace 1 is above trace 2,
# but over x in [1, 2] trace 1 (4 to 5) lies below trace 2 (6*sin(x), roughly 5 to 6).
# Confirm which x-range was meant.
highlight_band = {
    'type': 'rect',
    # both references point at the data axes, so the corners are data coordinates
    'xref': 'x',
    'yref': 'y',
    # plain numbers instead of strings, because both axes here are numeric
    'x0': 1, 'y0': 0, 'x1': 2, 'y1': 7,
    # light grey fill, mostly transparent, with no outline
    'fillcolor': '#d3d3d3', 'opacity': 0.2, 'line': {'width': 0},
}
layout.update(dict(shapes=[highlight_band]))
iplot(go.Figure(data=data, layout=layout))

In [16]:
# Several plots in one figure: two box plots on the top row and a heatmap on the
# bottom row.
# make_subplots comes from plotly.subplots because plotly.tools.make_subplots is
# deprecated (this notebook's own captured output carries that warning).
from plotly.subplots import make_subplots

heatmap = go.Heatmap(z=[[1, 20, 30],
                        [20, 1, 60],
                        [30, 60, 1]], showscale=False)

# Seeded generator so the box plots come out the same on every Restart & Run All.
rng = np.random.RandomState(0)
y0 = rng.randn(50)
y1 = rng.randn(50) + 1  # second sample shifted up by 1

box_1 = go.Box(y=y0)
box_2 = go.Box(y=y1)
# The traces are placed one by one below, so no combined `data` list is built here;
# the previous version rebound `data` without using it.

# 2x2 grid: the lower-left cell spans both columns, hence None for the lower-right slot.
fig = make_subplots(rows=2, cols=2, specs=[[{}, {}], [{'colspan': 2}, None]],
                    subplot_titles=('First Subplot', 'Second Subplot', 'Third Subplot'))
# put each trace into its (row, col) cell
fig.append_trace(box_1, 1, 1)
fig.append_trace(box_2, 1, 2)
fig.append_trace(heatmap, 2, 1)

fig['layout'].update(height=600, width=600, title='i <3 subplots')

iplot(fig, filename='box_heatmap1');



plotly.tools.make_subplots is deprecated, please use plotly.subplots.make_subplots instead



#### The end

In [17]:
# Switch datasets: from here on `df` is seaborn's "tips" sample and no longer the
# gapminder frame used in the cells above.
import seaborn as sns
df = sns.load_dataset('tips')
# display() is called explicitly: a bare expression that is not the last line of a
# cell is only rendered when IPython's ast_node_interactivity is changed from its default.
display(df.head())
df.info()

Unnamed: 0,total_bill,tip,sex,smoker,day,time,size
0,16.99,1.01,Female,No,Sun,Dinner,2
1,10.34,1.66,Male,No,Sun,Dinner,3
2,21.01,3.5,Male,No,Sun,Dinner,3
3,23.68,3.31,Male,No,Sun,Dinner,2
4,24.59,3.61,Female,No,Sun,Dinner,4


<class 'pandas.core.frame.DataFrame'>
RangeIndex: 244 entries, 0 to 243
Data columns (total 7 columns):
total_bill    244 non-null float64
tip           244 non-null float64
sex           244 non-null category
smoker        244 non-null category
day           244 non-null category
time          244 non-null category
size          244 non-null int64
dtypes: category(4), float64(2), int64(1)
memory usage: 7.0 KB


In [18]:
# Day labels and visit counts as numpy arrays; they are used as x and y in the bar charts below.
day_counts = df.day.value_counts()  # computed once and reused for both arrays
# .astype(str) turns the categorical index values into plain strings.
# display() keeps this first array visible even under IPython's default
# "last expression only" output mode.
display(day_counts.index.values.astype(str))
day_counts.values

array(['Sat', 'Sun', 'Thur', 'Fri'], dtype='<U4')

array([87, 76, 62, 19], dtype=int64)

In [19]:
# widgets are very helpful for changing data parameters or the plotted data interactively
import ipywidgets as widgets
from ipywidgets import interact, interact_manual

In [20]:
# color set (hex codes); the individual names stay available next to the list
color_set = ['#95C061', '#75ABDE', '#FE9C43', '#A6886D', '#E7675D', '#7C5674']
_11_lg, _12_lb, _13_lo, _14_lbr, _15_lr, _16_lp = color_set

# Vertical bar chart of the number of rows per day.
day_counts = df['day'].value_counts()
day_bar = go.Bar(
    x=day_counts.index.values.astype('str'),  # .astype('str') is needed to transform the categorical labels
    y=day_counts.values,
    marker=dict(color=color_set[0]),
)
data = [day_bar]

iplot(data)

In [21]:
# Horizontal version: counts go on x, day labels on y, with orientation='h'.
visits_per_day = df.day.value_counts()
horizontal_bar = go.Bar(
    x=visits_per_day.values,
    y=visits_per_day.index.values.astype(str),
    orientation='h',
    marker=dict(color=color_set[3]),
)
data = [horizontal_bar]
layout = go.Layout(title='Activities per day', xaxis=dict(title='Number of visits'))
iplot(go.Figure(data=data, layout=layout))

In [22]:
# Basic bar chart through cufflinks (DataFrame.iplot()).
# value_counts() yields the visits per day as a Series; to_frame() converts it into a
# small one-column DataFrame.
df2 = df.day.value_counts().to_frame()
# display() is explicit because a bare `df2` in the middle of a cell is not rendered
# under IPython's default output mode.
display(df2)
df2_index = df2.index.values.astype(str)  # day labels as plain strings
df2.iplot(kind='bar', color=color_set[4])

Unnamed: 0,day
Sat,87
Sun,76
Thur,62
Fri,19


In [23]:
# The same plot after resetting the index: the day labels become an ordinary column,
# so y has to be named explicitly (x is optional).
day_counts_flat = df2.reset_index()  # computed once, then both shown and plotted
# Pin the column names so the x/y references below stay valid: newer pandas names
# these two columns 'day'/'count' rather than 'index'/'day'.
day_counts_flat.columns = ['index', 'day']
# explicit display(): a mid-cell bare expression is dropped under IPython's defaults
display(day_counts_flat)
day_counts_flat.iplot(kind='bar', x='index', y='day', text='index', color=color_set[2])

Unnamed: 0,index,day
0,Sat,87
1,Sun,76
2,Thur,62
3,Fri,19


In [24]:
# With cufflinks a horizontal bar chart only needs kind='barh'; no x or y arguments
# are passed, so the frame's index and its single column are used as they are.
df2.iplot(kind='barh', color=color_set[1])

In [25]:
# A histogram shows the distribution of the tip amounts (the seaborn counterpart is distplot()).
tip_hist = go.Histogram(
    x=df.tip,
    nbinsx=30,  # maximum number of bins; the algorithm settles on the optimum number
    marker=dict(color=color_set[0], line=dict(color=color_set[1], width=1)),
)
data = [tip_hist]
iplot(data)

In [26]:
# The same histogram through cufflinks; per the original note, `bins` here is the
# exact number of bins rather than an upper bound.
df.tip.iplot(kind='hist', bins= 20)

In [27]:
# go.Box is plotly's counterpart of seaborn's boxplot(): one box of tip values per day.
day_labels = df.day.unique().astype(str)
# no x is given because each trace's name already stands for the day
data = [
    go.Box(y=df[df.day == day]['tip'], name=day, showlegend=False)
    for day in day_labels
]
iplot(data)

In [28]:
# Bubble plot: a Scatter whose marker sizes are taken from a data column.
# The column has to be read with brackets, df['size'], because the attribute
# df.size is the number of elements in the DataFrame, not the 'size' column.
bubbles = go.Scatter(
    x=df.total_bill,
    y=df.tip,
    mode='markers',
    marker=dict(size=df['size'] * 5),  # column values scaled by 5 to get the marker size
)
data = [bubbles]
# x-axis title corrected from 'Total Bil' to 'Total Bill'
layout = go.Layout(xaxis=dict(title='Total Bill'), yaxis=dict(title='Tip Value'), title='Tips vs Total Bills')
iplot(go.Figure(data=data, layout=layout))

# cufflinks way
df.iplot(kind='bubble', x='total_bill', y='tip', size='size', color='orange', theme='solar')

In [29]:
# Correlation heatmap through cufflinks (kind='heatmap'), the counterpart of seaborn's heatmap().
# Only the numeric columns are correlated: the frame also holds categorical columns
# (sex, smoker, day, time), and newer pandas raises on those instead of skipping them.
numeric_corr = df.select_dtypes(include='number').corr()
numeric_corr.iplot(kind='heatmap', colorscale=None)  # other colorscales: 'Greens', 'Reds', 'Blues'

In [30]:
# cufflinks scatter_matrix() plays the role of seaborn's pairplot()

# Only the three numeric columns are selected, since (per the original note) all columns must be numerical.
df[['total_bill', 'tip', 'size']].scatter_matrix()

In [31]:
# Bar chart with a menu whose buttons switch the y axis between log and linear scale.
x = [1, 2, 3]
y = [1000, 10000, 100000]
y2 = [5000, 10000, 90000]

# trace2 starts hidden; both menu buttons set both traces to visible.
trace1 = go.Bar(x=x, y=y, name='trace1')
trace2 = go.Bar(x=x, y=y2, name='trace2', visible=False)
data = [trace1, trace2]


def _scale_button(label, title, axis_type):
    """Build one 'update' button that shows both traces and sets the title and y-axis type."""
    trace_changes = {'visible': [True, True]}
    layout_changes = {'title': title, 'yaxis': {'type': axis_type}}
    return dict(label=label, method='update', args=[trace_changes, layout_changes])


# active=1 selects the second button ('Linear Scale'), in line with the initial title.
updatemenus = [
    dict(
        active=1,
        buttons=[
            _scale_button('Log Scale', 'Log scale', 'log'),
            _scale_button('Linear Scale', 'Linear scale', 'linear'),
        ],
    )
]

layout = dict(updatemenus=updatemenus, title='Linear scale')
fig = go.Figure(data=data, layout=layout)

plotly.offline.iplot(fig)