# **Chart_Studio Plotly + Cufflinks**
___
___

***Topics***



*   **Frequency Bar Plot** *(Example: How many times any name (groupby) appeared in a column)*
*   **Total Bar Plot** *(Example: Total sum of money based on each name (groupby))*
*   **Normal Bar Plot** *(Example: Price for each car)*
*   **Multi-Column Normal Bar Plot** *(Example: what are the asking prices of same car in two different shops)*




In [None]:
!pip install cufflinks chart_studio plotly

**Import List**

In [2]:
# plotly imports
import plotly.graph_objs as go
import chart_studio.plotly as py
# Cufflinks wrapper on plotly
import cufflinks as cf

import pandas as pd
import numpy as np

%matplotlib inline

# Options for pandas: show at most 30 columns when a DataFrame is displayed
pd.options.display.max_columns = 30

# Display all cell outputs: with 'all', every expression statement in a cell is
# echoed, not only the last one. This is set on the class, so it applies to the
# whole notebook session.
from IPython.core.interactiveshell import InteractiveShell
InteractiveShell.ast_node_interactivity = 'all'

## Initializations

**This function is required for GOOGLE COLAB plotly view**
The following lines need to be called in **every cell** that contains visualization code:

`configure_plotly_browser_state()`

`init_notebook_mode(connected=False)`

In [3]:
def configure_plotly_browser_state():
  """Emit the RequireJS setup that lets Plotly figures render in Google Colab.

  Writes an HTML snippet into the output of the cell that calls it. The
  snippet loads RequireJS and maps the ``plotly`` module name to the Plotly
  CDN bundle. Per the notebook's instructions it must be called in every
  cell that draws a figure.

  Returns:
    None. The only effect is the HTML displayed in the current cell output.
  """
  # Import from the public IPython.display API instead of relying on the
  # `display` name injected by the IPython shell and on attribute access
  # through the internal `IPython.core.display` module path.
  from IPython.display import HTML, display

  # NOTE(review): 'plotly-latest' is a fixed CDN alias; confirm it still
  # serves the plotly.js version expected by the installed plotly package.
  display(HTML('''
        <script src="/static/components/requirejs/require.js"></script>
        <script>
          requirejs.config({
            paths: {
              base: '/static/base',
              plotly: 'https://cdn.plot.ly/plotly-latest.min.js?noext',
            },
          });
        </script>
        '''))

In [4]:
# Offline-mode helpers from plotly. `init_notebook_mode` is called here and again
# in each plotting cell; `iplot` is imported but not called directly in the cells shown.
from plotly.offline import iplot, init_notebook_mode
# Initialise plotly's notebook integration once for the session.
# NOTE(review): this uses connected=True while the plotting cells re-initialise
# with connected=False — confirm which mode is intended.
init_notebook_mode(connected=True)
# Put cufflinks into offline mode so the `.iplot()` calls below render in the
# notebook — presumably without needing a Chart Studio account; verify.
cf.go_offline(connected=True)

# Set global theme
cf.set_config_file(world_readable=True, theme='pearl')

## DATA

In [10]:
# Read the census extract and keep only the rows whose SUMLEV code is 50.
# The frequency and total bar plots below work on this filtered frame.
df = (
    pd.read_csv('census.csv')
    .loc[lambda census: census['SUMLEV'] == 50]
)
# Preview the first rows of the filtered frame
df.head()

Unnamed: 0,SUMLEV,REGION,DIVISION,STATE,COUNTY,STNAME,CTYNAME,CENSUS2010POP,ESTIMATESBASE2010,POPESTIMATE2010,POPESTIMATE2011,POPESTIMATE2012,POPESTIMATE2013,POPESTIMATE2014,POPESTIMATE2015,...,RINTERNATIONALMIG2011,RINTERNATIONALMIG2012,RINTERNATIONALMIG2013,RINTERNATIONALMIG2014,RINTERNATIONALMIG2015,RDOMESTICMIG2011,RDOMESTICMIG2012,RDOMESTICMIG2013,RDOMESTICMIG2014,RDOMESTICMIG2015,RNETMIG2011,RNETMIG2012,RNETMIG2013,RNETMIG2014,RNETMIG2015
1,50,3,6,1,1,Alabama,Autauga County,54571,54571,54660,55253,55175,55038,55290,55347,...,0.363924,0.289782,0.290347,0.3263,0.343466,7.242091,-2.915927,-3.012349,2.265971,-2.530799,7.606016,-2.626146,-2.722002,2.59227,-2.187333
2,50,3,6,1,3,Alabama,Baldwin County,182265,182265,183193,186659,190396,195126,199713,203709,...,1.011215,0.912334,0.881921,1.073855,1.095627,14.83296,17.647293,21.845705,19.243287,17.197872,15.844176,18.559627,22.727626,20.317142,18.293499
3,50,3,6,1,5,Alabama,Barbour County,27457,27457,27341,27226,27159,26973,26815,26489,...,-0.146609,-0.257424,-0.11084,-0.074366,0.0,-4.728132,-2.50069,-7.056824,-3.904217,-10.543299,-4.874741,-2.758113,-7.167664,-3.978583,-10.543299
4,50,3,6,1,7,Alabama,Bibb County,22915,22919,22861,22733,22642,22512,22549,22583,...,0.438654,0.705234,0.797272,0.93207,0.930604,-5.527043,-5.068871,-6.201001,-0.177537,0.177258,-5.088389,-4.363636,-5.403729,0.754533,1.107861
5,50,3,6,1,9,Alabama,Blount County,57322,57322,57373,57711,57776,57734,57658,57673,...,0.052136,0.329041,0.34629,0.485302,0.485559,1.807375,-1.177622,-1.748766,-2.062535,-1.36997,1.859511,-0.84858,-1.402476,-1.577232,-0.884411


In [11]:
# Summary statistics (count, mean, std, min, quartiles, max) for the numeric
# columns of the SUMLEV == 50 rows currently held in `df`.
df.describe()

Unnamed: 0,SUMLEV,REGION,DIVISION,STATE,COUNTY,CENSUS2010POP,ESTIMATESBASE2010,POPESTIMATE2010,POPESTIMATE2011,POPESTIMATE2012,POPESTIMATE2013,POPESTIMATE2014,POPESTIMATE2015,NPOPCHG_2010,NPOPCHG_2011,...,RINTERNATIONALMIG2011,RINTERNATIONALMIG2012,RINTERNATIONALMIG2013,RINTERNATIONALMIG2014,RINTERNATIONALMIG2015,RDOMESTICMIG2011,RDOMESTICMIG2012,RDOMESTICMIG2013,RDOMESTICMIG2014,RDOMESTICMIG2015,RNETMIG2011,RNETMIG2012,RNETMIG2013,RNETMIG2014,RNETMIG2015
count,3142.0,3142.0,3142.0,3142.0,3142.0,3142.0,3142.0,3142.0,3142.0,3142.0,3142.0,3142.0,3142.0,3142.0,3142.0,...,3142.0,3142.0,3142.0,3142.0,3142.0,3142.0,3142.0,3142.0,3142.0,3142.0,3142.0,3142.0,3142.0,3142.0,3142.0
mean,50.0,2.668682,5.192553,30.280076,103.572884,98262.04,98268.02,98455.4,99210.33,99969.01,100708.9,101498.2,102297.5,187.383195,754.931254,...,0.963157,1.05956,1.064171,1.206725,1.239492,-1.670721,-2.579837,-1.321452,-2.307488,-1.384129,-0.707563,-1.520276,-0.257281,-1.100763,-0.144637
std,0.0,0.803149,1.96406,15.144339,107.70406,312946.7,312958.3,313547.8,316704.0,319968.6,323135.1,326234.7,329209.7,796.741665,3847.284814,...,1.588162,1.906898,1.695318,1.855839,1.905114,10.479885,11.948278,12.268314,11.512543,12.380406,10.681113,12.299923,12.275591,11.537218,12.33941
min,50.0,1.0,1.0,1.0,1.0,82.0,82.0,83.0,90.0,81.0,89.0,87.0,89.0,-5442.0,-13926.0,...,-0.895456,-3.787879,-1.121705,-0.907853,-0.856688,-128.205128,-147.727273,-109.876862,-178.947368,-84.844838,-128.205128,-147.727273,-102.988031,-178.947368,-62.322703
25%,50.0,2.0,4.0,18.0,35.0,11114.5,11114.5,11087.75,11081.75,11015.0,11033.75,10995.0,10986.0,-19.0,-91.75,...,0.107218,0.107352,0.129411,0.170373,0.17498,-6.204546,-7.629136,-6.444499,-7.41037,-6.77351,-5.487819,-6.943265,-5.568782,-6.321287,-5.623333
50%,50.0,3.0,5.0,29.0,79.0,25872.0,25872.0,25846.0,25790.5,25834.0,25748.0,25689.5,25698.5,7.0,7.5,...,0.458653,0.440731,0.49309,0.585486,0.595054,-1.605021,-2.664319,-1.857368,-2.019618,-2.046162,-0.74607,-1.676233,-0.880087,-0.990431,-0.935521
75%,50.0,3.0,7.0,45.0,133.0,66780.0,66780.5,66912.25,66885.0,67099.5,67494.25,67700.5,67527.5,77.75,220.5,...,1.190459,1.205907,1.29955,1.511478,1.549393,2.909174,2.58847,3.392461,3.320646,3.792381,4.010888,3.904147,4.676683,4.544616,4.981018
max,50.0,4.0,9.0,56.0,840.0,9818605.0,9818700.0,9826009.0,9896602.0,9970436.0,10045180.0,10109440.0,10170290.0,15111.0,73051.0,...,24.910478,40.908521,23.009385,23.827477,23.73085,101.123596,119.193807,239.130435,146.367943,281.407035,101.123596,119.861185,239.130435,146.960126,281.407035


## **Frequency Bar Plot (Total times a name appeared)**

In [None]:
####### These Lines are important for Google Colab ######
configure_plotly_browser_state()
init_notebook_mode(connected=False)
####### Without these lines figure will be null ######

# Rows per state. The column is selected *before* aggregating, so only SUMLEV is
# counted instead of counting every column and then discarding all but one.
# The resulting Series (indexed by STNAME, named SUMLEV) is the same as before.
df.groupby('STNAME')['SUMLEV'].count().iplot(
    kind='bar', yTitle='Total Row frequency', linecolor='black', title='Total Rows related to States')

⚠️*Comment: `.count()` is used, which is why the bars show the frequency (number of rows) for each state.*

## **Total Bar Plot (Total sum for a single value)**

In [None]:
####### These Lines are important for Google Colab ######
configure_plotly_browser_state()
init_notebook_mode(connected=False)
####### Without these lines figure will be null ######

# Total 2010 census population per state. Selecting the column *before* calling
# sum() aggregates only CENSUS2010POP. Calling sum() on the whole grouped frame
# also aggregates every other column, including string columns such as CTYNAME,
# which is wasteful and (depending on the pandas version) warns or concatenates text.
# The chart title typo ('Sates') is also fixed.
df.groupby('STNAME')['CENSUS2010POP'].sum().iplot(
    kind='bar', yTitle='Total Population', linecolor='black', title='Population by States')

⚠️*Comment: `sum()` is used, which is why the total population is calculated for each state (groupby).*

## **Normal Bar Plot**

In [15]:
# Re-read the census extract, this time keeping only the rows whose SUMLEV code is 40.
# NOTE: this rebinds `df`. Re-running the earlier SUMLEV == 50 plotting cells after
# this cell will make them operate on these rows instead.
df = (
    pd.read_csv('census.csv')
    .loc[lambda census: census['SUMLEV'] == 40]
)
# Preview the first rows of the filtered frame
df.head()

Unnamed: 0,SUMLEV,REGION,DIVISION,STATE,COUNTY,STNAME,CTYNAME,CENSUS2010POP,ESTIMATESBASE2010,POPESTIMATE2010,POPESTIMATE2011,POPESTIMATE2012,POPESTIMATE2013,POPESTIMATE2014,POPESTIMATE2015,...,RINTERNATIONALMIG2011,RINTERNATIONALMIG2012,RINTERNATIONALMIG2013,RINTERNATIONALMIG2014,RINTERNATIONALMIG2015,RDOMESTICMIG2011,RDOMESTICMIG2012,RDOMESTICMIG2013,RDOMESTICMIG2014,RDOMESTICMIG2015,RNETMIG2011,RNETMIG2012,RNETMIG2013,RNETMIG2014,RNETMIG2015
0,40,3,6,1,0,Alabama,Alabama,4779736,4780127,4785161,4801108,4816089,4830533,4846411,4858979,...,1.02772,1.01984,1.002216,1.142716,1.179963,0.002295,-0.193196,0.381066,0.582002,-0.467369,1.030015,0.826644,1.383282,1.724718,0.712594
68,40,4,9,2,0,Alaska,Alaska,710231,710249,714021,722720,731228,737442,737046,738432,...,2.104763,3.764921,3.157959,3.167201,3.525637,-1.173489,-1.946424,-3.915107,-14.43891,-10.407475,0.931274,1.818497,-0.757148,-11.271709,-6.881838
98,40,4,8,4,0,Arizona,Arizona,6392017,6392307,6408208,6468732,6553262,6630799,6728783,6828065,...,1.962889,2.091538,2.218133,2.541397,2.558707,1.327489,5.24574,3.905473,6.219955,6.776501,3.290378,7.337279,6.123606,8.761352,9.335208
114,40,3,7,5,0,Arkansas,Arkansas,2915918,2915958,2922394,2938538,2949499,2957957,2966835,2978204,...,0.960939,1.078798,1.124342,1.287134,1.309663,1.365312,-0.432402,-0.442153,-1.060966,-0.407735,2.326251,0.646395,0.682189,0.226168,0.901928
190,40,4,9,6,0,California,California,37253956,37254503,37334079,37700034,38056055,38414128,38792291,39144818,...,3.910168,3.810915,4.068514,4.606197,4.637637,-1.148464,-1.163788,-1.339869,-0.862856,-1.981572,2.761704,2.647127,2.728645,3.743342,2.656065


In [None]:
####### These Lines are important for Google Colab ######
configure_plotly_browser_state()
init_notebook_mode(connected=False)
####### Without these lines figure will be null ######

# Index by state name first, then keep the single population column as a
# one-column DataFrame; the index supplies the X-axis categories.
df2 = df.set_index('STNAME')[['ESTIMATESBASE2010']]

df2.iplot(
    kind='bar',
    xTitle='State',
    yTitle='Estimated Base Population',
    title='Estimated Base Population by 2010',
)

⚠️*Comment: Estimated Population for Alabama 4.78M*

**Note:** `STNAME` had to be set as the index so that the states appear on the **X-axis**.

## **Multi-Column Comparison using Normal Bar Plot**

In [None]:
####### These Lines are important for Google Colab ######
configure_plotly_browser_state()
init_notebook_mode(connected=False)
####### Without these lines figure will be null ######

# Two population estimates per state, indexed by state name so that each state
# gets one bar per selected column.
df2 = df[['STNAME','POPESTIMATE2010','POPESTIMATE2015']].set_index('STNAME')

# The chart title spelling is corrected ('comparrison' -> 'comparison').
df2.iplot(kind='bar', xTitle='State', yTitle='Estimated Population',
    title='Estimated Population comparison between 2010 & 2015')