# Visualization using `Plotly API`

In [2]:
%matplotlib inline

import numpy as np
import pandas as pd
# While importing module, it looks for credential file and uses it to sign in to API server for communications.
import plotly.plotly as py

pd.set_option('precision', 2)
pd.set_option('notebook_repr_html', True)
# Other useful tools
import plotly.tools as tls

# Graph object to piece together all Plotly plots
import plotly.graph_objs as go

# Cufflinks binds plotly to pandas dataframes in IPython
import cufflinks as cf

# Insert Credentials
# tls.set_credentials_file(username='', api_key='')

# Write credentials to Python dictionary
# tls.get_credentials_file()


cf.set_config_file(theme='ggplot', sharing='public', offline=False, world_readable=True)


## Dataset

In [3]:
def us_pop_class(p):
    """Return the population-band label for a city population ``p``.

    Every band has an inclusive upper bound, matching the label text
    (e.g. exactly 100000 belongs to '10K+ ~ 100K'). The original used a
    strict ``<`` for the 100K boundary only, which pushed exactly 100000
    into the next band.

    Parameters
    ----------
    p : number
        Population count.

    Returns
    -------
    str
        One of '0 ~ 10K', '10K+ ~ 100K', '100K+ ~ 1M', '1M+ ~ 2M',
        '2M+ ~ 5M' or '5M+'. Values that compare False against every
        bound (e.g. NaN) fall through to '5M+'.
    """
    # (inclusive upper bound, label), in ascending order
    bands = (
        (10000, '0 ~ 10K'),
        (100000, '10K+ ~ 100K'),
        (1000000, '100K+ ~ 1M'),
        (2000000, '1M+ ~ 2M'),
        (5000000, '2M+ ~ 5M'),
    )
    for upper, label in bands:
        if p <= upper:
            return label
    return '5M+'

In [5]:
# 2014 US city data from the plotly datasets repository
us_city = pd.read_csv(
    'https://raw.githubusercontent.com/plotly/datasets/master/2014_us_cities.csv'
)
# Replace the file's header names with readable ones
us_city.columns = ['City', 'Population', 'Lat', 'Long']
# Bucket each population into its band label
us_city['Label'] = us_city['Population'].apply(us_pop_class)
us_city.head()

Unnamed: 0,City,Population,Lat,Long,Label
0,New York,8287238,40.73,-73.99,5M+
1,Los Angeles,3826423,34.05,-118.24,2M+ ~ 5M
2,Chicago,2705627,41.88,-87.62,2M+ ~ 5M
3,Houston,2129784,29.76,-95.37,2M+ ~ 5M
4,Philadelphia,1539313,39.95,-75.16,1M+ ~ 2M


In [6]:
# Bubble chart of city positions, sized by population and categorised by `Label`
us_city.iplot(
    kind='bubble',
    y='Lat',
    x='Long',
    size='Population',
    text='Label',
    categories='Label',
    title='Populations in US Cities',
    xTitle='Longitude',
    yTitle='Latitude',
)

In [7]:
url = 'https://raw.githubusercontent.com/cs109/2014_data/master/mtcars.csv'
# Comma-separated file (the read_csv default); column 0 supplies the row labels
mtcars = pd.read_csv(url, index_col=0)

print(mtcars.shape)
mtcars.head()


(32, 11)


Unnamed: 0,mpg,cyl,disp,hp,drat,wt,qsec,vs,am,gear,carb
Mazda RX4,21.0,6,160.0,110,3.9,2.62,16.46,0,1,4,4
Mazda RX4 Wag,21.0,6,160.0,110,3.9,2.88,17.02,0,1,4,4
Datsun 710,22.8,4,108.0,93,3.85,2.32,18.61,1,1,4,1
Hornet 4 Drive,21.4,6,258.0,110,3.08,3.21,19.44,1,0,3,1
Hornet Sportabout,18.7,8,360.0,175,3.15,3.44,17.02,0,0,3,2


In [130]:
# Bar chart with inner text labels: the ten largest `disp` values

mt2 = mtcars['disp']
(
    mt2
    .sort_values(ascending=False)
    .head(10)
    .iplot(kind='bar', textposition='inside')
)


In [121]:
# Data and Colors

# Generate the sample frame here so this cell runs on a fresh kernel: the
# original read `df` before any earlier cell had defined it.
dfx = cf.datagen.bubble(4)[['categories', 'size']]
ctz_blue_colors = cf.colors.color_range('blue', 5)

# Donut chart (a pie with a 0.4 hole), labelled by category with label + percent outside
dfx.iplot(kind='pie', labels='categories', values='size', hole=0.4, legend=False,
          pull=0.02, textposition='outside', textinfo='label+percent', colors=ctz_blue_colors)

In [10]:

# Sample bubble-chart frame from cufflinks' data generator
df = cf.datagen.bubble(4)
df.head(5)

Unnamed: 0,categories,size,text,x,y
0,category1,43,TWR.FX,-0.366,-0.7
1,category1,67,EXL.RE,0.428,0.33
2,category1,97,ECL.CH,-0.0679,-0.25
3,category1,55,SYI.EN,0.741,0.26
4,category1,58,MXD.CD,0.00907,0.56


In [27]:
# Bubble chart of the generated sample, one category per `categories` value
df.iplot(
    kind='bubble',
    x='x',
    y='y',
    size='size',
    categories='categories',
)

## `Histogram`

In [18]:
# Histogram of mpg in 10 bins. The x axis carries the variable and the y axis
# the bin count; the original passed the chart title as the x-axis title and
# the variable name as the y-axis title.
mtcars.mpg.iplot(kind='hist', bins=10, title='Distribution of MPG',
                 xTitle='Miles Per Gallon', yTitle='Count', filename='pandas_plot/histogram')

## Relationships between `CYL` and `MPG`

In [21]:
# Marker-only scatter of mpg against cylinder count
mtcars.loc[:, ['cyl', 'mpg']].iplot(
    kind='scatter',
    x='cyl',
    y='mpg',
    mode='markers',
    colors='darkred',
    xTitle='Cylinder',
    yTitle='MPG',
    title='Relationship between cylinders and MPG',
    filename='pandas_plot/scatter',
)

## Relationship between `Horsepower` and `MPG`

In [5]:
# Marker-only scatter of mpg against horsepower
mtcars.loc[:, ['hp', 'mpg']].iplot(
    kind='scatter',
    mode='markers',
    x='hp',
    y='mpg',
    xTitle='Horsepower',
    yTitle='MPG',
    colors='green',
    title='Relationship between horsepower and MPG',
    filename='pandas_plot/scatter2',
)

## Generate `Scatter Matrix`

In [6]:
# Pairwise scatter matrix of the mpg, hp and cyl columns
df = mtcars.loc[:, ['mpg', 'hp', 'cyl']]
df.scatter_matrix(filename='pandas_plot/scatter_matrix')

## `Spread()` and `Ratio()` charts

In [7]:
# Ratio chart of the mpg and hp columns
mtcars.loc[:, ['mpg', 'hp']].iplot(
    kind='ratio',
    filename='pandas_plot/ratio_plot',
)

In [8]:
# Spread chart of the cyl and mpg columns
mtcars.loc[:, ['cyl', 'mpg']].iplot(
    kind='spread',
    filename='pandas_plot/spread_plot',
)

## `Boxplot()`

In [3]:
# IPython help query: shows the documentation for `iplot`. This is a
# development aid, not part of the analysis; consider removing before sharing.
mtcars.iplot?

In [27]:
# Box plots for five of the mtcars columns
mtcars.loc[:, ['drat', 'wt', 'gear', 'carb', 'am']].iplot(
    kind='box',
    filename='pandas_plot/box_plot',
)

## `Bubble Plot`

In [10]:
# Bubble chart of horsepower against mpg, with bubble size taken from `cyl`
mtcars.iplot(
    kind='bubble',
    x='mpg',
    y='hp',
    size='cyl',
    xTitle='MPG',
    yTitle='Horsepower',
    filename='pandas_plot/bubble',
)

## `Subplots`

In [11]:
# One filled subplot per column, stacked in a 3x1 grid with a shared x axis
df = mtcars.loc[:, ['drat', 'gear', 'carb']]
df.iplot(
    subplots=True,
    shape=(3, 1),
    shared_xaxes=True,
    fill=True,
    vertical_spacing=.05,
    filename='pandas_plot/subplot',
)

### Area Plot

In [25]:
# Demo data: 10 rows x 3 columns of uniform random values. A locally seeded
# generator keeps the figure identical on every run without touching the
# global NumPy random state.
df = pd.DataFrame(np.random.RandomState(42).rand(10, 3), columns=['a', 'b', 'c'])
# Three filled scatter subplots side by side, sharing the y axis
df.iplot(kind='scatter', subplots=True, shape=(1,3), fill=True, shared_yaxes=True,
         filename='pandas_plot/areaplot')

## `Bubble Charts` in `Pandas`

In [14]:
# Gapminder five-year data set (tab-separated text file)
data_url = ('http://www.stat.ubc.ca/~jenny/notOcto/STAT545A/examples/'
            'gapminder/data/gapminderDataFiveYear.txt')
dframe = pd.read_csv(data_url, sep='\t')

# cf.set_config_file(offline=False, world_readable=True, theme='pearl')


def GapMinderByYear(yr, data=None):
    """Return the rows whose ``year`` column equals ``yr``.

    Parameters
    ----------
    yr : int
        Year to select.
    data : pandas.DataFrame, optional
        Frame with a ``year`` column. Defaults to the notebook-level
        ``dframe``, so existing ``GapMinderByYear(yr)`` calls are unchanged.

    Returns
    -------
    pandas.DataFrame or None
        The matching rows. When ``yr`` does not occur in the ``year``
        column, a message with the smallest and largest available year is
        printed and None is returned; callers must handle the None case.
    """
    if data is None:
        # Fall back to the global frame loaded earlier in the notebook
        data = dframe

    years = data['year'].values
    if yr not in years:
        print("No Data for given year. Use {} to {}".format(years.min(), years.max()))
        return None
    return data[data['year'] == yr]

    
    
# Year to plot
set_year = 2002
gap_data = GapMinderByYear(set_year)
print(gap_data.head(3))

# Bubble chart of life expectancy against GDP per capita; bubble size from
# `pop`, text from `country`
gap_data.iplot(
    kind='bubble',
    x='gdpPercap',
    y='lifeExp',
    size='pop',
    text='country',
    theme='ggplot',
    xTitle='GDP per Capita',
    yTitle='Life Expectancy',
    filename='pandas_plot/new_bubble',
)



        country  year       pop continent  lifeExp    gdpPercap
10  Afghanistan  2002  25268405      Asia   42.129   726.734055
22      Albania  2002   3508512    Europe   75.651  4604.211737
34      Algeria  2002  31287142    Africa   70.994  5288.040382


In [5]:
# IPython help query: shows the documentation for `iplot`. This is a
# development aid, not part of the analysis; consider removing before sharing.
mtcars.iplot?