# Watch Me Code 2: Plotly and Cufflinks

- Plotly is a cloud-based plotting service. It uses the popular JavaScript library D3.js.
- Plotly is simple to use.
- A companion module called `cufflinks` attaches Plotly to pandas.

In [1]:
!pip install plotly
!pip install chart-studio



In [8]:
import chart_studio as plotly
import chart_studio.plotly as py
import plotly.graph_objects as go
import pandas as pd

To use plot.ly you need to sign up for a free account and then get your API credentials from https://plot.ly/settings/api.

In [5]:
# setup the credentials
# The username and API key are read from the PLOTLY_USERNAME and
# PLOTLY_API_KEY environment variables so that no secret is stored in the
# notebook. A missing variable raises KeyError instead of silently writing
# empty credentials.
# NOTE(review): the key that was previously hard-coded here should be
# regenerated at https://plot.ly/settings/api.
import os

plotly.tools.set_credentials_file(
    username=os.environ['PLOTLY_USERNAME'],
    api_key=os.environ['PLOTLY_API_KEY'],
)

In [6]:
# A small pandas DataFrame to plot: one row per subject with its grade.
grades = dict(
    subjects=['Mathematics', 'English', 'History', 'Science', 'Arts'],
    grades=[67, 60, 36, 61, 58],
)
grades_df = pd.DataFrame(data=grades)
# Leave the frame as the last expression so the notebook displays it.
grades_df

Unnamed: 0,subjects,grades
0,Mathematics,67
1,English,60
2,History,36
3,Science,61
4,Arts,58


To plot with plotly, we need:

- Data: a list of traces (one per series to draw)
- Layout: a Python dictionary of information to put on the plot, such as the title and axis labels

In [9]:
# The data is a list of traces; here it holds a single bar trace with
# subjects on the x axis and grades on the y axis.
grade_data = [
    go.Bar(
        x=grades_df['subjects'],
        y=grades_df['grades'],
    ),
]

# The figure is a dictionary with the traces under 'data' and the title and
# axis labels under 'layout'.
py.iplot(dict(
    data=grade_data,
    layout=dict(
        title='My Grades R Asum',
        xaxis=dict(title='Subjects Are Bad'),
        yaxis=dict(title='Grades'),
    ),
))

PlotlyRequestError: User cannot save file.
Account limit reached: Your account is limited to creating 100 charts. To continue, you can override or delete existing charts or you can upgrade your account at: https://plot.ly/products/cloud

Cufflinks is a Python module which "attaches" plot.ly to the dataframe (just like cufflinks "attach" to your shirt-sleeve). 

This allows you to plot similarly to pandas.

In [9]:
# Same plot as a one-liner with cufflinks!
# The x axis carries the subjects and the y axis the grades, so the axis
# titles are assigned to match (the same way round as the py.iplot version).
# NOTE(review): DataFrame.iplot is added by cufflinks, and no
# `import cufflinks` is visible in this notebook; confirm it is imported
# before running this cell.
grades_df.iplot(
    kind='bar',
    x='subjects',
    y='grades',
    title='My Grades R Awesum',
    xTitle='Subjects Are Bad',
    yTitle='Grades',
)

In [12]:
# Pie chart: slices are named by `labels` and sized by `values`
# rather than by x and y.
grades_df.iplot(
    kind='pie',
    title='My Grades R Awesum',
    labels='subjects',
    values='grades',
)

In [17]:
# The same pie with a single-hue 'greens' colorscale, and each slice
# annotated with its value and percentage.
grades_df.iplot(
    kind='pie',
    title='My Grades R Awesum',
    labels='subjects',
    values='grades',
    colorscale='greens',
    textinfo='value+percent',
)

How about an example with multiple series? For that we need to pull in another dataset.

In [18]:
# Load the Syracuse weather CSV and keep only the rows whose 'EST' date
# string starts with '2015-'.
cuse_weather_df = pd.read_csv(
    'https://raw.githubusercontent.com/mafudge/datasets/master/weather/syracuse-ny.csv'
).loc[lambda frame: frame['EST'].str.startswith('2015-')]
# Show the first five rows.
cuse_weather_df.head(5)

Unnamed: 0,EST,Max TemperatureF,Mean TemperatureF,Min TemperatureF,Max Dew PointF,MeanDew PointF,Min DewpointF,Max Humidity,Mean Humidity,Min Humidity,...,Max VisibilityMiles,Mean VisibilityMiles,Min VisibilityMiles,Max Wind SpeedMPH,Mean Wind SpeedMPH,Max Gust SpeedMPH,PrecipitationIn,CloudCover,Events,WindDirDegrees
6480,2015-1-1,34,27.0,20,25,13,8,91,64,36,...,10,8,1,28,12,33.0,0.01,8,Snow,248
6481,2015-1-2,31,23.0,15,26,23,15,100,82,63,...,10,4,0,22,9,28.0,0.03,8,Fog-Snow,285
6482,2015-1-3,40,28.0,15,37,23,15,100,87,74,...,10,6,1,21,10,29.0,0.97,8,Rain-Snow,84
6483,2015-1-4,55,44.0,33,51,39,27,100,88,76,...,10,10,7,35,13,45.0,0.03,8,Rain-Snow,259
6484,2015-1-5,33,22.0,10,28,10,-3,88,70,51,...,10,8,1,37,22,47.0,0.01,6,Snow,274


In [22]:
# One marker style per series.
r = {'color': 'red'}
g = {'color': 'green'}
b = {'color': 'blue'}

# Three scatter traces over the same dates: max, mean and min temperature.
# Each tuple is (column, legend name, drawing mode, marker style).
grade_data = [
    go.Scatter(
        x=cuse_weather_df['EST'],
        y=cuse_weather_df[column],
        mode=draw_mode,
        name=legend_name,
        marker=marker_style,
    )
    for column, legend_name, draw_mode, marker_style in (
        ('Max TemperatureF', "Max Temp", "lines", r),
        ('Mean TemperatureF', "Mean Temp", "lines+markers", g),
        ('Min TemperatureF', "Min Temp", "lines", b),
    )
]

py.iplot(dict(
    data=grade_data,
    layout=dict(
        title='Syracuse Weather 2015',
        xaxis=dict(title='Day of the Year'),
        yaxis=dict(title='Temperature Deg F'),
    ),
))

Here's another example with the Exam Scores Dataset. It shows you how much more expressive plot.ly can be.

In [23]:
# Load the exam scores CSV, ordered by 'Student Score' ascending.
scores_df = pd.read_csv(
    'https://raw.githubusercontent.com/mafudge/datasets/master/exam-scores/exam-scores.csv'
).sort_values(by='Student Score')
# Show the first six rows of the sorted frame.
scores_df[:6]

Unnamed: 0,Class Section,Exam Version,Completion Time,Made Own Study Guide,Did Exam Prep Assignment,Studied In Groups,Student Score,Percentage,Letter Grade
23,M01,D,35,?,?,?,13,43.30%,F
8,M01,B,20,N,N,Y,13,43.30%,F
21,M01,D,25,N,N,Y,15,50.00%,D
29,M02,A,20,N,N,N,16,53.30%,D
52,M02,C,40,Y,Y,N,16,53.30%,D
56,M02,C,60,N,N,Y,16,53.30%,D


In [29]:
# Marker-only scatter of completion time against letter grade, with each
# marker's diameter taken from the student's score.
grade_data = [
    go.Scatter(
        x=scores_df['Letter Grade'],
        y=scores_df['Completion Time'],
        mode="markers",
        marker=dict(
            size=scores_df['Student Score'],
            sizemode='diameter',
            sizeref=1.0,
        ),
    ),
]

py.iplot(dict(
    data=grade_data,
    layout=dict(
        title='Exam Grades',
        xaxis=dict(title='Letter Grade'),
        yaxis=dict(title='Time To Complete Exam'),
    ),
))

In [34]:
# Heat map: exam version on x, completion time on y, and the student score
# as the z (colour) value.
grade_data = [
    go.Heatmap(
        x=scores_df['Exam Version'],
        y=scores_df['Completion Time'],
        z=scores_df['Student Score'],
    ),
]

py.iplot(dict(
    data=grade_data,
    layout=dict(
        title='Exam Grades Heat Map',
        xaxis=dict(title='Exam Version'),
        yaxis=dict(title='Time To Complete Exam'),
    ),
))

In [35]:
# A manual sample, showing you don't need to use Pandas at all.

# The traces come from the `go` namespace, the only graph-objects name this
# notebook imports; bare `Scatter` is not defined here.
trace0 = go.Scatter(
    x=[1, 2, 3, 4, 5, 6, 7, 8],
    y=[10, 15, 13, 17, 15, 12, 10, 18],
    mode="markers",
    name="series 2"
)
trace1 = go.Scatter(
    x=[1, 2, 3, 4, 5, 6, 7, 8],
    y=[16, 5, 11, 9, 16, 10, 14, 12],
    mode="lines",  # valid scatter modes combine "lines", "markers" and "text"
    name="series 1"
)
# A plain list of traces is all the 'data' key needs, as in the earlier
# cells; no `Data` wrapper is required.
data = [trace0, trace1]

py.iplot({ 'data': data,
            'layout': {
               'title': 'Sample Chart',
               'xaxis': {
                 'title': 'X Axis'},
               'yaxis': {
                 'title': 'Y Axis'}
        }})

In [11]:
import chart_studio.plotly as py
import plotly.graph_objects as go
import pandas as pd

# Load the gapminder 2007 CSV.
df = pd.read_csv('https://raw.githubusercontent.com/plotly/datasets/master/gapminder2007.csv')

# One marker trace of life expectancy against GDP per capita, with the
# country name as the per-point text.
fig = go.Figure(
    go.Scatter(
        x=df['gdpPercap'],
        y=df['lifeExp'],
        text=df['country'],
        mode='markers',
        name='2007',
    )
)
# Label both axes and put the x axis on a log scale.
fig.update_yaxes(title_text='Life Expectancy')
fig.update_xaxes(type='log', title_text='GDP per Capita')

py.iplot(fig, filename='pandas-multiple-scatter')

PlotlyRequestError: User cannot save file.
Account limit reached: Your account is limited to creating 100 charts. To continue, you can override or delete existing charts or you can upgrade your account at: https://plot.ly/products/cloud