In [40]:
%matplotlib inline

In [41]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import json
import matplotlib
import seaborn as sns

## Charting with Plotly

For static charts, presented as PNGs or SVGs, Matplotlib is eminently customizable,
though its API could be more intuitive. But if you want your charts to have any
dynamic/interactive elements, for example the ability to change or filter the dataset
with buttons or selectors, then you will need a different charting library, and this is
where Plotly comes in.

In [42]:
# Read our cleaned Nobel prize dataset into a dataframe.
# The `with` block closes the file handle as soon as pandas has parsed it;
# a bare open() passed straight to read_json is never explicitly closed.
with open('data/nobel_winners_cleaned.json') as nobel_file:
    df = pd.read_json(nobel_file)

In [43]:
# Index of every year from 1901 to 2014 inclusive; reindexing against it
# gives each of those years a row, even one with no prizes recorded.
new_index = pd.Index(np.arange(1901, 2015), name='year')

# Number of prizes per (year, country), pivoted so each country becomes a
# column; missing year/country combinations are filled with zero.
by_year_nat_sz = (
    df.groupby(['year', 'country'])
    .size()
    .unstack()
    .reindex(new_index)
    .fillna(0)
)

# Our continental country list, created by selecting the biggest two
# or three winners in each of the three regions compared.
regions = [
    {'label': 'N. America', 'countries': ['United States', 'Canada']},
    {'label': 'Europe', 'countries': ['United Kingdom', 'Germany', 'France']},
    {'label': 'Asia', 'countries': ['Japan', 'Russia', 'India']},
]

# Add one column per region, holding the row-wise sum of its member
# countries' columns.
for region in regions:
    member_columns = by_year_nat_sz[region['countries']]
    by_year_nat_sz[region['label']] = member_columns.sum(axis=1)

# A quick Matplotlib view of the cumulative regional totals would be:
# by_year_nat_sz[[r['label'] for r in regions]].cumsum().plot()

In [44]:
# Select just the region columns and turn the yearly counts into running
# (cumulative) totals.
region_labels = [r['label'] for r in regions]
df_regions = by_year_nat_sz[region_labels].cumsum()
print(df_regions)

country  N. America  Europe  Asia
year                             
1901            0.0     4.0   0.0
1902            0.0     7.0   0.0
1903            0.0    10.0   0.0
1904            0.0    13.0   1.0
1905            0.0    15.0   1.0
...             ...     ...   ...
2010          327.0   230.0  36.0
2011          333.0   231.0  36.0
2012          338.0   233.0  37.0
2013          346.0   235.0  37.0
2014          349.0   239.0  40.0

[114 rows x 3 columns]


### Plotly Express
Plotly provides an express module which enables fast chart sketches, great for
exploratory iteration in a notebook. This module has high-level objects for line charts, bar charts, etc., and can take pandas DataFrames as arguments, interpreting their columnar data.
The regional dataframe we just created can be used directly by Plotly
Express to build a line chart in a couple of lines:

In [45]:
import plotly.express as px

# df_regions is passed as-is: one line per column. Its column index is
# named 'country', so that name is used to vary the dash style and is
# relabelled 'Regions' for display.
fig = px.line(
    df_regions,
    labels={'country': 'Regions'},
    line_dash='country',
    line_dash_sequence=['solid', 'dash', 'dot'],
    height=800,
    width=800,
)
fig.show()

### Plotly Graph-objects
Using Plotly graph-objects involves a little more boilerplate code but the pattern is
essentially the same whether creating bar charts, violin plots, maps, etc. The idea is to use an array of graph-objects, such as scattered points (lines in line mode), bars, candles, boxes, etc., as data for the figure. A layout object is used to provide other chart
features.

In [39]:
import plotly.graph_objs as go

# Dash styles are reused cyclically, so adding a fourth region to `regions`
# does not raise an IndexError.
dash_styles = ['solid', 'dash', 'dot']

# Build one line trace per region from the cumulative totals in df_regions.
traces = []
for i, region in enumerate(regions):
    name = region['label']
    traces.append(
        go.Scatter(
            x=df_regions.index,  # years
            y=df_regions[name],  # cum. prizes
            name=name,
            mode="lines",
            # Doubled braces survive the f-string as Plotly's %{x} / %{y}
            # placeholders. The y value is a prize count, not money, so it
            # carries no '$' prefix.
            hovertemplate=f"{name}<br>%{{x}}<br>%{{y}}<extra></extra>",
            line=dict(dash=dash_styles[i % len(dash_styles)]),
        )
    )

# The layout object carries the figure size and axis titles.
layout = go.Layout(
    height=800,
    width=800,
    xaxis_title="year",
    yaxis_title="cumulative prizes",
)
fig = go.Figure(traces, layout)
fig.show()

### Mapping with Plotly
Another of Plotly’s big strengths is its mapping libraries, and particularly its ability to
integrate the hugely powerful Mapbox ecosystem, one of the most powerful tile-based
mapping resources for the web. Mapbox’s tiling system is fast and efficient and opens
up the possibility of ambitious mapping visualizations.

In [8]:
# Count prizes for each (country, category) pair, then pivot the category
# level into columns so there is one row per country.
prizes_by_country_category = df.groupby(['country', 'category']).size()
df_country_category = prizes_by_country_category.unstack()
# Row-wise sum across the category columns gives each country's total.
df_country_category['Total'] = df_country_category.sum(axis=1)
df_country_category.head(3)  # top three rows

category,Chemistry,Economics,Literature,Peace,Physics,Physiology or Medicine,Total
country,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
Argentina,1.0,,,2.0,,2.0,5.0
Australia,,1.0,1.0,,1.0,6.0,9.0
Austria,3.0,1.0,1.0,2.0,4.0,4.0,15.0


In [9]:
# Keep only the countries with more than two prizes overall. The .copy()
# makes the result an independent frame that is safe to add columns to.
more_than_two_prizes = df_country_category['Total'] > 2
df_country_category = df_country_category.loc[more_than_two_prizes].copy()
df_country_category

category,Chemistry,Economics,Literature,Peace,Physics,Physiology or Medicine,Total
country,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
Argentina,1.0,,,2.0,,2.0,5.0
Australia,,1.0,1.0,,1.0,6.0,9.0
Austria,3.0,1.0,1.0,2.0,4.0,4.0,15.0
Belgium,1.0,,1.0,3.0,1.0,4.0,10.0
Canada,4.0,1.0,1.0,1.0,2.0,2.0,11.0
China,,,1.0,2.0,2.0,,5.0
Denmark,1.0,,3.0,1.0,3.0,5.0,13.0
Egypt,1.0,,1.0,2.0,,,4.0
France,8.0,2.0,16.0,9.0,12.0,12.0,59.0
Germany,28.0,1.0,8.0,4.0,23.0,16.0,80.0


In [10]:
# If not already installed, install plotly and geopy libraries

!pip install geopy



In [11]:
from geopy.geocoders import Nominatim

In [12]:
# Create the Nominatim geocoder, passing a user_agent string that names this app
geolocator = Nominatim(user_agent="nobel_prize_app")

In [13]:
# Check the first five countries for geolocator data
for name in df_country_category.index[:5]:
    location = geolocator.geocode(name)
    print("Name: ", name)
    # geocode() gives back None when it finds no match, so guard before
    # reading attributes off the result (the full lookup loop does the same).
    if location is None:
        print("No coords for %s" % name)
        continue
    print("Coords: ", (location.latitude, location.longitude))
    print("Raw details: ", location.raw)

Name:  Argentina
Coords:  (-34.9964963, -64.9672817)
Raw details:  {'place_id': 297135159, 'licence': 'Data © OpenStreetMap contributors, ODbL 1.0. https://osm.org/copyright', 'osm_type': 'relation', 'osm_id': 286393, 'boundingbox': ['-55.1925709', '-21.7808568', '-73.5605371', '-53.6374515'], 'lat': '-34.9964963', 'lon': '-64.9672817', 'display_name': 'Argentina', 'class': 'boundary', 'type': 'administrative', 'importance': 0.9201041945616906, 'icon': 'https://nominatim.openstreetmap.org/ui/mapicons/poi_boundary_administrative.p.20.png'}
Name:  Australia
Coords:  (-24.7761086, 134.755)
Raw details:  {'place_id': 296930094, 'licence': 'Data © OpenStreetMap contributors, ODbL 1.0. https://osm.org/copyright', 'osm_type': 'relation', 'osm_id': 80500, 'boundingbox': ['-55.3228175', '-9.0880125', '72.2461932', '168.2261259'], 'lat': '-24.7761086', 'lon': '134.755', 'display_name': 'Australia', 'class': 'boundary', 'type': 'administrative', 'importance': 0.9621350639151115, 'icon': 'https://

In [14]:
from geopy.extra.rate_limiter import RateLimiter

# Nominatim is a shared public service, so space the requests out: the
# RateLimiter wrapper waits at least one second between calls. With its
# default settings it also retries failed requests and returns None if they
# keep failing, which the `if location` check below already handles.
geocode = RateLimiter(geolocator.geocode, min_delay_seconds=1)

# Map each country name to its latitude / longitude.
lats = {}
lons = {}
for name in df_country_category.index:
    location = geocode(name)
    if location:
        lats[name] = location.latitude
        lons[name] = location.longitude
    else:
        # No match (or the lookup failed): report it and leave the country
        # out of both dicts.
        print("No coords for %s" % name)

In [15]:
# Attach the coordinates as two new columns. Each Series built from a
# {country: value} dict is aligned on the dataframe's country index, so
# countries missing from the dicts end up with NaN.
df_country_category['Lat'] = pd.Series(lats)
df_country_category['Lon'] = pd.Series(lons)

In [16]:
def calc_marker_radius(size, scale=5):
    """Return a marker radius whose circle area grows linearly with ``size``.

    The radius is ``sqrt(size / pi)`` multiplied by ``scale``, so doubling
    ``size`` doubles the marker's area rather than its radius.
    """
    return scale * np.sqrt(size / np.pi)

In [17]:
# import the express module
import plotly.express as px

# Marker sizes: pass each country's prize total through calc_marker_radius
# using a scale of 16.
size = df_country_category['Total'].apply(calc_marker_radius, args=(16,))

# scatter_mapbox reads marker positions from the 'Lat' and 'Lon' columns and
# takes our calculated 'size' array for the marker sizes. Zoom indicates the
# position of the 'camera' above the earth; 0.7 is used here for a global view.
fig = px.scatter_mapbox(
    df_country_category,
    lat="Lat",
    lon="Lon",
    hover_name=df_country_category.index,
    hover_data=['Total'],
    color_discrete_sequence=["olive"],
    zoom=0.7,
    size=size,
)
# Map style, figure size and zero margins applied in one layout update.
fig.update_layout(
    mapbox_style="carto-positron",
    width=800,
    height=450,
    margin={"r": 0, "t": 0, "l": 0, "b": 0},
)
fig.show()

In [18]:
import plotly as py
import plotly.graph_objs as go
import plotly.figure_factory as ff
from plotly.offline import download_plotlyjs, init_notebook_mode, plot, iplot
init_notebook_mode(connected=True)

import pandas as pd

import os

# Mapbox access token. A MAPBOX_ACCESS_TOKEN environment variable takes
# precedence, so you can use your own token without editing the notebook;
# otherwise fall back to the token this notebook ships with.
# NOTE(review): never paste a private token into this cell.
mapbox_access_token = os.environ.get(
    "MAPBOX_ACCESS_TOKEN",
    "pk.eyJ1Ijoia3lyYW4tZGFsZSIsImEiOiJjanZkbWE0cjkwNjkxNDBwYW0xb3p4MmtqIn0.WP9fQgGMgu8_nmMxJwFsbg",
)

df_cc = df_country_category
# Plotly will take an array of latitude and longitudes, columns in our dataframe
site_lat = df_cc.Lat
site_lon = df_cc.Lon
totals = df_cc.Total
locations_name = df_cc.index

layout = go.Layout(
    title='Nobel prize totals by country',
    hovermode='closest',
    showlegend=False,
    margin={'l': 0, 't': 0, 'b': 0, 'r': 0},
    mapbox=dict(
        accesstoken=mapbox_access_token,
        # We can set map details here including center, pitch and bearing.
        # Try playing with these, e.g.:
        #   bearing=0,
        #   center=dict(lat=38, lon=-94),
        #   pitch=0,
        zoom=0.7,
        style='light',
    ),
    width=875,
    height=450,
)

traces = [
    go.Scattermapbox(
        lat=site_lat,
        lon=site_lon,
        mode='markers',
        marker=dict(
            # Marker size is derived from each country's prize total
            size=totals.apply(calc_marker_radius, args=(7,)),
            color='olive',
            opacity=0.8,
        ),
        # One hover label per marker, pairing each country name with its total
        text=[
            f'{country} won {int(total)} total prizes'
            for country, total in zip(locations_name, totals)
        ],
        hoverinfo='text',
    )
]

fig = go.Figure(traces, layout=layout)
fig.show()

### Adding custom controls with Plotly
One of the cool features of Plotly’s interactive maps is the ability to add custom controls
in Python which can be ported to the web as HTML+JS controls. The control
API is a little clunky, in my opinion, and limited to a small set of controls, but the
ability to add dataset selectors, sliders, filters and the like is a great asset. Here we’ll
add a few buttons to our Nobel map, allowing the user to filter the dataset by category.

In [19]:
# Print the first two rows of the per-country table as plain text
print(df_country_category.head(2))

category   Chemistry  Economics  Literature  Peace  Physics  \
country                                                       
Argentina        1.0        NaN         NaN    2.0      NaN   
Australia        NaN        1.0         1.0    NaN      1.0   

category   Physiology or Medicine  Total        Lat         Lon  
country                                                          
Argentina                     2.0    5.0 -34.996496  -64.967282  
Australia                     6.0    9.0 -24.776109  134.755000  


In [20]:
# Replace missing prize counts with zero, but leave 'Lat' / 'Lon' untouched:
# a country that could not be geocoded must keep its missing coordinates
# rather than being moved to latitude 0, longitude 0.
prize_columns = df_country_category.columns.difference(['Lat', 'Lon'])
df_country_category[prize_columns] = df_country_category[prize_columns].fillna(0)

In [21]:
import plotly as py
import plotly.graph_objs as go
import plotly.figure_factory as ff
from plotly.offline import download_plotlyjs, init_notebook_mode, plot, iplot
init_notebook_mode(connected=True)

import pandas as pd

import os

# Mapbox access token. A MAPBOX_ACCESS_TOKEN environment variable takes
# precedence, so you can use your own token without editing the notebook;
# otherwise fall back to the token this notebook ships with.
# NOTE(review): never paste a private token into this cell.
mapbox_access_token = os.environ.get(
    "MAPBOX_ACCESS_TOKEN",
    "pk.eyJ1Ijoia3lyYW4tZGFsZSIsImEiOiJjanZkbWE0cjkwNjkxNDBwYW0xb3p4MmtqIn0.WP9fQgGMgu8_nmMxJwFsbg",
)

# The dataframe columns we want a map layer (and a button) for.
categories = ['Total', 'Chemistry', 'Economics', 'Literature', 'Peace',
              'Physics', 'Physiology or Medicine']
df_cc = df_country_category
# Plotly will take an array of latitude and longitudes, columns in our dataframe
site_lat = df_cc.Lat
site_lon = df_cc.Lon
# The country names (the dataframe index) are used in the hover labels
locations_name = df_cc.index

layout = go.Layout(
    # This map shows Nobel prizes; the title previously read
    # 'Covid virus cases totals by country'.
    title='Nobel prize totals by country',
    hovermode='closest',
    showlegend=False,
    margin={'l': 0, 't': 0, 'b': 0, 'r': 0},
    mapbox=dict(
        accesstoken=mapbox_access_token,
        # We can set map details here including center, pitch and bearing.
        # Try playing with these, e.g.:
        #   bearing=0,
        #   center=dict(lat=38, lon=-94),
        #   pitch=0,
        zoom=0.75,
        style='light',
    ),
    width=875,
    height=450,
)

fig = go.Figure(layout=layout)

# One hex color per category trace, in the same order as `categories`
colors = ['#1b9e77', '#d95f02', '#7570b3', '#e7298a', '#66a61e', '#e6ab02', '#a6761d']
# We'll generate one button per category
buttons = []

# Only this category's markers are visible when the map first renders
default_category = 'Total'
for i, category in enumerate(categories):
    # The add_trace method adds chart objects one at a time instead of all
    # together using a data array
    fig.add_trace(
        go.Scattermapbox(
            lat=site_lat,
            lon=site_lon,
            mode='markers',
            marker=dict(
                size=df_cc[category].apply(calc_marker_radius, args=(7,)),
                color=colors[i],
                opacity=0.8,
            ),
            # One hover label per marker, pairing each country name with its
            # count for this category
            text=[
                f'{country} prizes for {category}: {int(count)}'
                for country, count in zip(locations_name, df_cc[category])
            ],
            hoverinfo='text',
            visible=(category == default_category),
        )
    )
    # Visibility mask with one boolean per trace: True only at this
    # category's position, e.g. 'Chemistry' -> [False, True, False, ...].
    # Clicking the button applies the mask, showing just that trace.
    mask = [other == category for other in categories]
    buttons.append(
        dict(
            label=category,
            method="update",
            args=[{"visible": mask}],
        )
    )

# We update our fig layout by adding the list of buttons we just generated to the figure.
# The x and y parameters allow positioning of the button row and we can also set the buttons' direction
fig.layout.update(
    updatemenus=[
        dict(
            type="buttons",
            direction="down",
            # Highlight the button that matches the initially visible trace
            active=categories.index(default_category),
            x=0.0,
            xanchor='left',
            y=0.65,
            showactive=True,  # show the last button clicked
            buttons=buttons,
        )
    ]
)

fig.show()

### From notebook to the web with Plotly

In [36]:
# Plot data: one line trace per region, built from the cumulative totals.
# Dash styles are reused cyclically, so adding a fourth region to `regions`
# does not raise an IndexError.
dash_styles = ['solid', 'dash', 'dot']

traces = []
for i, region in enumerate(regions):
    name = region['label']
    traces.append(
        go.Scatter(
            y=df_regions[name],  # cum. prizes
            x=df_regions.index,  # years
            name=name,
            mode="lines",
            # Doubled braces survive the f-string as Plotly's %{x} / %{y}
            # placeholders. The y value is a prize count, not money, so it
            # carries no '$' prefix.
            hovertemplate=f"{name}<br>%{{x}}<br>%{{y}}<extra></extra>",
            line=dict(dash=dash_styles[i % len(dash_styles)]),
        )
    )
# No explicit width/height is set in this layout.
layout = go.Layout(xaxis_title="year", yaxis_title="cumulative prizes")
fig = go.Figure(traces, layout)
fig.show()

In [23]:
# Bring in Plotly's offline helpers and initialise notebook mode;
# `plot` is used in the following cell to export the figure.
from plotly.offline import download_plotlyjs, init_notebook_mode, plot, iplot
init_notebook_mode(connected=True)

In [24]:
# Render the current figure as an HTML <div> string. With
# include_plotlyjs="cdn" the markup loads plotly.js through a <script> tag
# pointing at cdn.plot.ly, as seen in the output below.
embed_string = plot(fig, output_type='div', include_plotlyjs="cdn")
# Echo the raw markup; it is long because the chart's data is embedded in it.
embed_string

'<div>                        <script type="text/javascript">window.PlotlyConfig = {MathJaxConfig: \'local\'};</script>\n        <script src="https://cdn.plot.ly/plotly-2.14.0.min.js"></script>                <div id="5a745dd1-1313-48fd-ba4f-c8354341c0d2" class="plotly-graph-div" style="height:100%; width:100%;"></div>            <script type="text/javascript">                                    window.PLOTLYENV=window.PLOTLYENV || {};                                    if (document.getElementById("5a745dd1-1313-48fd-ba4f-c8354341c0d2")) {                    Plotly.newPlot(                        "5a745dd1-1313-48fd-ba4f-c8354341c0d2",                        [{"hovertemplate":"N. America<br>%{x}<br>$%{y}<extra></extra>","mode":"lines","name":"N. America","x":[1901,1902,1903,1904,1905,1906,1907,1908,1909,1910,1911,1912,1913,1914,1915,1916,1917,1918,1919,1920,1921,1922,1923,1924,1925,1926,1927,1928,1929,1930,1931,1932,1933,1934,1935,1936,1937,1938,1939,1940,1941,1942,1943,1944,1945,1946,