In this notebook I'll tackle two case studies:

# Dashboard (1) UK Population

This dashboard will display information about the UK population.
We're going to visualise the life expectancy at birth, and again at 65 for men and women in each constituent country of the UK. This data will be shown on two line charts.

Next, we'll look at the share of men and women in each region, displaying this data in a bar chart.
We'll then show the number of children, adults and pensioners in each region in the UK, once again, shown as a bar chart.

In [None]:
# <script>
#   function code_toggle() {
#     if (code_shown){
#       $('div.input').hide('500');
#       $('#toggleButton').val('Show Code')
#     } else {
#       $('div.input').show('500');
#       $('#toggleButton').val('Hide Code')
#     }
#     code_shown = !code_shown
#   }

#   $( document ).ready(function(){
#     code_shown=false;
#     $('div.input').hide()
#   });
# </script>
# <form action="javascript:code_toggle()"><input type="submit" id="toggleButton" value="Show Code"></form>

In [1]:
import pandas as pd

import plotly.plotly as py
from plotly.graph_objs import *
from plotly.tools import make_subplots
import plotly.offline as pyo

pyo.offline.init_notebook_mode(connected = True)

In [2]:
# Regional population table; the first CSV column is used as the index.
demographic_url = ("https://raw.githubusercontent.com/Hisham-Hussein/datasets/master/dashboardsData/"
                   "UKRegionsPopulation.csv")
demographic = pd.read_csv(demographic_url, index_col=0)
demographic.head()

Unnamed: 0,Country,Children,Adults,Pensioners,Male,Female
0,North East,524417.0,1601007.0,499197.0,1287177,1337444
1,North West,1521365.0,4351005.0,1301465.0,3534396,3639439
2,Yorkshire and The Humber,1145643.0,3271192.0,973741.0,2658411,2732165
3,East Midlands,971538.0,2827943.0,877557.0,2309197,2367841
4,West Midlands,1261883.0,3443312.0,1045805.0,2844758,2906242


In [3]:
# Life-expectancy table; the first CSV column is used as the index.
life_expectancy_url = ("https://raw.githubusercontent.com/Hisham-Hussein/datasets/master/dashboardsData/"
                       "UKCountriesLifeExpectancy.csv")
lifeExp = pd.read_csv(life_expectancy_url, index_col=0)

lifeExp.head()

Unnamed: 0,Year,Var,Country,N,Sex
0,2002,age 65,N Ireland,15.73,Male
1,2003,age 65,N Ireland,15.92,Male
2,2004,age 65,N Ireland,16.17,Male
3,2005,age 65,N Ireland,16.4,Male
4,2006,age 65,N Ireland,16.7,Male


In [4]:
# Setting up the dashboard object: a 2x2 grid with the two line charts on the
# top row and the two bar charts on the bottom row.
db = make_subplots(rows=2, cols=2,
                   subplot_titles=['Life expectancy at birth',
                                   'Life expectancy at 65',
                                   'Share of men and women in UK',
                                   'Age of people in the UK'],
                   print_grid=False)


# Making the line charts

## lookups
# Marker colour for each sex.
sex_color = {'Male': "rgb(114,229,239)",
             'Female': 'rgb(125,26,110)'}

# Marker symbol for each country. Every country must appear exactly once: a
# repeated key in a dict literal silently overrides the earlier entry.
# plotly has no plain 'triangle' symbol; 'triangle-up' is the valid name.
country_symbol = {'England': 'cross',
                  'Wales': 'circle',
                  'Scotland': 'square',
                  'N Ireland': 'triangle-up'}

# Distinct years present in the life-expectancy data, in order of appearance.
years = lifeExp['Year'].unique()


## Creating the hovertext column
def hover_text(record):
    """Build the HTML hover label for one life-expectancy record.

    `record` is indexed by 'Country', 'N' and 'Sex'. The label shows the
    country in bold, the value of 'N' rounded to whole years, and the sex
    in italics, separated by line breaks.
    """
    template = "<b>{country}</b><br>{life_expec:.0f} years<br><i>{gender}</i>"
    fields = {
        'country': record['Country'],
        'life_expec': record['N'],
        'gender': record['Sex'],
    }
    return template.format(**fields)

# Attach a hover label to every record (one string per row).
lifeExp['text'] = lifeExp.apply(hover_text, axis='columns')


## the line plots
# One trace per (measure, country, sex) combination.
for life in lifeExp['Var'].unique():
    # The 'age 65' measure is drawn in the right-hand chart and owns the
    # legend entries; every other measure is drawn in the left-hand chart with
    # its legend entries hidden, so each country/sex pair is listed only once.
    is_age_65 = bool(life == 'age 65')
    subplot_col = 2 if is_age_65 else 1

    for country in lifeExp['Country'].unique():

        for sex in lifeExp['Sex'].unique():

            # Select this trace's records once and take x, y and text from the
            # same rows, so the three stay aligned even if a combination is
            # missing a year or is stored in a different order.
            subset = lifeExp[(lifeExp['Sex'] == sex) &
                             (lifeExp['Country'] == country) &
                             (lifeExp['Var'] == life)]
            if subset.empty:
                # Nothing to draw; avoid adding an empty legend entry.
                continue

            db.append_trace({'type': 'scatter',
                             'mode': 'markers+lines',
                             'x': subset['Year'],
                             'y': subset['N'],
                             'text': subset['text'],
                             'hoverinfo': 'text',
                             'showlegend': is_age_65,
                             'name': '{}, {}'.format(country, sex),
                             # Group by country so a legend click toggles both sexes.
                             'legendgroup': country,
                             'marker': {'color': sex_color[sex],
                                        'symbol': country_symbol[country],
                                        'size': 6,
                                        'line': {'width': 1,
                                                 'color': '#333'}}},
                            row=1, col=subplot_col)
            

# making the first bar chart

## creating hovertext columns (we need a text column per each original column except for the country)
def hoverText(row, varName):
    """Return the HTML hover label for column `varName` of one region row.

    The label shows row['Country'] in bold, row[varName] as a whole number
    with thousands separators, and the column name in italics.
    """
    template = "<b>{}</b><br>{:,.0f}<br><i>{}</i>"
    region = row['Country']
    value = row[varName]
    return template.format(region, value, varName)

# Maps each population column name to the name of its hover-text column.
demoTextLookup = {}

# Only numeric columns get a hover-text column. Selecting by dtype (rather
# than "everything except Country") keeps this cell safe to re-run: the
# generated 'text...' columns hold strings, so they are skipped instead of
# being fed back into the numeric format in hoverText.
for valueCol in demographic.select_dtypes(include='number').columns:
    textColName = 'text{}'.format(valueCol)
    demographic[textColName] = demographic.apply(hoverText, args=(valueCol,), axis=1)
    demoTextLookup[valueCol] = textColName

## create bar chart for gender share
# Regions ordered from largest to smallest male population (ties broken by female).
mf = demographic.sort_values(['Male', 'Female'], ascending=False)

for sex in ['Male', 'Female']:

    textCol = demoTextLookup[sex]

    db.append_trace({
        'type': 'bar',
        'x': mf['Country'],
        'y': mf[sex],
        'text': mf[textCol],
        'hoverinfo': 'text',
        'name': sex,
        # Both sexes share one legend group.
        'legendgroup': 'malefemale',
        'marker': {'color': sex_color[sex],
                   'line': {'width': 1,
                            'color': '#333'}}
    },
    row=2, col=1)
    
    
# making the second bar chart

# Regions ordered from largest to smallest adult population
# (ties broken by pensioners, then children).
ages = demographic.sort_values(by=['Adults', 'Pensioners', 'Children'], ascending=False)

# One bar colour per age group, paired positionally.
ageGroups = ['Children', 'Adults', 'Pensioners']
ageColours = ["rgb(133,199,156)", "rgb(82,239,153)", "rgb(52,75,70)"]
ageLookup = {group: colour for group, colour in zip(ageGroups, ageColours)}

for age in ageGroups:
    textCol = demoTextLookup[age]
    barOutline = {'width': 1, 'color': '#333'}
    ageBar = {
        'type': 'bar',
        'x': ages['Country'],
        'y': ages[age],
        'text': ages[textCol],
        'hoverinfo': 'text',
        'name': age,
        # All three age groups share one legend group.
        'legendgroup': 'ages',
        'marker': {'color': ageLookup[age], 'line': barOutline},
    }
    db.append_trace(ageBar, row=2, col=2)
    

# Largest and smallest age-group counts across all regions; used to give both
# bar charts the same fixed y-range.
maximum = max(demographic[ageGroups].max())
minimum = min(demographic[ageGroups].min())
diff = maximum - minimum

# Axes 3 and 4 belong to the two bar charts on the bottom row.
for axisNumber in (3, 4):
    yAxisKey = 'yaxis{}'.format(axisNumber)
    xAxisKey = 'xaxis{}'.format(axisNumber)
    # Static range: zero up to the maximum plus 10% of the spread as headroom.
    db['layout'][yAxisKey].update({'range': [0, maximum + (diff * 0.1)]})
    # Slant the region names so they do not overlap.
    db['layout'][xAxisKey].update({'tickangle': -45})

# y-axis titles for the left-hand chart of each row.
db['layout']['yaxis1'].update({'title': 'Years'})
db['layout']['yaxis3'].update({'title': 'Number of people'})

# Figure-wide settings: gap between legend groups, overall size, extra bottom
# margin (room for the slanted tick labels) and closest-point hover.
figureSettings = {
    'legend': {'tracegroupgap': 25},
    'width': 1000,
    'height': 750,
    'margin': {'b': 130},
    'hovermode': 'closest',
}
db['layout'].update(figureSettings)

# Dashboard title: a paper-referenced annotation centred above the subplots.
dashboardTitle = {
    'text': 'Demographics of the UK Population',
    'xref': 'paper',
    'yref': 'paper',
    'xanchor': 'center',
    'showarrow': False,
    'font': {'size': 24},
    'x': 0.5,
    'y': 1.15,
}
db['layout']['annotations'] += [dashboardTitle]

pyo.iplot(db)

# Dashboard (2) - Crime in London

In this second lesson on dashboards we're going to visualise some data about crime in London. The dataset we'll be using contains information on all the different crimes recorded by the Metropolitan police during 2016.

This is a 40 MB raw dataset, so we'll be using the pandas groupby() function to summarise the data in different ways.

Our dashboard will have seven rows; the first two charts will occupy two rows each, whilst the last three charts will have one each.

The final three charts will show simple summaries of the data and will serve to give contextual information about the data to help our readers understand the situation better. The first two charts we make will be a little more complex and are the focal point of this visualisation.

We'll create the contextual charts first, as doing so will help us to familiarise ourselves with the dataset.

First of all we'll use a line chart to show how the number of crimes changes each month.

Next, we'll create a bar chart showing the number of crimes in each location.

The next step will be to create another bar chart, this one showing the number of each different type of crime.

Now for the more complex charts . . . These will be a special type of scatterplot which allows us to show the interaction between different categories.

The first chart will show, for each type of crime, what is the most common outcome.

The second chart will show for each location, what is the most common type of crime.

In [5]:
# Recorded-crime table, read from a path relative to the notebook; the first
# CSV column is used as the index.
crimes_path = 'Data/MetPoliceCrimeStats.csv'
crimes = pd.read_csv(crimes_path, index_col=0)
crimes.head()

Unnamed: 0,Outcome Month,Outcome type,Crime Month,Crime type,Area
0,2016-01,Court case unable to proceed,2016-01,Violence and sexual offences,Enfield
3,2016-01,Investigation complete; no suspect identified,2016-01,Criminal damage and arson,Lambeth
4,2016-01,Investigation complete; no suspect identified,2016-01,Theft from the person,Lambeth
5,2016-01,Investigation complete; no suspect identified,2016-01,Violence and sexual offences,Lambeth
6,2016-01,Investigation complete; no suspect identified,2016-01,Theft from the person,Lambeth


## Making the Dashboard

This is the final plot. To see the code used to generate this dashboard, unfold the code by pressing the button at the beginning of this notebook.

In [6]:
# Making the subplots object
# Seven grid rows in one column. The first two charts span two rows each (the
# row beneath each is left empty with None); the last three take one row each.
dashboardSpecs = [
    [{'rowspan': 2}],
    [None],
    [{'rowspan': 2}],
    [None],
    [{}],
    [{}],
    [{}],
]

# Titles are listed in the same top-to-bottom order as the charts.
dashboardTitles = [
    'Outcomes for crime',
    'Types of crime by location',
    'Crimes by month',
    'Crimes by location',
    'Types of crime',
]

metCrimes = make_subplots(rows=7,
                          cols=1,
                          specs=dashboardSpecs,
                          subplot_titles=dashboardTitles,
                          print_grid=False)

# (1) Line chart of crimes by month
# Count the rows in each month (each row is treated as one crime). A single
# 'Crimes' column feeds both the plotted value and the hover text, so the two
# can never disagree the way per-column non-null counts could.
crimesByMonth = crimes.groupby('Crime Month').size().reset_index(name='Crimes')

# Parse the 'YYYY-MM' strings so the month name can be derived from them.
crimesByMonth['Crime Month'] = pd.to_datetime(crimesByMonth['Crime Month'], format='%Y-%m')
crimesByMonth['Month'] = crimesByMonth['Crime Month'].apply(lambda x: x.strftime("%B"))

# Text column showing the month name and the number of crimes.
crimesByMonth['text'] = ["<b>{}:</b><br>{:,} crimes".format(month, count)
                         for month, count in zip(crimesByMonth['Month'],
                                                 crimesByMonth['Crimes'])]

monthCrime = {'type': 'scatter',
              'x': crimesByMonth['Month'],
              'y': crimesByMonth['Crimes'],
              'text': crimesByMonth['text'],
              'showlegend': False,
              'marker': {'color': '#944dc7',
                         'size': 8,
                         'line': {'width': 1,
                                  'color': '#333'}},
              'name': 'Number of crimes<br>by month',
              'hoverinfo': 'text'}

# Append it to grid row 5 and anchor its y-axis at zero, with 5% headroom
# above the busiest month.
metCrimes.append_trace(monthCrime, 5, 1)
metCrimes['layout']['yaxis3'].update({'range': [0, max(crimesByMonth['Crimes']) * 1.05]})


# (2)Number of crimes in each location
# Grouping by 'Area' and counting gives, for every other column, the number of
# non-null rows recorded in each area.
crimesGroupedByArea = crimes.groupby('Area', as_index=False)
areaCrime = crimesGroupedByArea.count()

# recode the areas that aren't in London, and groupby again to get 'Not in London' as a separate Area category, then call .sum()
def inLondon(row, minCrimes=1000):
    """Return the row's area name, or 'Not in London' for low-count areas.

    Parameters
    ----------
    row : mapping indexed by 'Outcome Month' and 'Area'
        'Outcome Month' is compared numerically against `minCrimes`.
    minCrimes : number, default 1000
        An area is kept under its own name only when its 'Outcome Month'
        value is strictly greater than this threshold.

    Returns
    -------
    row['Area'] when the threshold is exceeded, otherwise 'Not in London'.
    """
    if row['Outcome Month'] > minCrimes:
        return row['Area']
    return 'Not in London'

# Relabel low-count areas, merge them into a single 'Not in London' row by
# summing the counts, and order the areas from most to fewest crimes.
areaCrime['Area'] = areaCrime.apply(inLondon, axis=1)
areaCrime = (areaCrime.groupby('Area', as_index=False)
                      .sum()
                      .sort_values(by='Outcome Month', ascending=False))

areaBarMarker = {'color': 'rgba(77, 83, 199, 0.6)',
                 'line': {'width': 1, 'color': '#333'}}
areaBar = {
    'type': 'bar',
    'x': areaCrime['Area'],
    'y': areaCrime['Outcome Month'],
    'name': 'Crimes by Area',
    'showlegend': False,
    'hoverinfo': 'x+y',
    'marker': areaBarMarker,
}

# Grid row 6; slant the area names on this chart's x-axis.
metCrimes.append_trace(areaBar, row=6, col=1)
metCrimes['layout']['xaxis4'].update({'tickangle': 45})


# (3) Types of crime
# Count the rows for each crime type and order the types from most to least common.
crimeTypes = (crimes.groupby('Crime type', as_index=False)
                    .count()
                    .sort_values(by='Outcome Month', ascending=False))

crimeBarMarker = {'color': 'rgba(226, 97, 50, 0.6)',
                  'line': {'width': 1, 'color': '#333'}}
crimeBar = {
    'type': 'bar',
    'x': crimeTypes['Crime type'],
    'y': crimeTypes['Outcome Month'],
    'name': 'Crimes by type',
    'showlegend': False,
    'hoverinfo': 'x+y',
    'marker': crimeBarMarker,
}

# Grid row 7 (the bottom chart).
metCrimes.append_trace(crimeBar, row=7, col=1)

# (4) Type of crime and location
# Number of rows for every (crime type, area) pair.
typeLoc = crimes.groupby(['Crime type', 'Area'], as_index=False).count()

# We need to find out which locations are outside London.
# Rather than recoding the Area column in place, build a per-area lookup with
# a new 'Recoded Area' column by applying inLondon() to the per-area counts.
locationRecode = crimes.groupby(['Area'], as_index=False).count()
locationRecode['Recoded Area'] = locationRecode.apply(inLondon, axis=1)

# Merge the recoded names onto the grouped DataFrame (left join keeps every
# crime-type/area row).
typeLoc = typeLoc.merge(locationRecode[['Area', 'Recoded Area']], how='left', on='Area')

# Drop the old Area column, rename 'Recoded Area' to 'Area', and group again.
# The counts are summed (not counted) so the rows that now share the
# 'Not in London' label are combined into one total per crime type.
typeLoc = typeLoc.drop(['Area'], axis=1).rename(columns={'Recoded Area': 'Area'})
typeLoc = typeLoc.groupby(['Crime type', 'Area'], as_index=False).sum()

# Hover text: area in bold, then the crime type, then the count.
typeLoc['text'] = typeLoc.apply(lambda x: "<b>{}</b><br>{}<br>{:,}".format(x['Area'],
                                                                           x['Crime type'],
                                                                           x['Outcome Month']),
                                axis=1)

# Category-by-category scatter: one marker per (area, crime type), sized by
# the count. sizeref scales the counts down to marker sizes and sizemin keeps
# the smallest markers visible.
typeLocScatter = {'type': 'scatter',
                  'mode': 'markers',
                  'x': typeLoc['Area'],
                  'y': typeLoc['Crime type'],
                  'text': typeLoc['text'],
                  'opacity': 0.8,
                  'hoverinfo': 'text',
                  'showlegend': False,
                  'name': 'Type and Location of crime',
                  'marker': {'size': typeLoc['Outcome Month'],
                             'sizeref': 150,
                             'sizemin': 1.5}
                  }

# Append it to the dashboard (grid rows 3-4).
metCrimes.append_trace(typeLocScatter, row=3, col=1)


# (5) Types of crime by outcome
# This chart will show for each type of crime, how many crimes of that type resulted in a specific outcome.
# First of all we need to recode all of the different outcomes into fewer categories to allow us to plot them more easily.

def wideCoding(crime):
    """Collapse a detailed outcome description into a broad category.

    The argument is matched against three groups of known outcome strings and
    the matching group's label is returned. A value that belongs to none of
    the groups is returned unchanged.
    """
    broadCategories = (
        ('No judicial outcome', (
            'Court case unable to proceed',
            'Investigation complete; no suspect identified',
            'Formal action is not in the public interest',
            'Offender given absolute discharge',
            'Unable to prosecute suspect',
        )),
        ('Punished, not prison', (
            'Local resolution',
            'Offender given a caution',
            'Offender given a drugs possession warning',
            'Offender given community sentence',
            'Offender given conditional discharge',
            'Offender given penalty notice',
            'Offender fined',
            'Offender deprived of property',
            'Offender given suspended prison sentence',
            'Offender otherwise dealt with',
            'Offender ordered to pay compensation',
        )),
        ('Outcome not yet known', (
            'Suspect charged',
            'Suspect charged as part of another case',
            'Defendant sent to Crown Court',
        )),
    )

    for label, outcomes in broadCategories:
        if crime in outcomes:
            return label
    return crime
    
# Recode every detailed outcome into its broad category.
crimes['Broad Outcome'] = crimes['Outcome type'].apply(wideCoding)


def _outcomeLabel(record):
    """Hover text: crime type in bold, then the broad outcome, then the count."""
    return "<b>{}</b><br>{}<br>{:,}".format(record['Crime type'],
                                            record['Broad Outcome'],
                                            record['Outcome Month'])


# Number of rows for every (crime type, broad outcome) pair, plus a text column.
typeOutcome = crimes.groupby(['Crime type', 'Broad Outcome'], as_index=False).count()
typeOutcome['text'] = typeOutcome.apply(_outcomeLabel, axis=1)

# Same kind of category-by-category scatter as the type/location chart: one
# marker per (broad outcome, crime type), sized by the count.
outcomeMarker = {'size': typeOutcome['Outcome Month'],
                 'sizeref': 1000,
                 'sizemin': 2}
typeOutcomeScatter = {
    'type': 'scatter',
    'mode': 'markers',
    'x': typeOutcome['Broad Outcome'],
    'y': typeOutcome['Crime type'],
    'text': typeOutcome['text'],
    'opacity': 0.8,
    'hoverinfo': 'text',
    'showlegend': False,
    'name': 'Type of crime and outcome',
    'marker': outcomeMarker,
}

# Append it as the top chart (grid rows 1-2).
metCrimes.append_trace(typeOutcomeScatter, row=1, col=1)

# (6) Changing the layout
# - Rotate the ticklabels
# - Increase the margins
# - Change the domain of the charts
# - Increase the height of the dashboard

# Rotate the ticklabels on the x2 axis (the type-by-location chart) and hide
# the vertical gridlines on the x1 axis (the outcomes chart).
metCrimes['layout']['xaxis2'].update({'tickangle': 45})
metCrimes['layout']['xaxis1'].update({'showgrid': False})

# Change the left, bottom and right margins to accommodate the ticklabels:
metCrimes['layout'].update({'margin': {'l': 180,
                                        'b': 120,
                                        'r': 90}})

# Updating chart titles and domains:
# The subplot titles are annotations; the indices below assume they are stored
# in the order the titles were passed to make_subplots.
metCrimes['layout'].update({'height': 1500})
metCrimes['layout']['annotations'][0]['y'] = 0.95   # move down on the first chart
# Move the entire second chart up by changing the domain and the y-position of
# its title.
metCrimes['layout']['yaxis2'].update({'domain': [0.51, 0.74]})
metCrimes['layout']['annotations'][1]['y'] = 0.73

# Do the same for the Crimes by location chart:
metCrimes['layout']['yaxis4'].update({'domain': [0.19, 0.27]})
metCrimes['layout']['annotations'][3]['y'] = 0.26


# Setting a title and changing hovermode
# The dashboard title is a paper-referenced annotation centred at the top of
# the figure. plotly's xanchor takes the US spelling 'center' (valid values
# are 'auto', 'left', 'center' and 'right').
metCrimes['layout']['annotations'].append({'text': 'Crime recorded by the Metropolitan Police in 2016',
                                           'showarrow': False,
                                           'xref': 'paper',
                                           'x': 0.5,
                                           'xanchor': 'center',
                                           'yref': 'paper',
                                           'y': 1,
                                           'yanchor': 'top',
                                           'font': {'size': 24}})

metCrimes['layout'].update({'hovermode': 'closest'})
pyo.iplot(metCrimes)