### Python module/package imports for this chapter

In [1]:
import math, json, collections, itertools

In [2]:
import numpy as np
import matplotlib.pyplot as pp
%matplotlib inline

In [3]:
from mpl_toolkits.basemap import Basemap
import geopy

### Code and data needed from previous videos

In [4]:
# Lightweight record type: one instance per row of the medals file.
medal = collections.namedtuple('medal',['year','athlete','team','event'])

# Each line of goldmedals.txt is split on tabs into the four fields above.
# The `with` block closes the file handle as soon as the list is built;
# whitespace-only lines are skipped because they cannot fill the four fields.
with open('goldmedals.txt','r') as medals_file:
    medals = [medal(*line.strip().split('\t')) for line in medals_file if line.strip()]

## Challenge solution

In [5]:
# Tally how many records in `medals` carry each athlete name.
medals_by_athlete = collections.Counter(
    record.athlete for record in medals
)

In [6]:
# Show the five athlete names with the highest counts, as (name, count) pairs.
medals_by_athlete.most_common(5)

[('Paavo Nurmi', 9),
 ('Carl Lewis', 9),
 ('Usain Bolt', 9),
 ('Ray Ewry', 8),
 ('Allyson Felix', 6)]

In [7]:
# Map each athlete name to the set of distinct event strings found in their records.
events_by_athlete_set = collections.defaultdict(set)

# The loop variable is deliberately NOT called `medal`: a module-level
# `for medal in ...` would rebind the `medal` namedtuple class to the last
# record once the loop finishes.
for record in medals:
    events_by_athlete_set[record.athlete].add(record.event)

In [8]:
def howmany(tup):
    """Sort key: the length of the item at index 1 of `tup`."""
    members = tup[1]
    return len(members)

# Rank the (athlete, events) pairs by how many events each set holds, largest first.
sorted(
    events_by_athlete_set.items(),
    key=howmany,
    reverse=True,
)

[('Paavo Nurmi',
  {'10000m men',
   '1500m men',
   '3000m team men',
   '5000m men',
   'cross country individual men',
   'cross country team men'}),
 ('Usain Bolt',
  {'100m',
   '100m men',
   '200m',
   '200m men',
   '4x100m relay',
   '4x100m relay men'}),
 ('Ville Ritola',
  {'10000m men',
   '3000m steeplechase men',
   '3000m team men',
   '5000m men',
   'cross country team men'}),
 ('Allyson Felix',
  {'200m women',
   '4x100m relay',
   '4x100m relay women',
   '4x400m relay',
   '4x400m relay women'}),
 ('Alvin Kraenzlein',
  {'110m hurdles men', '200m hurdles men', '60m men', 'long jump men'}),
 ('Hannes Kolehmainen',
  {'10000m men', '5000m men', 'cross country individual men', 'marathon men'}),
 ('Jesse Owens',
  {'100m men', '200m men', '4x100m relay men', 'long jump men'}),
 ('Fanny Blankers-Koen',
  {'100m women', '200m women', '4x100m relay women', '80m hurdles women'}),
 ('Betty Cuthbert',
  {'100m women', '200m women', '400m women', '4x100m relay women'}),
 ('Ca

In [9]:
def clean(event):
    """Return `event` with the standalone words 'men' and 'women' removed.

    The string is split on whitespace and the surviving words are joined
    back together with single spaces.
    """
    kept = []
    for token in event.split():
        if token in ('men', 'women'):
            continue
        kept.append(token)
    return ' '.join(kept)

In [10]:
# A name with no 'men'/'women' word comes back unchanged.
clean('100m')

'100m'

In [11]:
# The trailing 'men' word is dropped, leaving just the event name.
clean('100m men')

'100m'

In [12]:
# Rebuild the athlete -> events mapping, this time passing every event name
# through clean() so that variants such as '100m' and '100m men' collapse
# into a single entry.
events_by_athlete_set = collections.defaultdict(set)

# `record` rather than `medal`: at module level a loop variable named `medal`
# would overwrite the `medal` namedtuple class once the loop finishes.
for record in medals:
    events_by_athlete_set[record.athlete].add(clean(record.event))

# howmany() from the earlier cell is reused as the sort key rather than being
# defined a second time here.
sorted(events_by_athlete_set.items(),key=howmany,reverse=True)

[('Paavo Nurmi',
  {'10000m',
   '1500m',
   '3000m team',
   '5000m',
   'cross country individual',
   'cross country team'}),
 ('Ville Ritola',
  {'10000m',
   '3000m steeplechase',
   '3000m team',
   '5000m',
   'cross country team'}),
 ('Alvin Kraenzlein', {'110m hurdles', '200m hurdles', '60m', 'long jump'}),
 ('Hannes Kolehmainen',
  {'10000m', '5000m', 'cross country individual', 'marathon'}),
 ('Jesse Owens', {'100m', '200m', '4x100m relay', 'long jump'}),
 ('Fanny Blankers-Koen', {'100m', '200m', '4x100m relay', '80m hurdles'}),
 ('Betty Cuthbert', {'100m', '200m', '400m', '4x100m relay'}),
 ('Carl Lewis', {'100m', '200m', '4x100m relay', 'long jump'}),
 ('Ray Ewry',
  {'high jump, standing', 'long jump, standing', 'triple jump, standing'}),
 ('Archie Hahn', {'100m', '200m', '60m'}),
 ('James Lightbody', {'1500m', '3000m steeplechase', '800m'}),
 ('Harry Hillman', {'200m hurdles', '400m', '400m hurdles'}),
 ('Melvin Sheppard', {'1500m', '4x400m relay', '800m'}),
 ('Emil Zato