### Python module/package imports for this chapter

In [1]:
import math, json, collections, itertools

In [2]:
import numpy as np
import matplotlib.pyplot as pp
%matplotlib inline

In [3]:
from mpl_toolkits.basemap import Basemap
import geopy

### Code and data needed from previous videos

In [4]:
# Record type for one gold medal; all four fields are kept as strings.
medal = collections.namedtuple('medal',['year','athlete','team','event'])

# Read one tab-separated record per line. The context manager closes the file
# as soon as the list is built instead of leaving the handle open.
with open('goldmedals.txt','r') as medals_file:
    medals = [medal(*line.strip().split('\t')) for line in medals_file]

## Challenge solution

#### Top 5 athletes with the most gold medals
Correct

In [6]:
# Tally gold medals per athlete: each record in `medals` adds one to its athlete's count.
counter = collections.Counter(entry.athlete for entry in medals)

In [8]:
# The five athletes with the highest counts, as (athlete, count) pairs.
counter.most_common(5)

[('Paavo Nurmi', 9),
 ('Carl Lewis', 9),
 ('Usain Bolt', 9),
 ('Ray Ewry', 8),
 ('Allyson Felix', 6)]

#### Top 5 athletes who won gold medals in the largest variety of events
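Incorrect: event names are labelled inconsistently in the data (for example, both '100m' and '100m men' appear), so this attempt counts the same event more than once. See "Correct solution for Q2" at the end of this section.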

In [10]:
# Distinct event names exactly as they are spelled in the data.
events = set(entry.event for entry in medals)

In [11]:
# Display the set of distinct event names.
events

{'10000m men',
 '10000m walk men',
 '10000m walk women',
 '10000m women',
 '100m',
 '100m hurdles',
 '100m hurdles women',
 '100m men',
 '100m women',
 '10miles walk (16093m)',
 '10miles walk (16093m) men',
 '10miles walk (16093m) women',
 '110m hurdles',
 '110m hurdles men',
 '1500m',
 '1500m men',
 '1500m women',
 '200m',
 '200m hurdles men',
 '200m men',
 '200m women',
 '20km race walk women',
 '20km walk',
 '20km walk men',
 '20km walk women',
 '3000m steeplechase',
 '3000m steeplechase men',
 '3000m steeplechase women',
 '3000m team men',
 '3000m walk men',
 '3000m women',
 '3200m steeplechase men',
 '3500m walk men',
 '3miles team (4828m) men',
 '4000m steeplechase men',
 '400m',
 '400m hurdles',
 '400m hurdles men',
 '400m hurdles women',
 '400m men',
 '400m women',
 '4miles team men',
 '4x100m relay',
 '4x100m relay men',
 '4x100m relay women',
 '4x400m relay',
 '4x400m relay men',
 '4x400m relay women',
 '5 miles (8047m) men',
 '5000m men',
 '5000m team',
 '5000m team men',
 '

In [14]:
# Lightweight (athlete, event) pair; as a tuple subclass it is hashable and can live in a set.
athlete_and_event = collections.namedtuple('athlete_and_event', 'athlete event')

In [19]:
# Collapse repeated wins: each distinct (athlete, event) combination is kept once.
athletes_and_events = set(
    athlete_and_event(entry.athlete, entry.event) for entry in medals
)

In [21]:
# Display the set of distinct (athlete, event) pairs.
athletes_and_events

{athlete_and_event(athlete='Sherone Simpson', event='4x100m relay women'),
 athlete_and_event(athlete='Derrick Brew', event='4x400m relay men'),
 athlete_and_event(athlete='Reginald Walker', event='100m men'),
 athlete_and_event(athlete='Arnold Jackson', event='1500m men'),
 athlete_and_event(athlete='Fanny Blankers-Koen', event='100m women'),
 athlete_and_event(athlete='Walter Francis Davis', event='high jump men'),
 athlete_and_event(athlete='James Ray Hines', event='100m men'),
 athlete_and_event(athlete='Juha Tiainen', event='hammer throw men'),
 athlete_and_event(athlete='Henry Argue Russell', event='4x100m relay men'),
 athlete_and_event(athlete='Foy Draper', event='4x100m relay men'),
 athlete_and_event(athlete='Helen Herring Stephens', event='100m women'),
 athlete_and_event(athlete='Tell Schirnding Berna', event='3000m team men'),
 athlete_and_event(athlete='Howard Valentine', event='4miles team men'),
 athlete_and_event(athlete='Karin Richert-Balzer', event='80m hurdles women

In [22]:
# Count, per athlete, how many distinct (athlete, event) pairs they appear in.
counter = collections.Counter(pair.athlete for pair in athletes_and_events)

In [23]:
# The five athletes with the most distinct (athlete, event) pairs, as (athlete, count) pairs.
counter.most_common(5)

[('Usain Bolt', 6),
 ('Paavo Nurmi', 6),
 ('Ville Ritola', 5),
 ('Allyson Felix', 5),
 ('Fanny Blankers-Koen', 4)]

##### Correct solution for Q2

In [37]:
# Maps each athlete to a set of event names; looking up a missing key creates an empty set.
events_by_athlete_set = collections.defaultdict(set)

In [38]:
def clean(event):
    """Return the event name with the standalone words 'men' and 'women' removed.

    The name is split on whitespace, so the result is re-joined with single
    spaces and carries no leading or trailing whitespace.
    """
    kept = []
    for token in event.split():
        if token == 'men' or token == 'women':
            continue
        kept.append(token)
    return ' '.join(kept)

In [39]:
# Group the cleaned event names by athlete. The loop variable is deliberately
# not called `medal`: a top-level loop would rebind that name from the
# namedtuple class to the last record, breaking any later `medal(...)` call.
for record in medals:
    events_by_athlete_set[record.athlete].add(clean(record.event))

In [40]:
def count_events(events_tuple):
    """Sort key: the size of the collection stored at index 1 of `events_tuple`."""
    event_names = events_tuple[1]
    return len(event_names)

# Rank the (athlete, events) pairs by number of distinct events, largest first.
sorted(
    events_by_athlete_set.items(),
    key=count_events,
    reverse=True,
)

[('Paavo Nurmi',
  {'10000m',
   '1500m',
   '3000m team',
   '5000m',
   'cross country individual',
   'cross country team'}),
 ('Ville Ritola',
  {'10000m',
   '3000m steeplechase',
   '3000m team',
   '5000m',
   'cross country team'}),
 ('Alvin Kraenzlein', {'110m hurdles', '200m hurdles', '60m', 'long jump'}),
 ('Hannes Kolehmainen',
  {'10000m', '5000m', 'cross country individual', 'marathon'}),
 ('Jesse Owens', {'100m', '200m', '4x100m relay', 'long jump'}),
 ('Fanny Blankers-Koen', {'100m', '200m', '4x100m relay', '80m hurdles'}),
 ('Betty Cuthbert', {'100m', '200m', '400m', '4x100m relay'}),
 ('Carl Lewis', {'100m', '200m', '4x100m relay', 'long jump'}),
 ('Ray Ewry',
  {'high jump, standing', 'long jump, standing', 'triple jump, standing'}),
 ('Archie Hahn', {'100m', '200m', '60m'}),
 ('James Lightbody', {'1500m', '3000m steeplechase', '800m'}),
 ('Harry Hillman', {'200m hurdles', '400m', '400m hurdles'}),
 ('Melvin Sheppard', {'1500m', '4x400m relay', '800m'}),
 ('Emil Zato