In [216]:
import json
import csv

In [217]:
# Data

with open('world_cup_2018.json', encoding='utf8') as world_cup_file: 
    world_cup_data = json.load(world_cup_file)

with open('country_populations.csv') as population_file:
    reader = csv.DictReader(population_file)
    population_data = list(reader)

In [218]:
# Check that the overall data structure is a dictionary
# Check that the dictionary has 2 kyes, 'name' and 'rounds'

assert type(world_cup_data) == dict
assert list(world_cup_data.keys()) == ['name', 'rounds']

print(world_cup_data['name'])
print(len(world_cup_data['rounds']))
print(population_data[0])

World Cup 2018
20
{'': '2714', 'Country Name': 'Afghanistan', 'Country Code': 'AFG', 'Year': '1960', 'Value': '8996973'}


In [219]:
# Check that the overall data structure is a list
# Check that the 0th element is a dictionary

assert type(population_data) == list
assert type(population_data[0]) == dict

In [220]:
highest_lvl_keys = world_cup_data.keys()
print(highest_lvl_keys)

dict_keys(['name', 'rounds'])


In [221]:
print(world_cup_data['name'])
print(world_cup_data['rounds'][0])

World Cup 2018
{'name': 'Matchday 1', 'matches': [{'num': 1, 'date': '2018-06-14', 'time': '18:00', 'team1': {'name': 'Russia', 'code': 'RUS'}, 'team2': {'name': 'Saudi Arabia', 'code': 'KSA'}, 'score1': 5, 'score2': 0, 'score1i': 2, 'score2i': 0, 'goals1': [{'name': 'Gazinsky', 'minute': 12, 'score1': 1, 'score2': 0}, {'name': 'Cheryshev', 'minute': 43, 'score1': 2, 'score2': 0}, {'name': 'Dzyuba', 'minute': 71, 'score1': 3, 'score2': 0}, {'name': 'Cheryshev', 'minute': 90, 'offset': 1, 'score1': 4, 'score2': 0}, {'name': 'Golovin', 'minute': 90, 'offset': 4, 'score1': 5, 'score2': 0}], 'goals2': [], 'group': 'Group A', 'stadium': {'key': 'luzhniki', 'name': 'Luzhniki Stadium'}, 'city': 'Moscow', 'timezone': 'UTC+3'}]}


In [222]:
# Extracting Matches

matches = []
rounds = world_cup_data['rounds']

for round_ in rounds:
    round_matches = round_['matches']
    matches.extend(round_matches)

matches[0]

{'num': 1,
 'date': '2018-06-14',
 'time': '18:00',
 'team1': {'name': 'Russia', 'code': 'RUS'},
 'team2': {'name': 'Saudi Arabia', 'code': 'KSA'},
 'score1': 5,
 'score2': 0,
 'score1i': 2,
 'score2i': 0,
 'goals1': [{'name': 'Gazinsky', 'minute': 12, 'score1': 1, 'score2': 0},
  {'name': 'Cheryshev', 'minute': 43, 'score1': 2, 'score2': 0},
  {'name': 'Dzyuba', 'minute': 71, 'score1': 3, 'score2': 0},
  {'name': 'Cheryshev', 'minute': 90, 'offset': 1, 'score1': 4, 'score2': 0},
  {'name': 'Golovin', 'minute': 90, 'offset': 4, 'score1': 5, 'score2': 0}],
 'goals2': [],
 'group': 'Group A',
 'stadium': {'key': 'luzhniki', 'name': 'Luzhniki Stadium'},
 'city': 'Moscow',
 'timezone': 'UTC+3'}

In [223]:
assert len(matches) == 64
assert type(matches[0]) == dict

In [224]:
teams_set = set()

for match in matches:
    teams_set.add(match['team1']['name'])
    teams_set.add(match['team2']['name'])

teams = sorted(list(teams_set))
print(teams)

['Argentina', 'Australia', 'Belgium', 'Brazil', 'Colombia', 'Costa Rica', 'Croatia', 'Denmark', 'Egypt', 'England', 'France', 'Germany', 'Iceland', 'Iran', 'Japan', 'Mexico', 'Morocco', 'Nigeria', 'Panama', 'Peru', 'Poland', 'Portugal', 'Russia', 'Saudi Arabia', 'Senegal', 'Serbia', 'South Korea', 'Spain', 'Sweden', 'Switzerland', 'Tunisia', 'Uruguay']


In [225]:
assert type(teams) == list
assert len(teams) == 32
assert type(teams[0]) == str

In [226]:
combined_data = {team: {'wins': 0} for team in teams}

assert type(combined_data) == dict
assert type(list(combined_data.keys())[0]) == str
assert combined_data['Japan'] == {'wins': 0}

In [227]:
def find_winner(match):
    if match['score1'] > match['score2']:
        return match['team1']['name']
    elif match['score2'] > match['score1']:
        return match['team2']['name']
    else:
        return None

In [228]:
assert find_winner(matches[0]) == 'Russia'
assert find_winner(matches[1]) == 'Uruguay'
assert find_winner(matches[2]) == None

In [275]:
for match in matches:
    winner = find_winner(match)
    if winner:
        combined_data[winner]['wins'] += 1

combined_data

{'Argentina': {'wins': 2},
 'Australia': {'wins': 0},
 'Belgium': {'wins': 12},
 'Brazil': {'wins': 6},
 'Colombia': {'wins': 4},
 'Costa Rica': {'wins': 0},
 'Croatia': {'wins': 6},
 'Denmark': {'wins': 2},
 'Egypt': {'wins': 0},
 'England': {'wins': 6},
 'France': {'wins': 12},
 'Germany': {'wins': 2},
 'Iceland': {'wins': 0},
 'Iran': {'wins': 2},
 'Japan': {'wins': 2},
 'Mexico': {'wins': 4},
 'Morocco': {'wins': 0},
 'Nigeria': {'wins': 2},
 'Panama': {'wins': 0},
 'Peru': {'wins': 2},
 'Poland': {'wins': 2},
 'Portugal': {'wins': 2},
 'Russia': {'wins': 4},
 'Saudi Arabia': {'wins': 2},
 'Senegal': {'wins': 2},
 'Serbia': {'wins': 2},
 'South Korea': {'wins': 2},
 'Spain': {'wins': 2},
 'Sweden': {'wins': 6},
 'Switzerland': {'wins': 2},
 'Tunisia': {'wins': 2},
 'Uruguay': {'wins': 8}}

In [277]:
population_data_filtered = []

for record in population_data:
    if record['Country Name'] in teams and record['Year'] == '2018':
        population_data_filtered.append(record)

len(population_data_filtered)

27

In [279]:
population_record_samples = population_data_filtered[:5]
population_record_samples[2]

{'': '3834',
 'Country Name': 'Belgium',
 'Country Code': 'BEL',
 'Year': '2018',
 'Value': 11433256}

In [281]:
teams[13]

'Iran'

In [283]:
def normalize_location(country_name):
    """
    Given a country name, return the name that the
    country uses when playing in the FIFA World Cup
    """
    name_sub_dict = {
        "Russian Federation": "Russia",
        "Egypt, Arab Rep.": "Egypt",
        "Iran, Islamic Rep.": "Iran",
        "Korea, Rep.": "South Korea",
        "United Kingdom": "England"
    }
    return name_sub_dict.get(country_name, country_name)

print(normalize_location("Russian Federation"))
print(normalize_location("Argentina"))

Russia
Argentina


In [293]:
polulation_data_filtered = []

for record in population_data:
    country = record['Country Name'].strip().lower()
    teams_lower = [t.lower() for t in teams]
    
    if country in teams and record['Year'] == '2018':
        country = normalize_location(country)
        record["Country Name"] = country
        population_data_filtered.append(record)

len(population_data_filtered)

27

In [285]:
for record in population_data_filtered:
    if record["Value"] is not None:
        record["Value"] = int(record["Value"])

population_data_filtered[-1]

assert type(population_data_filtered[-1]["Value"]) == int

In [287]:
combined_data

{'Argentina': {'wins': 2},
 'Australia': {'wins': 0},
 'Belgium': {'wins': 12},
 'Brazil': {'wins': 6},
 'Colombia': {'wins': 4},
 'Costa Rica': {'wins': 0},
 'Croatia': {'wins': 6},
 'Denmark': {'wins': 2},
 'Egypt': {'wins': 0},
 'England': {'wins': 6},
 'France': {'wins': 12},
 'Germany': {'wins': 2},
 'Iceland': {'wins': 0},
 'Iran': {'wins': 2},
 'Japan': {'wins': 2},
 'Mexico': {'wins': 4},
 'Morocco': {'wins': 0},
 'Nigeria': {'wins': 2},
 'Panama': {'wins': 0},
 'Peru': {'wins': 2},
 'Poland': {'wins': 2},
 'Portugal': {'wins': 2},
 'Russia': {'wins': 4},
 'Saudi Arabia': {'wins': 2},
 'Senegal': {'wins': 2},
 'Serbia': {'wins': 2},
 'South Korea': {'wins': 2},
 'Spain': {'wins': 2},
 'Sweden': {'wins': 6},
 'Switzerland': {'wins': 2},
 'Tunisia': {'wins': 2},
 'Uruguay': {'wins': 8}}

In [297]:
for record in population_data_filtered:
    country = record["Country Name"]
    population = int(record["Value"]) if record["Value"] is not None else None
    
    for team in combined_data:
        if team.lower() == country.lower():
            combined_data[team]["population"] = population

combined_data

{'Argentina': {'wins': 2, 'population': 44494502},
 'Australia': {'wins': 0, 'population': 24982688},
 'Belgium': {'wins': 12, 'population': 11433256},
 'Brazil': {'wins': 6, 'population': 209469333},
 'Colombia': {'wins': 4, 'population': 49648685},
 'Costa Rica': {'wins': 0, 'population': 4999441},
 'Croatia': {'wins': 6, 'population': 4087843},
 'Denmark': {'wins': 2, 'population': 5793636},
 'Egypt': {'wins': 0},
 'England': {'wins': 6},
 'France': {'wins': 12, 'population': 66977107},
 'Germany': {'wins': 2, 'population': 82905782},
 'Iceland': {'wins': 0, 'population': 352721},
 'Iran': {'wins': 2},
 'Japan': {'wins': 2, 'population': 126529100},
 'Mexico': {'wins': 4, 'population': 126190788},
 'Morocco': {'wins': 0, 'population': 36029138},
 'Nigeria': {'wins': 2, 'population': 195874740},
 'Panama': {'wins': 0, 'population': 4176873},
 'Peru': {'wins': 2, 'population': 31989256},
 'Poland': {'wins': 2, 'population': 37974750},
 'Portugal': {'wins': 2, 'population': 10283822},


In [299]:
assert type(combined_data["Uruguay"]) == dict
assert type(combined_data["Uruguay"]["population"]) == int