### Reading data from CSV files using csv package

In [None]:
import csv

In [None]:
# Read Cities.csv data into default list format and print all rows
# Make sure data file is in same folder as notebook
# Note all values are read as strings
with open('Cities.csv') as f:
    # csv.reader yields each line of the file as a list of strings
    rows = csv.reader(f)
    for r in rows:
        print(r)
# Add print rows

In [None]:
# Same as previous except use dictionary format:
# each row is printed as a mapping from column name to value
with open('Cities.csv') as csv_file:
    for record in csv.DictReader(csv_file):
        print(record)

In [None]:
# Print the name and longitude of all cities with longitude < 0
# Use dictionary format
# NOTE(review): the comparison below is applied to the value exactly as the
# csv module returned it, with no conversion. Judging by the closing
# question this looks deliberate -- confirm before "fixing" it.
with open('Cities.csv') as f:
    rows = csv.DictReader(f)
    for r in rows:
        if r['longitude'] < 0:
            print(r['city'], r['longitude'])
# What happened?

In [None]:
# Same but using list format
# Columns are addressed by position here: r[0] is printed as the name and
# r[3] is converted with float() and compared to 0
# NOTE(review): csv.reader does not treat the first line specially, so the
# loop also runs on it; the closing hint (next(rows)) appears to be the
# intended follow-up -- confirm before changing the cell
with open('Cities.csv') as f:
    rows = csv.reader(f)
    for r in rows:
        if float(r[3]) < 0:
            print(r[0], r[3])
# Fix header row with next(rows)

In [None]:
# Join cities with countries on the shared 'country' column and
# print each city together with its country's EU value
with open('Cities.csv') as cities_file:
    for city in csv.DictReader(cities_file):
        # Countries.csv is re-opened for every city so that the inner
        # reader starts again from the first row each time
        with open('Countries.csv') as countries_file:
            for country in csv.DictReader(countries_file):
                if country['country'] == city['country']:
                    print(city['city'], country['EU'])

### <font color="green">Your Turn</font>

In [None]:
# Using Countries.csv and reading in dictionary format, find
# all countries that have coastline and are not in the EU.
# Print the list of countries and their highest points.
# Note: for the "and" of two conditions C1 and C2, use "C1 and C2"
# Hint: The copy-paste-modify approach to programming
# is highly recommended!

### Reading data into Python data structures

In [None]:
# Read Cities.csv data into list of dictionaries
# (one dictionary per data row, keyed by column name)
cities = []
with open('Cities.csv') as source:
    cities.extend(csv.DictReader(source))
print(cities)

In [None]:
# Perform join of cities and countries, print city and whether in EU
# Step 1: load both files fully into lists of dictionaries
cities = []
countries = []
with open('Cities.csv') as cities_file:
    cities.extend(csv.DictReader(cities_file))
with open('Countries.csv') as countries_file:
    countries.extend(csv.DictReader(countries_file))
# Step 2: pair every city with every country and keep only the pairs
# whose 'country' values match
for city in cities:
    for country in countries:
        if country['country'] != city['country']:
            continue
        print(city['city'], country['EU'])

In [None]:
# Compute overall average city temperature
temps = []
with open('Cities.csv') as source:
    for record in csv.DictReader(source):
        value = float(record['temperature'])  # CSV values arrive as strings
        temps.append(value)
total = sum(temps)
print(total / len(temps))

In [None]:
# Compute average city temperature for each country
# Lots to notice here in terms of using dictionaries!
# Two dictionaries are filled in parallel, both keyed by country name
sums = {}    # country -> running total of its city temperatures
counts = {}  # country -> number of cities seen for it
with open('Cities.csv') as source:
    for record in csv.DictReader(source):
        key = record['country']
        if key not in sums:
            # first city for this country: start both entries
            sums[key] = float(record['temperature'])
            counts[key] = 1
        else:
            sums[key] += float(record['temperature'])
            counts[key] += 1
for country, total in sums.items():
    print(country, total / counts[country])

### <font color="green">Your Turn</font>

In [None]:
# Compute the average longitude and average latitude of all
# cities that are in an EU country. Print the two values.
# First four lines are included as a hint:
lats = [] # list of latitudes of cities in an EU country
longs = [] # list of longitudes of cities in an EU country
cities = [] # presumably the rows of Cities.csv, as in the join cell above
countries = [] # presumably the rows of Countries.csv, as in the join cell above
# your code here

### Computing average directly using NumPy package

In [None]:
import numpy as np

In [None]:
# Compute overall average city temperature
# (np.average takes the place of the manual sum/len computation)
temps = []
with open('Cities.csv') as source:
    temps.extend(float(record['temperature']) for record in csv.DictReader(source))
print(np.average(temps))

In [None]:
# Compute average city temperature for each country
temps = {}  # country -> list of that country's city temperatures
with open('Cities.csv') as source:
    for record in csv.DictReader(source):
        key = record['country']
        value = float(record['temperature'])
        # setdefault creates the empty list the first time a country appears
        temps.setdefault(key, []).append(value)
for country, values in temps.items():
    print(country, np.average(values))

### Minimum and maximum

In [None]:
# Overall minimum and maximum temperatures
# Collect every temperature, then let the built-in min() and max() scan the list
temps = []
with open('Cities.csv') as source:
    for record in csv.DictReader(source):
        temps.append(float(record['temperature']))
print('Minimum:', min(temps))
print('Maximum:', max(temps))

In [None]:
# Alternative method: track the running minimum and maximum while reading,
# without building a list of all temperatures.
# The trackers are deliberately NOT named min/max: assigning to those names
# replaces Python's built-in min() and max() for the rest of the kernel
# session, so the previous cell would fail with "'float' object is not
# callable" if it were run again afterwards.
min_temp = float('inf')   # greater than any possible minimum
max_temp = float('-inf')  # smaller than any possible maximum
with open('Cities.csv') as f:
    rows = csv.DictReader(f)
    for r in rows:
        temp = float(r['temperature'])  # convert the string once per row
        if temp < min_temp:
            min_temp = temp
        if temp > max_temp:
            max_temp = temp
print('Minimum:', min_temp)
print('Maximum:', max_temp)

### <font color="green">Your Turn</font>

In [None]:
# Determine which country has the lowest average city temperature
# and which country has the highest average city temperature.
# Print the two countries.
# Hint: Start with code above that computes average temperatures
# for each country, then adapt the second min/max method.
# Suggestion: Get lowest working then add highest

### <font color="green">Your Turn: World Cup Data</font>

In [None]:
# What player on a team with "ia" in the team name played fewer than
# 200 minutes and made more than 100 passes? Print the player surname.
# Note: In Python, use "'abc' in s" to check whether string s contains 'abc'
# Reminder: Convert minutes and passes to integers before comparing to values

In [None]:
# What is the average number of passes made by defenders? By forwards?

In [None]:
# Which team has the highest ratio of goalsFor to goalsAgainst?
# Print the team name only.
# Reminder: Use float() to make sure you're doing floating point division
# Caution: the ratio is undefined for a team whose goalsAgainst is 0;
# check whether the data contains such a team before dividing
# Hint: Use two variables to keep track of highest ratio seen so far
# and team with that ratio:
ratio = 0 # highest ratio seen so far
team = '' # team with highest ratio
# Your code here

In [None]:
# How many players on a team with ranking <10 played more than 350 minutes?
# Reminder: Convert ranking and minutes to integers before comparing to values
# Hint: Compute join of Players and Teams, using a variable to count number of
# players satisfying requirement

In [None]:
# BONUS!
# Write a loop that interactively asks the user to enter a team name.
# If the team exists, print how many games the team played, how many
# yellow cards and red cards the team had, and the average number of
# minutes played by players on that team.
# If the team doesn't exist, print "Team not in 2010 World Cup".
# If 'quit' is entered, terminate the loop.
# Note: In Python 3, input() already returns what the user typed as a string;
# raw_input() is its Python 2 name and no longer exists in Python 3