# Python: List comprehension

Goals: 

* Interesting new tools: the enumerate() function and the dictionary items() method

* Discovering list comprehensions and their advantages

* Real case: dataset on the historical members of the American Congress

* Count and determine the most frequent first names

## The enumerate function

In [1]:
# Motivation: two parallel lists, where ages[i] is the age of students[i]
students = [
    "Daouda",
    "Moha",
    "Seyni",
    "Khadir",
    "Mamadou",
]
ages = [
    16,
    12,
    17,
    10,
    15,
]

Let's display each student's age.

In [2]:
# Walk the two lists one after the other: all names are printed first,
# then all ages, so a name is never shown next to its age.
for name in students:
    print(name)
for years_old in ages:
    print(years_old)

Daouda
Moha
Seyni
Khadir
Mamadou
16
12
17
10
15


We notice that while looping over the first list, we cannot display the matching element of the second list at the same time: the names are all printed first, then the ages. Python's **enumerate()** function solves this by giving us the index of each element as we loop.

In [3]:
# Overview: enumerate() yields (position, element) pairs, starting at 0
for position, name in enumerate(students):
    print("Index:", position)
    print("Student:", name)

Index: 0
Student: Daouda
Index: 1
Student: Moha
Index: 2
Student: Seyni
Index: 3
Student: Khadir
Index: 4
Student: Mamadou


Thus with the index, it is possible to retrieve the age of each student.

In [4]:
# Example 1: the position given by enumerate() is used to look up
# the age stored at the same position in the ages list
for position, name in enumerate(students):
    print("Student:", name)
    print("Age:", ages[position])

Student: Daouda
Age: 16
Student: Moha
Age: 12
Student: Seyni
Age: 17
Student: Khadir
Age: 10
Student: Mamadou
Age: 15


In [5]:
# Example 2: one inner list per car, and a parallel list of prices
cars = [
    ["Black", "Tesla", "Model X"],
    ["Grey", "Tesla", "Model S Plaid"],
]
prices = [
    114990,
    129990,
]

Let's use the **enumerate()** function to add the price to each car.

In [6]:
# Add to each car (in place) the price found at the same position in prices
for position, car_details in enumerate(cars):
    price = prices[position]
    car_details.append(price)

In [7]:
# Each inner list now ends with the price that was appended to it
print(cars)

[['Black', 'Tesla', 'Model X', 114990], ['Grey', 'Tesla', 'Model S Plaid', 129990]]


## List comprehension

In [8]:
# Motivation: collect the length of every animal name with an explicit loop
animals = ["Dog", "Tiger", "Lion", "Cow", "Snake"]
animals_lenght = []

for name in animals:
    name_length = len(name)
    animals_lenght.append(name_length)

In [9]:
# One length per animal name, in the same order as the animals list
print(animals_lenght)

[3, 5, 4, 3, 5]


In [10]:
# Use of list comprehension: builds the same list of lengths as the loop,
# in a single expression
animals_lenght = [len(name) for name in animals]
animals_lenght

[3, 5, 4, 3, 5]

In [11]:
# Example: build a new list in which every price is multiplied by two
prices = [
    10,
    150,
    200,
    350,
]
prices_doubled = [amount * 2 for amount in prices]
prices_doubled

[20, 300, 400, 700]

## Counting female names

### Training

In [12]:
import csv

# Read the whole CSV into a list of rows (each row is a list of strings).
# The with-block closes the file as soon as it has been read, and
# newline="" lets the csv module handle line endings itself.
with open("legislators.csv", newline="") as f:
    legislators = list(csv.reader(f))

In [13]:
# Append a numeric birth year as a new last column of every row.
for row in legislators:

    # The year is the text before the first '-' of the birthday cell.
    birthday = row[2]
    birth_year = birthday.split('-')[0]

    try:
        birth_year = int(birth_year)
    except ValueError:
        # Text that is not a whole number (for example an empty cell)
        # falls back to 0 instead of stopping the loop.
        birth_year = 0

    row.append(birth_year)

In [14]:
# Replace the value at index 7 of the first row with a column label
# (presumably row 0 is the CSV header; confirm against the data file)
legislators[0][7] = "birth_year"

In [15]:
# Count how many times each first name appears among the rows whose
# gender is 'F' and whose birth year is greater than 1950
name_counts = {}

for record in legislators:

    gender, year = record[3], record[7]

    # Guard clause: skip every row that does not match both conditions.
    # The gender test runs first, so the year comparison is only evaluated
    # for rows whose gender is 'F'.
    if not (gender == 'F' and year > 1950):
        continue

    first_name = record[1]
    name_counts[first_name] = name_counts.get(first_name, 0) + 1

In [16]:
# Dictionary mapping each first name to its number of occurrences
print(name_counts)

{'Enid': 1, 'Lynn': 1, 'Karen': 1, 'Denise': 1, 'Katherine': 1, 'Melissa': 2, 'Blanche': 1, 'Cynthia': 1, 'Shelley': 2, 'Nancy': 1, 'Deborah': 2, 'Heather': 1, 'Kathleen': 2, 'Mary': 2, 'Stephanie': 1, 'Betsy': 1, 'Hilda': 1, 'Ellen': 1, 'Gabrielle': 1, 'Sandy': 1, 'Ann Marie': 1, 'Nan': 1, 'Laura': 1, 'Jean': 1, 'Betty': 1}


## The None object

In [17]:
# Motivation 1: find the largest value, using 0 as the starting maximum
values = [2, 12, 60]
max_value = 0

for candidate in values:
    # Keep whichever is larger: the current maximum or the new candidate
    max_value = candidate if candidate > max_value else max_value

print(max_value)

60


In [18]:
# Motivation 2: same search with only negative values. Because the starting
# maximum is 0, no value ever exceeds it and the result stays 0.
values = [-2, -12, -60]
max_value = 0

for candidate in values:
    max_value = candidate if candidate > max_value else max_value

print(max_value)

0


In [19]:
# With None: start with "no maximum yet" so that the first value is always
# accepted, whatever its sign
values = [-2, -12, -60]
max_value = None

for candidate in values:
    # Skip the candidate only when a maximum already exists and is not beaten
    if max_value is not None and candidate <= max_value:
        continue
    max_value = candidate

print(max_value)

-2


### Training

In [20]:
# For each element: False when it is None, otherwise whether it exceeds 30.
# Testing for None first avoids comparing None with a number.
values = [None, 1, 45, None, 75]
check_bool = [False if x is None else x > 30 for x in values]
check_bool

[False, False, True, False, True]

## Application: most frequent female names

### Training

In [21]:
# Find the highest count stored in name_counts, starting from "no maximum yet"
max_value = None

for count in name_counts.values():

    # Skip the count only when a maximum already exists and is not beaten
    if max_value is not None and count <= max_value:
        continue
    max_value = count

In [22]:
# Show the counts again so they can be compared with the maximum found
print(name_counts)

{'Enid': 1, 'Lynn': 1, 'Karen': 1, 'Denise': 1, 'Katherine': 1, 'Melissa': 2, 'Blanche': 1, 'Cynthia': 1, 'Shelley': 2, 'Nancy': 1, 'Deborah': 2, 'Heather': 1, 'Kathleen': 2, 'Mary': 2, 'Stephanie': 1, 'Betsy': 1, 'Hilda': 1, 'Ellen': 1, 'Gabrielle': 1, 'Sandy': 1, 'Ann Marie': 1, 'Nan': 1, 'Laura': 1, 'Jean': 1, 'Betty': 1}


In [23]:
# Highest number of occurrences of a single name in name_counts
print(max_value)

2


## The items method

In [24]:
# Example: a dictionary mapping a fruit name to a number
fruits = {"apple": 12, "banana": 5, "orange": 20}

In [25]:
# items() yields (key, value) pairs, unpacked here into two loop variables
for fruit_name, quantity in fruits.items():
    print(fruit_name, ":", quantity)

apple : 12
banana : 5
orange : 20


## Find frequent first names

### Training 1

In [26]:
# Keep every name whose count equals the highest count in name_counts.
# The highest count is computed from the data rather than hard-coded, so the
# result stays correct if the counts change; default=None covers an empty
# dictionary (the comprehension then yields an empty list).
top_count = max(name_counts.values(), default=None)
top_female_names = [k for k, v in name_counts.items() if v == top_count]
top_female_names

['Melissa', 'Shelley', 'Deborah', 'Kathleen', 'Mary']

### Training 2

In [27]:
# Count first names among the rows whose gender is "M" and whose birth year
# is greater than 1940; top_male_names is filled in a later step
top_male_names = []
male_name_counts = {}

for record in legislators:
    # Guard clause: skip every row that does not match both conditions
    if not (record[3] == "M" and record[7] > 1940):
        continue
    first_name = record[1]
    male_name_counts[first_name] = male_name_counts.get(first_name, 0) + 1

In [28]:
# Find the highest count stored in male_name_counts
top_male_count = None

for occurrences in male_name_counts.values():
    # Skip the count only when a maximum already exists and is not beaten
    if top_male_count is not None and occurrences <= top_male_count:
        continue
    top_male_count = occurrences

In [29]:
# Add to top_male_names every name whose count equals the highest count
top_male_names.extend(
    first_name
    for first_name, occurrences in male_name_counts.items()
    if occurrences == top_male_count
)

In [30]:
# Names that reach the highest count in male_name_counts
print(top_male_names)

['John']


## Challenge

### Dataset

In [31]:
import csv

# Read the whole CSV into a list of rows. The with-block closes the file as
# soon as it has been read, and newline="" lets the csv module handle line
# endings itself.
with open("nfl_suspensions_data.csv", newline="") as f:
    nfl_suspensions = list(csv.reader(f))
# Drop the first row (presumably the header) and keep only the data rows
nfl_suspensions = nfl_suspensions[1:]

In [32]:
# Preview the first five rows
print(nfl_suspensions[:5])

[['F. Davis', 'WAS', 'Indef.', 'Substance abuse, repeated offense', 'Marijuana-related', '2014', 'http://www.cbssports.com/nfl/eye-on-football/24448694/redskins-te-fred-davis-suspended-Indefiniteinitely-by-nfl'], ['J. Blackmon', 'JAX', 'Indef.', 'Substance abuse, repeated offense', '', '2014', 'http://espn.go.com/nfl/story/_/id/11257934/justin-blackmon-jacksonville-jaguars-arrested-marijuana-possession'], ['L. Brazill', 'IND', 'Indef.', 'Substance abuse, repeated offense', '', '2014', 'http://www.nfl.com/news/story/0ap2000000364622/article/lavon-brazill-released-by-colts-in-wake-of-suspension'], ['T. Jackson', 'WAS', 'Indef.', 'Substance abuse, repeated offense', '', '2014', 'http://www.nfl.com/news/story/0ap2000000364087/article/tanard-jackson-suspended-Indefiniteinitely-by-nfl'], ['M. Hapes', 'NYG', 'Indef.', 'Personal conduct', 'Gambling-related', '1946', 'http://espn.go.com/blog/nflnation/tag/_/name/frank-filchock']]


In [33]:
# Count the rows per value found at index 5 (the year, kept as raw text)
years = {}

for record in nfl_suspensions:

    year_label = record[5]
    years[year_label] = years.get(year_label, 0) + 1

In [34]:
# Dictionary mapping each year string to its number of rows
print(years)

{'2014': 29, '1946': 1, '1947': 1, '2010': 21, '2008': 10, '2007': 17, '1983': 1, '2009': 10, '2005': 8, '2000': 1, '2012': 45, '2001': 3, '2006': 11, '1989': 17, '   ': 1, '1963': 1, '2013': 40, '1990': 3, '2011': 13, '2004': 6, '2002': 7, '2003': 9, '1997': 3, '1999': 5, '1993': 1, '1995': 1, '1998': 2, '1994': 1, '1986': 1}


### Unique values

In [35]:
# Collect the value at index 1 of every row, then remove duplicates by
# turning the list into a set
teams = [record[1] for record in nfl_suspensions]
unique_teams = {*teams}

In [36]:
# A set has no defined order, so the display order may vary between runs
print(unique_teams)

{'HOU', 'SEA', 'JAX', 'TEN', 'CIN', 'LA', 'GB', 'DEN', 'TB', 'SD', 'NYG', 'WAS', 'DAL', 'MIN', 'OAK', 'NE', 'DET', 'SF', 'CLE', 'BUF', 'CHI', 'NO', 'STL', 'BAL', 'FREE', 'CAR', 'IND', 'NYJ', 'PIT', 'MIA', 'ARI', 'PHI', 'KC', 'ATL'}


In [37]:
# Collect the value at index 2 of every row, then remove duplicates by
# turning the list into a set
games = [record[2] for record in nfl_suspensions]
unique_games = {*games}

In [38]:
# The values are strings as read from the CSV, not numbers
print(unique_games)

{'2', '36', 'Indef.', '10', '4', '14', '3', '16', '1', '20', '6', '8', '32', '5'}


### Suspension class

In [39]:
class Suspension:
    """One row of the suspensions data, exposed through named attributes."""

    def __init__(self, row):
        """Store the values found at indexes 0, 1, 2 and 5 of *row*.

        The values are kept exactly as they appear in the row.
        """
        self.name, self.team, self.games = row[0], row[1], row[2]
        self.year = row[5]

In [40]:
# Build an instance from the row at index 2 (the third data row)
third_suspension = Suspension(nfl_suspensions[2])

In [41]:
# Display the four attributes stored on the instance, separated by "|"
print(third_suspension.name, "|", third_suspension.team, "|", third_suspension.games, "|", third_suspension.year)

L. Brazill | IND | Indef. | 2014


### Improved suspension class

In [42]:
class Suspension():
    """One row of the suspensions data, with the year stored as an integer."""

    def __init__(self, row):
        """Store the name, team and games of *row* and parse its year.

        The values at indexes 0, 1 and 2 are kept as they are. The value at
        index 5 is converted to an int; when it is missing or cannot be
        converted, the year is set to 0.
        """
        self.name = row[0]
        self.team = row[1]
        self.games = row[2]
        try:
            self.year = int(row[5])
        except (ValueError, TypeError, IndexError):
            # Only the errors that reading and converting the year can raise
            # are handled: non-numeric text, a non-convertible value, or a
            # row too short to have an index 5.
            self.year = 0

    def get_year(self):
        """Return the year as an int (0 when it could not be parsed)."""
        return self.year

In [43]:
# Build an instance from the row at index 22 and read its year through
# the get_year() method
missing_year = Suspension(nfl_suspensions[22])
get_missing_year = missing_year.get_year()

In [44]:
# Year returned by get_year() for that row
print(get_missing_year)

0
