# Part 1 Reading CSV Files with csv.reader
#### 1a-b).  Keep your imports at the top of your notebook

In [3]:
import csv

#### 1c). Open the "movies.csv" file with `csv.reader`

In [12]:
with open("movies.csv", "r") as movies_fd: # open a file context
    csv_data = csv.reader(movies_fd)
    movies = list(csv_data) # converts the *iterator* into a list

IOError: [Errno 2] No such file or directory: 'movies.csv'

#### 1d-e.)  Fix the IOError, and unpack the data.zip 

In [13]:
try:
    with open("data/movies.csv", "r") as movies_fd: # open a file context
        csv_data = csv.reader(movies_fd)
        movies = list(csv_data) # converts the *iterator* into a list
except IOError as ioe:
    print("I/O Error occurred: %s" % ioe)

#### 1f) Print the movies list

In [14]:
print movies 

[['Title', 'Year', 'Rated', 'Released', 'Runtime', 'Genre', 'Director', 'Writers', 'Actors', 'Plot', 'Language', 'Country', 'Awards', 'Poster', 'Metascore', 'imdbRating', 'imdbVotes', 'imdbID', 'Type', 'Response'], ['Casablanca', '1942', 'PG', '23-Jan-43', '102 min', 'Drama, Romance, War', 'Michael Curtiz', 'Julius J. Epstein (screenplay), Philip G. Epstein (screenplay), Howard Koch (screenplay), Murray Burnett (play), Joan Alison (play)', 'Humphrey Bogart, Ingrid Bergman, Paul Henreid, Claude Rains', 'Set in Casablanca, Morocco during the early days of World War II: An American expatriate meets a former lover, with unforeseen complications.', 'English, French, German, Italian', 'USA', 'Won 3 Oscars. Another 6 wins & 6 nominations.', 'http://ia.media-imdb.com/images/M/MV5BMjQwNDYyNTk2N15BMl5BanBnXkFtZTgwMjQ0OTMyMjE@._V1_SX300.jpg', 'N/A', '8.6', '357332', 'tt0034583', 'movie', 'TRUE'], ['The Big Sleep', '1946', 'APPROVED', '31-Aug-46', '114 min', 'Crime, Film-Noir, Mystery', 'Howard Ha

#### 1g-h) Print the Movies (use list *slicing* to access only the data rows in the movies list)

In [15]:
for row in movies[1:]:
    print("- %s (%s)" % (row[0], row[1]))
    print("  Directed by: %s" % row[6])
    print("  Starring: %s" % row[8])
    print("  Plot: %s" % row[9])

- Casablanca (1942)
  Directed by: Michael Curtiz
  Starring: Humphrey Bogart, Ingrid Bergman, Paul Henreid, Claude Rains
  Plot: Set in Casablanca, Morocco during the early days of World War II: An American expatriate meets a former lover, with unforeseen complications.
- The Big Sleep (1946)
  Directed by: Howard Hawks
  Starring: Humphrey Bogart, Lauren Bacall, John Ridgely, Martha Vickers
  Plot: Private detective Philip Marlowe is hired by a rich family. Before the complex case is over, he's seen murder, blackmail, and what might be love.
- The African Queen (1951)
  Directed by: John Huston
  Starring: Humphrey Bogart, Katharine Hepburn, Robert Morley, Peter Bull
  Plot: In Africa during WWI, a gin-swilling riverboat captain is persuaded by a strait-laced missionary to use his boat to attack an enemy warship.


#### 1i) Compute the *number of records*

In [16]:
# Notice the use of array (list) slicing with [1:]
print("Total number of movies: %s" % len(movies[1:]))

Total number of movies: 3


## Part 1 Challenge
#### Convert the list of lists into a list of dicts (dictionaries)

In [21]:
# Get the header
movie_header = movies[0]

# Define a new array to load dictionaries into
movie_as_dicts = [] 

# Iterate over the movie rows (skipping the header)
for m in movies[1:]:
    # Use list indexing to pack a dictionary with the columns
    row_dict = {}
    for i in range(len(m)):
        row_dict[movie_header[i]] = m[i]
    # Append the new row dictionary to movie_as_dict
    movie_as_dicts.append(row_dict)

# Display results
movie_as_dicts

[{'Actors': 'Humphrey Bogart, Ingrid Bergman, Paul Henreid, Claude Rains',
  'Awards': 'Won 3 Oscars. Another 6 wins & 6 nominations.',
  'Country': 'USA',
  'Director': 'Michael Curtiz',
  'Genre': 'Drama, Romance, War',
  'Language': 'English, French, German, Italian',
  'Metascore': 'N/A',
  'Plot': 'Set in Casablanca, Morocco during the early days of World War II: An American expatriate meets a former lover, with unforeseen complications.',
  'Poster': 'http://ia.media-imdb.com/images/M/MV5BMjQwNDYyNTk2N15BMl5BanBnXkFtZTgwMjQ0OTMyMjE@._V1_SX300.jpg',
  'Rated': 'PG',
  'Released': '23-Jan-43',
  'Response': 'TRUE',
  'Runtime': '102 min',
  'Title': 'Casablanca',
  'Type': 'movie',
  'Writers': 'Julius J. Epstein (screenplay), Philip G. Epstein (screenplay), Howard Koch (screenplay), Murray Burnett (play), Joan Alison (play)',
  'Year': '1942',
  'imdbID': 'tt0034583',
  'imdbRating': '8.6',
  'imdbVotes': '357332'},
 {'Actors': 'Humphrey Bogart, Lauren Bacall, John Ridgely, Martha

#### Here is the equivalent, but more *pythonic* with List Comprehensions

In [27]:
# Use a list comprehension over the movie rows (i.e. movies[1:])
# - m is a row of values, and movies[0] is the header row
# - zip() pairs each header column name and column value as tuples
# - dict takes a list of (key, value) tuples and converts to a dictionary
movie_as_dicts = [dict(zip(movies[0],m)) for m in movies[1:]] 

# Display the results
movie_as_dicts

[('Title', 'Casablanca'), ('Year', '1942'), ('Rated', 'PG'), ('Released', '23-Jan-43'), ('Runtime', '102 min'), ('Genre', 'Drama, Romance, War'), ('Director', 'Michael Curtiz'), ('Writers', 'Julius J. Epstein (screenplay), Philip G. Epstein (screenplay), Howard Koch (screenplay), Murray Burnett (play), Joan Alison (play)'), ('Actors', 'Humphrey Bogart, Ingrid Bergman, Paul Henreid, Claude Rains'), ('Plot', 'Set in Casablanca, Morocco during the early days of World War II: An American expatriate meets a former lover, with unforeseen complications.'), ('Language', 'English, French, German, Italian'), ('Country', 'USA'), ('Awards', 'Won 3 Oscars. Another 6 wins & 6 nominations.'), ('Poster', 'http://ia.media-imdb.com/images/M/MV5BMjQwNDYyNTk2N15BMl5BanBnXkFtZTgwMjQ0OTMyMjE@._V1_SX300.jpg'), ('Metascore', 'N/A'), ('imdbRating', '8.6'), ('imdbVotes', '357332'), ('imdbID', 'tt0034583'), ('Type', 'movie'), ('Response', 'TRUE')]


[{'Actors': 'Humphrey Bogart, Ingrid Bergman, Paul Henreid, Claude Rains',
  'Awards': 'Won 3 Oscars. Another 6 wins & 6 nominations.',
  'Country': 'USA',
  'Director': 'Michael Curtiz',
  'Genre': 'Drama, Romance, War',
  'Language': 'English, French, German, Italian',
  'Metascore': 'N/A',
  'Plot': 'Set in Casablanca, Morocco during the early days of World War II: An American expatriate meets a former lover, with unforeseen complications.',
  'Poster': 'http://ia.media-imdb.com/images/M/MV5BMjQwNDYyNTk2N15BMl5BanBnXkFtZTgwMjQ0OTMyMjE@._V1_SX300.jpg',
  'Rated': 'PG',
  'Released': '23-Jan-43',
  'Response': 'TRUE',
  'Runtime': '102 min',
  'Title': 'Casablanca',
  'Type': 'movie',
  'Writers': 'Julius J. Epstein (screenplay), Philip G. Epstein (screenplay), Howard Koch (screenplay), Murray Burnett (play), Joan Alison (play)',
  'Year': '1942',
  'imdbID': 'tt0034583',
  'imdbRating': '8.6',
  'imdbVotes': '357332'},
 {'Actors': 'Humphrey Bogart, Lauren Bacall, John Ridgely, Martha

In [28]:
# Example, here's the zip for the first row of the movies values
print zip(movies[0],movies[1])

[('Title', 'Casablanca'), ('Year', '1942'), ('Rated', 'PG'), ('Released', '23-Jan-43'), ('Runtime', '102 min'), ('Genre', 'Drama, Romance, War'), ('Director', 'Michael Curtiz'), ('Writers', 'Julius J. Epstein (screenplay), Philip G. Epstein (screenplay), Howard Koch (screenplay), Murray Burnett (play), Joan Alison (play)'), ('Actors', 'Humphrey Bogart, Ingrid Bergman, Paul Henreid, Claude Rains'), ('Plot', 'Set in Casablanca, Morocco during the early days of World War II: An American expatriate meets a former lover, with unforeseen complications.'), ('Language', 'English, French, German, Italian'), ('Country', 'USA'), ('Awards', 'Won 3 Oscars. Another 6 wins & 6 nominations.'), ('Poster', 'http://ia.media-imdb.com/images/M/MV5BMjQwNDYyNTk2N15BMl5BanBnXkFtZTgwMjQ0OTMyMjE@._V1_SX300.jpg'), ('Metascore', 'N/A'), ('imdbRating', '8.6'), ('imdbVotes', '357332'), ('imdbID', 'tt0034583'), ('Type', 'movie'), ('Response', 'TRUE')]


In [29]:
# Example, here's that zip from the previous cell converted to a dict
print dict(zip(movies[0],movies[1]))

{'Plot': 'Set in Casablanca, Morocco during the early days of World War II: An American expatriate meets a former lover, with unforeseen complications.', 'Rated': 'PG', 'Response': 'TRUE', 'Language': 'English, French, German, Italian', 'Title': 'Casablanca', 'Country': 'USA', 'Metascore': 'N/A', 'imdbRating': '8.6', 'Director': 'Michael Curtiz', 'Released': '23-Jan-43', 'Writers': 'Julius J. Epstein (screenplay), Philip G. Epstein (screenplay), Howard Koch (screenplay), Murray Burnett (play), Joan Alison (play)', 'Actors': 'Humphrey Bogart, Ingrid Bergman, Paul Henreid, Claude Rains', 'Year': '1942', 'Genre': 'Drama, Romance, War', 'Awards': 'Won 3 Oscars. Another 6 wins & 6 nominations.', 'Runtime': '102 min', 'Type': 'movie', 'Poster': 'http://ia.media-imdb.com/images/M/MV5BMjQwNDYyNTk2N15BMl5BanBnXkFtZTgwMjQ0OTMyMjE@._V1_SX300.jpg', 'imdbVotes': '357332', 'imdbID': 'tt0034583'}


# Part 2 Reading CSV Files with csv.DictReader
#### 2a) Use the csv.DictReader with the `fieldnames` arguement. 
*Note that you can use the header row, or type it in from the .CSV file.*

In [37]:
movie_fieldnames = movies[0]
movie_fieldnames

['Title',
 'Year',
 'Rated',
 'Released',
 'Runtime',
 'Genre',
 'Director',
 'Writers',
 'Actors',
 'Plot',
 'Language',
 'Country',
 'Awards',
 'Poster',
 'Metascore',
 'imdbRating',
 'imdbVotes',
 'imdbID',
 'Type',
 'Response']

#### First use csv.DictReader with `fieldnames`

In [41]:
try:
    with open("data/movies.csv", "r") as movies_fd: # open a file context
        csv_data = csv.DictReader(movies_fd, fieldnames=movie_fieldnames)
        movies = list(csv_data) # converts the *iterator* into a list
except IOError as ioe:
    print("I/O Error occurred: %s" % ioe)

In [42]:
# Notice that the first row contains the header names
movies

[{'Actors': 'Actors',
  'Awards': 'Awards',
  'Country': 'Country',
  'Director': 'Director',
  'Genre': 'Genre',
  'Language': 'Language',
  'Metascore': 'Metascore',
  'Plot': 'Plot',
  'Poster': 'Poster',
  'Rated': 'Rated',
  'Released': 'Released',
  'Response': 'Response',
  'Runtime': 'Runtime',
  'Title': 'Title',
  'Type': 'Type',
  'Writers': 'Writers',
  'Year': 'Year',
  'imdbID': 'imdbID',
  'imdbRating': 'imdbRating',
  'imdbVotes': 'imdbVotes'},
 {'Actors': 'Humphrey Bogart, Ingrid Bergman, Paul Henreid, Claude Rains',
  'Awards': 'Won 3 Oscars. Another 6 wins & 6 nominations.',
  'Country': 'USA',
  'Director': 'Michael Curtiz',
  'Genre': 'Drama, Romance, War',
  'Language': 'English, French, German, Italian',
  'Metascore': 'N/A',
  'Plot': 'Set in Casablanca, Morocco during the early days of World War II: An American expatriate meets a former lover, with unforeseen complications.',
  'Poster': 'http://ia.media-imdb.com/images/M/MV5BMjQwNDYyNTk2N15BMl5BanBnXkFtZTgwMjQ

#### 2b) Use the for-loop to iterate over the data records
*Notive that the first record is the Header row*

In [44]:
for row in movies:
    print("%s (%s)" % (row["Title"], row["Year"]))
    print("Directed by: %s" % row["Director"])
    print("Starring: %s" % row["Actors"])
    print("Plot: %s" % row["Plot"])

Title (Year)
Directed by: Director
Starring: Actors
Plot: Plot
Casablanca (1942)
Directed by: Michael Curtiz
Starring: Humphrey Bogart, Ingrid Bergman, Paul Henreid, Claude Rains
Plot: Set in Casablanca, Morocco during the early days of World War II: An American expatriate meets a former lover, with unforeseen complications.
The Big Sleep (1946)
Directed by: Howard Hawks
Starring: Humphrey Bogart, Lauren Bacall, John Ridgely, Martha Vickers
Plot: Private detective Philip Marlowe is hired by a rich family. Before the complex case is over, he's seen murder, blackmail, and what might be love.
The African Queen (1951)
Directed by: John Huston
Starring: Humphrey Bogart, Katharine Hepburn, Robert Morley, Peter Bull
Plot: In Africa during WWI, a gin-swilling riverboat captain is persuaded by a strait-laced missionary to use his boat to attack an enemy warship.


#### 2c) What does the documentation say will happen when the fieldnames parameter is removed from the function call?
* Look at the Documentation for csv.DictReader?  https://docs.python.org/2/library/csv.html#csv.DictReader 
* "If the fieldnames parameter is omitted, the values in the first row of the csvfile will be used as the fieldnames."

#### 2d) Modify your code that uses csv.DictReader() so that you can read in the csv file without parsing in the header line.

In [49]:
try:
    with open("data/movies.csv", "r") as movies_fd: # open a file context
        csv_data = csv.DictReader(movies_fd)
        movies = list(csv_data) # converts the *iterator* into a list
except IOError as ioe:
    print("I/O Error occurred: %s" % ioe)

In [50]:
for row in movies:
    print("%s (%s)" % (row["Title"], row["Year"]))
    print("Directed by: %s" % row["Director"])
    print("Starring: %s" % row["Actors"])
    print("Plot: %s" % row["Plot"])

Casablanca (1942)
Directed by: Michael Curtiz
Starring: Humphrey Bogart, Ingrid Bergman, Paul Henreid, Claude Rains
Plot: Set in Casablanca, Morocco during the early days of World War II: An American expatriate meets a former lover, with unforeseen complications.
The Big Sleep (1946)
Directed by: Howard Hawks
Starring: Humphrey Bogart, Lauren Bacall, John Ridgely, Martha Vickers
Plot: Private detective Philip Marlowe is hired by a rich family. Before the complex case is over, he's seen murder, blackmail, and what might be love.
The African Queen (1951)
Directed by: John Huston
Starring: Humphrey Bogart, Katharine Hepburn, Robert Morley, Peter Bull
Plot: In Africa during WWI, a gin-swilling riverboat captain is persuaded by a strait-laced missionary to use his boat to attack an enemy warship.


# Part 3 Reading and parsing XML File
#### 3a) Add this import to the top of your Notebook

In [7]:
import xml.etree.ElementTree as et

#### 3b) Using the File Manager, you can see the XML file 

#### 3c) Opens the data file using the xml.etree.ElementTree class, and gets the root of the XML tree

In [8]:
tree = et.ElementTree(file='data/movies.xml')
root = tree.getroot()

#### 3d) Using .find() and .findall() to print the records
- First demonstrate using .find() on all `Movies` Elements in the XML tree

In [9]:
for m in root:
    rated = m.find("Rated")
    print(rated.text)

PG
APPROVED
PG


In [24]:
rated_tag = m.find("Rated")
print(rated_tag.text)

PG


- Print out the data records using .find() and .findall()

In [59]:
for m in root:
    print("%s (%s)" % (m.find("Title").text, m.find("Year").text))
    print("Directed by: %s" % m.find("Director").text)
    # First find the "Actors" element 
    actors_element = m.find("Actors")
    # Then, use .findall() to get all the "Actor" elements
    actors = actors_element.findall("Actor")
    for a in actors:
        print("Starring: %s" % a.text)
    print("Plot: %s" % m.find("Plot").text)

Casablanca (1942)
Directed by: Michael Curtiz
Starring: Humphrey Bogart
Starring: Ingrid Bergman
Starring: Paul Henreid
Starring: Claude Rains
Plot: Set in Casablanca, Morocco during the early days of World War II: An American expatriate meets a former lover, with unforeseen complications
The Big Sleep (1946)
Directed by: Howard Hawks
Starring: Humphrey Bogart
Starring: Lauren Bacall
Starring: John Ridgely
Starring: Martha Vickers
Plot: Private detective Philip Marlowe is hired by a rich family. Before the complex case is over, he's seen murder, blackmail, and what might be love.
The African Queen (1951)
Directed by: John Huston
Starring: Humphrey Bogart
Starring: Katharine Hepburn
Starring: Robert Morley
Starring: Peter Bull
Plot: In Africa during WWI, a gin-swilling riverboat captain is persuaded by a strait-laced missionary to use his boat to attack an enemy warship.


#### 3d) Getting the number of movies is simply the number of elements in the root of the XML tree

In [60]:
print("Total number of movies: %s" % len(root))

Total number of movies: 3


# Part 3 Challenge
#### Modify your XML parsing code to read the movie data records into a *list of dicts*

In [12]:
# Define a new array to load dictionaries into.
movie_as_dicts = [] 

# Iterate over every `Movie` element
for m in root:
    # Use list indexing to pack a dictionary with the columns
    row_dict = {}

    # Iterate over every child of the `Movie` element
    for child in m:
        
        if(child.tag == "Actors"): # handle "Actors" separately
            # get a list of Actor's names using a List Comprehension
            actors_names = [a.text for a in child] 
            # join the Actor's names together as a comma-separated string
            row_dict["Actors"] = ", ".join(actors_names)
            # /OR/ alternatively, just put the list of actors_names as the value
            # row_dict["Actors"] = actors_names
        
        else: # key is "tag" name and the value is the child's "text" attribute
            row_dict[child.tag] = child.text

    # Append the new row dictionary to movie_as_dict
    movie_as_dicts.append(row_dict)

# Display results
movie_as_dicts

[{'Actors': 'Humphrey Bogart, Ingrid Bergman, Paul Henreid, Claude Rains',
  'Awards': 'Won 3 Oscars. Another 6 wins and 6 nominations.',
  'Country': 'USA',
  'Director': 'Michael Curtiz',
  'Genre': 'Drama, Romance, War',
  'Language': 'English, French, German, Italian',
  'Metascore': 'N/A',
  'Plot': 'Set in Casablanca, Morocco during the early days of World War II: An American expatriate meets a former lover, with unforeseen complications',
  'Poster': 'http://ia.media-imdb.com/images/M/MV5BMjQwNDYyNTk2N15BMl5BanBnXkFtZTgwMjQ0OTMyMjE@._V1_SX300.jpg',
  'Rated': 'PG',
  'Released': '23 Jan 1943',
  'Response': 'True',
  'Runtime': '102 min',
  'Title': 'Casablanca',
  'Type': 'movie',
  'Writers': '\n      ',
  'Year': '1942',
  'imdbID': 'tt0034583',
  'imdbRating': '8.6',
  'imdbVotes': '357,332'},
 {'Actors': 'Humphrey Bogart, Lauren Bacall, John Ridgely, Martha Vickers',
  'Awards': '1 win',
  'Country': 'USA',
  'Director': 'Howard Hawks',
  'Genre': 'Crime, Film-Noir, Mystery

# Part 4 What Movie were they in?
#### 4a) Using "in" to find whether an actor was in a particular movie

In [65]:
def find_actor_movies(movies_list, actor_name):
    print("Searching movies for actor: %s" % actor_name)
    actor_movies = []
    for m in movies_list:
        if actor_name in m["Actors"]:
            actor_movies.append(m)
    return actor_movies

#### 4b) Test your code on, expect a single actor "Ingrid Bergman"

In [66]:
bergman_movies = find_actor_movies(movies, "Ingrid Bergman")
print bergman_movies

Searching movies for actor: Ingrid Bergman
[{'Plot': 'Set in Casablanca, Morocco during the early days of World War II: An American expatriate meets a former lover, with unforeseen complications.', 'Rated': 'PG', 'Response': 'TRUE', 'Language': 'English, French, German, Italian', 'Title': 'Casablanca', 'Country': 'USA', 'Metascore': 'N/A', 'imdbRating': '8.6', 'Director': 'Michael Curtiz', 'Released': '23-Jan-43', 'Writers': 'Julius J. Epstein (screenplay), Philip G. Epstein (screenplay), Howard Koch (screenplay), Murray Burnett (play), Joan Alison (play)', 'Actors': 'Humphrey Bogart, Ingrid Bergman, Paul Henreid, Claude Rains', 'Year': '1942', 'Genre': 'Drama, Romance, War', 'Awards': 'Won 3 Oscars. Another 6 wins & 6 nominations.', 'Runtime': '102 min', 'Type': 'movie', 'Poster': 'http://ia.media-imdb.com/images/M/MV5BMjQwNDYyNTk2N15BMl5BanBnXkFtZTgwMjQ0OTMyMjE@._V1_SX300.jpg', 'imdbVotes': '357332', 'imdbID': 'tt0034583'}]


#### 4c) Print out the Title of movies, and the total number

In [67]:
print("Ingrid Bergman was in %d movies" % len(bergman_movies))
for m in bergman_movies:
    print("- %s" % m["Title"])

Ingrid Bergman was in 1 movies
- Casablanca


- modify `find_actor_movies` to print a message if actor was *not* in any movies

In [70]:
def find_actor_movies(movies_list, actor_name):
    print("Searching movies for actor: %s" % actor_name)
    actor_movies = []
    for m in movies_list:
        if actor_name in m["Actors"]:
            actor_movies.append(m)
    # Check if actor was not in any movies
    if(len(actor_movies) == 0):
        print("'%s' was not in any movies" % actor_name)
    return actor_movies

In [71]:
find_actor_movies(movies, "George Clooney")

Searching movies for actor: George Clooney
'George Clooney' was not in any movies


[]

# Part 4 Challenge
- Modify your find_actors_movies() function to find all movies whose actor’s name is part of
- Account for a different case

In [73]:
def find_actor_movies(movies_list, actor_name):
    print("Searching movies for actor: %s" % actor_name)
    actor_movies = []
    for m in movies_list:
        # Using "in" accounts for a partial name
        # Drop both to lower-case to account for different cases
        if(actor_name.lower() in m["Actors"].lower()):
            actor_movies.append(m)
    # Check if actor was not in any movies
    if(len(actor_movies) == 0):
        print("'%s' was not in any movies" % actor_name)
    return actor_movies

In [74]:
find_actor_movies(movies, "bogart")

Searching movies for actor: bogart


[{'Actors': 'Humphrey Bogart, Ingrid Bergman, Paul Henreid, Claude Rains',
  'Awards': 'Won 3 Oscars. Another 6 wins & 6 nominations.',
  'Country': 'USA',
  'Director': 'Michael Curtiz',
  'Genre': 'Drama, Romance, War',
  'Language': 'English, French, German, Italian',
  'Metascore': 'N/A',
  'Plot': 'Set in Casablanca, Morocco during the early days of World War II: An American expatriate meets a former lover, with unforeseen complications.',
  'Poster': 'http://ia.media-imdb.com/images/M/MV5BMjQwNDYyNTk2N15BMl5BanBnXkFtZTgwMjQ0OTMyMjE@._V1_SX300.jpg',
  'Rated': 'PG',
  'Released': '23-Jan-43',
  'Response': 'TRUE',
  'Runtime': '102 min',
  'Title': 'Casablanca',
  'Type': 'movie',
  'Writers': 'Julius J. Epstein (screenplay), Philip G. Epstein (screenplay), Howard Koch (screenplay), Murray Burnett (play), Joan Alison (play)',
  'Year': '1942',
  'imdbID': 'tt0034583',
  'imdbRating': '8.6',
  'imdbVotes': '357332'},
 {'Actors': 'Humphrey Bogart, Lauren Bacall, John Ridgely, Martha

# Part 5 Writing CSV Files with csv.DictWriter
- Find an actor's movies using your function
- Conver the actor's name to come up with a new filename
- Get the fieldnames to write out, as column names
- Write out the movies using csv.DictWriter

In [80]:
actor_name = "Ingrid Bergman"
bergman_movies = find_actor_movies(movies, actor_name)
bergman_movies

Searching movies for actor: Ingrid Bergman


[{'Actors': 'Humphrey Bogart, Ingrid Bergman, Paul Henreid, Claude Rains',
  'Awards': 'Won 3 Oscars. Another 6 wins & 6 nominations.',
  'Country': 'USA',
  'Director': 'Michael Curtiz',
  'Genre': 'Drama, Romance, War',
  'Language': 'English, French, German, Italian',
  'Metascore': 'N/A',
  'Plot': 'Set in Casablanca, Morocco during the early days of World War II: An American expatriate meets a former lover, with unforeseen complications.',
  'Poster': 'http://ia.media-imdb.com/images/M/MV5BMjQwNDYyNTk2N15BMl5BanBnXkFtZTgwMjQ0OTMyMjE@._V1_SX300.jpg',
  'Rated': 'PG',
  'Released': '23-Jan-43',
  'Response': 'TRUE',
  'Runtime': '102 min',
  'Title': 'Casablanca',
  'Type': 'movie',
  'Writers': 'Julius J. Epstein (screenplay), Philip G. Epstein (screenplay), Howard Koch (screenplay), Murray Burnett (play), Joan Alison (play)',
  'Year': '1942',
  'imdbID': 'tt0034583',
  'imdbRating': '8.6',
  'imdbVotes': '357332'}]

In [81]:
actor_filename = "%s_movies.csv" % actor_name.lower().replace(" ", "_")
print actor_filename

ingrid_bergman_movies.csv


In [83]:
# the 'keys' of one of the movie elements can be used to get the fieldnames
fieldnames = bergman_movies[0].keys() 
print fieldnames

['Plot', 'Rated', 'Response', 'Language', 'Title', 'Country', 'Metascore', 'imdbRating', 'Director', 'Released', 'Writers', 'Actors', 'Year', 'Genre', 'Awards', 'Runtime', 'Type', 'Poster', 'imdbVotes', 'imdbID']


In [84]:
try:
    with open("data/%s" % actor_filename, "w") as movies_fd: # open a file context
        csv_file = csv.DictWriter(movies_fd, fieldnames=fieldnames)
        csv_file.writeheader() # Writes the header to the file
        csv_file.writerows(bergman_movies) # Writes the rows to the file
except IOError as ioe:
    print("I/O Error occurred: %s" % ioe)

#### View the file 'ingrid_bergman_movies.csv' in File Manager -> data/