In [69]:
# %matplotlib inline
import pandas as pd
import numpy as np

### Download the data and load it to Pandas. 

You can find them [here](https://drive.google.com/file/d/1NY6cmF9Shjw-dD7BD6bNmfcIVz-kQcFR/view?usp=sharing).

In [70]:
titles = pd.read_csv('data/titles.csv')
titles.head()

Unnamed: 0,title,year
0,The Rising Son,1990
1,The Thousand Plane Raid,1969
2,Crucea de piatra,1993
3,Country,2000
4,Gaiking II,2011


In [71]:
cast = pd.read_csv('data/cast.csv', sep=",")
cast.head()

Unnamed: 0,title,year,name,type,character,n
0,Closet Monster,2015,Buffy #1,actor,Buffy 4,
1,Suuri illusioni,1985,Homo $,actor,Guests,22.0
2,Battle of the Sexes,2017,$hutter,actor,Bobby Riggs Fan,10.0
3,Secret in Their Eyes,2015,$hutter,actor,2002 Dodger Fan,
4,Steve Jobs,2015,$hutter,actor,1988 Opera House Patron,


In [72]:
# check data types
titles.dtypes

title    object
year      int64
dtype: object

### How many movies are listed in the titles dataframe?

In [73]:
# drop duplicates to make sure that there are less lengths
len(titles.drop_duplicates())

232330

### What are the earliest two films listed in the titles dataframe?

In [74]:
# using sort_values() to sort by year, then show the first 2 titles to be 
titles.sort_values('year').head(2)

Unnamed: 0,title,year
177757,Miss Jerry,1894
215272,The Startled Lover,1898


### How many movies have the title "Hamlet"?

In [75]:
# use len() to count number of titles with 'Hamlet' in the title
hamlet_titles = titles[titles['title'] == 'Hamlet']
hamlet_titles

Unnamed: 0,title,year
6009,Hamlet,1948
45350,Hamlet,1990
46721,Hamlet,1910
92146,Hamlet,1976
94355,Hamlet,1987
94554,Hamlet,2000
98554,Hamlet,1921
102919,Hamlet,2011
122704,Hamlet,1969
128388,Hamlet,1954


In [76]:
len(hamlet_titles)

18

### How many movies are titled "North by Northwest"?

In [77]:
# use boolean operations to find movies titled 'North by Northwest'
nnw_titles = titles[titles['title'] == 'North by Northwest']
len(nnw_titles)

1

### When was the first movie titled "Hamlet" made?

In [78]:
# use previously found titles named 'Hamlet'
hamlet_titles.sort_values('year').head(1)['year']

46721    1910
Name: year, dtype: int64

### List all of the "Treasure Island" movies from earliest to most recent.

In [79]:
# use boolean operation to find all movies titled 'Treasure Island' then sort values by year ascending
titles[titles['title'] == 'Treasure Island'].sort_values('year')

Unnamed: 0,title,year
206027,Treasure Island,1918
51287,Treasure Island,1920
191050,Treasure Island,1934
96934,Treasure Island,1950
89534,Treasure Island,1972
111343,Treasure Island,1973
205397,Treasure Island,1985
179354,Treasure Island,1999


### How many movies were made in the year 1950?

In [80]:
# user boolean operation to find all movies made in the year 1950
# then use length of list to count those movies
titles_in_1950 = titles[titles['year'] == 1950]
len(titles_in_1950)

1099

### How many movies were made in the year 1960?

In [81]:
# user boolean operation to find all movies made in the year 1960
# then use length of list to count those movies
titles_in_1960 = titles[titles['year'] == 1960]
len(titles_in_1960)

1501

### How many movies were made from 1950 through 1959?

In [82]:
# use isin() to find all movies made in the 1950s using the range as the constraint of the search
# then use length of list to count those movies
titles_in_1950s = titles[titles['year'].isin(range(1950, 1960))]
len(titles_in_1950s.sort_values('year'))

12711

### In what years has a movie titled "Batman" been released?

In [83]:
# use boolean operations to find movies titled 'Batman'
batman_titles = titles[titles['title'] == 'Batman']

# loop through 'Batman' and print release year for each movie
for batman in batman_titles['year']:
    print(batman)

1989
1943


### How many roles were there in the movie "Inception"?

In [84]:
print(cast.head(5), '\n')
# use DataFrame attributes to determine column data types
print(cast.dtypes)

                  title  year      name   type                character     n
0        Closet Monster  2015  Buffy #1  actor                  Buffy 4   NaN
1       Suuri illusioni  1985    Homo $  actor                   Guests  22.0
2   Battle of the Sexes  2017   $hutter  actor          Bobby Riggs Fan  10.0
3  Secret in Their Eyes  2015   $hutter  actor          2002 Dodger Fan   NaN
4            Steve Jobs  2015   $hutter  actor  1988 Opera House Patron   NaN 

title         object
year           int64
name          object
type          object
character     object
n            float64
dtype: object


In [85]:
# use boolean operation to look for 'Inception' casting details
inception_title = cast[cast['title'] == 'Inception']

# find unique values for each type to find number of roles
list(inception_title['type'].drop_duplicates())

['actor', 'actress']

### How many roles in the movie "Inception" are NOT ranked by an "n" value?

In [86]:
# finding the number of roles in the movie 'Inception' not ranked
inception_not_ranked = inception_title[inception_title['n'].isna()]
len(inception_not_ranked)

26

### But how many roles in the movie "Inception" did receive an "n" value?

In [87]:
# finding the number of roles in the movie 'Inception' that is inverse of not ranked
inception_ranked = inception_title[~inception_title['n'].isna()]
len(inception_ranked)

51

### Display the cast of "North by Northwest" in their correct "n"-value order, ignoring roles that did not earn a numeric "n" value.

In [88]:
# use sort_values on 'Inception' ranked list
inception_ranked.sort_values('n').head()

Unnamed: 0,title,year,name,type,character,n
573027,Inception,2010,Leonardo DiCaprio,actor,Cobb,1.0
834400,Inception,2010,Joseph Gordon-Levitt,actor,Arthur,2.0
3259597,Inception,2010,Ellen Page,actress,Ariadne,3.0
913007,Inception,2010,Tom Hardy,actor,Eames,4.0
2333003,Inception,2010,Ken Watanabe,actor,Saito,5.0


### Display the entire cast, in "n"-order, of the 1972 film "Sleuth".

In [89]:
# find all movies titled 'Sleuth' in year 1972
sleuth_title = cast[cast['title'] == ('Sleuth')]
sleuth_1972 = sleuth_title[sleuth_title['year'] == 1972]

# display 1972 'Sleuth' list ordered by ranking
sleuth_1972[~sleuth_1972['n'].isna()].sort_values('n')

Unnamed: 0,title,year,name,type,character,n
1654402,Sleuth,1972,Laurence Olivier,actor,Andrew Wyke,1.0
316652,Sleuth,1972,Michael Caine,actor,Milo Tindle,2.0
362592,Sleuth,1972,Alec Cawthorne,actor,Inspector Doppler,3.0
1421434,Sleuth,1972,John (II) Matthews,actor,Detective Sergeant Tarrant,4.0
2635708,Sleuth,1972,Eve (III) Channing,actress,Marguerite Wyke,5.0
1405219,Sleuth,1972,Teddy Martin,actor,Police Constable Higgs,6.0


### Now display the entire cast, in "n"-order, of the 2007 version of "Sleuth".

In [90]:
# find all movies titled 'Sleuth' in year 2007
sleuth_title = cast[cast['title'] == ('Sleuth')]
sleuth_2007 = sleuth_title[sleuth_title['year'] == 2007]

# display 1972 'Sleuth' list ordered by ranking
sleuth_2007[~sleuth_2007['n'].isna()].sort_values('n')

Unnamed: 0,title,year,name,type,character,n
316653,Sleuth,2007,Michael Caine,actor,Andrew,1.0
1254632,Sleuth,2007,Jude Law,actor,Milo,2.0
1751872,Sleuth,2007,Harold Pinter,actor,Man on T.V.,3.0


### How many roles were credited in the silent 1921 version of Hamlet?

In [96]:
hamlet_titles = cast[cast['title'] == 'Hamlet']
hamlet_1921 = hamlet_titles[hamlet_titles['year'] == 1921]

In [97]:
len(hamlet_1921.sort_values('n'))

9

### How many roles were credited in Branagh’s 1996 Hamlet?

### How many "Hamlet" roles have been listed in all film credits through history?

### How many people have played an "Ophelia"?

### How many people have played a role called "The Dude"?

### How many people have played a role called "The Stranger"?

### How many roles has Sidney Poitier played throughout his career?

### How many roles has Judi Dench played?

### List the supporting roles (having n=2) played by Cary Grant in the 1940s, in order by year.

### List the leading roles that Cary Grant played in the 1940s in order by year.

### How many roles were available for actors in the 1950s?

### How many roles were avilable for actresses in the 1950s?

### How many leading roles (n=1) were available from the beginning of film history through 1980?

### How many non-leading roles were available through from the beginning of film history through 1980?

### How many roles through 1980 were minor enough that they did not warrant a numeric "n" rank?