In [2]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
%matplotlib inline

## Download the Data and Load to Pandas

In [3]:
# Load the movie titles CSV (pandas assigns its default integer index)
# and preview the first five rows.
titles = pd.read_csv('titles.csv')
titles.head(5)

Unnamed: 0,title,year
0,The Rising Son,1990
1,The Thousand Plane Raid,1969
2,Crucea de piatra,1993
3,Country,2000
4,Gaiking II,2011


In [4]:
# Load the cast CSV (pandas assigns its default integer index)
# and preview the first five rows.
cast = pd.read_csv('cast.csv')
cast.head(5)

Unnamed: 0,title,year,name,type,character,n
0,Closet Monster,2015,Buffy #1,actor,Buffy 4,
1,Suuri illusioni,1985,Homo $,actor,Guests,22.0
2,Battle of the Sexes,2017,$hutter,actor,Bobby Riggs Fan,10.0
3,Secret in Their Eyes,2015,$hutter,actor,2002 Dodger Fan,
4,Steve Jobs,2015,$hutter,actor,1988 Opera House Patron,


In [5]:
# Print the (rows, columns) shape of each DataFrame, one per line.
print(titles.shape, cast.shape, sep='\n')

(232330, 2)
(3634467, 6)


## How many movies are listed in the titles Dataframe?

In [6]:
# Count the non-null entries in the 'title' column.
titles['title'].count()

232330

## What are the earliest two films listed in the titles Dataframe?

In [7]:
# Order the films by 'year' (ascending is the default) and keep the
# titles of the first two rows.

titles.sort_values(by='year')['title'].iloc[:2]

177757            Miss Jerry
215272    The Startled Lover
Name: title, dtype: object

In [8]:
# Same two earliest films, shown as full rows so the year is visible too.

titles.sort_values(by='year').iloc[:2]

Unnamed: 0,title,year
177757,Miss Jerry,1894
215272,The Startled Lover,1898


## How many films have the title Hamlet?

In [9]:
# Select the 'title' values that are exactly 'Hamlet' and count them.

Hamlets = titles.loc[titles['title'].eq('Hamlet'), 'title'].count()
print(f'There are {Hamlets} Hamlet movies in this dataframe')

There are 18 Hamlet movies in this dataframe


## How many movies are titled North by Northwest?

In [10]:
# Select the 'title' values that are exactly 'North by Northwest' and count them.

NxNW = titles.loc[titles['title'].eq('North by Northwest'), 'title'].count()
print(f'There is {NxNW} North by Northwest title in this dataframe')

There is 1 North by Northwest title in this dataframe


## When was the first movie titled Hamlet made?

In [11]:
# Take the release years of every film titled 'Hamlet' and keep the smallest.

first_hamlet = titles.loc[titles['title'].eq('Hamlet'), 'year'].min()
print(f"The first movie titled 'Hamlet' was made in {first_hamlet}.")

The first movie titled 'Hamlet' was made in 1910.


## List all of the Treasure Island movies from Earliest to most Recent

In [12]:
# Keep only the rows titled 'Treasure Island', ordered from earliest to latest year.

titles.loc[titles['title'].eq('Treasure Island')].sort_values(by='year')

Unnamed: 0,title,year
206027,Treasure Island,1918
51287,Treasure Island,1920
191050,Treasure Island,1934
96934,Treasure Island,1950
89534,Treasure Island,1972
111343,Treasure Island,1973
205397,Treasure Island,1985
179354,Treasure Island,1999


## How many Movies were made in the year 1950?

In [13]:
# Count the titles whose release year is exactly 1950.

films_1950 = titles.loc[titles['year'].eq(1950), 'title'].count()
print(f'There were {films_1950} films made in the year 1950.')

There were 1099 films made in the year 1950.


## How many Movies were made in the year 1960?

In [14]:
# Count the titles whose release year is exactly 1960.
films_1960 = titles.loc[titles['year'].eq(1960), 'title'].count()
print(f'There were {films_1960} films made in the year 1960.')

There were 1501 films made in the year 1960.


## How many films were made from 1950 through to 1959?

In [15]:
# Count the rows whose year falls in 1950..1959 inclusive. 'year' is an
# integer column, so an inclusive upper bound of 1959 matches "< 1960".

films_1950s = titles['year'].between(1950, 1959).sum()
print(f'There were {films_1950s} films made between 1950 and 1960.')

There were 12711 films made between 1950 and 1960.


## In what years has a Movie Titled 'Batman' been Released?

In [16]:
# Show the release year of every film titled exactly 'Batman'.

titles.loc[titles['title'].eq('Batman'), 'year']

56000    1989
74608    1943
Name: year, dtype: int64

## How many Roles were there in the Movie Inception?

In [17]:
# Each row of `cast` is one role, so the number of roles in Inception is the
# number of rows whose title matches -- whether or not 'n' is NaN. Summing the
# boolean title mask counts those rows in a single pass over the frame.

Roles = (cast['title'] == 'Inception').sum()
print(f'There were {Roles} roles in the movie Inception.')

There were 77 roles in the movie Inception.


## How many Roles in the Movie Inception were not Ranked by an "n" value?

In [18]:
# Take the 'n' values for Inception and count how many of them are NaN.

na_roles = cast.loc[cast['title'].eq('Inception'), 'n'].isna().sum()
print(f'There were {na_roles} roles without a corresponding "n" value in the movie Inception.')

There were 26 roles without a corresponding "n" value in the movie Inception.


## How many Roles in the Movie Inception did Receive an "n" value?

In [19]:
# Take the 'n' values for Inception; Series.count() tallies only the non-NaN entries.

notna_roles = cast.loc[cast['title'].eq('Inception'), 'n'].count()
print(f'There were {notna_roles} roles not considered NaN values in the "n" column.')

There were 51 roles not considered NaN values in the "n" column.


## Display the cast of "North by Northwest" in their correct "n"-value order, ignoring roles that did not earn a numeric "n" value.

In [20]:
# Keep the North by Northwest rows, drop only those whose 'n' is missing, and
# order the rest by 'n'. Restricting dropna to subset=['n'] matters: a bare
# dropna() would also discard ranked roles that have a NaN in any other
# column (e.g. 'character').

(
    cast[cast['title'] == 'North by Northwest']
    .dropna(subset=['n'])
    .sort_values('n')
)

Unnamed: 0,title,year,name,type,character,n
845516,North by Northwest,1959,Cary Grant,actor,Roger O. Thornhill,1.0
3382934,North by Northwest,1959,Eva Marie Saint,actress,Eve Kendall,2.0
1413409,North by Northwest,1959,James Mason,actor,Phillip Vandamm,3.0
3043261,North by Northwest,1959,Jessie Royce Landis,actress,Clara Thornhill,4.0
345822,North by Northwest,1959,Leo G. Carroll,actor,The Professor,5.0
2941468,North by Northwest,1959,Josephine Hutchinson,actress,Mrs. Townsend,6.0
1644542,North by Northwest,1959,Philip Ober,actor,Lester Townsend,7.0
1236791,North by Northwest,1959,Martin Landau,actor,Leonard,8.0
2372617,North by Northwest,1959,Adam Williams,actor,Valerian,9.0
1757154,North by Northwest,1959,Edward Platt,actor,Victor Larrabee,10.0


## Display the entire cast, in "n"-order, of the 1972 film "Sleuth".

In [21]:
# Rows for the 1972 film 'Sleuth', ordered by ascending 'n'
# (sort_values places NaN ranks last by default).

cast.loc[cast['title'].eq('Sleuth') & cast['year'].eq(1972)].sort_values(by='n')

Unnamed: 0,title,year,name,type,character,n
1654402,Sleuth,1972,Laurence Olivier,actor,Andrew Wyke,1.0
316652,Sleuth,1972,Michael Caine,actor,Milo Tindle,2.0
362592,Sleuth,1972,Alec Cawthorne,actor,Inspector Doppler,3.0
1421434,Sleuth,1972,John (II) Matthews,actor,Detective Sergeant Tarrant,4.0
2635708,Sleuth,1972,Eve (III) Channing,actress,Marguerite Wyke,5.0
1405219,Sleuth,1972,Teddy Martin,actor,Police Constable Higgs,6.0


## Now display the entire cast, in "n"-order, of the 2007 version of "Sleuth".

In [22]:
# Rows for the 2007 film 'Sleuth', ordered by ascending 'n'
# (sort_values places NaN ranks last by default).

cast.loc[cast['title'].eq('Sleuth') & cast['year'].eq(2007)].sort_values(by='n')

Unnamed: 0,title,year,name,type,character,n
316653,Sleuth,2007,Michael Caine,actor,Andrew,1.0
1254632,Sleuth,2007,Jude Law,actor,Milo,2.0
1751872,Sleuth,2007,Harold Pinter,actor,Man on T.V.,3.0
251883,Sleuth,2007,Kenneth Branagh,actor,Other Man on T.V.,
362593,Sleuth,2007,Alec (II) Cawthorne,actor,Inspector Doppler,
2635707,Sleuth,2007,Eve (II) Channing,actress,Marguerite Wyke,
3244050,Sleuth,2007,Carmel O'Sullivan,actress,Maggie,


## How many roles were credited in the silent 1921 version of Hamlet?

In [23]:
# Take the 'n' values for the 1921 'Hamlet' and count the non-NaN ones.
# NOTE(review): "credited" is interpreted here as "has a numeric n rank".

Hamlet_1921 = cast.loc[cast['title'].eq('Hamlet') & cast['year'].eq(1921), 'n'].count()
print(f'There were {Hamlet_1921} credited roles in the Silent 1921 version of Hamlet.')

There were 9 credited roles in the Silent 1921 version of Hamlet.


## How many roles were credited in Branagh’s 1996 Hamlet?

In [24]:
# Take the 'n' values for the 1996 'Hamlet' and count the non-NaN ones.
# NOTE(review): "credited" is interpreted here as "has a numeric n rank".

Hamlet_1996 = cast.loc[cast['title'].eq('Hamlet') & cast['year'].eq(1996), 'n'].count()
print(f'There were {Hamlet_1996} credited roles in the 1996 version of Hamlet')

There were 46 credited roles in the 1996 version of Hamlet


## How many "Hamlet" roles have been listed in all film credits through history?

In [25]:
# Each row of `cast` is one role, so the total number of roles across every
# film titled 'Hamlet' is the number of matching rows, ranked or not. Summing
# the boolean title mask counts them in a single pass over the frame.

Hamlet_roles = (cast['title'] == 'Hamlet').sum()
print(f'There are {Hamlet_roles} roles in the history of Hamlet listed in this dataset.')

There are 313 roles in the history of Hamlet listed in this dataset.


## How many people have played an "Ophelia"?

In [26]:
# Count the cast rows whose character name is exactly 'Ophelia'.

Ophelia = cast['character'].eq('Ophelia').sum()
print(f"There are {Ophelia} Ophelia characters in this dataset.")

There are 111 Ophelia characters in this dataset.


## How many people have played a role called "The Dude"?

In [27]:
# Count the cast rows whose character name is exactly 'The Dude'.

Dude = cast['character'].eq('The Dude').sum()
print(f"There are {Dude} 'The Dude' characters listed in this dataset.")

There are 18 'The Dude' characters listed in this dataset.


## How many people have played a role called "The Stranger"?

In [28]:
# Count the cast rows whose character name is exactly 'The Stranger'.

Stranger = cast['character'].eq('The Stranger').sum()
print(f"There are {Stranger} 'The Stranger' characters listed in this dataset.")

There are 212 'The Stranger' characters listed in this dataset.


## How many roles has Sidney Poitier played throughout his career?

In [29]:
# Count the non-null 'character' entries on rows credited to 'Sidney Poitier'.

Sidney = cast.loc[cast['name'].eq('Sidney Poitier'), 'character'].count()
print(f'Sidney Poitier has played {Sidney} different roles in his career.')

Sidney Poitier has played 43 different roles in his career.


## How many roles has Judi Dench played?

In [30]:
# Count the non-null 'character' entries on rows credited to 'Judi Dench'.

Dench = cast.loc[cast['name'].eq('Judi Dench'), 'character'].count()
print(f'Judi Dench has played in {Dench} different roles in her career.')

Judi Dench has played in 54 different roles in her career.


## List the supporting roles (having n=2) played by Cary Grant in the 1940s, in order by year.

In [31]:
# Cary Grant's rows from 1940 up to (not including) 1950 where 'n' equals 2,
# listed in ascending year order.

cast.loc[
    cast['name'].eq('Cary Grant')
    & cast['year'].ge(1940)
    & cast['year'].lt(1950)
    & cast['n'].eq(2.0)
].sort_values(by='year')

Unnamed: 0,title,year,name,type,character,n
845513,My Favorite Wife,1940,Cary Grant,actor,Nick Arden,2.0
845523,Penny Serenade,1941,Cary Grant,actor,Roger Adams,2.0


## List the leading roles that Cary Grant played in the 1940s in order by year.

In [32]:
# Cary Grant's rows from 1940 up to (not including) 1950 where 'n' equals 1,
# listed in ascending year order.

cast.loc[
    cast['name'].eq('Cary Grant')
    & cast['year'].ge(1940)
    & cast['year'].lt(1950)
    & cast['n'].eq(1.0)
].sort_values(by='year')

Unnamed: 0,title,year,name,type,character,n
845539,The Howards of Virginia,1940,Cary Grant,actor,Matt Howard,1.0
845495,His Girl Friday,1940,Cary Grant,actor,Walter Burns,1.0
845541,The Philadelphia Story,1940,Cary Grant,actor,C. K. Dexter Haven,1.0
845528,Suspicion,1941,Cary Grant,actor,Johnnie,1.0
845543,The Talk of the Town,1942,Cary Grant,actor,Leopold Dilg,1.0
845519,Once Upon a Honeymoon,1942,Cary Grant,actor,Patrick 'Pat' O'Toole,1.0
845486,Destination Tokyo,1943,Cary Grant,actor,Capt. Cassidy,1.0
845511,Mr. Lucky,1943,Cary Grant,actor,Joe Adams,1.0
845512,Mr. Lucky,1943,Cary Grant,actor,Joe Bascopolous,1.0
845520,Once Upon a Time,1944,Cary Grant,actor,Jerry Flynn,1.0


## How many roles were available for actors in the 1950s?

In [33]:
# Count the non-null 'character' entries on rows of type 'actor' with a year
# from 1950 up to (not including) 1960.

actors_1950s = cast.loc[
    cast['year'].ge(1950) & cast['year'].lt(1960) & cast['type'].eq('actor'),
    'character',
].count()
print(f'There were {actors_1950s} roles available for actors in the 1950s.')

There were 155058 roles available for actors in the 1950s.


## How many roles were available for actresses in the 1950s?

In [34]:
# Count the non-null 'character' entries on rows of type 'actress' with a year
# from 1950 up to (not including) 1960.

actresses_1950s = cast.loc[
    cast['year'].ge(1950) & cast['year'].lt(1960) & cast['type'].eq('actress'),
    'character',
].count()
print(f'There were {actresses_1950s} roles available for actresses in the 1950s.')

There were 56412 roles available for actresses in the 1950s.


## How many leading roles (n=1) were available from the beginning of film history through 1980?

In [35]:
# Count the non-null 'character' entries on rows with year < 1981 and 'n' equal to 1.

leading_roles = cast.loc[
    cast['year'].lt(1981) & cast['n'].eq(1.0),
    'character',
].count()
print(f'There were {leading_roles} leading roles available in the history of film from the beginning through 1980.')

There were 64095 leading roles available in the history of film from the beginning through 1980.


## How many non-leading roles were available from the beginning of film history through 1980?

In [36]:
# Count the non-null 'character' entries on rows with year < 1981 whose 'n'
# is not 1. NaN compares unequal to 1.0, so unranked roles are included here.

non_leading_roles = cast.loc[
    cast['year'].lt(1981) & cast['n'].ne(1.0),
    'character',
].count()
print(f'There were {non_leading_roles} non-leading roles available from the beginning of film history through 1980.')

There were 1097484 non-leading roles available from the beginning of film history through 1980.


In [37]:
# Sanity check: the leading and non-leading counts should add up to the
# total number of non-null 'character' entries on rows with year < 1981.

cast.loc[cast['year'].lt(1981), 'character'].count() == leading_roles + non_leading_roles

True

## How many roles through 1980 were minor enough that they did not warrant a numeric "n" rank?

In [47]:
# Count the non-null 'character' entries on rows with year < 1981 whose 'n' is NaN.

minor_roles = cast.loc[
    cast['year'].lt(1981) & cast['n'].isna(),
    'character',
].count()
print(f'There were {minor_roles} minor roles from the beginning of film history through 1980.')

There were 439134 minor roles from the beginning of film history through 1980.
