In [5]:
import pandas as pd
import numpy as np

### Download the data and load it to Pandas. 

You can find them [here](https://drive.google.com/file/d/1NY6cmF9Shjw-dD7BD6bNmfcIVz-kQcFR/view?usp=sharing).

In [6]:
titles = pd.read_csv('data/titles.csv', index_col=None)
titles.head()

Unnamed: 0,title,year
0,The Rising Son,1990
1,The Thousand Plane Raid,1969
2,Crucea de piatra,1993
3,Country,2000
4,Gaiking II,2011


In [7]:
cast = pd.read_csv('data/cast.csv', index_col=None)
cast.head()

Unnamed: 0,title,year,name,type,character,n
0,Closet Monster,2015,Buffy #1,actor,Buffy 4,
1,Suuri illusioni,1985,Homo $,actor,Guests,22.0
2,Battle of the Sexes,2017,$hutter,actor,Bobby Riggs Fan,10.0
3,Secret in Their Eyes,2015,$hutter,actor,2002 Dodger Fan,
4,Steve Jobs,2015,$hutter,actor,1988 Opera House Patron,


In [22]:
cast.sort_values('year', ascending=False)

Unnamed: 0,title,year,name,type,character,n
42492,War in Snow,2026,Yassin Alnaimi,actor,Abdalrehman,
42491,Rise of Man,2025,Yassin Alnaimi,actor,Mark,
715145,Rise of Man,2025,Ewan Fleck,actor,Jack,
826712,Inside Me,2023,Damien (II) Gonzalez,actor,Matt Garcia,
2818581,Gnome Alone,2023,Becky G.,actress,Chlo?,
...,...,...,...,...,...,...
1728670,Soldiers of the Cross,1900,Orrie Perry,actor,Lion,
1728821,Soldiers of the Cross,1900,Reg Perry,actor,Lion,
559348,Miss Jerry,1894,Chauncey Depew,actor,Himself - the Director of the New York Central...,
2530280,Miss Jerry,1894,Blanche Bayliss,actress,Miss Geraldine Holbrook (Miss Jerry),


### What are the ten most common movie names of all time?

In [18]:
# finding the top 10 most common movie names
cast['title'].value_counts().head(10)

Around the World in Eighty Days       1298
7 cajas                                689
Thelma                                 580
The Ten Commandments                   533
The Eschatrilogy: Book of the Dead     517
Catching Faith                         493
The Dark Knight Rises                  453
Welcome to Essex                       449
Stuck on You                           436
The Buccaneer                          436
Name: title, dtype: int64

### Which three years in the 1930s saw the most films released?

In [21]:
# search for all films in the 1930s
films_1930s = cast[cast['year'].isin(range(1930, 1940))]

# find the 3 years in which the most movies were released
films_1930s['year'].value_counts().head(3)

1937    27462
1936    25755
1938    25206
Name: year, dtype: int64

### Count the number of films that have been released in each decade over the history of cinema.

In [79]:
# define a function which provices a count per decade for the search term
def decade_count(table, field=None, search_term=None):
    # filter table with the search term
    if field:
        table = table[table[field] == search_term]
    # use filtered table to perform a count of occurances for each decade
    for year in range(189, 204):
        print(f'{(year-1) * 10}s', table[table['year'].isin(range((year-1)*10, year*10))]['year'].count())
    
    return table

In [82]:
# without specifying field and search term, decade_count returns a count for the entire table for each decade
decade_count(cast)

1880s 0
1890s 3
1900s 70
1910s 42604
1920s 71472
1930s 199058
1940s 211842
1950s 211470
1960s 187069
1970s 215147
1980s 283194
1990s 381806
2000s 746571
2010s 1083959
2020s 202


Unnamed: 0,title,year,name,type,character,n
0,Closet Monster,2015,Buffy #1,actor,Buffy 4,
1,Suuri illusioni,1985,Homo $,actor,Guests,22.0
2,Battle of the Sexes,2017,$hutter,actor,Bobby Riggs Fan,10.0
3,Secret in Their Eyes,2015,$hutter,actor,2002 Dodger Fan,
4,Steve Jobs,2015,$hutter,actor,1988 Opera House Patron,
...,...,...,...,...,...,...
3634462,Foxtrot,1988,Lilja ??risd?ttir,actress,D?ra,24.0
3634463,Niceland (Population. 1.000.002),2004,Sigr??ur J?na ??risd?ttir,actress,Woman in Bus,26.0
3634464,Skammdegi,1985,Dalla ??r?ard?ttir,actress,Hj?krunarkona,9.0
3634465,U.S.S.S.S...,2003,Krist?n Andrea ??r?ard?ttir,actress,Afgr.dama ? bens?nst??,17.0


### Count the number of "Hamlet" films made in each decade.

In [83]:
decade_count(cast, 'title', 'Hamlet')

1880s 0
1890s 0
1900s 0
1910s 28
1920s 9
1930s 0
1940s 24
1950s 1
1960s 57
1970s 19
1980s 3
1990s 83
2000s 55
2010s 34
2020s 0


Unnamed: 0,title,year,name,type,character,n
1627,Hamlet,1996,Riz Abbasi,actor,Attendant to Claudius,1.0
7488,Hamlet,1921,Fritz Achterberg,actor,"Fortinbras,",9.0
11335,Hamlet,2009,Hayden Adams,actor,Laertes,7.0
11336,Hamlet,2009,Hayden Adams,actor,Player,7.0
14013,Hamlet,1913,Eric Adeney,actor,Reynaldo,14.0
...,...,...,...,...,...,...
3528232,Hamlet,1969,Jennifer Tudor,actress,Court lady,23.0
3550959,Hamlet,2000,Diane Venora,actress,Gertrude,3.0
3580610,Hamlet,1996,Perdita Weeks,actress,Second Player,44.0
3585205,Hamlet,2015,Katie (II) West,actress,Ophelia,7.0


### Count the number of "Rustler" characters in each decade of the history of film.

In [84]:
decade_count(cast, 'character', 'Rustler')

1880s 0
1890s 0
1900s 0
1910s 0
1920s 6
1930s 84
1940s 18
1950s 8
1960s 2
1970s 9
1980s 1
1990s 4
2000s 2
2010s 0
2020s 0


Unnamed: 0,title,year,name,type,character,n
13041,Pinto Canyon,1940,Victor Adamson,actor,Rustler,
13172,Two Gun Troubador,1939,Victor Adamson,actor,Rustler,
43766,The Last Stand,1938,Gene Alsace,actor,Rustler,
75824,Wanted,1967,Bruno Ari?,actor,Rustler,
93905,Silent Valley,1935,Jimmy Aubrey,actor,Rustler,
...,...,...,...,...,...,...
2361662,The Mysterious Avenger,1936,Blackie Whiteford,actor,Rustler,
2362278,The Marksman,1953,Russ Whiteman,actor,Rustler,9.0
2382302,The Cowboys,1972,Henry Wills,actor,Rustler,42.0
2404657,The Frontiersmen,1938,Bob (II) Woodward,actor,Rustler,


### Count the number of "Hamlet" characters in each decade.

In [85]:
decade_count(cast, 'character', 'Hamlet')

1880s 0
1890s 0
1900s 0
1910s 6
1920s 1
1930s 6
1940s 1
1950s 3
1960s 7
1970s 8
1980s 2
1990s 11
2000s 19
2010s 28
2020s 0


Unnamed: 0,title,year,name,type,character,n
6046,Sugar,2008,Hamlet Abreu,actor,Hamlet,58.0
94622,Top of the Town,1937,Mischa Auer,actor,Hamlet,6.0
120882,The Sound of Spying,2014,Jonathan Ball,actor,Hamlet,
153893,R~E~T~R~O~G~R~A~D~E,2018,Broderic Beard,actor,Hamlet,
155421,Suivez cet avion,1989,Fran?ois Beaulieu,actor,Hamlet,26.0
...,...,...,...,...,...,...
2533035,Hamlet_X,2003,Meret Becker,actress,Hamlet,
2639382,Bill Shakespeare in Hollywood,2011,Rafika Chawishe,actress,Hamlet,
2843568,Intikam Melegi/Kadin Hamlet,1976,Fatma Girik,actress,Hamlet,1.0
3229662,Hamlet,1921,Asta Nielsen,actress,Hamlet,1.0


### What are the 11 most common character names in movie history?

In [89]:
# filter table by character, then perform a count of the most common and return the top 11
cast['character'].value_counts().head(11)

Himself        19600
Dancer         12096
Extra          10953
Reporter        8154
Doctor          7436
Student         7177
Policeman       6883
Party Guest     6742
Nurse           6676
Bartender       6613
Minor Role      6255
Name: character, dtype: int64

### Who are the 10 people most often credited as "Herself" in film history?

In [93]:
# filter cast table by the character called 'Herself'
herself = cast[cast['character'] == 'Herself']
# count based on most common name
herself['name'].value_counts().head(10)

Queen Elizabeth II    11
Joyce Brothers         9
Margaret Thatcher      8
Mary Jo Pehl           7
Hillary Clinton        7
Joan Rivers            6
Sumie Sakai            6
Marilyn Monroe         5
Bunny Yeager           5
Caroline Rhea          5
Name: name, dtype: int64

### Who are the 10 people most often credited as "Himself" in film history?

In [94]:
# filter cast table by character called 'Himself'
himself = cast[cast['character'] == 'Himself']
# perform a name 
himself['name'].value_counts().head(10)

Adolf Hitler             100
Richard Nixon             46
Ronald Reagan             39
John F. Kennedy           34
Winston Churchill         24
George W. Bush            23
Benito Mussolini          23
Ron Jeremy                23
Martin Luther King        21
Franklin D. Roosevelt     20
Name: name, dtype: int64

### Which actors or actresses appeared in the most movies in the year 1945?

In [101]:
# filter cast table for titles appearing in 1945
titles_in_1945 = cast[cast['year'] == 1945]

# are there other types of casts other than actors and actresses? no.
print(titles_in_1945['type'].value_counts())

# find the most prolific stars of 1945
titles_in_1945['name'].value_counts().head()

actor      13540
actress     4877
Name: type, dtype: int64


Emmett Vogan       39
Sam (II) Harris    32
Harold Miller      29
Bess Flowers       29
Frank O'Connor     27
Name: name, dtype: int64

### Which actors or actresses appeared in the most movies in the year 1985?

In [102]:
# filter cast table for titles appearing in 1945
titles_in_1985 = cast[cast['year'] == 1985]

# are there other types of casts other than actors and actresses? no.
print(titles_in_1985['type'].value_counts())

# find the most prolific stars of 1945
titles_in_1985['name'].value_counts().head()

actor      19228
actress     8738
Name: type, dtype: int64


Mammootty        22
Shakti Kapoor    21
Sukumari         20
Lou Scheimer     15
Aruna Irani      15
Name: name, dtype: int64

### How many roles "Mammootty" has played in each year of his career.

In [126]:
# filter cast table by actor 'Mammootty'
mammootty = cast[cast['name'] == 'Mammootty']
# determine the years in which 'Mammootty' worked and count the number of roles per year
mammootty['year'].value_counts().sort_index()

1971     1
1980     2
1981     3
1982     7
1983    18
1984    16
1985    22
1986    18
1987     9
1988     7
1989    10
1990    15
1991     6
1992     3
1993     5
1994     5
1995     5
1996     3
1997     2
1998     4
1999     4
2000     7
2001     3
2002     3
2003     2
2004     8
2005     6
2006     8
2007     7
2008     9
2009    12
2010     9
2011     7
2012     7
2013     5
2014     6
2015     6
2016     4
2017     1
Name: year, dtype: int64

### What are the 10 most frequent roles that start with the phrase "Patron in"?

In [127]:
# filter cast table by character names that begin with 'Patron in'
patron_in_char = cast[cast['character'].str.match(r'(^Patron in)')]
# perform a count for the filtered table
patron_in_char['character'].value_counts().head(10)

Patron in Frisky Rabbit         16
Patron in Chinese Restaurant     9
Patron in the Coffee House       9
Patron in Billiard Parlor        5
Patron in Bar                    4
Patron in restaurant             4
Patron in cabaret                3
Patron in Restaurant             3
Patron in Club                   3
Patron in Quiet Bar              2
Name: character, dtype: int64

### What are the 10 most frequent roles that start with the word "Science"?

In [125]:
# filter search by character name beginning with 'Science'
science_char = cast[cast['character'].str.match(r'(^Science)')]
# 
science_char['character'].value_counts().head(10)

Science Teacher         60
Science Student          9
Science Fair Student     9
Science Fair Judge       6
Science Club Member      5
Science Kid              5
Science Promo Cadet      5
Science Reporter         5
Science                  4
Science Officer          3
Name: character, dtype: int64

### Count number of n-values of the roles that Judi Dench has played over her career.

In [131]:
# filter cast table by actress named 'Judi Dench'
judi_dench = cast[cast['name'] == 'Judi Dench']
# use filtered tables to count number of n-values and sort values
judi_dench['n'].value_counts().sort_index()

1.0     6
2.0     6
3.0     6
4.0     4
5.0     2
6.0     4
7.0     4
8.0     1
9.0     1
12.0    3
13.0    2
16.0    2
18.0    2
20.0    1
23.0    1
26.0    1
29.0    1
40.0    1
Name: n, dtype: int64

### Count number of n-values of Cary Grant's roles through his career.

In [132]:
# filter cast table by actress named 'Judi Dench'
cary_grant = cast[cast['name'] == 'Cary Grant']
# use filtered tables to count number of n-values and sort values
cary_grant['n'].value_counts().sort_index()

1.0     46
2.0     20
3.0      2
4.0      1
5.0      1
8.0      1
9.0      2
10.0     2
Name: n, dtype: int64

### Count number of n-value of the roles that Sidney Poitier has acted over the years.

In [134]:
# filter cast table by actress named 'Judi Dench'
sidney_poitier = cast[cast['name'] == 'Sidney Poitier']
# use filtered tables to count number of n-values and sort values
sidney_poitier['n'].value_counts().sort_index()

1.0     21
2.0      8
3.0      6
4.0      3
5.0      1
10.0     1
13.0     1
21.0     1
Name: n, dtype: int64

### How many leading (n=1) roles were available to actors, and how many to actresses, in the 1950s?

In [144]:
# filter cast table to the 1950s
titles_in_1950s = cast[cast['year'].isin(range(1950, 1960))]
# filter further to only view leading roles, then perform counts on actors and actresses
titles_in_1950s[titles_in_1950s['n'] == 1]['type'].value_counts()

actor      6616
actress    2965
Name: type, dtype: int64

### How many supporting (n=2) roles were available to actors, and how many to actresses, in the 1950s?

In [145]:
# filter cast table to the 1950s
titles_in_1950s = cast[cast['year'].isin(range(1950, 1960))]
# filter further to only view leading roles, then perform counts on actors and actresses
titles_in_1950s[titles_in_1950s['n'] == 2]['type'].value_counts()

actor      4564
actress    4556
Name: type, dtype: int64