In [86]:
import pandas as pd
import numpy as np

### Download the data and load it to Pandas. 

You can find them [here](https://drive.google.com/file/d/1NY6cmF9Shjw-dD7BD6bNmfcIVz-kQcFR/view?usp=sharing).

In [87]:
titles = pd.read_csv('./_data/Pandas_exercise/imdb_pandas/titles.csv')
titles.head()

Unnamed: 0,title,year
0,The Rising Son,1990
1,The Thousand Plane Raid,1969
2,Crucea de piatra,1993
3,Country,2000
4,Gaiking II,2011


In [88]:
cast = pd.read_csv('./_data/Pandas_exercise/imdb_pandas/cast.csv')
cast.head()

Unnamed: 0,title,year,name,type,character,n
0,Closet Monster,2015,Buffy #1,actor,Buffy 4,
1,Suuri illusioni,1985,Homo $,actor,Guests,22.0
2,Battle of the Sexes,2017,$hutter,actor,Bobby Riggs Fan,10.0
3,Secret in Their Eyes,2015,$hutter,actor,2002 Dodger Fan,
4,Steve Jobs,2015,$hutter,actor,1988 Opera House Patron,


In [89]:
def get_decade(year):
  return int(np.floor(year/10)) * 10

### Using groupby(), count the number of films that have been released in each decade in the history of cinema.

In [90]:
titles['decade'] = titles['year'].apply(get_decade)
titles.head()

Unnamed: 0,title,year,decade
0,The Rising Son,1990,1990
1,The Thousand Plane Raid,1969,1960
2,Crucea de piatra,1993,1990
3,Country,2000,2000
4,Gaiking II,2011,2010


In [91]:
titles_per_decade = titles[['title', 'decade']].groupby('decade').count().reset_index()
titles_per_decade

Unnamed: 0,decade,title
0,1890,3
1,1900,37
2,1910,6512
3,1920,8797
4,1930,10097
5,1940,8576
6,1950,12711
7,1960,17515
8,1970,18714
9,1980,20243


### Use groupby() count the number of "Hamlet" films made in each decade.

In [92]:
hamlet_per_decade = titles[titles['title'] == 'Hamlet']
hamlet_per_decade[['title', 'decade']].groupby('decade').count().reset_index()

Unnamed: 0,decade,title
0,1910,3
1,1920,1
2,1940,1
3,1950,1
4,1960,2
5,1970,2
6,1980,1
7,1990,2
8,2000,2
9,2010,3


### How many leading (n=1) roles were available to actors, and how many to actresses, in each year of the 1950s?

In [93]:
leading_cast_in_1950s = cast[(cast['n'] == 1) & (cast['year'].between(1950, 1959))]
leading_cast_in_1950s[['year', 'type', 'n']].groupby(['year', 'type']).count()

Unnamed: 0_level_0,Unnamed: 1_level_0,n
year,type,Unnamed: 2_level_1
1950,actor,626
1950,actress,281
1951,actor,647
1951,actress,281
1952,actor,607
1952,actress,293
1953,actor,652
1953,actress,301
1954,actor,634
1954,actress,310


### In the 1950s taken as a whole, how many total roles were available to actors, and how many to actresses, for each "n" number 1 through 5?

In [94]:
cast['decade'] = cast['year'].apply(get_decade)
cast.head()

Unnamed: 0,title,year,name,type,character,n,decade
0,Closet Monster,2015,Buffy #1,actor,Buffy 4,,2010
1,Suuri illusioni,1985,Homo $,actor,Guests,22.0,1980
2,Battle of the Sexes,2017,$hutter,actor,Bobby Riggs Fan,10.0,2010
3,Secret in Their Eyes,2015,$hutter,actor,2002 Dodger Fan,,2010
4,Steve Jobs,2015,$hutter,actor,1988 Opera House Patron,,2010


In [95]:
cast_5_roles = cast[
  (cast['n'].isin(range(1,6))) & (cast['decade'] == 1950)]
print(min(cast_5_roles['n']))
print(max(cast_5_roles['n']))
cast_5_roles.head()

1.0
5.0


Unnamed: 0,title,year,name,type,character,n,decade
270,Palletoori Pilla,1950,Subba Rao A.V.,actor,Kampanna Dora,4.0,1950
284,Brudebuketten,1953,Per Aabel,actor,H?yland jr.,3.0,1950
374,Jeopardy,1953,Lee Aaker,actor,Bobby Stilwin,4.0,1950
382,Take Me to Town,1953,Lee Aaker,actor,Corney Hall,5.0,1950
383,The Atomic City,1952,Lee Aaker,actor,Tommy Addison,5.0,1950


In [96]:
cast_5_roles[['type', 'n', 'title']].groupby(['type', 'n']).count()

Unnamed: 0_level_0,Unnamed: 1_level_0,title
type,n,Unnamed: 2_level_1
actor,1.0,6616
actor,2.0,4564
actor,3.0,5587
actor,4.0,5594
actor,5.0,5611
actress,1.0,2965
actress,2.0,4556
actress,3.0,3148
actress,4.0,2849
actress,5.0,2544


### Use groupby() to determine how many roles are listed for each of the Pink Panther movies.

In [111]:
cast[cast['title'].isna()]

Unnamed: 0,title,year,name,type,character,n,decade
9838,,2015,Israel Adam,actor,Landen's Friend #2,,2010
256933,,2015,Brock Brazda,actor,Brock,,2010
1057486,,2015,Landen James,actor,Landen,,2010
1484582,,2015,Elijah Meyer,actor,Landen's Friend,,2010
1484647,,2015,Emma (V) Meyer,actor,Brock's Girlfriend,,2010
1486995,,2015,Gabriel (II) Michael,actor,Psychopath,,2010


In [112]:
cast_pink_panther = cast[cast['title'].str.contains('Pink Panther').fillna(False)]
cast_pink_panther

Unnamed: 0,title,year,name,type,character,n,decade
955,The Pink Panther,2006,William Abadie,actor,Bizu,11.0,2000
1978,Revenge of the Pink Panther,1978,Fredric Abbott,actor,Douvier's Soldier,38.0,1970
4691,Curse of the Pink Panther,1983,William Abney,actor,Hugo the Houseman,33.0,1980
12589,Son of the Pink Panther,1993,Tony Adams,actor,Wedding Guest,,1990
12590,The Return of the Pink Panther,1975,Tony Adams,actor,Waiter,,1970
...,...,...,...,...,...,...,...
3582494,Revenge of the Pink Panther,1978,Elisabeth Welch,actress,Mrs. Wu,24.0,1970
3582965,The Pink Panther,1963,Meri Welles,actress,Monica Fawn,12.0,1960
3584647,The Pink Panther,2006,Colleen Werthmann,actress,Jane,,2000
3606096,The Pink Panther,2006,Jadin Wong,actress,Granny,,2000


In [118]:
cast_pink_panther[['title', 'n']].groupby(['title']).count().reset_index()

Unnamed: 0,title,n
0,Curse of the Pink Panther,63
1,Revenge of the Pink Panther,57
2,Son of the Pink Panther,43
3,The Pink Panther,66
4,The Pink Panther 2,36
5,The Pink Panther Strikes Again,61
6,The Return of the Pink Panther,27
7,Trail of the Pink Panther,32


### List, in order by year, each of the films in which Frank Oz has played more than 1 role.

In [127]:
cast_frank_oz = cast[cast['name'].str.contains('Frank Oz')].fillna()
cast_frank_oz.head()

Unnamed: 0,title,year,name,type,character,n,decade
1671990,An American Werewolf in London,1981,Frank Oz,actor,Mr. Collins,13.0,1980
1671991,An American Werewolf in London,1981,Frank Oz,actor,Miss Piggy,13.0,1980
1671992,Blues Brothers 2000,1998,Frank Oz,actor,Warden,4.0,1990
1671993,Follow That Bird,1985,Frank Oz,actor,Cookie Monster,3.0,1980
1671994,Follow That Bird,1985,Frank Oz,actor,Bert,3.0,1980


In [149]:
cast_frank_oz_n = cast_frank_oz[['year', 'title', 'n']].groupby(['year', 'title']).count().reset_index()
cast_frank_oz_n[cast_frank_oz_n['n'] > 1]

Unnamed: 0,year,title,n
0,1979,The Muppet Movie,8
3,1981,An American Werewolf in London,2
4,1981,The Great Muppet Caper,6
5,1982,The Dark Crystal,2
9,1984,The Muppets Take Manhattan,7
10,1985,Follow That Bird,3
14,1992,The Muppet Christmas Carol,7
15,1996,Muppet Treasure Island,4
17,1999,Muppets from Space,4
19,1999,The Adventures of Elmo in Grouchland,3


### List each of the characters that Frank Oz has portrayed at least twice.

In [157]:
cast_frank_oz_chars = cast_frank_oz[['character', 'n']].groupby('character').count().reset_index()
cast_frank_oz_chars[cast_frank_oz_chars['n'] > 1]

Unnamed: 0,character,n
0,Animal,6
2,Bert,3
5,Cookie Monster,3
10,Fozzie Bear,4
15,Grover,2
18,Miss Piggy,6
25,Sam the Eagle,5
34,Yoda,6
