In [2]:
import pandas as pd

### Download the data and load it to Pandas. 

You can find them [here](https://drive.google.com/file/d/1NY6cmF9Shjw-dD7BD6bNmfcIVz-kQcFR/view?usp=sharing).

In [3]:
titles = pd.read_csv('data/titles.csv')
titles.head()

Unnamed: 0,title,year
0,The Rising Son,1990
1,The Thousand Plane Raid,1969
2,Crucea de piatra,1993
3,Country,2000
4,Gaiking II,2011


In [4]:
cast = pd.read_csv('data/cast.csv')
cast.head()

Unnamed: 0,title,year,name,type,character,n
0,Closet Monster,2015,Buffy #1,actor,Buffy 4,
1,Suuri illusioni,1985,Homo $,actor,Guests,22.0
2,Battle of the Sexes,2017,$hutter,actor,Bobby Riggs Fan,10.0
3,Secret in Their Eyes,2015,$hutter,actor,2002 Dodger Fan,
4,Steve Jobs,2015,$hutter,actor,1988 Opera House Patron,


### Using `groupby()`, count the number of films that have been released in each decade in the history of cinema.

In [5]:
# create bins to define the decades
bins = [x*10 for x in range(189, 204)]
# use groupby on the categorical series to count number of titles for each bin
titles.groupby(pd.cut(titles['year'], bins)).title.count()

year
(1890, 1900]        7
(1900, 1910]       55
(1910, 1920]     7617
(1920, 1930]     8490
(1930, 1940]    10277
(1940, 1950]     8675
(1950, 1960]    13113
(1960, 1970]    18166
(1970, 1980]    18351
(1980, 1990]    20746
(1990, 2000]    22672
(2000, 2010]    42247
(2010, 2020]    61847
(2020, 2030]       67
Name: title, dtype: int64

### Use `groupby()` count the number of "Hamlet" films made in each decade.

In [6]:
# use previous bins decades and filter by title 'Hamlet'
titles[titles['title'] == 'Hamlet'].groupby(pd.cut(titles['year'], bins)).title.count()

year
(1890, 1900]    0
(1900, 1910]    1
(1910, 1920]    2
(1920, 1930]    1
(1930, 1940]    0
(1940, 1950]    1
(1950, 1960]    1
(1960, 1970]    2
(1970, 1980]    2
(1980, 1990]    2
(1990, 2000]    2
(2000, 2010]    1
(2010, 2020]    3
(2020, 2030]    0
Name: title, dtype: int64

### How many leading (n=1) roles were available to actors, and how many to actresses, in each year of the 1950s?

In [104]:
def df_filter(table, field, value):
    return table[table[field] == value]

cast.pipe(df_filter, 'n', 1).pipe(df_filter, 'type', 'actress'
                                  ).groupby(pd.cut(cast['year'], bins)).count()

Unnamed: 0,title,year,name,type,character,n
2455420,Queen of the Damned,2002,Aaliyah,actress,Queen Akasha,1.0
2455503,Til en ukjent,1990,Hilde Aaroe,actress,Ane,1.0
2455508,A Day Without a Mexican,2004,Caroline Aaron,actress,Aunt Gigi,1.0
2455520,Call Waiting,2004,Caroline Aaron,actress,Judy Baxter,1.0
2455521,Call Waiting,2004,Caroline Aaron,actress,Carol Lane,1.0
...,...,...,...,...,...,...
3634253,Doyumsuz,1990,Bahar ?ztan,actress,Bahar,1.0
3634265,Kadife,2012,Ayta? ?ztuna,actress,Kadife Ana,1.0
3634308,Dig og mig,2008,Julie R. ?lgaard,actress,Stephanie,1.0
3634328,Mimi og madammerne,1998,Tammi ?st,actress,Mimi,1.0


### In the 1950s taken as a whole, how many total roles were available to actors, and how many to actresses, for each "n" number 1 through 5?

In [112]:
for n in range(1, 6):
    print(cast.pipe(df_filter, 'n', n).pipe(df_filter, 'type', 'actress').groupby(pd.cut(cast['year'], bins)).count()['title'].iloc[6])

2975
4614
3181
2882
2606


In [113]:
for n in range(1, 6):
    print(cast.pipe(df_filter, 'n', n).pipe(df_filter, 'type', 'actor').groupby(pd.cut(cast['year'], bins)).count()['title'].iloc[6])

6709
4610
5638
5622
5608


### Use `groupby()` to determine how many roles are listed for each of the Pink Panther movies.

In [122]:
cast.pipe(df_filter, 'title', 'Pink Panther')

Unnamed: 0,title,year,name,type,character,n


### List, in order by year, each of the films in which Frank Oz has played more than 1 role.

In [204]:
cast.pipe(df_filter, 'name', 'Frank Oz').groupby('title').filter(lambda x: len(x) > 1).sort_values(by=['year', 'title'])

Unnamed: 0,title,year,name,type,character,n
1672035,The Muppet Movie,1979,Frank Oz,actor,Miss Piggy,2.0
1672036,The Muppet Movie,1979,Frank Oz,actor,Fozzie Bear,2.0
1672037,The Muppet Movie,1979,Frank Oz,actor,Animal,2.0
1672038,The Muppet Movie,1979,Frank Oz,actor,Sam the Eagle,2.0
1672039,The Muppet Movie,1979,Frank Oz,actor,Doc Hopper's Men,2.0
1672040,The Muppet Movie,1979,Frank Oz,actor,Marvin Suggs,2.0
1672041,The Muppet Movie,1979,Frank Oz,actor,Swedish Chef (assistant),2.0
1672042,The Muppet Movie,1979,Frank Oz,actor,Motorcycle Guy,2.0
1671990,An American Werewolf in London,1981,Frank Oz,actor,Mr. Collins,13.0
1671991,An American Werewolf in London,1981,Frank Oz,actor,Miss Piggy,13.0


### List each of the characters that Frank Oz has portrayed at least twice.

In [212]:
cast.pipe(df_filter, 'name', 'Frank Oz').groupby('character').filter(lambda x: len(x) > 1).sort_values('character')


Unnamed: 0,title,year,name,type,character,n
1672031,The Muppet Christmas Carol,1992,Frank Oz,actor,Animal,5.0
1672037,The Muppet Movie,1979,Frank Oz,actor,Animal,2.0
1672024,The Great Muppet Caper,1981,Frank Oz,actor,Animal,2.0
1672045,The Muppets Take Manhattan,1984,Frank Oz,actor,Animal,2.0
1672006,Muppets from Space,1999,Frank Oz,actor,Animal,7.0
1672003,Muppet Treasure Island,1996,Frank Oz,actor,Animal,14.0
1672016,The Adventures of Elmo in Grouchland,1999,Frank Oz,actor,Bert,16.0
1671994,Follow That Bird,1985,Frank Oz,actor,Bert,3.0
1672046,The Muppets Take Manhattan,1984,Frank Oz,actor,Bert,2.0
1672047,The Muppets Take Manhattan,1984,Frank Oz,actor,Cookie Monster,2.0
