In [1]:
import pandas as pd
import numpy as np

### Download the data and load it into pandas.

You can find the data files [here](https://drive.google.com/file/d/1NY6cmF9Shjw-dD7BD6bNmfcIVz-kQcFR/view?usp=sharing).

In [2]:
# Read the cast table from CSV (no column is used as the index) and preview it.
cast_csv = 'data/cast.csv'
cast = pd.read_csv(cast_csv, index_col=None)
cast.head()

Unnamed: 0,title,year,name,type,character,n
0,Closet Monster,2015,Buffy #1,actor,Buffy 4,
1,Suuri illusioni,1985,Homo $,actor,Guests,22.0
2,Battle of the Sexes,2017,$hutter,actor,Bobby Riggs Fan,10.0
3,Secret in Their Eyes,2015,$hutter,actor,2002 Dodger Fan,
4,Steve Jobs,2015,$hutter,actor,1988 Opera House Patron,


In [3]:
# Read the release-date table and parse the `date` column to datetime while loading.
# `infer_datetime_format` is deliberately not passed: it is deprecated since
# pandas 2.0, where consistent-format inference is already the default behaviour.
release_dates = pd.read_csv('data/release_dates.csv', index_col=None,
                            parse_dates=['date'])
release_dates.head()

Unnamed: 0,title,year,country,date
0,"#73, Shaanthi Nivaasa",2007,India,2007-06-15
1,#Beings,2015,Romania,2015-01-29
2,#Declimax,2018,Netherlands,2018-01-21
3,#Ewankosau saranghaeyo,2015,Philippines,2015-01-21
4,#Horror,2015,USA,2015-11-20


In [4]:
# Inspect the column dtypes to confirm `date` was parsed as a datetime column,
# which the `.dt` accessors in the cells below rely on.
release_dates.dtypes

title              object
year                int64
country            object
date       datetime64[ns]
dtype: object

### Count the number of movies with "Christmas" in their title for each month, that are released in the USA.

In [14]:
# USA release rows whose title contains "Christmas", tallied by release month.
has_christmas = release_dates['title'].str.contains('Christmas')
usa_release = release_dates['country'] == 'USA'
filtered = release_dates.loc[has_christmas & usa_release].assign(
    month=lambda rows: rows['date'].dt.month
)
filtered.groupby('month')['title'].count()

month
1      2
2      2
4      1
5      1
6      1
7      1
8      2
9      2
10    14
11    38
12    47
Name: title, dtype: int64

### Count the number of movies with "Christmas" in their title for each month, that are released in Canada.

In [15]:
# Canadian release rows whose title contains "Christmas", tallied by release month.
title_hit = release_dates['title'].str.contains('Christmas')
in_canada = release_dates['country'] == 'Canada'
filtered = release_dates.loc[title_hit & in_canada].assign(
    month=lambda rows: rows['date'].dt.month
)
filtered.groupby('month')['title'].count()

month
10    2
11    9
12    6
Name: title, dtype: int64

### Count the number of movies whose titles start with "The Hobbit" for each month, that are released in the USA.

In [17]:
# USA release rows whose title begins with "The Hobbit", tallied by release month.
is_hobbit = release_dates['title'].str.startswith('The Hobbit')
in_usa = release_dates['country'] == 'USA'
filtered = release_dates.loc[is_hobbit & in_usa].assign(
    month=lambda rows: rows['date'].dt.month
)
filtered.groupby('month')['title'].count()

month
12    3
Name: title, dtype: int64

### Count the number of movies with "Romance" in their title for **each day of the week**, that are released in the USA.

In [21]:
# USA release rows whose title contains "Romance", tallied by weekday
# (`dt.weekday` numbers Monday as 0 through Sunday as 6).
has_romance = release_dates['title'].str.contains('Romance')
in_usa = release_dates['country'] == 'USA'
filtered = release_dates.loc[has_romance & in_usa].assign(
    day_of_week=lambda rows: rows['date'].dt.weekday
)
filtered.groupby('day_of_week')['title'].count()

day_of_week
0    21
1     4
2    11
3     6
4    18
5     8
6    32
Name: title, dtype: int64

### Count the number of movies with "Action" in their title for **each day of the week**, that are released in the USA.

In [23]:
# USA release rows whose title contains "Action", tallied by weekday
# (`dt.weekday` numbers Monday as 0 through Sunday as 6).
has_action = release_dates['title'].str.contains('Action')
in_usa = release_dates['country'] == 'USA'
filtered = release_dates.loc[has_action & in_usa].assign(
    day_of_week=lambda rows: rows['date'].dt.weekday
)
filtered.groupby('day_of_week')['title'].count()

day_of_week
0     2
1     1
2     3
3     1
4    19
5     2
6     3
Name: title, dtype: int64

### On which date was each Judi Dench movie from the 1990s released in the USA?

In [43]:
# Judi Dench's credits from the 1990s joined to release dates, showing only the USA rows.
# `between` is inclusive on both ends, so this selects the years 1990 through 1999.
judy = cast[cast['year'].between(1990, 1999) & (cast['name'] == 'Judi Dench')]
# `on` is documented as a label or a list of labels; a list avoids any ambiguity
# with a tuple being read as one tuple-valued column label.
merged = judy.merge(release_dates, on=['title', 'year'])
merged[merged['country'] == 'USA']

Unnamed: 0,title,year,name,type,character,n,country,date
1,GoldenEye,1995,Judi Dench,actress,M,6.0,USA,1995-11-17
40,Hamlet,1996,Judi Dench,actress,Hecuba,12.0,USA,1996-12-25
71,Jack & Sarah,1995,Judi Dench,actress,Margaret,3.0,USA,1996-03-22
78,Mrs Brown,1997,Judi Dench,actress,Queen Victoria,1.0,USA,1997-10-03
101,Shakespeare in Love,1998,Judi Dench,actress,Queen Elizabeth,12.0,USA,1999-01-08
147,Tea with Mussolini,1999,Judi Dench,actress,Arabella,2.0,USA,1999-05-14
173,The World Is Not Enough,1999,Judi Dench,actress,M,6.0,USA,1999-11-19
246,Tomorrow Never Dies,1997,Judi Dench,actress,M,9.0,USA,1997-12-19


### In which months do films with the actress Judi Dench tend to be released in the USA?

In [44]:
# Join Judi Dench's credits to release dates, keep the USA releases,
# and count them per release month.
judy = cast[cast['name'] == 'Judi Dench']
merged = (
    judy.merge(release_dates, on=['title', 'year'])
    .loc[lambda rows: rows['country'] == 'USA']
    # `.assign` returns a new frame, so the month column is never written onto a
    # filtered slice (which can raise SettingWithCopyWarning).
    .assign(month=lambda rows: rows['date'].dt.month)
)
merged.groupby('month')['title'].count()

month
1      4
2      4
3      3
4      2
5      4
6      3
7      1
8      1
9      2
10     1
11    12
12     4
Name: title, dtype: int64

### In which months do films with the actor Tom Cruise tend to be released in the USA?

In [45]:
# Join Tom Cruise's credits to release dates, keep the USA releases,
# and count them per release month.
tom = cast[cast['name'] == 'Tom Cruise']
merged = (
    tom.merge(release_dates, on=['title', 'year'])
    .loc[lambda rows: rows['country'] == 'USA']
    # `.assign` returns a new frame, so the month column is never written onto a
    # filtered slice (which can raise SettingWithCopyWarning).
    .assign(month=lambda rows: rows['date'].dt.month)
)
merged.groupby('month')['title'].count()

month
1      2
3      1
4      3
5      5
6      8
7      7
8      3
9      1
10     3
11     3
12    10
Name: title, dtype: int64