In [1]:
import pandas as pd
import numpy as np
import datetime as dt

### Download the data and load it to Pandas. 

You can find them [here](https://drive.google.com/file/d/1NY6cmF9Shjw-dD7BD6bNmfcIVz-kQcFR/view?usp=sharing).

In [2]:
cast = pd.read_csv('data/cast.csv', index_col=None)
cast.head()

Unnamed: 0,title,year,name,type,character,n
0,Closet Monster,2015,Buffy #1,actor,Buffy 4,
1,Suuri illusioni,1985,Homo $,actor,Guests,22.0
2,Battle of the Sexes,2017,$hutter,actor,Bobby Riggs Fan,10.0
3,Secret in Their Eyes,2015,$hutter,actor,2002 Dodger Fan,
4,Steve Jobs,2015,$hutter,actor,1988 Opera House Patron,


In [3]:
release_dates = pd.read_csv('data/release_dates.csv', index_col=None,
                                      parse_dates=['date'], infer_datetime_format=True)
release_dates.head()

Unnamed: 0,title,year,country,date
0,"#73, Shaanthi Nivaasa",2007,India,2007-06-15
1,#Beings,2015,Romania,2015-01-29
2,#Declimax,2018,Netherlands,2018-01-21
3,#Ewankosau saranghaeyo,2015,Philippines,2015-01-21
4,#Horror,2015,USA,2015-11-20


### Count the number of movies with "Christmas" in their title for each month, that is released are the USA.

In [4]:
# determine dtypes to help define new filter function
release_dates.dtypes

title              object
year                int64
country            object
date       datetime64[ns]
dtype: object

In [5]:
# filter_results function
# chooses return value based on the value type of the filtered field
def filter_results(table, field, value):
    if table[field].dtype == 'int64':
        return table[table[field] == value]
    else:
        return table[table[field].str.contains(value)]

In [6]:
release_dates.pipe(filter_results, 'title', 'Christmas').pipe(filter_results, 'country', 'USA')

Unnamed: 0,title,year,country,date
1191,12 Dog Days of Christmas,2014,USA,2014-11-28
1192,12 Dogs of Christmas: Great Puppy Rescue,2012,USA,2012-10-09
5904,A Belle for Christmas,2014,USA,2014-11-04
6163,A Cadaver Christmas,2011,USA,2011-04-02
6272,A Christmas Carol,1938,USA,1938-12-16
...,...,...,...,...
383807,The Nightmare Before Christmas,1993,USA,1993-10-29
395219,The Shootin' It Christmas Spectacular,2013,USA,2013-12-20
408785,This Christmas,2007,USA,2007-11-21
437275,What She Wants for Christmas,2012,USA,2012-12-01


### Count the number of movies with "Christmas" in their title for each month, that are released in Canada.

In [7]:
# determine date type
print(release_dates['date'].dtype.name)

datetime64[ns]


In [8]:
# determine min/max date values to search through
print(release_dates['date'].sort_values())

232584   1894-10-09
317627   1900-09-13
398115   1906-12-26
290219   1907-11-02
21527    1908-09-12
            ...    
365289   2021-12-07
74969    2021-12-24
284063   2022-03-25
312128   2022-09-01
165532   2023-03-01
Name: date, Length: 452656, dtype: datetime64[ns]


In [9]:
# create a monthly date range spanning the entire dated history of the release_date tables
bins = pd.date_range('1894-10', '2023-03', freq='MS')
canada_christmas_releases = release_dates.pipe(filter_results, 'title', 'Christmas').pipe(filter_results, 'country', 'Canada')
canada_christmas_releases.groupby(pd.cut(canada_christmas_releases['date'], bins))['title'].count().sort_values()

date
(1894-10-01, 1894-11-01]    0
(1980-02-01, 1980-03-01]    0
(1980-01-01, 1980-02-01]    0
(1979-12-01, 1980-01-01]    0
(1979-11-01, 1979-12-01]    0
                           ..
(2012-10-01, 2012-11-01]    1
(2011-11-01, 2011-12-01]    2
(2014-12-01, 2015-01-01]    2
(2014-11-01, 2014-12-01]    2
(2016-12-01, 2017-01-01]    3
Name: title, Length: 1541, dtype: int64

### Count the number of movies in which the titles start with "The Hobbit" for each month, that are released in the USA.

In [10]:
hobbit_usa_releases = release_dates.pipe(filter_results, 'title', 'The Hobbit').pipe(filter_results, 'country', 'USA')
hobbit_usa_releases.groupby(pd.cut(release_dates['date'], bins))['title'].count().sort_values()

date
(1894-10-01, 1894-11-01]    0
(1980-08-01, 1980-09-01]    0
(1980-07-01, 1980-08-01]    0
(1980-06-01, 1980-07-01]    0
(1980-05-01, 1980-06-01]    0
                           ..
(1937-10-01, 1937-11-01]    0
(1935-02-01, 1935-03-01]    0
(2014-12-01, 2015-01-01]    1
(2013-12-01, 2014-01-01]    1
(2012-12-01, 2013-01-01]    1
Name: title, Length: 1541, dtype: int64

### Count the number of movies with "Romance" in their title for **each day of the week**, that are released in the USA.

In [11]:
def filter_period(day):
    return dt.
release_dates.pipe(filter_results, 'title', 'Romance').pipe

Unnamed: 0,title,year,country,date
913,100% OFF: A Recession-Era Romance,2012,USA,2012-07-04
5988,A Blue Gum Romance,1913,Australia,1913-09-20
6171,A California Romance,1922,USA,1922-12-24
6408,A Circus Romance,1916,USA,1916-01-24
6539,A Crooked Romance,1917,USA,1917-09-30
...,...,...,...,...
438359,Where Romance Rides,1925,USA,1925-04-28
440631,Wild Romance,2006,Netherlands,2006-11-09
440632,Wild Romance,2006,Belgium,2006-11-15
440721,Wild West Romance,1928,USA,1928-06-10


### Count the number of movies with "Action" in their title for **each day of the week**, that are released in the USA.

### On which date was each Judi Dench movie from the 1990s released in the USA?

### In which months do films with the actress Judi Dench tend to be released in the USA?

### In which months do films with the actor Tom Cruise tend to be released in the USA?