In [126]:
import pandas as pd
import numpy as np
import datetime as dt

### Download the data and load it to Pandas. 

You can find them [here](https://drive.google.com/file/d/1NY6cmF9Shjw-dD7BD6bNmfcIVz-kQcFR/view?usp=sharing).

In [127]:
cast = pd.read_csv('data/cast.csv', index_col=None)
cast.head()

KeyboardInterrupt: 

In [None]:
release_dates = pd.read_csv('data/release_dates.csv', index_col=None,
                                      parse_dates=['date'], infer_datetime_format=True)
release_dates.head()

Unnamed: 0,title,year,country,date
0,"#73, Shaanthi Nivaasa",2007,India,2007-06-15
1,#Beings,2015,Romania,2015-01-29
2,#Declimax,2018,Netherlands,2018-01-21
3,#Ewankosau saranghaeyo,2015,Philippines,2015-01-21
4,#Horror,2015,USA,2015-11-20


### Count the number of movies with "Christmas" in their title for each month, that is released are the USA.

In [None]:
# determine dtypes to help define new filter function
release_dates.dtypes

title              object
year                int64
country            object
date       datetime64[ns]
dtype: object

In [None]:
# filter_results function
# chooses return value based on the value type of the filtered field
def filter_results(table, field, value):
    if table[field].dtype == 'int64':
        return table[table[field] == value]
    else:
        return table[table[field].str.contains(value)]

In [None]:
release_dates.pipe(filter_results, 'title', 'Christmas').pipe(filter_results, 'country', 'USA')

Unnamed: 0,title,year,country,date
1191,12 Dog Days of Christmas,2014,USA,2014-11-28
1192,12 Dogs of Christmas: Great Puppy Rescue,2012,USA,2012-10-09
5904,A Belle for Christmas,2014,USA,2014-11-04
6163,A Cadaver Christmas,2011,USA,2011-04-02
6272,A Christmas Carol,1938,USA,1938-12-16
...,...,...,...,...
383807,The Nightmare Before Christmas,1993,USA,1993-10-29
395219,The Shootin' It Christmas Spectacular,2013,USA,2013-12-20
408785,This Christmas,2007,USA,2007-11-21
437275,What She Wants for Christmas,2012,USA,2012-12-01


### Count the number of movies with "Christmas" in their title for each month, that are released in Canada.

In [None]:
# determine date type
print(release_dates['date'].dtype.name)

datetime64[ns]


In [None]:
# determine min/max date values to search through
print(release_dates['date'].sort_values())

232584   1894-10-09
317627   1900-09-13
398115   1906-12-26
290219   1907-11-02
21527    1908-09-12
            ...    
365289   2021-12-07
74969    2021-12-24
284063   2022-03-25
312128   2022-09-01
165532   2023-03-01
Name: date, Length: 452656, dtype: datetime64[ns]


In [None]:
# create a monthly date range spanning the entire dated history of the release_date tables
bins = pd.date_range('1894-10', '2023-03', freq='MS')
canada_christmas_releases = release_dates.pipe(filter_results, 'title', 'Christmas').pipe(filter_results, 'country', 'Canada')
canada_christmas_releases.groupby(pd.cut(canada_christmas_releases['date'], bins))['title'].count().sort_values()

date
(1894-10-01, 1894-11-01]    0
(1980-02-01, 1980-03-01]    0
(1980-01-01, 1980-02-01]    0
(1979-12-01, 1980-01-01]    0
(1979-11-01, 1979-12-01]    0
                           ..
(2012-10-01, 2012-11-01]    1
(2011-11-01, 2011-12-01]    2
(2014-12-01, 2015-01-01]    2
(2014-11-01, 2014-12-01]    2
(2016-12-01, 2017-01-01]    3
Name: title, Length: 1541, dtype: int64

### Count the number of movies in which the titles start with "The Hobbit" for each month, that are released in the USA.

In [None]:
hobbit_usa_releases = release_dates.pipe(filter_results, 'title', 'The Hobbit').pipe(filter_results, 'country', 'USA')
hobbit_usa_releases.groupby(pd.cut(release_dates['date'], bins))['title'].count().sort_values()

date
(1894-10-01, 1894-11-01]    0
(1980-08-01, 1980-09-01]    0
(1980-07-01, 1980-08-01]    0
(1980-06-01, 1980-07-01]    0
(1980-05-01, 1980-06-01]    0
                           ..
(1937-10-01, 1937-11-01]    0
(1935-02-01, 1935-03-01]    0
(2014-12-01, 2015-01-01]    1
(2013-12-01, 2014-01-01]    1
(2012-12-01, 2013-01-01]    1
Name: title, Length: 1541, dtype: int64

### Count the number of movies with "Romance" in their title for **each day of the week**, that are released in the USA.

In [None]:
def which_day(year_start, year_end, day):
    
    annual_list = []
    
    for year in range(year_start, year_end):
        
        if day == 'Monday':
            annual_list.extend(
                pd.date_range(
                    start=str(year),
                    end=str(year+1),
                    freq='W-MON').strftime('%m/%d/%Y').to_list())
            
        elif day == 'Tuesday':
            annual_list.extend(
                pd.date_range(
                    start=str(year),
                    end=str(year+1),
                    freq='W-TUE').strftime('%m/%d/%Y').to_list())
            
        elif day == 'Wednesday':
            annual_list.extend(
                pd.date_range(
                    start=str(year),
                    end=str(year+1),
                    freq='W-WED').strftime('%m/%d/%Y').to_list())
            
        elif day == 'Thursday':
            annual_list.extend(
                pd.date_range(
                    start=str(year),
                    end=str(year+1),
                    freq='W-THU').strftime('%m/%d/%Y').to_list())
            
        elif day == 'Friday':
            annual_list.extend(
                pd.date_range(
                    start=str(year),
                    end=str(year+1),
                    freq='W-FRI').strftime('%m/%d/%Y').to_list())
            
        elif day == 'Saturday':
            annual_list.extend(
                pd.date_range(
                    start=str(year),
                    end=str(year+1),
                    freq='W-SAT').strftime('%m/%d/%Y').to_list())
        
        elif day == 'Sunday':
            annual_list.extend(
                pd.date_range(
                    start=str(year),
                    end=str(year+1),
                    freq='W').strftime('%m/%d/%Y').to_list())
        
        else:
            print('No such day exists.')
            return None
        
    return annual_list

In [None]:
which_day(1922, 2024, 'Sunday')

['01/01/1922',
 '01/08/1922',
 '01/15/1922',
 '01/22/1922',
 '01/29/1922',
 '02/05/1922',
 '02/12/1922',
 '02/19/1922',
 '02/26/1922',
 '03/05/1922',
 '03/12/1922',
 '03/19/1922',
 '03/26/1922',
 '04/02/1922',
 '04/09/1922',
 '04/16/1922',
 '04/23/1922',
 '04/30/1922',
 '05/07/1922',
 '05/14/1922',
 '05/21/1922',
 '05/28/1922',
 '06/04/1922',
 '06/11/1922',
 '06/18/1922',
 '06/25/1922',
 '07/02/1922',
 '07/09/1922',
 '07/16/1922',
 '07/23/1922',
 '07/30/1922',
 '08/06/1922',
 '08/13/1922',
 '08/20/1922',
 '08/27/1922',
 '09/03/1922',
 '09/10/1922',
 '09/17/1922',
 '09/24/1922',
 '10/01/1922',
 '10/08/1922',
 '10/15/1922',
 '10/22/1922',
 '10/29/1922',
 '11/05/1922',
 '11/12/1922',
 '11/19/1922',
 '11/26/1922',
 '12/03/1922',
 '12/10/1922',
 '12/17/1922',
 '12/24/1922',
 '12/31/1922',
 '01/07/1923',
 '01/14/1923',
 '01/21/1923',
 '01/28/1923',
 '02/04/1923',
 '02/11/1923',
 '02/18/1923',
 '02/25/1923',
 '03/04/1923',
 '03/11/1923',
 '03/18/1923',
 '03/25/1923',
 '04/01/1923',
 '04/08/19

In [None]:
romance_titles = release_dates.pipe(filter_results, 'title', 'Romance')
romance_titles.sort_values('date')

Unnamed: 0,title,year,country,date
5988,A Blue Gum Romance,1913,Australia,1913-09-20
411457,Tillie's Punctured Romance,1914,USA,1914-12-21
448635,Young Romance,1915,USA,1915-01-21
343033,The Bachelor's Romance,1915,USA,1915-02-11
392256,The Romance of Elaine,1915,USA,1915-06-14
...,...,...,...,...
222409,Manhattan Romance,2015,USA,2015-10-02
292005,Romance Out of the Blue,2015,China,2015-10-23
291998,Romance Complicated,2016,India,2016-01-15
21168,American Romance,2016,USA,2016-10-23


In [None]:
for weekday in ['Sunday', 'Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday', 'Saturday']:
    days = which_day(1894, 2024, weekday)
    day_count = romance_titles[romance_titles['date'].isin(days)].sort_values('date')['date'].count()
    print(f'{weekday} : {day_count}')

Sunday : 45
Monday : 65
Tuesday : 16
Wednesday : 34
Thursday : 53
Friday : 106
Saturday : 21


### Count the number of movies with "Action" in their title for **each day of the week**, that are released in the USA.

In [None]:
action_titles = release_dates.pipe(filter_results, 'title', 'Action')
action_titles.sort_values('date')

Unnamed: 0,title,year,country,date
13441,Action,1921,USA,1921-09-12
8462,A Man of Action,1923,USA,1923-06-03
13442,Action,1921,France,1924-01-11
8463,A Man of Action,1923,Denmark,1925-01-05
13443,Action,1921,Finland,1925-02-15
...,...,...,...,...
13451,Action Hero Biju,2016,United Arab Emirates,2016-02-25
13450,Action Hero Biju,2016,Kuwait,2016-02-25
426049,Untitled Disney Live-Action Fairy Tale,2017,USA,2017-12-22
426050,Untitled Disney Live-Action Fairy Tale,2019,USA,2019-03-29


In [None]:
for weekday in ['Sunday', 'Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday', 'Saturday']:
    days = which_day(1894, 2024, weekday)
    day_count = action_titles[action_titles['date'].isin(days)].sort_values('date')['date'].count()
    print(f'{weekday} : {day_count}')

Sunday : 9
Monday : 16
Tuesday : 2
Wednesday : 36
Thursday : 54
Friday : 115
Saturday : 19


### On which date was each Judi Dench movie from the 1990s released in the USA?

### In which months do films with the actress Judi Dench tend to be released in the USA?

### In which months do films with the actor Tom Cruise tend to be released in the USA?