# Cinema weekly schedular

In [1]:
%reload_ext autoreload
%autoreload 2
%matplotlib inline

In [2]:
from __future__ import absolute_import, division, print_function, unicode_literals

import holidays
import pandas as pd
import datetime as dt
import matplotlib.pyplot as plt
from tmdbv3api import TMDb, Movie

from fastai.tabular import transform
from fastai.tabular import *
from joblib import load, dump
from torch import nn, optim

In [3]:
defaults.device = torch.device('cpu')

In [4]:
genres = load('../data/genres.joblib')

In [5]:
data = '../data/'
showings = load(data+'showings_extra_feature.joblib')
showings.replace(np.inf, 0, inplace=True)
showings.sort_values(by='showtime', inplace=True)
showings.reset_index(drop=True, inplace=True)

train_valid = showings.loc[(showings.year > 1999) & (showings.year < 2019)]
train_valid.reset_index(drop=True, inplace=True)
split = list(train_valid.loc[train_valid.year > 2016].index)

In [6]:
dep_var = 'coverage'
cat_names = ['room',
             'tmdb_original_language',
             'year',
             'month',
             'week',
             'day',
             'hour',
             'minute',
             'dayofweek',
             'dayofyear',
             'is_month_end',
             'is_month_start',
             'is_quarter_end',
             'is_quarter_start',
             'is_year_end',
             'is_year_start',
             'is_holiday',
             'days_since_release',
             'Action',
             'Adventure',
             'Animation',
             'Comedy',
             'Crime',
             'Documentary',
             'Drama',
             'Family',
             'Fantasy',
             'History',
             'Horror',
             'Music',
             'Mystery',
             'Romance',
             'Science Fiction',
             'TV Movie',
             'Thriller',
             'War',
             'Western']
cont_names = ['seats',
              'tmdb_popularity',
              'tmdb_vote_average',
              'tmdb_vote_count',
              'tmdb_runtime',
              'tmdb_budget',
              'weekday_cos',
              'weekday_sin',
              'day_month_cos',
              'day_month_sin',
              'month_year_cos',
              'month_year_sin',
              'day_year_cos',
              'day_year_sin',
              'hour_cos',
              'hour_sin',
              'clock_cos',
              'clock_sin',
              'min_cos',
              'min_sin']
procs = [FillMissing, Categorify, Normalize]

In [7]:
data = (TabularList.from_df(train_valid, cat_names=cat_names, cont_names=cont_names, procs=procs)
                   .split_by_idx(split)
                   .label_from_df(cols=dep_var)
                   .databunch())

INFO:numexpr.utils:NumExpr defaulting to 8 threads.


In [10]:
learn = tabular_learner(
                        data, layers=[1000, 500, 250, 125], emb_szs={'room': 333},
                        metrics=mae, y_range=[-.01,1.01],
                        emb_drop=0.15,
                        ps=(0.00, 0.20, 0.15, 0.95)
                       )

learn.opt_func = partial(
    optim.Adam,
    betas   = (0.99, 0.99), # default: (0.9, 0.99)
    eps     = 1e-3,         # default: 1e-8
    amsgrad = False         # default: False
)

learn.load('final_model')

1 # Just to avoid having to see the output of the above statement which shows all the information the object contains

1

# New start

In [11]:
location = showings.loc[(showings.year == 2019) & (showings.location == 260) & (showings.month == 9) & (showings.day >= 16) & (showings.day <= 22)]

In [12]:
movies  = location.std_movienr.unique().tolist()
rooms   = location.room.unique().tolist()
week    = location.day.unique().tolist()
hours   = range(14, 23)
minutes = range(0, 60, 10)

In [13]:
# These numbers are specific to this very test and corresponds to when we have
# different version of the same movie in this case 2D and 3D of Toy Story 4
# In addition we also removed a mismatched movie that had a runtime of 0 minutes
duplicates = [
    'BVI20180368',
    'BVI20190413',
    'EUR20120343',
    'NOR20180530',
    'FOX20190401'
]
for i in duplicates:
    movies.remove(i)

In [14]:
dets = dict()
for movie in movies:
    dets[movie] = location.loc[location.std_movienr == movie].iloc[0]

In [15]:
%%time
schedule = list()
for movie in movies:
    det = dets[movie]
    for day in week:
        det['day'] = day
        for room in rooms:
            det['room'] = room
            for hour in hours:
                det['hour'] = hour
                for minute in minutes:
                    det['minute']   = minute
                    det['showtime'] = dt.datetime.strptime('2020-01-'+str(day)+' '+str(hour)+':'+str(minute), '%Y-%m-%d %H:%M')
                    schedule.append(deepcopy(det))

Wall time: 2.96 s


In [16]:
schedule = pd.DataFrame(schedule)
schedule.reset_index(drop=True, inplace=True)

In [17]:
print('Number of movies:', len(movies))
print('Number of days:', len(week))
print('Number of rooms:', len(rooms))
print('Number of combinations:', schedule.shape[0])

Number of movies: 19
Number of days: 7
Number of rooms: 6
Number of combinations: 43092


### Extracting datetime related information

In [18]:
transform.add_datepart(schedule, 'showtime', drop=False, time=True)
transform.add_cyclic_datepart(schedule, 'showtime', drop=False, time=True)
showtimes = schedule.showtime.tolist()
norway_holidays = holidays.Norway(include_sundays=False)
showings_holidays = [t in norway_holidays for t in showtimes]
schedule['is_holiday'] = showings_holidays

times   = schedule.showtime.tolist()
release = schedule.tmdb_release_date.tolist()
since_release = list()
for t, r in zip(times, release):
    if r != '':
        days_since_release = (t - dt.datetime.strptime(r, '%Y-%m-%d')).days
    else:
        days_since_release = 100
    if days_since_release < 0: days_since_release = -1
    if days_since_release > 150: days_since_release = 150
    since_release.append(days_since_release)
schedule['days_since_release'] = since_release

In [19]:
# Since the data the model has been trained on contained missing values in the
# runtime feature we need to have one nan value present due to the limitations 
# of the library used to make the initial model
schedule.loc[0, 'tmdb_runtime'] = np.nan

In [20]:
schedule.reset_index(drop=True, inplace=True)
test = (TabularList.from_df(schedule,
                            cat_names=cat_names, cont_names=cont_names, procs=procs)
                   .split_none()
                   .label_from_df(cols=dep_var))
test.valid = test.train
test = test.databunch()

learn.data.valid_dl = test.valid_dl
preds_test = learn.get_preds(ds_type=DatasetType.Valid)

schedule['preds'] = preds_test[0].T[0].tolist()

In [35]:
def append_to_list(l:list, title:str, showtime:dt.datetime.timestamp, runtime:int, pred:float, screening:pd.Series, extra=20):
    """Function for adding information to a list in an ordered manner."""
    l.append(
        {'title':         title,
         'runtime':       runtime,
         'pred_coverage': pred,
         'start':         showtime,
         'end':           showtime + dt.timedelta(minutes=runtime+extra),
         'screening':     screening
        }
    )

In [36]:
def create_suggestion(screenings:pd.DataFrame, week:list, rooms:list, extra:int=20):
    """
    This function generates a schedule based on the predictions made.
    """
    screenings.sort_values(by='preds', inplace=True)
    schedule = dict()
    all_screenings = list()
    for day in week:
        schedule[day] = dict()
        for room in rooms:
            schedule[day][room] = list()
            temp = screenings.loc[(screenings.day == day) & (screenings.room == room)]
            for i, title, showtime, runtime, coverage in zip(temp.index, temp.tmdb_title, temp.showtime, temp.tmdb_runtime, temp.preds):
                if len(schedule[day][room]) == 0:
                    screening = screenings.loc[i]
                    append_to_list(schedule[day][room], title, showtime, runtime, coverage, screening)
                    all_screenings.append(screening)
                    continue
                try:
                    start, end = showtime, showtime+dt.timedelta(minutes=runtime+extra)
                    for j in schedule[day][room]:
                        if not (
                            start < j['start'] > end
                            or
                            start > j['end'] < end
                        ): raise Exception()
                    screening = screenings.loc[i]
                    append_to_list(schedule[day][room], title, showtime, runtime, coverage, screening)
                    all_screenings.append(screening)
                except:
                    continue
            schedule[day][room] = pd.DataFrame(schedule[day][room]).sort_values(by='start')
    all_screenings = pd.DataFrame(all_screenings)
    
    return all_screenings

In [40]:
%time screenings = create_suggestion(schedule, week, rooms)

Wall time: 471 ms


In [24]:
week

[16, 17, 18, 19, 20, 21, 22]

In [25]:
rooms

[26003, 26001, 26002, 26006, 26005, 26004]

In [26]:
screenings.loc[(screenings.day == 21) & (screenings.room == 26003), ('showtime', 'tmdb_original_title', 'tmdb_title', 'tmdb_runtime', 'preds')].sort_values(by='showtime')

Unnamed: 0,showtime,tmdb_original_title,tmdb_title,tmdb_runtime,preds
10698,2020-01-21 15:00:00,Askeladden - I Soria Moria slott,The Ash Lad: In Search of the Golden Castle,99.0,0.016382
10710,2020-01-21 17:00:00,Askeladden - I Soria Moria slott,The Ash Lad: In Search of the Golden Castle,99.0,0.01243
10722,2020-01-21 19:00:00,Askeladden - I Soria Moria slott,The Ash Lad: In Search of the Golden Castle,99.0,0.014645
10739,2020-01-21 21:50:00,Askeladden - I Soria Moria slott,The Ash Lad: In Search of the Golden Castle,99.0,0.020347


In [27]:
location.loc[(location.day == 21) & (location.room == 26003), ('showtime', 'tmdb_original_title', 'tmdb_title', 'tmdb_runtime', 'coverage')].sort_values(by='showtime')

Unnamed: 0,showtime,tmdb_original_title,tmdb_title,tmdb_runtime,coverage
1085583,2019-09-21 13:00:00,Askeladden - I Soria Moria slott,The Ash Lad: In Search of the Golden Castle,99.0,0.042254
1085637,2019-09-21 15:00:00,Askeladden - I Soria Moria slott,The Ash Lad: In Search of the Golden Castle,99.0,0.197183
1085760,2019-09-21 17:15:00,Askeladden - I Soria Moria slott,The Ash Lad: In Search of the Golden Castle,99.0,0.187793
1085919,2019-09-21 19:15:00,Rambo: Last Blood,Rambo: Last Blood,89.0,0.192488
1086105,2019-09-21 21:30:00,Rambo: Last Blood,Rambo: Last Blood,89.0,0.15493


### The movies that were in the original schedule

In [28]:
location.tmdb_title.value_counts()

Downton Abbey                                         20
Toy Story 4                                           17
The Ash Lad: In Search of the Golden Castle           14
It Chapter Two                                        10
Beware of Children                                     9
Tomboy                                                 8
Good Boys                                              8
Rambo: Last Blood                                      7
Once Upon a Time… in Hollywood                         7
Dora and the Lost City of Gold                         7
Angel Has Fallen                                       7
Ad Astra                                               6
Fast & Furious Presents: Hobbs & Shaw                  4
Amazing Grace                                          4
Queen of Hearts                                        3
Scary Stories to Tell in the Dark                      3
Yomeddine                                              3
Kaptein Sabeltann og jakten på 

### The movies that were in the suggested schedule

In [29]:
screenings.tmdb_title.value_counts()

The Ash Lad: In Search of the Golden Castle    74
Orps: The Movie                                33
It Chapter Two                                 30
Toy Story 4                                    19
Downton Abbey                                   9
Yomeddine                                       3
Angel Has Fallen                                2
Name: tmdb_title, dtype: int64

### The number of screenings between the original and the suggestion

In [33]:
print('Screenings in the original schedule:', location.shape[0])
print('Screenings in the suggested schedule:', screenings.shape[0])

Screenings in the original schedule: 145
Screenings in the suggested schedule: 170
