## Settings

In [1]:
import os
data_folder = './DATA'

In [2]:
import sqlite3
conn = sqlite3.connect(os.path.join(data_folder, 'data.db'))
conn.row_factory = sqlite3.Row
cursor = conn.cursor()
print("Opened database successfully")

Opened database successfully


In [3]:
nomination_file_name = 'best_actress_1950_bafta'

In [4]:
import academy_award_predictor_constants as c

In [5]:
PREDICT_YEAR = 2019
MIN_YEAR = 1950
EXCLUDE_EARLIER_FILMS = True
FILL_NA = True

In [6]:
INCLUDE_ACADEMY_AWARD = {
    'INCLUDE_FILM': True,
    'INCLUDE_PERSON': True,
    'INCLUDE_CREW': True,
}
INCLUDE_OTHER_EVENTS = {
    'ALL': {
        'INCLUDE_FILM': True,
        'INCLUDE_PERSON': True
    },
    'TYPES': {
        'INCLUDE_FILM': True,
        'INCLUDE_PERSON': True
    },
    'INDIVIDUAL': {
        'INCLUDE_FILM': True,
        'INCLUDE_PERSON': True
    },
}
INCLUDE_KEYWORDS = False
INCLUDE_GENRES = True
INCLUDE_COLORS = False
INCLUDE_ASPECT_RATIOS = False
INCLUDE_CERTIFICATIONS = False
INCLUDE_REVIEW_SOURCES = False
INCLUDE_REVIEW_CRITICS = False

INCLUDE_BOX_OFFICE = False
INCLUDE_BUDGET = False
INCLUDE_RUNTIME = False
INCLUDE_FIRSTRELEASE_NUMERICAL = False
INCLUDE_FIRSTRELEASE_CATEGORICAL = True
INCLUDE_USRELEASE_NUMERICAL = False
INCLUDE_USRELEASE_CATEGORICAL = False
INCLUDE_METACRITIC = True
INCLUDE_PERSON_AGE = True

INCLUDE_EVENT_YEAR = False
INCLUDE_NOMINATION_ID = False
INCLUDE_FILM_IDS = False

In [7]:
def any_academy_award_selected():
    return any(x for x in INCLUDE_ACADEMY_AWARD.values())
def any_other_event_selected_by_key(key):
    return any(x for x in INCLUDE_OTHER_EVENTS[key].values())
def any_other_event_selected_by_include(include):
    return any([any([key_inner for key_inner, val_inner in x_outer.items() if key_inner == include and val_inner]) for x_outer in INCLUDE_OTHER_EVENTS.values()])
def any_other_event_selected():
    return any(any_other_event_selected_by_key(key) for key in INCLUDE_OTHER_EVENTS.keys())

## Get Other Events

In [8]:
cursor.execute(
    "SELECT e.*, "
    "(SELECT MIN(ee.Year) + e.Offset FROM EventEditions ee WHERE EventId = e.Id) as FirstYear,  "
    "(SELECT MAX(ee.Year) + e.Offset FROM EventEditions ee WHERE EventId = e.Id) as LastYear, "
    "CASE WHEN e.IsAcademyAwards THEN 'AcademyAwards' "
    "ELSE  "
        "CASE WHEN e.IsGuild THEN 'Guild' "
        "ELSE  "
            "CASE WHEN e.IsFestival THEN 'Festival' "
            "ELSE 'Other' "
            "END "
        "END "
    "END as EventType "
    "FROM Events e "
    "WHERE (SELECT MIN(ee.Year) + e.Offset FROM EventEditions ee WHERE EventId = e.Id) <= ?",
    (MIN_YEAR, )
)
award_events = []
for row in cursor.fetchall():
    award_events.append(dict(row))
    
print('Total events:', len(award_events))
non_academy_award_events = list(filter(lambda ev: ev['IsAcademyAwards'] is None, award_events))
print('Non Academy Award Events:', len(non_academy_award_events))
guild_events = list(filter(lambda ev: ev['IsGuild'] == 1, award_events))
print('Guild Events:', len(guild_events))
print(" - ".join(list(map(lambda ev: ev['Name'], guild_events))))
critic_events = list(filter(lambda ev: ev['IsCritic'] == 1, award_events))
print('Critic Events:', len(critic_events))
print(" - ".join(list(map(lambda ev: ev['Name'], critic_events))))
festival_events = list(filter(lambda ev: ev['IsFestival'] == 1, award_events))
print('Festival Events:', len(festival_events))
print(" - ".join(list(map(lambda ev: ev['Name'], festival_events))))
other_events = list(filter(lambda ev: ev['IsOther'] == 1, award_events))
print('Other Events:', len(other_events))
print(" - ".join(list(map(lambda ev: ev['Name'], other_events))))

Total events: 8
Non Academy Award Events: 7
Guild Events: 3
BAFTA Awards - Directors Guild of America, USA - Writers Guild of America, USA
Critic Events: 1
New York Film Critics Circle Awards
Festival Events: 2
Cannes Film Festival - Venice Film Festival
Other Events: 1
Golden Globes, USA


In [9]:
def set_or_update_key(nomination, key, value):
    if key in nomination:
        nomination[key] += value
    else:
        nomination[key] = value
        
def set_or_update_highest(nomination, key, value):
    if key in nomination:
        if value > nomination[key]:
            nomination[key] = value
    else:
        nomination[key] = value

## Get Info for Specific Events

In [10]:
def get_person_win_or_nomination_specific_event_rising_star_previous_years(is_winner, event_id, nomination_id):
    cursor.execute(
        "SELECT COUNT(DISTINCT(n.Id)) "
        "FROM Nominations n  "
        "JOIN Person_Nomination pn ON pn.NominationId = n.Id "
        "JOIN AwardCategories ac ON ac.Id = n.AwardCategoryId  "
        "JOIN EventEditions ee ON ee.Id = n.EventEditionId  "
        "JOIN Events e ON e.Id = ee.EventId  "
        "WHERE 1=1  "
        "AND ee.Year + e.Offset < ( "
        " SELECT year_ee.Year + year_e.Offset "
        " FROM Nominations year_n "
        " JOIN EventEditions year_ee ON year_ee.Id = year_n.EventEditionId "
        " JOIN Events year_e ON year_e.Id = year_ee.EventId "
        " WHERE year_n.Id = ? "
        ") "
        "AND ee.EventId = ? "
        "AND n.IsWinner = ? "
        "AND ac.IsRisingStar = 1  "
        "AND pn.PersonId IN ( "
        " SELECT PersonId FROM "
        " Person_Nomination "
        " WHERE NominationId = ? "
        ") ",
        (nomination_id, event_id, is_winner, nomination_id)
    )
    data_count = cursor.fetchone()
    if data_count is not None:
        return data_count[0]
    return 0

def get_person_win_or_nomination_specific_event_rising_star_same_year(is_winner, event_id, nomination_id):
    cursor.execute(
        "SELECT COUNT(DISTINCT(n.Id)) "
        "FROM Nominations n  "
        "JOIN Person_Nomination pn ON pn.NominationId = n.Id "
        "JOIN AwardCategories ac ON ac.Id = n.AwardCategoryId  "
        "JOIN EventEditions ee ON ee.Id = n.EventEditionId  "
        "JOIN Events e ON e.Id = ee.EventId  "
        "WHERE 1=1  "
        "AND ee.Year + e.Offset = ( "
        " SELECT year_ee.Year + year_e.Offset "
        " FROM Nominations year_n "
        " JOIN EventEditions year_ee ON year_ee.Id = year_n.EventEditionId "
        " JOIN Events year_e ON year_e.Id = year_ee.EventId "
        " WHERE year_n.Id = ? "
        ") "
        "AND ee.EventId = ? "
        "AND n.IsWinner = ? "
        "AND ac.IsRisingStar = 1  "
        "AND pn.PersonId IN ( "
        " SELECT PersonId FROM "
        " Person_Nomination "
        " WHERE NominationId = ? "
        ") ",
        (nomination_id, event_id, is_winner, nomination_id)
    )
    data_count = cursor.fetchone()
    if data_count is not None:
        return data_count[0]
    return 0

def get_person_win_or_nomination_specific_event_non_best_actress_previous_years(is_winner, event_id, nomination_id):
    cursor.execute(
        "SELECT COUNT(DISTINCT(n.Id)) "
        "FROM Nominations n  "
        "JOIN Person_Nomination pn ON pn.NominationId = n.Id "
        "JOIN AwardCategories ac ON ac.Id = n.AwardCategoryId  "
        "JOIN EventEditions ee ON ee.Id = n.EventEditionId  "
        "JOIN Events e ON e.Id = ee.EventId  "
        "WHERE 1=1  "
        "AND ee.Year + e.Offset < ( "
        " SELECT year_ee.Year + year_e.Offset "
        " FROM Nominations year_n "
        " JOIN EventEditions year_ee ON year_ee.Id = year_n.EventEditionId "
        " JOIN Events year_e ON year_e.Id = year_ee.EventId "
        " WHERE year_n.Id = ? "
        ") "
        "AND ee.EventId = ? "
        "AND n.IsWinner = ? "
        "AND ac.IsBestActress IS NULL  "
        "AND ac.IsBestSupportingActress IS NULL  "
        "AND ac.IsRisingStar IS NULL  "
        "AND pn.PersonId IN ( "
        " SELECT PersonId FROM "
        " Person_Nomination "
        " WHERE NominationId = ? "
        ") ",
        (nomination_id, event_id, is_winner, nomination_id)
    )
    data_count = cursor.fetchone()
    if data_count is not None:
        return data_count[0]
    return 0

def get_person_win_or_nomination_specific_event_non_best_actress_same_year(is_winner, event_id, nomination_id):
    cursor.execute(
        "SELECT COUNT(DISTINCT(n.Id)) "
        "FROM Nominations n  "
        "JOIN Person_Nomination pn ON pn.NominationId = n.Id "
        "JOIN AwardCategories ac ON ac.Id = n.AwardCategoryId  "
        "JOIN EventEditions ee ON ee.Id = n.EventEditionId  "
        "JOIN Events e ON e.Id = ee.EventId  "
        "WHERE 1=1  "
        "AND ee.Year + e.Offset = ( "
        " SELECT year_ee.Year + year_e.Offset "
        " FROM Nominations year_n "
        " JOIN EventEditions year_ee ON year_ee.Id = year_n.EventEditionId "
        " JOIN Events year_e ON year_e.Id = year_ee.EventId "
        " WHERE year_n.Id = ? "
        ") "
        "AND ee.EventId = ? "
        "AND n.IsWinner = ? "
        "AND ac.IsBestActress IS NULL  "
        "AND ac.IsBestSupportingActress IS NULL  "
        "AND ac.IsRisingStar IS NULL  "
        "AND pn.PersonId IN ( "
        " SELECT PersonId FROM "
        " Person_Nomination "
        " WHERE NominationId = ? "
        ") ",
        (nomination_id, event_id, is_winner, nomination_id)
    )
    data_count = cursor.fetchone()
    if data_count is not None:
        return data_count[0]
    return 0

def get_person_win_or_nomination_specific_event_best_supporting_actress_previous_years(is_winner, event_id, nomination_id):
    cursor.execute(
        "SELECT COUNT(DISTINCT(n.Id)) "
        "FROM Nominations n  "
        "JOIN Person_Nomination pn ON pn.NominationId = n.Id "
        "JOIN AwardCategories ac ON ac.Id = n.AwardCategoryId  "
        "JOIN EventEditions ee ON ee.Id = n.EventEditionId  "
        "JOIN Events e ON e.Id = ee.EventId  "
        "WHERE 1=1  "
        "AND ee.Year + e.Offset < ( "
        " SELECT year_ee.Year + year_e.Offset "
        " FROM Nominations year_n "
        " JOIN EventEditions year_ee ON year_ee.Id = year_n.EventEditionId "
        " JOIN Events year_e ON year_e.Id = year_ee.EventId "
        " WHERE year_n.Id = ? "
        ") "
        "AND ee.EventId = ? "
        "AND n.IsWinner = ? "
        "AND ac.IsBestSupportingActress = 1  "
        "AND pn.PersonId IN ( "
        " SELECT PersonId FROM "
        " Person_Nomination "
        " WHERE NominationId = ? "
        ") ",
        (nomination_id, event_id, is_winner, nomination_id)
    )
    data_count = cursor.fetchone()
    if data_count is not None:
        return data_count[0]
    return 0

def get_person_win_or_nomination_specific_event_best_supporting_actress_same_year(is_winner, event_id, nomination_id):
    cursor.execute(
        "SELECT COUNT(DISTINCT(n.Id)) "
        "FROM Nominations n  "
        "JOIN Person_Nomination pn ON pn.NominationId = n.Id "
        "JOIN AwardCategories ac ON ac.Id = n.AwardCategoryId  "
        "JOIN EventEditions ee ON ee.Id = n.EventEditionId  "
        "JOIN Events e ON e.Id = ee.EventId  "
        "WHERE 1=1  "
        "AND ee.Year + e.Offset = ( "
        " SELECT year_ee.Year + year_e.Offset "
        " FROM Nominations year_n "
        " JOIN EventEditions year_ee ON year_ee.Id = year_n.EventEditionId "
        " JOIN Events year_e ON year_e.Id = year_ee.EventId "
        " WHERE year_n.Id = ? "
        ") "
        "AND ee.EventId = ? "
        "AND n.IsWinner = ? "
        "AND ac.IsBestSupportingActress = 1 "
        "AND pn.PersonId IN ( "
        " SELECT PersonId FROM "
        " Person_Nomination "
        " WHERE NominationId = ? "
        ") "
        "AND n.FilmId NOT IN ( "
            "SELECT FilmId FROM "
            "Nominations "
            "WHERE Id = ? "
        ") ",
        (nomination_id, event_id, is_winner, nomination_id, nomination_id)
    )
    data_count = cursor.fetchone()
    if data_count is not None:
        return data_count[0]
    return 0

def get_person_win_or_nomination_specific_event_best_actress_previous_years(is_winner, event_id, nomination_id):
    cursor.execute(
        "SELECT COUNT(DISTINCT(n.Id)) "
        "FROM Nominations n  "
        "JOIN Person_Nomination pn ON pn.NominationId = n.Id "
        "JOIN AwardCategories ac ON ac.Id = n.AwardCategoryId  "
        "JOIN EventEditions ee ON ee.Id = n.EventEditionId  "
        "JOIN Events e ON e.Id = ee.EventId  "
        "WHERE 1=1  "
        "AND ee.Year + e.Offset < ( "
        " SELECT year_ee.Year + year_e.Offset "
        " FROM Nominations year_n "
        " JOIN EventEditions year_ee ON year_ee.Id = year_n.EventEditionId "
        " JOIN Events year_e ON year_e.Id = year_ee.EventId "
        " WHERE year_n.Id = ? "
        ") "
        "AND ee.EventId = ? "
        "AND n.IsWinner = ? "
        "AND ac.IsBestActress = 1  "
        "AND pn.PersonId IN ( "
        " SELECT PersonId FROM "
        " Person_Nomination "
        " WHERE NominationId = ? "
        ") ",
        (nomination_id, event_id, is_winner, nomination_id)
    )
    data_count = cursor.fetchone()
    if data_count is not None:
        return data_count[0]
    return 0

def get_person_win_or_nomination_specific_event_best_actress_same_year(is_winner, event_id, nomination_id):
    cursor.execute(
        "SELECT COUNT(DISTINCT(n.Id)) "
        "FROM Nominations n  "
        "JOIN Person_Nomination pn ON pn.NominationId = n.Id "
        "JOIN AwardCategories ac ON ac.Id = n.AwardCategoryId  "
        "JOIN EventEditions ee ON ee.Id = n.EventEditionId  "
        "JOIN Events e ON e.Id = ee.EventId  "
        "WHERE 1=1  "
        "AND ee.Year + e.Offset = ( "
        " SELECT year_ee.Year + year_e.Offset "
        " FROM Nominations year_n "
        " JOIN EventEditions year_ee ON year_ee.Id = year_n.EventEditionId "
        " JOIN Events year_e ON year_e.Id = year_ee.EventId "
        " WHERE year_n.Id = ? "
        ") "
        "AND ee.EventId = ? "
        "AND n.IsWinner = ? "
        "AND ac.IsBestActress = 1 "
        "AND pn.PersonId IN ( "
        " SELECT PersonId FROM "
        " Person_Nomination "
        " WHERE NominationId = ? "
        ") "
        "AND n.FilmId NOT IN ( "
            "SELECT FilmId FROM "
            "Nominations "
            "WHERE Id = ? "
        ") ",
        (nomination_id, event_id, is_winner, nomination_id, nomination_id)
    )
    data_count = cursor.fetchone()
    if data_count is not None:
        return data_count[0]
    return 0

In [11]:
def get_film_win_or_nomination_specific_event_non_best_actress(is_winner, event_id, nomination_id):
    cursor.execute(
        "SELECT COUNT(DISTINCT(n.Id)) "
        "FROM Nominations n "
        "JOIN AwardCategories ac ON ac.Id = n.AwardCategoryId "
        "JOIN EventEditions ee ON ee.Id = n.EventEditionId "
        "WHERE 1=1 "
        "AND ee.EventId = ? "
        "AND n.IsWinner = ? "
        "AND ac.IsBestActress IS NULL "
        "AND ac.IsBestSupportingActress IS NULL "
        "AND n.Id <> ? "
        "AND n.FilmId IN ( "
            "SELECT FilmId FROM "
            "Nominations "
            "WHERE Id = ? "
        ") ",
        (event_id, is_winner, nomination_id, nomination_id )
    )
    data_count = cursor.fetchone()
    if data_count is not None:
        return data_count[0]
    return 0

def get_film_win_or_nomination_specific_event_best_supporting_actress_other_person(is_winner, event_id, nomination_id):
    cursor.execute(
        "SELECT COUNT(DISTINCT(n.Id)) "
        "FROM Nominations n "
        "JOIN AwardCategories ac ON ac.Id = n.AwardCategoryId "
        "JOIN EventEditions ee ON ee.Id = n.EventEditionId "
        "JOIN Person_Nomination pn ON pn.NominationId = n.Id "
        "WHERE 1=1 "
        "AND ee.EventId = ? "
        "AND n.IsWinner = ? "
        "AND ac.IsBestSupportingActress = 1 "
        "AND n.Id <> ? "
        "AND n.FilmId IN ( "
            "SELECT FilmId FROM "
            "Nominations "
            "WHERE Id = ? "
        ") "
        "AND pn.PersonId NOT IN ( "
        " SELECT PersonId FROM "
        " Person_Nomination "
        " WHERE NominationId = ? "
        ") ",
        (event_id, is_winner, nomination_id, nomination_id, nomination_id )
    )
    data_count = cursor.fetchone()
    if data_count is not None:
        return data_count[0]
    return 0 

def get_film_win_or_nomination_specific_event_best_supporting_actress_same_person(is_winner, event_id, nomination_id):
    cursor.execute(
        "SELECT COUNT(DISTINCT(n.Id)) "
        "FROM Nominations n "
        "JOIN AwardCategories ac ON ac.Id = n.AwardCategoryId "
        "JOIN EventEditions ee ON ee.Id = n.EventEditionId "
        "JOIN Person_Nomination pn ON pn.NominationId = n.Id "
        "WHERE 1=1 "
        "AND ee.EventId = ? "
        "AND n.IsWinner = ? "
        "AND ac.IsBestSupportingActress = 1 "
        "AND n.Id <> ? "
        "AND n.FilmId IN ( "
            "SELECT FilmId FROM "
            "Nominations "
            "WHERE Id = ? "
        ") "
        "AND pn.PersonId IN ( "
        " SELECT PersonId FROM "
        " Person_Nomination "
        " WHERE NominationId = ? "
        ") ",
        (event_id, is_winner, nomination_id, nomination_id, nomination_id )
    )
    data_count = cursor.fetchone()
    if data_count is not None:
        return data_count[0]
    return 0 

def get_film_win_or_nomination_specific_event_best_actress_other_person(is_winner, event_id, nomination_id):
    cursor.execute(
        "SELECT COUNT(DISTINCT(n.Id)) "
        "FROM Nominations n "
        "JOIN AwardCategories ac ON ac.Id = n.AwardCategoryId "
        "JOIN EventEditions ee ON ee.Id = n.EventEditionId "
        "JOIN Person_Nomination pn ON pn.NominationId = n.Id "
        "WHERE 1=1 "
        "AND ee.EventId = ? "
        "AND n.IsWinner = ? "
        "AND ac.IsBestActress = 1 "
        "AND n.Id <> ? "
        "AND n.FilmId IN ( "
            "SELECT FilmId FROM "
            "Nominations "
            "WHERE Id = ? "
        ") "
        "AND pn.PersonId NOT IN ( "
        " SELECT PersonId FROM "
        " Person_Nomination "
        " WHERE NominationId = ? "
        ") ",
        (event_id, is_winner, nomination_id, nomination_id, nomination_id )
    )
    data_count = cursor.fetchone()
    if data_count is not None:
        return data_count[0]
    return 0

def get_film_win_or_nomination_specific_event_best_actress_same_person(is_winner, event_id, nomination_id):
    cursor.execute(
        "SELECT COUNT(DISTINCT(n.Id)) "
        "FROM Nominations n "
        "JOIN AwardCategories ac ON ac.Id = n.AwardCategoryId "
        "JOIN EventEditions ee ON ee.Id = n.EventEditionId "
        "JOIN Person_Nomination pn ON pn.NominationId = n.Id "
        "WHERE 1=1 "
        "AND ee.EventId = ? "
        "AND n.IsWinner = ? "
        "AND ac.IsBestActress = 1 "
        "AND n.Id <> ? "
        "AND n.FilmId IN ( "
            "SELECT FilmId FROM "
            "Nominations "
            "WHERE Id = ? "
        ") "
        "AND pn.PersonId IN ( "
        " SELECT PersonId FROM "
        " Person_Nomination "
        " WHERE NominationId = ? "
        ") ",
        (event_id, is_winner, nomination_id, nomination_id, nomination_id )
    )
    data_count = cursor.fetchone()
    if data_count is not None:
        return data_count[0]
    return 0    

In [12]:
def set_event_count(nomination, event, nomination_type, key, count):
    if any_other_event_selected_by_key('ALL'):
        set_or_update_key(nomination, "{0}_{1}_{2}_{3}".format(c.NOMINATIONS_PREFIX, 'OtherEvents', nomination_type, key), count)
    if any_other_event_selected_by_key('INDIVIDUAL'):
        set_or_update_key(nomination, "{0}_{1}_{2}_{3}_{4}".format(c.NOMINATIONS_PREFIX, 'EventId', event['ImdbId'], nomination_type, key), count)
    if any_other_event_selected_by_key('TYPES'):
        set_or_update_key(nomination, "{0}_{1}_{2}_{3}_{4}".format(c.NOMINATIONS_PREFIX, 'EventType', event['EventType'], nomination_type, key), count)

In [13]:
def get_event_counts(nomination, event):
    nomination_id = nomination['Nomination_Id']
    
    if any_other_event_selected_by_include('INCLUDE_FILM'):
        set_event_count(nomination, event,
                        c.FILM_PREFIX,
                        'BestActressSamePersonWinCount',
                        get_film_win_or_nomination_specific_event_best_actress_same_person(1, event['Id'], nomination_id)
                       )
        set_event_count(nomination, event,
                        c.FILM_PREFIX,
                        'BestActressSamePersonNomCount',
                        get_film_win_or_nomination_specific_event_best_actress_same_person(0, event['Id'], nomination_id)
                       )
        set_event_count(nomination, event,
                        c.FILM_PREFIX,
                        'BestActressOtherPersonWinCount',
                        get_film_win_or_nomination_specific_event_best_actress_other_person(1, event['Id'], nomination_id)
                       )
        set_event_count(nomination, event,
                        c.FILM_PREFIX,
                        'BestActressOtherPersonNomCount',
                        get_film_win_or_nomination_specific_event_best_actress_other_person(0, event['Id'], nomination_id)
                       )
        set_event_count(nomination, event,
                        c.FILM_PREFIX,
                        'BestSupportingActressSamePersonWinCount',
                        get_film_win_or_nomination_specific_event_best_supporting_actress_same_person(1, event['Id'], nomination_id)
                       )
        set_event_count(nomination, event,
                        c.FILM_PREFIX,
                        'BestSupportingActressSamePersonNomCount',
                        get_film_win_or_nomination_specific_event_best_supporting_actress_same_person(0, event['Id'], nomination_id)
                       )
        set_event_count(nomination, event,
                        c.FILM_PREFIX,
                        'BestSupportingActressOtherPersonWinCount',
                        get_film_win_or_nomination_specific_event_best_supporting_actress_other_person(1, event['Id'], nomination_id)
                       )
        set_event_count(nomination, event,
                        c.FILM_PREFIX,
                        'BestSupportingActressOtherPersonNomCount',
                        get_film_win_or_nomination_specific_event_best_supporting_actress_other_person(0, event['Id'], nomination_id)
                       )
        set_event_count(nomination, event,
                        c.FILM_PREFIX,
                        'NonBestActressWinCount',
                        get_film_win_or_nomination_specific_event_non_best_actress(1, event['Id'], nomination_id)
                       )
        set_event_count(nomination, event,
                        c.FILM_PREFIX,
                        'NonBestActressNomCount',
                        get_film_win_or_nomination_specific_event_non_best_actress(0, event['Id'], nomination_id)
                       )
    
    
    if any_other_event_selected_by_include('INCLUDE_PERSON'):
        set_event_count(nomination, event,
                        c.PERSON_PREFIX,
                        'BestActressSameYearWinCount',
                        get_person_win_or_nomination_specific_event_best_actress_same_year(1, event['Id'], nomination_id)
                       )
        set_event_count(nomination, event,
                        c.PERSON_PREFIX,
                        'BestActressSameYearNomCount',
                        get_person_win_or_nomination_specific_event_best_actress_same_year(0, event['Id'], nomination_id)
                       )
        set_event_count(nomination, event,
                        c.PERSON_PREFIX,
                        'BestActressPrevYearWinCount',
                        get_person_win_or_nomination_specific_event_best_actress_previous_years(1, event['Id'], nomination_id)
                       )
        set_event_count(nomination, event,
                        c.PERSON_PREFIX,
                        'BestActressPrevYearNomCount',
                        get_person_win_or_nomination_specific_event_best_actress_previous_years(0, event['Id'], nomination_id)
                       )
        set_event_count(nomination, event,
                        c.PERSON_PREFIX,
                        'BestSupportingActressSameYearWinCount',
                        get_person_win_or_nomination_specific_event_best_supporting_actress_same_year(1, event['Id'], nomination_id)
                       )
        set_event_count(nomination, event,
                        c.PERSON_PREFIX,
                        'BestSupportingActressSameYearNomCount',
                        get_person_win_or_nomination_specific_event_best_supporting_actress_same_year(0, event['Id'], nomination_id)
                       )
        set_event_count(nomination, event,
                        c.PERSON_PREFIX,
                        'BestSupportingActressPrevYearWinCount',
                        get_person_win_or_nomination_specific_event_best_supporting_actress_previous_years(1, event['Id'], nomination_id)
                       )
        set_event_count(nomination, event,
                        c.PERSON_PREFIX,
                        'BestSupportingActressPrevYearNomCount',
                        get_person_win_or_nomination_specific_event_best_supporting_actress_previous_years(0, event['Id'], nomination_id)
                       )
        set_event_count(nomination, event,
                        c.PERSON_PREFIX,
                        'NonBestActressSameYearWinCount',
                        get_person_win_or_nomination_specific_event_non_best_actress_same_year(1, event['Id'], nomination_id)
                       )
        set_event_count(nomination, event,
                        c.PERSON_PREFIX,
                        'NonBestActressSameYearNomCount',
                        get_person_win_or_nomination_specific_event_non_best_actress_same_year(0, event['Id'], nomination_id)
                       )
        set_event_count(nomination, event,
                        c.PERSON_PREFIX,
                        'NonBestActressPrevYearWinCount',
                        get_person_win_or_nomination_specific_event_non_best_actress_previous_years(1, event['Id'], nomination_id)
                       )
        set_event_count(nomination, event,
                        c.PERSON_PREFIX,
                        'NonBestActressPrevYearNomCount',
                        get_person_win_or_nomination_specific_event_non_best_actress_previous_years(0, event['Id'], nomination_id)
                       )
        set_event_count(nomination, event,
                        c.PERSON_PREFIX,
                        'RisingStarSameYearWinCount',
                        get_person_win_or_nomination_specific_event_rising_star_same_year(1, event['Id'], nomination_id)
                       )
        set_event_count(nomination, event,
                        c.PERSON_PREFIX,
                        'RisingStarSameYearNomCount',
                        get_person_win_or_nomination_specific_event_rising_star_same_year(0, event['Id'], nomination_id)
                       )
        set_event_count(nomination, event,
                        c.PERSON_PREFIX,
                        'RisingStarPrevYearWinCount',
                        get_person_win_or_nomination_specific_event_rising_star_previous_years(1, event['Id'], nomination_id)
                       )
        set_event_count(nomination, event,
                        c.PERSON_PREFIX,
                        'RisingStarPrevYearNomCount',
                        get_person_win_or_nomination_specific_event_rising_star_previous_years(0, event['Id'], nomination_id)
                       )

## Get Info for Academy Awards

In [14]:
def get_person_other_academy_award_non_best_actress_nominations_previous_years(nomination_id, is_winner):
    cursor.execute(
        "SELECT COUNT(DISTINCT(n.Id)) "
        "FROM Nominations n  "
        "JOIN Person_Nomination pn ON pn.NominationId = n.Id "
        "JOIN AwardCategories ac ON ac.Id = n.AwardCategoryId  "
        "JOIN EventEditions ee ON ee.Id = n.EventEditionId  "
        "JOIN Events e ON e.Id = ee.EventId  "
        "WHERE 1=1  "
        "AND ee.Year + e.Offset < ( "
        " SELECT year_ee.Year + year_e.Offset "
        " FROM Nominations year_n "
        " JOIN EventEditions year_ee ON year_ee.Id = year_n.EventEditionId "
        " JOIN Events year_e ON year_e.Id = year_ee.EventId "
        " WHERE year_n.Id = ? "
        ") "
        "AND e.IsAcademyAwards = 1   "
        "AND ac.IsBestActress IS NULL  "
        "AND ac.IsBestSupportingActress IS NULL  "
        "AND n.Id <> ? "
        "AND pn.PersonId IN ( "
        " SELECT PersonId FROM "
        " Person_Nomination "
        " WHERE NominationId = ? "
        ") "
        "AND n.IsWinner = ? ",
        (nomination_id, nomination_id, nomination_id, is_winner )
    )
    data_count = cursor.fetchone()
    if data_count is not None:
        return data_count[0]
    return 0

def get_person_other_academy_award_non_best_actress_nominations_same_year(nomination_id):
    cursor.execute(
        "SELECT COUNT(DISTINCT(n.Id)) "
        "FROM Nominations n  "
        "JOIN Person_Nomination pn ON pn.NominationId = n.Id "
        "JOIN AwardCategories ac ON ac.Id = n.AwardCategoryId  "
        "JOIN EventEditions ee ON ee.Id = n.EventEditionId  "
        "JOIN Events e ON e.Id = ee.EventId  "
        "WHERE 1=1  "
        "AND ee.Year + e.Offset = ( "
        " SELECT year_ee.Year + year_e.Offset "
        " FROM Nominations year_n "
        " JOIN EventEditions year_ee ON year_ee.Id = year_n.EventEditionId "
        " JOIN Events year_e ON year_e.Id = year_ee.EventId "
        " WHERE year_n.Id = ? "
        ") "
        "AND e.IsAcademyAwards = 1   "
        "AND ac.IsBestActress IS NULL  "
        "AND ac.IsBestSupportingActress IS NULL  "
        "AND n.Id <> ? "
        "AND pn.PersonId IN ( "
        " SELECT PersonId FROM "
        " Person_Nomination "
        " WHERE NominationId = ? "
        ") ",
        (nomination_id, nomination_id, nomination_id )
    )
    data_count = cursor.fetchone()
    if data_count is not None:
        return data_count[0]
    return 0

def get_person_other_academy_award_best_supporting_actress_nominations_previous_years(nomination_id, is_winner):
    cursor.execute(
        "SELECT COUNT(DISTINCT(n.Id)) "
        "FROM Nominations n  "
        "JOIN Person_Nomination pn ON pn.NominationId = n.Id "
        "JOIN AwardCategories ac ON ac.Id = n.AwardCategoryId  "
        "JOIN EventEditions ee ON ee.Id = n.EventEditionId  "
        "JOIN Events e ON e.Id = ee.EventId  "
        "WHERE 1=1  "
        "AND ee.Year + e.Offset < ( "
        " SELECT year_ee.Year + year_e.Offset "
        " FROM Nominations year_n "
        " JOIN EventEditions year_ee ON year_ee.Id = year_n.EventEditionId "
        " JOIN Events year_e ON year_e.Id = year_ee.EventId "
        " WHERE year_n.Id = ? "
        ") "
        "AND e.IsAcademyAwards = 1   "
        "AND ac.IsBestSupportingActress = 1  "
        "AND n.Id <> ? "
        "AND pn.PersonId IN ( "
        " SELECT PersonId FROM "
        " Person_Nomination "
        " WHERE NominationId = ? "
        ") "
        "AND n.IsWinner = ? ",
        (nomination_id, nomination_id, nomination_id, is_winner )
    )
    data_count = cursor.fetchone()
    if data_count is not None:
        return data_count[0]
    return 0

def get_person_other_academy_award_best_supporting_actress_nominations_same_year(nomination_id):
    cursor.execute(
        "SELECT COUNT(DISTINCT(n.Id)) "
        "FROM Nominations n  "
        "JOIN Person_Nomination pn ON pn.NominationId = n.Id "
        "JOIN AwardCategories ac ON ac.Id = n.AwardCategoryId  "
        "JOIN EventEditions ee ON ee.Id = n.EventEditionId  "
        "JOIN Events e ON e.Id = ee.EventId  "
        "WHERE 1=1  "
        "AND ee.Year + e.Offset = ( "
        " SELECT year_ee.Year + year_e.Offset "
        " FROM Nominations year_n "
        " JOIN EventEditions year_ee ON year_ee.Id = year_n.EventEditionId "
        " JOIN Events year_e ON year_e.Id = year_ee.EventId "
        " WHERE year_n.Id = ? "
        ") "
        "AND e.IsAcademyAwards = 1   "
        "AND ac.IsBestSupportingActress = 1  "
        "AND n.Id <> ? "
        "AND pn.PersonId IN ( "
        " SELECT PersonId FROM "
        " Person_Nomination "
        " WHERE NominationId = ? "
        ") ",
        (nomination_id, nomination_id, nomination_id )
    )
    data_count = cursor.fetchone()
    if data_count is not None:
        return data_count[0]
    return 0

def get_person_other_academy_award_best_actress_nominations_previous_years(nomination_id, is_winner):
    cursor.execute(
        "SELECT COUNT(DISTINCT(n.Id)) "
        "FROM Nominations n  "
        "JOIN Person_Nomination pn ON pn.NominationId = n.Id "
        "JOIN AwardCategories ac ON ac.Id = n.AwardCategoryId  "
        "JOIN EventEditions ee ON ee.Id = n.EventEditionId  "
        "JOIN Events e ON e.Id = ee.EventId  "
        "WHERE 1=1  "
        "AND ee.Year + e.Offset < ( "
        " SELECT year_ee.Year + year_e.Offset "
        " FROM Nominations year_n "
        " JOIN EventEditions year_ee ON year_ee.Id = year_n.EventEditionId "
        " JOIN Events year_e ON year_e.Id = year_ee.EventId "
        " WHERE year_n.Id = ? "
        ") "
        "AND e.IsAcademyAwards = 1   "
        "AND ac.IsBestActress = 1  "
        "AND n.Id <> ? "
        "AND pn.PersonId IN ( "
        " SELECT PersonId FROM "
        " Person_Nomination "
        " WHERE NominationId = ? "
        ") "
        "AND n.IsWinner = ? ",
        (nomination_id, nomination_id, nomination_id, is_winner )
    )
    data_count = cursor.fetchone()
    if data_count is not None:
        return data_count[0]
    return 0

def get_person_other_academy_award_best_actress_nominations_same_year(nomination_id):
    cursor.execute(
        "SELECT COUNT(DISTINCT(n.Id)) "
        "FROM Nominations n  "
        "JOIN Person_Nomination pn ON pn.NominationId = n.Id "
        "JOIN AwardCategories ac ON ac.Id = n.AwardCategoryId  "
        "JOIN EventEditions ee ON ee.Id = n.EventEditionId  "
        "JOIN Events e ON e.Id = ee.EventId  "
        "WHERE 1=1  "
        "AND ee.Year + e.Offset = ( "
        " SELECT year_ee.Year + year_e.Offset "
        " FROM Nominations year_n "
        " JOIN EventEditions year_ee ON year_ee.Id = year_n.EventEditionId "
        " JOIN Events year_e ON year_e.Id = year_ee.EventId "
        " WHERE year_n.Id = ? "
        ") "
        "AND e.IsAcademyAwards = 1   "
        "AND ac.IsBestActress = 1  "
        "AND n.Id <> ? "
        "AND pn.PersonId IN ( "
        " SELECT PersonId FROM "
        " Person_Nomination "
        " WHERE NominationId = ? "
        ") ",
        (nomination_id, nomination_id, nomination_id )
    )
    data_count = cursor.fetchone()
    if data_count is not None:
        return data_count[0]
    return 0

In [15]:
def get_crew_other_academy_award_nominations_previous_years(nomination_id, is_winner):
    cursor.execute(
        "SELECT COUNT(DISTINCT(n.Id)) "
        "FROM Nominations n   "
        "JOIN Person_Nomination pn ON pn.NominationId = n.Id  "
        "JOIN AwardCategories ac ON ac.Id = n.AwardCategoryId   "
        "JOIN EventEditions ee ON ee.Id = n.EventEditionId   "
        "JOIN Events e ON e.Id = ee.EventId  "
        "WHERE 1=1   "
        "AND ee.Year + e.Offset < (  "
        " SELECT year_ee.Year + year_e.Offset  "
        " FROM Nominations year_n  "
        " JOIN EventEditions year_ee ON year_ee.Id = year_n.EventEditionId  "
        " JOIN Events year_e ON year_e.Id = year_ee.EventId  "
        " WHERE year_n.Id = ?  "
        ")  "
        "AND e.IsAcademyAwards = 1    "
        "AND ac.IsMusicOrSong IS NULL  "
        "AND pn.PersonId NOT IN (  "
        " SELECT PersonId FROM  "
        " Person_Nomination  "
        " WHERE NominationId = ?  "
        ") "
        "AND n.FilmId <> ( "
        "  SELECT notfilm_nomination.FilmId FROM Nominations notfilm_nomination "
        "  WHERE notfilm_nomination.Id = ? "
        ") "
        "AND pn.PersonId IN ( "
        "  SELECT credits_c.PersonId FROM Credits credits_c "
        "  WHERE FilmId = ( "
        "    SELECT credits_nomination.FilmId FROM Nominations credits_nomination "
        "    WHERE credits_nomination.Id = ? "
        "  ) "
        ") "
        "AND n.IsWinner = ? ",
        (nomination_id, nomination_id, nomination_id, nomination_id, is_winner )
    )
    data_count = cursor.fetchone()
    if data_count is not None:
        return data_count[0]
    return 0

def get_crew_other_academy_award_nominations_same_year(nomination_id):
    cursor.execute(
        "SELECT COUNT(DISTINCT(n.Id)) "
        "FROM Nominations n   "
        "JOIN Person_Nomination pn ON pn.NominationId = n.Id  "
        "JOIN AwardCategories ac ON ac.Id = n.AwardCategoryId   "
        "JOIN EventEditions ee ON ee.Id = n.EventEditionId   "
        "JOIN Events e ON e.Id = ee.EventId  "
        "WHERE 1=1   "
        "AND ee.Year + e.Offset = (  "
        " SELECT year_ee.Year + year_e.Offset  "
        " FROM Nominations year_n  "
        " JOIN EventEditions year_ee ON year_ee.Id = year_n.EventEditionId  "
        " JOIN Events year_e ON year_e.Id = year_ee.EventId  "
        " WHERE year_n.Id = ?  "
        ")  "
        "AND e.IsAcademyAwards = 1    "
        "AND ac.IsMusicOrSong IS NULL  "
        "AND pn.PersonId NOT IN (  "
        " SELECT PersonId FROM  "
        " Person_Nomination  "
        " WHERE NominationId = ?  "
        ") "
        "AND n.FilmId <> ( "
        "  SELECT notfilm_nomination.FilmId FROM Nominations notfilm_nomination "
        "  WHERE notfilm_nomination.Id = ? "
        ") "
        "AND pn.PersonId IN ( "
        "  SELECT credits_c.PersonId FROM Credits credits_c "
        "  WHERE FilmId = ( "
        "    SELECT credits_nomination.FilmId FROM Nominations credits_nomination "
        "    WHERE credits_nomination.Id = ? "
        "  ) "
        ") ",
        (nomination_id, nomination_id, nomination_id, nomination_id )
    )
    data_count = cursor.fetchone()
    if data_count is not None:
        return data_count[0]
    return 0

In [16]:
def get_film_best_picture_academy_award_nominations(nomination_id):
    cursor.execute(
        "SELECT COUNT(DISTINCT(n.Id)) "
        "FROM Nominations n "
        "JOIN AwardCategories ac ON ac.Id = n.AwardCategoryId "
        "JOIN EventEditions ee ON ee.Id = n.EventEditionId "
        "JOIN Events e ON e.Id = ee.EventId "
        "WHERE 1=1 "
        "AND e.IsAcademyAwards "
        "AND ac.IsBestPicture = 1 "
        "AND ac.IsBestDirector IS NULL "
        "AND ac.IsWriting IS NULL "
        "AND ac.IsBestActor IS NULL "
        "AND ac.IsBestActress IS NULL "
        "AND n.FilmId IN ( "
            "SELECT FilmId FROM "
            "Nominations "
            "WHERE Id = ? "
        ") ",
        (nomination_id, )
    )
    data_count = cursor.fetchone()
    if data_count is not None:
        return data_count[0]
    return 0

def get_film_director_academy_award_nominations(nomination_id):
    cursor.execute(
        "SELECT COUNT(DISTINCT(n.Id)) "
        "FROM Nominations n "
        "JOIN AwardCategories ac ON ac.Id = n.AwardCategoryId "
        "JOIN EventEditions ee ON ee.Id = n.EventEditionId "
        "JOIN Events e ON e.Id = ee.EventId "
        "WHERE 1=1 "
        "AND e.IsAcademyAwards "
        "AND ac.IsBestPicture IS NULL "
        "AND ac.IsBestDirector = 1 "
        "AND ac.IsWriting IS NULL "
        "AND ac.IsBestActor IS NULL "
        "AND ac.IsBestActress IS NULL "
        "AND n.FilmId IN ( "
            "SELECT FilmId FROM "
            "Nominations "
            "WHERE Id = ? "
        ") ",
        (nomination_id, )
    )
    data_count = cursor.fetchone()
    if data_count is not None:
        return data_count[0]
    return 0

def get_film_actor_academy_award_nominations(nomination_id):
    cursor.execute(
        "SELECT COUNT(DISTINCT(n.Id)) "
        "FROM Nominations n "
        "JOIN AwardCategories ac ON ac.Id = n.AwardCategoryId "
        "JOIN EventEditions ee ON ee.Id = n.EventEditionId "
        "JOIN Events e ON e.Id = ee.EventId "
        "WHERE 1=1 "
        "AND e.IsAcademyAwards "
        "AND ac.IsBestPicture IS NULL "
        "AND ac.IsBestDirector IS NULL "
        "AND ac.IsWriting IS NULL "
        "AND ac.IsBestActor = 1 "
        "AND ac.IsBestActress IS NULL "
        "AND n.FilmId IN ( "
            "SELECT FilmId FROM "
            "Nominations "
            "WHERE Id = ? "
        ") ",
        (nomination_id, )
    )
    data_count = cursor.fetchone()
    if data_count is not None:
        return data_count[0]
    return 0

def get_film_actress_academy_award_nominations(nomination_id):
    cursor.execute(
        "SELECT COUNT(DISTINCT(n.Id)) "
        "FROM Nominations n "
        "JOIN AwardCategories ac ON ac.Id = n.AwardCategoryId "
        "JOIN EventEditions ee ON ee.Id = n.EventEditionId "
        "JOIN Events e ON e.Id = ee.EventId "
        "WHERE 1=1 "
        "AND e.IsAcademyAwards "
        "AND ac.IsBestPicture IS NULL "
        "AND ac.IsBestDirector IS NULL "
        "AND ac.IsWriting IS NULL "
        "AND ac.IsBestActor IS NULL "
        "AND ac.IsBestActress = 1 "
        "AND n.FilmId IN ( "
            "SELECT FilmId FROM "
            "Nominations "
            "WHERE Id = ? "
        ") ",
        (nomination_id, )
    )
    data_count = cursor.fetchone()
    if data_count is not None:
        return data_count[0]
    return 0

def get_film_screenplay_academy_award_nominations(nomination_id):
    cursor.execute(
        "SELECT COUNT(DISTINCT(n.Id)) "
        "FROM Nominations n "
        "JOIN AwardCategories ac ON ac.Id = n.AwardCategoryId "
        "JOIN EventEditions ee ON ee.Id = n.EventEditionId "
        "JOIN Events e ON e.Id = ee.EventId "
        "WHERE 1=1 "
        "AND e.IsAcademyAwards "
        "AND ac.IsBestPicture IS NULL "
        "AND ac.IsBestDirector IS NULL "
        "AND ac.IsWriting = 1 "
        "AND ac.IsBestActor IS NULL "
        "AND ac.IsBestActress IS NULL "
        "AND n.FilmId IN ( "
            "SELECT FilmId FROM "
            "Nominations "
            "WHERE Id = ? "
        ") ",
        (nomination_id, )
    )
    data_count = cursor.fetchone()
    if data_count is not None:
        return data_count[0]
    return 0

def get_film_other_academy_award_nominations(nomination_id):
    cursor.execute(
        "SELECT COUNT(DISTINCT(n.Id)) "
        "FROM Nominations n "
        "JOIN AwardCategories ac ON ac.Id = n.AwardCategoryId "
        "JOIN EventEditions ee ON ee.Id = n.EventEditionId "
        "JOIN Events e ON e.Id = ee.EventId "
        "WHERE 1=1 "
        "AND e.IsAcademyAwards "
        "AND ac.IsBestPicture IS NULL "
        "AND ac.IsBestPicture IS NULL "
        "AND ac.IsBestDirector IS NULL "
        "AND ac.IsWriting IS NULL "
        "AND ac.IsBestActor IS NULL "
        "AND ac.IsBestActress IS NULL "
        "AND n.FilmId IN ( "
            "SELECT FilmId FROM "
            "Nominations "
            "WHERE Id = ? "
        ") ",
        (nomination_id, )
    )
    data_count = cursor.fetchone()
    if data_count is not None:
        return data_count[0]
    return 0

In [17]:
def set_academy_award_counts(nomination):
    nomination_id = nomination['Nomination_Id']
    
    if INCLUDE_ACADEMY_AWARD['INCLUDE_FILM']:
        director_nom_count = get_film_director_academy_award_nominations(nomination_id)
        screenplay_nom_count = get_film_screenplay_academy_award_nominations(nomination_id)
        actor_nom_count = get_film_actor_academy_award_nominations(nomination_id)
        actress_nom_count = get_film_actor_academy_award_nominations(nomination_id)
        best_picture_nom_count = get_film_best_picture_academy_award_nominations(nomination_id)

        has_major_award_nom = director_nom_count > 0 and screenplay_nom_count > 0 and actor_nom_count > 0 and best_picture_nom_count > 0

        set_or_update_key(nomination,
                          "{0}_{1}_{2}_{3}".format(c.NOMINATIONS_PREFIX, 'Academy', c.FILM_PREFIX, 'HasMajorNoms'),
                          1 if has_major_award_nom else 0)

        set_or_update_key(nomination,
                          "{0}_{1}_{2}_{3}".format(c.NOMINATIONS_PREFIX, 'Academy', c.FILM_PREFIX, 'DirectorNomCount'),
                          director_nom_count)
        set_or_update_key(nomination,
                          "{0}_{1}_{2}_{3}".format(c.NOMINATIONS_PREFIX, 'Academy', c.FILM_PREFIX, 'ScreenplayNomCount'),
                          screenplay_nom_count)
        set_or_update_key(nomination,
                          "{0}_{1}_{2}_{3}".format(c.NOMINATIONS_PREFIX, 'Academy', c.FILM_PREFIX, 'ActorNomCount'),
                          actor_nom_count)
        set_or_update_key(nomination,
                          "{0}_{1}_{2}_{3}".format(c.NOMINATIONS_PREFIX, 'Academy', c.FILM_PREFIX, 'ActressNomCount'),
                          actress_nom_count)
        set_or_update_key(nomination,
                          "{0}_{1}_{2}_{3}".format(c.NOMINATIONS_PREFIX, 'Academy', c.FILM_PREFIX, 'BestPictureNomCount'),
                          best_picture_nom_count)

        set_or_update_key(nomination,
                          "{0}_{1}_{2}_{3}".format(c.NOMINATIONS_PREFIX, 'Academy', c.FILM_PREFIX, 'OtherNomCount'),
                          get_film_other_academy_award_nominations(nomination_id))

    if INCLUDE_ACADEMY_AWARD['INCLUDE_PERSON']:  
        set_or_update_key(nomination,
                          "{0}_{1}_{2}_{3}".format(c.NOMINATIONS_PREFIX, 'Academy', c.PERSON_PREFIX, 'OtherBestActressNomSameYearCount'),
                          get_person_other_academy_award_best_actress_nominations_same_year(nomination_id)
                         )
        set_or_update_key(nomination,
                          "{0}_{1}_{2}_{3}".format(c.NOMINATIONS_PREFIX, 'Academy', c.PERSON_PREFIX, 'OtherBestActressWinPrevYearCount'),
                          get_person_other_academy_award_best_actress_nominations_previous_years(nomination_id, 1)
                         )
        set_or_update_key(nomination,
                          "{0}_{1}_{2}_{3}".format(c.NOMINATIONS_PREFIX, 'Academy', c.PERSON_PREFIX, 'OtherBestActressNomPrevYearCount'),
                          get_person_other_academy_award_best_actress_nominations_previous_years(nomination_id, 0)
                         )
        set_or_update_key(nomination,
                          "{0}_{1}_{2}_{3}".format(c.NOMINATIONS_PREFIX, 'Academy', c.PERSON_PREFIX, 'OtherBestSupportingActressNomSameYearCount'),
                          get_person_other_academy_award_best_supporting_actress_nominations_same_year(nomination_id)
                         )
        set_or_update_key(nomination,
                          "{0}_{1}_{2}_{3}".format(c.NOMINATIONS_PREFIX, 'Academy', c.PERSON_PREFIX, 'OtherBestSupportingActressWinPrevYearCount'),
                          get_person_other_academy_award_best_supporting_actress_nominations_previous_years(nomination_id, 1))
        set_or_update_key(nomination,
                          "{0}_{1}_{2}_{3}".format(c.NOMINATIONS_PREFIX, 'Academy', c.PERSON_PREFIX, 'OtherBestSupportingActressNomPrevYearCount'),
                          get_person_other_academy_award_best_supporting_actress_nominations_previous_years(nomination_id, 0))
        set_or_update_key(nomination,
                          "{0}_{1}_{2}_{3}".format(c.NOMINATIONS_PREFIX, 'Academy', c.PERSON_PREFIX, 'OtherNonBestActressNomSameYearCount'),
                          get_person_other_academy_award_non_best_actress_nominations_same_year(nomination_id)
                         )
        set_or_update_key(nomination,
                          "{0}_{1}_{2}_{3}".format(c.NOMINATIONS_PREFIX, 'Academy', c.PERSON_PREFIX, 'OtherNonBestActressWinPrevYearCount'),
                          get_person_other_academy_award_non_best_actress_nominations_previous_years(nomination_id, 1)
                         )
        set_or_update_key(nomination,
                          "{0}_{1}_{2}_{3}".format(c.NOMINATIONS_PREFIX, 'Academy', c.PERSON_PREFIX, 'OtherNonBestActressNomPrevYearCount'),
                          get_person_other_academy_award_non_best_actress_nominations_previous_years(nomination_id, 0)
                         )
        
    if INCLUDE_ACADEMY_AWARD['INCLUDE_CREW']:    
        set_or_update_key(nomination,
                          "{0}_{1}_{2}_{3}".format(c.NOMINATIONS_PREFIX, 'Academy', c.CREW_PREFIX, 'OtherNomSameYearCount'),
                          get_crew_other_academy_award_nominations_same_year(nomination_id)
                         )
        set_or_update_key(nomination,
                          "{0}_{1}_{2}_{3}".format(c.NOMINATIONS_PREFIX, 'Academy', c.CREW_PREFIX, 'OtherWinPrevYearCount'),
                          get_crew_other_academy_award_nominations_previous_years(nomination_id, 1)
                         )
        set_or_update_key(nomination,
                          "{0}_{1}_{2}_{3}".format(c.NOMINATIONS_PREFIX, 'Academy', c.CREW_PREFIX, 'OtherNomPrevYearCount'),
                          get_crew_other_academy_award_nominations_previous_years(nomination_id, 0)
                         )

In [18]:
def set_other_nomination_details(nomination):
    if any_academy_award_selected():
        set_academy_award_counts(nomination)

    if any_other_event_selected():
        for event in non_academy_award_events:
            if event['FirstYear'] <= nomination['Event_Year']:
                get_event_counts(nomination, event)

In [19]:
def get_film_keywords(film_id):
    cursor.execute(
        "SELECT kw.Id, kw.Name "
        "FROM Keywords kw "
        "JOIN Film_Keyword fk ON fk.KeywordId = kw.Id "
        "WHERE 1=1 "
        "AND fk.FilmId = ? ",
        (film_id, )
    )
    return [
        dict(row) for row in cursor.fetchall()
    ]


def set_film_keywords(nomination):
    for keyword in get_film_keywords(nomination['Nomination_Film_Id']):
        set_or_update_key(nomination,
                          "{0}_{1}_{2}".format(c.KEYWORD_PREFIX, str(keyword['Id']), keyword['Name'].replace(' ', '-')), 
                          1)

In [20]:
def get_film_genres(film_id):
    cursor.execute(
        "SELECT g.Name FROM Genres g "
        "JOIN Film_Genre fg "
        "ON fg.GenreId = g.Id "
        "WHERE 1=1 "
        "AND fg.FilmId = ? ",
        (film_id, )
    )
    return [
        row[0].strip() for row in cursor.fetchall()
    ]

def set_film_genres(nomination):
    for genre in get_film_genres(nomination['Nomination_Film_Id']):
        set_or_update_key(nomination,
                          "{0}_{1}".format(c.GENRE_PREFIX, genre),
                          1
                         )

In [21]:
def get_film_aspect_ratio(film_id):
    cursor.execute(
        "SELECT ar.Name "
        "FROM AspectRatios ar "
        "JOIN Films f ON f.AspectRatioId = ar.Id "
        "WHERE  1 = 1 "
        "AND f.Id = ? ",
        (film_id, )
    )
    return [
        dict(row) for row in cursor.fetchall()
    ]

def set_film_aspect_ratio(nomination):
    for aspect_ratio in get_film_aspect_ratio(nomination['Nomination_Film_Id']):
        set_or_update_highest(nomination, "{0}_{1}".format(c.ASPECT_RATIO_PREFIX, aspect_ratio['Name']), 1)

In [22]:
def get_film_certification(film_id):
    cursor.execute(
        "SELECT ce.Name "
        "FROM Certifications ce "
        "JOIN Films f ON f.UsCertificationId = ce.Id "
        "WHERE  1 = 1 "
        "AND f.Id = ? ",
        (film_id, )
    )
    return [
        dict(row) for row in cursor.fetchall()
    ]

def set_film_certification(nomination):
    for certification in get_film_certification(nomination['Nomination_Film_Id']):
        set_or_update_highest(nomination, "{0}_{1}".format(c.CERTIFICATION_PREFIX, certification['Name']), 1)

In [23]:
def get_film_colours(film_id):
    cursor.execute(
        "SELECT DISTINCT(bc.Id), bc.IsBlackWhite, bc.IsColor, c.IsFraction "
        "FROM Colors c "
        "JOIN Film_Color fc ON fc.ColorId = c.Id "
        "JOIN BaseColors bc ON bc.Id = c.BaseColorId "
        "WHERE  1 = 1 "
        "AND c.IsEditionSpecific IS NULL "
        "AND fc.FilmId = ? ",
        (film_id, )
    )
    return [
        dict(row) for row in cursor.fetchall()
    ]

def set_film_colour(nomination):
    for colour in get_film_colours(nomination['Nomination_Film_Id']):
        if colour['IsBlackWhite'] == 1:
            if colour['IsFraction'] == 1:
                set_or_update_highest(nomination, "{0}_{1}".format(c.COLOR_PREFIX, 'BlackWhiteFraction'), 1)
            else:
                set_or_update_highest(nomination, "{0}_{1}".format(c.COLOR_PREFIX, 'BlackWhite'), 1)
        else:
            if colour['IsFraction'] == 1:
                set_or_update_highest(nomination, "{0}_{1}".format(c.COLOR_PREFIX, 'ColorFraction'), 1)
            else:
                set_or_update_highest(nomination, "{0}_{1}".format(c.COLOR_PREFIX, 'Color'), 1)

In [24]:
def get_film_reviews(film_id):
    cursor.execute(
        "SELECT fr.Score, "
        "fr.CriticId, c.Code AS CriticCode, "
        "fr.ReviewSourceId, rs.Code as ReviewSourceCode "
        "FROM Film_Review fr "
        "JOIN ReviewSources rs ON rs.Id = fr.ReviewSourceId "
        "JOIN Critics c ON c.Id = fr.CriticId "
        "WHERE FilmId = ? ",
        (film_id, )
    )
    return [
        dict(row) for row in cursor.fetchall()
    ]

def set_film_reviews(nomination):
    for review in get_film_reviews(nomination['Nomination_Film_Id']):
        if INCLUDE_REVIEW_SOURCES:
            set_or_update_highest(nomination, "{0}_{1}_{2}".format(c.REVIEW_PREFIX, 'Source', review['ReviewSourceCode']), review['Score'])
        if INCLUDE_REVIEW_CRITICS:
            set_or_update_highest(nomination, "{0}_{1}_{2}".format(c.REVIEW_PREFIX, 'Critic', review['CriticCode']), review['Score'])

In [25]:
import timeit

def get_nomination_details(nomination):
    event_start_time = timeit.default_timer()
    print(nomination['Nomination_Film_Title'])
    if INCLUDE_GENRES:
        set_film_genres(nomination)
    if INCLUDE_KEYWORDS:
        set_film_keywords(nomination)
    if INCLUDE_ASPECT_RATIOS:
        set_film_aspect_ratio(nomination)
    if INCLUDE_CERTIFICATIONS:
        set_film_certification(nomination)
    if INCLUDE_COLORS:
        set_film_colour(nomination)
    if INCLUDE_REVIEW_SOURCES or INCLUDE_REVIEW_CRITICS:
        set_film_reviews(nomination)
    set_other_nomination_details(nomination)
    
    print("--- %0.3fs seconds to get event ---" % (timeit.default_timer() - event_start_time))
    return nomination

In [26]:
cursor.execute(
    "SELECT * FROM v_academyBestActress "
    "WHERE Event_Year >= ? ",
    (MIN_YEAR if EXCLUDE_EARLIER_FILMS else 1900,)
)
nominations = []
start_time = timeit.default_timer()
for row in cursor.fetchall():
    nominations.append(get_nomination_details(dict(row)))
print("--- %0.3fs seconds to get all events ---" % (timeit.default_timer() - start_time))

Bombshell  
--- 3.657s seconds to get event ---
Harriet  
--- 3.434s seconds to get event ---
Judy  
--- 3.387s seconds to get event ---
Little Women  
--- 4.266s seconds to get event ---
Marriage Story  
--- 3.522s seconds to get event ---
The Favourite  
--- 3.668s seconds to get event ---
The Wife  
--- 3.297s seconds to get event ---
A Star Is Born  
--- 4.253s seconds to get event ---
Can You Ever Forgive Me?  
--- 3.652s seconds to get event ---
Roma  
--- 3.701s seconds to get event ---
Three Billboards Outside Ebbing, Missouri  
--- 3.393s seconds to get event ---
I, Tonya  
--- 3.795s seconds to get event ---
The Post  
--- 4.196s seconds to get event ---
The Shape of Water  
--- 4.003s seconds to get event ---
Lady Bird  
--- 3.731s seconds to get event ---
La La Land  
--- 4.730s seconds to get event ---
Elle  
--- 3.584s seconds to get event ---
Florence Foster Jenkins  
--- 4.321s seconds to get event ---
Jackie  
--- 4.434s seconds to get event ---
Loving  
--- 3.854s sec

--- 2.950s seconds to get event ---
Working Girl  
--- 3.163s seconds to get event ---
Evil Angels  
--- 3.116s seconds to get event ---
Gorillas in the Mist: The Story of Dian Fossey  
--- 3.034s seconds to get event ---
Moonstruck  
--- 3.367s seconds to get event ---
Fatal Attraction  
--- 3.301s seconds to get event ---
Broadcast News  
--- 3.060s seconds to get event ---
Ironweed  
--- 2.893s seconds to get event ---
Anna  
--- 2.909s seconds to get event ---
Children of a Lesser God  
--- 3.088s seconds to get event ---
The Morning After  
--- 3.701s seconds to get event ---
Peggy Sue Got Married  
--- 3.231s seconds to get event ---
Aliens  
--- 3.115s seconds to get event ---
Crimes of the Heart  
--- 2.777s seconds to get event ---
The Trip to Bountiful  
--- 2.966s seconds to get event ---
Agnes of God  
--- 2.959s seconds to get event ---
Sweet Dreams  
--- 2.871s seconds to get event ---
Out of Africa  
--- 3.025s seconds to get event ---
The Color Purple  
--- 3.073s secon

--- 2.877s seconds to get event ---
Separate Tables  
--- 2.638s seconds to get event ---
Cat on a Hot Tin Roof  
--- 2.356s seconds to get event ---
Auntie Mame  
--- 2.363s seconds to get event ---
Some Came Running  
--- 2.540s seconds to get event ---
The Three Faces of Eve  
--- 3.109s seconds to get event ---
Wild Is the Wind  
--- 2.740s seconds to get event ---
Heaven Knows, Mr. Allison  
--- 2.384s seconds to get event ---
Raintree County  
--- 2.331s seconds to get event ---
Peyton Place  
--- 2.327s seconds to get event ---
Anastasia  
--- 2.271s seconds to get event ---
Baby Doll  
--- 2.297s seconds to get event ---
The King and I  
--- 2.410s seconds to get event ---
The Rainmaker  
--- 2.308s seconds to get event ---
The Bad Seed  
--- 2.429s seconds to get event ---
The Rose Tattoo  
--- 2.217s seconds to get event ---
Interrupted Melody  
--- 2.371s seconds to get event ---
Love Is a Many-Splendored Thing  
--- 2.256s seconds to get event ---
Summertime  
--- 2.363s se

In [27]:
import pandas as pd
df_nominations = pd.DataFrame(nominations)
df_nominations.head()

Unnamed: 0,Event_Year,Film_BoxOffice_OpeningWeekend,Film_BoxOffice_USA,Film_BoxOffice_WorldWide,Film_Budget,Film_FirstReleaseMonth_Cat_April,Film_FirstReleaseMonth_Cat_August,Film_FirstReleaseMonth_Cat_December,Film_FirstReleaseMonth_Cat_February,Film_FirstReleaseMonth_Cat_January,...,Nominations_OtherEvents_Person_NonBestActressPrevYearNomCount,Nominations_OtherEvents_Person_NonBestActressPrevYearWinCount,Nominations_OtherEvents_Person_NonBestActressSameYearNomCount,Nominations_OtherEvents_Person_NonBestActressSameYearWinCount,Nominations_OtherEvents_Person_RisingStarPrevYearNomCount,Nominations_OtherEvents_Person_RisingStarPrevYearWinCount,Nominations_OtherEvents_Person_RisingStarSameYearNomCount,Nominations_OtherEvents_Person_RisingStarSameYearWinCount,Person_Age,Person_Name
0,2019,319157.0,29529804.0,31952534.0,32000000.0,0,0,1,0,0,...,1,0,0,0,0,0,0,0,44,Charlize Theron
1,2019,11676720.0,43059080.0,43259080.0,17000000.0,0,0,0,0,0,...,0,0,1,0,1,0,0,0,32,Cynthia Erivo
2,2019,2916548.0,24092297.0,38840713.0,,0,1,0,0,0,...,0,0,0,0,0,0,0,0,50,Renée Zellweger
3,2019,16755310.0,87698881.0,132311659.0,40000000.0,0,0,1,0,0,...,0,0,0,0,0,0,0,0,25,Saoirse Ronan
4,2019,,,323382.0,18600000.0,0,1,0,0,0,...,0,1,0,0,0,0,0,0,35,Scarlett Johansson


In [28]:
def get_cols_by_startswith(df_noms, key):
    return [col for col in df_noms.columns if col.startswith(key)]

## Drop columns returned with view

In [29]:
def drop_by_startswith(df_noms, key):
    cols = get_cols_by_startswith(df_noms, key)
    df_noms.drop(columns=cols, inplace=True)

if not INCLUDE_BOX_OFFICE:
    drop_by_startswith(df_nominations, "{0}_BoxOffice".format(c.FILM_PREFIX))
if not INCLUDE_BUDGET:
    drop_by_startswith(df_nominations, "{0}_Budget".format(c.FILM_PREFIX))
if not INCLUDE_RUNTIME:
    drop_by_startswith(df_nominations, "{0}_Runtime".format(c.FILM_PREFIX))
if not INCLUDE_FIRSTRELEASE_NUMERICAL:
    drop_by_startswith(df_nominations, "{0}_FirstReleaseMonth_Num".format(c.FILM_PREFIX))
if not INCLUDE_FIRSTRELEASE_CATEGORICAL:
    drop_by_startswith(df_nominations, "{0}_FirstReleaseMonth_Cat".format(c.FILM_PREFIX))
if not INCLUDE_USRELEASE_NUMERICAL:
    drop_by_startswith(df_nominations, "{0}_USReleaseMonth_Num".format(c.FILM_PREFIX))
if not INCLUDE_USRELEASE_CATEGORICAL:
    drop_by_startswith(df_nominations, "{0}_USReleaseMonth_Cat".format(c.FILM_PREFIX))
if not INCLUDE_METACRITIC:
    drop_by_startswith(df_nominations, "{0}_Metacritic".format(c.FILM_PREFIX))
if not INCLUDE_PERSON_AGE:
    drop_by_startswith(df_nominations, "{0}_Age".format(c.PERSON_PREFIX))
if not INCLUDE_NOMINATION_ID:
    drop_by_startswith(df_nominations, "Nomination_Id")
if not INCLUDE_FILM_IDS:
    drop_by_startswith(df_nominations, "Nomination_Film_ImdbId")
    drop_by_startswith(df_nominations, "Nomination_Film_Id")

df_nominations.head(10)

Unnamed: 0,Event_Year,Film_FirstReleaseMonth_Cat_April,Film_FirstReleaseMonth_Cat_August,Film_FirstReleaseMonth_Cat_December,Film_FirstReleaseMonth_Cat_February,Film_FirstReleaseMonth_Cat_January,Film_FirstReleaseMonth_Cat_July,Film_FirstReleaseMonth_Cat_June,Film_FirstReleaseMonth_Cat_March,Film_FirstReleaseMonth_Cat_May,...,Nominations_OtherEvents_Person_NonBestActressPrevYearNomCount,Nominations_OtherEvents_Person_NonBestActressPrevYearWinCount,Nominations_OtherEvents_Person_NonBestActressSameYearNomCount,Nominations_OtherEvents_Person_NonBestActressSameYearWinCount,Nominations_OtherEvents_Person_RisingStarPrevYearNomCount,Nominations_OtherEvents_Person_RisingStarPrevYearWinCount,Nominations_OtherEvents_Person_RisingStarSameYearNomCount,Nominations_OtherEvents_Person_RisingStarSameYearWinCount,Person_Age,Person_Name
0,2019,0,0,1,0,0,0,0,0,0,...,1,0,0,0,0,0,0,0,44,Charlize Theron
1,2019,0,0,0,0,0,0,0,0,0,...,0,0,1,0,1,0,0,0,32,Cynthia Erivo
2,2019,0,1,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,50,Renée Zellweger
3,2019,0,0,1,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,25,Saoirse Ronan
4,2019,0,1,0,0,0,0,0,0,0,...,0,1,0,0,0,0,0,0,35,Scarlett Johansson
5,2018,0,1,0,0,0,0,0,0,0,...,3,4,0,0,0,0,0,0,44,Olivia Colman
6,2018,0,0,0,0,0,0,0,0,0,...,7,4,0,0,0,0,0,0,71,Glenn Close
7,2018,0,1,0,0,0,0,0,0,0,...,0,1,0,2,0,0,0,0,32,Lady Gaga
8,2018,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,48,Melissa McCarthy
9,2018,0,1,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,25,Yalitza Aparicio


## FillNA

In [30]:
def fillna_by_startswith(df_noms, func, key):
    cols = get_cols_by_startswith(df_noms, key)
    for col in cols:
        val = func(df_noms[col])
        df_noms.fillna({col: val}, inplace=True)

def fillna(df_noms):
    fillna_by_startswith(df_noms, lambda series: 0, c.KEYWORD_PREFIX)
    fillna_by_startswith(df_noms, lambda series: 0, c.GENRE_PREFIX)
    fillna_by_startswith(df_noms, lambda series: 0, c.ASPECT_RATIO_PREFIX)
    fillna_by_startswith(df_noms, lambda series: 0, c.CERTIFICATION_PREFIX)
    fillna_by_startswith(df_noms, lambda series: 0, c.COLOR_PREFIX)
    fillna_by_startswith(df_noms, lambda series: series.mean(), c.REVIEW_PREFIX)
    fillna_by_startswith(df_noms, lambda series: series.mean(), "{0}_BoxOffice".format(c.FILM_PREFIX))
    fillna_by_startswith(df_noms, lambda series: series.mean(), "{0}_Budget".format(c.FILM_PREFIX))

In [31]:
if FILL_NA:
    fillna(df_nominations)

In [32]:
df_nominations.head(10)

Unnamed: 0,Event_Year,Film_FirstReleaseMonth_Cat_April,Film_FirstReleaseMonth_Cat_August,Film_FirstReleaseMonth_Cat_December,Film_FirstReleaseMonth_Cat_February,Film_FirstReleaseMonth_Cat_January,Film_FirstReleaseMonth_Cat_July,Film_FirstReleaseMonth_Cat_June,Film_FirstReleaseMonth_Cat_March,Film_FirstReleaseMonth_Cat_May,...,Nominations_OtherEvents_Person_NonBestActressPrevYearNomCount,Nominations_OtherEvents_Person_NonBestActressPrevYearWinCount,Nominations_OtherEvents_Person_NonBestActressSameYearNomCount,Nominations_OtherEvents_Person_NonBestActressSameYearWinCount,Nominations_OtherEvents_Person_RisingStarPrevYearNomCount,Nominations_OtherEvents_Person_RisingStarPrevYearWinCount,Nominations_OtherEvents_Person_RisingStarSameYearNomCount,Nominations_OtherEvents_Person_RisingStarSameYearWinCount,Person_Age,Person_Name
0,2019,0,0,1,0,0,0,0,0,0,...,1,0,0,0,0,0,0,0,44,Charlize Theron
1,2019,0,0,0,0,0,0,0,0,0,...,0,0,1,0,1,0,0,0,32,Cynthia Erivo
2,2019,0,1,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,50,Renée Zellweger
3,2019,0,0,1,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,25,Saoirse Ronan
4,2019,0,1,0,0,0,0,0,0,0,...,0,1,0,0,0,0,0,0,35,Scarlett Johansson
5,2018,0,1,0,0,0,0,0,0,0,...,3,4,0,0,0,0,0,0,44,Olivia Colman
6,2018,0,0,0,0,0,0,0,0,0,...,7,4,0,0,0,0,0,0,71,Glenn Close
7,2018,0,1,0,0,0,0,0,0,0,...,0,1,0,2,0,0,0,0,32,Lady Gaga
8,2018,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,48,Melissa McCarthy
9,2018,0,1,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,25,Yalitza Aparicio


In [33]:
def process_post_split(df_noms):
    if not INCLUDE_EVENT_YEAR:
        drop_by_startswith(df_noms, "Event_Year")
    df_noms.reset_index(inplace=True)
    drop_by_startswith(df_noms, "index")
    return df_noms

def split_data(df_noms):
    df_noms_data = process_post_split(df_noms[df_noms['Event_Year']<PREDICT_YEAR])
    df_noms_predict = process_post_split(df_noms[df_noms['Event_Year']==PREDICT_YEAR])
    drop_by_startswith(df_noms_predict, "Nomination_Is_Winner")
    return df_noms_data, df_noms_predict

In [34]:
def store_nomination_info(df_noms):
    df_noms_data, df_noms_predict = split_data(df_noms)
    data_file_name = "{0}_{1}{2}".format(nomination_file_name, c.NOMINATION_FILE_DATA, c.NOMINATION_FILE_PREFIX)
    df_noms_data.to_csv(os.path.join(data_folder, data_file_name))
    predict_file_name = "{0}_{1}{2}".format(nomination_file_name, c.NOMINATION_FILE_PREDICT, c.NOMINATION_FILE_PREFIX)
    df_noms_predict.to_csv(os.path.join(data_folder, predict_file_name))

In [35]:
store_nomination_info(df_nominations)

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  errors=errors)
