In [1]:
import pandas as pd
import numpy as np
import re
import nltk
from nltk.corpus import stopwords
from collections import Counter
import datetime
import pytz

In [2]:
### Load the four pickled inputs into pandas dataframes.
# NOTE: unpickling can execute arbitrary code - only load .pkl files from a trusted source.
movie_data_df, actor_data_df, movie_awards_df, music_awards_df = (
    pd.read_pickle(pickle_path)
    for pickle_path in (
        "movie_data_df.pkl",
        "actor_data_df.pkl",
        "movie_awards_df.pkl",
        "music_awards_df.pkl",
    )
)

In [3]:
# Preview the first rows of each dataframe, in load order.
display(
    movie_data_df.head(),
    actor_data_df.head(),
    movie_awards_df.head(),
    music_awards_df.head(),
)

Unnamed: 0,imdbID,Title,Year,Rated,Released,Runtime,Genre,Director,Writer,Lead_Actors,...,Production,Keyword_List,Movie_Crew,Movie_Cast,Budget,Opening_Weekend_Gross,Worldwide_Gross,Soundtrack_Credits,alternative_titles,Supporting_Actors
0,tt0056952,The Cool World,1963,,1964-11-02 00:00:00,105,"[Crime, Drama]",Shirley Clarke,"[Shirley Clarke, Carl Lee, Warren Miller]","[Rony Clanton, Carl Lee, Yolanda Rodríguez]",...,,"[jazz, gang, racism, woman director, harlem]","[Hugh A. Robertson, Robert Rossen, Carl Lee, F...","[Rony Clanton, Carl Lee, Yolanda Rodríguez, Cl...",0,,,,0,"[Clarence Williams III, Richard Ward, Antonio ..."
1,tt0065944,King: A Filmed Record... Montgomery to Memphis,1970,,1970-03-24 00:00:00+00:00,185,"[Documentary, Biography, History]","Sidney Lumet, Joseph L. Mankiewicz","[Mitchell Grayson, Ely A. Landau]","[Paul Newman, Joanne Woodward, Ruby Dee, James...",...,,"[civil war, alabama, racism, segregation]","[Sidney Lumet, Joseph L. Mankiewicz, Richard K...","[Martin Luther King, Coretta Scott King, Ralph...",0,,,,2,"[Harry Belafonte, James Baldwin, Martin Luther..."
2,tt0066559,What Do You Say to a Naked Lady?,1970,X,1970-02-18 00:00:00,85,"[Documentary, Comedy]",Allen Funt,[Allen Funt],"[Joie Addison, Laura Huston, Martin Meyers, Ka...",...,,"[camera, naked, candid camera, elevator, react...","[Urs Furrer, Steve Karmen, Allen Funt, Tom Man...","[Allen Funt, Joie Addison, Laura Huston, Marti...",0,,,,2,"[Joan Bell, Ernie Monah, Susanna Clemm, Donna ..."
3,tt0067741,Shaft,1971,R,1971-07-02 00:00:00,100,"[Action, Crime, Thriller]",Gordon Parks,"[Ernest Tidyman, John D.F. Black, Ernest Tidyman]","[Richard Roundtree, Moses Gunn, Charles Cioffi...",...,"[Metro Goldwyn Mayer, Shaft Productions Ltd.]","[new york city, black people, ghetto, daughter...","[Ernest Tidyman, Isaac Hayes, Gordon Parks, Jo...","[Richard Roundtree, Moses Gunn, Charles Cioffi...",4000000,,,"[[nm0005002, Isaac Hayes], [nm0005002, Isaac H...",1,"[Rex Robbins, George Strus, Joseph Leon, Marga..."
4,tt0068358,Charley-One-Eye,1973,R,1973-04-18 00:00:00+00:00,96,[Western],Don Chaffey,[Keith Leonard],"[Richard Roundtree, Roy Thinnes, Nigel Davenpo...",...,,"[civil war, on the run, racism]","[James Swann, John Cameron, Don Chaffey, Tim H...","[Richard Roundtree, Roy Thinnes, Nigel Davenpo...",0,,,,3,"[Rafael Albaicín, Luis Aller, Aldo Sambrell, J..."


Unnamed: 0,IMDb_ID,TMDb_ID,Gender,Birthday,Movie_Credits,actor
0,nm1004267,195666,1,1976-02-08,"{Snowflakes,""Alien: Containment"",Blooded,""Shoo...",Sharon Duncan-Brewster
1,nm2351246,479206,1,1981-08-27,"{""A Stone Cold Christmas"",""Bobbi Kristina"",""Br...",Demetria McKinney
2,nm4555351,1975743,1,1989-05-11,"{""Look Again"",""The Lovely Rejects"",""Healing Me...",Lauren Neal
3,nm0376200,83976,2,1938-02-01,"{""For the Love of a Dog"",""Ghost Fever"",Clunker...",Sherman Hemsley
4,nm0940851,21356,2,1969-07-25,"{""After All"",""The Inheritance"",""The Temptation...",D.B. Woodside


Unnamed: 0,year_award,winner
0,2005-04-01,Nicollette Sheridan
1,1979-04-01,Brad Davis
2,1964-04-01,Martin Ritt
3,1963-04-01,Janet Margolin
4,2008-04-01,Grey's Anatomy


Unnamed: 0,year_award,winner
0,2020-05-19,"{""Billie Eilish""}{""Finneas O'Connell"","" Rob Ki..."
1,2020-05-19,"{""Bon Iver""}{""BJ Burton"","" Brad Cook"","" Chris ..."
2,2020-05-19,"{""Ariana Grande""}{""Charles Anderson"","" Tommy B..."
3,2020-05-19,"{H.E.R.}{""Rodney Darkchild Jerkins"","" Joseph..."
4,2020-05-19,"{Khalid}{""Disclosure "","" Denis Kosiak"","" Ingma..."


In [4]:
### Define the pre-processing functions.

def normalise_list_names(names):
    """Return a new list holding the upper-cased form of every name.

    ``None`` is passed straight through so missing cells stay missing.
    """
    if names is None:
        return names
    return [entry.upper() for entry in names]

def normalise_string_names(names):
    """Return the given name string converted to upper case."""
    return names.upper()

def clean_soundtrack_credits(st_credits):
    """Reduce soundtrack credits to a list of unique upper-case names.

    Each credit is indexed at position 1 for the name (position 0 is ignored).
    Names keep the order of their first appearance. ``None`` yields an empty list.
    """
    if st_credits is None:
        return []
    # dict keys are unique and keep insertion order, so this drops repeats in place.
    unique_names = dict.fromkeys(credit[1].upper() for credit in st_credits)
    return list(unique_names)

def clean_writers(writer_list):
    """Strip parenthesised annotations such as "(screenplay)" from writer names.

    Args:
        writer_list: Iterable of writer name strings, or ``None``.

    Returns:
        A new list in which every "(...)" group (together with the whitespace
        directly before it) has been removed from each name and the result
        trimmed. ``None`` yields an empty list.
    """
    if writer_list is None:
        return []

    # Raw string: "\(" in a normal string literal is an invalid escape sequence.
    # The pattern matches one innermost bracket group only, so text *between*
    # two separate groups is kept instead of being swallowed by a greedy match.
    bracket_group = re.compile(r"\s*\([^()]*\)")

    writers = []
    for writer in writer_list:
        # Repeat until stable so nested groups like "(a (b))" are removed too.
        previous = None
        while previous != writer:
            previous = writer
            writer = bracket_group.sub("", writer)
        writers.append(writer.strip())
    return writers

def _count_awards_before(awards_df, names, date):
    """Count award rows dated before ``date`` whose winner text contains a name.

    Args:
        awards_df: Dataframe with a ``year_award`` column comparable to ``date``
            and a ``winner`` text column.
        names: Iterable of name strings, or ``None``.
        date: Cut-off; only rows with ``year_award`` strictly earlier count.

    Returns:
        int: Sum over ``names`` of the number of matching rows. A row is counted
        once per name it contains, and a name listed twice is counted twice.
        ``None`` or empty ``names`` gives 0.
    """
    if names is None:
        return 0
    # The date filter does not depend on the name, so apply it once up front.
    earlier_winners = awards_df.loc[awards_df["year_award"] < date, "winner"]
    return sum(
        # Literal substring match (regex=False); missing winners never match.
        int(earlier_winners.str.contains(name, na=False, regex=False).sum())
        for name in names
    )


def get_total_movie_awards(names, date):
    """Get the total number of movie/acting awards won amongst a group of individuals before the given date.

    Looks the names up in the module-level ``movie_awards_df``.
    """
    return _count_awards_before(movie_awards_df, names, date)


def get_total_music_awards(names, date):
    """Get the total number of music awards won amongst a group of individuals before the given date.

    Looks the names up in the module-level ``music_awards_df``.
    """
    return _count_awards_before(music_awards_df, names, date)

def calculate_black_actor_proportion(actors):
    """Calculate the proportion of Black actors that make up a movie's cast.

    A name counts when it occurs as a literal substring of at least one entry in
    the module-level ``actor_data_df["actor"]`` column (taken here to be the
    reference list of Black actors - confirm against the data source).

    Args:
        actors: List of actor name strings, or ``None``.

    Returns:
        The integer ``0`` when no name matches (or ``actors`` is ``None``/empty),
        otherwise the proportion formatted as a two-decimal string such as
        ``'0.50'``. Callers convert the column to float afterwards.
    """
    if actors is None:
        return 0

    known_actors = actor_data_df["actor"]
    # regex=False: names such as "ROBERT DOWNEY JR." or "A+B (C)" contain regex
    # metacharacters and must be matched literally, as the award helpers do.
    # NOTE(review): this is still a substring test, so a short name like "MODI"
    # matches any longer name containing it; exact matching may be intended.
    count = sum(
        1
        for name in actors
        if known_actors.str.contains(name, na=False, regex=False).any()
    )

    if count == 0:
        return 0
    proportion = count / len(actors)
    return f'{proportion:.2f}'

def create_word_freq_list(df_column):
    """Create a word frequency table sorted from most to least frequent.

    Args:
        df_column: pandas Series whose cells are lists of words (assumed - the
            function is not called in the visible part of the notebook).

    Returns:
        pandas Series indexed by word with its occurrence count as the value,
        in descending count order. Empty lists and missing cells add nothing.
    """
    # Summing a column of lists yields a plain Python list, which has no
    # value_counts(); explode() flattens the lists while staying a Series.
    return df_column.explode().value_counts()

In [5]:
# Convert the comma-separated Director string into a list of names.
# - Each name is stripped: splitting "A, B" on "," alone leaves " B" with a leading
#   space, which would not match a winner stored without that space.
# - Empty fragments (e.g. from a trailing comma) are dropped, because an empty
#   name is a substring of every winner and would match every award row.
# - Non-string values (missing directors, or lists left by an earlier run of this
#   cell) pass through unchanged, so the cell can be re-run safely.
movie_data_df["Director"] = movie_data_df["Director"].map(
    lambda director: [name.strip() for name in director.split(",") if name.strip()]
    if isinstance(director, str)
    else director
)

In [6]:
# Upper-case the names held in the list-valued credit columns of movie_data_df...
for list_column in (
    "Director",
    "Writer",
    "Lead_Actors",
    "Supporting_Actors",
    "Movie_Cast",
    "Movie_Crew",
):
    movie_data_df[list_column] = movie_data_df[list_column].map(normalise_list_names)

# ...and in the single-string name column of each lookup dataframe
# (values are cast to str first, exactly as before).
for frame, name_column in (
    (actor_data_df, "actor"),
    (movie_awards_df, "winner"),
    (music_awards_df, "winner"),
):
    frame[name_column] = frame[name_column].astype(str).map(normalise_string_names)

In [7]:
# Replace each Soundtrack_Credits cell with its de-duplicated, upper-cased list of names.
soundtrack_names = movie_data_df["Soundtrack_Credits"].map(clean_soundtrack_credits)
movie_data_df["Soundtrack_Credits"] = soundtrack_names

In [8]:
# Normalise all datetime columns to timezone-aware UTC so they can be compared.
# pd.to_datetime(..., utc=True) localises tz-naive values as UTC and *converts*
# tz-aware values to UTC. The previous per-value replace(tzinfo=...) on Released
# only relabelled the offset (shifting the instant for any non-UTC value) and
# raised on missing dates; this form also leaves the cell safe to re-run.
music_awards_df["year_award"] = pd.to_datetime(music_awards_df["year_award"], utc=True)
movie_awards_df["year_award"] = pd.to_datetime(movie_awards_df["year_award"], utc=True)
movie_data_df["Released"] = pd.to_datetime(movie_data_df["Released"], utc=True)

In [9]:
# Strip bracketed annotations from every name in the Writer lists.
cleaned_writers = movie_data_df["Writer"].apply(clean_writers)
movie_data_df["Writer"] = cleaned_writers

In [10]:
# Add a Total_Awards_* column for each credit group: the number of awards won by
# the people in that group before the movie's release date.
for total_column, names_column in (
    ("Total_Awards_Lead_Actors", "Lead_Actors"),
    ("Total_Awards_Supporting_Actors", "Supporting_Actors"),
    ("Total_Awards_Movie_Cast", "Movie_Cast"),
    ("Total_Awards_Director", "Director"),
    ("Total_Awards_Writer", "Writer"),
    ("Total_Awards_Movie_Crew", "Movie_Crew"),
):
    movie_data_df[total_column] = movie_data_df.apply(
        lambda row: get_total_movie_awards(row[names_column], row["Released"]),
        axis=1,
    )

# Soundtrack credits are looked up in the music awards instead.
movie_data_df["Total_Awards_Soundtrack_Credits"] = movie_data_df.apply(
    lambda row: get_total_music_awards(row["Soundtrack_Credits"], row["Released"]),
    axis=1,
)

In [11]:
# Select the rows whose lead + supporting award totals differ from the total for the whole cast.
lead_plus_supporting = (
    movie_data_df["Total_Awards_Lead_Actors"]
    + movie_data_df["Total_Awards_Supporting_Actors"]
)
inconsistent_rows = movie_data_df[lead_plus_supporting != movie_data_df["Total_Awards_Movie_Cast"]]
indexes = inconsistent_rows.index

# For every flagged row, show each name list next to its award total so the
# cause of the mismatch can be read off.
for index in indexes:
    print("Index: " + str(index))
    print(inconsistent_rows.loc[index, "Title"])
    for heading, names_column, total_column in (
        ("Lead Actors", "Lead_Actors", "Total_Awards_Lead_Actors"),
        ("Supporting Actors", "Supporting_Actors", "Total_Awards_Supporting_Actors"),
        ("Movie Cast", "Movie_Cast", "Total_Awards_Movie_Cast"),
    ):
        print(heading)
        display(inconsistent_rows.loc[index, names_column])
        display(inconsistent_rows.loc[index, total_column])
    print("Next row...")

# Make a note of what needs to be changed.

Index: 1
King: A Filmed Record... Montgomery to Memphis
Lead Actors


['PAUL NEWMAN', 'JOANNE WOODWARD', 'RUBY DEE', 'JAMES EARL JONES']

10

Supporting Actors


['HARRY BELAFONTE',
 'JAMES BALDWIN',
 'MARTIN LUTHER KING',
 'JOAN BAEZ',
 'CORETTA SCOTT KING',
 'BEN GAZZARA',
 'LEONARD BERNSTEIN',
 'TONY BENNETT',
 'MAHALIA JACKSON',
 'CHARLTON HESTON',
 'RALPH ABERNATHY',
 'JAMES GARNER',
 'MARLON BRANDO']

15

Movie Cast


['MARTIN LUTHER KING',
 'CORETTA SCOTT KING',
 'RALPH ABERNATHY',
 'JOAN BAEZ',
 'JAMES BALDWIN',
 'HARRY BELAFONTE',
 'TONY BENNETT',
 'LEONARD BERNSTEIN',
 'MARLON BRANDO',
 'RUBY DEE',
 'JAMES GARNER',
 'BEN GAZZARA',
 'CHARLTON HESTON',
 'MAHALIA JACKSON',
 'JAMES EARL JONES']

15

Next row...
Index: 145
Presumed Innocent
Lead Actors


['HARRISON FORD', 'RAUL JULIA', 'GRETA SCACCHI']

5

Supporting Actors


['ANNA MARIA HORSFORD',
 'RAÚL JULIÁ',
 'BRIAN DENNEHY',
 'PAUL WINFIELD',
 'JOHN SPENCER',
 'JOE GRIFASI',
 'BONNIE BEDELIA',
 'CHRISTINE ESTABROOK',
 'MICHAEL TOLAN',
 'MADISON ARNOLD',
 'TOM MARDIROSIAN',
 'SAB SHIMONO',
 'BRADLEY WHITFORD']

1

Movie Cast


['HARRISON FORD',
 'BRIAN DENNEHY',
 'RAÚL JULIÁ',
 'BONNIE BEDELIA',
 'PAUL WINFIELD',
 'GRETA SCACCHI',
 'JOHN SPENCER',
 'JOE GRIFASI',
 'TOM MARDIROSIAN',
 'ANNA MARIA HORSFORD',
 'SAB SHIMONO',
 'BRADLEY WHITFORD',
 'CHRISTINE ESTABROOK',
 'MICHAEL TOLAN',
 'MADISON ARNOLD']

3

Next row...
Index: 281
Things to Do in Denver When You're Dead
Lead Actors


['ANDY GARCIA', 'CHRISTOPHER WALKEN', 'CHRISTOPHER LLOYD']

2

Supporting Actors


['GABRIELLE ANWAR',
 'TREAT WILLIAMS',
 'ANDY GARCÍA',
 'JENNY MCCARTHY',
 'SARAH TRIGGER',
 'WILLIAM FORSYTHE',
 'BILL ERWIN',
 'MICHAEL NICOLOSI',
 'STEVE BUSCEMI',
 'FAIRUZA BALK',
 'DAVID STRATTON',
 'JACK WARDEN',
 'BILL NUNN']

3

Movie Cast


['ANDY GARCÍA',
 'CHRISTOPHER LLOYD',
 'WILLIAM FORSYTHE',
 'BILL NUNN',
 'TREAT WILLIAMS',
 'JACK WARDEN',
 'STEVE BUSCEMI',
 'FAIRUZA BALK',
 'GABRIELLE ANWAR',
 'CHRISTOPHER WALKEN',
 'MICHAEL NICOLOSI',
 'SARAH TRIGGER',
 'DAVID STRATTON',
 'JENNY MCCARTHY',
 'BILL ERWIN']

4

Next row...
Index: 896
The Air I Breathe
Lead Actors


['BRENDAN FRASER', 'SARAH MICHELLE GELLAR', 'ANDY GARCIA']

3

Supporting Actors


['FOREST WHITAKER',
 'EVAN PARKE',
 'ANDY GARCÍA',
 'CLARK GREGG',
 'CECILIA SUÁREZ',
 'TODD STASHWICK',
 'KEVIN BACON',
 'EMILE HIRSCH',
 'VICTOR RIVERS',
 'KELLY HU',
 'TAYLOR NICHOLS',
 'JULIE DELPY',
 'JON BERNTHAL']

3

Movie Cast


['BRENDAN FRASER',
 'ANDY GARCÍA',
 'KEVIN BACON',
 'SARAH MICHELLE GELLAR',
 'CLARK GREGG',
 'EMILE HIRSCH',
 'FOREST WHITAKER',
 'KELLY HU',
 'EVAN PARKE',
 'TAYLOR NICHOLS',
 'VICTOR RIVERS',
 'CECILIA SUÁREZ',
 'TODD STASHWICK',
 'JON BERNTHAL',
 'JULIE DELPY']

4

Next row...
Index: 942
Killa Season
Lead Actors


["CAM'RON", 'DURREL MOHAMMAD', 'JUELZ SANTANA', 'MICHAEL WILLIAMS']

1

Supporting Actors


['FUNKMASTER FLEX',
 'MICHAEL KENNETH WILLIAMS',
 'OSAS IGHODARO',
 'GREYSON CRUZ']

0

Movie Cast


["CAM'RON",
 'DURREL MOHAMMAD',
 'JUELZ SANTANA',
 'MICHAEL KENNETH WILLIAMS',
 'FUNKMASTER FLEX',
 'GREYSON CRUZ',
 'OSAS IGHODARO']

0

Next row...
Index: 966
The Perfect Holiday
Lead Actors


['GABRIELLE UNION', 'MORRIS CHESTNUT', 'QUEEN LATIFAH']

1

Supporting Actors


["JOHN 'B.J.' BRYANT",
 'PEDRO KIM',
 'KHAIL BRYANT',
 'FRANK BONSANGUE',
 'MODI',
 'PAUL WOODBUM',
 'MALIK HAMMOND',
 'KATT WILLIAMS',
 'JILL MARIE JONES',
 'DAVID ANZUELO',
 'CHARLIE MURPHY',
 'RACHEL TRUE',
 'FAIZON LOVE']

2

Movie Cast


['MORRIS CHESTNUT',
 'GABRIELLE UNION',
 'FAIZON LOVE',
 'CHARLIE MURPHY',
 'KATT WILLIAMS',
 'JILL MARIE JONES',
 'RACHEL TRUE',
 'MALIK HAMMOND',
 'KHAIL BRYANT',
 'PEDRO KIM',
 'DAVID ANZUELO',
 'FRANK BONSANGUE',
 "JOHN 'B.J.' BRYANT",
 'PAUL WOODBUM',
 'MODI']

2

Next row...
Index: 1067
La linea
Lead Actors


['RAY LIOTTA', 'ANDY GARCIA', 'ESAI MORALES']

3

Supporting Actors


['JASON CONNERY',
 'VICTORIA ELIZABETH',
 'JOE MORTON',
 'ANDY GARCÍA',
 'BRUCE DAVISON',
 'JORDI VILASUSO',
 'VALERIE CRUZ',
 'DANNY TREJO',
 'KEVIN GAGE',
 'ARMAND ASSANTE',
 'DAVID ACKERT',
 'GARY DANIELS',
 'MICHAEL DELORENZO']

5

Movie Cast


['RAY LIOTTA',
 'ANDY GARCÍA',
 'ESAI MORALES',
 'ARMAND ASSANTE',
 'VALERIE CRUZ',
 'KEVIN GAGE',
 'BRUCE DAVISON',
 'JOE MORTON',
 'DANNY TREJO',
 'GARY DANIELS',
 'MICHAEL DELORENZO',
 'DAVID ACKERT',
 'JASON CONNERY',
 'VICTORIA ELIZABETH',
 'JORDI VILASUSO']

6

Next row...
Index: 1093
AFI Life Achievement Award: A Tribute to Al Pacino
Lead Actors


['GABRIELLE ANWAR', 'JON AVNET', 'CAROLE BAYER SAGER']

10

Supporting Actors


['SEAN CONNERY',
 'FRANCIS FORD COPPOLA',
 'GEORGE CLOONEY',
 'JAMIE FOXX',
 'ANDY GARCÍA',
 'ED HARRIS',
 'AL PACINO',
 "CHRIS O'DONNELL",
 'GEORGE LOPEZ',
 'SIDNEY LUMET',
 'SAMUEL L. JACKSON',
 'MICHAEL MANN',
 'KIRK DOUGLAS',
 'ROBIN WILLIAMS']

66

Movie Cast


['AL PACINO',
 'JAMIE FOXX',
 'ROBIN WILLIAMS',
 'SEAN CONNERY',
 'GABRIELLE ANWAR',
 'GEORGE CLOONEY',
 'FRANCIS FORD COPPOLA',
 'KIRK DOUGLAS',
 'ANDY GARCÍA',
 'ED HARRIS',
 'SAMUEL L. JACKSON',
 'GEORGE LOPEZ',
 'SIDNEY LUMET',
 'MICHAEL MANN',
 "CHRIS O'DONNELL"]

66

Next row...
Index: 1096
The People Speak
Lead Actors


['DAVID STRATHAIRN', 'MATT DAMON', 'MARISA TOMEI', 'VIGGO MORTENSEN']

8

Supporting Actors


['JOSH BROLIN',
 'SANDRA OH',
 'ROSARIO DAWSON',
 'DON CHEADLE',
 'KERRY WASHINGTON',
 'DANNY GLOVER',
 'MICHAEL EALY',
 'JOHN LEGEND',
 "Q'ORIANKA KILCHER",
 'SEAN PENN',
 'MORGAN FREEMAN',
 'BENJAMIN BRATT']

13

Movie Cast


['ROSARIO DAWSON',
 'MORGAN FREEMAN',
 'MARISA TOMEI',
 'SEAN PENN',
 'VIGGO MORTENSEN',
 'JOSH BROLIN',
 'KERRY WASHINGTON',
 'DAVID STRATHAIRN',
 'DON CHEADLE',
 'SANDRA OH',
 'BENJAMIN BRATT',
 'DANNY GLOVER',
 'MICHAEL EALY',
 'JOHN LEGEND',
 "Q'ORIANKA KILCHER"]

18

Next row...
Index: 1156
Belly 2: Millionaire Boyz Club
Lead Actors


['GAME', 'TAYE HANSBERRY', 'SKI CARR', 'MICHAEL KENNETH WILLIAMS']

4

Supporting Actors


['WC',
 'ADAM AMBRUSO',
 'SHARI HEADLEY',
 'JAYCEON TERRELL TAYLOR',
 'SEAN BLAKEMORE',
 'ARIA LONDON',
 'JENNIFER RODRIGUEZ']

8

Movie Cast


['JAYCEON TERRELL TAYLOR',
 'TAYE HANSBERRY',
 'SKI CARR',
 'MICHAEL KENNETH WILLIAMS',
 'ADAM AMBRUSO',
 'WC',
 'SEAN BLAKEMORE',
 'SHARI HEADLEY',
 'ARIA LONDON',
 'JENNIFER RODRIGUEZ']

8

Next row...
Index: 1160
Between the World and Me
Lead Actors


['MAHERSHALA ALI', 'ANGELA BASSETT', 'TA-NEHISI COATES', 'ANGELA DAVIS']

3

Supporting Actors


['YARA SHAHIDI',
 'PHYLICIA RASHĀD',
 'JOE MORTON',
 'KENDRICK SAMPSON',
 'WENDELL PIERCE',
 'JANET MOCK',
 'ALICIA GARZA',
 'MJ RODRIGUEZ',
 'SUSAN KELECHI WATSON',
 'JHARREL JEROME',
 'PAULETTA WASHINGTON',
 'COURTNEY B. VANCE',
 'BLACK THOUGHT']

1

Movie Cast


['TA-NEHISI COATES',
 'ANGELA BASSETT',
 'ALICIA GARZA',
 'JHARREL JEROME',
 'JANET MOCK',
 'JOE MORTON',
 'WENDELL PIERCE',
 'PHYLICIA RASHĀD',
 'MJ RODRIGUEZ',
 'KENDRICK SAMPSON',
 'YARA SHAHIDI',
 'BLACK THOUGHT',
 'COURTNEY B. VANCE',
 'PAULETTA WASHINGTON',
 'SUSAN KELECHI WATSON']

2

Next row...
Index: 1418
8
Lead Actors


['KEVIN BACON', 'DUSTIN LANCE BLACK', 'DAVID BOIES', 'MATT BOMER']

3

Supporting Actors


['BRIDGER ZADINA',
 'JOHN C. REILLY',
 'BRAD PITT',
 'GEORGE CLOONEY',
 'JANE LYNCH',
 'MARTIN SHEEN',
 'GEORGE TAKEI',
 'JAMIE LEE CURTIS',
 'JESSE TYLER FERGUSON',
 "RORY O'MALLEY",
 'CHRISTINE LAHTI',
 'CHRIS COLFER',
 'YEARDLEY SMITH']

40

Movie Cast


['KEVIN BACON',
 'MATT BOMER',
 'GEORGE CLOONEY',
 'CHRIS COLFER',
 'JAMIE LEE CURTIS',
 'JESSE TYLER FERGUSON',
 'JANE LYNCH',
 "RORY O'MALLEY",
 'CHRISTINE LAHTI',
 'BRAD PITT',
 'JOHN C. REILLY',
 'MARTIN SHEEN',
 'GEORGE TAKEI',
 'BRIDGER ZADINA',
 'YEARDLEY SMITH']

42

Next row...
Index: 1442
The Furious Gods: Making Prometheus
Lead Actors


['STEVE BURG', 'IDRIS ELBA', 'MICHAEL ELLENBERG']

2

Supporting Actors


['JON SPAIHTS',
 'ALLEN MARIS',
 'PIETRO SCALIA',
 'MARK HUFFAM',
 'CHARLIZE THERON',
 'JANTY YATES',
 'RIDLEY SCOTT',
 'STEVEN MESSING',
 'DAMON LINDELOF',
 'NOOMI RAPACE',
 'ARTHUR MAX',
 'NEVILLE PAGE',
 'DARIUSZ WOLSKI',
 'GUY PEARCE']

10

Movie Cast


['RIDLEY SCOTT',
 'NOOMI RAPACE',
 'CHARLIZE THERON',
 'GUY PEARCE',
 'DAMON LINDELOF',
 'JON SPAIHTS',
 'PIETRO SCALIA',
 'DARIUSZ WOLSKI',
 'ARTHUR MAX',
 'JANTY YATES',
 'MICHAEL ELLENBERG',
 'MARK HUFFAM',
 'ALLEN MARIS',
 'STEVEN MESSING',
 'NEVILLE PAGE']

10

Next row...
Index: 1570
Avengers: Infinity War
Lead Actors


['ROBERT DOWNEY JR.', 'CHRIS HEMSWORTH', 'MARK RUFFALO', 'CHRIS EVANS']

6

Supporting Actors


['TOM HIDDLESTON',
 'CHADWICK BOSEMAN',
 'ELIZABETH OLSEN',
 'SCARLETT JOHANSSON',
 'ZOE SALDANA',
 'BENEDICT CUMBERBATCH',
 'KAREN GILLAN',
 'DON CHEADLE',
 'PAUL BETTANY',
 'TOM HOLLAND',
 'ANTHONY MACKIE',
 'SEBASTIAN STAN']

11

Movie Cast


['ROBERT DOWNEY JR.',
 'CHRIS HEMSWORTH',
 'CHRIS EVANS',
 'SCARLETT JOHANSSON',
 'BENEDICT CUMBERBATCH',
 'TOM HOLLAND',
 'CHADWICK BOSEMAN',
 'DON CHEADLE',
 'ZOE SALDANA',
 'KAREN GILLAN',
 'ELIZABETH OLSEN',
 'PAUL BETTANY',
 'ANTHONY MACKIE',
 'SEBASTIAN STAN',
 'TOM HIDDLESTON']

15

Next row...
Index: 1577
Lennon or McCartney
Lead Actors


['AARON PAUL', 'AASIF MANDVI', 'ADAM LAMBERT']

1

Supporting Actors


['JIMMY CLIFF',
 'RENO COLLIER',
 'DYLAN BRUCE',
 'TOMMY CHONG',
 'KELLY ASBURY',
 'ASHLEIGH BALL',
 'MARGARET CHO',
 'PIERRE BOUVIER',
 'NIMRÓD ANTAL',
 'RODRIGO CORTÉS',
 'IAN ASTBURY',
 'LEVAR BURTON',
 'STEPHEN BISHOP',
 'JEFF BAENA',
 'DAVID BYRNE']

2

Movie Cast


['NIMRÓD ANTAL',
 'KELLY ASBURY',
 'IAN ASTBURY',
 'JEFF BAENA',
 'ASHLEIGH BALL',
 'STEPHEN BISHOP',
 'PIERRE BOUVIER',
 'DYLAN BRUCE',
 'LEVAR BURTON',
 'DAVID BYRNE',
 'MARGARET CHO',
 'TOMMY CHONG',
 'JIMMY CLIFF',
 'RENO COLLIER',
 'RODRIGO CORTÉS']

2

Next row...
Index: 1634
Taylor Swift: The 1989 World Tour Live
Lead Actors


['TAYLOR SWIFT', 'UZO ADUBA', 'LILY ALDRIDGE', 'ABIGAIL ANDERSON']

3

Supporting Actors


['MICK JAGGER',
 'MARISKA HARGITAY',
 'LENA DUNHAM',
 'JAIME KING',
 'LORDE',
 'KARLIE KLOSS',
 'CARA DELEVINGNE',
 'SELENA GOMEZ',
 'NICK JONAS',
 'ELLEN DEGENERES',
 'JOHN LEGEND',
 'HEIDI KLUM',
 'IDINA MENZEL',
 'ELLIE GOULDING']

11

Movie Cast


['TAYLOR SWIFT',
 'ELLEN DEGENERES',
 'CARA DELEVINGNE',
 'LENA DUNHAM',
 'SELENA GOMEZ',
 'ELLIE GOULDING',
 'MARISKA HARGITAY',
 'MICK JAGGER',
 'NICK JONAS',
 'JAIME KING',
 'KARLIE KLOSS',
 'HEIDI KLUM',
 'JOHN LEGEND',
 'LORDE',
 'IDINA MENZEL']

13

Next row...
Index: 1646
Roy Orbison: One of the Lonely Ones
Lead Actors


['ROY ORBISON', 'CLARKE PETERS', 'THE BEATLES', 'HAROLD BRADLEY']

1

Supporting Actors


['WESLEY ORBISON',
 'BARBARA ORBISON',
 'ROY ORBISON JR.',
 'ALEX ORBISON',
 'BOBBY GOLDSBORO',
 'FRED FOSTER',
 'JEFF LYNNE',
 'MARIANNE FAITHFULL',
 'T BONE BURNETT',
 'JERRY KENNEDY',
 'TERRY WIDLAKE',
 'JOE MELSON']

4

Movie Cast


['CLARKE PETERS',
 'ROY ORBISON',
 'ALEX ORBISON',
 'ROY ORBISON JR.',
 'WESLEY ORBISON',
 'T BONE BURNETT',
 'JOE MELSON',
 'FRED FOSTER',
 'JEFF LYNNE',
 'TERRY WIDLAKE',
 'BARBARA ORBISON',
 'MARIANNE FAITHFULL',
 'BOBBY GOLDSBORO',
 'JERRY KENNEDY',
 'HAROLD BRADLEY']

4

Next row...
Index: 1661
The Long Way Home: Making the Martian
Lead Actors


['JESSICA CHASTAIN', 'MATT DAMON', 'JEFF DANIELS']

13

Supporting Actors


['KATE MARA',
 'MICHAEL PEÑA',
 'AKSEL HENNIE',
 'RIDLEY SCOTT',
 'CHIWETEL EJIOFOR',
 'SEBASTIAN STAN']

6

Movie Cast


['RIDLEY SCOTT',
 'MATT DAMON',
 'JESSICA CHASTAIN',
 'CHIWETEL EJIOFOR',
 'MICHAEL PEÑA',
 'SEBASTIAN STAN',
 'KATE MARA',
 'AKSEL HENNIE']

15

Next row...
Index: 1702
Jay and Silent Bob Reboot
Lead Actors


['JOEY LAUREN ADAMS', 'BEN AFFLECK', 'FRED ARMISEN', 'DIEDRICH BADER']

5

Supporting Actors


['CRAIG ROBINSON',
 'JASON MEWES',
 'VAL KILMER',
 'HARLEY QUINN SMITH',
 'MELISSA BENOIST',
 "BRIAN O'HALLORAN",
 'TOMMY CHONG',
 'ROSARIO DAWSON',
 'APARNA BRIELLE',
 'JORDAN MONSANTO',
 'KEVIN SMITH',
 'SHANNON ELIZABETH',
 'JOE MANGANIELLO',
 'ALICE WEN',
 'JASON LEE']

2

Movie Cast


['KEVIN SMITH',
 'JASON MEWES',
 'HARLEY QUINN SMITH',
 "BRIAN O'HALLORAN",
 'JASON LEE',
 'SHANNON ELIZABETH',
 'ROSARIO DAWSON',
 'ALICE WEN',
 'APARNA BRIELLE',
 'MELISSA BENOIST',
 'VAL KILMER',
 'TOMMY CHONG',
 'CRAIG ROBINSON',
 'JOE MANGANIELLO',
 'JORDAN MONSANTO']

2

Next row...


In [12]:
# Manual corrections for the rows flagged by the consistency check.
# Both helpers edit the list stored in the dataframe cell in place and do nothing
# when the list is already in the desired state, so this cell can be re-run
# without raising ValueError or appending duplicate names.
# NOTE(review): several fixes keep both spellings of one person (e.g. 'ANDY GARCIA'
# and 'ANDY GARCÍA') in the same list; that lengthens the list used as the
# denominator in calculate_black_actor_proportion - confirm this is intended.


def _add_names(index, column, *names):
    """Append each name to the list at movie_data_df.at[index, column] unless already present."""
    cell = movie_data_df.at[index, column]
    for name in names:
        if name not in cell:
            cell.append(name)


def _remove_name(index, column, name):
    """Remove name from the list at movie_data_df.at[index, column] if it is present."""
    cell = movie_data_df.at[index, column]
    if name in cell:
        cell.remove(name)


# Index 1: add 'PAUL NEWMAN' and 'JOANNE WOODWARD' to 'Movie_Cast'
_add_names(1, "Movie_Cast", "PAUL NEWMAN", "JOANNE WOODWARD")

# Index 145: move 'RAÚL JULIÁ' from 'Supporting_Actors' to 'Lead_Actors', add 'RAUL JULIA' to 'Movie_Cast'
_remove_name(145, "Supporting_Actors", "RAÚL JULIÁ")
_add_names(145, "Lead_Actors", "RAÚL JULIÁ")
_add_names(145, "Movie_Cast", "RAUL JULIA")

# Index 281: move 'ANDY GARCÍA' from 'Supporting_Actors' to 'Lead_Actors', add 'ANDY GARCIA' to 'Movie_Cast'
_remove_name(281, "Supporting_Actors", "ANDY GARCÍA")
_add_names(281, "Lead_Actors", "ANDY GARCÍA")
_add_names(281, "Movie_Cast", "ANDY GARCIA")

# Index 896: move 'ANDY GARCÍA' from 'Supporting_Actors' to 'Lead_Actors', add 'ANDY GARCIA' to 'Movie_Cast'
_remove_name(896, "Supporting_Actors", "ANDY GARCÍA")
_add_names(896, "Lead_Actors", "ANDY GARCÍA")
_add_names(896, "Movie_Cast", "ANDY GARCIA")

# Index 942: remove 'MICHAEL WILLIAMS' from 'Lead_Actors'
_remove_name(942, "Lead_Actors", "MICHAEL WILLIAMS")

# Index 966: add 'QUEEN LATIFAH' to 'Movie_Cast'
_add_names(966, "Movie_Cast", "QUEEN LATIFAH")

# Index 1067: move 'ANDY GARCÍA' from 'Supporting_Actors' to 'Lead_Actors', add 'ANDY GARCIA' to 'Movie_Cast'
_remove_name(1067, "Supporting_Actors", "ANDY GARCÍA")
_add_names(1067, "Lead_Actors", "ANDY GARCÍA")
_add_names(1067, "Movie_Cast", "ANDY GARCIA")

# Index 1093: add 'JON AVNET', 'CAROLE BAYER SAGER' and 'ANDY GARCIA' to 'Movie_Cast', add 'ANDY GARCIA' to 'Supporting_Actors'
_add_names(1093, "Movie_Cast", "JON AVNET", "CAROLE BAYER SAGER", "ANDY GARCIA")
_add_names(1093, "Supporting_Actors", "ANDY GARCIA")

# Index 1096: add 'MATT DAMON' to 'Movie_Cast'
_add_names(1096, "Movie_Cast", "MATT DAMON")

# Index 1156: replace 'GAME' with 'JAYCEON TERRELL TAYLOR' in 'Lead_Actors'
_remove_name(1156, "Lead_Actors", "GAME")
_add_names(1156, "Lead_Actors", "JAYCEON TERRELL TAYLOR")

# Index 1160: add 'ANGELA DAVIS' and 'MAHERSHALA ALI' to 'Movie_Cast'
_add_names(1160, "Movie_Cast", "ANGELA DAVIS", "MAHERSHALA ALI")

# Index 1418: add 'DUSTIN LANCE BLACK' and 'DAVID BOIES' to 'Movie_Cast'
_add_names(1418, "Movie_Cast", "DUSTIN LANCE BLACK", "DAVID BOIES")

# Index 1442: add 'IDRIS ELBA' to 'Movie_Cast'
_add_names(1442, "Movie_Cast", "IDRIS ELBA")

# Index 1570: add 'MARK RUFFALO' to 'Movie_Cast'
_add_names(1570, "Movie_Cast", "MARK RUFFALO")

# Index 1577: add 'AARON PAUL', 'AASIF MANDVI', 'ADAM LAMBERT' to 'Movie_Cast'
_add_names(1577, "Movie_Cast", "AARON PAUL", "AASIF MANDVI", "ADAM LAMBERT")

# Index 1634: add 'UZO ADUBA', 'LILY ALDRIDGE', 'ABIGAIL ANDERSON' to 'Movie_Cast'
_add_names(1634, "Movie_Cast", "UZO ADUBA", "LILY ALDRIDGE", "ABIGAIL ANDERSON")

# Index 1646: add 'THE BEATLES' to 'Movie_Cast'
_add_names(1646, "Movie_Cast", "THE BEATLES")

# Index 1661: add 'JEFF DANIELS' to 'Movie_Cast'
_add_names(1661, "Movie_Cast", "JEFF DANIELS")

# Index 1702: add 'JOEY LAUREN ADAMS', 'BEN AFFLECK', 'FRED ARMISEN', 'DIEDRICH BADER' to 'Movie_Cast'
_add_names(1702, "Movie_Cast", "JOEY LAUREN ADAMS", "BEN AFFLECK", "FRED ARMISEN", "DIEDRICH BADER")

In [13]:
# Recompute every Total_Awards_* column now that the name lists have been corrected.
for total_column, names_column in (
    ("Total_Awards_Lead_Actors", "Lead_Actors"),
    ("Total_Awards_Supporting_Actors", "Supporting_Actors"),
    ("Total_Awards_Movie_Cast", "Movie_Cast"),
    ("Total_Awards_Director", "Director"),
    ("Total_Awards_Writer", "Writer"),
    ("Total_Awards_Movie_Crew", "Movie_Crew"),
):
    movie_data_df[total_column] = movie_data_df.apply(
        lambda row: get_total_movie_awards(row[names_column], row["Released"]),
        axis=1,
    )

# Soundtrack credits use the music awards lookup.
movie_data_df["Total_Awards_Soundtrack_Credits"] = movie_data_df.apply(
    lambda row: get_total_music_awards(row["Soundtrack_Credits"], row["Released"]),
    axis=1,
)

In [14]:
# Re-run the consistency check; an empty frame means lead + supporting totals now equal the cast total everywhere.
movie_data_df.loc[
    lambda frame: (
        frame["Total_Awards_Lead_Actors"] + frame["Total_Awards_Supporting_Actors"]
        != frame["Total_Awards_Movie_Cast"]
    )
]

Unnamed: 0,imdbID,Title,Year,Rated,Released,Runtime,Genre,Director,Writer,Lead_Actors,...,Soundtrack_Credits,alternative_titles,Supporting_Actors,Total_Awards_Lead_Actors,Total_Awards_Supporting_Actors,Total_Awards_Movie_Cast,Total_Awards_Director,Total_Awards_Writer,Total_Awards_Movie_Crew,Total_Awards_Soundtrack_Credits


In [15]:
# Compute, per film, the proportion of matched actors in each cast grouping.
for proportion_column, names_column in (
    ("Black_Lead_Proportion", "Lead_Actors"),
    ("Black_Support_Proportion", "Supporting_Actors"),
    ("Black_Cast_Proportion", "Movie_Cast"),
):
    movie_data_df[proportion_column] = movie_data_df[names_column].map(
        calculate_black_actor_proportion
    )

In [16]:
# The proportion helper returns a mix of formatted strings and the integer 0; cast each column to float.
for proportion_column in (
    "Black_Lead_Proportion",
    "Black_Support_Proportion",
    "Black_Cast_Proportion",
):
    movie_data_df[proportion_column] = movie_data_df[proportion_column].astype(float)

In [22]:
# Persist the enriched movie dataframe as a pickle.
OUTPUT_PICKLE_PATH = "./movie_data_df_2.pkl"
movie_data_df.to_pickle(OUTPUT_PICKLE_PATH)