In [1]:
import pandas as pd

#### Check value counts for each `gender` column 

In [2]:
movies_2011_and_beyond = pd.read_csv('my_data/movies_2011_and_beyond.csv')
movies_2011_and_beyond.head()

Unnamed: 0,movieTitle,directorName,directorGender,writerName,writerGender,screenwriterName,screenwriterGender,executiveproducerName,executiveproducerGender,producerName,producerGender,budgetAmount,boxofficeAmount,imdbId
0,Fast Five,Neal H. Moritz,male,,,Chris Morgan,male,Justin Lin,male,Vin Diesel,male,,626100000.0,tt1596343
1,Albert Nobbs,Rodrigo García,male,,,Glenn Close,female,,,Bonnie Curtis,female,,,tt1602098
2,Albert Nobbs,Rodrigo García,male,,,George Moore,male,,,Bonnie Curtis,female,,,tt1602098
3,300: Rise of an Empire,Noam Murro,male,,,Kurt Johnstad,male,,,Thomas Tull,male,110000000.0,337580051.0,tt1253863
4,Fast Five,Neal H. Moritz,male,,,Chris Morgan,male,Justin Lin,male,Neal H. Moritz,male,,626100000.0,tt1596343


### Custom Functions
* add docstrings / make pretty for all finalized functions that are used

In [3]:
# check for all unique values in gender columns, will use my column names within function since they're all the same

def check_unique_gender_values(df):
    gender_columns = ['directorGender', 'writerGender', 'screenwriterGender', 'executiveproducerGender', 'producerGender']
    for col in gender_columns:
        print(col)
        print(df[col].unique())

In [4]:
check_unique_gender_values(movies_2011_and_beyond)

directorGender
['male' 'female' nan 'transgender female' 'non-binary' 'transgender male']
writerGender
[nan 'male']
screenwriterGender
['male' 'female' nan 'transgender female' 'non-binary']
executiveproducerGender
['male' nan 'female']
producerGender
['male' 'female' 'transgender female' nan]


In [6]:
# investigate writerGender because this should probably be dropped and changed to screenwriter
movies_2011_and_beyond.writerName.unique()

array([nan, 'Slavoj Žižek', 'Rana Abrar'], dtype=object)

In [7]:
movies_2011_and_beyond.loc[movies_2011_and_beyond.writerName == 'Rana Abrar']

Unnamed: 0,movieTitle,directorName,directorGender,writerName,writerGender,screenwriterName,screenwriterGender,executiveproducerName,executiveproducerGender,producerName,producerGender,budgetAmount,boxofficeAmount,imdbId
15766,Black Briefcase: The Nuclear Trigger,Rana Abrar,male,Rana Abrar,male,,,,,,,,,tt7903992
16053,Son Of Kashmir Burhan,Rana Abrar,male,Rana Abrar,male,,,,,,,,,tt7675680
16390,The Evil Marriage,Rana Abrar,male,Rana Abrar,male,,,,,,,,,tt8614336


In [None]:
# check to see if these movies are in Bechdel dataset - if not, all 3 will be dropped because there is not much useful
# information for these movies

In [8]:
bechdel_df = pd.read_csv('my_data/bechdel_test_movies.csv')
bechdel_df.head()

Unnamed: 0,year,title,score,passing,imdb_id,imdb_link
0,2019,Alita: Battle Angel,3,1,tt0437086,http://us.imdb.com/title/tt0437086/
1,2019,American Woman,3,1,tt9109492,http://us.imdb.com/title/tt9109492/
2,2019,Close,3,1,tt5316540,http://us.imdb.com/title/tt5316540/
3,2019,Cold Pursuit,1,0,tt5719748,http://us.imdb.com/title/tt5719748/
4,2019,How to Train Your Dragon: The Hidden World,2,0,tt2386490,http://us.imdb.com/title/tt2386490/


In [16]:
# could turn this imdb_id finder into a function
rana_abrar_imdb_ids = []
for movie in movies_2011_and_beyond.loc[movies_2011_and_beyond.writerName == 'Rana Abrar'].values:
    rana_abrar_imdb_ids.append(movie[-1])
rana_abrar_imdb_ids

['tt7903992', 'tt7675680', 'tt8614336']

In [14]:
def locate_rows_by_imdb_ids(dataset, name_of_imdb_id_col, imdb_id_list):
    selected_rows = []
    for imdb_id in imdb_id_list:
        row = dataset.loc[dataset[name_of_imdb_id_col] == imdb_id]
        selected_rows.append(row)
    return selected_rows

In [19]:
locate_rows_by_imdb_ids(bechdel_df, name_of_imdb_id_col='imdb_id', imdb_id_list=rana_abrar_imdb_ids)

[Empty DataFrame
 Columns: [year, title, score, passing, imdb_id, imdb_link]
 Index: [], Empty DataFrame
 Columns: [year, title, score, passing, imdb_id, imdb_link]
 Index: [], Empty DataFrame
 Columns: [year, title, score, passing, imdb_id, imdb_link]
 Index: []]

In [21]:
# confirm this function is actually saying there are no matching movies in Bechdel df rather than just being a 
# broken function

bechdel_df.loc[bechdel_df.imdb_id == 'tt7903992']

Unnamed: 0,year,title,score,passing,imdb_id,imdb_link


In [22]:
bechdel_df.loc[bechdel_df.title == 'The Evil Marriage']

Unnamed: 0,year,title,score,passing,imdb_id,imdb_link


In [23]:
# check to see if function works on movies that are definitely in both lists

movies_2011_and_beyond.loc[movies_2011_and_beyond.movieTitle == 'Alita: Battle Angel']

Unnamed: 0,movieTitle,directorName,directorGender,writerName,writerGender,screenwriterName,screenwriterGender,executiveproducerName,executiveproducerGender,producerName,producerGender,budgetAmount,boxofficeAmount,imdbId
8292,Alita: Battle Angel,Robert Rodriguez,male,,,James Cameron,male,,,James Cameron,male,170000000.0,359700000.0,tt0437086
8299,Alita: Battle Angel,Robert Rodriguez,male,,,Laeta Kalogridis,female,,,James Cameron,male,170000000.0,359700000.0,tt0437086
8307,Alita: Battle Angel,Robert Rodriguez,male,,,James Cameron,male,,,Robert Rodriguez,male,170000000.0,359700000.0,tt0437086
8314,Alita: Battle Angel,Robert Rodriguez,male,,,Laeta Kalogridis,female,,,Robert Rodriguez,male,170000000.0,359700000.0,tt0437086
8331,Alita: Battle Angel,Robert Rodriguez,male,,,James Cameron,male,,,Jon Landau,male,170000000.0,359700000.0,tt0437086
8333,Alita: Battle Angel,Robert Rodriguez,male,,,Laeta Kalogridis,female,,,Jon Landau,male,170000000.0,359700000.0,tt0437086


In [24]:
movies_2011_and_beyond.loc[movies_2011_and_beyond.movieTitle == 'Close']

Unnamed: 0,movieTitle,directorName,directorGender,writerName,writerGender,screenwriterName,screenwriterGender,executiveproducerName,executiveproducerGender,producerName,producerGender,budgetAmount,boxofficeAmount,imdbId
15209,Close,Vicky Jewson,female,,,Vicky Jewson,female,,,,,,,tt5316540


In [25]:
def locate_imdb_ids(df, column_to_search, value_list_to_search, imdb_id_column):
    rows_to_search = []
    imdb_ids = []
    for val in value_list_to_search:
        row = df.loc[df[column_to_search] == val]
        rows_to_search.append(row)
    for row in rows_to_search.values:
        print(row)

In [27]:
# okay that didn't really work

In [30]:
def locate_rows_in_df_by_values(df, column_to_search, value_list_to_search):
    for val in value_list_to_search:
        print(df.loc[df[column_to_search] == val])

In [32]:
movies_to_search = ['Close', 'Alita: Battle Angel', 'American Woman']
locate_rows_in_df_by_values(movies_2011_and_beyond, 'movieTitle', movies_to_search)

      movieTitle  directorName directorGender writerName writerGender  \
15209      Close  Vicky Jewson         female        NaN          NaN   

      screenwriterName screenwriterGender executiveproducerName  \
15209     Vicky Jewson             female                   NaN   

      executiveproducerGender producerName producerGender  budgetAmount  \
15209                     NaN          NaN            NaN           NaN   

       boxofficeAmount     imdbId  
15209              NaN  tt5316540  
               movieTitle      directorName directorGender writerName  \
8292  Alita: Battle Angel  Robert Rodriguez           male        NaN   
8299  Alita: Battle Angel  Robert Rodriguez           male        NaN   
8307  Alita: Battle Angel  Robert Rodriguez           male        NaN   
8314  Alita: Battle Angel  Robert Rodriguez           male        NaN   
8331  Alita: Battle Angel  Robert Rodriguez           male        NaN   
8333  Alita: Battle Angel  Robert Rodriguez           mal

In [34]:
test_imdb_ids = ['tt0437086', 'tt5316540']
locate_rows_by_imdb_ids(movies_2011_and_beyond, 'imdbId', test_imdb_ids)

[               movieTitle      directorName directorGender writerName  \
 8292  Alita: Battle Angel  Robert Rodriguez           male        NaN   
 8299  Alita: Battle Angel  Robert Rodriguez           male        NaN   
 8307  Alita: Battle Angel  Robert Rodriguez           male        NaN   
 8314  Alita: Battle Angel  Robert Rodriguez           male        NaN   
 8331  Alita: Battle Angel  Robert Rodriguez           male        NaN   
 8333  Alita: Battle Angel  Robert Rodriguez           male        NaN   
 
      writerGender  screenwriterName screenwriterGender executiveproducerName  \
 8292          NaN     James Cameron               male                   NaN   
 8299          NaN  Laeta Kalogridis             female                   NaN   
 8307          NaN     James Cameron               male                   NaN   
 8314          NaN  Laeta Kalogridis             female                   NaN   
 8331          NaN     James Cameron               male                   N

In [35]:
locate_rows_by_imdb_ids(bechdel_df, 'imdb_id', test_imdb_ids)

[   year                title  score  passing    imdb_id  \
 0  2019  Alita: Battle Angel      3        1  tt0437086   
 
                              imdb_link  
 0  http://us.imdb.com/title/tt0437086/  ,
    year  title  score  passing    imdb_id                            imdb_link
 2  2019  Close      3        1  tt5316540  http://us.imdb.com/title/tt5316540/]

In [33]:
movies_2011_and_beyond.loc[movies_2011_and_beyond.movieTitle == 'American Woman']

Unnamed: 0,movieTitle,directorName,directorGender,writerName,writerGender,screenwriterName,screenwriterGender,executiveproducerName,executiveproducerGender,producerName,producerGender,budgetAmount,boxofficeAmount,imdbId


In [36]:
# well the outputs don't look pretty, but at least they work!

In [37]:
# the whole point of all that was just to see if I could drop the writerName and writerGender columns

In [38]:
movies_2011_and_beyond.loc[movies_2011_and_beyond.writerName == 'Slavoj Žižek']

Unnamed: 0,movieTitle,directorName,directorGender,writerName,writerGender,screenwriterName,screenwriterGender,executiveproducerName,executiveproducerGender,producerName,producerGender,budgetAmount,boxofficeAmount,imdbId
11007,The Pervert's Guide to Ideology,Sophie Fiennes,female,Slavoj Žižek,male,Slavoj Žižek,male,,,,,,,tt2152198


In [39]:
# lol

In [40]:
# check to see if this movie is in the bechdel dataset
locate_rows_by_imdb_ids(bechdel_df, 'imdb_id', 'tt2152198')

[Empty DataFrame
 Columns: [year, title, score, passing, imdb_id, imdb_link]
 Index: [], Empty DataFrame
 Columns: [year, title, score, passing, imdb_id, imdb_link]
 Index: [], Empty DataFrame
 Columns: [year, title, score, passing, imdb_id, imdb_link]
 Index: [], Empty DataFrame
 Columns: [year, title, score, passing, imdb_id, imdb_link]
 Index: [], Empty DataFrame
 Columns: [year, title, score, passing, imdb_id, imdb_link]
 Index: [], Empty DataFrame
 Columns: [year, title, score, passing, imdb_id, imdb_link]
 Index: [], Empty DataFrame
 Columns: [year, title, score, passing, imdb_id, imdb_link]
 Index: [], Empty DataFrame
 Columns: [year, title, score, passing, imdb_id, imdb_link]
 Index: [], Empty DataFrame
 Columns: [year, title, score, passing, imdb_id, imdb_link]
 Index: []]

In [41]:
bechdel_df.loc[bechdel_df.title == 'The Pervert\'s Guide to Ideology']

Unnamed: 0,year,title,score,passing,imdb_id,imdb_link


In [42]:
# drop writerName and writerGender from movies_200 df
movies_2011_and_beyond.drop(columns=['writerName', 'writerGender'], axis=1, inplace=True)

In [43]:
movies_2011_and_beyond.head()

Unnamed: 0,movieTitle,directorName,directorGender,screenwriterName,screenwriterGender,executiveproducerName,executiveproducerGender,producerName,producerGender,budgetAmount,boxofficeAmount,imdbId
0,Fast Five,Neal H. Moritz,male,Chris Morgan,male,Justin Lin,male,Vin Diesel,male,,626100000.0,tt1596343
1,Albert Nobbs,Rodrigo García,male,Glenn Close,female,,,Bonnie Curtis,female,,,tt1602098
2,Albert Nobbs,Rodrigo García,male,George Moore,male,,,Bonnie Curtis,female,,,tt1602098
3,300: Rise of an Empire,Noam Murro,male,Kurt Johnstad,male,,,Thomas Tull,male,110000000.0,337580051.0,tt1253863
4,Fast Five,Neal H. Moritz,male,Chris Morgan,male,Justin Lin,male,Neal H. Moritz,male,,626100000.0,tt1596343


#### Convert gender columns to a boolean `FTNB` column to include cis and transgender females, transgender males, and non-binary people
* Need to be able to drop duplicate rows so that there is only one row for each movie
* Each movie row would indicate whether one of its directors, writers, or producers is of an underrepresented gender
* Would like an additional column for presence of underrepresented gender in ANY of those listed positions

In [44]:
# test subset of dataframe - will use index slicing for sample df 
movies_2011_and_beyond.loc[movies_2011_and_beyond.directorGender == 'transgender male']

Unnamed: 0,movieTitle,directorName,directorGender,screenwriterName,screenwriterGender,executiveproducerName,executiveproducerGender,producerName,producerGender,budgetAmount,boxofficeAmount,imdbId
14917,Strong Island,Yance Ford,transgender male,,,,,,,,,tt5873150
15646,A Kid Like Jake,Silas Howard,transgender male,,,,,,,,,tt6884200


In [47]:
test_indices = []
test_indices.append(14917)
test_indices.append(15646)

In [48]:
test_indices

[14917, 15646]

In [49]:
movies_2011_and_beyond.loc[movies_2011_and_beyond.screenwriterGender == 'non-binary']

Unnamed: 0,movieTitle,directorName,directorGender,screenwriterName,screenwriterGender,executiveproducerName,executiveproducerGender,producerName,producerGender,budgetAmount,boxofficeAmount,imdbId
3747,Afternoon Delight,Jill Soloway,non-binary,Jill Soloway,non-binary,,,Jen Chaiken,female,,,tt2312890
3750,Afternoon Delight,Jill Soloway,non-binary,Jill Soloway,non-binary,,,Sebastian Dungan,male,,,tt2312890
11491,My Prairie Home,Chelsea McMullan,female,Rae Spoon,non-binary,,,,,,,tt3203462


In [50]:
test_indices.append(3747)
test_indices.append(11491)

In [65]:
# movies_2011_and_beyond.loc[movies_2011_and_beyond.producerGender == 'transgender female']

In [52]:
# the list above contains movies with transgender females, females, and males, so can use these for test df
test_indices.append(8957)
test_indices.append(9060)
test_indices.append(8961)
test_indices.append(7410)

In [53]:
test_indices

[14917, 15646, 3747, 11491, 8957, 9060, 8961, 7410]

In [56]:
movies_2011_and_beyond[1000:1200]

Unnamed: 0,movieTitle,directorName,directorGender,screenwriterName,screenwriterGender,executiveproducerName,executiveproducerGender,producerName,producerGender,budgetAmount,boxofficeAmount,imdbId
1000,Thor,Kenneth Branagh,male,Zack Stentz,male,Stan Lee,male,Kevin Feige,male,150000000.0,8.531648e+06,tt0800369
1001,Thor,Kenneth Branagh,male,Larry Lieber,male,Patricia Whitcher,female,Kevin Feige,male,150000000.0,8.531648e+06,tt0800369
1002,Thor,Kenneth Branagh,male,Zack Stentz,male,Patricia Whitcher,female,Kevin Feige,male,150000000.0,8.531648e+06,tt0800369
1003,Thor,Kenneth Branagh,male,Don Payne,male,Stan Lee,male,Kevin Feige,male,150000000.0,8.531648e+06,tt0800369
1004,Thor,Kenneth Branagh,male,Mark Protosevich,male,Louis D'Esposito,male,Kevin Feige,male,150000000.0,8.531648e+06,tt0800369
1005,Thor,Kenneth Branagh,male,Mark Protosevich,male,Alan Fine,male,Kevin Feige,male,150000000.0,8.531648e+06,tt0800369
1006,Thor,Kenneth Branagh,male,Don Payne,male,Patricia Whitcher,female,Kevin Feige,male,150000000.0,8.531648e+06,tt0800369
1007,Thor,Kenneth Branagh,male,Mark Protosevich,male,David Maisel,male,Kevin Feige,male,150000000.0,8.531648e+06,tt0800369
1008,Thor,Kenneth Branagh,male,Jack Kirby,male,Louis D'Esposito,male,Kevin Feige,male,150000000.0,8.531648e+06,tt0800369
1009,Thor,Kenneth Branagh,male,Jack Kirby,male,Alan Fine,male,Kevin Feige,male,150000000.0,8.531648e+06,tt0800369


In [59]:
# check all instances of Fast Five, Get the Gringo, and Girl Walks into a Bar 

In [60]:
movies_2011_and_beyond.loc[movies_2011_and_beyond.movieTitle == 'Fast Five']

Unnamed: 0,movieTitle,directorName,directorGender,screenwriterName,screenwriterGender,executiveproducerName,executiveproducerGender,producerName,producerGender,budgetAmount,boxofficeAmount,imdbId
0,Fast Five,Neal H. Moritz,male,Chris Morgan,male,Justin Lin,male,Vin Diesel,male,,626100000.0,tt1596343
4,Fast Five,Neal H. Moritz,male,Chris Morgan,male,Justin Lin,male,Neal H. Moritz,male,,626100000.0,tt1596343
5,Fast Five,Justin Lin,male,Chris Morgan,male,Justin Lin,male,Vin Diesel,male,,626100000.0,tt1596343
9,Fast Five,Justin Lin,male,Chris Morgan,male,Justin Lin,male,Neal H. Moritz,male,,626100000.0,tt1596343
18,Fast Five,Chris Morgan,male,Chris Morgan,male,Justin Lin,male,Vin Diesel,male,,626100000.0,tt1596343
19,Fast Five,Vin Diesel,male,Chris Morgan,male,Justin Lin,male,Vin Diesel,male,,626100000.0,tt1596343
21,Fast Five,Chris Morgan,male,Chris Morgan,male,Justin Lin,male,Neal H. Moritz,male,,626100000.0,tt1596343
22,Fast Five,Vin Diesel,male,Chris Morgan,male,Justin Lin,male,Neal H. Moritz,male,,626100000.0,tt1596343
27,Fast Five,Gary Scott Thompson,male,Chris Morgan,male,Justin Lin,male,Vin Diesel,male,,626100000.0,tt1596343
34,Fast Five,Gary Scott Thompson,male,Chris Morgan,male,Justin Lin,male,Neal H. Moritz,male,,626100000.0,tt1596343


In [61]:
movies_2011_and_beyond.loc[movies_2011_and_beyond.movieTitle == 'Get the Gringo']

Unnamed: 0,movieTitle,directorName,directorGender,screenwriterName,screenwriterGender,executiveproducerName,executiveproducerGender,producerName,producerGender,budgetAmount,boxofficeAmount,imdbId
11,Get the Gringo,Adrian Grunberg,male,Mel Gibson,male,,,Mel Gibson,male,,,tt1567609
25,Get the Gringo,Adrian Grunberg,male,Mel Gibson,male,,,Bruce Davey,male,,,tt1567609


In [62]:
# there is a third producer for Get the Gringo - Stacy Perskie, cisgender male - so this movie would still be 
# good to use for test

In [63]:
movies_2011_and_beyond.loc[movies_2011_and_beyond.movieTitle == 'Girl Walks into a Bar']

Unnamed: 0,movieTitle,directorName,directorGender,screenwriterName,screenwriterGender,executiveproducerName,executiveproducerGender,producerName,producerGender,budgetAmount,boxofficeAmount,imdbId
1181,Girl Walks into a Bar,Sebastian Gutierrez,male,Sebastian Gutierrez,male,,,Steve Bing,male,,,tt1682246


In [None]:
# Girl Walks into a Bar is edited by female Lisa Bromwell, but all the other producers are male
# will have to add other roles like film editor, cinematographer, etc as later wish list for project

In [64]:
# add some movies with all cisgender males to test df
test_indices.append(0) # Fast Five
test_indices.append(18) # Fast Five
test_indices.append(11) # Get the Gringo
test_indices.append(25) # Get the Gringo
test_indices.append(1181) # Girl Walks into a Bar

In [66]:
gender_columns = ['directorGender', 'screenwriterGender', 'executiveproducerGender', 'producerGender']
gender_values = ['female', 'male', 'non-binary', 'transgender female', 'transgender male']

In [67]:
test_indices

[14917, 15646, 3747, 11491, 8957, 9060, 8961, 7410, 0, 18, 11, 25, 1181]

In [68]:
test_df = movies_2011_and_beyond.iloc[test_indices]

In [69]:
test_df

Unnamed: 0,movieTitle,directorName,directorGender,screenwriterName,screenwriterGender,executiveproducerName,executiveproducerGender,producerName,producerGender,budgetAmount,boxofficeAmount,imdbId
14917,Strong Island,Yance Ford,transgender male,,,,,,,,,tt5873150
15646,A Kid Like Jake,Silas Howard,transgender male,,,,,,,,,tt6884200
3747,Afternoon Delight,Jill Soloway,non-binary,Jill Soloway,non-binary,,,Jen Chaiken,female,,,tt2312890
11491,My Prairie Home,Chelsea McMullan,female,Rae Spoon,non-binary,,,,,,,tt3203462
8957,Cloud Atlas,Lilly Wachowski,transgender female,Lilly Wachowski,female,Pearry Teo,male,Lana Wachowski,transgender female,,,tt1371111
9060,Cloud Atlas,Lilly Wachowski,female,Lilly Wachowski,transgender female,Pearry Teo,male,Lilly Wachowski,transgender female,,,tt1371111
8961,Cloud Atlas,Tom Tykwer,male,Lilly Wachowski,female,Pearry Teo,male,Lilly Wachowski,transgender female,,,tt1371111
7410,Turning,Anohni,transgender female,Anohni,transgender female,,,Anohni,transgender female,,,tt2219894
0,Fast Five,Neal H. Moritz,male,Chris Morgan,male,Justin Lin,male,Vin Diesel,male,,626100000.0,tt1596343
18,Fast Five,Chris Morgan,male,Chris Morgan,male,Justin Lin,male,Vin Diesel,male,,626100000.0,tt1596343


In [70]:
test_df.directorGender.str.contains('female')

14917    False
15646    False
3747     False
11491     True
8957      True
9060      True
8961     False
7410      True
0        False
18       False
11       False
25       False
1181     False
Name: directorGender, dtype: bool

In [75]:
for idx, value in enumerate(test_df.directorGender.str.contains('female')):
    print(idx, value)

0 False
1 False
2 False
3 True
4 True
5 True
6 False
7 True
8 False
9 False
10 False
11 False
12 False


In [76]:
# could possibly use enumerate, depending on how this deals with nan values
test_df.executiveproducerGender.str.contains('male')

14917     NaN
15646     NaN
3747      NaN
11491     NaN
8957     True
9060     True
8961     True
7410      NaN
0        True
18       True
11        NaN
25        NaN
1181      NaN
Name: executiveproducerGender, dtype: object

In [79]:
for idx, value in enumerate(test_df.directorGender.str.contains('female|non-binary|transgender')):
    if value:
        print(idx)

0
1
2
3
4
5
7


In [82]:
director_fnbt = [1 if val == True else 0 for val in test_df.directorGender.str.contains('female|non-binary|transgender')]
                                                              

In [83]:
director_fnbt

[1, 1, 1, 1, 1, 1, 0, 1, 0, 0, 0, 0, 0]

In [84]:
test_df.loc['director_fnbt'] = director_fnbt

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  if __name__ == '__main__':


In [85]:
test_df

Unnamed: 0,movieTitle,directorName,directorGender,screenwriterName,screenwriterGender,executiveproducerName,executiveproducerGender,producerName,producerGender,budgetAmount,boxofficeAmount,imdbId,director_fnbt
14917,Strong Island,Yance Ford,transgender male,,,,,,,,,tt5873150,1
15646,A Kid Like Jake,Silas Howard,transgender male,,,,,,,,,tt6884200,1
3747,Afternoon Delight,Jill Soloway,non-binary,Jill Soloway,non-binary,,,Jen Chaiken,female,,,tt2312890,1
11491,My Prairie Home,Chelsea McMullan,female,Rae Spoon,non-binary,,,,,,,tt3203462,1
8957,Cloud Atlas,Lilly Wachowski,transgender female,Lilly Wachowski,female,Pearry Teo,male,Lana Wachowski,transgender female,,,tt1371111,1
9060,Cloud Atlas,Lilly Wachowski,female,Lilly Wachowski,transgender female,Pearry Teo,male,Lilly Wachowski,transgender female,,,tt1371111,1
8961,Cloud Atlas,Tom Tykwer,male,Lilly Wachowski,female,Pearry Teo,male,Lilly Wachowski,transgender female,,,tt1371111,0
7410,Turning,Anohni,transgender female,Anohni,transgender female,,,Anohni,transgender female,,,tt2219894,1
0,Fast Five,Neal H. Moritz,male,Chris Morgan,male,Justin Lin,male,Vin Diesel,male,,626100000.0,tt1596343,0
18,Fast Five,Chris Morgan,male,Chris Morgan,male,Justin Lin,male,Vin Diesel,male,,626100000.0,tt1596343,0


In [89]:
test_df.drop('new_col', axis=1, inplace=True)

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  errors=errors)


In [90]:
new_writer_values = [1 if val == True else 0 for val in test_df.screenwriterGender.str.contains('female|non-binary|transgender')]

In [92]:
test_df['screenwriter_fnbt'] = new_writer_values

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  if __name__ == '__main__':


In [93]:
test_df

Unnamed: 0,movieTitle,directorName,directorGender,screenwriterName,screenwriterGender,executiveproducerName,executiveproducerGender,producerName,producerGender,budgetAmount,boxofficeAmount,imdbId,director_fnbt
14917,Strong Island,Yance Ford,transgender male,,,,,,,,,tt5873150,1
15646,A Kid Like Jake,Silas Howard,transgender male,,,,,,,,,tt6884200,1
3747,Afternoon Delight,Jill Soloway,non-binary,Jill Soloway,non-binary,,,Jen Chaiken,female,,,tt2312890,1
11491,My Prairie Home,Chelsea McMullan,female,Rae Spoon,non-binary,,,,,,,tt3203462,1
8957,Cloud Atlas,Lilly Wachowski,transgender female,Lilly Wachowski,female,Pearry Teo,male,Lana Wachowski,transgender female,,,tt1371111,1
9060,Cloud Atlas,Lilly Wachowski,female,Lilly Wachowski,transgender female,Pearry Teo,male,Lilly Wachowski,transgender female,,,tt1371111,1
8961,Cloud Atlas,Tom Tykwer,male,Lilly Wachowski,female,Pearry Teo,male,Lilly Wachowski,transgender female,,,tt1371111,0
7410,Turning,Anohni,transgender female,Anohni,transgender female,,,Anohni,transgender female,,,tt2219894,1
0,Fast Five,Neal H. Moritz,male,Chris Morgan,male,Justin Lin,male,Vin Diesel,male,,626100000.0,tt1596343,0
18,Fast Five,Chris Morgan,male,Chris Morgan,male,Justin Lin,male,Vin Diesel,male,,626100000.0,tt1596343,0


In [94]:
# omg what is going on

In [95]:
test_df.drop('screenwriter_fnbt', axis=0, inplace=True)

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  errors=errors)


In [96]:
test_df

Unnamed: 0,movieTitle,directorName,directorGender,screenwriterName,screenwriterGender,executiveproducerName,executiveproducerGender,producerName,producerGender,budgetAmount,boxofficeAmount,imdbId,director_fnbt
14917,Strong Island,Yance Ford,transgender male,,,,,,,,,tt5873150,1
15646,A Kid Like Jake,Silas Howard,transgender male,,,,,,,,,tt6884200,1
3747,Afternoon Delight,Jill Soloway,non-binary,Jill Soloway,non-binary,,,Jen Chaiken,female,,,tt2312890,1
11491,My Prairie Home,Chelsea McMullan,female,Rae Spoon,non-binary,,,,,,,tt3203462,1
8957,Cloud Atlas,Lilly Wachowski,transgender female,Lilly Wachowski,female,Pearry Teo,male,Lana Wachowski,transgender female,,,tt1371111,1
9060,Cloud Atlas,Lilly Wachowski,female,Lilly Wachowski,transgender female,Pearry Teo,male,Lilly Wachowski,transgender female,,,tt1371111,1
8961,Cloud Atlas,Tom Tykwer,male,Lilly Wachowski,female,Pearry Teo,male,Lilly Wachowski,transgender female,,,tt1371111,0
7410,Turning,Anohni,transgender female,Anohni,transgender female,,,Anohni,transgender female,,,tt2219894,1
0,Fast Five,Neal H. Moritz,male,Chris Morgan,male,Justin Lin,male,Vin Diesel,male,,626100000.0,tt1596343,0
18,Fast Five,Chris Morgan,male,Chris Morgan,male,Justin Lin,male,Vin Diesel,male,,626100000.0,tt1596343,0


In [97]:
new_writer_values

[0, 0, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0]

In [98]:
test_df['writer_fnbt'] = new_writer_values

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  if __name__ == '__main__':


In [99]:
test_df

Unnamed: 0,movieTitle,directorName,directorGender,screenwriterName,screenwriterGender,executiveproducerName,executiveproducerGender,producerName,producerGender,budgetAmount,boxofficeAmount,imdbId,director_fnbt,writer_fnbt
14917,Strong Island,Yance Ford,transgender male,,,,,,,,,tt5873150,1,0
15646,A Kid Like Jake,Silas Howard,transgender male,,,,,,,,,tt6884200,1,0
3747,Afternoon Delight,Jill Soloway,non-binary,Jill Soloway,non-binary,,,Jen Chaiken,female,,,tt2312890,1,1
11491,My Prairie Home,Chelsea McMullan,female,Rae Spoon,non-binary,,,,,,,tt3203462,1,1
8957,Cloud Atlas,Lilly Wachowski,transgender female,Lilly Wachowski,female,Pearry Teo,male,Lana Wachowski,transgender female,,,tt1371111,1,1
9060,Cloud Atlas,Lilly Wachowski,female,Lilly Wachowski,transgender female,Pearry Teo,male,Lilly Wachowski,transgender female,,,tt1371111,1,1
8961,Cloud Atlas,Tom Tykwer,male,Lilly Wachowski,female,Pearry Teo,male,Lilly Wachowski,transgender female,,,tt1371111,0,1
7410,Turning,Anohni,transgender female,Anohni,transgender female,,,Anohni,transgender female,,,tt2219894,1,1
0,Fast Five,Neal H. Moritz,male,Chris Morgan,male,Justin Lin,male,Vin Diesel,male,,626100000.0,tt1596343,0,0
18,Fast Five,Chris Morgan,male,Chris Morgan,male,Justin Lin,male,Vin Diesel,male,,626100000.0,tt1596343,0,0


In [103]:
new_exec_values = [1 if val == True else 0 for val in test_df.executiveproducerGender.str.contains('female|non-binary|transgender')]
new_producer_values = [1 if val == True else 0 for val in test_df.producerGender.str.contains('female|non-binary|transgender')]

In [104]:
test_df['exec_fnbt'] = new_exec_values
test_df['producer_fnbt'] = new_producer_values

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  if __name__ == '__main__':
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  from ipykernel import kernelapp as app


In [105]:
test_df

Unnamed: 0,movieTitle,directorName,directorGender,screenwriterName,screenwriterGender,executiveproducerName,executiveproducerGender,producerName,producerGender,budgetAmount,boxofficeAmount,imdbId,director_fnbt,writer_fnbt,exec_fnbt,producer_fnbt
14917,Strong Island,Yance Ford,transgender male,,,,,,,,,tt5873150,1,0,0,0
15646,A Kid Like Jake,Silas Howard,transgender male,,,,,,,,,tt6884200,1,0,0,0
3747,Afternoon Delight,Jill Soloway,non-binary,Jill Soloway,non-binary,,,Jen Chaiken,female,,,tt2312890,1,1,0,1
11491,My Prairie Home,Chelsea McMullan,female,Rae Spoon,non-binary,,,,,,,tt3203462,1,1,0,0
8957,Cloud Atlas,Lilly Wachowski,transgender female,Lilly Wachowski,female,Pearry Teo,male,Lana Wachowski,transgender female,,,tt1371111,1,1,0,1
9060,Cloud Atlas,Lilly Wachowski,female,Lilly Wachowski,transgender female,Pearry Teo,male,Lilly Wachowski,transgender female,,,tt1371111,1,1,0,1
8961,Cloud Atlas,Tom Tykwer,male,Lilly Wachowski,female,Pearry Teo,male,Lilly Wachowski,transgender female,,,tt1371111,0,1,0,1
7410,Turning,Anohni,transgender female,Anohni,transgender female,,,Anohni,transgender female,,,tt2219894,1,1,0,1
0,Fast Five,Neal H. Moritz,male,Chris Morgan,male,Justin Lin,male,Vin Diesel,male,,626100000.0,tt1596343,0,0,0,0
18,Fast Five,Chris Morgan,male,Chris Morgan,male,Justin Lin,male,Vin Diesel,male,,626100000.0,tt1596343,0,0,0,0


In [106]:
# drop all those other columns
cols_to_drop = ['directorName', 'directorGender', 'screenwriterName', 'screenwriterGender', 'executiveproducerName',\
               'executiveproducerGender', 'producerName', 'producerGender']
test_df.drop(columns=cols_to_drop, axis=1, inplace=True)

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  errors=errors)


In [107]:
test_df

Unnamed: 0,movieTitle,budgetAmount,boxofficeAmount,imdbId,director_fnbt,writer_fnbt,exec_fnbt,producer_fnbt
14917,Strong Island,,,tt5873150,1,0,0,0
15646,A Kid Like Jake,,,tt6884200,1,0,0,0
3747,Afternoon Delight,,,tt2312890,1,1,0,1
11491,My Prairie Home,,,tt3203462,1,1,0,0
8957,Cloud Atlas,,,tt1371111,1,1,0,1
9060,Cloud Atlas,,,tt1371111,1,1,0,1
8961,Cloud Atlas,,,tt1371111,0,1,0,1
7410,Turning,,,tt2219894,1,1,0,1
0,Fast Five,,626100000.0,tt1596343,0,0,0,0
18,Fast Five,,626100000.0,tt1596343,0,0,0,0


In [108]:
test_df['overall_fnbt'] = test_df.director_fnbt + test_df.writer_fnbt + test_df.exec_fnbt + test_df.producer_fnbt

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  if __name__ == '__main__':


In [109]:
test_df

Unnamed: 0,movieTitle,budgetAmount,boxofficeAmount,imdbId,director_fnbt,writer_fnbt,exec_fnbt,producer_fnbt,overall_fnbt
14917,Strong Island,,,tt5873150,1,0,0,0,1
15646,A Kid Like Jake,,,tt6884200,1,0,0,0,1
3747,Afternoon Delight,,,tt2312890,1,1,0,1,3
11491,My Prairie Home,,,tt3203462,1,1,0,0,2
8957,Cloud Atlas,,,tt1371111,1,1,0,1,3
9060,Cloud Atlas,,,tt1371111,1,1,0,1,3
8961,Cloud Atlas,,,tt1371111,0,1,0,1,2
7410,Turning,,,tt2219894,1,1,0,1,3
0,Fast Five,,626100000.0,tt1596343,0,0,0,0,0
18,Fast Five,,626100000.0,tt1596343,0,0,0,0,0


In [110]:
# now all column values for each movie should be identical and can drop duplicates
test_df.drop_duplicates(inplace=True)

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  from ipykernel import kernelapp as app


In [111]:
test_df

Unnamed: 0,movieTitle,budgetAmount,boxofficeAmount,imdbId,director_fnbt,writer_fnbt,exec_fnbt,producer_fnbt,overall_fnbt
14917,Strong Island,,,tt5873150,1,0,0,0,1
15646,A Kid Like Jake,,,tt6884200,1,0,0,0,1
3747,Afternoon Delight,,,tt2312890,1,1,0,1,3
11491,My Prairie Home,,,tt3203462,1,1,0,0,2
8957,Cloud Atlas,,,tt1371111,1,1,0,1,3
8961,Cloud Atlas,,,tt1371111,0,1,0,1,2
7410,Turning,,,tt2219894,1,1,0,1,3
0,Fast Five,,626100000.0,tt1596343,0,0,0,0,0
11,Get the Gringo,,,tt1567609,0,0,0,0,0
1181,Girl Walks into a Bar,,,tt1682246,0,0,0,0,0


In [113]:
# didn't work for Cloud Atlas because of the gender counts. . . would like to keep the row with the highest overall_fnbt
test_df.sort_values('overall_fnbt', ascending=False).drop_duplicates('movieTitle').sort_index()

Unnamed: 0,movieTitle,budgetAmount,boxofficeAmount,imdbId,director_fnbt,writer_fnbt,exec_fnbt,producer_fnbt,overall_fnbt
0,Fast Five,,626100000.0,tt1596343,0,0,0,0,0
11,Get the Gringo,,,tt1567609,0,0,0,0,0
1181,Girl Walks into a Bar,,,tt1682246,0,0,0,0,0
3747,Afternoon Delight,,,tt2312890,1,1,0,1,3
7410,Turning,,,tt2219894,1,1,0,1,3
8957,Cloud Atlas,,,tt1371111,1,1,0,1,3
11491,My Prairie Home,,,tt3203462,1,1,0,0,2
14917,Strong Island,,,tt5873150,1,0,0,0,1
15646,A Kid Like Jake,,,tt6884200,1,0,0,0,1


In [114]:
# try new sample df, use function to do all that stuff that I just did

In [115]:
test2_df = movies_2011_and_beyond.sample(n=20)

In [116]:
test2_df

Unnamed: 0,movieTitle,directorName,directorGender,screenwriterName,screenwriterGender,executiveproducerName,executiveproducerGender,producerName,producerGender,budgetAmount,boxofficeAmount,imdbId
10933,Stormhouse,Dan Turner,male,Jason Arnopp,male,,,,,,,tt1864494
522,Bullet to the Head,Walter Hill,male,Walter Hill,male,,,Joel Silver,male,55000000.0,21947209.0,tt1308729
2969,A Star Is Born,Bradley Cooper,male,Eric Roth,male,,,Jon Peters,male,36000000.0,403588149.0,tt1517451
9454,The Man from Earth: Holocene,Richard Schenkman,male,Eric D. Wilkinson,male,,,Richard Schenkman,male,,,tt5770864
703,The Paperboy,Lee Daniels,male,Lee Daniels,male,Michael Benaroya,male,Michael Benaroya,male,,,tt1496422
4793,Mad Max: Fury Road,George Miller,male,Brendan McCarthy,male,Bruce Berman,male,Christopher DeFaria,male,150000000.0,378858340.0,tt1392190
1512,We're the Millers,Rawson Marshall Thurber,male,Richard Rinaldi,male,Toby Emmerich,male,Happy Walters,male,,270000000.0,tt1723121
5721,The Pyramid,Grégory Levasseur,male,,,,,Mark Canton,male,,,tt2799166
7598,Dr. Cabbie,Jean-François Pouliot,male,Vinay Virmani,male,,,Salman Khan,male,,,tt2831404
3641,The Gunman,Peter McAleese,male,Don Macpherson,male,,,Jean-Patrick Manchette,male,,,tt2515034


In [117]:
# new boolean gender columns
# drop name and other gender columns
# add overall gender column
# sort by descending overall values
# drop duplicates, keeping first
# will have to check writer columns in all the other dfs!

In [119]:
clean_up(test2_df)

Unnamed: 0,movieTitle,directorName,directorGender,screenwriterName,screenwriterGender,executiveproducerName,executiveproducerGender,producerName,producerGender,budgetAmount,boxofficeAmount,imdbId,director_fnbt,writer_fnbt,exec_fnbt,producer_fnbt,overall_fnbt
10933,Stormhouse,Dan Turner,male,Jason Arnopp,male,,,,,,,tt1864494,0,0,0,0,0
522,Bullet to the Head,Walter Hill,male,Walter Hill,male,,,Joel Silver,male,55000000.0,21947209.0,tt1308729,0,0,0,0,0
2969,A Star Is Born,Bradley Cooper,male,Eric Roth,male,,,Jon Peters,male,36000000.0,403588149.0,tt1517451,0,0,0,0,0
9454,The Man from Earth: Holocene,Richard Schenkman,male,Eric D. Wilkinson,male,,,Richard Schenkman,male,,,tt5770864,0,0,0,0,0
703,The Paperboy,Lee Daniels,male,Lee Daniels,male,Michael Benaroya,male,Michael Benaroya,male,,,tt1496422,0,0,0,0,0
4793,Mad Max: Fury Road,George Miller,male,Brendan McCarthy,male,Bruce Berman,male,Christopher DeFaria,male,150000000.0,378858340.0,tt1392190,0,0,0,0,0
1512,We're the Millers,Rawson Marshall Thurber,male,Richard Rinaldi,male,Toby Emmerich,male,Happy Walters,male,,270000000.0,tt1723121,0,0,0,0,0
5721,The Pyramid,Grégory Levasseur,male,,,,,Mark Canton,male,,,tt2799166,0,0,0,0,0
7598,Dr. Cabbie,Jean-François Pouliot,male,Vinay Virmani,male,,,Salman Khan,male,,,tt2831404,0,0,0,0,0
3641,The Gunman,Peter McAleese,male,Don Macpherson,male,,,Jean-Patrick Manchette,male,,,tt2515034,0,0,0,0,0


In [None]:
# oops, forgot to drop cols, and should drop inplace?

In [129]:
# will only be using the datasets I made using wikidata queries, so all column names will be the same
def clean_up(sparql_df):
    
    new_director_values = [1 if val == True else 0 for val in sparql_df.directorGender.str.contains('female|non-binary|transgender')]
    new_writer_values = [1 if val == True else 0 for val in sparql_df.screenwriterGender.str.contains('female|non-binary|transgender')]
    new_exec_values = [1 if val == True else 0 for val in sparql_df.executiveproducerGender.str.contains('female|non-binary|transgender')]
    new_producer_values = [1 if val == True else 0 for val in sparql_df.producerGender.str.contains('female|non-binary|transgender')]
    
    sparql_df['director_fnbt'] = new_director_values
    sparql_df['writer_fnbt'] = new_writer_values
    sparql_df['exec_fnbt'] = new_exec_values
    sparql_df['producer_fnbt'] = new_producer_values
    
    sparql_df['overall_fnbt'] = sparql_df['director_fnbt'] + sparql_df['writer_fnbt'] + sparql_df['exec_fnbt'] + sparql_df['producer_fnbt']
    
    cols_to_drop = ['directorName', 'directorGender', 'screenwriterName', 'screenwriterGender', 'executiveproducerName',\
               'executiveproducerGender', 'producerName', 'producerGender']
    sparql_df.drop(columns=cols_to_drop, axis=1, inplace=True)
    sparql_df.sort_values('overall_fnbt', ascending=False).drop_duplicates('movieTitle', inplace=True)
    sparql_df.sort_index(inplace=True)
    return sparql_df


In [121]:
test3_df = movies_2011_and_beyond.sample(n=20)

In [122]:
test3_df

Unnamed: 0,movieTitle,directorName,directorGender,screenwriterName,screenwriterGender,executiveproducerName,executiveproducerGender,producerName,producerGender,budgetAmount,boxofficeAmount,imdbId
1448,The Grand Budapest Hotel,Wes Anderson,male,Hugo Guinness,male,Carl Woebcken,male,Steven M. Rales,male,23000000.0,174801324.0,tt2278388
14809,My Friend Dahmer,Marc Meyers,male,,,,,,,,,tt2291540
16734,Them That Follow,,,,,,,,,,,tt7313348
13026,November Criminals,Sacha Gervasi,male,Steven Knight,male,,,,,,,tt3266284
15938,Eaters,,,,,,,,,,,tt2109127
3739,Le beau risque,Mark Penney,male,Mark Penney,male,,,Mark Penney,male,,,tt2749296
59,Comedown,Menhaj Huda,male,,,,,Gareth Wiley,male,,,tt1376168
3111,The Thompsons,Phil Flores,male,Phil Flores,male,Pierre David,male,Travis Stevens,male,,,tt1831806
1213,Greedy Lying Bastards,Craig Rosebraugh,male,,,,,Marianna Yarovskaya,female,,,tt2069784
3093,The Thompsons,Phil Flores,male,Phil Flores,male,Michael Riley,male,Travis Stevens,male,,,tt1831806


In [126]:
test4_df = movies_2011_and_beyond.sample(n=20)

In [127]:
test4_df

Unnamed: 0,movieTitle,directorName,directorGender,screenwriterName,screenwriterGender,executiveproducerName,executiveproducerGender,producerName,producerGender,budgetAmount,boxofficeAmount,imdbId
12571,Mendelsohn's Incessant Visions,Duki Dror,male,,,,,,,,,tt2007403
16176,Newly Single,Adam Christian Clark,male,Adam Christian Clark,male,,,,,,,tt5515814
5224,Chuck,Philippe Falardeau,male,Jeff Feuerzeig,male,Avi Lerner,male,Christa Campbell,female,,,tt1610525
167,The Inbetweeners Movie,Ben Palmer,male,Damon Beesley,male,Iain Morris,male,Christopher Young,male,,,tt1716772
5277,Q14955181,Charlotte Brändström,female,Joakim Hansson,male,,,Joakim Hansson,male,,,tt2279786
9066,Cloud Atlas,Lana Wachowski,transgender female,Tom Tykwer,male,Pearry Teo,male,Tom Tykwer,male,,,tt1371111
9201,The Hummingbird Project,Kim Nguyen,male,Kim Nguyen,male,,,Pierre Even,male,,,tt6866224
8379,"Two Soft Things, Two Hard Things",Michael Yerxa,male,Mark Kenneth Woods,male,,,Mark Kenneth Woods,male,,,tt5503490
3200,The Cold Light of Day,Kevin Mann,male,Scott Wiper,male,Steven Zaillian,male,Kevin Mann,male,,10968746.0,tt1366365
1394,The Eye of the Storm,Fred Schepisi,male,Judy Morris,female,,,Gregory J. Read,male,,,tt1600207


In [128]:
clean_up(test4_df)

Unnamed: 0,movieTitle,budgetAmount,boxofficeAmount,imdbId,director_fnbt,writer_fnbt,exec_fnbt,producer_fnbt,overall_fnbt
12571,Mendelsohn's Incessant Visions,,,tt2007403,0,0,0,0,0
16176,Newly Single,,,tt5515814,0,0,0,0,0
5224,Chuck,,,tt1610525,0,0,0,1,1
167,The Inbetweeners Movie,,,tt1716772,0,0,0,0,0
5277,Q14955181,,,tt2279786,1,0,0,0,1
9066,Cloud Atlas,,,tt1371111,1,0,0,0,1
9201,The Hummingbird Project,,,tt6866224,0,0,0,0,0
8379,"Two Soft Things, Two Hard Things",,,tt5503490,0,0,0,0,0
3200,The Cold Light of Day,,10968746.0,tt1366365,0,0,0,0,0
1394,The Eye of the Storm,,,tt1600207,0,1,0,0,1


In [130]:
# lol okay let's try again
test5_df = movies_2011_and_beyond.sample(n=20)

In [131]:
test5_df

Unnamed: 0,movieTitle,directorName,directorGender,screenwriterName,screenwriterGender,executiveproducerName,executiveproducerGender,producerName,producerGender,budgetAmount,boxofficeAmount,imdbId
4748,Sin City: A Dame to Kill For,Robert Rodriguez,male,Robert Rodriguez,male,Frank Miller,male,Alexander Rodnyansky,male,,,tt0458481
14623,The Three Christs of Ypsilanti,Jon Avnet,male,,,,,,,,,tt5706370
16496,Farmageddon: A Shaun the Sheep Movie,Richard Goleszowski,male,,,,,,,,,tt6193408
12027,The Inside,Eoin Macken,male,Eoin Macken,male,,,,,,,tt1612083
2453,Just Go with It,Dennis Dugan,male,Allan Loeb,male,,,Jack Giarraputo,male,,214900000.0,tt1564367
6074,The Big Short,Adam McKay,male,Charles Randolph,male,,,Brad Pitt,male,28000000.0,133346500.0,tt1596363
5806,Split,M. Night Shyamalan,male,M. Night Shyamalan,male,,,M. Night Shyamalan,male,9000000.0,269995800.0,tt4972582
13918,Among the Believers,,,,,,,,,,,tt4504626
15066,Fun Mom Dinner,,,,,Paul Rudd,male,,,,,tt5829040
3545,Minions,Kyle Balda,male,Brian Lynch,male,Chris Renaud,male,Chris Meledandri,male,74000000.0,1159398000.0,tt2293640


In [132]:
clean_up(test5_df)

Unnamed: 0,movieTitle,budgetAmount,boxofficeAmount,imdbId,director_fnbt,writer_fnbt,exec_fnbt,producer_fnbt,overall_fnbt
171,Real Steel,,299300000.0,tt0433035,0,0,0,0,0
1846,In the Name of the King 2: Two Worlds,,,tt1767319,0,0,0,0,0
2453,Just Go with It,,214900000.0,tt1564367,0,0,0,0,0
3545,Minions,74000000.0,1159398000.0,tt2293640,0,0,0,0,0
4748,Sin City: A Dame to Kill For,,,tt0458481,0,0,0,0,0
5764,Star Wars: The Last Jedi,200000000.0,1332540000.0,tt2527336,0,0,0,0,0
5806,Split,9000000.0,269995800.0,tt4972582,0,0,0,0,0
6074,The Big Short,28000000.0,133346500.0,tt1596363,0,0,0,0,0
7961,Belgrade,,,tt6316906,0,0,0,0,0
8392,Contemporary Color,,,tt5258306,0,0,0,0,0


In [133]:
bechdel_df.loc[bechdel_df.title == 'Star Wars: The Last Jedi']

Unnamed: 0,year,title,score,passing,imdb_id,imdb_link
349,2017,Star Wars: The Last Jedi,3,1,tt2527336,http://us.imdb.com/title/tt2527336/


In [134]:
# The Last Jedi should be passing once all rows are considered

In [135]:
cleaned_movies_2011_and_beyond = clean_up(movies_2011_and_beyond)

In [136]:
cleaned_movies_2011_and_beyond.head()

Unnamed: 0,movieTitle,budgetAmount,boxofficeAmount,imdbId,director_fnbt,writer_fnbt,exec_fnbt,producer_fnbt,overall_fnbt
0,Fast Five,,626100000.0,tt1596343,0,0,0,0,0
1,Albert Nobbs,,,tt1602098,0,1,0,1,2
2,Albert Nobbs,,,tt1602098,0,0,0,1,1
3,300: Rise of an Empire,110000000.0,337580051.0,tt1253863,0,0,0,0,0
4,Fast Five,,626100000.0,tt1596343,0,0,0,0,0


In [137]:
cleaned_movies_2011_and_beyond.loc[cleaned_movies_2011_and_beyond.movieTitle == 'Star Wars: The Last Jedi']

Unnamed: 0,movieTitle,budgetAmount,boxofficeAmount,imdbId,director_fnbt,writer_fnbt,exec_fnbt,producer_fnbt,overall_fnbt
4578,Star Wars: The Last Jedi,200000000.0,42577970.0,tt2527336,0,0,0,0,0
4579,Star Wars: The Last Jedi,200000000.0,64974040.0,tt2527336,0,0,0,0,0
4580,Star Wars: The Last Jedi,200000000.0,83164500.0,tt2527336,0,0,0,0,0
4581,Star Wars: The Last Jedi,200000000.0,111106200.0,tt2527336,0,0,0,0,0
4582,Star Wars: The Last Jedi,200000000.0,620181400.0,tt2527336,0,0,0,0,0
4583,Star Wars: The Last Jedi,200000000.0,1332540000.0,tt2527336,0,0,0,0,0
4585,Star Wars: The Last Jedi,200000000.0,42577970.0,tt2527336,0,0,0,1,1
4586,Star Wars: The Last Jedi,200000000.0,64974040.0,tt2527336,0,0,0,1,1
4587,Star Wars: The Last Jedi,200000000.0,83164500.0,tt2527336,0,0,0,1,1
4588,Star Wars: The Last Jedi,200000000.0,111106200.0,tt2527336,0,0,0,1,1


In [138]:
movies_2011_and_beyond.head()

Unnamed: 0,movieTitle,budgetAmount,boxofficeAmount,imdbId,director_fnbt,writer_fnbt,exec_fnbt,producer_fnbt,overall_fnbt
0,Fast Five,,626100000.0,tt1596343,0,0,0,0,0
1,Albert Nobbs,,,tt1602098,0,1,0,1,2
2,Albert Nobbs,,,tt1602098,0,0,0,1,1
3,300: Rise of an Empire,110000000.0,337580051.0,tt1253863,0,0,0,0,0
4,Fast Five,,626100000.0,tt1596343,0,0,0,0,0


In [139]:
movies_2011_and_beyond.loc[movies_2011_and_beyond.movieTitle == 'Star Wars: The Last Jedi']

Unnamed: 0,movieTitle,budgetAmount,boxofficeAmount,imdbId,director_fnbt,writer_fnbt,exec_fnbt,producer_fnbt,overall_fnbt
4578,Star Wars: The Last Jedi,200000000.0,42577970.0,tt2527336,0,0,0,0,0
4579,Star Wars: The Last Jedi,200000000.0,64974040.0,tt2527336,0,0,0,0,0
4580,Star Wars: The Last Jedi,200000000.0,83164500.0,tt2527336,0,0,0,0,0
4581,Star Wars: The Last Jedi,200000000.0,111106200.0,tt2527336,0,0,0,0,0
4582,Star Wars: The Last Jedi,200000000.0,620181400.0,tt2527336,0,0,0,0,0
4583,Star Wars: The Last Jedi,200000000.0,1332540000.0,tt2527336,0,0,0,0,0
4585,Star Wars: The Last Jedi,200000000.0,42577970.0,tt2527336,0,0,0,1,1
4586,Star Wars: The Last Jedi,200000000.0,64974040.0,tt2527336,0,0,0,1,1
4587,Star Wars: The Last Jedi,200000000.0,83164500.0,tt2527336,0,0,0,1,1
4588,Star Wars: The Last Jedi,200000000.0,111106200.0,tt2527336,0,0,0,1,1


In [None]:
# NOOOOOOOOOOOOOOOOOOO