In [1]:
import pandas as pd
import networkx as nx
import matplotlib.pyplot as plt
import numpy as np

In [2]:
poster = pd.read_csv('poster.csv')
poster = poster.drop(['id','action','year','rating', 'adventure','comedy','crime','drama','fantasy','horror','mystery','romance','sci-fi','short','thriller','animation'], axis=1)
poster.rename(columns={"title": "Film"}, inplace=True)


In [3]:
big_actor_movie = pd.read_csv('actorfilms.csv')  
df_big_actor_movie = big_actor_movie[['Film', 'Actor','Year','Rating']]
df_big_actor_movie

Unnamed: 0,Film,Actor,Year,Rating
0,Ghost Story,Fred Astaire,1981,6.3
1,The Purple Taxi,Fred Astaire,1977,6.6
2,The Amazing Dobermans,Fred Astaire,1976,5.3
3,The Towering Inferno,Fred Astaire,1974,7.0
4,Midas Run,Fred Astaire,1969,4.8
...,...,...,...,...
191868,Jojo Rabbit,Roman Griffin Davis,2019,7.9
191869,Silk Road,Lexi Rabe,2021,5.9
191870,Avengers: Endgame,Lexi Rabe,2019,8.4
191871,Godzilla: King of the Monsters,Lexi Rabe,2019,6.0


In [4]:
big_actors = np.unique(df_big_actor_movie['Actor'])
big_films = np.unique(df_big_actor_movie['Film'])


big_actor_by_film = dict.fromkeys(big_films, [])
big_actor_by_film

for i in range(len(df_big_actor_movie)):
    big_actor_by_film[df_big_actor_movie['Film'][i]] = big_actor_by_film[df_big_actor_movie['Film'][i]] + [df_big_actor_movie['Actor'][i]]

In [5]:
big_actor_by_film_df = pd.DataFrame(big_actor_by_film.items(), columns = ['Film', 'Actor list'])
big_actor_by_film_df.head()

Unnamed: 0,Film,Actor list
0,"!Ay, mi madre!",[Paz Vega]
1,!Hay motivo!,"[Luis Tosar, Marta Etura]"
2,!Three Amigos!,"[Steve Martin, Chevy Chase, Jon Lovitz, Joe Ma..."
3,#Alive,[Park Shin-Hye]
4,#Horror,"[Timothy Hutton, Balthazar Getty, Chloe Sevign..."


In [6]:
df = pd.merge(big_actor_by_film_df, poster, on="Film")
df.head()

Unnamed: 0,Film,Actor list,poster,genre
0,#Alive,[Park Shin-Hye],https://m.media-amazon.com/images/M/MV5BMGNiYj...,"action, drama, horror"
1,#Horror,"[Timothy Hutton, Balthazar Getty, Chloe Sevign...",https://m.media-amazon.com/images/M/MV5BMjM5OD...,"crime, drama, horror"
2,#Iamhere,[Bae Doona],https://m.media-amazon.com/images/M/MV5BZjRhN2...,comedy
3,#NAME?,"[Ray Milland, Robert Montgomery]",https://m.media-amazon.com/images/M/MV5BNzdlZj...,"animation, short, comedy"
4,#REALITYHIGH,"[Kate Walsh, John Michael Higgins, Anne Winter...",https://m.media-amazon.com/images/M/MV5BNDU3Mz...,"comedy, drama, romance"


In [7]:
df = df.drop_duplicates(subset=['Film'])
df = df.reset_index()
df.head()

Unnamed: 0,index,Film,Actor list,poster,genre
0,0,#Alive,[Park Shin-Hye],https://m.media-amazon.com/images/M/MV5BMGNiYj...,"action, drama, horror"
1,1,#Horror,"[Timothy Hutton, Balthazar Getty, Chloe Sevign...",https://m.media-amazon.com/images/M/MV5BMjM5OD...,"crime, drama, horror"
2,2,#Iamhere,[Bae Doona],https://m.media-amazon.com/images/M/MV5BZjRhN2...,comedy
3,3,#NAME?,"[Ray Milland, Robert Montgomery]",https://m.media-amazon.com/images/M/MV5BNzdlZj...,"animation, short, comedy"
4,4,#REALITYHIGH,"[Kate Walsh, John Michael Higgins, Anne Winter...",https://m.media-amazon.com/images/M/MV5BNDU3Mz...,"comedy, drama, romance"


In [8]:
actors = np.unique(df['Actor list'])
genre = np.unique(df['genre'].str.split(', ').sum())
movie = np.unique(df['Film'].str.split(', ').sum())

In [9]:
combined_list = []

for lst in actors:
    combined_list.extend(lst)

unique_list = list(set(combined_list))

In [10]:
unique_list = np.array(unique_list)

In [11]:
unique_list

array(['Folake Olowofoyeku', 'Jonathan Bennett', 'Stephany Jacobsen', ...,
       'Lee Norris', 'Drew Carey', 'Spencer Grammer'], dtype='<U27')

In [12]:
df['Actor_List_id'] = df['Actor list'].apply(lambda x: [np.where(unique_list == i)[0][0] for i in x])

In [13]:
df.head()

Unnamed: 0,index,Film,Actor list,poster,genre,Actor_List_id
0,0,#Alive,[Park Shin-Hye],https://m.media-amazon.com/images/M/MV5BMGNiYj...,"action, drama, horror",[4729]
1,1,#Horror,"[Timothy Hutton, Balthazar Getty, Chloe Sevign...",https://m.media-amazon.com/images/M/MV5BMjM5OD...,"crime, drama, horror","[7298, 8096, 143, 8869, 96, 1270]"
2,2,#Iamhere,[Bae Doona],https://m.media-amazon.com/images/M/MV5BZjRhN2...,comedy,[8483]
3,3,#NAME?,"[Ray Milland, Robert Montgomery]",https://m.media-amazon.com/images/M/MV5BNzdlZj...,"animation, short, comedy","[2387, 2122]"
4,4,#REALITYHIGH,"[Kate Walsh, John Michael Higgins, Anne Winter...",https://m.media-amazon.com/images/M/MV5BNDU3Mz...,"comedy, drama, romance","[152, 4992, 3455, 5439, 4212, 5568]"


In [14]:
adj_matrix = np.zeros((len(unique_list),len(unique_list)))

In [15]:
for i in range(len(df)):
    for j in range(len(df['Actor_List_id'][i])-1):
        for k in range(j+1,len(df['Actor_List_id'][i])-1):
            adj_matrix[df['Actor_List_id'][i][j]][df['Actor_List_id'][i][k]] = adj_matrix[df['Actor_List_id'][i][j]][df['Actor_List_id'][i][k]]+ 1
            adj_matrix[df['Actor_List_id'][i][k]][df['Actor_List_id'][i][j]] = adj_matrix[df['Actor_List_id'][i][k]][df['Actor_List_id'][i][j]]+ 1

In [16]:
adj_matrix[6094][4040]

0.0

In [17]:
filtered_df = df[df['Actor list'].apply(lambda x:"Robert De Niro" in x)]
filtered_df

Unnamed: 0,index,Film,Actor list,poster,genre,Actor_List_id
725,826,American Hustle,"[Robert De Niro, Christian Bale, Anthony Zerbe...",https://m.media-amazon.com/images/M/MV5BMmM4Yz...,"crime, drama","[3679, 6949, 1768, 2624, 4716, 3811, 3082, 893..."
1297,1470,Being Flynn,"[Robert De Niro, Julianne Moore, Lili Taylor, ...",https://m.media-amazon.com/images/M/MV5BMjI1OT...,drama,"[3679, 4063, 5060, 7561, 1228, 8124, 1802, 389..."
2812,3271,Dirty Grandpa,"[Robert De Niro, Danny Glover, Dermot Mulroney...",https://m.media-amazon.com/images/M/MV5BMzk0Nz...,comedy,"[3679, 923, 1381, 4582, 352, 1915, 6777, 7114,..."
3213,3753,Everybody's Fine,"[Marcello Mastroianni, Drew Barrymore, Robert ...",https://m.media-amazon.com/images/M/MV5BMTM1Nz...,drama,"[614, 4211, 3679, 5829, 8163, 513, 3086, 6552,..."
3479,4057,Flawless,"[Robert De Niro, Demi Moore, Michael Caine, Ph...",https://m.media-amazon.com/images/M/MV5BMjA1NT...,"crime, drama, thriller","[3679, 8741, 6933, 2217, 3072, 736, 8494, 4681..."
3593,4194,Freelancers,"[Robert De Niro, Dana Delany, Forest Whitaker,...",https://m.media-amazon.com/images/M/MV5BMjEyNj...,"action, crime, drama","[3679, 8151, 7208, 6776, 4538, 7075, 1781, 711..."
3901,4561,Great Expectations,"[Alec Guinness, Robert De Niro, Ralph Fiennes,...",https://m.media-amazon.com/images/M/MV5BMjMzOD...,"drama, romance","[780, 3679, 7174, 8737, 521, 2258, 33, 6812, 3..."
4103,4790,Heist,"[Robert De Niro, Danny DeVito, Gene Hackman, D...",https://m.media-amazon.com/images/M/MV5BMjEzNT...,"action, crime, drama","[3679, 3416, 6606, 2599, 3714, 4070, 5160, 816..."
4173,4877,Hide and Seek,"[Robert De Niro, Elisabeth Shue, Famke Janssen...",https://m.media-amazon.com/images/M/MV5BNWY2ZT...,"crime, drama, thriller","[3679, 494, 2830, 4018, 4445, 719, 477, 6552, ..."
4853,5669,Joker,"[Robert De Niro, Joaquin Phoenix, Bryan Callen...",https://m.media-amazon.com/images/M/MV5BNGVjNW...,"crime, drama, thriller","[3679, 3626, 2887, 5214, 1921, 1171, 4395, 795..."


In [18]:
filtered_df = df[df['Film'].apply(lambda x:"Shining" in x)]
filtered_df

Unnamed: 0,index,Film,Actor list,poster,genre,Actor_List_id
