In [2]:
import numpy as np
import pandas as pd
import networkx as nx
import pickle
import matplotlib.pyplot as plt

In [3]:
cast_df = None

with open('../data/the-movies-dataset/cast_for_network.pkl', 'rb') as fl:
    cast_df = pickle.load(fl)

In [4]:
def convert_ids(ids_in_csv):
    return pd.to_numeric(ids_in_csv, errors='coerce').astype('int64')
names_for_movies_df = pd.read_csv('../data/the-movies-dataset/movies_metadata.csv'
                        , converters={'id': lambda x: convert_ids(x), 'imdb_id': lambda x: convert_ids(x)}
                       ,usecols=['id', 'original_title'
                                 , 'popularity', 'overview', 'genres'
                                 , 'revenue', 'vote_average' 
                                 ,'runtime', 'tagline'
                                 ,'homepage','poster_path'
                                 , 'release_date'
                                 ,'title', 'spoken_languages'
                                 ])

  interactivity=interactivity, compiler=compiler, result=result)


In [5]:
cast_df = cast_df.drop_duplicates()

In [6]:
names_for_movies_df = names_for_movies_df.drop_duplicates()

In [7]:
actors_and_movie_info_df = cast_df.merge(names_for_movies_df
                                        , how='inner'
                                        , left_on='movie'
                                        , right_on='id')

In [9]:
#actors_and_movie_info_df.rename(columns={'id_x': 'id_actor', 'id_y': 'id_movie'})

In [10]:
actors_and_movie_info_df = actors_and_movie_info_df.rename(columns={'id_x': 'id_actor', 'id_y': 'id_movie'})

What data do I actually want in the graph? Any Rules for building the graph? 
* Nodes
    * Movie Names (Orginal_Title)
        * Movie IDs
        * Genres
        * Overview
        * Popularity
        * Poster_Path
        * Release_Date
        * Revenue
        * Runtime
        * Tagline
        * Title
        * Vote Average
        
* Edges
    * Actor Names
        * Role in Film (character)
    * Actor IDs
        * id
        * credit_id
    * Order (Billing)
    * Profile_Path

## Grouping and Iterating

In [None]:
actors_filmography_group = actors_and_movie_info_df.groupby('name')

In [93]:
test_df = actors_and_movie_info_df[(actors_and_movie_info_df.original_title == 'Toy Story') | (actors_and_movie_info_df.original_title == 'Big')]
test_group = test_df.groupby('name')

In [98]:
import time
import pprint
import itertools

master_graph = nx.MultiGraph()

for filmography in actors_filmography_group:
#for filmography in test_group:
#     print('@@@@@@@@@@@@@@@@@@@@@@@BEGIN@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@')
#     print('This is the Graph creation for %s :' % (actor_edge['name']))
    
    name = filmography[0]
    films_df = filmography[1]
    
    ## Need to store attribuetes of actors in a list, not dictionary based on constraints of NetworkX
    ## When accessing an edge, index 0 is Name, 1 is Actor_id, 2 is profile_path
    actor_edge_attrs = [name,  films_df['id_actor'].iloc[0], films_df['profile_path'].iloc[0]]   
    
    films = films_df[['id_movie', 'original_title', 'profile_path', 'popularity', 'genres', 'vote_average' 
                       ,'overview', 'poster_path', 'release_date', 'revenue'
                       ,'runtime', 'spoken_languages', 'tagline', 'title']].to_dict(orient='records')
    
    
    
    nodes_for_stargraph = [film['title'] for film in films]
    node_attributes = {film['title']: film for film in films}
    
    #pprint.pprint(node_attributes)
    
    actor_stargraph = nx.star_graph(nodes_for_stargraph, nx.MultiGraph)
    
    nx.set_node_attributes(actor_stargraph, node_attributes)
    nx.set_edge_attributes(actor_stargraph, actor_edge_attrs, 'actor')
    
 
    
    master_graph = nx.compose(master_graph, actor_stargraph)
        
    #print('\n!!!!!!!!!!!!!!!!!!ACTOR-GRAPH ANALYSIS !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!\n')
    #print("The Actor's Graph is composed of:")
    #pprint.pprint(actor_stargraph.nodes())
    #pprint.pprint(actor_stargraph.edges())
    
    #print('\n\n\n********************MASTER-GRAPH************************************')
    #pprint.pprint(master_graph.nodes())
    #print()
    #pprint.pprint(master_graph.edges())
    #pprint.pprint(list(nx.connected_components(master_graph)))
    #time.sleep(.25)
    
#     print("\nThe Master Graph is composed of:")
#     shortest_paths = dict(nx.all_pairs_shortest_path(master_graph))
#     for paths in shortest_paths:
#         print('PATH::::::::::::::::')
#         pprint.pprint(paths)
#         print('____________________')

    #print('^^^^^^^^^^^^^^^^^^^^END^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n')

KeyboardInterrupt: 

In [97]:
master_graph.edges.data()

MultiEdgeDataView([('Big', 'Toy Story', {'actor': ['Tom Hanks', 31.0, '/pQFoyx7rp09CJTAb932F2g8Nlho.jpg']})])

In [100]:
nx.find_cycle(master_graph)

[("Get Rich or Die Tryin'", 'How to Make Money Selling Drugs', 0),
 ('How to Make Money Selling Drugs', 'Edtv', 0),
 ('Edtv', 'The Prophecy', 0),
 ('The Prophecy', 'Zodiac', 0),
 ('Zodiac', 'Final Destination 2', 0),
 ('Final Destination 2', 'The Virgin Suicides', 0),
 ('The Virgin Suicides', 'Out Cold', 0),
 ('Out Cold', 'Friday the 13th Part VI: Jason Lives', 0),
 ('Friday the 13th Part VI: Jason Lives', 'Righteous Kill', 0),
 ('Righteous Kill', "Get Rich or Die Tryin'", 0)]

While iterating, I will have one edge, it will be the same, the actor. As well as one graph. Build the actors profile in an edge dictionary. Then, build the set of nodes. Then create all combinations of nodes with the same edge, and place them into the multigraph. 

In [76]:
A2 = nx.nx_agraph.to_agraph(master_graph) 
A2.write('/Users/bjg/Desktop/graph.out')

In [102]:
master_graph.node['Zodiac']

{'id_movie': 285135,
 'original_title': 'Zodiac',
 'profile_path': '/922dX7dP43qvKmqodz6akxdfJ5p.jpg',
 'popularity': 3.2997300000000003,
 'genres': "[{'id': 878, 'name': 'Science Fiction'}, {'id': 10770, 'name': 'TV Movie'}]",
 'vote_average': 3.8,
 'overview': 'A 2,000-year-old astrology board possesses deadly powers that threaten the fate of humanity.',
 'poster_path': '/dafW2jgYi345RLn4cFoyCol4mUk.jpg',
 'release_date': '2014-08-16',
 'revenue': 0.0,
 'runtime': 89.0,
 'spoken_languages': "[{'iso_639_1': 'en', 'name': 'English'}]",
 'tagline': 'The Signs are Everywhere',
 'title': 'Zodiac'}

In [103]:
nx.number_of_nodes(master_graph)

25899