In [1]:
#Modules
import sys
import csv

from util import Node, StackFrontier, QueueFrontier

In [2]:
# Module-level lookup tables; load_data() fills all three.
# Maps lower-cased names to a set of corresponding person_ids
names = {}

# Maps person_ids to a dictionary of: name, birth, movies (a set of movie_ids)
people = {}

# Maps movie_ids to a dictionary of: title, year, stars (a set of person_ids)
movies = {}

In [4]:
# Func
# Loads the people, movies and stars data from the CSV files.

def load_data(directory):
    """
    Load data from CSV files into memory.

    Reads ``people.csv``, ``movies.csv`` and ``stars.csv`` from `directory`
    and fills the module-level ``names``, ``people`` and ``movies`` dicts.

    A ``stars.csv`` row is applied only when both its person_id and its
    movie_id are already known. Rows that reference an unknown id are
    skipped as a whole, so a person's "movies" set never contains an id
    that is missing from ``movies`` (and vice versa).
    """
    # Load people
    with open(f"{directory}/people.csv", encoding="utf-8", newline="") as f:
        for row in csv.DictReader(f):
            people[row["id"]] = {
                "name": row["name"],
                "birth": row["birth"],
                "movies": set()
            }
            # Names are indexed lower-cased; several people may share a name.
            names.setdefault(row["name"].lower(), set()).add(row["id"])

    # Load movies
    with open(f"{directory}/movies.csv", encoding="utf-8", newline="") as f:
        for row in csv.DictReader(f):
            movies[row["id"]] = {
                "title": row["title"],
                "year": row["year"],
                "stars": set()
            }

    # Load stars
    with open(f"{directory}/stars.csv", encoding="utf-8", newline="") as f:
        for row in csv.DictReader(f):
            person = people.get(row.get("person_id"))
            movie = movies.get(row.get("movie_id"))
            # Check both sides before touching either one: updating the
            # person first and then failing on the movie would leave a
            # dangling movie_id behind.
            if person is None or movie is None:
                continue
            person["movies"].add(row["movie_id"])
            movie["stars"].add(row["person_id"])

In [5]:
# Looks up a person's id from their name.
def person_id_for_name(name):
    """
    Returns the IMDB id for a person's name,
    resolving ambiguities as needed.

    The lookup is case-insensitive. When exactly one person has the name,
    that person's id is returned. When several do, the candidates are
    printed (in sorted id order) and the user is prompted to type the
    intended id.

    Returns None when the name is unknown, or when the id typed at the
    prompt is not one of the listed candidates.
    """
    # Sorted so that the candidate list is shown in a stable order.
    person_ids = sorted(names.get(name.lower(), set()))
    if not person_ids:
        return None
    if len(person_ids) == 1:
        return person_ids[0]

    print(f"Which '{name}'?")
    for candidate_id in person_ids:
        person = people[candidate_id]
        full_name = person["name"]
        birth = person["birth"]
        print(f"ID: {candidate_id}, Name: {full_name}, Birth: {birth}")

    # Tolerate stray whitespace around the typed id.
    chosen_id = input("Intended Person ID: ").strip()
    return chosen_id if chosen_id in person_ids else None

# Gets the neighbors (co-stars) of a person, given the person's id.
def neighbors_for_person(person_id):
    """
    Collect everyone who shares a movie with the given person.

    Returns a set of (movie_id, person_id) pairs: one pair for each star
    of each movie listed for `person_id`. The given person appears in the
    result too whenever they are among a movie's stars.
    """
    return {
        (film_id, star_id)
        for film_id in people[person_id]["movies"]
        for star_id in movies[film_id]["stars"]
    }



#Testing Loading Data

In [6]:
directory="small"               # Dataset folder to load; must be "small" or "large".

# Don't change this!
print("Loading data...")
load_data(directory)
print("Data loaded.")

# After this cell has run, the names, people and movies dicts are filled.

Loading data...
Data loaded.


In [61]:
people

{'102': {'name': 'Kevin Bacon',
  'birth': '1958',
  'movies': {'104257', '112384'}},
 '129': {'name': 'Tom Cruise', 'birth': '1962', 'movies': {'104257', '95953'}},
 '144': {'name': 'Cary Elwes', 'birth': '1962', 'movies': {'93779'}},
 '158': {'name': 'Tom Hanks', 'birth': '1956', 'movies': {'109830', '112384'}},
 '1597': {'name': 'Mandy Patinkin', 'birth': '1952', 'movies': {'93779'}},
 '163': {'name': 'Dustin Hoffman', 'birth': '1937', 'movies': {'95953'}},
 '1697': {'name': 'Chris Sarandon', 'birth': '1942', 'movies': {'93779'}},
 '193': {'name': 'Demi Moore', 'birth': '1962', 'movies': {'104257'}},
 '197': {'name': 'Jack Nicholson', 'birth': '1937', 'movies': {'104257'}},
 '200': {'name': 'Bill Paxton', 'birth': '1955', 'movies': {'112384'}},
 '398': {'name': 'Sally Field', 'birth': '1946', 'movies': {'109830'}},
 '420': {'name': 'Valeria Golino', 'birth': '1965', 'movies': {'95953'}},
 '596520': {'name': 'Gerald R. Molen', 'birth': '1935', 'movies': {'95953'}},
 '641': {'name': '

#Get actor/actress name

In [62]:
# First actor (source). The name must exist in the loaded dataset, e.g. Emma Watson.
source = person_id_for_name("Bill Paxton")
if source is None:
    raise Exception("Incorrect source! Source not found")

# Second actor (target). The name must exist in the loaded dataset, e.g. Jennifer Lawrence.
target = person_id_for_name("Tom Hanks")
if target is None:
    # This message used to say "Incorrect source!", which pointed at the wrong input.
    raise Exception("Incorrect target! Target not found")

#example, this will return 2 tuple

In [68]:
# Shortest-path search between two people (I think this is the better solution).
def shortest_path(source, target):
    """
    Find a chain of co-starring links that leads from `source` to `target`.

    The search expands people in the order QueueFrontier hands them back;
    assuming that is first-in-first-out (TODO confirm against util), this is
    a breadth-first search and the returned chain is a shortest one.

    Args:
        source: person_id to start from.
        target: person_id to reach.

    Returns:
        A list of (movie_id, person_id) pairs, one per step from `source`
        to `target`; an empty list when `source` and `target` are the same
        person; None when the two people are not connected.
    """
    # A person is zero steps away from themselves; no search needed.
    if source == target:
        return []

    frontier = QueueFrontier()
    frontier.add(Node(source, None, None))

    # Every state that has ever been queued. Membership here also implies
    # "already in the frontier or already expanded", so a separate scan of
    # the frontier is unnecessary.
    seen = {source}

    while not frontier.empty():
        node = frontier.remove()

        for movie_id, person_id in neighbors_for_person(node.state):
            if person_id in seen:
                continue
            seen.add(person_id)

            child = Node(person_id, parent=node, action=movie_id)

            # Test for the goal as soon as the child is created rather than
            # when it is later removed from the frontier.
            if child.state == target:
                steps = []
                # Walk back to the root (whose parent is None), then flip
                # the list so it reads source -> target.
                while child.parent is not None:
                    steps.append((child.action, child.state))
                    child = child.parent
                steps.reverse()
                return steps

            frontier.add(child)

    # Frontier exhausted without reaching the target: not connected.
    return None

In [69]:
# Search for a connection between the source and target ids selected earlier.
path = shortest_path(source, target)
print(path)

[('112384', '158')]


In [70]:
# Report the result. Don't change this logic: it expects `path` to be either
# None or a list of (movie_id, person_id) steps.
if path is None:
    print("Not connected.")
else:
    degrees = len(path)
    print(f"{degrees} degrees of separation.")
    # Build the padded chain under a new name instead of rebinding `path`,
    # so re-running this cell does not prepend the source a second time.
    chain = [(None, source)] + path
    for i in range(degrees):
        person1 = people[chain[i][1]]["name"]
        person2 = people[chain[i + 1][1]]["name"]
        movie = movies[chain[i + 1][0]]["title"]
        print(f"{i + 1}: {person1} and {person2} starred in {movie}")

1 degrees of separation.
1: Bill Paxton and Tom Hanks starred in Apollo 13
