In [72]:
import csv
import sys
import os
import pprint as pp
from util import Node, StackFrontier, QueueFrontier

In [2]:
# Maps lower-cased person names to the set of person ids that share that name
names = {}

# Maps person ids to a dict with keys: name, birth, movies (a set of movie ids)
people = {}

# Maps movie ids to a dict with keys: title, year, stars (a set of person ids)
movies = {}

In [24]:
def load_data(directory):
    """
    Load data from CSV files into memory.

    Reads ``people.csv``, ``movies.csv`` and ``stars.csv`` from `directory`
    and fills the module-level `people`, `movies` and `names` dictionaries
    in place. Nothing is returned.

    Star rows that mention a person id or movie id that was not loaded are
    skipped entirely, so `people` never references a movie missing from
    `movies` (and vice versa).

    Raises whatever `open` raises (e.g. FileNotFoundError) when a file is
    missing, and KeyError when people.csv / movies.csv lack an expected column.
    """
    # Load people
    # newline="" is what the csv module asks for when handing it a file object.
    with open(os.path.join(directory, "people.csv"),
              encoding="utf-8", newline="") as f:
        for row in csv.DictReader(f):
            people[row["id"]] = {
                "name": row["name"],
                "birth": row["birth"],
                "movies": set()
            }
            # Several people can share a name, so index names -> set of ids.
            names.setdefault(row["name"].lower(), set()).add(row["id"])

    # Load movies
    with open(os.path.join(directory, "movies.csv"),
              encoding="utf-8", newline="") as f:
        for row in csv.DictReader(f):
            movies[row["id"]] = {
                "title": row["title"],
                "year": row["year"],
                "stars": set()
            }

    # Load stars
    with open(os.path.join(directory, "stars.csv"),
              encoding="utf-8", newline="") as f:
        for row in csv.DictReader(f):
            person_id = row.get("person_id")
            movie_id = row.get("movie_id")
            person = people.get(person_id)
            movie = movies.get(movie_id)
            # Resolve BOTH ends before mutating anything: linking only one
            # side would leave a dangling id that breaks later graph lookups.
            if person is None or movie is None:
                continue
            person["movies"].add(movie_id)
            movie["stars"].add(person_id)

In [25]:
# Build the dataset path with os.path.join so the separator is correct on
# every platform (a literal '\\' only works on Windows).
dir_ = os.path.join(os.getcwd(), 'small')
load_data(dir_)

In [11]:

def person_id_for_name(name):
    """
    Look up the IMDB id that belongs to a person's name.

    The lookup is case-insensitive. Returns None when the name is unknown.
    When several people share the name, the candidates are printed and the
    user is prompted for the intended id; None is returned if the answer is
    not one of the listed ids.
    """
    candidates = list(names.get(name.lower(), set()))

    # Unknown name.
    if not candidates:
        return None

    # Exactly one match: nothing to disambiguate.
    if len(candidates) == 1:
        return candidates[0]

    # Ambiguous name: list every candidate and let the user pick one.
    print(f"Which '{name}'?")
    for candidate in candidates:
        record = people[candidate]
        print(f"ID: {candidate}, Name: {record['name']}, Birth: {record['birth']}")

    try:
        chosen = input("Intended Person ID: ")
        if chosen in candidates:
            return chosen
    except ValueError:
        pass
    return None

In [14]:
def neighbors_for_person(person_id):
    """
    Collect the (movie_id, person_id) pairs reachable from a person.

    For every movie the given person appears in, one pair is produced per
    star of that movie. The given person is listed among each movie's stars,
    so pairs pointing back at that same person are part of the result.
    """
    return {
        (film_id, costar_id)
        for film_id in people[person_id]["movies"]
        for costar_id in movies[film_id]["stars"]
    }

In [18]:
def main():
    """
    Prompt for two names and print how the two people are connected.

    Exits via sys.exit("Person not found.") when either name cannot be
    resolved. Prints "Not connected." when shortest_path returns None;
    otherwise prints the number of degrees followed by one line per hop.

    No data is loaded here: the module-level dictionaries must already have
    been filled by load_data before this is called.
    """
    source = person_id_for_name(input("Name: "))
    if source is None:
        sys.exit("Person not found.")

    target = person_id_for_name(input("Name: "))
    if target is None:
        sys.exit("Person not found.")

    path = shortest_path(source, target)
    if path is None:
        print("Not connected.")
        return

    degrees = len(path)
    print(f"{degrees} degrees of separation.")

    # Prepend the starting person so consecutive entries form one hop each.
    hops = [(None, source)] + path
    for step, (previous, current) in enumerate(zip(hops, hops[1:]), start=1):
        person1 = people[previous[1]]["name"]
        person2 = people[current[1]]["name"]
        movie = movies[current[0]]["title"]
        print(f"{step}: {person1} and {person2} starred in {movie}")



In [22]:
people

{'102': {'name': 'Kevin Bacon',
  'birth': '1958',
  'movies': {'104257', '112384'}},
 '129': {'name': 'Tom Cruise', 'birth': '1962', 'movies': {'104257', '95953'}},
 '144': {'name': 'Cary Elwes', 'birth': '1962', 'movies': {'93779'}},
 '158': {'name': 'Tom Hanks', 'birth': '1956', 'movies': {'109830', '112384'}},
 '1597': {'name': 'Mandy Patinkin', 'birth': '1952', 'movies': {'93779'}},
 '163': {'name': 'Dustin Hoffman', 'birth': '1937', 'movies': {'95953'}},
 '1697': {'name': 'Chris Sarandon', 'birth': '1942', 'movies': {'93779'}},
 '193': {'name': 'Demi Moore', 'birth': '1962', 'movies': {'104257'}},
 '197': {'name': 'Jack Nicholson', 'birth': '1937', 'movies': {'104257'}},
 '200': {'name': 'Bill Paxton', 'birth': '1955', 'movies': {'112384'}},
 '398': {'name': 'Sally Field', 'birth': '1946', 'movies': {'109830'}},
 '420': {'name': 'Valeria Golino', 'birth': '1965', 'movies': {'95953'}},
 '596520': {'name': 'Gerald R. Molen', 'birth': '1935', 'movies': {'95953'}},
 '641': {'name': '

In [84]:
def shortest_path(source, target):
  """
  Exploratory draft: takes two person NAMES, resolves them to ids and, for
  each (movie_id, person_id) neighbor of the source, prints whether the
  source id and the target id occur in that pair. Returns nothing.

  A later cell in this notebook redefines shortest_path to take ids and
  return a path; that later definition is the one main() ends up using.
  """
  source_id = person_id_for_name(source)
  target_id = person_id_for_name(target)

  for pair in neighbors_for_person(source_id):
    has_source = source_id in pair
    has_target = target_id in pair
    print(f'_ ==> {source_id} in {has_source} and {target_id} in {has_target}')

In [85]:
shortest_path('Kevin Bacon', 'Tom Cruise')

_ ==> 102 in False and 129 in False
_ ==> 102 in False and 129 in False
_ ==> 102 in False and 129 in True
_ ==> 102 in False and 129 in False
_ ==> 102 in False and 129 in False
_ ==> 102 in True and 129 in False
_ ==> 102 in False and 129 in False
_ ==> 102 in True and 129 in False


In [58]:
# Scratch cell: statements from a draft of shortest_path(source, target), run
# at top level with hard-coded names so the intermediate values can be inspected.
source_id = person_id_for_name('Kevin Bacon') # stands in for the `source` argument
target_id = person_id_for_name('Tom Cruise') # stands in for the `target` argument
neighbors = neighbors_for_person(source_id)
# Placeholder counter; it is not read again in this cell.
deg = 0
# Each item is a (movie_id, person_id) tuple.
for _ in neighbors:
    print(_)

('112384', '200')
('112384', '158')
('104257', '129')
('104257', '197')
('112384', '641')
('112384', '102')
('104257', '193')
('104257', '102')


In [43]:
sorted(list(neighbors))

[('104257', '102'),
 ('104257', '129'),
 ('104257', '193'),
 ('104257', '197'),
 ('95953', '129'),
 ('95953', '163'),
 ('95953', '420'),
 ('95953', '596520')]

In [39]:
list(neighbors)[0][1]

'200'

In [29]:
person_id_for_name('Tom Cruise')

'129'

In [30]:
neighbors_for_person('102')

{('104257', '102'),
 ('104257', '129'),
 ('104257', '193'),
 ('104257', '197'),
 ('112384', '102'),
 ('112384', '158'),
 ('112384', '200'),
 ('112384', '641')}

In [34]:
main()

KeyError: None

In [88]:
def shortest_path(source, target):
    """
    Return the shortest list of (movie_id, person_id) pairs that connects
    the person id `source` to the person id `target`.

    Each pair means "the previous person starred in movie_id together with
    person_id"; the last pair's person_id is `target`. The search is
    breadth-first, so the first path that reaches the target uses the fewest
    possible hops.

    Returns [] when source and target are the same person, and None when no
    chain of co-stars links them.
    """
    # Same person: zero degrees of separation, no hops to report.
    if source == target:
        return []

    # A FIFO frontier explores people in order of distance from the source,
    # which is what makes the first hit a shortest path.
    # NOTE(review): QueueFrontier is assumed to offer the same
    # add/remove/empty/contains_state interface as StackFrontier - confirm in util.
    start = Node(state=source, parent=None, action=None)
    frontier = QueueFrontier()
    frontier.add(start)

    # Person ids that have already been taken off the frontier.
    explored = set()

    while not frontier.empty():
        node = frontier.remove()
        explored.add(node.state)

        for movie_id, person_id in neighbors_for_person(node.state):
            # Skip anyone already expanded or already waiting in the frontier.
            if person_id in explored or frontier.contains_state(person_id):
                continue

            child = Node(state=person_id, parent=node, action=movie_id)

            # Test for the goal as soon as the node is generated; this saves
            # expanding a whole extra level of the graph.
            if child.state == target:
                # Walk the parent links back to the start node, then flip the
                # collected hops into source -> target order.
                solution = []
                while child.parent is not None:
                    solution.append((child.action, child.state))
                    child = child.parent
                solution.reverse()
                return solution

            frontier.add(child)

    # Frontier exhausted without reaching the target: not connected.
    return None

In [89]:
main

<function __main__.main()>

In [90]:
main()

1 degrees of separation.
1: Kevin Bacon and Tom Cruise starred in A Few Good Men
