In [1]:
import pandas as pd

In [2]:
# Read the three CSV files into DataFrames so the next cells can preview their columns.
# NOTE(review): `people` and `movies` are rebound to plain dicts by a later cell in this
# notebook, so the `.head()` previews only work when run before that cell.
# NOTE(review): these paths point at the working directory, while load_data() is later
# given './Datasets' -- confirm which location actually holds the CSV files.
people = pd.read_csv("./people.csv")
movies = pd.read_csv("./movies.csv")
stars = pd.read_csv("./stars.csv")

In [3]:
people.head()

Unnamed: 0,id,name,birth
0,102,Kevin Bacon,1958
1,129,Tom Cruise,1962
2,144,Cary Elwes,1962
3,158,Tom Hanks,1956
4,1597,Mandy Patinkin,1952


In [4]:
movies.head()

Unnamed: 0,id,title,year
0,112384,Apollo 13,1995
1,104257,A Few Good Men,1992
2,109830,Forrest Gump,1994
3,93779,The Princess Bride,1987
4,95953,Rain Man,1988


In [5]:
stars.head()

Unnamed: 0,person_id,movie_id
0,102,104257
1,102,112384
2,129,104257
3,129,95953
4,144,93779


In [6]:
import csv
import sys

from util import Node, StackFrontier, QueueFrontier

# Maps names to a set of corresponding person_ids
names = {}

# Maps person_ids to a dictionary of: name, birth, movies (a set of movie_ids)
people = {}

# Maps movie_ids to a dictionary of: title, year, stars (a set of person_ids)
movies = {}


def load_data(directory):
    """
    Load data from CSV files into memory.

    Reads ``people.csv``, ``movies.csv`` and ``stars.csv`` from ``directory``
    and fills the module-level ``names``, ``people`` and ``movies`` dicts.
    All ids are kept as the strings read from the files. Names are indexed
    in lower case.

    A row of ``stars.csv`` is applied only when both its person and its movie
    are known; otherwise the whole row is skipped, so a person never ends up
    pointing at a movie that is missing from ``movies`` (or vice versa).

    Raises KeyError if an expected column is missing from a file, and
    OSError if a file cannot be opened.
    """
    # Load people
    # newline="" lets the csv module do its own newline handling.
    with open(f"{directory}/people.csv", encoding="utf-8", newline="") as f:
        for row in csv.DictReader(f):
            people[row["id"]] = {
                "name": row["name"],
                "birth": row["birth"],
                "movies": set()
            }
            # Several people may share one (lower-cased) name.
            names.setdefault(row["name"].lower(), set()).add(row["id"])

    # Load movies
    with open(f"{directory}/movies.csv", encoding="utf-8", newline="") as f:
        for row in csv.DictReader(f):
            movies[row["id"]] = {
                "title": row["title"],
                "year": row["year"],
                "stars": set()
            }

    # Load stars
    with open(f"{directory}/stars.csv", encoding="utf-8", newline="") as f:
        for row in csv.DictReader(f):
            person_id = row["person_id"]
            movie_id = row["movie_id"]
            person = people.get(person_id)
            movie = movies.get(movie_id)
            # Resolve both ends before touching either set: linking only one
            # side would leave a dangling id behind.
            if person is None or movie is None:
                continue
            person["movies"].add(movie_id)
            movie["stars"].add(person_id)


In [7]:
def person_id_for_name(name):
    """
    Returns the IMDB id for a person's name,
    resolving ambiguities as needed.

    The name is matched case-insensitively against ``names``. When exactly
    one person has that name their id is returned. When several do, the
    candidates are printed (sorted by id so the listing is stable) and the
    user is prompted for the intended id.

    Returns None if the name is unknown or the entered id is not one of the
    listed candidates.
    """
    person_ids = sorted(names.get(name.lower(), set()))
    if not person_ids:
        return None
    if len(person_ids) == 1:
        return person_ids[0]

    print(f"Which '{name}'?")
    for candidate_id in person_ids:
        person = people[candidate_id]
        print(f"ID: {candidate_id}, Name: {person['name']}, Birth: {person['birth']}")

    # Ids are strings, so the reply is compared as text; surrounding
    # whitespace typed by the user is ignored.
    chosen_id = input("Intended Person ID: ").strip()
    return chosen_id if chosen_id in person_ids else None


def neighbors_for_person(person_id):
    """
    Build the set of (movie_id, person_id) pairs reachable from one person.

    For every movie recorded for ``person_id``, each entry of that movie's
    star set yields one pair. If the person is listed among a movie's stars
    they show up in the result as well.
    """
    return {
        (film_id, costar_id)
        for film_id in people[person_id]["movies"]
        for costar_id in movies[film_id]["stars"]
    }

### Implementation

In [22]:
# Folder handed to load_data(), plus the two names that person_id_for_name()
# resolves to ids further down (that lookup lower-cases the name itself).
directory = './Datasets'
source = 'kevin bacon'
target = 'tom cruise'

In [9]:
#Loading the data
load_data(directory)

In [10]:
'''
The names dictionary is a way to look up a person by their name: it maps names to a set of 
corresponding ids (because it’s possible that multiple actors have the same name).
'''
names

{'kevin bacon': {'102'},
 'tom cruise': {'129'},
 'cary elwes': {'144'},
 'tom hanks': {'158'},
 'mandy patinkin': {'1597'},
 'dustin hoffman': {'163'},
 'chris sarandon': {'1697'},
 'demi moore': {'193'},
 'jack nicholson': {'197'},
 'bill paxton': {'200'},
 'sally field': {'398'},
 'valeria golino': {'420'},
 'gerald r. molen': {'596520'},
 'gary sinise': {'641'},
 'robin wright': {'705'},
 'emma watson': {'914612'}}

In [13]:
'''
The people dictionary maps each person’s id to another dictionary with values for the
person’s name, birth year, and the set of all the movies they have starred in.
'''
people

{'102': {'name': 'Kevin Bacon',
  'birth': '1958',
  'movies': {'104257', '112384'}},
 '129': {'name': 'Tom Cruise', 'birth': '1962', 'movies': {'104257', '95953'}},
 '144': {'name': 'Cary Elwes', 'birth': '1962', 'movies': {'93779'}},
 '158': {'name': 'Tom Hanks', 'birth': '1956', 'movies': {'109830', '112384'}},
 '1597': {'name': 'Mandy Patinkin', 'birth': '1952', 'movies': {'93779'}},
 '163': {'name': 'Dustin Hoffman', 'birth': '1937', 'movies': {'95953'}},
 '1697': {'name': 'Chris Sarandon', 'birth': '1942', 'movies': {'93779'}},
 '193': {'name': 'Demi Moore', 'birth': '1962', 'movies': {'104257'}},
 '197': {'name': 'Jack Nicholson', 'birth': '1937', 'movies': {'104257'}},
 '200': {'name': 'Bill Paxton', 'birth': '1955', 'movies': {'112384'}},
 '398': {'name': 'Sally Field', 'birth': '1946', 'movies': {'109830'}},
 '420': {'name': 'Valeria Golino', 'birth': '1965', 'movies': {'95953'}},
 '596520': {'name': 'Gerald R. Molen', 'birth': '1935', 'movies': {'95953'}},
 '641': {'name': '

In [14]:
'''
The movies dictionary maps each movie’s id to another dictionary with values for
that movie’s title, release year, and the set of all the movie’s stars.
'''
movies

{'112384': {'title': 'Apollo 13',
  'year': '1995',
  'stars': {'102', '158', '200', '641'}},
 '104257': {'title': 'A Few Good Men',
  'year': '1992',
  'stars': {'102', '129', '193', '197'}},
 '109830': {'title': 'Forrest Gump',
  'year': '1994',
  'stars': {'158', '398', '641', '705'}},
 '93779': {'title': 'The Princess Bride',
  'year': '1987',
  'stars': {'144', '1597', '1697', '705'}},
 '95953': {'title': 'Rain Man',
  'year': '1988',
  'stars': {'129', '163', '420', '596520'}}}

In [23]:
# Resolve the configured names to person ids; person_id_for_name() returns None
# for a name it does not know.
source_id = person_id_for_name(source)
target_id = person_id_for_name(target)

In [24]:
print(source_id)

102


In [25]:
print(target_id)

129


### BFS Implementation

In [48]:
import csv
import sys
from collections import deque
from util import Node, StackFrontier, QueueFrontier

# Maps names to a set of corresponding person_ids
names = {}

# Maps person_ids to a dictionary of: name, birth, movies (a set of movie_ids)
people = {}

# Maps movie_ids to a dictionary of: title, year, stars (a set of person_ids)
movies = {}


def load_data(directory):
    """
    Load data from CSV files into memory.

    Reads ``people.csv``, ``movies.csv`` and ``stars.csv`` from ``directory``
    and fills the module-level ``names``, ``people`` and ``movies`` dicts.
    All ids are kept as the strings read from the files. Names are indexed
    in lower case.

    A row of ``stars.csv`` is applied only when both its person and its movie
    are known; otherwise the whole row is skipped, so a person never ends up
    pointing at a movie that is missing from ``movies`` (or vice versa).

    Raises KeyError if an expected column is missing from a file, and
    OSError if a file cannot be opened.
    """
    # Load people
    # newline="" lets the csv module do its own newline handling.
    with open(f"{directory}/people.csv", encoding="utf-8", newline="") as f:
        for row in csv.DictReader(f):
            people[row["id"]] = {
                "name": row["name"],
                "birth": row["birth"],
                "movies": set()
            }
            # Several people may share one (lower-cased) name.
            names.setdefault(row["name"].lower(), set()).add(row["id"])

    # Load movies
    with open(f"{directory}/movies.csv", encoding="utf-8", newline="") as f:
        for row in csv.DictReader(f):
            movies[row["id"]] = {
                "title": row["title"],
                "year": row["year"],
                "stars": set()
            }

    # Load stars
    with open(f"{directory}/stars.csv", encoding="utf-8", newline="") as f:
        for row in csv.DictReader(f):
            person_id = row["person_id"]
            movie_id = row["movie_id"]
            person = people.get(person_id)
            movie = movies.get(movie_id)
            # Resolve both ends before touching either set: linking only one
            # side would leave a dangling id behind.
            if person is None or movie is None:
                continue
            person["movies"].add(movie_id)
            movie["stars"].add(person_id)


def main(directory='./Datasets'):
    """
    Interactive driver: load the data, ask for two names and print how the
    two people are connected.

    directory: folder containing people.csv, movies.csv and stars.csv.
        Defaults to './Datasets', the location this function used before it
        took a parameter.

    Exits via sys.exit("Person not found.") when either name cannot be
    resolved to a person id.
    """
    # Load data from files into memory
    print("Loading data...")
    load_data(directory)
    print("Data loaded.")

    source = person_id_for_name(input("Name: "))
    if source is None:
        sys.exit("Person not found.")
    target = person_id_for_name(input("Name: "))
    if target is None:
        sys.exit("Person not found.")

    path = shortest_path(source, target)

    if path is None:
        print("Not connected.")
        return

    degrees = len(path)
    print(f"{degrees} degrees of separation.")
    # Prepend the source so consecutive entries pair up as (person1, person2);
    # the movie that links a pair is stored on the second entry.
    path = [(None, source)] + path
    for step, ((_, first_id), (movie_id, second_id)) in enumerate(zip(path, path[1:]), start=1):
        person1 = people[first_id]["name"]
        person2 = people[second_id]["name"]
        movie = movies[movie_id]["title"]
        print(f"{step}: {person1} and {person2} starred in {movie}")


def shortest_path(source, target):
    """
    Returns the shortest list of (movie_id, person_id) pairs
    that connect the source to the target.

    Breadth-first search over neighbors_for_person(): people are expanded in
    the order they were first reached, so the first path that hits the
    target has the fewest steps.

    If source and target are the same person, returns an empty list
    (zero steps). If no possible path, returns None.
    """
    if source == target:
        return []

    frontier = deque([(source, [])])
    # Everyone who has ever been put on the frontier. A set gives O(1)
    # membership tests instead of scanning the whole queue per neighbor.
    seen = {source}
    while frontier:
        current_person, path = frontier.popleft()
        for movie_id, neighbor in neighbors_for_person(current_person):
            if neighbor == target:
                path = path + [(movie_id, neighbor)]
                print(f"Path found: {path}")
                return path
            if neighbor not in seen:
                seen.add(neighbor)
                frontier.append((neighbor, path + [(movie_id, neighbor)]))
    print("No path found.")
    return None


def person_id_for_name(name):
    """
    Returns the IMDB id for a person's name,
    resolving ambiguities as needed.

    The name is matched case-insensitively against ``names``. When exactly
    one person has that name their id is returned. When several do, the
    candidates are printed (sorted by id so the listing is stable) and the
    user is prompted for the intended id.

    Returns None if the name is unknown or the entered id is not one of the
    listed candidates.
    """
    person_ids = sorted(names.get(name.lower(), set()))
    if not person_ids:
        return None
    if len(person_ids) == 1:
        return person_ids[0]

    print(f"Which '{name}'?")
    for candidate_id in person_ids:
        person = people[candidate_id]
        print(f"ID: {candidate_id}, Name: {person['name']}, Birth: {person['birth']}")

    # Ids are strings, so the reply is compared as text; surrounding
    # whitespace typed by the user is ignored.
    chosen_id = input("Intended Person ID: ").strip()
    return chosen_id if chosen_id in person_ids else None


def neighbors_for_person(person_id):
    """
    Gather (movie_id, person_id) pairs for everyone listed as a star of any
    movie recorded for the given person.

    The result is a fresh set; if the given person is in a movie's star set
    they are included too.
    """
    pairs = set()
    for film_id in people[person_id]["movies"]:
        cast = movies[film_id]["stars"]
        pairs.update((film_id, costar_id) for costar_id in cast)
    return pairs


# Entry-point guard: start the interactive prompt only when this code runs as __main__.
if __name__ == "__main__":
    main()


Loading data...
Data loaded.
Name: kevin bacon
Name: tom cruise
Path found: [('104257', '129')]
1 degrees of separation.
1: Kevin Bacon and Tom Cruise starred in A Few Good Men


### DFS Implementation

In [51]:
import csv
import sys
from collections import deque
from util import Node, StackFrontier, QueueFrontier

# Maps names to a set of corresponding person_ids
names = {}

# Maps person_ids to a dictionary of: name, birth, movies (a set of movie_ids)
people = {}

# Maps movie_ids to a dictionary of: title, year, stars (a set of person_ids)
movies = {}


def load_data(directory):
    """
    Load data from CSV files into memory.

    Reads ``people.csv``, ``movies.csv`` and ``stars.csv`` from ``directory``
    and fills the module-level ``names``, ``people`` and ``movies`` dicts.
    All ids are kept as the strings read from the files. Names are indexed
    in lower case.

    A row of ``stars.csv`` is applied only when both its person and its movie
    are known; otherwise the whole row is skipped, so a person never ends up
    pointing at a movie that is missing from ``movies`` (or vice versa).

    Raises KeyError if an expected column is missing from a file, and
    OSError if a file cannot be opened.
    """
    # Load people
    # newline="" lets the csv module do its own newline handling.
    with open(f"{directory}/people.csv", encoding="utf-8", newline="") as f:
        for row in csv.DictReader(f):
            people[row["id"]] = {
                "name": row["name"],
                "birth": row["birth"],
                "movies": set()
            }
            # Several people may share one (lower-cased) name.
            names.setdefault(row["name"].lower(), set()).add(row["id"])

    # Load movies
    with open(f"{directory}/movies.csv", encoding="utf-8", newline="") as f:
        for row in csv.DictReader(f):
            movies[row["id"]] = {
                "title": row["title"],
                "year": row["year"],
                "stars": set()
            }

    # Load stars
    with open(f"{directory}/stars.csv", encoding="utf-8", newline="") as f:
        for row in csv.DictReader(f):
            person_id = row["person_id"]
            movie_id = row["movie_id"]
            person = people.get(person_id)
            movie = movies.get(movie_id)
            # Resolve both ends before touching either set: linking only one
            # side would leave a dangling id behind.
            if person is None or movie is None:
                continue
            person["movies"].add(movie_id)
            movie["stars"].add(person_id)


def main(directory='./Datasets'):
    """
    Interactive driver: load the data, ask for two names and print how the
    two people are connected.

    directory: folder containing people.csv, movies.csv and stars.csv.
        Defaults to './Datasets', the location this function used before it
        took a parameter.

    Exits via sys.exit("Person not found.") when either name cannot be
    resolved to a person id.
    """
    # Load data from files into memory
    print("Loading data...")
    load_data(directory)
    print("Data loaded.")

    source = person_id_for_name(input("Name: "))
    if source is None:
        sys.exit("Person not found.")
    target = person_id_for_name(input("Name: "))
    if target is None:
        sys.exit("Person not found.")

    path = shortest_path(source, target)

    if path is None:
        print("Not connected.")
        return

    degrees = len(path)
    print(f"{degrees} degrees of separation.")
    # Prepend the source so consecutive entries pair up as (person1, person2);
    # the movie that links a pair is stored on the second entry.
    path = [(None, source)] + path
    for step, ((_, first_id), (movie_id, second_id)) in enumerate(zip(path, path[1:]), start=1):
        person1 = people[first_id]["name"]
        person2 = people[second_id]["name"]
        movie = movies[movie_id]["title"]
        print(f"{step}: {person1} and {person2} starred in {movie}")


def shortest_path(source, target):
    """
    Returns a list of (movie_id, person_id) pairs that connect the source
    to the target, found by depth-first search.

    Because the search is depth-first, the returned path is a valid
    connection but is not guaranteed to be the shortest one.

    If source and target are the same person, returns an empty list
    (zero steps). If no possible path, returns None.
    """
    # Stack for DFS: stores (person_id, steps_taken_to_reach_that_person).
    # The source itself is never part of the steps, so nothing has to be
    # stripped off again when the target is found.
    stack = [(source, [])]
    explored = set()

    while stack:
        person_id, path = stack.pop()  # LIFO behavior for DFS

        # If target is found, return the path
        if person_id == target:
            return path

        # A person can be pushed more than once before being expanded
        if person_id in explored:
            continue
        explored.add(person_id)

        # Add neighbors to the stack
        for neighbor_movie_id, neighbor_person_id in neighbors_for_person(person_id):
            if neighbor_person_id not in explored:
                stack.append(
                    (neighbor_person_id, path + [(neighbor_movie_id, neighbor_person_id)])
                )

    # If no path found, return None
    return None



def person_id_for_name(name):
    """
    Returns the IMDB id for a person's name,
    resolving ambiguities as needed.

    The name is matched case-insensitively against ``names``. When exactly
    one person has that name their id is returned. When several do, the
    candidates are printed (sorted by id so the listing is stable) and the
    user is prompted for the intended id.

    Returns None if the name is unknown or the entered id is not one of the
    listed candidates.
    """
    person_ids = sorted(names.get(name.lower(), set()))
    if not person_ids:
        return None
    if len(person_ids) == 1:
        return person_ids[0]

    print(f"Which '{name}'?")
    for candidate_id in person_ids:
        person = people[candidate_id]
        print(f"ID: {candidate_id}, Name: {person['name']}, Birth: {person['birth']}")

    # Ids are strings, so the reply is compared as text; surrounding
    # whitespace typed by the user is ignored.
    chosen_id = input("Intended Person ID: ").strip()
    return chosen_id if chosen_id in person_ids else None


def neighbors_for_person(person_id):
    """
    Return the set of (movie_id, person_id) pairs obtained by walking every
    movie recorded for the given person and listing that movie's stars.

    Anyone in a movie's star set is included, the given person as well if
    they are listed there.
    """
    filmography = people[person_id]["movies"]
    return {
        (film_id, costar_id)
        for film_id in filmography
        for costar_id in movies[film_id]["stars"]
    }


# Entry-point guard: start the interactive prompt only when this code runs as __main__.
if __name__ == "__main__":
    main()


Loading data...
Data loaded.
Name: kevin bacon
Name: tom cruise
1 degrees of separation.
1: Kevin Bacon and Tom Cruise starred in A Few Good Men
