### Name: Shriya Bhat
### Reg: 220968020
### Batch: A1
### Week-2 Q1

# a) Using bfs search 

In [2]:
import os
print(os.getcwd())

C:\Users\Shriya Bhat\Documents\college\sem6\AI_LAB\WEEK2


In [1]:
import csv
import sys

sys.argv = ['degrees.py', 'small']

from util import Node, StackFrontier, QueueFrontier  
# -->ensure there is an __init__.py file in the degrees folder if using degrees.util

# Maps names to a set of corresponding person_ids
names = {}

# Maps person_ids to a dictionary of: name, birth, movies (a set of movie_ids)
people = {}

# Maps movie_ids to a dictionary of: title, year, stars (a set of person_ids)
movies = {}


def load_data(directory):
    """
    Load data from CSV files into memory.
    """
    # Load people
    with open(f"{directory}/people.csv", encoding="utf-8") as f:
        reader = csv.DictReader(f)
        for row in reader:
            people[row["id"]] = {
                "name": row["name"],
                "birth": row["birth"],
                "movies": set()
            }
            if row["name"].lower() not in names:
                names[row["name"].lower()] = {row["id"]}
            else:
                names[row["name"].lower()].add(row["id"])

    # Load movies
    with open(f"{directory}/movies.csv", encoding="utf-8") as f:
        reader = csv.DictReader(f)
        for row in reader:
            movies[row["id"]] = {
                "title": row["title"],
                "year": row["year"],
                "stars": set()
            }

    # Load stars
    with open(f"{directory}/stars.csv", encoding="utf-8") as f:
        reader = csv.DictReader(f)
        for row in reader:
            try:
                people[row["person_id"]]["movies"].add(row["movie_id"])
                movies[row["movie_id"]]["stars"].add(row["person_id"])
            except KeyError:
                pass


def main():
    if len(sys.argv) > 2:
        sys.exit("Usage: python degrees.py [directory]")
    
    directory = sys.argv[1] if len(sys.argv) == 2 else "large"
    print(f"Using directory: {directory}")
     


    # Load data from files into memory
    print("Loading data...")
    load_data(directory)
    print("Data loaded.")

    source = person_id_for_name(input("Name: "))
    if source is None:
        sys.exit("Person not found.")
    target = person_id_for_name(input("Name: "))
    if target is None:
        sys.exit("Person not found.")

    path = shortest_path(source, target)

    if path is None:
        print("Not connected.")
    else:
        degrees = len(path)
        print(f"{degrees} degrees of separation.")
        for i in range(degrees):  # Loop through the entire path, from start to end
            person1 = people[path[i - 1][1]]["name"] if i > 0 else people[path[i][1]]["name"]  # Previous person or the first person in the path
            person2 = people[path[i][1]]["name"]  # Current person in the path
            movie = movies[path[i][0]]["title"]  # Movie where they starred together
            print(f"{i + 1}: {person1} and {person2} starred in {movie}")

            
def bfs(source, target):
    """
    Perform breadth-first search to find the shortest path from source to target.
    """
    frontier = QueueFrontier()  # Use Queue for BFS
    start = Node(state=source, parent=None, action=None)
    frontier.add(start)

    # Keep track of explored nodes
    explored = set()

    while not frontier.empty():
        node = frontier.remove()

        if node.state == target:
            # Reconstruct the path by traversing the parent nodes
            path = []
            while node.parent is not None:
                path.append((node.action, node.state))
                node = node.parent
            path.reverse()  # Reverse the path to get the correct order
            return path

        explored.add(node.state)

        # Add neighbors to the frontier
        for movie_id, person_id in neighbors_for_person(node.state):
            if person_id not in explored and not frontier.contains_state(person_id):
                child = Node(state=person_id, parent=node, action=movie_id)
                frontier.add(child)

    return None  # If no path found


def shortest_path(source, target):
    """
    Returns the shortest list of (movie_id, person_id) pairs
    that connect the source to the target using BFS.
    If no direct path is found, it will calculate the path through Kevin Bacon.
    
    If no possible path, returns None.
    """
    # 1. Perform BFS to find the shortest path directly from source to target
    path = bfs(source, target)
    if path is not None:
        return path  # Return the direct path if found

    # 2. If no direct path, find the shortest path through Kevin Bacon
    kevin_bacon_id = person_id_for_name("Kevin Bacon")
    if kevin_bacon_id is None:
        sys.exit("Kevin Bacon not found in the database.")

    # Use BFS to find the shortest path from Kevin Bacon to the source and target
    bacon_path = bfs(kevin_bacon_id, source)
    if bacon_path is None:
        print(f"{people[source]['name']} is not connected to Kevin Bacon.")
        return None

    target_path = bfs(kevin_bacon_id, target)
    if target_path is None:
        print(f"{people[target]['name']} is not connected to Kevin Bacon.")
        return None

    # 3. Print the degrees of separation from Kevin Bacon
    print(f"\nDegrees of separation from Kevin Bacon:")
    print(f"{people[source]['name']}: {len(bacon_path)} degrees")
    print(f"{people[target]['name']}: {len(target_path)} degrees")
    
    # 4. Print the total degree of separation
    total_degrees = len(bacon_path) + len(target_path)  # Sum of degrees of separation
    print(f"Total degrees of separation: {total_degrees}")

    # 5. Print the movies starred with Kevin Bacon for the source and target
    print("\nMovies starred with Kevin Bacon:")
    print(f"Source ({len(bacon_path)} degrees):")
    for movie_id, person_id in bacon_path:
        movie_title = movies[movie_id]["title"]
        print(f"  {people[person_id]['name']} starred in {movie_title} with Kevin Bacon")

    print(f"\nTarget ({len(target_path)} degrees):")
    for movie_id, person_id in target_path:
        movie_title = movies[movie_id]["title"]
        print(f"  {people[person_id]['name']} starred in {movie_title} with Kevin Bacon")

    return bacon_path  # No need to return a path now, just print the relevant details




def person_id_for_name(name):
    """
    Returns the IMDB id for a person's name,
    resolving ambiguities as needed.
    """
    person_ids = list(names.get(name.lower(), set()))
    if len(person_ids) == 0:
        return None
    elif len(person_ids) > 1:
        print(f"Which '{name}'?")
        for person_id in person_ids:
            person = people[person_id]
            name = person["name"]
            birth = person["birth"]
            print(f"ID: {person_id}, Name: {name}, Birth: {birth}")
        try:
            person_id = input("Intended Person ID: ")
            if person_id in person_ids:
                return person_id
        except ValueError:
            pass
        return None
    else:
        return person_ids[0]


def neighbors_for_person(person_id):
    """
    Returns (movie_id, person_id) pairs for people
    who starred with a given person.
    """
    movie_ids = people[person_id]["movies"]
    neighbors = set()
    for movie_id in movie_ids:
        for person_id in movies[movie_id]["stars"]:
            neighbors.add((movie_id, person_id))
    return neighbors


if __name__ == "__main__":
    main()


Using directory: small
Loading data...
Data loaded.
Name: Tom Cruise
Name: Dustin Hoffman
1 degrees of separation.
1: Dustin Hoffman and Dustin Hoffman starred in Rain Man


- **Tom Cruise** and **Dustin Hoffman** have a connection through the movie Rain Man (1988). there is a direct connection, hence 1 degree separation

In [7]:
if __name__ == "__main__":
    main()

Using directory: small
Loading data...
Data loaded.
Name: Robin Wright
Name: Dustin Hoffman
4 degrees of separation.
1: Tom Hanks and Tom Hanks starred in Forrest Gump
2: Tom Hanks and Kevin Bacon starred in Apollo 13
3: Kevin Bacon and Tom Cruise starred in A Few Good Men
4: Tom Cruise and Dustin Hoffman starred in Rain Man


# b) Using dfs search 

In [7]:
import csv
import sys

sys.argv = ['degrees.py', 'small']

from util import Node, StackFrontier, QueueFrontier

# Maps names to a set of corresponding person_ids
names = {}

# Maps person_ids to a dictionary of: name, birth, movies (a set of movie_ids)
people = {}

# Maps movie_ids to a dictionary of: title, year, stars (a set of person_ids)
movies = {}


def load_data(directory):
    """
    Load data from CSV files into memory.
    """
    # Load people
    with open(f"{directory}/people.csv", encoding="utf-8") as f:
        reader = csv.DictReader(f)
        for row in reader:
            people[row["id"]] = {
                "name": row["name"],
                "birth": row["birth"],
                "movies": set()
            }
            if row["name"].lower() not in names:
                names[row["name"].lower()] = {row["id"]}
            else:
                names[row["name"].lower()].add(row["id"])

    # Load movies
    with open(f"{directory}/movies.csv", encoding="utf-8") as f:
        reader = csv.DictReader(f)
        for row in reader:
            movies[row["id"]] = {
                "title": row["title"],
                "year": row["year"],
                "stars": set()
            }

    # Load stars
    with open(f"{directory}/stars.csv", encoding="utf-8") as f:
        reader = csv.DictReader(f)
        for row in reader:
            try:
                people[row["person_id"]]["movies"].add(row["movie_id"])
                movies[row["movie_id"]]["stars"].add(row["person_id"])
            except KeyError:
                pass


def main():
    if len(sys.argv) > 2:
        sys.exit("Usage: python degrees.py [directory]")
    
    directory = sys.argv[1] if len(sys.argv) == 2 else "large"
    print(f"Using directory: {directory}")
    
    # Load data from files into memory
    print("Loading data...")
    load_data(directory)
    print("Data loaded.")

    source = person_id_for_name(input("Name: "))
    if source is None:
        sys.exit("Person not found.")
    target = person_id_for_name(input("Name: "))
    if target is None:
        sys.exit("Person not found.")

    path = shortest_path(source, target)

    if path is None:
        print("Not connected.")
    else:
        degrees = len(path)
        print(f"{degrees} degrees of separation.")
        for i in range(degrees):  # Loop through the entire path, from start to end
            person1 = people[path[i - 1][1]]["name"] if i > 0 else people[path[i][1]]["name"]  # Previous person or the first person in the path
            person2 = people[path[i][1]]["name"]  # Current person in the path
            movie = movies[path[i][0]]["title"]  # Movie where they starred together
            print(f"{i + 1}: {person1} and {person2} starred in {movie}")


def dfs(source, target):
    """
    Perform depth-first search to find the shortest path from source to target.
    """
    frontier = StackFrontier()  # Use Stack for DFS
    start = Node(state=source, parent=None, action=None)
    frontier.add(start)

    # Keep track of explored nodes
    explored = set()

    while not frontier.empty():
        node = frontier.remove()

        if node.state == target:
            # Reconstruct the path by traversing the parent nodes
            path = []
            while node.parent is not None:
                path.append((node.action, node.state))
                node = node.parent
            path.reverse()  # Reverse the path to get the correct order
            return path

        explored.add(node.state)

        # Add neighbors to the frontier
        for movie_id, person_id in neighbors_for_person(node.state):
            if person_id not in explored and not frontier.contains_state(person_id):
                child = Node(state=person_id, parent=node, action=movie_id)
                frontier.add(child)

    return None  # If no path found


def shortest_path(source, target):
    """
    Returns the shortest list of (movie_id, person_id) pairs
    that connect the source to the target using DFS.
    If no direct path is found, it will calculate the path through Kevin Bacon.
    
    If no possible path, returns None.
    """
    # 1. Perform DFS to find the shortest path directly from source to target
    path = dfs(source, target)
    if path is not None:
        return path  # Return the direct path if found

    # 2. If no direct path, find the shortest path through Kevin Bacon
    kevin_bacon_id = person_id_for_name("Kevin Bacon")
    if kevin_bacon_id is None:
        sys.exit("Kevin Bacon not found in the database.")

    # Use DFS to find the shortest path from Kevin Bacon to the source and target
    bacon_path = dfs(kevin_bacon_id, source)
    if bacon_path is None:
        print(f"{people[source]['name']} is not connected to Kevin Bacon.")
        return None

    target_path = dfs(kevin_bacon_id, target)
    if target_path is None:
        print(f"{people[target]['name']} is not connected to Kevin Bacon.")
        return None

    # 3. Print the degrees of separation from Kevin Bacon
    print(f"\nDegrees of separation from Kevin Bacon:")
    print(f"{people[source]['name']}: {len(bacon_path)} degrees")
    print(f"{people[target]['name']}: {len(target_path)} degrees")
    
    # 4. Print the total degree of separation
    total_degrees = len(bacon_path) + len(target_path)  # Sum of degrees of separation
    print(f"Total degrees of separation: {total_degrees}")

    # 5. Print the movies starred with Kevin Bacon for the source and target
    print("\nMovies starred with Kevin Bacon:")
    print(f"Source ({len(bacon_path)} degrees):")
    for movie_id, person_id in bacon_path:
        movie_title = movies[movie_id]["title"]
        print(f"  {people[person_id]['name']} starred in {movie_title} with Kevin Bacon")

    print(f"\nTarget ({len(target_path)} degrees):")
    for movie_id, person_id in target_path:
        movie_title = movies[movie_id]["title"]
        print(f"  {people[person_id]['name']} starred in {movie_title} with Kevin Bacon")

    return bacon_path  # No need to return a path now, just print the relevant details


def person_id_for_name(name):
    """
    Returns the IMDB id for a person's name,
    resolving ambiguities as needed.
    """
    person_ids = list(names.get(name.lower(), set()))
    if len(person_ids) == 0:
        return None
    elif len(person_ids) > 1:
        print(f"Which '{name}'?")
        for person_id in person_ids:
            person = people[person_id]
            name = person["name"]
            birth = person["birth"]
            print(f"ID: {person_id}, Name: {name}, Birth: {birth}")
        try:
            person_id = input("Intended Person ID: ")
            if person_id in person_ids:
                return person_id
        except ValueError:
            pass
        return None
    else:
        return person_ids[0]


def neighbors_for_person(person_id):
    """
    Returns (movie_id, person_id) pairs for people
    who starred with a given person.
    """
    movie_ids = people[person_id]["movies"]
    neighbors = set()
    for movie_id in movie_ids:
        for person_id in movies[movie_id]["stars"]:
            neighbors.add((movie_id, person_id))
    return neighbors


if __name__ == "__main__":
    main()


Using directory: small
Loading data...
Data loaded.
Name: Dustin Hoffman
Name: Tom Hanks
3 degrees of separation.
1: Tom Cruise and Tom Cruise starred in Rain Man
2: Tom Cruise and Kevin Bacon starred in A Few Good Men
3: Kevin Bacon and Tom Hanks starred in Apollo 13


In [5]:
if __name__ == "__main__":
    main()

Using directory: small
Loading data...
Data loaded.
Name: Robin Wright
Name: Dustin Hoffman
4 degrees of separation.
1: Tom Hanks and Tom Hanks starred in Forrest Gump
2: Tom Hanks and Kevin Bacon starred in Apollo 13
3: Kevin Bacon and Tom Cruise starred in A Few Good Men
4: Tom Cruise and Dustin Hoffman starred in Rain Man


- Actors are nodes.
- Movies represent edges between actors who starred in them.

In [6]:
if __name__ == "__main__":
    main()

Using directory: small
Loading data...
Data loaded.
Name: Tom cruise
Name: Dustin Hoffman
1 degrees of separation.
1: Dustin Hoffman and Dustin Hoffman starred in Rain Man
