## **P02: Degrees of Separation (BFS)**
*Tijuana, B.C. | 26 MAR 2024*

*Presenta:*
- Agraz Vallejo Daniel
- Cruz Jimenez Axel
- Roldan Machado David

In [11]:
from classes.Queue import Queue
from classes.Color import Color
import pandas as pd
import numpy as np

CINEMA_DIR = "datasets/"

#### **Dataset Loader:** CinemaDataset

In [13]:
class CinemaDataset():
    people: np.ndarray
    movie: np.ndarray
    stars: np.ndarray

    def __init__(self, size:str):
        self.people = pd.read_csv(f"{CINEMA_DIR}{size.lower()}/people.csv").to_numpy()
        self.movie = pd.read_csv(f"{CINEMA_DIR}{size.lower()}/movies.csv").to_numpy()
        self.stars = pd.read_csv(f"{CINEMA_DIR}{size.lower()}/stars.csv").to_numpy()

    # Gets Movie Record by ID
    def get_movie(self, id:int) -> np.ndarray:
        mask = np.isin(element=self.movie[:,0], test_elements=id)
        return self.movie[mask].flatten()
    
        # Gets Actor Record by ID
    def get_actor(self, id:int) -> np.ndarray:
        mask = np.isin(element=self.people[:,0], test_elements=id)
        return self.people[mask].flatten()
        

#### **Load Actors/Movies Dataset**

In [14]:
size = input("Select dataset size (small/large):")
data = CinemaDataset(size)

In [15]:
print(f"PEOPLE SHAPE: {data.people.shape}")
print(f"MOVIE SHAPE: {data.movie.shape}")
print(f"STARS SHAPE: {data.stars.shape}")

PEOPLE SHAPE: (1044499, 3)
MOVIE SHAPE: (344276, 3)
STARS SHAPE: (1189594, 2)


#### **POC:** Get Actor/Movie record by ID

In [13]:
# This two actors supposedly have 2 Degrees of Separation
name1, name2 = "Emma Watson", "Jennifer Lawrence"

In [14]:
# Get Actor/Actress PeopleID
mask = np.isin(element=data.people[:,1], test_elements=np.array([name1, name2]))
name1_ID, name2_ID = data.people[mask][0,0], data.people[mask][1,0]
print(f"Name1's PeopleID: {name1_ID}")
print(f"Name2's PeopleID: {name2_ID}")

Name1's PeopleID: 705356
Name2's PeopleID: 2225369


705356

In [15]:
# Get Actor/Actress Movies given its PeopleID
movies_index = np.where(data.stars[:,0] == name1_ID)[0].flatten()
movies = data.stars[movies_index,1]
print(f"Movie's ID from PeopleID:\n{movies}")

# Get Actors/Actresses from Name2_ID's Movies
movies_idx = np.where(data.stars[:,1] == movies[0])[0].flatten()
costars_id = [data.stars[idx,0] for idx in movies_idx]
print(costars_id)

mask = np.isin(element=data.people[:,0], test_elements=costars_id)
costars_records = data.people[mask]
costars_records

Movie's ID from PeopleID:
[ 241527  295297  304141  330373  373889  417741  465436  926084 1201607
 1311071 1486834 1528071 1596365 1781796 1976009 3758172 4034354 4199898
 4781612 6170506 6338476 6845728 6902676]
[705356, 342488, 1321, 1749]


array([[1321, 'Richard Harris', 1930.0],
       [1749, 'Maggie Smith', 1934.0],
       [342488, 'Rupert Grint', 1988.0],
       [705356, 'Daniel Radcliffe', 1989.0]], dtype=object)

#### **Degrees of Separation Algorithm:** (Dijkstra's Inspired)

In [16]:
def degrees_of_separation(Q: Queue, name1: str, name2: str, data:CinemaDataset):
    # Dict that holds graph information
    ds = {
        'status': False,                    # Tells whether target_x has a solution
        'came_from': {},                    # Keeps track of shortest path's nodes
        'cost_so_far': {},                  # Stores node's C(x) 
        'iterations':0,                     # Iterations passed until shortest path is found
        'visited_m':[],                     # Stores visited movies
        'visited_x':[],                     # Stores visited actors
        'sentences':[],                     # Stores actor and movie association statements
        'shortest_path': [],                # Node-by-node path to shortest route
        'degrees': 0}                       # Degrees of Separations

    # Get Actor's PeopleID
    mask = np.isin(element=data.people[:,1], test_elements=np.array([name1, name2]))
    init_x, target_x = data.people[mask][0,0], data.people[mask][1,0]

    # BFS Start
    ds['came_from'][init_x] = None; ds['cost_so_far'][init_x] = 0
    Q.insert(init_x)

    while Q.size() != 0:
        # Gets current state
        x = Q.get_first(); ds['iterations'] += 1

        if x == target_x:
            # Write relevant results to graph dict
            ds['degrees'] = ds['cost_so_far'][x]; ds['shortest_path'].append(x); ds['status'] = True
            while x != init_x:
                prev_x = ds['came_from'][x][0]
                ds['shortest_path'].append(prev_x)
                ds['sentences'].append(f"{Color.BOLD}{data.get_actor(prev_x)[1]}{Color.END} and {Color.BOLD}{data.get_actor(x)[1]}{Color.END} starred in {Color.CYAN}{data.get_movie(ds['came_from'][x][1])[1]}{Color.END}")
                x = prev_x
            ds['shortest_path'].reverse(); ds['sentences'].reverse()
            return ds
        
        # Get Actor's Movies
        movies_index = np.where(data.stars[:,0] == x)[0].flatten()
        movies = data.stars[movies_index,1]

        for u in movies:
            if u in ds['visited_m']: continue
            # Get actors from movie
            actors_idx = np.where(data.stars[:,1] == u)[0].flatten()
            costars_id = [data.stars[idx,0] for idx in actors_idx]; costars_id.remove(x)

            for id in costars_id:
                if id in ds['visited_x']: continue
                ds['came_from'][id] = [x, u]
                ds['cost_so_far'][id] = ds['cost_so_far'][x] + 1 
                ds['visited_x'].append(id)
                Q.insert(id)
            ds['visited_m'].append(u)

    return ds

#### **Actors Input**

In [25]:
p1 = input("Enter actor/actress Name 1: ")
p2 = input("Enter actor/actress Name 2: ")

In [27]:
# Actors Random Selection
a = data.people[np.random.choice(data.people.shape[0], size=2, replace=False), :]
p1 = a[0,:][1]
p2 = a[1,:][1]

#### **Results:** Degrees of Separation

In [26]:
# a = data.people[np.random.choice(data.people.shape[0], size=2, replace=False), :]
# p1 = a[0,:][1]
# p2 = a[1,:][1]

ds = degrees_of_separation(Queue(), p1, p2, data)
print(f"Name 1: {p1}\nName 2: {p2}")
print(f"Couple of Actors has solution? : {Color.BOLD}{Color.GREEN if ds['status'] else Color.RED} {ds['status']}{Color.END}")
print(f"Iterations to find shortest path: {Color.BOLD}{ds['iterations']}{Color.END}\n")
print(f"Node-by-Node Path: {f'{Color.CYAN}{Color.BOLD} -> {Color.END}'.join([f'{Color.BOLD}{x}{Color.END}' for x in ds['shortest_path']])}")
for s in ds['sentences']: print(s)
print(f"\nPath Length: {Color.BOLD}{len(ds['shortest_path'])}{Color.END}")
print(f"Degrees of Separation: {Color.BOLD}{ds['degrees']}{Color.END}")

Name 1: Julianna Margulies
Name 2: Dario Russo
Couple of Actors has solution? : [1m[91m False[0m
Iterations to find shortest path: [1m377671[0m

Node-by-Node Path: 

Path Length: [1m0[0m
Degrees of Separation: [1m0[0m
