## **P02: Degrees of Separation (BFS)**
*Tijuana, B.C. | 26 MAR 2024*

*Presenta:*
- Agraz Vallejo Daniel
- Cruz Jimenez Axel
- Roldan Machado David

In [1]:
import numpy as np
from classes.misc.Color import Color
from classes.structures.Queue import Queue
from classes.misc.Cinema import CinemaDataset

#### **Load Actors/Movies Dataset**

In [2]:
# Ask which dataset variant to load; the typed answer is handed to CinemaDataset
# unchanged, so it has to be entered exactly as the loader expects it.
size = input("Select dataset size (small/large):")
data = CinemaDataset(size)

In [3]:
# Report the dimensions of the three loaded tables, one per line
print(
    f"PEOPLE SHAPE: {data.people.shape}",
    f"MOVIE SHAPE: {data.movie.shape}",
    f"STARS SHAPE: {data.stars.shape}",
    sep="\n",
)

PEOPLE SHAPE: (1044499, 3)
MOVIE SHAPE: (344276, 3)
STARS SHAPE: (1189594, 2)


#### **POC:** Get Actor/Movie record by ID

In [None]:
# These two actors supposedly have 2 degrees of separation
# NOTE(review): the recorded run at the end of this notebook reports 3 -- confirm the expected value
name1, name2 = "Emma Watson", "Jennifer Lawrence"

In [None]:
# Get Actor/Actress PeopleID
# Each name is resolved on its own: a single combined mask returns the matching
# rows in dataset order, which is not necessarily the (name1, name2) order, so
# the two IDs could end up swapped. The first matching row is used for each name.
name1_ID = int(data.people[data.people[:, 1] == name1][0, 0])
name2_ID = int(data.people[data.people[:, 1] == name2][0, 0])
print(f"{name1} ID: {name1_ID}")
print(f"{name2} ID: {name2_ID}")

In [None]:
# Row positions in `stars` whose person column equals name1's ID, then the
# movie IDs stored on those rows
movies_index = np.flatnonzero(data.stars[:,0] == name1_ID)
movies = data.stars[movies_index,1]
print(f"Movie's ID from PeopleID:\n{movies}\n")

# Everyone credited in the FIRST of name1's movies (name1 included);
# IDs are converted to strings before being matched against the people table
movies_idx = np.flatnonzero(data.stars[:,1] == movies[0])
costars_id = list(map(str, data.stars[movies_idx,0]))
print(f"Costars from Name1's ID:\n{costars_id}\n")

# Full people records for those costar IDs
mask = np.isin(element=data.people[:,0], test_elements=costars_id)
costars_records = data.people[mask]
print(f"Costars Records:\n{costars_records}")

#### **Degrees of Separation Algorithm:** (Dijkstra-Inspired BFS)

In [4]:
def degrees_of_separation(Q: Queue, name1: str, name2: str, data:CinemaDataset) -> dict:
    """Breadth-first search for the shortest co-starring chain between two people.

    People are the graph nodes; two people are adjacent when `data.stars`
    lists them both for the same movie. The search starts at `name1` and
    stops as soon as `name2` is taken off the queue.

    Args:
        Q: Queue used as the BFS frontier (uses `insert`, `get_first`, `size`).
            It is expected to be empty on entry -- this is not checked.
        name1: Name of the starting person, matched exactly against column 1
            of `data.people`.
        name2: Name of the person to reach, matched the same way.
        data: Dataset exposing the `people` and `stars` arrays plus the
            `get_actor` / `get_movie` lookups used to build the sentences.

    Returns:
        dict with the keys:
            'status'        -- True when a path from name1 to name2 exists
            'came_from'     -- person ID -> [previous person ID, movie ID] (None for the start)
            'cost_so_far'   -- person ID -> number of hops from the start
            'iterations'    -- number of people taken off the queue
            'visited_m'     -- movie IDs in the order they were expanded
            'visited_x'     -- person IDs in discovery order (the start comes first)
            'sentences'     -- one formatted "A and B starred in M" line per hop
            'shortest_path' -- person IDs from name1 to name2 (empty when unsolved)
            'degrees'       -- number of hops in the shortest path

    Raises:
        IndexError: if either name has no row in `data.people`.
    """
    # Dict that holds graph information
    ds = {
        'status': False,        # Tells whether target_x has a solution
        'came_from': {},        # Keeps track of shortest path's nodes
        'cost_so_far': {},      # Stores node's C(x)
        'iterations':0,         # Iterations passed until shortest path is found
        'visited_m':[],         # Stores visited movies
        'visited_x':[],         # Stores visited actors
        'sentences':[],         # Stores actor and movie association statements
        'shortest_path': [],    # Node-by-node path to shortest route
        'degrees': 0}           # Degrees of Separations

    def _people_id(name):
        # Resolve one name at a time. A single mask over both names returns rows
        # in dataset order, which would silently swap start and target whenever
        # name2 is stored before name1 (and breaks when both names are equal).
        rows = data.people[data.people[:, 1] == name]
        if len(rows) == 0:
            raise IndexError(f"No person named {name!r} was found in the dataset")
        return rows[0, 0]   # first match wins when a name is not unique

    # Get Actor's PeopleID
    init_x, target_x = _people_id(name1), _people_id(name2)

    # Hash-based membership checks; the lists inside `ds` only keep the
    # discovery order for the caller.
    seen_x = {init_x}
    seen_m = set()
    ds['visited_x'].append(init_x)

    # BFS Start
    ds['came_from'][init_x] = None
    ds['cost_so_far'][init_x] = 0
    Q.insert(init_x)

    while Q.size() != 0:
        # Gets current state
        x = Q.get_first()
        ds['iterations'] += 1

        if x == target_x:
            # Write relevant BFS results to graph dict
            ds['degrees'] = ds['cost_so_far'][x]
            ds['shortest_path'].append(x)
            ds['status'] = True
            # Walk the predecessor links back to the start, then flip the order
            while x != init_x:
                prev_x, movie_id = ds['came_from'][x]
                ds['shortest_path'].append(prev_x)
                ds['sentences'].append(
                    f"{Color.BOLD}{data.get_actor(prev_x)[1]}{Color.END} and "
                    f"{Color.BOLD}{data.get_actor(x)[1]}{Color.END} starred in "
                    f"{Color.CYAN}{data.get_movie(movie_id)[1]}{Color.END}")
                x = prev_x
            ds['shortest_path'].reverse()
            ds['sentences'].reverse()
            return ds

        # Get Actor's Movies
        for u in data.stars[data.stars[:, 0] == x, 1]:
            if u in seen_m:
                continue
            seen_m.add(u)
            ds['visited_m'].append(u)

            # Get actors from movie; `x` itself is already in seen_x, so it is
            # skipped without having to be removed from the costar list.
            for costar in data.stars[data.stars[:, 1] == u, 0]:
                if costar in seen_x:
                    continue
                seen_x.add(costar)
                ds['came_from'][costar] = [x, u]
                ds['cost_so_far'][costar] = ds['cost_so_far'][x] + 1
                ds['visited_x'].append(costar)
                Q.insert(costar)

    return ds

#### **Actors Input**

In [5]:
# Names typed here are later matched exactly against the name column of
# data.people, so spelling and capitalization must match the dataset.
p1 = input("Enter actor/actress Name 1: ")
p2 = input("Enter actor/actress Name 2: ")

In [None]:
# Actors Random Selection
# Draws two distinct rows of the people table (no seed is set, so every run
# differs) and overwrites p1/p2 with their name column.
a = data.people[np.random.choice(data.people.shape[0], size=2, replace=False), :]
p1, p2 = a[0, 1], a[1, 1]

#### **Results:** Degrees of Separation

In [6]:
# Run the search with a fresh queue and report the outcome
ds = degrees_of_separation(Queue(), p1, p2, data)

print(f"Name 1: {p1}\nName 2: {p2}")
print(f"Couple of Actors has solution? : {Color.BOLD}{Color.GREEN if ds['status'] else Color.RED} {ds['status']}{Color.END}")
print(f"Iterations to find shortest path: {Color.BOLD}{ds['iterations']}{Color.END}\n")

if ds['status']:
    # Person IDs in bold, joined by a coloured arrow
    path_arrow = f"{Color.CYAN}{Color.BOLD} -> {Color.END}"
    path_nodes = [f"{Color.BOLD}{x}{Color.END}" for x in ds['shortest_path']]
    print(f"Node-by-Node Path: {path_arrow.join(path_nodes)}")
else:
    print("Node-by-Node Path: None")

# One "A and B starred in M" line per hop (nothing is printed when unsolved)
for s in ds['sentences']:
    print(s)

print(f"\nPath Length: {Color.BOLD}{len(ds['shortest_path'])}{Color.END}")
print(f"Degrees of Separation: {Color.BOLD}{ds['degrees']}{Color.END}")

Name 1: Emma Watson
Name 2: Jennifer Lawrence
Couple of Actors has solution? : [1m[92m True[0m
Iterations to find shortest path: [1m2229[0m

Node-by-Node Path: [1m914612[0m[96m[1m -> [0m[1m705356[0m[96m[1m -> [0m[1m564215[0m[96m[1m -> [0m[1m2225369[0m
[1mEmma Watson[0m and [1mDaniel Radcliffe[0m starred in [96mHarry Potter and the Chamber of Secrets[0m
[1mDaniel Radcliffe[0m and [1mJames McAvoy[0m starred in [96mVictor Frankenstein[0m
[1mJames McAvoy[0m and [1mJennifer Lawrence[0m starred in [96mX-Men: First Class[0m

Path Length: [1m4[0m
Degrees of Separation: [1m3[0m
