# Test `_get_movie_id_from_title` Function

This notebook tests the `_get_movie_id_from_title` function from the user info microservice (`service.py`). It loads the necessary data and runs several test cases.

In [1]:
import os
import sys
from pathlib import Path
import pandas as pd
import difflib

# Directory that contains the `user_info` package. Set the
# RECSYS_MICROSERVICES_DIR environment variable to point at your own checkout;
# the default is the original hard-coded local path, kept for backward
# compatibility.
MICROSERVICES_DIR = os.environ.get(
    "RECSYS_MICROSERVICES_DIR",
    "D:\\Internship\\recsys\\microservices\\microservices",
)
# Only append once so re-running this cell in the same kernel does not pile up
# duplicate sys.path entries.
if MICROSERVICES_DIR not in sys.path:
    sys.path.append(MICROSERVICES_DIR)
from user_info.service import _get_movie_id_from_title, movies_df

[INFO] Running locally


OSError while attempting to symlink the latest log directory
OSError while attempting to symlink the latest log directory


[LOG] Detected local environment, with Docker: [False]
[LOG] Detected local environment, with Docker: [False]
Loaded movies_df with 45433 entries.
Loaded movies_df with 45433 entries.


## Test Cases

In [None]:
# Test Case 1: Exact Match
# Queries the bare title "Toy Story" (no release year appended); assumes a
# matching entry exists in movies_df.
test_title_exact = "Toy Story"
try:
    movie_id = _get_movie_id_from_title(test_title_exact)
    # Both sides are cast to str before comparing; the first matching row is shown.
    matched_row = movies_df.loc[
        movies_df['id'].astype(str).eq(str(movie_id))
    ].iloc[0]
    for report_line in (
        f"Exact Match Test ('{test_title_exact}'): Found movie_id = {movie_id}",
        "Matched Row:",
        matched_row,
    ):
        print(report_line)
except Exception as exc:
    # Any failure (lookup error, empty selection, ...) is reported, not raised.
    print(f"Exact Match Test ('{test_title_exact}'): Error - {exc}")

Exact Match Test ('Titanic'): Found movie_id = 597
Matched Row:
adult                                                                False
belongs_to_collection                                                 None
budget                                                           200000000
genres                                          [Drama, Romance, Thriller]
homepage                                       http://www.titanicmovie.com
id                                                                     597
original_language                                                       en
original_title                                                     Titanic
overview                 84 years later, a 101-year-old woman named Ros...
popularity                                                        26.88907
production_companies     [Paramount Pictures, Twentieth Century Fox Fil...
production_countries                            [United States of America]
release_date                        

In [3]:
# Test Case 2: Fuzzy Match
# The query contains a deliberate typo ("Stary" instead of "Story") to check
# that a near-miss title still resolves to a movie id.
test_title_fuzzy = "Toy Stary"
try:
    movie_id = _get_movie_id_from_title(test_title_fuzzy)
    # Both sides are cast to str before comparing; the first matching row is shown.
    matched_row = movies_df.loc[
        movies_df['id'].astype(str).eq(str(movie_id))
    ].iloc[0]
    for report_line in (
        f"Fuzzy Match Test ('{test_title_fuzzy}'): Found movie_id = {movie_id}",
        "Matched Row:",
        matched_row,
    ):
        print(report_line)
except Exception as exc:
    # Any failure (lookup error, empty selection, ...) is reported, not raised.
    print(f"Fuzzy Match Test ('{test_title_fuzzy}'): Error - {exc}")

Fuzzy Match Test ('Toy Stary'): Found movie_id = 862
Matched Row:
adult                                                                False
belongs_to_collection                                 Toy Story Collection
budget                                                            30000000
genres                                         [Animation, Comedy, Family]
homepage                              http://toystory.disney.com/toy-story
id                                                                     862
original_language                                                       en
original_title                                                   Toy Story
overview                 Led by Woody, Andy's toys live happily in his ...
popularity                                                       21.946943
production_companies                             [Pixar Animation Studios]
production_countries                            [United States of America]
release_date                      

In [4]:
# Test Case 3: No Match
# A title that should not resolve to any movie. Here a ValueError from the
# lookup is the passing outcome; finding an id is reported as unexpected.
test_title_no_match = "This Movie Does Not Exist 12345"
try:
    movie_id = _get_movie_id_from_title(test_title_no_match)
    # Both sides are cast to str before comparing; the first matching row is shown.
    matched_row = movies_df.loc[
        movies_df['id'].astype(str).eq(str(movie_id))
    ].iloc[0]
    for report_line in (
        f"No Match Test ('{test_title_no_match}'): Found movie_id = {movie_id} (Unexpected!)",
        "Matched Row:",
        matched_row,
    ):
        print(report_line)
except ValueError as exc:
    # Expected path: the lookup rejected the unknown title.
    print(f"No Match Test ('{test_title_no_match}'): Success - {exc}")
except Exception as exc:
    # Any other exception type is reported as a test error.
    print(f"No Match Test ('{test_title_no_match}'): Error - {exc}")

No Match Test ('This Movie Does Not Exist 12345'): Success - No match found for movie title 'This Movie Does Not Exist 12345'


In [5]:
# Test Case 4: Case Insensitivity and Whitespace
# The query has mixed letter case plus leading and trailing spaces.
test_title_case = "  tOy sToRy   "
try:
    movie_id = _get_movie_id_from_title(test_title_case)
    # Both sides are cast to str before comparing; the first matching row is shown.
    matched_row = movies_df.loc[
        movies_df['id'].astype(str).eq(str(movie_id))
    ].iloc[0]
    for report_line in (
        f"Case/Whitespace Test ('{test_title_case}'): Found movie_id = {movie_id}",
        "Matched Row:",
        matched_row,
    ):
        print(report_line)
except Exception as exc:
    # Any failure (lookup error, empty selection, ...) is reported, not raised.
    print(f"Case/Whitespace Test ('{test_title_case}'): Error - {exc}")

Case/Whitespace Test ('  tOy sToRy   '): Found movie_id = 862
Matched Row:
adult                                                                False
belongs_to_collection                                 Toy Story Collection
budget                                                            30000000
genres                                         [Animation, Comedy, Family]
homepage                              http://toystory.disney.com/toy-story
id                                                                     862
original_language                                                       en
original_title                                                   Toy Story
overview                 Led by Woody, Andy's toys live happily in his ...
popularity                                                       21.946943
production_companies                             [Pixar Animation Studios]
production_countries                            [United States of America]
release_date             

In [6]:
# Test Case 5: Ambiguous Title (if data contains duplicates or very similar titles)
# Example: If 'Hamlet' exists multiple times with different years/IDs
#
# This case is currently disabled: the draft below is commented out and the
# cell only prints a "skipping" notice.
# NOTE(review): the draft treats a ValueError as the passing outcome, i.e. it
# presumes the lookup rejects ambiguous titles — confirm against service.py
# before enabling.
# TODO: enable once the loaded data is known to contain an ambiguous title.
#
# test_title_ambiguous = "Hamlet"
# try:
#     movie_id = _get_movie_id_from_title(test_title_ambiguous)
#     matched_row = movies_df[movies_df['id'].astype(str) == str(movie_id)].iloc[0]
#     print(f"Ambiguous Test ('{test_title_ambiguous}'): Found movie_id = {movie_id} (Unexpected!)")
#     print("Matched Row:")
#     print(matched_row)
# except ValueError as e:
#     print(f"Ambiguous Test ('{test_title_ambiguous}'): Success - {e}")
# except Exception as e:
#     print(f"Ambiguous Test ('{test_title_ambiguous}'): Error - {e}")
print("Skipping Ambiguous Test - Requires specific data setup.")

Skipping Ambiguous Test - Requires specific data setup.
