In [None]:
import sys
from pathlib import Path

# Add parent directory to path for imports
sys.path.insert(0, str(Path().resolve().parent.parent))

from implementation.misc.helpers import normalize_string, create_watch_provider_offering_key
from implementation.classes.movie import BaseMovie
from implementation.classes.enums import Genre, MaturityRating


## Testing String Normalization

In [2]:
"""
Test suite for normalize_string function.

Tests 20 edge cases covering:
- Basic normalization (case, whitespace, punctuation)
- Unicode handling (diacritics, en and em dashes)
- Hyphen and apostrophe variations
- Edge cases (empty and whitespace-only input, digits inside a word)
"""

# Define 20 test cases: (input_string, expected_output, description)
# Typographic apostrophes and dashes are spelled as \u escapes so the exact code
# point under test is explicit and survives editors that "straighten" punctuation.
TEST_CASES = [
    # Basic punctuation and case handling - HYPHENS NOW PRESERVED
    ("Spider-Man: Into the Spider-Verse", "spider-man into the spider-verse", "Hyphen preserved, colon becomes space"),
    ("Ocean's Eleven", "oceans eleven", "Standard apostrophe removal"),
    ("L.A. Confidential", "la confidential", "Periods removed (no space)"),
    ("Se7en", "se7en", "Numbers preserved within words"),

    # Unicode diacritics and accents
    ("Amélie", "amelie", "French accent (acute)"),
    ("Señor López", "senor lopez", "Spanish tilde and accent"),
    ("Björk's Greatest Hits", "bjorks greatest hits", "Nordic umlaut with apostrophe"),
    ("Crème brûlée", "creme brulee", "Multiple French diacritics"),
    ("Mötley Crüe", "motley crue", "Heavy metal umlauts"),

    # Various apostrophe and quote styles.
    # U+2019 (right single quotation mark) is the curly apostrophe; the ASCII
    # apostrophe is already covered by the "Ocean's Eleven" case above.
    ("It\u2019s a Wonderful Life", "its a wonderful life", "Curly apostrophe (right single quote)"),
    ("Rock \u2019n\u2019 Roll", "rock n roll", "Multiple curly apostrophes"),
    ("\"Quoted Title\"", "quoted title", "Double quotes removed"),

    # Various hyphen and dash styles - regular hyphens preserved, others become space
    ("Spider\u2013Man", "spider man", "En-dash becomes space"),  # U+2013
    ("Spider\u2014Man", "spider man", "Em-dash becomes space"),  # U+2014
    ("Jean-Luc Picard", "jean-luc picard", "Regular hyphen preserved"),

    # Whitespace edge cases
    ("  Multiple   Spaces  ", "multiple spaces", "Multiple spaces collapsed and trimmed"),
    ("Tab\tSeparated\tWords", "tab separated words", "Tabs become single spaces"),
    ("Line\nBreak\rTest", "line break test", "Newlines and carriage returns"),

    # Empty and minimal inputs
    ("", "", "Empty string returns empty"),
    ("   ", "", "Whitespace-only returns empty"),
]


def run_normalize_string_tests():
    """Execute every entry of TEST_CASES against normalize_string and print a report.

    For each case the status, description, input and expected value are printed;
    the actual value is printed only when it differs from the expectation.

    Returns:
        True when no case failed, False otherwise.
    """
    rule = "=" * 70
    print(rule)
    print("NORMALIZE_STRING TEST SUITE")
    print(rule)
    print()

    failures = 0
    for number, (raw, wanted, note) in enumerate(TEST_CASES, 1):
        actual = normalize_string(raw)
        matched = actual == wanted
        if matched:
            verdict = "✓ PASS"
        else:
            verdict = "✗ FAIL"
            failures += 1

        # Per-case report; the "Got" line is reserved for mismatches.
        print(f"Test {number:2d}: {verdict}")
        print(f"         Description: {note}")
        print(f"         Input:    {raw!r}")
        print(f"         Expected: {wanted!r}")
        if not matched:
            print(f"         Got:      {actual!r}")
        print()

    # Summary footer
    total = len(TEST_CASES)
    print(rule)
    print(f"RESULTS: {total - failures} passed, {failures} failed out of {total} tests")
    print(rule)

    return failures == 0

In [3]:
# Run the normalize_string suite. The call returns True only when no case failed;
# as the last expression of the cell, that boolean is shown as the cell result.
run_normalize_string_tests()

NORMALIZE_STRING TEST SUITE

Test  1: ✓ PASS
         Description: Hyphen preserved, colon becomes space
         Input:    'Spider-Man: Into the Spider-Verse'
         Expected: 'spider-man into the spider-verse'

Test  2: ✓ PASS
         Description: Standard apostrophe removal
         Input:    "Ocean's Eleven"
         Expected: 'oceans eleven'

Test  3: ✓ PASS
         Description: Periods removed (no space)
         Input:    'L.A. Confidential'
         Expected: 'la confidential'

Test  4: ✓ PASS
         Description: Numbers preserved within words
         Input:    'Se7en'
         Expected: 'se7en'

Test  5: ✓ PASS
         Description: French accent (acute)
         Input:    'Amélie'
         Expected: 'amelie'

Test  6: ✓ PASS
         Description: Spanish tilde and accent
         Input:    'Señor López'
         Expected: 'senor lopez'

Test  7: ✓ PASS
         Description: Nordic umlaut with apostrophe
         Input:    "Björk's Greatest Hits"
         Expected: 'bjo

True

## Title Tokens

In [4]:
"""
Test suite for BaseMovie.normalized_title_tokens.

Tests 25 cases covering:
- Basic tokenization (single word, multi-word, short titles, numbers)
- Hyphen expansion (compound → compound + parts, dedup across expansions)
- Punctuation handling (apostrophes, periods, commas, colons)
- Case normalization (all-caps, mixed case)
- Diacritic / Unicode handling (accents, umlauts)
- Deduplication (repeated words collapsed to first occurrence)
- Whitespace edge cases
"""


def _make_movie(title: str) -> BaseMovie:
    """Build a BaseMovie fixture in which only ``title`` is caller-controlled.

    Every other constructor argument is a fixed placeholder, so any difference
    between two fixtures can only come from the title.
    """
    constructor_args = {
        "id": "test-1",
        "tmdb_id": 1,
        "title": title,
        "overall_keywords": [],
        "release_date": "2020-01-01",
        "duration": 120,
        "genres": ["Drama"],
        "countries_of_origin": ["US"],
        "languages": ["English"],
        "maturity_rating": "PG-13",
        "overview": "Test overview.",
        "plot_keywords": [],
        "directors": ["Director"],
        "writers": ["Writer"],
        "producers": ["Producer"],
        "composers": ["Composer"],
        "actors": ["Actor"],
        "characters": ["Character"],
        "production_companies": ["Studio"],
        "watch_providers": [],
    }
    return BaseMovie(**constructor_args)


# Case layout: (title, expected_tokens, description).
# expected_tokens is compared for exact equality, so token order matters.
TITLE_TOKEN_TEST_CASES = [
    # --- Basic tokenization (4 cases) ---
    ("Inception", ["inception"],
     "Single word lowercased"),
    ("The Dark Knight", ["the", "dark", "knight"],
     "Multi-word title split on spaces"),
    ("Up", ["up"],
     "Very short title"),
    ("Toy Story 3", ["toy", "story", "3"],
     "Numeric token preserved as-is"),

    # --- Hyphen expansion (6 cases) ---
    ("Spider-Man", ["spider-man", "spider", "man"],
     "Hyphenated token kept + parts expanded"),
    ("Ant-Man and the Wasp", ["ant-man", "ant", "man", "and", "the", "wasp"],
     "Hyphen expansion alongside plain words"),
    ("X-Men: Days of Future Past", ["x-men", "x", "men", "days", "of", "future", "past"],
     "Hyphen expansion with colon-separated subtitle"),
    ("Spider-Man: Into the Spider-Verse", ["spider-man", "spider", "man", "into", "the", "spider-verse", "verse"],
     "Two hyphenated tokens share 'spider' — deduped across expansions"),
    ("Sci-Fi Movie", ["sci-fi", "sci", "fi", "movie"],
     "Genre-like hyphenated token expanded"),
    ("Self-Made Man", ["self-made", "self", "made", "man"],
     "Hyphen expansion adds parts after compound token"),

    # --- Punctuation handling (5 cases) ---
    ("Ocean's Eleven", ["oceans", "eleven"],
     "Straight apostrophe removed, no space inserted"),
    ("L.A. Confidential", ["la", "confidential"],
     "Periods removed (no space), letters merge"),
    ("Crouching Tiger, Hidden Dragon", ["crouching", "tiger", "hidden", "dragon"],
     "Comma replaced by space"),
    ("Star Wars: A New Hope", ["star", "wars", "a", "new", "hope"],
     "Colon replaced by space"),
    ("Dr. No", ["dr", "no"],
     "Period in abbreviation removed"),

    # --- Case normalization (2 cases) ---
    ("THE MATRIX", ["the", "matrix"],
     "All-caps lowercased"),
    ("Se7en", ["se7en"],
     "Alphanumeric mix preserved after lowercasing"),

    # --- Diacritic / Unicode handling (2 cases) ---
    ("Amélie", ["amelie"],
     "French acute accent stripped"),
    ("Mötley Crüe: The Biopic", ["motley", "crue", "the", "biopic"],
     "German umlauts stripped, colon becomes space"),

    # --- Deduplication (3 cases) ---
    ("The Good, the Bad and the Ugly", ["the", "good", "bad", "and", "ugly"],
     "'the' appears 3 times, only first occurrence kept"),
    ("The Lord of the Rings: The Two Towers", ["the", "lord", "of", "rings", "two", "towers"],
     "'the' deduped across colon boundary"),
    ("Man-Bat vs Spider-Man", ["man-bat", "man", "bat", "vs", "spider-man", "spider"],
     "'man' from first expansion blocks 'man' from second expansion"),

    # --- Whitespace edge cases (1 case) ---
    ("  Multiple   Spaces  ", ["multiple", "spaces"],
     "Leading, trailing, and internal whitespace collapsed"),

    # --- Combined edge cases (2 cases) ---
    ("It's a Wonderful Life", ["its", "a", "wonderful", "life"],
     "Apostrophe removed in contraction"),
    ("2001: A Space Odyssey", ["2001", "a", "space", "odyssey"],
     "Leading number preserved, colon becomes space"),
]


def run_title_token_tests():
    """Check BaseMovie.normalized_title_tokens against TITLE_TOKEN_TEST_CASES.

    A fresh fixture movie is built for each title; its token list is compared
    for exact equality with the expected list and a per-case report is printed.

    Returns:
        True when no case failed, False otherwise.
    """
    rule = "=" * 70
    print(rule)
    print("NORMALIZED_TITLE_TOKENS TEST SUITE")
    print(rule)
    print()

    failures = 0
    for number, (title, wanted, note) in enumerate(TITLE_TOKEN_TEST_CASES, 1):
        actual = _make_movie(title).normalized_title_tokens()
        matched = actual == wanted
        if matched:
            verdict = "✓ PASS"
        else:
            verdict = "✗ FAIL"
            failures += 1

        # Per-case report; the "Got" line is reserved for mismatches.
        print(f"Test {number:2d}: {verdict}")
        print(f"         Description: {note}")
        print(f"         Title:    {title!r}")
        print(f"         Expected: {wanted}")
        if not matched:
            print(f"         Got:      {actual}")
        print()

    # Summary footer
    total = len(TITLE_TOKEN_TEST_CASES)
    print(rule)
    print(f"RESULTS: {total - failures} passed, {failures} failed out of {total} tests")
    print(rule)

    return failures == 0

In [5]:
# Run the normalized_title_tokens suite. The call returns True only when no case
# failed; as the last expression of the cell, that boolean is shown as the result.
run_title_token_tests()

NORMALIZED_TITLE_TOKENS TEST SUITE

Test  1: ✓ PASS
         Description: Single word lowercased
         Title:    'Inception'
         Expected: ['inception']

Test  2: ✓ PASS
         Description: Multi-word title split on spaces
         Title:    'The Dark Knight'
         Expected: ['the', 'dark', 'knight']

Test  3: ✓ PASS
         Description: Very short title
         Title:    'Up'
         Expected: ['up']

Test  4: ✓ PASS
         Description: Numeric token preserved as-is
         Title:    'Toy Story 3'
         Expected: ['toy', 'story', '3']

Test  5: ✓ PASS
         Description: Hyphenated token kept + parts expanded
         Title:    'Spider-Man'
         Expected: ['spider-man', 'spider', 'man']

Test  6: ✓ PASS
         Description: Hyphen expansion alongside plain words
         Title:    'Ant-Man and the Wasp'
         Expected: ['ant-man', 'ant', 'man', 'and', 'the', 'wasp']

Test  7: ✓ PASS
         Description: Hyphen expansion with colon-separated subtitle
  

True

## MaturityRating ID generation

In [6]:
"""
Test suite for BaseMovie.maturity_rating_and_rank.

Tests 25 cases covering:
- Canonical ratings (each MPAA rating maps to correct label + rank)
- Case insensitivity (lowercase, UPPERCASE, MiXeD)
- Whitespace tolerance (leading/trailing spaces trimmed)
- Hyphenated ratings (PG-13, NC-17 preserved correctly)
- Unknown / invalid inputs fall back to ("unrated", 999)
- Tricky near-miss strings that should NOT match a valid rating
"""


def _make_movie_with_rating(maturity_rating: str) -> BaseMovie:
    """Build a BaseMovie fixture in which only ``maturity_rating`` is caller-controlled.

    The raw rating string is handed to the constructor untouched; every other
    argument is a fixed placeholder.
    """
    constructor_args = {
        "id": "test-1",
        "tmdb_id": 1,
        "title": "Test Movie",
        "overall_keywords": [],
        "release_date": "2020-01-01",
        "duration": 120,
        "genres": ["Drama"],
        "countries_of_origin": ["US"],
        "languages": ["English"],
        "maturity_rating": maturity_rating,
        "overview": "Test overview.",
        "plot_keywords": [],
        "directors": ["Director"],
        "writers": ["Writer"],
        "producers": ["Producer"],
        "composers": ["Composer"],
        "actors": ["Actor"],
        "characters": ["Character"],
        "production_companies": ["Studio"],
        "watch_providers": [],
    }
    return BaseMovie(**constructor_args)


# Each tuple: (raw_maturity_rating, expected_label, expected_rank, description)
# Every raw input is unique, so each row exercises a distinct spelling.
MATURITY_RATING_TEST_CASES = [
    # ── Canonical ratings — standard spelling (6 tests) ────────────
    ("G",       "g",     1,   "G maps to rank 1"),
    ("PG",      "pg",    2,   "PG maps to rank 2"),
    ("PG-13",   "pg-13", 3,   "PG-13 maps to rank 3 (hyphen preserved)"),
    ("R",       "r",     4,   "R maps to rank 4"),
    ("NC-17",   "nc-17", 5,   "NC-17 maps to rank 5 (hyphen preserved)"),
    ("Unrated", "unrated", 999, "Unrated maps to rank 999"),

    # ── Case insensitivity — lowercase (5 tests) ──────────────────
    ("g",       "g",     1,   "Lowercase 'g' resolves to G"),
    ("pg",      "pg",    2,   "Lowercase 'pg' resolves to PG"),
    ("pg-13",   "pg-13", 3,   "Lowercase 'pg-13' resolves to PG-13"),
    ("r",       "r",     4,   "Lowercase 'r' resolves to R"),
    ("nc-17",   "nc-17", 5,   "Lowercase 'nc-17' resolves to NC-17"),

    # ── Case insensitivity — mixed / all-caps (3 tests) ───────────
    # The canonical "NC-17" is already all-caps and is covered in the first
    # section, so the NC-17 row here uses a mixed-case spelling instead.
    ("Pg-13",   "pg-13", 3,   "Mixed case 'Pg-13' resolves to PG-13"),
    ("Nc-17",   "nc-17", 5,   "Mixed case 'Nc-17' resolves to NC-17"),
    ("UNRATED", "unrated", 999, "All-caps 'UNRATED' resolves to Unrated"),

    # ── Whitespace tolerance (3 tests) ─────────────────────────────
    ("  G  ",     "g",     1,   "Leading/trailing spaces around 'G'"),
    (" PG-13 ",   "pg-13", 3,   "Spaces around 'PG-13'"),
    ("  R  ",     "r",     4,   "Spaces around 'R'"),

    # ── Unknown / invalid → UNRATED fallback (5 tests) ────────────
    ("",          "unrated", 999, "Empty string falls back to UNRATED"),
    ("   ",       "unrated", 999, "Whitespace-only falls back to UNRATED"),
    ("X",         "unrated", 999, "Single invalid character falls back"),
    ("PG-14",     "unrated", 999, "Close but invalid rating 'PG-14'"),
    ("MA",        "unrated", 999, "TV rating 'MA' is not a valid MPAA rating"),

    # ── Tricky near-misses (3 tests) ──────────────────────────────
    ("Not Rated", "unrated", 999, "Phrase 'Not Rated' doesn't match 'Unrated'"),
    ("PG13",      "unrated", 999, "Missing hyphen — 'pg13' ≠ 'pg-13'"),
    ("NC17",      "unrated", 999, "Missing hyphen — 'nc17' ≠ 'nc-17'"),
]


def run_maturity_rating_tests():
    """Check BaseMovie.maturity_rating_and_rank against MATURITY_RATING_TEST_CASES.

    A fixture movie is built for each raw rating; a case passes only when both
    the returned label and the returned rank equal the expected values.

    Returns:
        True when no case failed, False otherwise.
    """
    rule = "=" * 70
    print(rule)
    print("MATURITY_RATING_AND_RANK TEST SUITE")
    print(rule)
    print()

    failures = 0
    for number, case in enumerate(MATURITY_RATING_TEST_CASES, 1):
        raw_rating, wanted_label, wanted_rank, note = case
        got_label, got_rank = _make_movie_with_rating(raw_rating).maturity_rating_and_rank()
        matched = got_label == wanted_label and got_rank == wanted_rank
        if matched:
            verdict = "✓ PASS"
        else:
            verdict = "✗ FAIL"
            failures += 1

        # Per-case report; the "Got" line is reserved for mismatches.
        print(f"Test {number:2d}: {verdict}")
        print(f"         Description: {note}")
        print(f"         Input:    {raw_rating!r}")
        print(f"         Expected: ({wanted_label!r}, {wanted_rank})")
        if not matched:
            print(f"         Got:      ({got_label!r}, {got_rank})")
        print()

    # Summary footer
    total = len(MATURITY_RATING_TEST_CASES)
    print(rule)
    print(f"RESULTS: {total - failures} passed, {failures} failed out of {total} tests")
    print(rule)

    return failures == 0

In [7]:
# Run the maturity_rating_and_rank suite. The call returns True only when no case
# failed; as the last expression of the cell, that boolean is shown as the result.
run_maturity_rating_tests()

MATURITY_RATING_AND_RANK TEST SUITE

Test  1: ✓ PASS
         Description: G maps to rank 1
         Input:    'G'
         Expected: ('g', 1)

Test  2: ✓ PASS
         Description: PG maps to rank 2
         Input:    'PG'
         Expected: ('pg', 2)

Test  3: ✓ PASS
         Description: PG-13 maps to rank 3 (hyphen preserved)
         Input:    'PG-13'
         Expected: ('pg-13', 3)

Test  4: ✓ PASS
         Description: R maps to rank 4
         Input:    'R'
         Expected: ('r', 4)

Test  5: ✓ PASS
         Description: NC-17 maps to rank 5 (hyphen preserved)
         Input:    'NC-17'
         Expected: ('nc-17', 5)

Test  6: ✓ PASS
         Description: Unrated maps to rank 999
         Input:    'Unrated'
         Expected: ('unrated', 999)

Test  7: ✓ PASS
         Description: Lowercase 'g' resolves to G
         Input:    'g'
         Expected: ('g', 1)

Test  8: ✓ PASS
         Description: Lowercase 'pg' resolves to PG
         Input:    'pg'
         Expected: ('pg'

True

## create_watch_provider_offering_key

In [None]:
"""
Test suite for create_watch_provider_offering_key function.

Tests verify that:
- Provider ID is bitshifted left by 4 bits
- Method ID is added to the lower 4 bits via OR
- Formula: result = (provider_id << 4) | method_id
"""

# Case layout: (provider_id, method_id, expected_result, description).
# Every expected_result below equals (provider_id << 4) | method_id.
WATCH_PROVIDER_OFFERING_TEST_CASES = [
    # --- Basic cases (4 cases) ---
    (0,    0,  0,     "Zero provider and method yields zero"),
    (1,    0,  16,    "Provider 1 shifted left 4 bits = 16"),
    (0,    1,  1,     "Method 1 with zero provider = 1"),
    (1,    1,  17,    "Provider 1 (16) + method 1 = 17"),

    # --- Shift width is 4 bits (3 cases) ---
    (2,    0,  32,    "Provider 2 << 4 = 32"),
    (10,   0,  160,   "Provider 10 << 4 = 160"),
    (100,  0,  1600,  "Provider 100 << 4 = 1600"),

    # --- Method ID range (4 cases) ---
    (1,    2,  18,    "Provider 1 + method 2 = 18"),
    (1,    3,  19,    "Provider 1 + method 3 = 19"),
    (1,    15, 31,    "Provider 1 + max 4-bit method (15) = 31"),
    (0,    15, 15,    "Zero provider + max method = 15"),

    # --- Realistic provider IDs (4 cases) ---
    (8,    1,  129,   "Netflix-like provider (8) + flatrate (1) = 129"),
    (337,  2,  5394,  "Disney+ provider (337) + rent (2) = 5394"),
    (1899, 3,  30387, "Max provider (1899) + buy (3) = 30387"),
    (15,   1,  241,   "Provider 15 + method 1 = 241"),

    # --- Edge cases (2 cases) ---
    (4095, 15, 65535, "Max 12-bit provider + max 4-bit method = 65535"),
    (256,  8,  4104,  "Provider 256 + method 8 = 4104"),
]


def run_watch_provider_offering_tests():
    """Check create_watch_provider_offering_key against WATCH_PROVIDER_OFFERING_TEST_CASES.

    Each case supplies a provider ID, a method ID and the expected key; the
    function's result is compared for equality and a per-case report is printed.

    Returns:
        True when no case failed, False otherwise.
    """
    rule = "=" * 70
    print(rule)
    print("CREATE_WATCH_PROVIDER_OFFERING_KEY TEST SUITE")
    print(rule)
    print()

    failures = 0
    for number, case in enumerate(WATCH_PROVIDER_OFFERING_TEST_CASES, 1):
        provider_id, method_id, wanted, note = case
        actual = create_watch_provider_offering_key(provider_id, method_id)
        matched = actual == wanted
        if matched:
            verdict = "✓ PASS"
        else:
            verdict = "✗ FAIL"
            failures += 1

        # Per-case report; the "Got" line is reserved for mismatches.
        print(f"Test {number:2d}: {verdict}")
        print(f"         Description: {note}")
        print(f"         Input:    provider_id={provider_id}, method_id={method_id}")
        print(f"         Expected: {wanted}")
        if not matched:
            print(f"         Got:      {actual}")
        print()

    # Summary footer
    total = len(WATCH_PROVIDER_OFFERING_TEST_CASES)
    print(rule)
    print(f"RESULTS: {total - failures} passed, {failures} failed out of {total} tests")
    print(rule)

    return failures == 0

In [9]:
# Run the create_watch_provider_offering_key suite. The call returns True only when
# no case failed; as the last expression of the cell, that boolean is shown as the result.
run_watch_provider_offering_tests()

CREATE_WATCH_PROVIDER_OFFERING_KEY TEST SUITE

Test  1: ✓ PASS
         Description: Zero provider and method yields zero
         Input:    provider_id=0, method_id=0
         Expected: 0

Test  2: ✓ PASS
         Description: Provider 1 shifted left 4 bits = 16
         Input:    provider_id=1, method_id=0
         Expected: 16

Test  3: ✓ PASS
         Description: Method 1 with zero provider = 1
         Input:    provider_id=0, method_id=1
         Expected: 1

Test  4: ✓ PASS
         Description: Provider 1 (16) + method 1 = 17
         Input:    provider_id=1, method_id=1
         Expected: 17

Test  5: ✓ PASS
         Description: Provider 2 << 4 = 32
         Input:    provider_id=2, method_id=0
         Expected: 32

Test  6: ✓ PASS
         Description: Provider 10 << 4 = 160
         Input:    provider_id=10, method_id=0
         Expected: 160

Test  7: ✓ PASS
         Description: Provider 100 << 4 = 1600
         Input:    provider_id=100, method_id=0
         Expected: 

True