In [1]:
%load_ext autoreload
%autoreload 2
import sys
sys.path.append('../')
from sentence_transformers import SentenceTransformer
from model.decoderMLP import decoderMLP, decoderAttention, movieTransformer
from tqdm import tqdm
import openai
import pickle 
import argparse
from typing import List
import logging
import numpy as np
import torch
import json
import time
import torch.optim as optim
import torch
from pprint import pprint as pp
from scipy.sparse import csr_matrix
import os 
import pandas as pd
from collections import defaultdict
from helper.sampler import NegSampler, negsamp_vectorized_bsearch_preverif
from argparse import ArgumentParser
from model.MF import MatrixFactorization,MatrixFactorizationLLM
from trainer.training_utils import *
from helper.eval_metrics import *
from helper.dataloader import *

  from .autonotebook import tqdm as notebook_tqdm


In [None]:
lr= 0.00001
epochs = 500
num_heads = 8
cosine = False
num_layers = 2
output_emb = 256
embedding_dim = 768

saved_path = f'/home/mila/e/emiliano.penaloza/scratch/saved_model/ml-100k/attention_best_model_{lr}_{epochs}_{num_heads}_{cosine}_{num_layers}.pth'

model_path = saved_path + '_best_model.pth'
embedder_path = saved_path + '_embedder.pth'
item_embeddings_path = saved_path + '_item_embeddings.pth'
user_embeddings_path = saved_path + '_user_embeddings.pth'
model_rankings_path = saved_path + '_rankings_matrix.npy'
id_genre_map = map_id_to_genre('../data/ml-100k/movies.dat')

# 1. Data Loading & Preprocessing
train_data = load_dataset("../data_preprocessed/ml-100k/data_split/train_set_leave_one.json")
valid_data = load_dataset("../data_preprocessed/ml-100k/data_split/valid_set_leave_one.json")
test_data = load_dataset("../data_preprocessed/ml-100k/data_split/test_set_leave_one.json")
movie_title_to_id = map_title_to_id("../data/ml-100k/movies.dat")

train_data = convert_titles_to_ids(train_data, movie_title_to_id)
valid_data = convert_titles_to_ids(valid_data, movie_title_to_id)
test_data = convert_titles_to_ids(test_data, movie_title_to_id)

train_matrix, actual_list_val, actual_list_test = create_train_matrix_and_actual_lists(train_data, valid_data,
                                                                                        test_data, movie_title_to_id)
train_matrix = csr_matrix(train_matrix)  # Convert train_matrix to a CSR matrix


# 2. Model Creation
num_users, num_items = train_matrix.shape

In [None]:
args = parse_args(True)
user_embedder = decoderAttention(embedding_dim,num_heads,num_layers,output_emb, 0  )

model = MatrixFactorizationLLM(num_users, user_embedder,num_items, args).to(args.device)


  torch.nn.init.xavier_uniform(m.weight)


In [5]:
model.load_state_dict(torch.load(model_path,map_location=torch.device('cuda')))
# user_embedder.load_state_dict(torch.load(user_embeddings_path,map_location=torch.device('mps')))
# model.user_embeddings = user_embedder

model.eval()
transformer_model = SentenceTransformer('sentence-transformers/sentence-t5-large').to(args.device)


In [103]:
summaries = {
        "drama": "summary: this collection of drama movies explores various themes, including crime, romance, and personal journeys. from british films to american dramas, these stories delve into the human condition, addressing issues of love, loss, and societal challenges.",
        "romance": "summary: a collection of romantic films spanning different subgenres, including romantic comedies, dramas, and musicals. these movies explore themes of love, relationships, and personal growth.",
        "action": "summary: action-packed films filled with thrilling crime, martial arts, cyberpunk, and sci-fi elements. from the adrenaline-fueled world of hackers to intense battles and jaw-dropping stunts, these movies will keep you on the edge of your",
        "scifi": "summary: a collection of science fiction films that explore themes such as time travel, cloning, and genetic experimentation. the movies range from thrilling adventures in dinosaur parks to cyberpunk thrillers set in a dystopian future.",
        "thriller": "summary: a collection of 1990s thriller films featuring crime, suspense, psychological twists, and action. the films star a variety of notable actors and cover genres such as cybercrime, neo-noir, psychological drama, and action horror.",
        "comedy": "summary: a collection of comedic films featuring romance, drama, coming-of-age, and superhero elements.",
        "crime": "summary: a collection of crime films spanning the early to mid-1990s, featuring comedy, cyberpunk, legal thriller, neo-noir, mystery, and superhero genres. the movies delve into themes of hacking, legal battles, heists,",
        "war": "summary: war movies range from powerful historical dramas depicting the atrocities of war to satirical comedies highlighting international relations, and intense dramas set against the backdrop of political unrest. these films explore themes of human resilience, activism, and the untold stories of",
        "mystery": "summary: a collection of mystery movies from the mid-90s, featuring elements of comedy, science fiction, psychological thrills, and neo-noir.",
        "adventure": "summary: adventure movies from the 90s featuring thrilling quests, exotic locations, and diverse characters. these films range from action-packed science fiction to heartfelt family adventures.",
        "children": "summary: a collection of beloved children's movies featuring animated adventures, musical numbers, and heartwarming stories.",
        "fantasy": "summary: a collection of fantasy films that transport viewers into magical worlds filled with adventure and imagination. these movies feature young protagonists embarking on extraordinary quests, often blending animation with live-action elements. from tales of knights and legendary tables to enchanted libraries and toys",
        "horror": "summary: a collection of horror films ranging from gothic comedies to supernatural slashers, exploring various themes like science fiction, psychological thrillers, and slasher genres.",
        "animation": "summary: these animated films offer fantasy adventures with memorable characters and music, capturing the imaginations of audiences with their vibrant animation and heartwarming stories.",
        "documentary": "summary: documentaries that explore important historical figures and events, including the assassination of malcolm x and the life of anne frank. also included are documentaries about controversial figures like heidi fleiss and mockumentaries like \"man of the year\"."
    }
  
          
def get_preds(summaries ,USER_INDEX): 
    args.embedding_module = 't5'
    topk= args.topk
    embs = get_genrewise_embeddings(summaries,args, model= transformer_model )


    genre_list = get_genres()
    embs_tens = model.user_embeddings.prepare_input(embs,genre_list).to(args.device)


    rating_pred = model.predict(embs_tens.unsqueeze(0)).cpu().detach().numpy()
    if USER_INDEX != -1:
        rating_pred[train_matrix[USER_INDEX].toarray() > 0] = 0


    # reference: https://stackoverflow.com/a/23734295, https://stackoverflow.com/a/20104162
    ind = np.argpartition(rating_pred, -topk)
    ind = ind[:, -topk:]
    arr_ind = rating_pred[np.arange(len(rating_pred))[:, None], ind]
    arr_ind_argsort = np.argsort(arr_ind)[np.arange(len(rating_pred)), ::-1]
    

    ranked_items = ind[np.arange(len(rating_pred))[:, None], arr_ind_argsort]
    recall_val = recall_at_k_one(actual_list_val[USER_INDEX], ranked_items[0].tolist(), 20)
    print(f"{recall_val=}")


    reversed_movie_title_to_id = {v: k for k, v in movie_title_to_id.items()}
    movie_titles_ranked = [f'{index} : {reversed_movie_title_to_id[i+1]} {id_genre_map[i+1]}' for index,i in enumerate(ranked_items[0][:20])]
    print(f"{rating_pred[:,ranked_items[:20]]=}")
    pp(movie_titles_ranked)



    return torch.tensor(ranked_items).to(args.device),torch.tensor(rating_pred).to(args.device)

In [193]:
summaries = {
        "children's" : 'movies based on video games sci-fi elements',
        # "adventure": "summary:",
    }

_ = get_preds(summaries,-1)

recall_val=0.08333333333333333
rating_pred[:,ranked_items[:20]]=array([[[0.84067714, 0.8211572 , 0.8196265 , 0.80309385, 0.79291224,
         0.7859359 , 0.7820298 , 0.7720497 , 0.7658509 , 0.73849   ,
         0.7327476 , 0.69349915, 0.6933176 , 0.67101103, 0.6452217 ,
         0.6359311 , 0.63181835, 0.6302438 , 0.626497  , 0.6227812 ]]],
      dtype=float32)
['0 : If Lucy Fell (1996) Comedy|Romance',
 "1 : Mighty Morphin Power Rangers: The Movie (1995) Action|Children's",
 '2 : Perez Family, The (1995) Comedy|Romance',
 '3 : Pretty Woman (1990) Comedy|Romance',
 "4 : Kid in King Arthur's Court, A (1995) "
 "Adventure|Children's|Comedy|Fantasy|Romance|Sci-Fi",
 "5 : Super Mario Bros. (1993) Action|Adventure|Children's|Sci-Fi",
 "6 : Toy Story (1995) Animation|Children's|Comedy",
 '7 : Natural Born Killers (1994) Action|Thriller',
 '8 : Usual Suspects, The (1995) Crime|Thriller',
 "9 : Young Poisoner's Handbook, The (1995) Crime",
 '10 : Thinner (1996) Horror|Thriller',
 '11 : Sabrina

In [127]:
summaries = {
        "romance": "summary: dramatic comedies",
        "action": "summary: ",
        "scifi": "summary: ",
        "thriller": "summary: ",
        "comedy": "summary: ",
        "crime": "summary: ",
        "war": "summary: ",
        "mystery": "summary: ",
        "adventure": "summary: ",
        "children": "summary: ",
        "fantasy": "summary:",
        "horror": "summary: ",
        "animation": "summary: ",
        "documentary": "summary: "
    }

_ = get_preds(summaries,10)

recall_val=0.0
rating_pred[:,ranked_items[:20]]=array([[[10.149776 ,  9.857829 ,  9.436834 ,  9.174857 ,  8.174489 ,
          7.913516 ,  7.643267 ,  7.352436 ,  7.123072 ,  6.7454185,
          6.6781964,  6.469304 ,  6.4074607,  6.3636374,  6.2962384,
          6.231206 ,  6.1615562,  6.1074853,  5.987812 ,  5.9270906]]],
      dtype=float32)
["0 : Toy Story (1995) Animation|Children's|Comedy",
 '1 : Usual Suspects, The (1995) Crime|Thriller',
 "2 : Mighty Morphin Power Rangers: The Movie (1995) Action|Children's",
 '3 : Perez Family, The (1995) Comedy|Romance',
 '4 : Sabrina (1995) Comedy|Romance',
 "5 : Young Poisoner's Handbook, The (1995) Crime",
 '6 : Rob Roy (1995) Drama|Romance|War',
 '7 : Natural Born Killers (1994) Action|Thriller',
 '8 : Milk Money (1994) Comedy|Romance',
 '9 : Just Cause (1995) Mystery|Thriller',
 '10 : Mixed Nuts (1994) Comedy',
 '11 : Some Folks Call It a Sling Blade (1993) Drama|Thriller',
 '12 : Hudsucker Proxy, The (1994) Comedy|Romance',
 '13 : Only

In [25]:
summaries = {
        "action": "summary: action-packed films from the 1990s featuring post-apocalyptic worlds, crime, superheroes, buddy cops, historical dramas, and intense sci-fi thrillers.",
        "scifi": "summary: a collection of sci-fi films spanning different subgenres including space opera, cyberpunk, neo-noir, and fantasy comedy. these movies explore themes such as alien arrivals, epic space battles, futuristic technology, and dystopian societies.",
        "thriller": "summary: a collection of intense thriller films with psychological elements, featuring mystery, crime, and suspense.",
        "comedy": "summary: a collection of comedy films from the 1990s with diverse themes including sports, romance, divorce, coming-of-age, drag queens, and family dynamics. these movies explore humor and life in various situations, offering a mixture of laughs and",
        "crime": "summary: intense crime movies featuring psychological thrillers, gangsters, and dramatic crime thrillers set in urban environments.",
        "drama": "summary: a collection of dramatic films featuring psychological thrillers, crime dramas, legal thrillers, and epic comedies. these movies explore themes of escape, justice, redemption, and the complexities of human relationships.",
        "adventure": "summary: adventure movies from the 90s featuring thrilling survival, fantasy elements, and action-packed quests.",
        "children": "summary: a collection of uplifting and family-friendly children's movies that feature adventure, comedy, and animated characters. these movies provide entertainment for young audiences and are enjoyable for the whole family.",
        "fantasy": "summary: this set of fantasy movies includes adventure, comedy, action, and supernatural elements. some films involve time travel or historical settings, while others weave fantasy into the holiday season. these movies feature magical and fantastical themes, with a mix of live",
        "romance": "summary: a collection of romantic films spanning different genres, including comedy, drama, and horror. these films feature various themes such as love, relationships, and coming-of-age.",
        "mystery": "summary: a collection of mysterious films that explore themes of crime, psychology, and suspense. featuring legal thrillers, psychological thrillers, acid westerns, and science fantasy. starring renowned actors and directors, these movies delve into the depths of human",
        "horror": "summary: a collection of horror movies with various themes such as body horror, gothic comedy, psychological thriller, supernatural, and romantic horror."
    }

get_preds(summaries,10)

rating_pred=array([[ 3.2983067 , -0.03292658, -2.6388288 , ..., -5.7749333 ,
        -6.4197893 , -6.347992  ]], dtype=float32)
recall_val=0.0
['0 : Johnny Mnemonic (1995) Action|Sci-Fi|Thriller',
 '1 : Usual Suspects, The (1995) Crime|Thriller',
 '2 : Mute Witness (1994) Thriller',
 "3 : Mighty Morphin Power Rangers: The Movie (1995) Action|Children's",
 '4 : In the Bleak Midwinter (1995) Comedy',
 '5 : Nick of Time (1995) Action|Thriller',
 '6 : Brothers McMullen, The (1995) Comedy',
 '7 : Something to Talk About (1995) Comedy|Drama|Romance',
 '8 : Man of No Importance, A (1994) Drama',
 '9 : Exit to Eden (1994) Comedy',
 '10 : Bhaji on the Beach (1993) Comedy|Drama',
 '11 : Crimson Tide (1995) Drama|Thriller|War',
 '12 : Two if by Sea (1996) Comedy|Romance',
 '13 : Waiting to Exhale (1995) Comedy|Drama',
 "14 : White Man's Burden (1995) Drama",
 '15 : Batman Forever (1995) Action|Adventure|Comedy|Crime',
 '16 : Sabrina (1995) Comedy|Romance',
 '17 : Lamerica (1994) Drama',
 '18 : Ni

(tensor([[ 770,   11,  772,  373,  717,  760,   25,  774, 1074,  724, 1474,   30,
          1039, 1310,  554,   28,  273, 1627,  721,    0]], device='cuda:0'),
 tensor([[ 3.2983, -0.0329, -2.6388,  ..., -5.7749, -6.4198, -6.3480]],
        device='cuda:0'))

In [79]:
summaries = {
         "drama": "summary: a collection of diverse drama films spanning different time periods and settings. these movies explore themes such as war, historical events, personal struggles, and cultural differences. prepare for engaging storytelling and compelling performances.",
        "thriller": "summary: a collection of gripping neo-noir  thrillers with captivating storytelling and compelling characters.",
        "comedy": "summary: a collection of lighthearted comedy-drama films with romantic elements, centered around various characters and their humorous situations." 
    }
get_preds(summaries,8)


recall_val=0.0
rating_pred[:,ranked_items[:20]]=array([[[4.88187  , 4.796911 , 4.2220984, 4.118821 , 3.9036672,
         3.8293638, 3.7943988, 3.426751 , 3.4190583, 3.302624 ,
         3.2861614, 3.1626375, 3.0910912, 3.0854669, 3.0730765,
         3.0154707, 2.9396334, 2.9084356, 2.9044778, 2.902322 ]]],
      dtype=float32)
['0 : City Hall (1996) Drama|Thriller',
 "1 : Kid in King Arthur's Court, A (1995) "
 "Adventure|Children's|Comedy|Fantasy|Romance|Sci-Fi",
 '2 : Quiz Show (1994) Drama',
 "3 : Mighty Morphin Power Rangers: The Movie (1995) Action|Children's",
 "4 : Toy Story (1995) Animation|Children's|Comedy",
 '5 : Natural Born Killers (1994) Action|Thriller',
 '6 : Forget Paris (1995) Comedy|Romance',
 "7 : Young Poisoner's Handbook, The (1995) Crime",
 '8 : Nemesis 2: Nebula (1995) Action|Sci-Fi|Thriller',
 '9 : My Crazy Life (Mi vida loca) (1993) Drama',
 '10 : Circle of Friends (1995) Drama|Romance',
 '11 : Shopping (1994) Action|Thriller',
 '12 : Queen Margot (Reine Margot

(tensor([[ 695,  559,   57,  373,    0,   52, 1040,  546, 1595, 1420,  723, 1594,
           729,  789,  275,  548, 1217,  716, 1039,  784]], device='cuda:0'),
 tensor([[ 3.9037,  0.7154, -2.0728,  ..., -4.4861, -5.6669, -5.9640]],
        device='cuda:0'))

In [96]:
summaries = {
       "drama": "summary: a collection of diverse drama films spanning different time periods and settings. these movies explore themes such as war,. prepare for engaging storytelling and compelling performances.",
        "thriller": "summary: a collection of gripping neo-noir crime thrillers with captivating storytelling and compelling characters.",
        "comedy": "summary: a collection of lighthearted comedy-drama films with romantic elements, centered around various characters and their humorous situations." 
        }
get_preds(summaries,8)


recall_val=0.0
rating_pred[:,ranked_items[:20]]=array([[[4.097349 , 3.949589 , 3.6617706, 3.3679554, 3.317356 ,
         3.2075047, 3.07199  , 2.938858 , 2.9328187, 2.9130967,
         2.8185902, 2.7796543, 2.7576113, 2.718648 , 2.6585126,
         2.6254716, 2.6081107, 2.5756125, 2.5701537, 2.5588923]]],
      dtype=float32)
['0 : City Hall (1996) Drama|Thriller',
 "1 : Kid in King Arthur's Court, A (1995) "
 "Adventure|Children's|Comedy|Fantasy|Romance|Sci-Fi",
 "2 : Mighty Morphin Power Rangers: The Movie (1995) Action|Children's",
 '3 : Quiz Show (1994) Drama',
 "4 : Toy Story (1995) Animation|Children's|Comedy",
 '5 : Forget Paris (1995) Comedy|Romance',
 '6 : Natural Born Killers (1994) Action|Thriller',
 "7 : Young Poisoner's Handbook, The (1995) Crime",
 '8 : Shopping (1994) Action|Thriller',
 '9 : Friday (1995) Comedy',
 '10 : Circle of Friends (1995) Drama|Romance',
 '11 : Juror, The (1996) Drama|Thriller',
 '12 : Johnny Mnemonic (1995) Action|Sci-Fi|Thriller',
 '13 : Two if 

(tensor([[ 695,  559,  373,   57,    0, 1040,   52,  546, 1594, 1217,  723,  716,
           770, 1039,  717,  719, 1595,  275,  372,  548]], device='cuda:0'),
 tensor([[ 3.3174,  0.4781, -1.9608,  ..., -4.0396, -5.1065, -5.3225]],
        device='cuda:0'))

### Checking climber results

In [98]:
with open('/home/mila/e/emiliano.penaloza/LLM4REC/climber/result_dict_down.pkl','rb') as f:
    data = pickle.load(f)
first = data[5]
user_id = 5 
data

{0: {'old': ["action: summary: action-packed movies from the 90s with a mix of cyberpunk, post-apocalyptic, and martial arts themes. these films feature intense action sequences and are directed by renowned filmmakers. drama: summary: a collection of drama films ranging from disaster survival, epic western, historical biographical, romantic, and psychological thriller. these films explore themes of love, loss, personal struggles, and the human condition. romance: summary: a collection of romantic movies from various genres including drama, comedy, musical, and fantasy. these films explore themes of love and relationships, featuring a mix of well-known actors and diverse storylines. scifi: summary: a collection of sci-fi films ranging from space operas to post-apocalyptic adventures, with elements of cyberpunk and dystopia. the movies explore themes of technology, survival, and thrilling action, featuring memorable characters and imaginative settings. thriller: summary: a collection of 

In [18]:
import re
def make_string_dict(data):
    
    genre_summary_dict = {}
    data = data[0].lower().replace('\n',' ').replace('-','').replace('summary:','summary')

    # Use regular expression to find genre and summary information
    matches = re.finditer(r'(\w+): (.+?)(?=\w+:|$)', data)

    # Iterate through the matches and extract genre and summary information
    for match in matches:
        genre, summary = match.group(1), match.group(2)
        genre_summary_dict[genre] = summary.replace('summary','summary:').strip()

    # Convert the dictionary to JSON
    
    return genre_summary_dict
summaries = make_string_dict(first['old'])
summaries_new = make_string_dict(first['new'])


In [22]:
preds, scores = get_preds(summaries,user_id)
scores[:,preds]


rating_pred=array([[ 2.9418287,  1.3047427, -2.2056112, ..., -6.270923 , -7.307493 ,
        -7.2975984]], dtype=float32)
['0 : Johnny Mnemonic (1995) Action|Sci-Fi|Thriller',
 "1 : Mighty Morphin Power Rangers: The Movie (1995) Action|Children's",
 '2 : Living in Oblivion (1995) Comedy',
 '3 : Exit to Eden (1994) Comedy',
 '4 : Mad Love (1995) Drama|Romance',
 '5 : Bye Bye, Love (1995) Comedy',
 '6 : August (1996) Drama',
 '7 : Yankee Zulu (1994) Comedy|Drama',
 '8 : Destiny Turns on the Radio (1995) Comedy',
 '9 : In the Bleak Midwinter (1995) Comedy',
 '10 : Hate (Haine, La) (1995) Drama',
 '11 : Before Sunrise (1995) Drama|Romance',
 '12 : Quiz Show (1994) Drama',
 '13 : Solo (1996) Action|Sci-Fi|Thriller',
 '14 : Kids in the Hall: Brain Candy (1996) Comedy',
 '15 : Bhaji on the Beach (1993) Comedy|Drama',
 '16 : Black Sheep (1996) Comedy',
 '17 : Once Upon a Time... When We Were Colored (1995) Drama',
 '18 : Clean Slate (1994) Comedy',
 '19 : Calendar Girl (1993) Drama']


tensor([[[5.3849, 4.7300, 4.1718, 4.0173, 3.7522, 3.6169, 3.4238, 3.2670,
          3.2548, 3.2110, 3.1619, 3.1258, 3.1133, 3.0163, 2.9157, 2.8986,
          2.8662, 2.8463, 2.8085, 2.7866]]], device='cuda:0')

In [23]:
preds, scores = get_preds(summaries_new,user_id)
scores[:,preds]


rating_pred=array([[ 2.621348 ,  1.2781109, -1.8051425, ..., -5.7661824, -6.681301 ,
        -6.7262344]], dtype=float32)
['0 : Johnny Mnemonic (1995) Action|Sci-Fi|Thriller',
 "1 : Mighty Morphin Power Rangers: The Movie (1995) Action|Children's",
 '2 : Living in Oblivion (1995) Comedy',
 '3 : Exit to Eden (1994) Comedy',
 '4 : Mad Love (1995) Drama|Romance',
 '5 : Bye Bye, Love (1995) Comedy',
 '6 : Quiz Show (1994) Drama',
 '7 : Yankee Zulu (1994) Comedy|Drama',
 '8 : In the Bleak Midwinter (1995) Comedy',
 '9 : August (1996) Drama',
 '10 : Before Sunrise (1995) Drama|Romance',
 '11 : Destiny Turns on the Radio (1995) Comedy',
 '12 : Bhaji on the Beach (1993) Comedy|Drama',
 '13 : Once Upon a Time... When We Were Colored (1995) Drama',
 '14 : Clean Slate (1994) Comedy',
 '15 : Solo (1996) Action|Sci-Fi|Thriller',
 '16 : Hate (Haine, La) (1995) Drama',
 '17 : Strange Days (1995) Action|Crime|Sci-Fi',
 '18 : Safe (1995) Thriller',
 '19 : Black Sheep (1996) Comedy']


tensor([[[4.8019, 4.2590, 3.7453, 3.5889, 3.3896, 3.1480, 2.9575, 2.9272,
          2.8972, 2.8730, 2.7684, 2.7650, 2.7613, 2.7112, 2.7105, 2.7018,
          2.6813, 2.5314, 2.5257, 2.4328]]], device='cuda:0')

In [None]:

len(first['new'][0].split()),len(first['old'][0].split())

(193, 194)

In [None]:
first

{'old': ["action: summary: action-packed films with thrilling plots and intense characters. from sci-fi to crime, these movies deliver excitement and adrenaline. thriller: summary: a collection of thrilling films spanning various genres, including medical disasters, westerns, crime dramas, and spy thrillers, featuring intense storylines and memorable performances. adventure: summary: adventure awaits as a kid is transported to a fantastical world, toys come to life, and a spy battles against enemies in thrilling missions. children: summary: a collection of children's adventure movies with a touch of fantasy and comedy. comedy: summary: a collection of comedic films featuring various themes, including prank calls, blended families, romantic entanglements, and the comedic dynamics of unusual situations. romance: summary: a collection of romantic comedy and drama films set in various time periods and featuring characters navigating love, relationships, and unexpected twists. crime: summar

In [42]:
def find_different_word_indices(s1, s2):
    # Split the strings into lists of words
    words_s1 = s1.split()[88:]
    words_s2 = s2.split()[87:]

    # Find the minimum length of the two lists
    min_len = min(len(words_s1), len(words_s2))

    # Initialize a list to store the indices of differences
    different_indices = []

    # Iterate through each word and compare
    i = 0
    k=0
    while i < len(words_s1):

        while  i < len(words_s1) and words_s1[i] != words_s2[k]:
            print(f"{ words_s2[k]=}")
            print(f"{words_s1[i]=}")
            different_indices.append(i)
            i += 1
        i+=1
        k+=1

    return different_indices
find_different_word_indices(first['old'][0],first['new'][0])



[]