In [1]:
from embeddings import EmbeddingsResponder
from entity_extraction import Extractor
from factual import FactualResponder
from data_repository import DataRepository
from intent_classifier import IntentClassifier, EmbeddingBasedIntentClassifier, MLPBasedIntentClassifier
from recommender import RecommendationResponder

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Single repository instance that is handed to the classifiers, the extractor
# and every responder constructed later in this notebook.
data_repository = DataRepository()



In [5]:
# Two intent classifiers built over the same repository:
# one embedding-based, one MLP-based.
intent_classifier_emb = EmbeddingBasedIntentClassifier(data_repository)
intent_classifier_mlp = MLPBasedIntentClassifier(data_repository)

# Entity extractor shared by all three responders below.
extractor = Extractor(data_repository)

# Responders: each gets the repository, the extractor and the classifier(s)
# its constructor accepts.
embeddings = EmbeddingsResponder(
    data_repository,
    extractor,
    intent_classifier=intent_classifier_mlp,
    emb_intent_classifier=intent_classifier_emb,
)
factual = FactualResponder(
    data_repository,
    extractor,
    emb_intent_classifier=intent_classifier_emb,
    mlp_intent_classifier=intent_classifier_mlp,
)
recommender = RecommendationResponder(
    data_repository,
    extractor,
    mlp_intent_classifier=intent_classifier_mlp,
)

Some weights of the model checkpoint at dbmdz/bert-large-cased-finetuned-conll03-english were not used when initializing BertForTokenClassification: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight']
- This IS expected if you are initializing BertForTokenClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForTokenClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Hardware accelerator e.g. GPU is available in the environment, but no `device` argument is passed to the `Pipeline` object. Model will be on CPU.


In [9]:
# Multi-title query: ask the recommender for films similar to two named movies.
# NOTE(review): the recorded output shows two longer candidate lists ahead of the
# final 3-item result — presumably debug prints inside answer_query; confirm and
# silence them if so.
recommender.answer_query("Recommend movies like Baby's Day Out and Home Alone")

["Baby's Day Out", 'Home Alone', 'Home Alone 2: Lost in New York', 'Home Alone 3', 'A Magician Home Alone', 'Planes, Trains and Automobiles', 'Only the Lonely', 'Dennis the Menace', 'Bedtime Stories', 'Uncle Buck', 'The Nightmare Before Christmas', 'Christmas with the Kranks', 'Addams Family Values', 'Relative Strangers', 'Adventures in Babysitting', 'Steal Big Steal Little', 'Jingle All the Way', 'The Goonies', "National Lampoon's European Vacation", 'Agent Cody Banks 2: Destination London']
['Home Alone 2: Lost in New York', 'Home Alone 3', 'A Magician Home Alone', 'Planes, Trains and Automobiles', 'Only the Lonely', 'Dennis the Menace', 'Bedtime Stories', 'Uncle Buck', 'The Nightmare Before Christmas', 'Christmas with the Kranks', 'Addams Family Values', 'Relative Strangers', 'Adventures in Babysitting', 'Steal Big Steal Little', 'Jingle All the Way', 'The Goonies', "National Lampoon's European Vacation", 'Agent Cody Banks 2: Destination London']


['Home Alone 2: Lost in New York', 'Home Alone 3', 'A Magician Home Alone']

In [31]:
import pandas as pd

# Load the movie metadata and the user ratings from the local data/ directory.
movies_df, ratings_df = (
    pd.read_csv(csv_path)
    for csv_path in ("data/movies.csv", "data/ratings.csv")
)

In [34]:
# Turn the '|'-separated genre string of every movie into a list of labels.
# The isinstance guard keeps this cell idempotent: on a second run the values
# are already lists, and calling .split on a list would raise AttributeError.
movies_df['genres'] = movies_df['genres'].apply(
    lambda genres: genres.split("|") if isinstance(genres, str) else genres
)

In [35]:
# Inspect the movies table; once the split cell has run, 'genres' holds a list
# of labels per row.
movies_df

Unnamed: 0,movieId,title,genres
0,1,Toy Story (1995),"[Adventure, Animation, Children, Comedy, Fantasy]"
1,2,Jumanji (1995),"[Adventure, Children, Fantasy]"
2,3,Grumpier Old Men (1995),"[Comedy, Romance]"
3,4,Waiting to Exhale (1995),"[Comedy, Drama, Romance]"
4,5,Father of the Bride Part II (1995),[Comedy]
...,...,...,...
9737,193581,Black Butler: Book of the Atlantic (2017),"[Action, Animation, Comedy, Fantasy]"
9738,193583,No Game No Life: Zero (2017),"[Animation, Comedy, Fantasy]"
9739,193585,Flint (2017),[Drama]
9740,193587,Bungo Stray Dogs: Dead Apple (2018),"[Action, Animation]"


In [42]:
# Keep only the films tagged with BOTH 'Horror' and 'Sci-Fi'
# (this is narrower than "all horror films").
horror_movies = movies_df[
    movies_df['genres'].apply(
        lambda genres: all(tag in genres for tag in ('Horror', 'Sci-Fi'))
    )
]

In [43]:
# Preview the first ten rows of the Horror + Sci-Fi selection.
horror_movies.head(10)

Unnamed: 0,movieId,title,genres
166,196,Species (1995),"[Horror, Sci-Fi]"
235,273,Mary Shelley's Frankenstein (Frankenstein) (1994),"[Drama, Horror, Sci-Fi]"
290,332,Village of the Damned (1995),"[Horror, Sci-Fi]"
370,426,Body Snatchers (1993),"[Horror, Sci-Fi, Thriller]"
447,512,"Puppet Masters, The (1994)","[Horror, Sci-Fi]"
522,610,Heavy Metal (1981),"[Action, Adventure, Animation, Horror, Sci-Fi]"
523,611,Hellraiser: Bloodline (1996),"[Action, Horror, Sci-Fi]"
741,968,Night of the Living Dead (1968),"[Horror, Sci-Fi, Thriller]"
794,1037,"Lawnmower Man, The (1992)","[Action, Horror, Sci-Fi, Thriller]"
902,1200,Aliens (1986),"[Action, Adventure, Horror, Sci-Fi]"


In [38]:
# Inspect the ratings table loaded from data/ratings.csv.
ratings_df

Unnamed: 0,userId,movieId,rating,timestamp
0,1,1,4.0,964982703
1,1,3,4.0,964981247
2,1,6,4.0,964982224
3,1,47,5.0,964983815
4,1,50,5.0,964982931
...,...,...,...,...
100831,610,166534,4.0,1493848402
100832,610,168248,5.0,1493850091
100833,610,168250,5.0,1494273047
100834,610,168252,5.0,1493846352


In [39]:
# Query mixing a person and a film title.
# NOTE(review): the recorded result is ['Travis Bickle', 'Bernard Herrmann',
# 'Martin Scorsese'] — people and a character rather than films, so the
# recommender does not appear to restrict its candidates to movies for this
# kind of query. Worth checking inside RecommendationResponder.
recommender.answer_query("I liked Robert De Niro in Taxi Driver, can you recommend me something similar?")

['Taxi Driver', 'Robert De Niro', 'Travis Bickle', 'Bernard Herrmann', 'Martin Scorsese', 'Jean-Marie Moncelet', 'Leonard Harris', 'Ennio Morricone', 'Diahnne Abbott', 'Elmer Bernstein', 'Purvis Short', 'Raging Bull', 'Al Pacino', 'Albert Brooks', 'Barry Primus', 'Carl Jung', 'Tony Gilroy', 'Luchino Visconti', 'Joe Pesci', 'Harold Hecht']
['Travis Bickle', 'Bernard Herrmann', 'Martin Scorsese', 'Jean-Marie Moncelet', 'Leonard Harris', 'Ennio Morricone', 'Diahnne Abbott', 'Elmer Bernstein', 'Purvis Short', 'Raging Bull', 'Al Pacino', 'Albert Brooks', 'Barry Primus', 'Carl Jung', 'Tony Gilroy', 'Luchino Visconti', 'Joe Pesci', 'Harold Hecht']


['Travis Bickle', 'Bernard Herrmann', 'Martin Scorsese']

In [44]:
# Check which entities the extractor finds in the same sentence that was sent
# to the recommender.
extractor.extract_ner("I liked Robert De Niro in Taxi Driver, can you recommend me something similar?")

['Robert De Niro', 'Taxi Driver']

In [47]:
# List every movie whose title contains "Star Wars".
movies_df.loc[movies_df['title'].str.contains("Star Wars")]

Unnamed: 0,movieId,title,genres
224,260,Star Wars: Episode IV - A New Hope (1977),"[Action, Adventure, Sci-Fi]"
898,1196,Star Wars: Episode V - The Empire Strikes Back...,"[Action, Adventure, Sci-Fi]"
911,1210,Star Wars: Episode VI - Return of the Jedi (1983),"[Action, Adventure, Sci-Fi]"
1979,2628,Star Wars: Episode I - The Phantom Menace (1999),"[Action, Adventure, Sci-Fi]"
3832,5378,Star Wars: Episode II - Attack of the Clones (...,"[Action, Adventure, Sci-Fi, IMAX]"
5896,33493,Star Wars: Episode III - Revenge of the Sith (...,"[Action, Adventure, Sci-Fi]"
6823,61160,Star Wars: The Clone Wars (2008),"[Action, Adventure, Animation, Sci-Fi]"
7367,79006,Empire of Dreams: The Story of the 'Star Wars'...,[Documentary]
8683,122886,Star Wars: Episode VII - The Force Awakens (2015),"[Action, Adventure, Fantasy, Sci-Fi, IMAX]"
8908,135216,The Star Wars Holiday Special (1978),"[Adventure, Children, Comedy, Sci-Fi]"


In [52]:
mov1 = "Baby's Day Out"
mov2 = "Home Alone"

# Genre lists of the movies whose title contains each query string.
# regex=False makes the lookup a literal substring match, so a title containing
# regex metacharacters (parentheses, '?', '*', ...) cannot break or skew it.
intersecting_genre1 = movies_df[movies_df['title'].str.contains(mov1, regex=False)]['genres']
intersecting_genre2 = movies_df[movies_df['title'].str.contains(mov2, regex=False)]['genres']

# Fail with a readable message rather than a bare IndexError when nothing matches.
assert not intersecting_genre1.empty, f"no movie title contains {mov1!r}"
assert not intersecting_genre2.empty, f"no movie title contains {mov2!r}"

# Several titles may contain the same substring (sequels, for instance); only
# the first match in table order is used for each movie.
combined = set(intersecting_genre1.iloc[0]).intersection(intersecting_genre2.iloc[0])

In [53]:
# Show the genres the two movies have in common.
combined

{'Comedy'}

In [54]:
# Flatten every movie's genre list into the distinct genre labels.
# The set removes duplicates; the order of the resulting list is arbitrary.
list_of_all_genres = list({
    genre
    for movie_genres in movies_df['genres'].values
    for genre in movie_genres
})

In [55]:
# Show the distinct genre labels (built from a set, so the order is arbitrary).
list_of_all_genres

['Drama',
 'Fantasy',
 'Documentary',
 'Romance',
 'Animation',
 'Comedy',
 'Sci-Fi',
 'Horror',
 'Film-Noir',
 'War',
 'Action',
 '(no genres listed)',
 'Thriller',
 'Mystery',
 'IMAX',
 'Western',
 'Adventure',
 'Children',
 'Musical',
 'Crime']

In [75]:
from fuzzywuzzy import process

# Fuzzy-match a whole sentence against the genre labels and keep the best label.
# NOTE(review): this query names no genre, yet the recorded result is
# 'Film-Noir' — presumably driven by the word "film". extractOne always returns
# its best candidate; consider a score threshold before trusting the match
# (TODO confirm the cutoff option in the fuzzywuzzy docs).
process.extractOne("Give me recommendations of a  film", list_of_all_genres)[0]


'Film-Noir'

In [65]:
# Genre-only query: the recorded output is an empty list, i.e. the extractor
# returns no entities when the sentence names a genre but no title or person.
extractor.extract_ner("Give me recommendations of a horror film")

[]