<a href="https://colab.research.google.com/github/Harshitmax/Movie-Recommendation-System/blob/main/Movie%20Recommendation.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import pandas as pd
from sentence_transformers import SentenceTransformer
from transformers import AutoTokenizer, AutoModel
import torch
import torch.nn.functional as F
from sklearn.metrics.pairwise import cosine_similarity

  from tqdm.autonotebook import tqdm, trange


In [None]:
# Read the movie catalogue from a CSV in the current working directory.
movies = pd.read_csv(filepath_or_buffer='movies.csv')
# Trailing bare expression: lets the notebook render the loaded frame.
movies

Unnamed: 0,movieId,title,genres
0,1,Toy Story (1995),Adventure|Animation|Children|Comedy|Fantasy
1,2,Jumanji (1995),Adventure|Children|Fantasy
2,3,Grumpier Old Men (1995),Comedy|Romance
3,4,Waiting to Exhale (1995),Comedy|Drama|Romance
4,5,Father of the Bride Part II (1995),Comedy
...,...,...,...
27273,131254,Kein Bund für's Leben (2007),Comedy
27274,131256,"Feuer, Eis & Dosenbier (2002)",Comedy
27275,131258,The Pirates (2014),Adventure
27276,131260,Rentun Ruusu (2001),(no genres listed)


In [None]:
# Combine title and genre list into one text field per movie; this column is
# what gets turned into embeddings further down.
movies['description'] = (
    'Title: ' + movies['title']
    + ', Genre: ' + movies['genres']
)
# Show the frame again so the new column is visible.
movies

Unnamed: 0,movieId,title,genres,description
0,1,Toy Story (1995),Adventure|Animation|Children|Comedy|Fantasy,"Title: Toy Story (1995), Genre: Adventure|Anim..."
1,2,Jumanji (1995),Adventure|Children|Fantasy,"Title: Jumanji (1995), Genre: Adventure|Childr..."
2,3,Grumpier Old Men (1995),Comedy|Romance,"Title: Grumpier Old Men (1995), Genre: Comedy|..."
3,4,Waiting to Exhale (1995),Comedy|Drama|Romance,"Title: Waiting to Exhale (1995), Genre: Comedy..."
4,5,Father of the Bride Part II (1995),Comedy,"Title: Father of the Bride Part II (1995), Gen..."
...,...,...,...,...
27273,131254,Kein Bund für's Leben (2007),Comedy,"Title: Kein Bund für's Leben (2007), Genre: Co..."
27274,131256,"Feuer, Eis & Dosenbier (2002)",Comedy,"Title: Feuer, Eis & Dosenbier (2002), Genre: C..."
27275,131258,The Pirates (2014),Adventure,"Title: The Pirates (2014), Genre: Adventure"
27276,131260,Rentun Ruusu (2001),(no genres listed),"Title: Rentun Ruusu (2001), Genre: (no genres ..."


In [None]:
def mean_pooling(model_output, attention_mask):
    """Average token vectors along dim 1, counting only positions the mask keeps.

    Args:
        model_output: Indexable whose first element is the per-token tensor
            (assumed to be laid out as batch x tokens x features).
        attention_mask: Tensor with one entry per token; it is broadcast to the
            shape of the token tensor and cast to float before weighting.

    Returns:
        The mask-weighted sum over dim 1 divided by the mask total for each row.
    """
    hidden_states = model_output[0]
    # Give the mask a trailing axis and stretch it over the feature dimension.
    weights = attention_mask.unsqueeze(-1).expand(hidden_states.size()).float()
    weighted_sum = (hidden_states * weights).sum(1)
    # Clamp the denominator so a row with an all-zero mask does not divide by zero.
    token_counts = weights.sum(1).clamp(min=1e-9)
    return weighted_sum / token_counts

# Single source of truth for the checkpoint id, so the tokenizer and the model
# weights are always loaded from the same Hugging Face repository.
CHECKPOINT = 'sentence-transformers/all-MiniLM-L6-v2'
tokenizer = AutoTokenizer.from_pretrained(CHECKPOINT)
model = AutoModel.from_pretrained(CHECKPOINT)

def get_embeddings(sentences):
    """Embed text with the module-level ``tokenizer`` / ``model`` pair.

    The input is tokenised (padded, truncated, returned as PyTorch tensors),
    passed through the model with gradient tracking disabled, mean-pooled using
    the attention mask, and finally L2-normalised along dim 1.

    NOTE(review): this reads the global ``model``. A later cell rebinds that
    name to a ``SentenceTransformer``; after that this function presumably no
    longer works as written - confirm before relying on it.

    Args:
        sentences: Text input forwarded unchanged to the tokenizer.

    Returns:
        Tensor of L2-normalised pooled embeddings.
    """
    batch = tokenizer(sentences, padding=True, truncation=True, return_tensors='pt')
    # Inference only: skip building the autograd graph.
    with torch.no_grad():
        outputs = model(**batch)
    pooled = mean_pooling(outputs, batch['attention_mask'])
    return F.normalize(pooled, p=2, dim=1)

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


tokenizer_config.json:   0%|          | 0.00/350 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/466k [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/112 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/612 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/90.9M [00:00<?, ?B/s]

In [None]:
# Load the same checkpoint through the sentence-transformers wrapper. Note that
# this rebinds `model`, replacing the AutoModel instance created in the cell above.
model = SentenceTransformer('sentence-transformers/all-MiniLM-L6-v2')
# Encode every movie description once, up front; queries are compared against
# this result later on.
movies_embeddings = model.encode(movies['description'].to_list())

modules.json:   0%|          | 0.00/349 [00:00<?, ?B/s]

config_sentence_transformers.json:   0%|          | 0.00/116 [00:00<?, ?B/s]

README.md:   0%|          | 0.00/10.7k [00:00<?, ?B/s]

sentence_bert_config.json:   0%|          | 0.00/53.0 [00:00<?, ?B/s]

1_Pooling/config.json:   0%|          | 0.00/190 [00:00<?, ?B/s]

In [None]:
def get_recommendations(query, embeddings, df, top_n = 5):
    """Return the rows of ``df`` whose embeddings are most similar to ``query``.

    Args:
        query: Free-text search string; encoded with the module-level ``model``.
        embeddings: Precomputed embeddings to compare against. Row ``i`` is
            assumed to belong to ``df.iloc[i]`` (selection is positional).
        df: DataFrame to pick the results from.
        top_n: Maximum number of rows to return. ``0`` yields an empty frame;
            values larger than the number of rows return all rows.

    Returns:
        A slice of ``df`` ordered from most to least similar.

    Raises:
        ValueError: If ``top_n`` is negative.
    """
    if top_n < 0:
        raise ValueError(f"top_n must be non-negative, got {top_n}")
    query_embedding = model.encode([query])
    # cosine_similarity returns a (1, n_rows) matrix for a single query.
    scores = cosine_similarity(query_embedding, embeddings)[0]
    # Reverse first, then take the head. The previous `argsort()[-top_n:]` form
    # returned *every* row for top_n == 0, because `-0` slices from the start.
    ranked = scores.argsort()[::-1]
    return df.iloc[ranked[:top_n]]

In [None]:
# Demo 1: a free-text query that mixes a topic, a format and an audience.
query = "Animated movie about bears that I can watch with kids"
recommendations = get_recommendations(query, embeddings=movies_embeddings, df=movies)
# Only the human-readable columns are shown.
recommendations.loc[:, ['title', 'genres']]

Unnamed: 0,title,genres
5442,Care Bears Movie II: A New Generation (1986),Animation|Children
5441,"Care Bears Movie, The (1985)",Animation|Children|Fantasy
6779,Brother Bear (2003),Adventure|Animation|Children
23254,Bears (2014),Documentary
5385,"Country Bears, The (2002)",Children|Comedy


In [None]:
# Demo 2: a query phrased purely in terms of genres.
query = "Movies falling under the comedy and horror genres"
recommendations = get_recommendations(query, embeddings=movies_embeddings, df=movies)
# Only the human-readable columns are shown.
recommendations.loc[:, ['title', 'genres']]

Unnamed: 0,title,genres
13907,Hood of Horror (2006),Comedy|Drama|Horror
7415,Versus (2000),Action|Comedy|Fantasy|Horror
22614,American Scary (2006),Comedy|Documentary|Horror
20441,Lo (2009),Comedy|Horror|Romance
24907,Detention of the Dead (2012),Comedy|Horror


In [None]:
# Demo 3: a query that names a language, which the description text (title and
# genres only) does not state explicitly.
query = "Funny hindi comedies to watch"
recommendations = get_recommendations(query, embeddings=movies_embeddings, df=movies)
# Only the human-readable columns are shown.
recommendations.loc[:, ['title', 'genres']]

Unnamed: 0,title,genres
5491,Indian Summer (1993),Comedy|Drama
15051,India (Indien) (1993),Comedy|Drama
25242,Loins of Punjab Presents (2007),Comedy
15018,Delhi-6 (2009),Comedy|Crime|Drama
11558,My Bollywood Bride (2006),Comedy|Drama|Romance


In [None]:
# Demo 4: a query combining genre and release decade.
# The assignment target must be `query`: with the misspelt name `uery`, the
# variable kept the previous cell's text and the search repeated its results.
# NOTE: the output saved with this cell predates the correction; re-run to refresh.
query = "Romantic comedies released in the 1990s"
recommendations = get_recommendations(query, movies_embeddings, movies)
recommendations[['title', 'genres']]

Unnamed: 0,title,genres
5491,Indian Summer (1993),Comedy|Drama
15051,India (Indien) (1993),Comedy|Drama
25242,Loins of Punjab Presents (2007),Comedy
15018,Delhi-6 (2009),Comedy|Crime|Drama
11558,My Bollywood Bride (2006),Comedy|Drama|Romance
