In [5]:
import pandas as pd
import torch
from transformers import BertTokenizer, BertModel
from sklearn.metrics.pairwise import cosine_similarity
import numpy as np


In [6]:
def load_bert_model(model_name='bert-base-uncased'):
    """Load a pretrained BERT tokenizer and encoder.

    Args:
        model_name: Checkpoint identifier handed to ``from_pretrained`` for
            both the tokenizer and the model. Defaults to
            ``'bert-base-uncased'``, the checkpoint this function previously
            hard-coded, so existing zero-argument calls behave as before.

    Returns:
        tuple: ``(tokenizer, model)`` built from the same checkpoint.
    """
    tokenizer = BertTokenizer.from_pretrained(model_name)
    model = BertModel.from_pretrained(model_name)
    # The model is only used to produce embeddings, so state the inference
    # mode explicitly rather than relying on the loader's default.
    model.eval()
    return tokenizer, model

In [7]:
def encode_text(text, tokenizer, model):
    """Embed ``text`` as the mean of its final-layer token vectors.

    Padding positions are excluded from the average by weighting each token
    with the tokenizer's attention mask. For a single string there is no
    padding, so the result equals a plain mean over the sequence; for a batch
    of strings with different lengths, the shorter entries are no longer
    diluted by pad-token vectors.

    Args:
        text: Text to embed; forwarded unchanged to ``tokenizer``.
        tokenizer: Callable returning a mapping of PyTorch tensors that
            includes ``'attention_mask'`` (called with ``return_tensors='pt'``,
            truncation and padding enabled, ``max_length=512``).
        model: Callable accepting the tokenizer output as keyword arguments
            and returning an object with a ``last_hidden_state`` tensor.

    Returns:
        torch.Tensor: The pooled embedding with size-1 dimensions squeezed
        away (a 1-D vector when a single text is encoded).
    """
    inputs = tokenizer(
        text,
        return_tensors='pt',
        truncation=True,
        padding=True,
        max_length=512
    )

    # Inference only: skip autograd bookkeeping.
    with torch.no_grad():
        outputs = model(**inputs)

    hidden = outputs.last_hidden_state
    # Broadcast the mask over the hidden dimension so pad tokens contribute 0.
    mask = inputs['attention_mask'].unsqueeze(-1).to(hidden.dtype)
    summed = (hidden * mask).sum(dim=1)
    # clamp guards against division by zero for an all-masked row.
    token_counts = mask.sum(dim=1).clamp(min=1)
    return (summed / token_counts).squeeze()

In [8]:
def load_data(file_path):
    """Read the CSV at ``file_path`` into a DataFrame using pandas defaults.

    Args:
        file_path: Location of the CSV, passed straight to ``pd.read_csv``.

    Returns:
        pandas.DataFrame: The parsed table.
    """
    movies = pd.read_csv(file_path)
    return movies

In [9]:
def preprocess_data(df):
    """Return the rows of ``df`` that have a non-missing ``'overview'``.

    The input frame is left untouched: a new DataFrame is returned instead of
    dropping rows in place, so re-running the cell (or reusing the raw frame
    elsewhere) always starts from the same data.

    Args:
        df: DataFrame containing an ``'overview'`` column.

    Returns:
        pandas.DataFrame: A new frame without the rows whose ``'overview'``
        is missing. Original index labels are kept.

    Raises:
        KeyError: If ``df`` has no ``'overview'`` column.
    """
    return df.dropna(subset=['overview'])

In [10]:
def get_similar_movies(query, df, tokenizer, model, top_n=5):
    """Rank the movies in ``df`` by cosine similarity of overview to ``query``.

    Every overview is embedded with ``encode_text`` and compared against the
    embedding of ``query``.

    Args:
        query: Free-text description of what the user wants to watch.
        df: DataFrame with ``'overview'`` and ``'title'`` columns.
        tokenizer: Tokenizer forwarded to ``encode_text``.
        model: Encoder forwarded to ``encode_text``.
        top_n: Maximum number of results. Values larger than ``len(df)``
            yield every movie; zero or negative values yield no results.

    Returns:
        list[tuple]: ``(title, similarity_score)`` pairs ordered from most to
        least similar. Empty when ``df`` has no rows or ``top_n <= 0``.
    """
    # Guard the two degenerate cases up front: ``scores[-0:]`` would select
    # *every* movie, and ``torch.stack`` raises on an empty list.
    if top_n <= 0 or len(df) == 0:
        return []

    query_embedding = encode_text(query, tokenizer, model)
    movie_embeddings = torch.stack(
        [encode_text(overview, tokenizer, model) for overview in df['overview']]
    )

    # Hand scikit-learn NumPy arrays on both sides rather than mixing a torch
    # tensor with an ndarray.
    similarity_scores = cosine_similarity(
        query_embedding.unsqueeze(0).numpy(),
        movie_embeddings.numpy(),
    ).flatten()

    # argsort is ascending: take the last ``top_n`` entries and reverse them.
    top_indices = similarity_scores.argsort()[-top_n:][::-1]

    # Embeddings were built in row order, so positions map back via ``iloc``.
    return [
        (df.iloc[idx]['title'], similarity_scores[idx])
        for idx in top_indices
    ]

In [11]:
def recommend_movies(query, file_path, top_n=5):
    """Print the movies whose overviews best match ``query``.

    Loads the CSV at ``file_path``, preprocesses it, loads the BERT tokenizer
    and model, ranks the movies with ``get_similar_movies`` and writes a
    numbered list of titles with their similarity scores to stdout.

    Args:
        query: Free-text description of the desired movie.
        file_path: Path of the movie CSV.
        top_n: Number of recommendations requested.

    Returns:
        None. The ranking is only printed.
    """
    movies = preprocess_data(load_data(file_path))
    tokenizer, model = load_bert_model()
    ranked = get_similar_movies(query, movies, tokenizer, model, top_n)

    print(f"Top {top_n} Movie Recommendations:")
    position = 1
    for title, score in ranked:
        print(f"{position}. {title} (Similarity Score: {score:.4f})")
        position += 1

if __name__ == '__main__':
    # Demo run: rank the movies in the CSV against one example request and
    # print the default number of recommendations.
    dataset_path = '/content/short_TMDB-movies copy.csv'  # Change this to your actual file path
    user_query = "I love thrilling action movies set in space, with a comedic twist."

    recommend_movies(query=user_query, file_path=dataset_path)

Top 5 Movie Recommendations:
1. Bo Burnham: Inside (Similarity Score: 0.7178)
2. My Hero Academia: Heroes Rising (Similarity Score: 0.7158)
3. La Jetée (Similarity Score: 0.7135)
4. Primal: Tales of Savagery (Similarity Score: 0.7116)
5. Everything Everywhere All at Once (Similarity Score: 0.7079)
