In [3]:
#write a program that identifies top 3 pairs of movies that are very much alike

import pandas as pd
import numpy as np


In [5]:
# Read the movie review dataset
data = pd.read_csv("moviereviews.csv")
data.head()

Unnamed: 0,movie,review
0,The Lord of the Rings The Two Towers,remarkable display of fantasy action powerful ...
1,Inception,implanting stealing idea destroy gripping acti...
2,Spiderman Across the spider verse,mind bending wild action sequences intimate em...
3,The Dark Knight,Best live action portrayal beat organized crim...
4,Three colors red,mesmerising friendship turned love profound un...


In [6]:

all_words = set()
for review in data['review']:
    words = review.split()
    all_words.update(words)

# movie review vectors
movie_vectors = {}

for i, row in data.iterrows():
    movie = row['movie']
    review = row['review'].split()
    review_vector = np.array([1 if word in review else 0 for word in all_words])
    movie_vectors[movie] = review_vector

In [22]:
%%time
#using angle between vectors formula
similarity_scores = []

for movie1 in movie_vectors:
    for movie2 in movie_vectors:
        if movie1 != movie2:
            vector1 = movie_vectors[movie1]
            vector2 = movie_vectors[movie2]
            cosine_similarity = np.dot(vector1, vector2) / (np.linalg.norm(vector1) * np.linalg.norm(vector2))
            if (movie1, movie2) not in similarity_scores and (movie2, movie1) not in similarity_scores:
                similarity_scores.append((movie1, movie2, cosine_similarity))

similarity_scores.sort(key=lambda x: x[2], reverse=True)

top_3_pairs = similarity_scores[:6]




CPU times: total: 0 ns
Wall time: 11 ms


In [18]:
#top 3 similar movies using angle similarity
for pair in top_3_pairs:
    movie1, movie2, similarity = pair
    print(f"{movie1} and {movie2} have an angle similarity of {similarity:.2f}")

The Lord of the Rings The Two Towers and Inception have an angle similarity of 0.26
Inception and The Lord of the Rings The Two Towers have an angle similarity of 0.26
It happened one night and Gone with the wind have an angle similarity of 0.18
Gone with the wind and It happened one night have an angle similarity of 0.18
It happened one night and Before Sunrise have an angle similarity of 0.17
Before Sunrise and It happened one night have an angle similarity of 0.17


In [23]:
%%time
# handcoded calculations
similarity_scores = []

for movie1 in movie_vectors:
    for movie2 in movie_vectors:
        if movie1 != movie2:
            vector1 = movie_vectors[movie1]
            vector2 = movie_vectors[movie2]
            
            dot_product = sum(v1 * v2 for v1, v2 in zip(vector1, vector2))
            
            mag1 = sum(v ** 2 for v in vector1) ** 0.5
            mag2 = sum(v ** 2 for v in vector2) ** 0.5
            
            cosine_similarity = dot_product / (mag1 * mag2) if mag1 * mag2 != 0 else 0
            
            if (movie1, movie2) not in similarity_scores and (movie2, movie1) not in similarity_scores:
                similarity_scores.append((movie1, movie2, cosine_similarity))

similarity_scores.sort(key=lambda x: x[2], reverse=True)

top_3_pairs = similarity_scores[:6]




CPU times: total: 0 ns
Wall time: 5.02 ms


In [21]:
# Print top similar movies
for pair in top_3_pairs:
    movie1, movie2, similarity = pair
    print(f"{movie1} and {movie2} have a cosine similarity of {similarity:.2f}")

The Lord of the Rings The Two Towers and Inception have a cosine similarity of 0.26
Inception and The Lord of the Rings The Two Towers have a cosine similarity of 0.26
It happened one night and Gone with the wind have a cosine similarity of 0.18
Gone with the wind and It happened one night have a cosine similarity of 0.18
It happened one night and Before Sunrise have a cosine similarity of 0.17
Before Sunrise and It happened one night have a cosine similarity of 0.17
