In [3]:
import pandas as pd
import numpy as np

# Read the dataset
data = pd.read_csv('moviereviews.csv')

# Calculate movie review vectors
movie_reviews = data.pivot_table(index='movie', columns='review', aggfunc='size', fill_value=0)
movie_vectors = movie_reviews.values

# Find similarity using dot product and angle between vectors
def cosine_similarity(v1, v2):
    dot_product = np.dot(v1, v2)
    norm_v1 = np.linalg.norm(v1)
    norm_v2 = np.linalg.norm(v2)
    cosine_sim = dot_product / (norm_v1 * norm_v2)
    return cosine_sim

num_movies = movie_vectors.shape[0]
similarities = np.zeros((num_movies, num_movies))

for i in range(num_movies):
    for j in range(i+1, num_movies):
        similarity = cosine_similarity(movie_vectors[i], movie_vectors[j])
        similarities[i, j] = similarity
        similarities[j, i] = similarity

# Identify top 3 pairs of movies that are very much alike
top_pairs = []
for i in range(num_movies):
    for j in range(i+1, num_movies):
        top_pairs.append((data['movie'][i], data['movie'][j], similarities[i, j]))

top_pairs.sort(key=lambda x: x[2], reverse=True)
top_pairs = top_pairs[:3]

for pair in top_pairs:
    print(f"Movies: {pair[0]}, {pair[1]}, Similarity: {pair[2]:.4f}")

Movies: The Lord of the Rings The Two Towers, Inception, Similarity: 0.0000
Movies: The Lord of the Rings The Two Towers, Spiderman Across the spider verse, Similarity: 0.0000
Movies: The Lord of the Rings The Two Towers, The Dark Knight, Similarity: 0.0000


In [12]:
print(movie_reviews)

review                                 Best live action portrayal beat organized crime in Gotham enigmatic villain brutality violence  \
movie                                                                                                                                   
Before Sunrise                                                                         0                                                
Eternal Sunshine of the Spotless Mind                                                  0                                                
Gone with the wind                                                                     0                                                
In the Mood for Love                                                                   0                                                
Inception                                                                              0                                                
It happened one night                    

In [13]:
print(movie_vectors)

[[0 0 0 0 1 0 0 0 0 0 0 0 0]
 [0 1 0 0 0 0 0 0 0 0 0 0 0]
 [0 0 0 0 0 0 0 1 0 0 0 0 0]
 [0 0 1 0 0 0 0 0 0 0 0 0 0]
 [0 0 0 0 0 0 0 0 1 0 0 0 0]
 [0 0 0 1 0 0 0 0 0 0 0 0 0]
 [0 0 0 0 0 0 1 0 0 0 0 0 0]
 [0 0 0 0 0 1 0 0 0 0 0 0 0]
 [0 0 0 0 0 0 0 0 0 0 1 0 0]
 [1 0 0 0 0 0 0 0 0 0 0 0 0]
 [0 0 0 0 0 0 0 0 0 0 0 0 1]
 [0 0 0 0 0 0 0 0 0 0 0 1 0]
 [0 0 0 0 0 0 0 0 0 1 0 0 0]]


In [15]:
print(data).head(5)

                                    movie  \
0    The Lord of the Rings The Two Towers   
1                               Inception   
2       Spiderman Across the spider verse   
3                         The Dark Knight   
4                        Three colors red   
5                   It happened one night   
6                    In the Mood for Love   
7                          Before Sunrise   
8                      Gone with the wind   
9   Eternal Sunshine of the Spotless Mind   
10               The Shawshank Redemption   
11                            Raging Bull   
12                     Lawrence of Arabia   

                                               review  
0   remarkable display of fantasy action powerful ...  
1   implanting stealing idea destroy gripping acti...  
2   mind bending wild action sequences intimate em...  
3   Best live action portrayal beat organized crim...  
4   mesmerising friendship turned love profound un...  
5   Romantic comedy screwball com

AttributeError: 'NoneType' object has no attribute 'head'