In [None]:
import pandas as pd
import numpy as np
import sys

In [None]:
## Pick the data folder: a mounted Google Drive on Colab, a local folder otherwise
if 'google.colab' not in sys.modules:
    # Local run: data sits in a Data/ folder next to the notebook
    DATA_DIR = 'Data/'
else:
    from google.colab import drive
    drive.mount('/content/drive', force_remount=True)
    # Edit the part after /content/drive/MyDrive/Colab Notebooks/ so that it
    # matches the way the assignment is organised inside your own
    # Colab Notebooks folder on Google Drive
    DIR = '/content/drive/MyDrive/Colab Notebooks/ALA_assign'
    DATA_DIR = f'{DIR}/Data/'

In [None]:
# Read the movie-review CSV from the configured data folder and preview the first rows
FILENAME = f'{DATA_DIR}moviereviews.csv'
df = pd.read_csv(FILENAME)
df.head()

In [None]:
# Build the movie review vectors.
#
# The previous loop only ever stored all-zero vectors (the line adding word
# vectors was commented out because no pre-trained embeddings are loaded), so
# every similarity came out as 0 and every angle as NaN. Its dictionary
# assignment was also nested inside the per-word loop, which silently dropped
# movies with an empty review. Word counts over the corpus vocabulary need no
# external data and give vectors that can actually be compared.
def build_movie_vectors(reviews):
    """Build a bag-of-words count vector for every movie in ``reviews``.

    Each distinct whitespace-separated token found in the ``review`` column
    gets one coordinate; a movie's vector holds how often each token occurs
    in its review text.

    Parameters
    ----------
    reviews : pandas.DataFrame
        Must provide ``movie`` and ``review`` columns.

    Returns
    -------
    dict
        Maps each movie to a 1-D float array whose length is the vocabulary
        size. Rows sharing a movie are summed into one vector, and a movie
        whose review is missing or empty keeps an all-zero vector.
    """
    # Tokenise once; a missing review (NaN/None) contributes no tokens
    # instead of failing on ``.split()``.
    tokenized = [
        (movie, [] if pd.isna(review) else str(review).split())
        for movie, review in zip(reviews['movie'], reviews['review'])
    ]

    # One coordinate per distinct token, numbered in first-seen order so the
    # layout is the same on every run.
    vocabulary = {}
    for _, tokens in tokenized:
        for token in tokens:
            vocabulary.setdefault(token, len(vocabulary))

    vectors = {}
    for movie, tokens in tokenized:
        # setdefault registers the movie even when it has no tokens and lets
        # repeated rows for the same movie accumulate into one vector.
        vector = vectors.setdefault(movie, np.zeros(len(vocabulary)))
        for token in tokens:
            vector[vocabulary[token]] += 1
    return vectors


# Dictionary mapping each movie to its review vector
movie_vectors = build_movie_vectors(df)

# Similarity of two movies via NumPy's dot product
def calculate_similarity_dot(movie1, movie2):
    """Return the dot product of the review vectors stored for two movies.

    Both arguments are keys of the module-level ``movie_vectors`` dictionary.
    """
    first_vector = movie_vectors[movie1]
    second_vector = movie_vectors[movie2]
    return np.dot(first_vector, second_vector)

# Similarity of two movies by multiplying coordinates and adding them up by hand
def calculate_similarity_manual(movie1, movie2):
    """Return the sum of the elementwise products of two movies' review vectors.

    Both arguments are keys of the module-level ``movie_vectors`` dictionary.
    """
    products = movie_vectors[movie1] * movie_vectors[movie2]
    total = 0
    for term in products:
        total = total + term
    return total

# Calculate the angle between two vectors in degrees
def calculate_angle(movie1, movie2):
    """Return the angle, in degrees, between two movies' review vectors.

    Parameters
    ----------
    movie1, movie2
        Keys of the module-level ``movie_vectors`` dictionary.

    Returns
    -------
    float
        Angle in the range [0, 180]. ``nan`` when either vector has zero
        magnitude, because the angle is undefined in that case.
    """
    vector1 = movie_vectors[movie1]
    vector2 = movie_vectors[movie2]
    magnitude_product = np.linalg.norm(vector1) * np.linalg.norm(vector2)
    if magnitude_product == 0:
        # Avoid the 0/0 division (and its RuntimeWarning); report the
        # undefined angle explicitly instead.
        return np.nan
    # Rounding can push the ratio a hair outside [-1, 1] for (anti)parallel
    # vectors, which would make arccos return nan; clip it back in range.
    cosine_similarity = np.clip(np.dot(vector1, vector2) / magnitude_product, -1.0, 1.0)
    angle_rad = np.arccos(cosine_similarity)
    return np.degrees(angle_rad)

# Score every unordered pair of movies exactly once: each movie is paired
# only with the movies that come after it in the dictionary
movie_names = list(movie_vectors)
similar_pairs = [
    (first, second, calculate_similarity_dot(first, second), calculate_angle(first, second))
    for position, first in enumerate(movie_names)
    for second in movie_names[position + 1:]
]

# Highest dot-product similarity first
similar_pairs.sort(key=lambda entry: entry[2], reverse=True)

# Report the three best-scoring pairs
print("Top 3 similar movie pairs:")
for first, second, score, degrees in similar_pairs[:3]:
    print(f"{first} - {second}: Similarity = {score}, Angle = {degrees} degrees")
