# Description

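* Uses short movie descriptions and genre lists as simulated item features
* Uses TF-IDF to convert the descriptions and the genre lists into vectors
* Computes cosine similarity between item features
* Recommends movies similar to Movie3 (*Mission: Impossible - The Final Reckoning*) based on these textual features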

# 0. Setting Up the Env

## 0.1 Import Packages

In [10]:
import numpy as np
import pandas as pd


from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

## 0.2 Constants

# 1. Data

In [19]:
# Toy catalogue: three parallel lists, where position i in each list
# describes the same movie.
items = [
    'Final Destination: Bloodlines',
    'Materialists',
    'Mission: Impossible - The Final Reckoning',
    'Sinners',
    'Superman',
]

# One free-text plot summary per movie, in the same order as `items`.
descriptions = [
    "Plagued by a recurring violent nightmare, a college student returns home to find the one person who can break the cycle and save her family from the horrific fate that inevitably awaits them.",
    "A young, ambitious New York City matchmaker finds herself torn between the perfect match and her imperfect ex.",
    "Hunt and the IMF pursue a dangerous AI called the Entity that's infiltrated global intelligence. With governments and a figure from his past in pursuit, Hunt races to stop it from forever changing the world.",
    "Trying to leave their troubled lives behind, twin brothers return to their hometown to start again, only to discover that an even greater evil is waiting to welcome them back.",
    "Superman must reconcile his alien Kryptonian heritage with his human upbringing as reporter Clark Kent. As the embodiment of truth, justice and the human way he soon finds himself in a world that views these as old-fashioned.",
]

# Comma-separated genre labels per movie, in the same order as `items`.
genres = [
    "Horror, Thriller, Mystery",
    "Comedy, Romance",
    "Action, Adventure, Thriller",
    "Horror, Thriller, Mystery",
    "Action, Adventure, Fantasy",
]

# Assemble the item table: one row per movie, one column per feature.
df = pd.DataFrame(dict(Item=items, Description=descriptions, Genres=genres))
df.head()

Unnamed: 0,Item,Description,Genres
0,Final Destination: Bloodlines,"Plagued by a recurring violent nightmare, a co...","Horror, Thriller, Mystery"
1,Materialists,"A young, ambitious New York City matchmaker fi...","Comedy, Romance"
2,Mission: Impossible - The Final Reckoning,Hunt and the IMF pursue a dangerous AI called ...,"Action, Adventure, Thriller"
3,Sinners,"Trying to leave their troubled lives behind, t...","Horror, Thriller, Mystery"
4,Superman,Superman must reconcile his alien Kryptonian h...,"Action, Adventure, Fantasy"


# 2. Similarity Calculation

In [20]:
# TF-IDF vectorization of the item descriptions and of the genre lists.
# Each feature gets its own vectorizer: fit_transform() re-learns the
# vocabulary on every call, so sharing one instance would leave it holding
# only the genre vocabulary and make the description matrix impossible to
# interpret (feature names) or extend (transforming new descriptions).
vectorizer_desc = TfidfVectorizer(stop_words='english')
tfidf_matrix_desc = vectorizer_desc.fit_transform(df['Description'])

vectorizer_genre = TfidfVectorizer(stop_words='english')
tfidf_matrix_genre = vectorizer_genre.fit_transform(df['Genres'])


print(type(tfidf_matrix_desc))

<class 'scipy.sparse._csr.csr_matrix'>


In [21]:
# Pairwise cosine similarity between the description vectors of all movies.
# With a single argument the matrix is compared against itself.
cosine_sim_desc = cosine_similarity(tfidf_matrix_desc)
cosine_sim_desc

array([[1.        , 0.        , 0.        , 0.        , 0.        ],
       [0.        , 1.        , 0.        , 0.        , 0.04038078],
       [0.        , 0.        , 1.        , 0.        , 0.02962215],
       [0.        , 0.        , 0.        , 1.        , 0.        ],
       [0.        , 0.04038078, 0.02962215, 0.        , 1.        ]])

In [22]:
# Pairwise cosine similarity between the genre vectors of all movies.
# With a single argument the matrix is compared against itself.
cosine_sim_genre = cosine_similarity(tfidf_matrix_genre)
cosine_sim_genre

array([[1.        , 0.        , 0.2562429 , 1.        , 0.        ],
       [0.        , 1.        , 0.        , 0.        , 0.        ],
       [0.2562429 , 0.        , 1.        , 0.2562429 , 0.64856907],
       [1.        , 0.        , 0.2562429 , 1.        , 0.        ],
       [0.        , 0.        , 0.64856907, 0.        , 1.        ]])

In [27]:
# Description-based candidates for Movie3: pair every movie's row position
# with its similarity to the query movie.
item_idx = 2  # Movie3
similarities = cosine_sim_desc[item_idx]
recommendations = [(idx, score) for idx, score in enumerate(similarities)]
recommendations


[(0, np.float64(0.0)),
 (1, np.float64(0.0)),
 (2, np.float64(1.0000000000000002)),
 (3, np.float64(0.0)),
 (4, np.float64(0.029622150932809763))]

In [29]:
# Rank all movies from most to least similar to the query movie.
recommendations = sorted(recommendations, key=lambda x: x[1], reverse=True)

print("Recommendations for Mission: Impossible:")
for idx, score in recommendations:
    # Exclude the query movie by its index instead of dropping the first
    # sorted entry: the sort is stable, so when another movie ties with the
    # query movie for the top score it can come first, and slicing off
    # position 0 would then discard that movie and list the query movie as
    # a recommendation for itself.
    if idx == item_idx:
        continue
    print(f"{df['Item'][idx]}: {score:.2f}")

Recommendations for Mission: Impossible:
Superman: 0.03
Final Destination: Bloodlines: 0.00
Materialists: 0.00
Sinners: 0.00


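The plot summaries share almost no vocabulary, so the description-based similarities are nearly all zero. The genres may be a better basis for recommendations than the summaries.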

In [30]:
# Genre-based candidates for Movie3: pair every movie's row position with
# its similarity to the query movie.
item_idx = 2  # Movie3
similarities = cosine_sim_genre[item_idx]
recommendations = [(idx, score) for idx, score in enumerate(similarities)]
recommendations


[(0, np.float64(0.25624290058154725)),
 (1, np.float64(0.0)),
 (2, np.float64(1.0)),
 (3, np.float64(0.25624290058154725)),
 (4, np.float64(0.648569069800036))]

In [31]:
# Rank all movies from most to least similar to the query movie.
recommendations = sorted(recommendations, key=lambda x: x[1], reverse=True)

print("Recommendations for Mission: Impossible:")
for idx, score in recommendations:
    # Exclude the query movie by its index instead of dropping the first
    # sorted entry: the sort is stable, so a movie with an identical genre
    # list (similarity 1.0) and a lower row index would sort ahead of the
    # query movie, and slicing off position 0 would then discard that movie
    # and list the query movie as a recommendation for itself.
    if idx == item_idx:
        continue
    print(f"{df['Item'][idx]}: {score:.2f}")

Recommendations for Mission: Impossible:
Superman: 0.65
Final Destination: Bloodlines: 0.26
Sinners: 0.26
Materialists: 0.00


# END