In [None]:
# Import libraries

import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import linear_kernel
from sklearn.neighbors import NearestNeighbors
from sklearn.cluster import KMeans


# Import necessary libraries (if not already imported)
import pandas as pd
import numpy as np
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.cluster import KMeans

In [None]:
# Load the Netflix catalogue and blank out missing descriptions in one step:
# passing a dict to fillna restricts the fill to the 'description' column, so
# every other column keeps its NaN values.
df = pd.read_csv('netflix_titles.csv').fillna({'description': ''})

In [None]:
# Build the TF-IDF representation of the descriptions.
# stop_words='english' asks the vectorizer to ignore English stop words.
tfidf = TfidfVectorizer(stop_words='english')
# Learn the vocabulary from the descriptions and encode them in a single pass.
tfidf_matrix = tfidf.fit_transform(df['description'])
# Last expression of the cell, so its value is displayed:
# (number of descriptions, number of vocabulary terms).
tfidf_matrix.shape

(6234, 16151)

In [None]:
# Compute the pairwise similarity between every pair of descriptions.
# linear_kernel is a plain dot product of the TF-IDF rows; presumably this equals
# cosine similarity because TfidfVectorizer L2-normalises rows by default --
# confirm if the vectorizer's `norm` setting is ever changed.
# The result has one row and one column per description.
cosine_sim = linear_kernel(tfidf_matrix, tfidf_matrix)

In [None]:
# Reverse lookup: title -> row position in df.
# Deduplicate on the *index* (the titles). Series.drop_duplicates() compares the
# values, which are row numbers and therefore always unique, so it never removed
# anything and a repeated title made indices[title] return a Series instead of a
# single position. Keeping the first occurrence guarantees a scalar lookup.
indices = pd.Series(df.index, index=df['title'])
indices = indices[~indices.index.duplicated(keep='first')]

In [None]:
def get_recommendations(title, cosine_sim=cosine_sim, num_recommend = 10):
    """Return the titles whose descriptions score highest against ``title``.

    Parameters
    ----------
    title : str
        Title to look up in the notebook-level ``indices`` Series.
    cosine_sim : 2-D array-like
        Pairwise similarity matrix; row ``i`` holds the scores of row ``i`` of
        ``df`` against every row. Defaults to the notebook-level ``cosine_sim``
        as it existed when this function was defined.
    num_recommend : int, default 10
        Maximum number of titles to return.

    Returns
    -------
    pandas.Series
        Up to ``num_recommend`` entries of ``df['title']``, ordered from the
        highest to the lowest score. The queried row is never included.

    Raises
    ------
    KeyError
        If ``title`` is not present in ``indices``.
    """
    idx = indices[title]
    # A title that occurs more than once yields a Series of positions; use the
    # first occurrence so the row lookup below stays one-dimensional.
    if isinstance(idx, pd.Series):
        idx = idx.iloc[0]
    idx = int(idx)

    # Pair every row position with its score, leaving out the queried row itself.
    # Dropping it by position (rather than assuming it sorts first) matters when
    # another row ties with it, e.g. an identical or an empty description.
    sim_scores = [(pos, score) for pos, score in enumerate(cosine_sim[idx]) if pos != idx]
    # Stable sort: rows with equal scores keep their original order.
    sim_scores.sort(key=lambda pair: pair[1], reverse=True)

    movie_indices = [pos for pos, _ in sim_scores[:num_recommend]]
    return df['title'].iloc[movie_indices]

In [None]:
# Try the similarity recommender on a sample title, asking for 20 results.
# As the last expression in the cell, the returned Series is displayed.
get_recommendations('Power Rangers Zeo', num_recommend = 20)

3723                                    Power Rangers RPM
3725                                Power Rangers Samurai
4637              The Adventures of Sharkboy and Lavagirl
3715                           Power Rangers Dino Thunder
3717                            Power Rangers Jungle Fury
5222                                            Possessed
4039                                      The Witch Files
3716                               Power Rangers in Space
6030                         Mighty Morphin Power Rangers
3733         Power Rangers Super Samurai: Trickster Treat
3732      Power Rangers Super Samurai: Stuck on Christmas
3065                          NOVA: The Impossible Flight
5648                                       Peaky Blinders
5852                                               The OA
3722                    Power Rangers Operation Overdrive
5935                               SWORDGAI The Animation
3728    Power Rangers Samurai: Party Monsters (Hallowe...
3729          

In [None]:
def knn_recommendations(title, num_recommend=10):
    """Return the nearest neighbours of ``title`` in TF-IDF space (cosine metric).

    Parameters
    ----------
    title : str
        Title to look up in the notebook-level ``indices`` Series.
    num_recommend : int, default 10
        Maximum number of titles to return.

    Returns
    -------
    pandas.Series
        Up to ``num_recommend`` entries of ``df['title']``, nearest first. The
        queried row is never included.

    Raises
    ------
    KeyError
        If ``title`` is not present in ``indices``.
    """
    idx = indices[title]
    # A title that occurs more than once yields a Series of positions; use the
    # first occurrence so a single row is queried.
    if isinstance(idx, pd.Series):
        idx = idx.iloc[0]
    idx = int(idx)

    # Ask for one extra neighbour because the queried row is its own neighbour,
    # but never more than the number of rows that were fitted.
    n_neighbors = min(num_recommend + 1, tfidf_matrix.shape[0])
    knn = NearestNeighbors(n_neighbors=n_neighbors, metric='cosine')
    knn.fit(tfidf_matrix)

    # The result must NOT be named `indices`: assigning to that name anywhere in
    # the function makes it local, so the lookup at the top raised
    # UnboundLocalError before the assignment was reached.
    _, neighbor_positions = knn.kneighbors(tfidf_matrix[idx], n_neighbors=n_neighbors)

    # Drop the queried row by position instead of assuming it is returned first;
    # rows with identical descriptions tie at distance 0.
    movie_indices = [pos for pos in neighbor_positions.flatten() if pos != idx]

    return df['title'].iloc[movie_indices[:num_recommend]]

In [None]:
def kmeans_recommendations(title, num_recommend=10):
    """Return titles that K-Means places in the same cluster as ``title``.

    A fresh 10-cluster model is fitted on ``tfidf_matrix`` on every call. The
    result is NOT ranked by similarity: it is simply the first cluster members
    in row order.

    Parameters
    ----------
    title : str
        Title to look up in the notebook-level ``indices`` Series.
    num_recommend : int, default 10
        Maximum number of titles to return.

    Returns
    -------
    pandas.Series
        Up to ``num_recommend`` entries of ``df['title']`` from the same
        cluster, excluding the queried row.

    Raises
    ------
    KeyError
        If ``title`` is not present in ``indices``.
    """
    # Look up the *argument*; the previous code indexed with the literal string
    # 'title', which ignores the caller's input.
    idx = indices[title]
    # A title that occurs more than once yields a Series of positions; use the
    # first occurrence.
    if isinstance(idx, pd.Series):
        idx = idx.iloc[0]
    idx = int(idx)

    # Fit a K-Means model to cluster similar movie descriptions
    num_clusters = 10  # You can adjust the number of clusters as needed
    kmeans = KMeans(n_clusters=num_clusters, random_state=0)
    kmeans.fit(tfidf_matrix)

    cluster_label = kmeans.labels_[idx]
    # Row positions sharing the query's cluster, with the query itself left out.
    similar_movies_indices = [
        pos for pos, label in enumerate(kmeans.labels_)
        if label == cluster_label and pos != idx
    ]

    return df['title'].iloc[similar_movies_indices[:num_recommend]]

In [None]:
# Print a heading followed by 20 recommendations from the similarity-matrix recommender.
print("TF-IDF and Cosine Similarity Recommendations:")
print(get_recommendations('Power Rangers Zeo', num_recommend=20))

TF-IDF and Cosine Similarity Recommendations:
3723                                    Power Rangers RPM
3725                                Power Rangers Samurai
4637              The Adventures of Sharkboy and Lavagirl
3715                           Power Rangers Dino Thunder
3717                            Power Rangers Jungle Fury
5222                                            Possessed
4039                                      The Witch Files
3716                               Power Rangers in Space
6030                         Mighty Morphin Power Rangers
3733         Power Rangers Super Samurai: Trickster Treat
3732      Power Rangers Super Samurai: Stuck on Christmas
3065                          NOVA: The Impossible Flight
5648                                       Peaky Blinders
5852                                               The OA
3722                    Power Rangers Operation Overdrive
5935                               SWORDGAI The Animation
3728    Power Rangers Samu

In [None]:
# Print a heading followed by 20 recommendations from the nearest-neighbour recommender.
print("KNN Recommendations:")
print(knn_recommendations('Power Rangers Zeo', num_recommend=20))


KNN Recommendations:


UnboundLocalError: ignored

In [None]:
# Print a heading followed by 20 recommendations from the K-Means cluster recommender.
print("K-Means Recommendations:")
print(kmeans_recommendations('Power Rangers Zeo', num_recommend=20))

K-Means Recommendations:




24                                  The Runner
61                                   Frequency
91                               Hold the Dark
127                             China Salesman
149                                      Hilda
167                                   The Bund
190                   When the Camellia Blooms
271                              Locked on You
310                         My Sister's Keeper
314                                Open Season
342                               Murder Party
387    Offline - Das leben ist kein bonuslevel
388                                  Resurface
400                          A Noble Intention
407                                   Full Out
410                                 Lucky Days
414                            The Fierce Wife
415                              The Womanizer
460                                 Compulsion
472                      Zumbo's Just Desserts
Name: title, dtype: object


In [None]:
# Re-read the catalogue; the dict form of fillna blanks only missing descriptions.
df = pd.read_csv('netflix_titles.csv').fillna({'description': ''})

# Encode the descriptions as TF-IDF vectors, ignoring English stop words.
tfidf = TfidfVectorizer(stop_words='english')
tfidf_matrix = tfidf.fit_transform(df['description'])

# Cluster the descriptions; fit() returns the estimator, so it can be chained.
num_clusters = 5  # You can adjust the number of clusters as needed
kmeans = KMeans(n_clusters=num_clusters, random_state=0).fit(tfidf_matrix)

# Record each row's cluster assignment next to its metadata.
df['cluster'] = kmeans.labels_



In [None]:
# Function to get recommendations from the same cluster
def kmeans_recommendations(title, num_recommend=10):
    """Return rows from the same K-Means cluster as ``title``, closest to the centroid first.

    Uses the notebook-level ``kmeans`` model and the ``df['cluster']`` labels
    derived from it.

    Parameters
    ----------
    title : str
        Title to look up in the notebook-level ``indices`` Series.
    num_recommend : int, default 10
        Maximum number of rows to return.

    Returns
    -------
    pandas.DataFrame
        Up to ``num_recommend`` rows of ``df`` (queried row excluded) with an
        extra ``similarity`` column. Despite its name the column holds a
        distance to the shared cluster centre, so smaller means closer; rows
        are sorted ascending on it.

    Raises
    ------
    KeyError
        If ``title`` is not present in ``indices``.
    """
    idx = indices[title]
    # A title that occurs more than once yields a Series of positions; use the
    # first occurrence.
    if isinstance(idx, pd.Series):
        idx = idx.iloc[0]
    idx = int(idx)
    cluster_label = df['cluster'].iloc[idx]

    # Boolean mask of cluster-mates by row position, with the query switched off.
    same_cluster = df['cluster'].to_numpy() == cluster_label
    same_cluster[idx] = False
    member_positions = same_cluster.nonzero()[0]

    # Work on an explicit copy so adding a column never writes into a slice of df.
    similar_movies = df.iloc[member_positions].copy()

    # kmeans.transform gives one row per sample and one column per cluster
    # centre. The previous code took the query's row (length n_clusters) and
    # tried to assign it to a column with one entry per cluster-mate, which
    # raised ValueError. Transform the cluster-mates instead and keep the
    # column belonging to the shared cluster.
    if len(member_positions):
        distances = kmeans.transform(tfidf_matrix[member_positions])[:, cluster_label]
    else:
        distances = []  # transform() rejects an empty input
    similar_movies['similarity'] = distances

    similar_movies = similar_movies.sort_values(by='similarity')
    return similar_movies.head(num_recommend)

In [None]:
# Print a heading followed by 10 recommendations from the cluster-based recommender.
print("K-Means Recommendations:")
print(kmeans_recommendations('Power Rangers Zeo', num_recommend=10))

K-Means Recommendations:


ValueError: ignored

In [None]:
# Import libraries

import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import linear_kernel
from sklearn.neighbors import NearestNeighbors
from sklearn.cluster import KMeans


# Import necessary libraries (if not already imported)
#import pandas as pd
#from sklearn.feature_extraction.text import TfidfVectorizer
#from sklearn.cluster import KMeans

# Load the catalogue; the dict form of fillna blanks only missing descriptions.
df = pd.read_csv('netflix_titles.csv').fillna({'description': ''})


# Encode the descriptions as TF-IDF vectors, ignoring English stop words.
tfidf = TfidfVectorizer(stop_words='english')
tfidf_matrix = tfidf.fit_transform(df['description'])
# Not the last expression of the cell, so this value is not displayed.
tfidf_matrix.shape


# Pairwise dot products between all description vectors.
cosine_sim = linear_kernel(tfidf_matrix, tfidf_matrix)


# Reverse lookup: title -> row position in df.
# Deduplicate on the *index* (the titles). Series.drop_duplicates() compares the
# values, which are row numbers and therefore always unique, so it never removed
# anything and a repeated title made indices[title] return a Series instead of a
# single position. Keeping the first occurrence guarantees a scalar lookup.
indices = pd.Series(df.index, index=df['title'])
indices = indices[~indices.index.duplicated(keep='first')]


def get_recommendations(title, cosine_sim=cosine_sim, num_recommend = 10):
    """Return the titles whose descriptions score highest against ``title``.

    Parameters
    ----------
    title : str
        Title to look up in the notebook-level ``indices`` Series.
    cosine_sim : 2-D array-like
        Pairwise similarity matrix; row ``i`` holds the scores of row ``i`` of
        ``df`` against every row. Defaults to the notebook-level ``cosine_sim``
        as it existed when this function was defined.
    num_recommend : int, default 10
        Maximum number of titles to return.

    Returns
    -------
    pandas.Series
        Up to ``num_recommend`` entries of ``df['title']``, ordered from the
        highest to the lowest score. The queried row is never included.

    Raises
    ------
    KeyError
        If ``title`` is not present in ``indices``.
    """
    idx = indices[title]
    # A title that occurs more than once yields a Series of positions; use the
    # first occurrence so the row lookup below stays one-dimensional.
    if isinstance(idx, pd.Series):
        idx = idx.iloc[0]
    idx = int(idx)

    # Pair every row position with its score, leaving out the queried row itself.
    # Dropping it by position (rather than assuming it sorts first) matters when
    # another row ties with it, e.g. an identical or an empty description.
    sim_scores = [(pos, score) for pos, score in enumerate(cosine_sim[idx]) if pos != idx]
    # Stable sort: rows with equal scores keep their original order.
    sim_scores.sort(key=lambda pair: pair[1], reverse=True)

    movie_indices = [pos for pos, _ in sim_scores[:num_recommend]]
    return df['title'].iloc[movie_indices]


# Exercise the similarity recommender on a sample title. This is not the last
# expression of the cell, so the returned Series is discarded rather than displayed.
get_recommendations('Power Rangers Zeo', num_recommend = 20)


def knn_recommendations(title, num_recommend=10):
    """Return the nearest neighbours of ``title`` in TF-IDF space (cosine metric).

    Parameters
    ----------
    title : str
        Title to look up in the notebook-level ``indices`` Series.
    num_recommend : int, default 10
        Maximum number of titles to return.

    Returns
    -------
    pandas.Series
        Up to ``num_recommend`` entries of ``df['title']``, nearest first. The
        queried row is never included.

    Raises
    ------
    KeyError
        If ``title`` is not present in ``indices``.
    """
    # Look up the *argument*; the previous code indexed with the literal string
    # 'title', which ignores the caller's input.
    idx = indices[title]
    # A title that occurs more than once yields a Series of positions; use the
    # first occurrence so a single row is queried.
    if isinstance(idx, pd.Series):
        idx = idx.iloc[0]
    idx = int(idx)

    # Ask for one extra neighbour because the queried row is its own neighbour,
    # but never more than the number of rows that were fitted.
    n_neighbors = min(num_recommend + 1, tfidf_matrix.shape[0])
    knn = NearestNeighbors(n_neighbors=n_neighbors, metric='cosine')
    knn.fit(tfidf_matrix)

    # The result must NOT be named `indices`: assigning to that name anywhere in
    # the function makes it local, so the lookup at the top raised
    # UnboundLocalError before the assignment was reached.
    _, neighbor_positions = knn.kneighbors(tfidf_matrix[idx], n_neighbors=n_neighbors)

    # Drop the queried row by position instead of assuming it is returned first;
    # rows with identical descriptions tie at distance 0.
    movie_indices = [pos for pos in neighbor_positions.flatten() if pos != idx]

    return df['title'].iloc[movie_indices[:num_recommend]]

def kmeans_recommendations(title, num_recommend=10):
    """Return titles that K-Means places in the same cluster as ``title``.

    A fresh 10-cluster model is fitted on ``tfidf_matrix`` on every call. The
    result is not ranked by similarity: it is the first ``num_recommend``
    cluster members in row order, with the queried row left out.
    """
    idx = indices[title]

    # Cluster the descriptions; fit() returns the estimator, so it can be chained.
    num_clusters = 10  # You can adjust the number of clusters as needed
    kmeans = KMeans(n_clusters=num_clusters, random_state=0).fit(tfidf_matrix)

    labels = kmeans.labels_
    target_label = labels[idx]

    # Row positions sharing the query's cluster, skipping the query itself.
    cluster_members = [
        pos for pos in range(len(labels))
        if labels[pos] == target_label and pos != idx
    ]

    return df['title'].iloc[cluster_members[:num_recommend]]


# Compare the three recommenders on the same sample title, 20 results each.
print("TF-IDF and Cosine Similarity Recommendations:")
print(get_recommendations('Power Rangers Zeo', num_recommend=20))

print("KNN Recommendations:")
print(knn_recommendations('Power Rangers Zeo', num_recommend=20))

print("K-Means Recommendations:")
print(kmeans_recommendations('Power Rangers Zeo', num_recommend=20))






# Re-read the catalogue; the dict form of fillna blanks only missing descriptions.
df = pd.read_csv('netflix_titles.csv').fillna({'description': ''})

# Encode the descriptions as TF-IDF vectors, ignoring English stop words.
tfidf = TfidfVectorizer(stop_words='english')
tfidf_matrix = tfidf.fit_transform(df['description'])

# Cluster the descriptions; fit() returns the estimator, so it can be chained.
num_clusters = 5  # You can adjust the number of clusters as needed
kmeans = KMeans(n_clusters=num_clusters, random_state=0).fit(tfidf_matrix)

# Record each row's cluster assignment next to its metadata.
df['cluster'] = kmeans.labels_

# Function to get recommendations from the same cluster
def kmeans_recommendations(title, num_recommend=10):
    """Return rows from the same K-Means cluster as ``title``, closest to the centroid first.

    Uses the notebook-level ``kmeans`` model and the ``df['cluster']`` labels
    derived from it.

    Parameters
    ----------
    title : str
        Title to look up in the notebook-level ``indices`` Series.
    num_recommend : int, default 10
        Maximum number of rows to return.

    Returns
    -------
    pandas.DataFrame
        Up to ``num_recommend`` rows of ``df`` (queried row excluded) with an
        extra ``similarity`` column. Despite its name the column holds a
        distance to the shared cluster centre, so smaller means closer; rows
        are sorted ascending on it.

    Raises
    ------
    KeyError
        If ``title`` is not present in ``indices``.
    """
    idx = indices[title]
    # A title that occurs more than once yields a Series of positions; use the
    # first occurrence.
    if isinstance(idx, pd.Series):
        idx = idx.iloc[0]
    idx = int(idx)
    cluster_label = df['cluster'].iloc[idx]

    # Boolean mask of cluster-mates by row position, with the query switched off.
    same_cluster = df['cluster'].to_numpy() == cluster_label
    same_cluster[idx] = False
    member_positions = same_cluster.nonzero()[0]

    # Work on an explicit copy so adding a column never writes into a slice of df.
    similar_movies = df.iloc[member_positions].copy()

    # kmeans.transform gives one row per sample and one column per cluster
    # centre. The previous code took the query's row (length n_clusters) and
    # tried to assign it to a column with one entry per cluster-mate, which
    # raised ValueError. Transform the cluster-mates instead and keep the
    # column belonging to the shared cluster.
    if len(member_positions):
        distances = kmeans.transform(tfidf_matrix[member_positions])[:, cluster_label]
    else:
        distances = []  # transform() rejects an empty input
    similar_movies['similarity'] = distances

    similar_movies = similar_movies.sort_values(by='similarity')
    return similar_movies.head(num_recommend)

# Print a heading followed by 10 recommendations from the cluster-based recommender.
print("K-Means Recommendations:")
print(kmeans_recommendations('Power Rangers Zeo', num_recommend=10))

TF-IDF and Cosine Similarity Recommendations:
3723                                    Power Rangers RPM
3725                                Power Rangers Samurai
4637              The Adventures of Sharkboy and Lavagirl
3715                           Power Rangers Dino Thunder
3717                            Power Rangers Jungle Fury
5222                                            Possessed
4039                                      The Witch Files
3716                               Power Rangers in Space
6030                         Mighty Morphin Power Rangers
3733         Power Rangers Super Samurai: Trickster Treat
3732      Power Rangers Super Samurai: Stuck on Christmas
3065                          NOVA: The Impossible Flight
5648                                       Peaky Blinders
5852                                               The OA
3722                    Power Rangers Operation Overdrive
5935                               SWORDGAI The Animation
3728    Power Rangers Samu

UnboundLocalError: ignored