In [1]:
import pandas as pd
import numpy as np
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

# Load the movie dataset, specifying the data type for the "overview" column
movies_df = pd.read_csv("movies_metadata.csv", dtype={"overview": str}, low_memory=False)

# Create a TfidfVectorizer object with stop words
tfidf_vectorizer = TfidfVectorizer(stop_words='english')

# Preprocess the movie descriptions
movies_df["overview"] = movies_df["overview"].fillna("")  # Handling missing values

# Fit the TF-IDF vectorizer with the movie overviews
tfidf_matrix = tfidf_vectorizer.fit_transform(movies_df["overview"])

def get_recommendations(movie_title):
    # Search for the movie using the "original_title" column
    movie_indices = movies_df[movies_df["original_title"] == movie_title].index

    if len(movie_indices) == 0:
        print("Movie not found.")
        return []

    movie_index = movie_indices[0]

    # Preprocess the movie title
    processed_title = tfidf_vectorizer.transform([movie_title])

    # Get the cosine similarities for the movie title
    cosine_similarities = cosine_similarity(processed_title, tfidf_matrix).flatten()

    # Get the indices of top similar movies
    top_similar_movies = np.argsort(cosine_similarities)[::-1]

    # Get the top 10 movie recommendations (excluding the movie itself)
    recommendations = []
    for movie in top_similar_movies:
        if movie != movie_index:  # Skip the current movie index
            recommendations.append(movies_df.loc[movie, "original_title"])
            if len(recommendations) == 10:  # Limit the number of recommendations to 10
                break

    return recommendations

def measure_recommendation(recommendations, ground_truth):
    # Convert the ground truth to a set for efficient membership checking
    ground_truth_set = set(ground_truth)

    # Calculate the number of correctly recommended movies
    correct_recommendations = len(set(recommendations) & ground_truth_set)

    # Calculate the accuracy and precision
    accuracy = correct_recommendations / len(ground_truth)
    precision = correct_recommendations / len(recommendations)

    return accuracy, precision

# Example usage:
# Ground truth movie list
ground_truth = ["The Martian", "Gravity", "Inception", "Sunshine", "Moon"]

# Get the movie recommendations for "Interstellar"
recommendations = get_recommendations("Interstellar")

# Print the movie recommendations
print("Recommendations:")
for movie in recommendations:
    print(movie)

# Measure accuracy and precision
accuracy, precision = measure_recommendation(recommendations, ground_truth)




Recommendations:
Suburban Commando
ほしのこえ
Stargate
Starship Troopers
Star Force: Fugitive Alien II
Arrowhead
Otroki vo Vselennoy
Frank Herbert's Children of Dune
Cosmos
Tonite Let's All Make Love in London


In [1]:
import pandas as pd
import numpy as np
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

# Load the movie dataset, specifying the data type for the "overview" column

movies_df = pd.read_csv("movies_metadata.csv", dtype={"overview": str}, low_memory=False)


# Create a TfidfVectorizer object with stop words

tfidf_vectorizer = TfidfVectorizer(stop_words='english')


# Preprocess the movie descriptions

movies_df["overview"] = movies_df["overview"].fillna("")  # Handling missing values

# Fit the TF-IDF vectorizer with the movie overviews
tfidf_matrix = tfidf_vectorizer.fit_transform(movies_df["overview"])

def get_recommendations(movie_title):
    # Search for the movie using the "original_title" column
    movie_indices = movies_df[movies_df["original_title"] == movie_title].index

    if len(movie_indices) == 0:
        print("Movie not found.")
        return []

    movie_index = movie_indices[0]

    # Preprocess the movie title
    processed_title = tfidf_vectorizer.transform([movie_title])

    # Get the cosine similarities for the movie title
    cosine_similarities = cosine_similarity(processed_title, tfidf_matrix).flatten()

    # Get the indices of top similar movies
    top_similar_movies = np.argsort(cosine_similarities)[::-1]

    # Get the top 10 movie recommendations (excluding the movie itself)
    recommendations = []
    for movie in top_similar_movies:
        if movie != movie_index:  # Skip the current movie index
            recommendations.append(movies_df.loc[movie, "original_title"])
            if len(recommendations) == 10:  # Limit the number of recommendations to 10
                break

    return recommendations

# Example usage:
# Get the movie recommendations for "Lucy"
recommendations = get_recommendations("Lucy")

# Print the movie recommendations
for movie in recommendations:
    print(movie)


I'm with Lucy
Immediate Family
Two Weeks Notice
Housekeeping
The Ghost and Mrs. Muir
Maybe Baby
If Lucy Fell
Stealing Beauty
Born to Dance
50 First Dates


In [9]:
import pandas as pd
import numpy as np
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

# Load the movie dataset, specifying the data type for the "overview" column
movies_df = pd.read_csv("movies_metadata.csv", dtype={"overview": str}, low_memory=False)

# Create a TfidfVectorizer object with stop words
tfidf_vectorizer = TfidfVectorizer(stop_words='english')

# Preprocess the movie descriptions
movies_df["overview"] = movies_df["overview"].fillna("")  # Handling missing values

# Fit the TF-IDF vectorizer with the movie overviews
tfidf_matrix = tfidf_vectorizer.fit_transform(movies_df["overview"])

def get_recommendations(movie_title):
    # Search for the movie using the "original_title" column
    movie_indices = movies_df[movies_df["original_title"] == movie_title].index

    if len(movie_indices) == 0:
        print("Movie not found.")
        return []

    movie_index = movie_indices[0]

    # Preprocess the movie title
    processed_title = tfidf_vectorizer.transform([movie_title])

    # Get the cosine similarities for the movie title
    cosine_similarities = cosine_similarity(processed_title, tfidf_matrix).flatten()

    # Get the indices of top similar movies
    top_similar_movies = np.argsort(cosine_similarities)[::-1]

    # Get the top 10 movie recommendations (excluding the movie itself)
    recommendations = []
    for movie in top_similar_movies:
        if movie != movie_index:  # Skip the current movie index
            recommendations.append(movies_df.loc[movie, "original_title"])
            if len(recommendations) == 10:  # Limit the number of recommendations to 10
                break

    return recommendations

def measure_recommendation(recommendations, ground_truth):
    # Convert the ground truth to a set for efficient membership checking
    ground_truth_set = set(ground_truth)

    # Calculate the number of correctly recommended movies
    correct_recommendations = len(set(recommendations) & ground_truth_set)

    # Calculate the accuracy and precision
    accuracy = correct_recommendations / len(ground_truth)
    precision = correct_recommendations / len(recommendations)

    return accuracy, precision

# Example usage:
# Ground truth movie list
# ground_truth = ["The Martian", "Gravity", "Inception", "Sunshine", "Moon"]

# Get the movie recommendations for "Toy Story"
recommendations = get_recommendations("The Notebook")

# Print the movie recommendations
print("Recommendations:")
for movie in recommendations:
    print(movie)

# Measure accuracy and precision
# accuracy, precision = measure_recommendation(recommendations, ground_truth)




Recommendations:
Don't Hug Me, I'm Scared
Harriet the Spy
خانه‌ی دوست کجاست؟
安娜玛德莲娜
Hourou-ki
デスノート
E Agora? Lembra-me
The 39 Steps
Telefon
The Ties That Bind
