In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import os
import nltk
from nltk.stem import PorterStemmer
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.neighbors import NearestNeighbors
import pickle

final_movies_path = 'artifacts/FinalMovies.pkl'
cv_path = 'artifacts/cv.pkl'

if os.path.exists(final_movies_path) and os.path.exists(cv_path):
    with open(final_movies_path, 'rb') as f:
        FinalMovies = pickle.load(f)
    with open(cv_path, 'rb') as f:
        cv = pickle.load(f)
    vectors = cv.transform(FinalMovies['tag'])
else:
    movie = pd.read_csv('data/1M Movies Dataset TMDB_movie_dataset_v11.csv')
    moviesIMP = movie[['id', 'title', 'overview', 'genres', 'tagline', 'keywords']].dropna()
    moviesIMP.dropna(inplace=True)

    def remove_spaces(word):
        if isinstance(word, list):
            return [i.replace(" ", "") for i in word]
        return word

    moviesIMP['genres'] = moviesIMP['genres'].apply(remove_spaces)
    moviesIMP['tag'] = moviesIMP['overview'] + ' ' + moviesIMP['genres'] + ' ' + moviesIMP['keywords'] + ' ' + moviesIMP['title'] + ' ' + moviesIMP['tagline']
    FinalMovies = moviesIMP[['id', 'title', 'tag']]

    ps = PorterStemmer()
    def stems(text):
        l = []
        for i in text.split():
            l.append(ps.stem(i))
        return " ".join(l)
    FinalMovies['tag'] = FinalMovies['tag'].apply(stems)

    cv = CountVectorizer(max_features=5000)
    vectors = cv.fit_transform(FinalMovies['tag'])

    os.makedirs('artifacts', exist_ok=True)
    with open(final_movies_path, 'wb') as f:
        pickle.dump(FinalMovies, f)
    with open(cv_path, 'wb') as f:
        pickle.dump(cv, f)

nn = NearestNeighbors(metric='cosine', algorithm='brute')
nn.fit(vectors)

def recommend(movie_title, n_recommendations=5):
    idx = FinalMovies[FinalMovies['title'].str.lower() == movie_title.lower()].index
    if len(idx) == 0:
        print("Movie not found in the dataset.")
        return
    idx = idx[0]
    distances, indices = nn.kneighbors(vectors[idx], n_neighbors=n_recommendations+1)
    print(f"Recommendations for '{FinalMovies.iloc[idx]['title']}':")
    for i in range(1, n_recommendations+1):
        print(FinalMovies.iloc[indices[0][i]]['title'])


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  FinalMovies['tag'] = FinalMovies['tag'].apply(stems)
