In [7]:
import pandas as pd
import numpy as np
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.decomposition import LatentDirichletAllocation
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.model_selection import train_test_split

In [8]:
data = pd.read_csv('movie_descriptions.csv')
descriptions = data['description'].values

train_data, test_data = train_test_split(data, test_size=0.2, random_state=42)

In [9]:
# TF-IDF
tfidf_vectorizer = TfidfVectorizer(stop_words='english')
tfidf_matrix_train = tfidf_vectorizer.fit_transform(train_data['description'])

# LDA
lda = LatentDirichletAllocation(n_components=10, random_state=42)
lda_matrix_train = lda.fit_transform(tfidf_matrix_train)

# Генерація профілів фільмів для тренувального набору
tfidf_movie_profiles_train = tfidf_matrix_train
lda_movie_profiles_train = lda_matrix_train


In [13]:

def get_recommendations(user_profile, movie_profiles, top_n=5):
    cosine_similarities = cosine_similarity(
        user_profile.reshape(1, -1), movie_profiles).flatten()
    recommended_movie_ids = cosine_similarities.argsort()[-top_n:][::-1]
    return recommended_movie_ids


def evaluate_model(test_data, model_profiles, train_data, model_name):
    hits = 0
    total = 0
    for idx, row in test_data.iterrows():
        # Передбачається, що користувач оцінює цей фільм високо
        user_ratings = {row['title']: 5}
        rated_movie_ids = []
        for movie in user_ratings.keys():
            matched_movies = train_data[train_data['title'] == movie].index
            if not matched_movies.empty:
                rated_movie_ids.append(matched_movies[0])

        if rated_movie_ids:
            # weight = [
            #     user_ratings[movie] for movie in user_ratings.keys() 
            #     if movie in train_data['title'].values]
            weight = list(user_ratings.values())
            user_profile = np.average(
                model_profiles[rated_movie_ids], axis=0, weights=weight)
            recommendations = get_recommendations(user_profile, model_profiles)
            recommended_movies = train_data.iloc[recommendations]['title'].values
            if row['title'] in recommended_movies:
                hits += 1
            total += 1

    precision = hits / total if total > 0 else 0
    print(f"{model_name} Precision: {precision:.2f}")

In [14]:
evaluate_model(test_data, tfidf_movie_profiles_train, train_data, "TF-IDF")
evaluate_model(test_data, lda_movie_profiles_train, train_data, "LDA")

IndexError: index (26) out of range