In [1]:
"""
Movie recommendation system based on genres.
"""

'\nMovie recommendation system based on genres.\n'

In [2]:
import pandas as pd
import numpy as np
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

In [4]:
# Load the movie dataset (path changed because the author keeps the data locally).
# A raw string literal is used so the backslashes of the Windows path ("\B", "\m")
# are taken literally instead of being parsed as (invalid) escape sequences.
movies = pd.read_csv(r"C:\Bakalaurs_praktiskais\Bakalaura-darbs\movies.csv")

In [5]:
# Data preprocessing: turn the pipe-separated genre list into a lower-case,
# space-separated string so it can be fed to a text vectorizer.
# Vectorized .str methods replace the per-row lambda; a missing genre value
# becomes an empty string instead of raising AttributeError.
movies['genres'] = (
    movies['genres']
    .fillna('')
    .str.lower()
    .str.replace('|', ' ', regex=False)  # '|' is a literal separator, not a regex
)

In [6]:
# Create the TF-IDF vectorizer and fit it on the genre strings.
# The genre strings are whitespace-separated at this point, so every
# whitespace-delimited chunk is treated as exactly one term. The default token
# pattern treats punctuation as a separator and would split a hyphenated genre
# (e.g. "sci-fi") into two independent terms.
vectorizer = TfidfVectorizer(token_pattern=r"(?u)\S+")
tfidf_matrix = vectorizer.fit_transform(movies['genres'])

In [7]:
# Pairwise cosine similarity between the TF-IDF rows of all movies.
# With a single argument the matrix is compared against itself.
cosine_sim_matrix = cosine_similarity(tfidf_matrix)

In [8]:
def find_movie_index(movie_title, year):
    """Return the row position of a movie in the global ``movies`` frame.

    Parameters
    ----------
    movie_title : str
        Title to look for (without the trailing ``"(year)"`` part when ``year``
        is given).
    year : int, str or None
        When not ``None`` the lookup is an exact match on
        ``"<movie_title> (<year>)"``. When ``None`` the first row whose title
        contains ``movie_title`` as a literal, case-sensitive substring is used.

    Returns
    -------
    int or None
        Zero-based row position of the first matching movie, or ``None`` when
        nothing matches. A position (not an index label) is returned because
        the caller uses the value to index the similarity matrix and
        ``movies.iloc``; for a default RangeIndex both are the same number.
    """
    titles = movies['title']
    if year is not None:
        matches = titles == f"{movie_title} ({year})"
    else:
        # regex=False: titles may contain regex metacharacters ("*", "(", "?", ...)
        # that must be matched literally. na=False: missing titles never match.
        matches = titles.str.contains(movie_title, regex=False, na=False)

    positions = np.flatnonzero(matches.to_numpy(dtype=bool))
    if positions.size == 0:
        return None
    return int(positions[0])

In [9]:
def recommend_movies(movie_title, year=None, n_recommendations=5):
    """Return the movies whose genre vectors are most similar to the given movie.

    Parameters
    ----------
    movie_title : str
        Title of the reference movie; resolved through ``find_movie_index``.
    year : int, str or None, optional
        Release year forwarded to ``find_movie_index``.
    n_recommendations : int, optional
        Maximum number of movies to return; values below zero are treated as 0.

    Returns
    -------
    pandas.DataFrame or None
        Rows of ``movies`` ordered by decreasing cosine similarity (ties keep
        their original row order), never including the reference movie itself.
        ``None`` is returned, after printing a message, when the movie cannot
        be found.
    """
    movie_index = find_movie_index(movie_title, year)
    if movie_index is None:
        movie_not_found_message = f"Movie  '{movie_title}' not found in dataset."
        print(movie_not_found_message)
        return None

    scores = np.asarray(cosine_sim_matrix[movie_index]).ravel()
    # Stable sort on the negated scores: highest similarity first, ties in row order.
    ranked = np.argsort(-scores, kind='stable')
    # Drop the reference movie explicitly. Other movies can tie with it at the top
    # score, so it is not guaranteed to be the first entry of the ranking.
    ranked = ranked[ranked != movie_index]

    top = ranked[:max(int(n_recommendations), 0)]
    return movies.iloc[top]

In [10]:
if __name__ == "__main__":
    # Demo query: the movies most similar by genre to "Toy Story (1995)".
    year = 1995
    movie_title = "Toy Story"
    n_recommendations = 10

    recommendations = recommend_movies(movie_title, year, n_recommendations)
    # Print the header only when the lookup succeeded; recommend_movies reports a
    # missing title itself, so a failed lookup no longer shows a misleading header.
    if recommendations is not None:
        print(f"Similar recommendations for movie '{movie_title}':")
        print(recommendations)

Similar recommendations for movie 'Toy Story':
      movieId                                              title  \
1706     2294                                        Antz (1998)   
2355     3114                                 Toy Story 2 (1999)   
2809     3754     Adventures of Rocky and Bullwinkle, The (2000)   
3000     4016                   Emperor's New Groove, The (2000)   
3568     4886                              Monsters, Inc. (2001)   
6194    45074                                   Wild, The (2006)   
6486    53121                             Shrek the Third (2007)   
6948    65577                     Tale of Despereaux, The (2008)   
7760    91355  Asterix and the Vikings (Astérix et les Viking...   
8219   103755                                       Turbo (2013)   

                                           genres  
1706  adventure animation children comedy fantasy  
2355  adventure animation children comedy fantasy  
2809  adventure animation children comedy fantas