In [2]:
import pandas as pd

# Load the four CSV tables used by the rest of the notebook.
# Only the reads are wrapped in the try block: on failure a short hint is
# printed and the exception is re-raised, so execution stops here with the
# real cause instead of continuing with undefined DataFrames and failing
# later with an unrelated NameError.
try:
    links_df = pd.read_csv('links.csv')
    movies_df = pd.read_csv('movies.csv')
    ratings_df = pd.read_csv('ratings.csv')
    tags_df = pd.read_csv('tags.csv')
except FileNotFoundError as e:
    print(f"Error loading file: {e}. Make sure the files are in the correct directory.")
    raise
except Exception as e:
    print(f"An error occurred: {e}")
    raise

# Preview the first rows of every table (reached only when all reads succeeded).
print("Files loaded successfully:")
print("links.csv:", links_df.head())
print("movies.csv:", movies_df.head())
print("ratings.csv:", ratings_df.head())
print("tags.csv:", tags_df.head())

Files loaded successfully:
links.csv:    movieId  imdbId   tmdbId
0        1  114709    862.0
1        2  113497   8844.0
2        3  113228  15602.0
3        4  114885  31357.0
4        5  113041  11862.0
movies.csv:    movieId                               title  \
0        1                    Toy Story (1995)   
1        2                      Jumanji (1995)   
2        3             Grumpier Old Men (1995)   
3        4            Waiting to Exhale (1995)   
4        5  Father of the Bride Part II (1995)   

                                        genres  
0  Adventure|Animation|Children|Comedy|Fantasy  
1                   Adventure|Children|Fantasy  
2                               Comedy|Romance  
3                         Comedy|Drama|Romance  
4                                       Comedy  
ratings.csv:    userId  movieId  rating  timestamp
0       1        1     4.0  964982703
1       1        3     4.0  964981247
2       1        6     4.0  964982224
3       1       47    

In [3]:
# Report the number of missing values per column for each loaded table.
# Every heading after the first begins with "\n" so that consecutive
# reports are separated by a blank line in the cell output.
null_reports = [
    ("Missing values in links_df:", links_df),
    ("\nMissing values in movies_df:", movies_df),
    ("\nMissing values in ratings_df:", ratings_df),
    ("\nMissing values in tags_df:", tags_df),
]

for heading, frame in null_reports:
    print(heading)
    print(frame.isnull().sum())

Missing values in links_df:
movieId    0
imdbId     0
tmdbId     8
dtype: int64

Missing values in movies_df:
movieId    0
title      0
genres     0
dtype: int64

Missing values in ratings_df:
userId       0
movieId      0
rating       0
timestamp    0
dtype: int64

Missing values in tags_df:
userId       0
movieId      0
tag          0
timestamp    0
dtype: int64


In [4]:
# Show the dtype pandas assigned to each column of every loaded table.
# Every heading after the first begins with "\n" so that consecutive
# reports are separated by a blank line in the cell output.
dtype_reports = [
    ("Data types in links_df:", links_df),
    ("\nData types in movies_df:", movies_df),
    ("\nData types in ratings_df:", ratings_df),
    ("\nData types in tags_df:", tags_df),
]

for heading, frame in dtype_reports:
    print(heading)
    print(frame.dtypes)

Data types in links_df:
movieId      int64
imdbId       int64
tmdbId     float64
dtype: object

Data types in movies_df:
movieId     int64
title      object
genres     object
dtype: object

Data types in ratings_df:
userId         int64
movieId        int64
rating       float64
timestamp      int64
dtype: object

Data types in tags_df:
userId        int64
movieId       int64
tag          object
timestamp     int64
dtype: object


In [9]:
from sklearn.metrics.pairwise import cosine_similarity

# Pairwise cosine similarity between every pair of rows of tfidf_matrix.
# With a single argument scikit-learn compares the matrix against itself,
# which is the same computation as passing it twice.
# NOTE(review): tfidf_matrix is not defined in the visible cells; it must be
# created by an earlier cell before this one runs.
cosine_sim = cosine_similarity(tfidf_matrix)

In [13]:
# Title -> positional row number in movies_df.
# Series.drop_duplicates() removes duplicate *values*; the values here are
# row numbers and therefore already unique, so it never removed repeated
# titles. De-duplicate on the index instead (keeping the first occurrence) so
# that looking up a title always yields one scalar position.
# Positions (not index labels) are stored because the lookups below
# (cosine_sim[idx] and .iloc) are positional; with a default RangeIndex the
# two are identical.
indices = pd.Series(range(len(movies_df)), index=movies_df['title'])
indices = indices[~indices.index.duplicated(keep='first')]


def get_recommendations(title, cosine_sim=cosine_sim, top_n=5):
    """Return the titles of the movies most similar to ``title``.

    Parameters
    ----------
    title : str
        Exact title to look up (must match an entry of ``movies_df['title']``).
        When a title occurs more than once, its first occurrence is used.
    cosine_sim : 2-D array-like, optional
        Pairwise similarity matrix. Row ``i`` is read as the similarity of
        the movie at row position ``i`` to every other movie
        (assumes rows are in the same order as ``movies_df`` - TODO confirm
        against the cell that builds the matrix).
    top_n : int, optional
        Number of recommendations to return. Defaults to 5, the size used
        by the original implementation.

    Returns
    -------
    pandas.Series or None
        Titles of the ``top_n`` most similar movies, most similar first, or
        ``None`` (after printing a notice) when ``title`` is not present.
    """
    # Check if the title exists in the index
    if title not in indices:
        print(f"Movie title '{title}' not found in the dataset. Please check the title.")
        return None

    idx = indices[title]

    # Drop the query movie by position rather than assuming it sorts first:
    # other movies can tie with it at the maximum similarity, and the stable
    # sort would then place any earlier-positioned tie ahead of it, so
    # slicing [1:] could discard a real match and recommend the query itself.
    sim_scores = [
        (position, score)
        for position, score in enumerate(cosine_sim[idx])
        if position != idx
    ]
    # Stable descending sort: ties keep their original row order.
    sim_scores.sort(key=lambda pair: pair[1], reverse=True)

    movie_indices = [position for position, _ in sim_scores[:top_n]]
    return movies_df['title'].iloc[movie_indices]

In [11]:
# Look up recommendations for one example title.
sample_movie_title = 'Toy Story (1995)'
recommendations = get_recommendations(sample_movie_title)

# A single print with a newline separator writes the heading and the result
# on separate lines. If the title is unknown, get_recommendations returns
# None and "None" is printed under the heading.
print(f"Recommendations for '{sample_movie_title}':", recommendations, sep="\n")

Recommendations for 'Toy Story (1995)':
1706                                       Antz (1998)
2355                                Toy Story 2 (1999)
2809    Adventures of Rocky and Bullwinkle, The (2000)
3000                  Emperor's New Groove, The (2000)
3568                             Monsters, Inc. (2001)
Name: title, dtype: object


In [15]:
# Get recommendations for a second movie that is present in the dataset.
sample_movie_title_3 = 'Jumanji (1995)'
recommendations_3 = get_recommendations(sample_movie_title_3)

# A single print with a newline separator writes the heading and the result
# on separate lines.
print(f"Recommendations for '{sample_movie_title_3}':", recommendations_3, sep="\n")

Recommendations for 'Jumanji (1995)':
53             Indian in the Cupboard, The (1995)
109             NeverEnding Story III, The (1994)
767               Escape to Witch Mountain (1975)
1514    Darby O'Gill and the Little People (1959)
1556                          Return to Oz (1985)
Name: title, dtype: object
