In [48]:
import pandas as pd
import numpy as np
from sklearn.metrics.pairwise import cosine_similarity
import matplotlib.pyplot as plt
import seaborn as sns

import warnings

# Install a filter that ignores all warnings so they do not clutter cell output.
warnings.filterwarnings('ignore')

# Colab-only step: attach Google Drive, which holds the dataset folder below.
from google.colab import drive
drive.mount('/content/drive')
base_path = '/content/drive/MyDrive/Dataset_ML/Movie_Recommender/'

# Read the ratings and movies tables from the dataset folder.
ratings_df = pd.read_csv(f'{base_path}ratings.csv')
movies_df = pd.read_csv(f'{base_path}movies.csv')

# Preview and summarise the ratings table.
print("5 baris pertama DataFrame Ratings:", ratings_df.head(), sep="\n")
print("\nInformasi DataFrame Ratings:")
ratings_df.info()

# Same preview and summary for the movies table.
print("\n5 baris pertama DataFrame Movies:", movies_df.head(), sep="\n")
print("\nInformasi DataFrame Movies:")
movies_df.info()

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
5 baris pertama DataFrame Ratings:
   userId  movieId  rating  timestamp
0       1        1     4.0  964982703
1       1        3     4.0  964981247
2       1        6     4.0  964982224
3       1       47     5.0  964983815
4       1       50     5.0  964982931

Informasi DataFrame Ratings:
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 100836 entries, 0 to 100835
Data columns (total 4 columns):
 #   Column     Non-Null Count   Dtype  
---  ------     --------------   -----  
 0   userId     100836 non-null  int64  
 1   movieId    100836 non-null  int64  
 2   rating     100836 non-null  float64
 3   timestamp  100836 non-null  int64  
dtypes: float64(1), int64(3)
memory usage: 3.1 MB

5 baris pertama DataFrame Movies:
   movieId                               title  \
0        1                    Toy Story (1995)   
1        2                      Juman

In [49]:
# Join every rating row with its movie's columns; the left join keeps all
# rating rows even when a movieId has no match in movies_df.
df = ratings_df.merge(movies_df, on='movieId', how='left')

print("\n5 baris pertama DataFrame setelah digabung:", df.head(), sep="\n")
print("\nInformasi DataFrame setelah digabung:")
df.info()

# Build the user-item matrix: one row per userId, one column per movie title,
# cells holding the rating. pivot_table aggregates with its default (mean)
# whenever several ratings land in the same (userId, title) cell.
user_movie_matrix = pd.pivot_table(df, values='rating', index='userId', columns='title')

print("\n5 baris pertama Matriks Pengguna-Item (Contoh kecil):", user_movie_matrix.head(), sep="\n")
print("\nBentuk Matriks Pengguna-Item:", user_movie_matrix.shape)


5 baris pertama DataFrame setelah digabung:
   userId  movieId  rating  timestamp                        title  \
0       1        1     4.0  964982703             Toy Story (1995)   
1       1        3     4.0  964981247      Grumpier Old Men (1995)   
2       1        6     4.0  964982224                  Heat (1995)   
3       1       47     5.0  964983815  Seven (a.k.a. Se7en) (1995)   
4       1       50     5.0  964982931   Usual Suspects, The (1995)   

                                        genres  
0  Adventure|Animation|Children|Comedy|Fantasy  
1                               Comedy|Romance  
2                        Action|Crime|Thriller  
3                             Mystery|Thriller  
4                       Crime|Mystery|Thriller  

Informasi DataFrame setelah digabung:
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 100836 entries, 0 to 100835
Data columns (total 6 columns):
 #   Column     Non-Null Count   Dtype  
---  ------     --------------   -----  
 0   user

In [50]:
# Flip the user-item matrix so that movies are rows and users are columns,
# which is the orientation needed to compare movies with each other.
movie_user_matrix = user_movie_matrix.transpose()

print("5 baris pertama Matriks Film-Pengguna (Transposed):", movie_user_matrix.head(), sep="\n")
print("\nBentuk Matriks Film-Pengguna:", movie_user_matrix.shape)

# Pairwise cosine similarity between movie rows; NaN entries (no rating)
# are filled with 0 for the similarity computation.
item_similarity = cosine_similarity(movie_user_matrix.fillna(0))

# Wrap the raw array in a DataFrame labelled by movie title on both axes
# so scores can be looked up by name.
item_similarity_df = pd.DataFrame(
    data=item_similarity,
    index=movie_user_matrix.index,
    columns=movie_user_matrix.index,
)

print("\n5x5 Matriks Kesamaan Item (Contoh kecil):", item_similarity_df.iloc[:5, :5], sep="\n")

5 baris pertama Matriks Film-Pengguna (Transposed):
userId                                   1    2    3    4    5    6    7    \
title                                                                        
'71 (2014)                               NaN  NaN  NaN  NaN  NaN  NaN  NaN   
'Hellboy': The Seeds of Creation (2004)  NaN  NaN  NaN  NaN  NaN  NaN  NaN   
'Round Midnight (1986)                   NaN  NaN  NaN  NaN  NaN  NaN  NaN   
'Salem's Lot (2004)                      NaN  NaN  NaN  NaN  NaN  NaN  NaN   
'Til There Was You (1997)                NaN  NaN  NaN  NaN  NaN  NaN  NaN   

userId                                   8    9    10   ...  601  602  603  \
title                                                   ...                  
'71 (2014)                               NaN  NaN  NaN  ...  NaN  NaN  NaN   
'Hellboy': The Seeds of Creation (2004)  NaN  NaN  NaN  ...  NaN  NaN  NaN   
'Round Midnight (1986)                   NaN  NaN  NaN  ...  NaN  NaN  NaN   
'Salem's Lo

In [51]:
def get_similar_movies(movie_title, item_similarity_df, num_recommendations=10):
    """Return the titles with the highest similarity scores to ``movie_title``.

    Parameters
    ----------
    movie_title : label
        Title to look up in ``item_similarity_df``.
    item_similarity_df : pandas.DataFrame
        Item-item similarity scores. ``movie_title`` must be a label on both
        the index and the columns for a lookup to succeed.
    num_recommendations : int, default 10
        Number of top-scoring entries to return (passed to ``Series.head``).

    Returns
    -------
    pandas.Series or str
        Similarity scores sorted in descending order with ``movie_title``
        itself removed, truncated to ``num_recommendations`` entries. If the
        title is not available on both axes, a not-found message string is
        returned instead.
    """
    # Scores are read from the *column* named movie_title, and the title's own
    # entry is later dropped from the resulting Series, which is indexed by the
    # frame's *index*. The label therefore has to exist on both axes; checking
    # only the index would let a missing column surface as a KeyError.
    if (movie_title not in item_similarity_df.index
            or movie_title not in item_similarity_df.columns):
        return "Film tidak ditemukan dalam database kesamaan. Coba film lain."

    similar_scores = item_similarity_df[movie_title].sort_values(ascending=False)
    # Remove the query title itself so it is not recommended back to the user.
    similar_movies = similar_scores.drop(movie_title)
    return similar_movies.head(num_recommendations)

# Title used as the query for this demo.
favorite_movie = 'Finding Nemo (2003)'

# Ask for the ten highest-scoring neighbours of the query title.
recommendations = get_similar_movies(
    movie_title=favorite_movie,
    item_similarity_df=item_similarity_df,
    num_recommendations=10,
)

print(f"\nJika Anda menyukai '{favorite_movie}', mungkin Anda juga menyukai:", recommendations, sep="\n")


Jika Anda menyukai 'Finding Nemo (2003)', mungkin Anda juga menyukai:
title
Incredibles, The (2004)                                          0.726374
Shrek (2001)                                                     0.701435
Monsters, Inc. (2001)                                            0.697340
Pirates of the Caribbean: The Curse of the Black Pearl (2003)    0.652265
Shrek 2 (2004)                                                   0.625074
Catch Me If You Can (2002)                                       0.594174
Lord of the Rings: The Return of the King, The (2003)            0.587321
Lord of the Rings: The Two Towers, The (2002)                    0.568078
Harry Potter and the Prisoner of Azkaban (2004)                  0.560267
Ocean's Eleven (2001)                                            0.556801
Name: Finding Nemo (2003), dtype: float64
