In [1]:
import pandas as pd
import numpy as np
from scipy.sparse.linalg import svds
from sklearn.metrics.pairwise import cosine_similarity


# Load the movies, ratings and tags tables from CSV files addressed by
# relative path (i.e. resolved against the current working directory).
movies_df, ratings_df, tags_df = [
    pd.read_csv(csv_name)
    for csv_name in ('movies.csv', 'ratings.csv', 'tags.csv')
]


In [2]:
# Build the rating matrix: one row per userId, one column per movieId.
# User/movie pairs that have no rating come out of the pivot as NaN and are
# replaced with zero.
ratings_matrix = (
    ratings_df
    .pivot_table(index='userId', columns='movieId', values='rating')
    .fillna(0)
)

# NumPy array holding the same values, used by the linear-algebra steps.
ratings_np_array = ratings_matrix.to_numpy()

# Report the dimensions of the rating matrix as (rows, columns).
print(ratings_np_array.shape)


(610, 9724)


In [3]:
# Multiply the transposed rating matrix by the rating matrix itself. The
# product is square, with one row and one column per column of
# ratings_np_array.
result_matrix = ratings_np_array.T @ ratings_np_array

# Show the resulting matrix.
print(result_matrix)

[[3454.5   896.5   430.75 ...    0.      0.      0.  ]
 [ 896.5  1380.25  259.   ...    0.      0.      0.  ]
 [ 430.75  259.    609.25 ...    0.      0.      0.  ]
 ...
 [   0.      0.      0.   ...   12.25   12.25    0.  ]
 [   0.      0.      0.   ...   12.25   12.25    0.  ]
 [   0.      0.      0.   ...    0.      0.     16.  ]]


In [4]:
# Compute the eigenvalues and eigenvectors of result_matrix.
# result_matrix is the product of a matrix's transpose with the matrix itself,
# so it is symmetric, which is the kind of input np.linalg.eigh is designed for.

eigenvalues, eigenvectors = np.linalg.eigh(result_matrix)

In [5]:
# Positions of the eigenvalues ordered from largest to smallest.
sorted_indices = eigenvalues.argsort()[::-1]

# Apply that ordering to the eigenvalues and to the matching eigenvector
# columns so that both stay aligned.
sorted_eigenvalues = np.take(eigenvalues, sorted_indices)
sorted_eigenvectors = eigenvectors[:, sorted_indices]

# Singular values are the square roots of the eigenvalue magnitudes; taking
# the absolute value first keeps the square root defined for any negative
# eigenvalue.
singular_values = np.sqrt(np.absolute(sorted_eigenvalues))
print(sorted_eigenvectors)

[[ 7.04498985e-02 -2.75911949e-02  7.84438842e-02 ...  0.00000000e+00
   0.00000000e+00 -2.75785212e-07]
 [ 3.85393459e-02 -2.06662722e-03  5.68447103e-02 ... -8.34580198e-03
  -1.44332074e-02 -3.09076203e-01]
 [ 1.59129220e-02 -2.47146155e-02  1.80051145e-02 ...  6.22818538e-02
   2.21418802e-01 -2.52877455e-02]
 ...
 [ 6.46836073e-05  5.97586244e-04 -8.71093879e-05 ...  1.38121251e-03
   1.20972432e-04  6.28515561e-04]
 [ 6.46836073e-05  5.97586244e-04 -8.71093879e-05 ...  1.09474181e-03
   1.42195778e-04  5.55782806e-04]
 [ 2.71729303e-04  1.27236200e-03  1.22833344e-04 ...  7.81965491e-04
   4.91736110e-03 -5.69314224e-03]]


In [8]:
# Dimensions of ratings_np_array (rows, columns).
num_users, num_movies = ratings_np_array.shape

# Only as many singular values as the smaller dimension fit on the diagonal.
num_singular_values = min(num_users, num_movies)

# Sigma has the same shape as the rating matrix: the leading singular values
# sit on its main diagonal and every other entry is zero.
sigma_matrix = np.zeros((num_users, num_movies))
diagonal_positions = np.arange(num_singular_values)
sigma_matrix[diagonal_positions, diagonal_positions] = singular_values[:num_singular_values]

# Show the sigma matrix.
print("Sigma Matrix:")
print(sigma_matrix)

Sigma Matrix:
[[534.41989777   0.           0.         ...   0.           0.
    0.        ]
 [  0.         231.23661142   0.         ...   0.           0.
    0.        ]
 [  0.           0.         191.1508762  ...   0.           0.
    0.        ]
 ...
 [  0.           0.           0.         ...   0.           0.
    0.        ]
 [  0.           0.           0.         ...   0.           0.
    0.        ]
 [  0.           0.           0.         ...   0.           0.
    0.        ]]


In [9]:
# Transpose the sorted eigenvector matrix, so that the sorted eigenvectors
# (columns of sorted_eigenvectors) become the rows of the result.
transposed_sorted_eigenvectors = sorted_eigenvectors.T

In [10]:
# Show the transposed eigenvector matrix.
print(transposed_sorted_eigenvectors)

[[ 7.04498985e-02  3.85393459e-02  1.59129220e-02 ...  6.46836073e-05
   6.46836073e-05  2.71729303e-04]
 [-2.75911949e-02 -2.06662722e-03 -2.47146155e-02 ...  5.97586244e-04
   5.97586244e-04  1.27236200e-03]
 [ 7.84438842e-02  5.68447103e-02  1.80051145e-02 ... -8.71093879e-05
  -8.71093879e-05  1.22833344e-04]
 ...
 [ 0.00000000e+00 -8.34580198e-03  6.22818538e-02 ...  1.38121251e-03
   1.09474181e-03  7.81965491e-04]
 [ 0.00000000e+00 -1.44332074e-02  2.21418802e-01 ...  1.20972432e-04
   1.42195778e-04  4.91736110e-03]
 [-2.75785212e-07 -3.09076203e-01 -2.52877455e-02 ...  6.28515561e-04
   5.55782806e-04 -5.69314224e-03]]


In [11]:
# Build U column by column from the relation u_i = (ratings @ v_i) / sigma_i,
# where v_i is the i-th sorted eigenvector and sigma_i the i-th singular value.
num_components = min(ratings_np_array.shape)
leading_singular_values = singular_values[:num_components]

# Singular values at or below this threshold are treated as zero. Dividing by
# them would put inf/NaN (or amplified round-off) into U and, through it, into
# the reconstructed matrix, so those columns are left at zero instead.
zero_threshold = (
    leading_singular_values.max(initial=0.0)
    * max(ratings_np_array.shape)
    * np.finfo(float).eps
)
usable_components = leading_singular_values > zero_threshold

# One matrix product projects the ratings onto all leading eigenvectors at
# once; this replaces the per-column Python loop.
projected_ratings = ratings_np_array @ sorted_eigenvectors[:, :num_components]

U = np.zeros((num_users, num_users))
U[:, :num_components] = np.divide(
    projected_ratings,
    leading_singular_values,
    out=np.zeros_like(projected_ratings, dtype=float),
    where=usable_components,
)

In [12]:
# Multiply the three factors back together: S = U x Sigma x V^T.
A = U @ sigma_matrix
S = A @ transposed_sorted_eigenvectors

In [13]:
# Set every entry of S that is below 0.50 to zero (S is modified in place),
# then show the result.
np.putmask(S, S < 0.50, 0.0)
print(S)

[[4.  0.  4.  ... 0.  0.  0. ]
 [0.  0.  0.  ... 0.  0.  0. ]
 [0.  0.  0.  ... 0.  0.  0. ]
 ...
 [2.5 2.  2.  ... 0.  0.  0. ]
 [3.  0.  0.  ... 0.  0.  0. ]
 [5.  0.  0.  ... 0.  0.  0. ]]


In [16]:
# Display the rating table (userId rows, movieId columns, zeros for missing ratings).
ratings_matrix

movieId,1,2,3,4,5,6,7,8,9,10,...,193565,193567,193571,193573,193579,193581,193583,193585,193587,193609
userId,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1,4.0,0.0,4.0,0.0,0.0,4.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
5,4.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
606,2.5,0.0,0.0,0.0,0.0,0.0,2.5,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
607,4.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
608,2.5,2.0,2.0,0.0,0.0,0.0,0.0,0.0,0.0,4.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
609,3.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,4.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [106]:
def find_most_similar_user(user_id):
    """Return the row position of the user whose ratings are closest to ``user_id``'s.

    Closeness is the cosine similarity between rows of the module-level
    ``ratings_matrix``.

    Parameters
    ----------
    user_id : int
        1-based user identifier. It is turned into a 0-based row position of
        ``ratings_matrix`` by subtracting one.

    Returns
    -------
    int
        0-based row position in ``ratings_matrix`` (not a userId label) of the
        most similar *other* user.
    """
    # Convert the 1-based identifier into a 0-based row position.
    user_pos = user_id - 1

    all_ratings = ratings_matrix.to_numpy(dtype=float)
    target_ratings = ratings_matrix.iloc[user_pos].to_numpy(dtype=float)

    dot_products = all_ratings @ target_ratings
    norm_products = np.linalg.norm(all_ratings, axis=1) * np.linalg.norm(target_ratings)

    # A row without any rating has zero norm. Dividing by it would give NaN,
    # and np.argmax returns the position of a NaN, so such a row would be
    # reported as the "most similar" user. Those rows get similarity 0 instead.
    similarities = np.divide(
        dot_products,
        norm_products,
        out=np.zeros_like(dot_products),
        where=norm_products > 0,
    )

    # Exclude the user itself. -inf (rather than 0) guarantees the user can
    # never tie with, and win against, users that merely share no movies.
    similarities[user_pos] = -np.inf

    return int(np.argmax(similarities))

def recommend_movies_for_user(user_id, num_recommendations=5):
    """Suggest movies for ``user_id`` taken from the most similar user's ratings.

    Parameters
    ----------
    user_id : int
        1-based user identifier; the user's own ratings are read from row
        ``user_id - 1`` of the module-level ``ratings_matrix``.
    num_recommendations : int, default 5
        Upper bound on the number of movie ids returned.

    Returns
    -------
    list
        Column labels of ``ratings_matrix`` (movie ids) that the most similar
        user rated above zero and ``user_id`` did not, in column order,
        truncated to ``num_recommendations`` entries.
    """
    # Ratings of the nearest neighbour and of the requesting user.
    neighbour_ratings = ratings_matrix.iloc[find_most_similar_user(user_id)]
    own_ratings = ratings_matrix.iloc[user_id - 1]

    # Keep movies the neighbour rated (> 0) that the user has not rated (> 0).
    rated_by_neighbour = neighbour_ratings > 0
    rated_by_user = own_ratings > 0
    unseen_mask = (rated_by_neighbour & ~rated_by_user).to_numpy()

    candidate_movie_ids = neighbour_ratings.index[unseen_mask].tolist()
    return candidate_movie_ids[:num_recommendations]

# Ask which user the recommendations should be produced for.
user_input = input("Enter user ID (an integer): ")

# Non-numeric input is reported as an invalid ID below instead of aborting the
# cell with an unhandled ValueError.
try:
    user_id = int(user_input)
except ValueError:
    user_id = None

# The recommendation functions read row ``user_id - 1`` of ratings_matrix, so
# the valid IDs are 1..N where N is the number of rows. 0 must be rejected (it
# would wrap around to the last row) and N itself must be accepted.
if user_id is None or not 1 <= user_id <= ratings_matrix.shape[0]:
    print("Invalid user ID. Please enter a valid user ID.")
else:
    # Recommend movies for the requested user.
    recommendations = recommend_movies_for_user(user_id)

    # Print the recommended movies by title, numbered from 1.
    print(f"Recommended movies for user {user_id}:")
    for i, movie_id in enumerate(recommendations, 1):
        movie_title = movies_df[movies_df['movieId'] == movie_id]['title'].values[0]
        print(f"{i}. {movie_title}")

Enter user ID (an integer):  1


Recommended movies for user 1:
1. Casino (1995)
2. Sense and Sensibility (1995)
3. Get Shorty (1995)
4. Powder (1995)
5. Twelve Monkeys (a.k.a. 12 Monkeys) (1995)
