Table Of Contents

>[Import Libraries](#scrollTo=xEjZ1jJTj2tn)

>[Create a Custom Dataset](#scrollTo=-vy-GCG9h_8_)

>[Baseline: Prefiltering & Collaborative Filtering](#scrollTo=_rQSQ3TUkb0C)

>[Suggested Methode (Markov Model)](#scrollTo=rgFZyhxKuDJ1)

>[Comparison between two approaches](#scrollTo=HXOBCX8fuhBx)



# Import Libraries

In [8]:
import pandas as pd
import numpy as np
import random
from sklearn.metrics.pairwise import cosine_similarity
from scipy.sparse import csr_matrix
from collections import defaultdict
import time

# Create a Custom Dataset

In [7]:

random.seed(42)
np.random.seed(42)

num_users = 500
num_items = 300
num_ratings = 5000

user_ids = np.random.randint(1, num_users + 1, num_ratings)
item_ids = np.random.randint(1, num_items + 1, num_ratings)
ratings = np.random.randint(1, 6, num_ratings)

# اطلاعات زمینه‌ای (مثلاً زمان تماشا و نوع دستگاه)
time_of_day = np.random.choice(["صبح", "عصر", "شب"], num_ratings)
device_type = np.random.choice(["موبایل", "لپ‌تاپ", "تلویزیون", "کامپیوتر", "شارژر"], num_ratings)

synthetic_data = pd.DataFrame({
    "user_id": user_ids,
    "item_id": item_ids,
    "rating": ratings,
    "time_of_day": time_of_day,
    "device_type": device_type
})

synthetic_data

Unnamed: 0,user_id,item_id,rating,time_of_day,device_type
0,103,215,4,شب,تلویزیون
1,436,120,5,صبح,موبایل
2,349,55,2,صبح,لپ‌تاپ
3,271,27,1,شب,موبایل
4,107,118,2,صبح,لپ‌تاپ
...,...,...,...,...,...
4995,111,166,2,شب,شارژر
4996,415,139,2,صبح,تلویزیون
4997,290,117,5,شب,کامپیوتر
4998,295,299,1,صبح,تلویزیون


# Baseline: Prefiltering & Collaborative Filtering

In [10]:
# انتخاب یک زمینه خاص برای فیلتر کردن (مثلاً "عصر" و "لپ‌تاپ")
filtered_data = synthetic_data[(synthetic_data["time_of_day"] == "عصر") &
                               (synthetic_data["device_type"] == "لپ‌تاپ")]

# ایجاد ماتریس کاربر-آیتم برای فیلترسازی مشارکتی
user_item_matrix = filtered_data.pivot(index="user_id", columns="item_id", values="rating").fillna(0)
sparse_matrix = csr_matrix(user_item_matrix)

user_similarity = cosine_similarity(sparse_matrix)

user_sim_df = pd.DataFrame(user_similarity, index=user_item_matrix.index, columns=user_item_matrix.index)

print("Users Similarity")
user_sim_df.head(10)

Users Similarity


user_id,3,4,5,7,8,10,12,19,20,21,...,478,480,481,486,487,490,491,493,498,499
user_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
3,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
5,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
7,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
8,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
10,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
12,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
19,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
20,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0
21,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [27]:
def find_similarity(target):
    if target in user_sim_df.index:
        similar_users = user_sim_df[target].sort_values(ascending=False)
        print(f"5 Users most similar to user {target}:")
        return similar_users.head(5)
    else:
        return f"User {target} does not exist in the similarity matrix."

In [28]:
find_similarity(120)

5 Users most similar to user 120:


Unnamed: 0_level_0,120
user_id,Unnamed: 1_level_1
120,1.0
420,0.948683
61,0.316228
328,0.0
314,0.0


In [29]:
find_similarity(15)

'User 15 does not exist in the similarity matrix.'

In [30]:
find_similarity(486)

5 Users most similar to user 486:


Unnamed: 0_level_0,486
user_id,Unnamed: 1_level_1
486,1.0
3,0.0
328,0.0
313,0.0
314,0.0


# Suggested Methode (Markov Model)

In [31]:
user_sequences = synthetic_data.groupby("user_id")["item_id"].apply(list)

transition_matrix = defaultdict(lambda: defaultdict(int))

for sequence in user_sequences:
    for i in range(len(sequence) - 1):
        transition_matrix[sequence[i]][sequence[i + 1]] += 1

for movie, transitions in transition_matrix.items():
    total = sum(transitions.values())
    for next_movie in transitions:
        transition_matrix[movie][next_movie] /= total

def predict_next_movie(current_movie):
    if current_movie in transition_matrix:
        return max(transition_matrix[current_movie], key=transition_matrix[current_movie].get)
    return None

test_movie = 5
predicted_movie = predict_next_movie(test_movie)
print(f"After watvhing movie {test_movie}, the model suggest to watch movie {predicted_movie}")

After watvhing movie 5, the model suggest to watch movie 197


# Comparison between two approaches

In [35]:
def evaluate_models():
    start_time = time.time()
    baseline_results = user_sim_df.mean().mean()
    baseline_time = time.time() - start_time

    start_time = time.time()
    markov_results = np.mean([len(seq) for seq in user_sequences])
    markov_time = time.time() - start_time

    print(f"Approach 1: Prefiltered & Collaboration Filtering(Mean Similarity) {baseline_results:.3f}, time= {baseline_time:.4f} seconds")
    print(f"Approach 2: Morkov Model(Mean Sequence) {markov_results:.3f}, time= {markov_time:.4f} seconds")

evaluate_models()


Approach 1: Prefiltered & Collaboration Filtering(Mean Similarity) 0.008, time= 0.0022 seconds
Approach 2: Morkov Model(Mean Sequence) 10.000, time= 0.0003 seconds
