In [94]:
import pandas as pd
import numpy as np


In [95]:
# Read the TMDB TV-show CSV (path is relative to the current working directory)
# into a DataFrame and preview its first rows.
file_path = "./TMDB_tv_dataset_v3.csv"
dataset = pd.read_csv(filepath_or_buffer=file_path)
dataset.head()

Unnamed: 0,id,name,number_of_seasons,number_of_episodes,original_language,vote_count,vote_average,overview,adult,backdrop_path,...,tagline,genres,created_by,languages,networks,origin_country,spoken_languages,production_companies,production_countries,episode_run_time
0,1399,Game of Thrones,8,73,en,21857,8.442,Seven noble families fight for control of the ...,False,/2OMB0ynKlyIenMJWI2Dy9IWT4c.jpg,...,Winter Is Coming,"Sci-Fi & Fantasy, Drama, Action & Adventure","David Benioff, D.B. Weiss",en,HBO,US,English,"Revolution Sun Studios, Television 360, Genera...","United Kingdom, United States of America",0
1,71446,Money Heist,3,41,es,17836,8.257,"To carry out the biggest heist in history, a m...",False,/gFZriCkpJYsApPZEF3jhxL4yLzG.jpg,...,The perfect robbery.,"Crime, Drama",Álex Pina,es,"Netflix, Antena 3",ES,Español,Vancouver Media,Spain,70
2,66732,Stranger Things,4,34,en,16161,8.624,"When a young boy vanishes, a small town uncove...",False,/2MaumbgBlW1NoPo3ZJO38A6v7OS.jpg,...,Every ending has a beginning.,"Drama, Sci-Fi & Fantasy, Mystery","Matt Duffer, Ross Duffer",en,Netflix,US,English,"21 Laps Entertainment, Monkey Massacre Product...",United States of America,0
3,1402,The Walking Dead,11,177,en,15432,8.121,Sheriff's deputy Rick Grimes awakens from a co...,False,/x4salpjB11umlUOltfNvSSrjSXm.jpg,...,Fight the dead. Fear the living.,"Action & Adventure, Drama, Sci-Fi & Fantasy",Frank Darabont,en,AMC,US,English,"AMC Studios, Circle of Confusion, Valhalla Mot...",United States of America,42
4,63174,Lucifer,6,93,en,13870,8.486,"Bored and unhappy as the Lord of Hell, Lucifer...",False,/aDBRtunw49UF4XmqfyNuD9nlYIu.jpg,...,It's good to be bad.,"Crime, Sci-Fi & Fantasy",Tom Kapinos,en,"FOX, Netflix",US,English,"Warner Bros. Television, DC Entertainment, Jer...",United States of America,45


In [96]:


# Restrict the frame to the show identifier, its title and the two score columns.
selected_columns = ['id', 'name', 'vote_average', 'popularity']
filtered_dataset = dataset[selected_columns]

# Preview the reduced frame.
filtered_dataset.head()

Unnamed: 0,id,name,vote_average,popularity
0,1399,Game of Thrones,8.442,1083.917
1,71446,Money Heist,8.257,96.354
2,66732,Stranger Things,8.624,185.711
3,1402,The Walking Dead,8.121,489.746
4,63174,Lucifer,8.486,416.668


In [97]:
# Fabricate a user-by-show vote table: one row per synthetic user, one column per show.
np.random.seed(42)  # fixed seed so the simulated votes are repeatable

num_users = 100
num_shows = len(filtered_dataset)

# A vote of 0 stands for "no interaction" (70% of draws); the votes 1..10
# share the remaining probability equally (3% each).
vote_values = range(11)
vote_probabilities = [0.7] + [0.03] * 10

simulated_votes = np.random.choice(
    vote_values,
    size=(num_users, num_shows),
    p=vote_probabilities,
)

# Columns are labelled with the show titles taken from the filtered dataset.
user_item_matrix = pd.DataFrame(simulated_votes, columns=filtered_dataset['name'])

user_item_matrix.head()

name,Game of Thrones,Money Heist,Stranger Things,The Walking Dead,Lucifer,Riverdale,Squid Game,Breaking Bad,The Good Doctor,WandaVision,...,Bear Video Theater,Jhanak,Art Adore En,Born to Be Y,Jokah & Tutty,母乳酱想要喷出来,Barbie Dream Squad,Picasso,女子大生危険なアルバイト,Welcome to My World
0,0,9,2,0,0,0,0,6,0,1,...,5,0,7,0,0,0,7,0,0,0
1,0,0,0,8,0,0,0,0,0,0,...,0,4,0,0,0,5,0,0,0,0
2,0,0,4,0,0,5,6,7,9,0,...,0,0,6,0,8,2,1,0,0,0
3,5,0,0,0,0,10,0,0,0,0,...,7,0,0,0,0,9,0,5,0,0
4,0,0,0,9,0,0,0,0,0,3,...,0,0,0,8,4,4,0,6,9,0


In [98]:


# Keep only the shows (columns) that received at least one non-zero vote.
has_any_vote = user_item_matrix.ne(0).any(axis=0)
cleaned_user_item_matrix = user_item_matrix.loc[:, has_any_vote]

# Show the resulting dimensions together with a preview of the first rows.
cleaned_user_item_matrix.shape, cleaned_user_item_matrix.head()

((100, 168639),
 name  Game of Thrones  Money Heist  Stranger Things  The Walking Dead  \
 0                   0            9                2                 0   
 1                   0            0                0                 8   
 2                   0            0                4                 0   
 3                   5            0                0                 0   
 4                   0            0                0                 9   
 
 name  Lucifer  Riverdale  Squid Game  Breaking Bad  The Good Doctor  \
 0           0          0           0             6                0   
 1           0          0           0             0                0   
 2           0          5           6             7                9   
 3           0         10           0             0                0   
 4           0          0           0             0                0   
 
 name  WandaVision  ...  Bear Video Theater  Jhanak  Art Adore En  \
 0               1  ...            

In [None]:
from tqdm import tqdm

def matrix_factorization(R, num_factors, num_iterations, alpha, beta, show_progress=True):
    """Factorize a rating matrix into user and item latent factors by gradient descent.

    Only entries with ``R[i, j] > 0`` are treated as observed ratings; all other
    entries are skipped. Every iteration performs two sweeps over the observed
    entries: the first updates the user factors ``P`` (visiting entries user by
    user), the second updates the item factors ``Q`` (visiting entries item by
    item), so that ``P @ Q.T`` approximates the observed part of ``R``.

    The factors are initialised with ``np.random.rand`` and therefore depend on
    NumPy's global random state; call ``np.random.seed`` beforehand for
    reproducible results.

    Parameters
    ----------
    R : array-like of shape (num_users, num_items)
        Rating matrix. Anything ``np.asarray`` accepts (ndarray, DataFrame,
        nested lists) is allowed.
    num_factors : int
        Number of latent factors per user / item.
    num_iterations : int
        Number of full iterations (one user sweep plus one item sweep each).
    alpha : float
        Learning rate.
    beta : float
        Regularisation weight applied to the factor being updated.
    show_progress : bool, default True
        When True, wrap the iteration loop in a ``tqdm`` progress bar (``tqdm``
        must be imported in the enclosing namespace). Pass False to run silently.

    Returns
    -------
    (P, Q) : tuple of numpy.ndarray
        ``P`` has shape (num_users, num_factors) and ``Q`` has shape
        (num_items, num_factors).

    Raises
    ------
    ValueError
        If ``R`` is not two-dimensional.
    """
    R = np.asarray(R)
    if R.ndim != 2:
        raise ValueError(f"R must be a 2-D matrix, got an array with {R.ndim} dimension(s)")

    num_users, num_items = R.shape
    P = np.random.rand(num_users, num_factors)
    Q = np.random.rand(num_items, num_factors)

    # Locate the observed ratings once instead of re-scanning every cell of R in
    # pure Python on each sweep. np.nonzero returns coordinates in row-major
    # order, which reproduces the visiting order of the nested loops:
    #   user sweep: for each user i, items j in ascending order;
    #   item sweep: for each item j, users i in ascending order (via the transpose).
    observed = R > 0
    sweep_users, sweep_items = np.nonzero(observed)
    by_item_items, by_item_users = np.nonzero(observed.T)

    iterations = range(num_iterations)
    if show_progress:
        iterations = tqdm(iterations, desc="Matrix Factorization Progress")

    for _ in iterations:
        # Sweep 1: update user factors with item factors held fixed.
        for i, j in zip(sweep_users, sweep_items):
            eij = R[i, j] - np.dot(P[i, :], Q[j, :])
            # The right-hand side is fully evaluated before the in-place add, so
            # each factor k gets exactly the update of the scalar per-k loop.
            P[i, :] += alpha * (2 * eij * Q[j, :] - beta * P[i, :])

        # Sweep 2: update item factors using the freshly updated user factors.
        for j, i in zip(by_item_items, by_item_users):
            eij = R[i, j] - np.dot(P[i, :], Q[j, :])
            Q[j, :] += alpha * (2 * eij * P[i, :] - beta * Q[j, :])

    return P, Q




# Run the factorization on the cleaned vote table (as a plain NumPy array).
R = cleaned_user_item_matrix.to_numpy()

# Hyperparameters: latent dimension, number of passes, learning rate, regularisation.
num_factors, num_iterations = 10, 10
alpha = beta = 0.01

P, Q = matrix_factorization(
    R,
    num_factors=num_factors,
    num_iterations=num_iterations,
    alpha=alpha,
    beta=beta,
)

# Rebuild the full vote matrix from the learned user and item factors.
R_approx = np.dot(P, Q.transpose())

# Inspect the top-left 5x5 corner of the reconstruction.
print(R_approx[:5, :5])

Matrix Factorization Progress:  40%|████      | 4/10 [05:17<07:57, 79.61s/it]