In [13]:
import pandas as pd
df = pd.read_csv("/content/netflix_content.csv")
df.head(100)


Unnamed: 0,Title,Available Globally?,Release Date,Hours Viewed,Language Indicator,Content Type
0,The Night Agent: Season 1,Yes,2023-03-23,812100000,English,Show
1,Ginny & Georgia: Season 2,Yes,2023-01-05,665100000,English,Show
2,The Glory: Season 1 // 더 글로리: 시즌 1,Yes,2022-12-30,622800000,Korean,Show
3,Wednesday: Season 1,Yes,2022-11-23,507700000,English,Show
4,Queen Charlotte: A Bridgerton Story,Yes,2023-05-04,503000000,English,Movie
...,...,...,...,...,...,...
95,Wrong Side of the Tracks: Season 1 // Entrevía...,No,2022-05-20,97600000,Non-English,Show
96,Minions: The Rise of Gru,No,,96400000,English,Movie
97,Demon Slayer: Kimetsu no Yaiba: Tanjiro Kamado...,No,,95800000,Japanese,Movie
98,The Blacklist: Season 9,No,,95700000,English,Show


In [2]:
df['Hours Viewed'] = df['Hours Viewed'].str.replace(',','', regex=False).astype('int64')
df.dropna(subset=['Title'], inplace=True)
df.drop_duplicates(subset=["Title"], inplace=True)
df['Content_ID'] = df.reset_index().index.astype('int32')
df['Language_ID'] = df['Language Indicator'].astype('category').cat.codes
df['ContentType_ID'] = df['Content Type'].astype('category').cat.codes
df[['Content_ID', 'Title', 'Hours Viewed', 'Language_ID','ContentType_ID']].head()


Unnamed: 0,Content_ID,Title,Hours Viewed,Language_ID,ContentType_ID
0,0,The Night Agent: Season 1,812100000,0,1
1,1,Ginny & Georgia: Season 2,665100000,0,1
2,2,The Glory: Season 1 // 더 글로리: 시즌 1,622800000,3,1
3,3,Wednesday: Season 1,507700000,0,1
4,4,Queen Charlotte: A Bridgerton Story,503000000,0,0


In [8]:
#from pandas.core.arrays.sparse import dtype
import tensorflow as tf
from tensorflow.keras import layers, Model
num_contents = df['Content_ID'].nunique()
num_languages = df['Language_ID'].nunique()
num_type = df['ContentType_ID'].nunique()

content_input = layers.Input(shape=(1,), dtype=tf.int32, name='content_id')
language_input = layers.Input(shape=(1,), dtype=tf.int32, name="language_id")
type_input = layers.Input(shape=(1,), dtype=tf.int32, name="content_type")

content_embedding = layers.Embedding(input_dim=num_contents+1, output_dim=32)(content_input)
language_embedding = layers.Embedding(input_dim=num_languages+1, output_dim=8)(language_input)
type_embedding = layers.Embedding(input_dim=num_type, output_dim=4)(type_input)

content_vec = layers.Flatten()(content_embedding)
language_vec = layers.Flatten()(language_embedding)
type_vec = layers.Flatten()(type_embedding)

combined = layers.Concatenate()([content_vec, language_vec,type_vec])
x = layers.Dense(64, activation='relu')(combined)
x = layers.Dense(32,activation='relu')(x)
output = layers.Dense(num_contents, activation='softmax')(x)

model = Model(
    inputs=[content_input, language_input, type_input],
    outputs=output
)

model.compile(
    optimizer='adam',
    loss='sparse_categorical_crossentropy',
    metrics=['accuracy']
)


In [11]:
model.fit(
    x={
        'content_id': df['Content_ID'].values.reshape(-1, 1),
        'language_id': df['Language_ID'].values.reshape(-1, 1),
        'content_type': df['ContentType_ID'].values.reshape(-1, 1)
    },
    y=df['Content_ID'].values,
    epochs=5,
    batch_size=64
)


Epoch 1/5
[1m300/300[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m14s[0m 39ms/step - accuracy: 0.0000e+00 - loss: 9.8788
Epoch 2/5
[1m300/300[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m12s[0m 40ms/step - accuracy: 0.0000e+00 - loss: 9.8648
Epoch 3/5
[1m300/300[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m12s[0m 39ms/step - accuracy: 0.0013 - loss: 9.5538
Epoch 4/5
[1m300/300[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m12s[0m 39ms/step - accuracy: 0.0120 - loss: 8.0160
Epoch 5/5
[1m300/300[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 36ms/step - accuracy: 0.1193 - loss: 5.8317


<keras.src.callbacks.history.History at 0x791585f18ef0>

In [12]:
import numpy as np

def recommend_similar(content_title, top_k=5):
    content_row = df[df['Title'].str.contains(content_title, case=False, na=False)].iloc[0]
    content_id = content_row['Content_ID']
    language_id = content_row['Language_ID']
    content_type_id = content_row['ContentType_ID']

    predictions = model.predict({
        'content_id': np.array([content_id]),
        'language_id': np.array([language_id]),
        'content_type': np.array([content_type_id])
    })

    top_indices = predictions[0].argsort()[-top_k-1:][::-1]
    recommendations = df[df['Content_ID'].isin(top_indices)]
    return recommendations[['Title', 'Language Indicator', 'Content Type', 'Hours Viewed']]

recommend_similar("Wednesday")

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 133ms/step


Unnamed: 0,Title,Language Indicator,Content Type,Hours Viewed
3,Wednesday: Season 1,English,Show,507700000
6896,Extraordinary You: Season 1 // 어쩌다 발견한 하루: 시즌 1,Korean,Show,1400000
7062,"Wait, My Youth: Season 1 // 等等啊我的青春: 第1季",Non-English,Show,1400000
18755,First Wives Club: Season 2,English,Show,18600000
20891,Una vida Bárbara: Season 1,Non-English,Show,2400000
22950,In Another World With My Smartphone: Season 2 ...,Japanese,Show,900000
