In [24]:
import pandas as pd
import tensorflow as tf
from tensorflow.keras import layers, Model
import numpy as np

In [2]:
df = pd.read_csv('netflix_content.csv')
df

Unnamed: 0,Title,Available Globally?,Release Date,Hours Viewed,Language Indicator,Content Type
0,The Night Agent: Season 1,Yes,2023-03-23,812100000,English,Show
1,Ginny & Georgia: Season 2,Yes,2023-01-05,665100000,English,Show
2,The Glory: Season 1 // 더 글로리: 시즌 1,Yes,2022-12-30,622800000,Korean,Show
3,Wednesday: Season 1,Yes,2022-11-23,507700000,English,Show
4,Queen Charlotte: A Bridgerton Story,Yes,2023-05-04,503000000,English,Movie
...,...,...,...,...,...,...
24807,We Are Black and British: Season 1,No,,100000,English,Show
24808,Whitney Cummings: Can I Touch It?,Yes,2019-07-30,100000,English,Movie
24809,Whitney Cummings: Jokes,No,2022-07-26,100000,English,Movie
24810,"Whose Vote Counts, Explained: Limited Series",Yes,2020-09-28,100000,English,Movie


In [3]:
df.isnull().sum()

Title                      0
Available Globally?        0
Release Date           16646
Hours Viewed               0
Language Indicator         0
Content Type               0
dtype: int64

In [4]:
df.duplicated().sum()

467

In [5]:
df.drop_duplicates()

Unnamed: 0,Title,Available Globally?,Release Date,Hours Viewed,Language Indicator,Content Type
0,The Night Agent: Season 1,Yes,2023-03-23,812100000,English,Show
1,Ginny & Georgia: Season 2,Yes,2023-01-05,665100000,English,Show
2,The Glory: Season 1 // 더 글로리: 시즌 1,Yes,2022-12-30,622800000,Korean,Show
3,Wednesday: Season 1,Yes,2022-11-23,507700000,English,Show
4,Queen Charlotte: A Bridgerton Story,Yes,2023-05-04,503000000,English,Movie
...,...,...,...,...,...,...
24798,Transformers: Cyberverse: Season 4,No,,100000,English,Show
24799,Travel Man: 48 Hours in...: Season 9,No,,100000,English,Show
24800,Two Weeks to a Stronger Core: Volume 1,Yes,,100000,English,Movie
24804,Vir Das: For India,Yes,2020-01-26,100000,English,Movie


In [6]:
df.dtypes


Title                  object
Available Globally?    object
Release Date           object
Hours Viewed           object
Language Indicator     object
Content Type           object
dtype: object

In [7]:
df['Hours Viewed'] = df['Hours Viewed'].str.replace(',','',regex = False).astype('int')

In [8]:
df['Content_ID'] = df.reset_index().index.astype('int32')

In [9]:
df['Language_Id'] = df['Language Indicator'].astype('category').cat.codes

In [10]:
df['Content_Type_Id'] = df['Content Type'].astype('category').cat.codes

In [11]:
df[['Content_ID', 'Title', 'Hours Viewed', 'Language_Id', 'Content_Type_Id']].head()

Unnamed: 0,Content_ID,Title,Hours Viewed,Language_Id,Content_Type_Id
0,0,The Night Agent: Season 1,812100000,0,1
1,1,Ginny & Georgia: Season 2,665100000,0,1
2,2,The Glory: Season 1 // 더 글로리: 시즌 1,622800000,3,1
3,3,Wednesday: Season 1,507700000,0,1
4,4,Queen Charlotte: A Bridgerton Story,503000000,0,0


In [13]:
num_contents = df['Content_ID'].nunique()
num_languages = df['Language_Id'].nunique()
num_types = df['Content_Type_Id'].nunique()

In [15]:
content_input = layers.Input(shape=(1,),dtype = tf.int32,name = 'content_id')
language_input = layers.Input(shape=(1,),dtype = tf.int32,name = 'language_id')
contenttype_input = layers.Input(shape=(1,),dtype = tf.int32,name = 'content_type')


In [16]:
content_embedding = layers.Embedding(input_dim = num_contents+1,output_dim = 32)(content_input)
language_embedding = layers.Embedding(input_dim = num_languages+1,output_dim = 8)(language_input)
contenttype_embedding = layers.Embedding(input_dim = num_types+1,output_dim = 4)(contenttype_input)

In [17]:
content_flat = layers.Flatten()(content_embedding)
language_flat = layers.Flatten()(language_embedding)
contenttype_flat = layers.Flatten()(contenttype_embedding)

In [19]:
combine = layers.Concatenate()([content_flat,language_flat,contenttype_flat])
X = layers.Dense(64,activation = 'relu')(combine)
X = layers.Dense(32,activation = 'relu')(X)
output = layers.Dense(num_contents,activation = 'softmax')(X)

In [21]:
model = Model(inputs = [content_input,language_input,contenttype_input],outputs = output)
model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])

In [23]:
model.fit(
    x={
        'content_id': df['Content_ID'],
        'language_id': df['Language_Id'],
        'content_type': df['Content_Type_Id']
    },
    y=df['Content_ID'],
    epochs=5,
    batch_size=64
)

Epoch 1/5
[1m388/388[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 21ms/step - accuracy: 0.0000e+00 - loss: 10.1425
Epoch 2/5
[1m388/388[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 22ms/step - accuracy: 0.0000e+00 - loss: 10.1267
Epoch 3/5
[1m388/388[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 21ms/step - accuracy: 7.3178e-04 - loss: 9.8674
Epoch 4/5
[1m388/388[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 21ms/step - accuracy: 0.0115 - loss: 8.3849
Epoch 5/5
[1m388/388[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 22ms/step - accuracy: 0.1104 - loss: 6.2169


<keras.src.callbacks.history.History at 0x24397ef3c20>

In [33]:
def recomended(content_title, top_k=5):
    content_row = df[df['Title'].str.contains(content_title,case = False,na = False)].iloc[0]
    content_id = content_row['Content_ID']
    language_id = content_row['Language_Id']
    contenttype_id = content_row['Content_Type_Id']

    predictions = model.predict({
       'content_id' : np.array([content_id]),
       'language_id' : np.array([language_id]),
       'content_type' : np.array([contenttype_id])
    })

    top_indices = predictions[0].argsort()[-top_k-1:][::-1]
    recommdataion = df[df['Content_ID'].isin(top_indices)]
    return recommdataion[['Title','Language Indicator','Content Type','Hours Viewed']]
recomended('Stranger Things')

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 60ms/step


Unnamed: 0,Title,Language Indicator,Content Type,Hours Viewed
2497,Secret Obsession,English,Movie,8000000
7594,Christmas Under Wraps,English,Movie,1100000
10986,Mucho Mucho Amor: The Legend of Walter Mercado,English,Movie,400000
12646,Aurora,English,Movie,200000
15525,Genius (2018),English,Movie,100000
22360,Peep Show: Series 9,English,Movie,800000
