In [4]:
import pandas as pd
# Read the Netflix content table from disk and preview its first rows
csv_path = 'netflix_content.csv'
data = pd.read_csv(csv_path)
data.head()

Unnamed: 0,Title,Available Globally?,Release Date,Hours Viewed,Language Indicator,Content Type
0,The Night Agent: Season 1,Yes,2023-03-23,812100000,English,Show
1,Ginny & Georgia: Season 2,Yes,2023-01-05,665100000,English,Show
2,The Glory: Season 1 // 더 글로리: 시즌 1,Yes,2022-12-30,622800000,Korean,Show
3,Wednesday: Season 1,Yes,2022-11-23,507700000,English,Show
4,Queen Charlotte: A Bridgerton Story,Yes,2023-05-04,503000000,English,Movie


In [5]:
# Discard rows that lack a title, then keep only the first row for each title
data.dropna(subset=['Title'], inplace=True)
data.drop_duplicates(subset=['Title'], inplace=True)

# Number the surviving rows 0..n-1; this ID is what the embedding layers index by
row_positions = data.reset_index(drop=True).index
data['Content_ID'] = row_positions.astype('int32')

# Integer-encode the two categorical columns
# (NOTE: pandas' cat.codes marks missing values as -1)
for text_column, id_column in [('Language Indicator', 'Language_ID'),
                               ('Content Type', 'ContentType_ID')]:
    data[id_column] = data[text_column].astype('category').cat.codes

preview_columns = ['Content_ID', 'Title', 'Hours Viewed', 'Language_ID', 'ContentType_ID']
data[preview_columns].head()

Unnamed: 0,Content_ID,Title,Hours Viewed,Language_ID,ContentType_ID
0,0,The Night Agent: Season 1,812100000,0,1
1,1,Ginny & Georgia: Season 2,665100000,0,1
2,2,The Glory: Season 1 // 더 글로리: 시즌 1,622800000,3,1
3,3,Wednesday: Season 1,507700000,0,1
4,4,Queen Charlotte: A Bridgerton Story,503000000,0,0


In [None]:

import torch
import torch.nn as nn
import torch.optim as optim
# `TensorDatasetDataset` does not exist in torch.utils.data; import the two
# real classes the fused name was made of.
from torch.utils.data import TensorDataset, Dataset, DataLoader

# Number of distinct IDs per feature; these size the embedding tables below.
# All three must be plain integer counts, so use .nunique() throughout
# (.unique() returns the array of values, not how many there are).
num_language = data['Language_ID'].nunique()
num_content = data['Content_ID'].nunique()
num_type = data['ContentType_ID'].nunique()

# PyTorch model
class RecommenderModel(nn.Module):
    """Score every content item from a (language, content, type) ID triple.

    Each ID is looked up in its own embedding table, the three embeddings are
    concatenated into a 40-wide vector, and a three-layer MLP maps that vector
    to one raw score (logit) per content item.

    Args:
        num_language: Number of distinct language IDs.
        num_content: Number of distinct content IDs; also the output width.
        num_type: Number of distinct content-type IDs.
    """

    def __init__(self, num_language, num_content, num_type):
        super().__init__()

        # Embedding layers (each table has one spare row beyond the ID count)
        self.language_embedding = nn.Embedding(num_language + 1, 32)
        self.content_embedding = nn.Embedding(num_content + 1, 4)
        self.type_embedding = nn.Embedding(num_type + 1, 4)

        # Dense layers. fc1 must be a Linear projection to 64 features:
        # nn.LayerNorm(input_dim, 64) would read 64 as `eps` and keep the
        # width at 40, which fc2 cannot consume.
        input_dim = 32 + 4 + 4
        self.fc1 = nn.Linear(input_dim, 64)
        self.fc2 = nn.Linear(64, 32)
        self.fc3 = nn.Linear(32, num_content)

    def forward(self, language_id, content_id, type_id):
        """Return raw scores of shape (batch, num_content).

        Args:
            language_id: Integer tensor of language IDs, shape (batch,) or
                (batch, 1).
            content_id: Integer tensor of content IDs, same shape.
            type_id: Integer tensor of content-type IDs, same shape.
        """
        # For (batch, 1) inputs the lookup yields (batch, 1, dim) and
        # squeeze(1) removes the singleton axis; for (batch,) inputs axis 1 is
        # the embedding axis (size 4 or 32), so squeeze(1) is a no-op.
        content_vec = self.content_embedding(content_id).squeeze(1)
        language_vec = self.language_embedding(language_id).squeeze(1)
        type_vec = self.type_embedding(type_id).squeeze(1)

        # Concatenate embeddings along the feature axis
        combined = torch.cat([content_vec, language_vec, type_vec], dim=1)

        # Dense layers; the final layer's output is what gets returned
        hidden = torch.relu(self.fc1(combined))
        hidden = torch.relu(self.fc2(hidden))
        return self.fc3(hidden)





 
 



In [8]:
import numpy as np

def recommend_similar(content_title, top_k = 5, scorer = None, catalog = None):
    """Return the `top_k` highest-scoring titles for the first title match.

    The first row whose 'Title' contains `content_title` (case-insensitive,
    literal substring) is used as the query. Its IDs are fed to the model,
    the resulting per-content scores are ranked, the query item itself is
    removed, and the best `top_k` remaining rows are returned best-first.

    Args:
        content_title: Text to look for inside the 'Title' column.
        top_k: Maximum number of recommendations to return.
        scorer: Callable taking `language_id`, `content_id` and `type_id`
            keyword tensors and returning scores indexed by Content_ID along
            the last axis (expected shape (1, num_content)). Defaults to the
            notebook-level `model`.
        catalog: DataFrame with 'Title', 'Content_ID', 'Language_ID',
            'ContentType_ID', 'Language Indicator', 'Content Type' and
            'Hours Viewed' columns. Defaults to the notebook-level `data`.

    Returns:
        DataFrame with columns 'Title', 'Language Indicator', 'Content Type'
        and 'Hours Viewed', ordered from highest to lowest score.

    Raises:
        IndexError: If no title contains `content_title`.
        ValueError: If `top_k` is negative.
    """
    # Local import: the model is a PyTorch module, and this keeps the function
    # usable even if the cell that imports torch has not been run.
    import torch

    # Fall back to the notebook globals only when nothing was passed in.
    scorer = model if scorer is None else scorer
    catalog = data if catalog is None else catalog

    if top_k < 0:
        raise ValueError(f"top_k must be non-negative, got {top_k}")

    output_columns = ['Title', 'Language Indicator', 'Content Type', 'Hours Viewed']

    # regex=False: titles may contain characters such as '(' or '?' that
    # would otherwise be interpreted as a regular expression.
    matches = catalog[catalog['Title'].str.contains(content_title, case = False, na = False, regex = False)]
    if matches.empty:
        raise IndexError(f"No title containing {content_title!r} was found")
    content_row = matches.iloc[0]
    query_id = int(content_row['Content_ID'])

    def as_batch(value):
        # One-element integer batch, the index dtype embedding layers expect.
        return torch.tensor([int(value)], dtype = torch.long)

    # Inference only, so skip gradient tracking.
    with torch.no_grad():
        predictions = scorer(
            language_id = as_batch(content_row['Language_ID']),
            content_id = as_batch(query_id),
            type_id = as_batch(content_row['ContentType_ID']),
        )
    scores = torch.as_tensor(predictions)[0]

    # Rank every content ID by score, drop the query itself and any ID that
    # has no row in the catalog, then keep the best top_k.
    known_ids = set(catalog['Content_ID'].tolist())
    ranked_ids = [
        cid for cid in torch.argsort(scores, descending = True).tolist()
        if cid != query_id and cid in known_ids
    ][:top_k]
    if not ranked_ids:
        return catalog.iloc[0:0][output_columns]

    # isin() returns rows in table order, so re-order them by rank.
    rank_of = {cid: rank for rank, cid in enumerate(ranked_ids)}
    picked = catalog[catalog['Content_ID'].isin(ranked_ids)]
    order = picked['Content_ID'].map(rank_of).values.argsort()
    recommendations = picked.iloc[order]
    return recommendations[output_columns]
# Demo: recommendations for "Wednesday", using the default list length
recommend_similar(content_title = "Wednesday")

KeyError: 'Content_ID'