In [1]:
#import library
import string
import re
import pandas as pd
import numpy as np
import tensorflow as tf
import tensorflow_recommenders as tfrs

from sklearn.preprocessing import MinMaxScaler
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
from scipy.sparse import coo_matrix

import warnings
warnings.filterwarnings('ignore')

In [2]:
#Preprocessing rating data
def preprocess_rating(rating):
    rating['user_id'] = rating['user_id'].astype(int)
    rating['fiction_id'] = rating['fiction_id'].astype(int)
    rating['click'] = rating['click'].astype(int)
    rating['like'] = rating['like'].astype(int)
    rating['rating'] = rating['rating'].astype(int)
    return rating

In [3]:
#Preprocessing rating data
def preprocess_fiction(fiction):
    fiction['fiction_id'] = fiction['fiction_id'].astype(int)
    fiction['overview'] = fiction['overview'].astype(str)
    fiction['language'] = fiction['language'].astype(str)
    fiction['tags'] = fiction['tags'].astype(str)
    fiction['genres'] = fiction['genres'].astype(str)
    fiction['chapter'] = fiction['chapter'].astype(str)
    return fiction

In [4]:
#menghitung count_rating, mean_rating, like, click, dan popularitas berdasarkan fiction_id
def calculate_rating(rating):
    fiction_recs = rating.groupby("fiction_id").rating.agg(['count','mean'])
    fiction_recs['click'] = rating.groupby("fiction_id").click.agg(['count'])
    fiction_recs['like'] = rating.groupby("fiction_id").like.agg(['count'])
    fiction_recs['popularity'] = fiction_recs['click'] + fiction_recs['like']
    return fiction_recs

In [5]:
def calculate_weighted_mean(fiction, fiction_recs):
    R = fiction['mean']
    v = fiction['count']
    m = fiction['count'].quantile(0.8)
    C = fiction['mean'].mean()
    fiction_recs['weighted_mean'] = (R * v + C * m) / (v + m)
    return fiction_recs['weighted_mean']

In [6]:
def preprocess_rating_pred(fiction,fiction_recs):
    mm_scaler = MinMaxScaler()
    scaled = mm_scaler.fit_transform(fiction_recs[['popularity', 'weighted_mean']])
    rating_pred = pd.DataFrame(scaled, columns=['popularity', 'weighted_mean'])
    rating_pred.index = fiction['fiction_id']
    rating_pred['score'] = rating_pred['weighted_mean'] * 0.4 + rating_pred['popularity'].astype('float64') * 0.6
    rating_pred_sorted = rating_pred.sort_values(by='score', ascending=False)
    return rating_pred_sorted

In [7]:
def preprocess_text(text):
    text = text.split(',')
    text = [re.sub('\(.*\)', '', t) for t in text]
    text = [t.translate(str.maketrans('','', string.punctuation)).lower() for t in text]
    text = [t.translate(str.maketrans('','', string.digits)) for t in text]
    return ' '.join(text)

In [8]:
def preprocess_content(content_df):
    content_df['overview'] = content_df['overview'].apply(preprocess_text)
    content_df['language'] = content_df['language'].apply(preprocess_text)
    content_df['tags'] = content_df['tags'].apply(preprocess_text)
    content_df['genres'] = content_df['genres'].apply(preprocess_text)
    content_df['atribute'] = ''
    content_df['atribute'] = content_df[content_df.columns[1:]].apply(lambda x: ' '.join(x), axis=1)
    content_df.set_index(['fiction_id','title'], inplace=True)
    content_df = content_df[['atribute']]
    return content_df

In [21]:
def predict(title, fiction, content_df, similarity_weight, top_n):
    
    data = content_df.reset_index()
    index_movie = data[data['title'] == title].index

    tfidf = TfidfVectorizer()
    tfidf_matrix = tfidf.fit_transform(content_df['atribute'])
    tfidf_matrix.shape
    cos_sim = cosine_similarity(tfidf_matrix)
    similarity = cos_sim[index_movie].T

    content_df['similarity'] = pd.DataFrame(similarity, columns=['similarity'])
    content_df['final_score'] = content_df['score']*(1-similarity_weight) + content_df['similarity']*similarity_weight
    content_df_sorted = content_df.sort_values(by='final_score', ascending=False).head(top_n)
    content_df_sorted.set_index('title', inplace=True)
    merged_df = content_df_sorted.merge(fiction, on='fiction_id', how='left')
    
    return merged_df[['title','overview','tags','genres','similarity','final_score']]

In [22]:
def cbf_recommendation_with_prediction(title, rating_dir='./try/rating.csv', fiction_dir='./try/fiction_metadata.csv'):

    #memuat data rating dan fiction_metadata
    rating = pd.read_csv(rating_dir)
    fiction_metadata = pd.read_csv(fiction_dir)

    #membuat dataframe fiction
    fiction = fiction_metadata[['fiction_id', 'title', 'overview', 'language', 'tags', 'genres', 'chapter']]

    #preprocessing data rating dan fiction_metadata
    rating = preprocess_rating(rating)
    fiction = preprocess_fiction(fiction)

    #membuat dataframe fiction_recs untuk menghitung rating count, rating mean, click, like, dan popularity
    fiction_recs = calculate_rating(rating)

    #merge dataframe fiction dengan fiction_recs
    fiction = fiction.merge(fiction_recs, how='inner', on='fiction_id')

    #menghitung weighted_mean dari algoritma hybrid
    fiction['weighted_mean'] = calculate_weighted_mean(fiction, fiction_recs)

    #membuat dataframe rating_pred untuk mengurutkan konten paling populer
    rating_pred_sorted = preprocess_rating_pred(fiction,fiction_recs)

    # Create a content_df
    content_df = fiction[['fiction_id', 'title', 'overview', 'language', 'tags', 'genres', 'chapter']]

    #melakukan preprocess pada content_df
    content_df = preprocess_content(content_df)

    #merge rating_pred dengan content_df
    content_df = rating_pred_sorted[:10000].merge(content_df, left_index=True, right_index=True, how='left')

    #rekomendasi fancition yang paling mirip dan populer
    content_df = content_df.reset_index()
    recommendation = predict(title, fiction, content_df, 0.9, 100)

    return recommendation

In [23]:
cbf_recommendation_with_prediction('Legenda Tanah Jawa')

Unnamed: 0,title,overview,tags,genres,similarity,final_score
0,Legenda Tanah Jawa,Seorang pemuda yang hidup di masa kini menemuk...,"antasi,aksi,petualangan,sejarah,legenda","sci-fi,petualangan",1.0,0.949512
1,Perjalanan ke Masa Depan,Seorang ilmuwan muda menemukan cara untuk mela...,"fiksi ilmiah,petualangan,perjalanan waktu,mist...","sci-fi,petualangan",0.238242,0.259503
2,Penyelamat Dunia,Seorang remaja yang memiliki kekuatan super ha...,"sci-fi,aksi,petualangan,alien, penyelamatan dunia","sci-fi,petualangan",0.165591,0.240946
3,Petualangan di Negeri Ajaib,Seorang atlet berjuang untuk mencapai mimpinya...,"aksi, petualangan,fantasi,penemuan","aksi,petualangan,fantasi",0.135348,0.193079
4,Raja Terakhir,"Di dunia yang dikuasai oleh iblis, seorang pem...","fantasi,aksi,petualangan,pertarungan,kekuatan","fantasi,aksi",0.1564,0.186004
5,Sang Prajurit yang Hilang,"Seorang prajurit yang pemberani, harus mencari...","prajurit,pemberani,petualangan,fantasi","petualangan,fantasi",0.115552,0.179357
6,Reinkarnasi,Seorang wanita yang meninggal dalam kecelakaan...,"reinkarnasi,keluarga,cinta,kehilangan","fantasi,drama",0.123424,0.173215
7,Kisah Hidup yang Penuh Makna,Seorang orang biasa menjalani kehidupan yang s...,"drama,kisah hidup,refleksi diri",drama,0.114507,0.167037
8,Pewaris Sang Dewa,Seorang pemuda yang terpilih sebagai pewaris s...,"fantasi,aksi,petualangan,dewa,takdir","fantasi,aksi",0.089752,0.153573
9,Ketika Cinta Bertemu Kekuatan,Seorang gadis biasa bertemu dengan seorang sup...,"romansa,aksi,pahlawan super,gadis biasa,cinta","romansa,aksi",0.041225,0.110465
