In [None]:
import ast
import sys

sys.path.insert(1, '../')

import pandas as pd

from scripts.tfidf_model import TfiDfModel
from scripts.doc2vec_model import Doc2VecModel
from scripts.transformer_model import TransformerModel

## Load data

In [None]:
# Input: semicolon-separated CSV read by the "Load data" cell.
DATA_PATH: str = "../data/processed_data.csv"
# Output directory handed to every model's save_sim_matrix() call.
SIM_MATRICES_DIR: str = "../data/sim_matrix/"

In [None]:
# Load the processed book table (semicolon-separated CSV).
df = pd.read_csv(DATA_PATH, sep=";")

# The "genre" column is read back as text; parse each cell into a Python object.
# ast.literal_eval only accepts literals (lists, dicts, strings, numbers, ...),
# so unlike eval() it can never execute code that happens to be in the file.
# NOTE(review): assumes every genre cell is a plain Python literal — a cell that
# is not will now raise ValueError instead of being evaluated.
df["genre"] = df["genre"].apply(ast.literal_eval)

# Drop books without a cleaned summary, then renumber the rows 0..n-1.
# The models below are built from plain lists (`.to_list()`), so any index they
# work with can only be a list position, while the display cells look rows up
# by label with `df.loc`. Without the reset, labels keep their pre-filter values
# and `df.loc[idx]` may show a different book than the one the model scored
# (or raise KeyError if that label was dropped).
df = df[df.clean_summary.notna()].reset_index(drop=True)
df.head()

## TF-IDF vectorizer

In [None]:
# Build the TfiDfModel from the `clean_summary` column, compute its similarity
# matrix, and save that matrix under SIM_MATRICES_DIR.
tfidf = TfiDfModel(df["clean_summary"].tolist())
tfidf.calculate_similarity()
tfidf.save_sim_matrix(SIM_MATRICES_DIR)

In [None]:
# Row used as the query for the TF-IDF recommendations in the next cell;
# shown as a one-column frame so the long summary text renders as a table.
idx = 320
df.loc[idx, ["book_title", "author", "genre", "summary"]].to_frame()

In [None]:
# Ask the TF-IDF model for recommendations for row `idx` and display the
# matching rows of `df`.
recoms = tfidf.get_recommendations(idx)
pd.DataFrame(
    df.loc[recoms, ["book_title", "author", "genre", "summary"]]
)

## Doc2Vec (Gensim)

In [None]:
# Build the Doc2VecModel from the `summary` column (not `clean_summary`),
# compute its similarity matrix, then save both the model and the matrix.
# NOTE(review): 256, 0.025, 4 and 20 are passed positionally; presumably model
# hyperparameters — confirm their meaning against Doc2VecModel's signature.
doc2vec = Doc2VecModel(df["summary"].tolist(), 256, 0.025, 4, 20)
doc2vec.calculate_similarity()
doc2vec.save_model("../models/d2v.model")
doc2vec.save_sim_matrix(SIM_MATRICES_DIR)

In [None]:
# Row used as the query for the Doc2Vec recommendations in the next cell;
# shown as a one-column frame so the long summary text renders as a table.
idx = 320
df.loc[idx, ["book_title", "author", "genre", "summary"]].to_frame()

In [None]:
# Ask the Doc2Vec model for recommendations for row `idx` and display the
# matching rows of `df`.
recoms = doc2vec.get_recommendations(idx)
pd.DataFrame(
    df.loc[recoms, ["book_title", "author", "genre", "summary"]]
)

## Sentence Transformer

In [None]:
# Build the TransformerModel from the `summary` column (not `clean_summary`)
# using the 'paraphrase-mpnet-base-v2' model name, compute its similarity
# matrix, then save both the model and the matrix.
sentence_transformer = TransformerModel(
    df["summary"].tolist(),
    'paraphrase-mpnet-base-v2',
)
sentence_transformer.calculate_similarity()
sentence_transformer.save_model("../models/sentence_transformer")
sentence_transformer.save_sim_matrix(SIM_MATRICES_DIR)

In [None]:
# Row displayed as the query for the sentence-transformer section
# (note: 321 here, whereas the two sections above use 320).
idx = 321
df.loc[idx, ["book_title", "author", "genre", "summary"]].to_frame()

In [None]:
# Ask the sentence-transformer model for recommendations and display the
# matching rows of `df`.
# NOTE(review): unlike the TF-IDF and Doc2Vec cells, a list of two row indices
# (idx and idx + 1) is passed here, while the cell above displays only `idx` —
# confirm that get_recommendations accepts a list and that this is intended.
query_rows = [idx, idx + 1]
recoms = sentence_transformer.get_recommendations(query_rows)
pd.DataFrame(
    df.loc[recoms, ["book_title", "author", "genre", "summary"]]
)