In [None]:
import pandas as pd
import numpy as np
from sklearn.metrics.pairwise import cosine_similarity
import joblib
from joblib import dump, load
from scipy import sparse
import math

### Configuration

In [None]:
class config:
    """Central place for the notebook's tunable settings and artefact paths."""

    # How many candidates the content-based stage requests by default.
    num_recommendations = 15
    # Size of the final shortlist.
    final_num_recommendations = 5

    # Pre-computed artefacts read by the data-import cell.
    books_data_path = "/kaggle/input/book-reccomender-training/books_data.csv"
    tfidf_matrix_path = "/kaggle/input/book-reccomender-training/tfidf_matrix.npz"
    tfidf_vectorizer_path = "/kaggle/input/book-reccomender-training/tfidf_vectorizer.joblib"
    tfidf_feature_name_path = "/kaggle/input/book-reccomender-training/feature_names.npy"

## Data Import

In [None]:
# Book metadata table; later cells look rows up by index label with `.loc`.
books = pd.read_csv(config.books_data_path)

# Fitted TF-IDF artefacts: the vectorizer, the sparse document-term matrix,
# and the array of feature names used as DataFrame column labels.
# NOTE(review): joblib.load and np.load(allow_pickle=True) both unpickle data,
# which can execute arbitrary code -- only point these paths at trusted files.
tfidf_vectorizer = joblib.load(config.tfidf_vectorizer_path)
tfidf_matrix = sparse.load_npz(config.tfidf_matrix_path)
tfidf_feature_names = np.load(
    config.tfidf_feature_name_path,
    allow_pickle=True,
)

In [None]:
# Dense, column-labelled copy of the sparse TF-IDF matrix.
# NOTE(review): `.toarray()` materialises every zero entry, so memory use grows
# with (number of rows) x (number of features).
tfidf_df = pd.DataFrame(
    data=tfidf_matrix.toarray(),
    columns=tfidf_feature_names,
)

## Inference

In [None]:
def vectorize_text_with_tfidf(tfidf_vectorizer, text):
    """Transform user text into a dense TF-IDF DataFrame.

    Parameters
    ----------
    tfidf_vectorizer : object with a ``transform(documents)`` method
        The fitted vectorizer; ``transform`` must return an object exposing
        ``toarray()``.
    text : str or iterable of str
        A single document or a collection of documents. A bare string is
        wrapped in a one-element list so it is treated as ONE document.

    Returns
    -------
    pandas.DataFrame
        One row per document, with columns labelled by the module-level
        ``tfidf_feature_names``.
    """
    # A plain string is itself an iterable (of characters); scikit-learn
    # vectorizers reject it outright, so normalise it to a list of documents.
    documents = [text] if isinstance(text, str) else text
    user_matrix = tfidf_vectorizer.transform(documents)
    return pd.DataFrame(user_matrix.toarray(), columns=tfidf_feature_names)

In [None]:
def map_similarity(vectorized_user_df, tfidf_df, num_recommendations=0):
    """Score every catalogue row against the user vector and keep the best ones.

    Parameters
    ----------
    vectorized_user_df : array-like, shape (n_queries, n_features)
        Vectorised user text; only the first row is used.
    tfidf_df : array-like, shape (n_books, n_features)
        TF-IDF vectors of the catalogue.
    num_recommendations : int, default 0
        How many of the highest-scoring rows to return. Zero or a negative
        number yields an empty mapping.

    Returns
    -------
    dict
        Positional row index -> cosine similarity, for the
        ``num_recommendations`` most similar rows (fewer if the catalogue is
        smaller), in ascending order of similarity.
    """
    # Guard first: `array[-0:]` is `array[0:]`, i.e. the WHOLE array, so a
    # request for zero items would otherwise return every row.
    if num_recommendations <= 0:
        return {}

    similarities = cosine_similarity(vectorized_user_df, tfidf_df)[0]
    # argsort is ascending, so the best matches are at the tail.
    top_indices = similarities.argsort()[-num_recommendations:]
    return {i: similarities[i] for i in top_indices}

In [None]:
def Content_based_filtering(user_given_text,num_recommendations=config.num_recommendations):
    """Find catalogue rows whose TF-IDF vectors best match the user's text.

    The text is vectorised with the module-level ``tfidf_vectorizer`` and then
    compared with ``tfidf_df`` by ``map_similarity``.

    Parameters
    ----------
    user_given_text
        Text passed on unchanged to ``vectorize_text_with_tfidf``.
    num_recommendations : int
        Forwarded to ``map_similarity``; defaults to
        ``config.num_recommendations`` (read once, at definition time).

    Returns
    -------
    The mapping produced by ``map_similarity``.
    """
    user_vector = vectorize_text_with_tfidf(tfidf_vectorizer, user_given_text)
    return map_similarity(user_vector, tfidf_df, num_recommendations)


In [None]:
def collabrative_filtering(similarity_map):
    """Re-rank content-based candidates using their rating statistics.

    Each candidate's score is::

        similarity * avg_rating * reliability

    where ``reliability = 1 - 1 / (1 + log2(rating_count))`` lies in [0, 1)
    and grows with the number of ratings. Books with fewer than one rating
    (including a missing count) get a reliability of 0.

    Parameters
    ----------
    similarity_map : dict
        Index label in the module-level ``books`` frame -> similarity score.
        The mapping is not modified.

    Returns
    -------
    list
        The keys of ``similarity_map`` ordered from highest to lowest score.
    """
    final_scores = {}
    for df_index, similarity in similarity_map.items():
        cur_book_rating = books.loc[df_index, "avg_rating"]
        cur_book_rating_count = books.loc[df_index, "rating_count"]

        # log(0) is undefined and counts below 1 would make the factor
        # negative or divide by zero; treat those books as fully unreliable.
        # (A NaN count also fails this comparison and lands in the else arm.)
        if cur_book_rating_count >= 1:
            rating_reliability_factor = 1 - (1 / (1 + math.log(cur_book_rating_count, 2)))
        else:
            rating_reliability_factor = 0.0

        # Weight by the damped reliability factor rather than the raw count,
        # so sheer popularity cannot swamp similarity and rating.
        final_scores[df_index] = similarity * cur_book_rating * rating_reliability_factor

    return sorted(final_scores, key=final_scores.get, reverse=True)

In [None]:
def hybrid_filtering(user_given_text,num_recommendations=config.num_recommendations,
                     final_num_recommendations=None):
    """Recommend books by content similarity, then re-rank them by ratings.

    Parameters
    ----------
    user_given_text
        Text describing what the user wants; forwarded to
        ``Content_based_filtering``.
    num_recommendations : int
        Number of content-based candidates to request; defaults to
        ``config.num_recommendations`` (read once, at definition time).
    final_num_recommendations : int or None, default None
        If given, keep only this many of the top re-ranked books (negative
        values are treated as 0). ``None`` returns every candidate, which is
        the behaviour callers had before this parameter existed.

    Returns
    -------
    tuple[list, list]
        ``(book_names, image_url)``: the "Book-Title" and "Image-URL-L" values
        of the selected rows of ``books``, in ranked order.
    """
    similarity_map = Content_based_filtering(user_given_text, num_recommendations)
    final_book_ids = collabrative_filtering(similarity_map)

    if final_num_recommendations is not None:
        # Clamp so a negative value cannot slice from the end of the list.
        final_book_ids = final_book_ids[:max(final_num_recommendations, 0)]

    book_names = books.loc[final_book_ids, "Book-Title"].to_list()
    image_url = books.loc[final_book_ids, "Image-URL-L"].to_list()
    return book_names, image_url