In [1]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
for dirname, _, filenames in os.walk('/kaggle/input'):
    for filename in filenames:
        print(os.path.join(dirname, filename))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

/kaggle/input/hybrid-filtering-system/ratings.csv
/kaggle/input/hybrid-filtering-system/RatingsCount.csv
/kaggle/input/hybrid-filtering-system/FinalData.csv
/kaggle/input/hybrid-filtering-system/AverageRatings.csv


# Introduction to Recommendation Systems 

A recommendation system is a type of information filtering system that provides personalized recommendations to users based on their preferences and behavior. These systems are commonly used in e-commerce, social media, music and video streaming platforms, and other applications where a large amount of content is available, and it can be challenging for users to find what they're looking for.

Recommendation systems work by analyzing data on user behavior, such as search queries, browsing history, and purchase history, as well as data on the items available, such as product descriptions, user ratings, and reviews. Based on this data, the system can generate personalized recommendations for each user, suggesting items that they may be interested in.

There are two main types of recommendation systems: content-based and collaborative filtering. Content-based systems recommend items based on the characteristics of the items themselves, while collaborative filtering systems recommend items based on the behavior of other users who have similar preferences.

There are also hybrid recommendation systems that combine both content-based and collaborative filtering approaches to generate more accurate and diverse recommendations. Other types of recommendation systems include knowledge-based systems, which make recommendations based on explicit user preferences, and context-aware systems, which take into account the user's location, time of day, and other contextual factors.

Overall, recommendation systems are valuable tools for businesses and users alike, enabling more personalized and efficient interactions with a wide range of content and products.

# Importing Libraries and Data

In [2]:
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
from surprise import Dataset, Reader, SVD
from surprise.model_selection import cross_validate
from sklearn.feature_extraction.text import CountVectorizer


In [3]:
# Load data from CSV files
final_data = pd.read_csv('/kaggle/input/hybrid-filtering-system/FinalData.csv')
ratings = pd.read_csv('/kaggle/input/hybrid-filtering-system/ratings.csv')
rating_count = pd.read_csv('/kaggle/input/hybrid-filtering-system/RatingsCount.csv')
avg_rating = pd.read_csv('/kaggle/input/hybrid-filtering-system/AverageRatings.csv')

# Data Preparation

In [4]:
books = pd.merge(final_data, avg_rating, on="book_id")
books = pd.merge(books, rating_count, on="book_id")
books["total_ratings"] = books["rating_y"] * books["rating_x"]
ratings = pd.merge(ratings, books[["book_id", "title"]], on="book_id")
books

Unnamed: 0,book_id,authors,title,Genres,rating_x,rating_y,total_ratings
0,1,Suzanne Collins,"The Hunger Games (The Hunger Games, #1)",SciFi;Drama,4.279707,22806,97603.0
1,2,"J.K. Rowling, Mary GrandPré",Harry Potter and the Sorcerer's Stone (Harry P...,Fantasy;Young-Age,4.351350,21850,95077.0
2,3,Stephenie Meyer,"Twilight (Twilight, #1)",Fantasy,3.214341,16931,54422.0
3,4,Harper Lee,To Kill a Mockingbird,Self-Help;Drama,4.329369,19088,82639.0
4,5,F. Scott Fitzgerald,The Great Gatsby,Drama,3.772224,16604,62634.0
...,...,...,...,...,...,...,...
994,995,Michelle Hodkin,"The Unbecoming of Mara Dyer (Mara Dyer, #1)",Fiction,3.961576,1015,4021.0
995,996,Janet Evanovich,"Three to Get Deadly (Stephanie Plum, #3)",Crime,4.007496,1334,5346.0
996,997,Emmuska Orczy,The Scarlet Pimpernel,History;Fiction,3.971863,1315,5223.0
997,998,"Jon Stone, Michael J. Smollin",The Monster at the End of this Book,Fiction;Kids,4.452181,1307,5819.0


In [5]:
# Preprocess the dataset
books = books.fillna("")


# Collaborative Filtering System

Collaborative filtering is a type of recommendation system that predicts a user's interests or preferences based on the behavior and preferences of similar users. Collaborative filtering works by identifying users who have similar patterns of interaction with the system and recommending items that those similar users have expressed a preference for.

Collaborative filtering can be based on explicit or implicit feedback. Explicit feedback is when users rate or review items, while implicit feedback is when the system infers user preferences based on their behavior, such as their browsing history or purchase history.

There are two main types of collaborative filtering: user-based and item-based. User-based collaborative filtering recommends items to a user based on the preferences of other users who are similar to them. Item-based collaborative filtering, on the other hand, recommends items to a user based on the similarity of the items they have interacted with in the past.

Collaborative filtering systems have some advantages over other types of recommendation systems. For example, they can make recommendations for new or unpopular items that do not yet have many ratings or reviews. However, collaborative filtering systems can also suffer from the "cold start" problem, where it is difficult to make accurate recommendations for new users or items that have not yet been rated or reviewed.

Overall, collaborative filtering is a popular and effective approach to recommendation systems that can generate accurate and personalized recommendations for users based on the behavior and preferences of similar users.

In [6]:
# Define the collaborative filtering recommender system
user_ratings = pd.pivot_table(ratings, index="user_id", columns="book_id", values="rating")

def collaborative_filtering_recommender(user_id):
    user_books = user_ratings.loc[user_id].dropna().index
    book_similarities = pd.DataFrame()
    for book_id in user_books:
        similar_books = user_ratings.corrwith(user_ratings[book_id])
        book_similarities[book_id] = similar_books
    recommendations = book_similarities.mean(axis=1).sort_values(ascending=False).head(10)
    return books.loc[recommendations.index]

In [7]:
# Test the collaborative filtering recommender system
print(collaborative_filtering_recommender(1))

  c = cov(x, y, rowvar, dtype=dtype)
  c *= np.true_divide(1, fact)


         book_id                                     authors  \
book_id                                                        
532          533                                  Harper Lee   
800          801                            Jonathan Tropper   
878          879  Upton Sinclair, Earl Lee, Kathleen DeGrave   
720          721                                 Allie Brosh   
363          364                                   Dr. Seuss   
990          991                              Rainbow Rowell   
762          763                               Toni Morrison   
868          869             Muriel Barbery, Alison Anderson   
346          347                                  C.S. Lewis   
954          955                             Sophie Kinsella   

                                                     title  \
book_id                                                      
532                                      Go Set a Watchman   
800                              This is Wher

# Content Based Filtering System

Content-based filtering is a type of recommendation system that suggests items to a user based on the characteristics or features of the items themselves. The system analyzes the attributes of items that the user has interacted with in the past, such as their text, images, audio, or video content, and generates recommendations for similar items.

Content-based filtering works by building a user profile based on the features of the items that the user has interacted with in the past. The system then identifies other items with similar features and recommends those to the user. For example, if a user has interacted with several science fiction movies in the past, a content-based filtering system might recommend other science fiction movies based on the genre, actors, and plot elements of the movies they have already watched.

Content-based filtering systems can be particularly useful for recommending items that are less popular or less well-known, as they are not dependent on the behavior of other users to generate recommendations. Additionally, content-based filtering systems can be effective for users with unique or niche preferences, as they can recommend items based on specific features that the user has expressed a preference for.

In [8]:
from sklearn.metrics.pairwise import linear_kernel

# Define the content-based recommender system
tfidf = TfidfVectorizer(stop_words="english")
book_features = tfidf.fit_transform(books["authors"] + " " + books["title"])
book_sim = linear_kernel(book_features, book_features)

def content_recommender(title):
    book_idx = books[books["title"]==title].index[0]
    sim_scores = list(enumerate(book_sim[book_idx]))
    sim_scores = sorted(sim_scores, key=lambda x: x[1], reverse=True)
    sim_scores = sim_scores[1:11]
    book_indices = [i[0] for i in sim_scores]
    return books.iloc[book_indices]

# Test the content-based recommender system
print(content_recommender("The Hobbit"))

     book_id                                            authors  \
963      964                                     J.R.R. Tolkien   
465      466  Chuck Dixon, J.R.R. Tolkien, David Wenzel, Sea...   
610      611   J.R.R. Tolkien, Christopher Tolkien, Ted Nasmith   
154      155                                     J.R.R. Tolkien   
160      161                                     J.R.R. Tolkien   
18        19                                     J.R.R. Tolkien   
188      189                                     J.R.R. Tolkien   
0          1                                    Suzanne Collins   
1          2                        J.K. Rowling, Mary GrandPré   
2          3                                    Stephenie Meyer   

                                                 title  \
963  J.R.R. Tolkien 4-Book Boxed Set: The Hobbit an...   
465                          The Hobbit: Graphic Novel   
610           The Silmarillion (Middle-Earth Universe)   
154         The Two Towers (Th

# Hybrid recommender system

A hybrid recommender system is a type of recommendation system that combines multiple approaches to generate more accurate and diverse recommendations for users. Hybrid systems can combine different types of recommendation techniques, such as content-based filtering, collaborative filtering, knowledge-based recommendation, and more, to provide more personalized and relevant recommendations for users.

There are two main types of hybrid recommender systems: model-based and feature-based. Model-based hybrid systems combine multiple recommendation models, such as collaborative filtering and content-based filtering, into a single model that generates recommendations. Feature-based hybrid systems, on the other hand, use multiple features of items, such as genre, director, and actor, to generate recommendations.

In [9]:
# Define the hybrid recommender system
def hybrid_recommender(user_id):
    user_books = user_ratings.loc[user_id].dropna().index
    book_scores = pd.DataFrame(book_sim).loc[user_books].sum()
    top_books_content = book_scores.sort_values(ascending=False).head(10).index
    top_books_cf = collaborative_filtering_recommender(user_id).index
    top_books = list(set(top_books_content).union(set(top_books_cf)))
    return books.loc[top_books]

# Test the hybrid recommender system
print(hybrid_recommender(1))

  c = cov(x, y, rowvar, dtype=dtype)
  c *= np.true_divide(1, fact)


     book_id                                     authors  \
842      843                                  E.L. James   
720      721                                 Allie Brosh   
532      533                                  Harper Lee   
22        23                 J.K. Rowling, Mary GrandPré   
150      151                                Rick Riordan   
346      347                                  C.S. Lewis   
990      991                              Rainbow Rowell   
95        96                                  E.L. James   
800      801                            Jonathan Tropper   
33        34                                  E.L. James   
98        99                                  E.L. James   
762      763                               Toni Morrison   
868      869             Muriel Barbery, Alison Anderson   
421      422                                J.K. Rowling   
40        41                                Rick Riordan   
363      364                            