In [95]:
import pandas as pd
import numpy as np
import json
import clips

## Load & Pre-Process Dataset

In [2]:
data_df = pd.read_csv('../data/raw/amazon_popular_books_dataset.csv', usecols=['title', 'categories', 'rating', 'reviews_count'])

# Pre-process Dataset
data_df['rating'] = pd.to_numeric(data_df['rating'].str.slice(0, 3))

# Add weighted rating
m = 100  # set the minimum number of reviews to 100
C = data_df['rating'].mean()  # calculate the mean rating across all books

# define a function to calculate the weighted rating
def weighted_rating(row):
    v = row['reviews_count']
    R = row['rating']
    return (v / (v+m)) * R + (m / (v+m)) * C

# apply the function to each row of the dataframe to calculate the weighted rating
data_df['weighted_rating'] = data_df.apply(weighted_rating, axis=1)

# Remove "Books" entry from categories
data_df['categories'] = data_df['categories'].apply(lambda categories: json.loads(categories)[1:])

data_df

Unnamed: 0,rating,reviews_count,title,categories,weighted_rating
0,4.6,13451,Wuthering Heights (Collins Classics),"[Literature & Fiction, Genre Fiction]",4.600165
1,4.8,16628,THE DAYS THE CRAYONS QUIT,"[Children's Books, Literature & Fiction]",4.798938
2,4.8,11275,War Lord: Book 13 (The Last Kingdom Series),"[Literature & Fiction, Genre Fiction]",4.798438
3,4.8,15520,Code Name Bananas: The hilarious and epic new ...,"[Children's Books, Literature & Fiction]",4.798863
4,4.8,10884,Skincare: The award-winning ultimate no-nonsen...,"[Crafts, Hobbies & Home, Home Improvement & De...",4.798383
...,...,...,...,...,...
2264,4.2,17923,Unspeakable Things,"[Mystery, Thriller & Suspense, Thrillers & Sus...",4.202343
2265,4.8,25304,What to Expect When You’re Expecting,"[Health, Fitness & Dieting, Women's Health, Pr...",4.799301
2266,4.7,11040,The Home Edit: A Guide to Organizing and Reali...,"[Crafts, Hobbies & Home, Home Improvement & De...",4.699303
2267,4.4,28030,The Family Upstairs: A Novel,"[Mystery, Thriller & Suspense, Thrillers & Sus...",4.400790


## Choose 10 Random Book Titles

In [138]:
random_10_rows = data_df.sample(n=10)
random_10_titles = random_10_rows['title'].to_list()
random_10_titles

['Astrophysics for People in a Hurry (Astrophysics for People in a Hurry Series)',
 'Running Blind: Jack Reacher, Book 4',
 'The Immortal Life of Henrietta Lacks',
 'The Boy, the Mole, the Fox and the Horse',
 'The Immortal Life of Henrietta Lacks',
 'Burn After Writing (Pink)',
 'The Home Edit: A Guide to Organizing and Realizing Your House Goals (Includes Refrigerator Labels Download)',
 'Humans',
 'Diagnostic and Statistical Manual of Mental Disorders, 5th Edition: DSM-5',
 'The Silmarillion']

## Get Unique Categories

In [139]:
categories = data_df.explode('categories')['categories'].unique()

## Create CLIPS environment

In [140]:
env = clips.Environment()

# Book Template
BOOK_TEMPLATE_STRING = """
(deftemplate book
    (slot title (type STRING))
    (slot category (type STRING))
    (slot w_rating (type FLOAT)))
"""
env.build(BOOK_TEMPLATE_STRING)

# Categorized Book Template
CATEGORIZED_BOOK_TEMPLATE_STRING = """
(deftemplate categorized-book
    (slot title (type STRING))
    (slot category (type STRING)))
"""
env.build(CATEGORIZED_BOOK_TEMPLATE_STRING)

# Uncategorized Book Template
UNCATEGORIZED_BOOK_TEMPLATE_STRING = """
(deftemplate uncategorized-book
    (slot title (type STRING)))
"""
env.build(UNCATEGORIZED_BOOK_TEMPLATE_STRING)

# Recommended Book Template
RECOMMENDED_BOOK_TEMPLATE_STRING = """
(deftemplate recommended-book
    (slot title (type STRING))
    (slot w_rating (type FLOAT)))
"""
env.build(RECOMMENDED_BOOK_TEMPLATE_STRING)

# Category Counter Template
CATEGORY_COUNTER_TEMPLATE_STRING = """
(deftemplate category-counter
    (slot category (type STRING))
    (slot count (type INTEGER)))
"""
env.build(CATEGORY_COUNTER_TEMPLATE_STRING)

# Categorized Books Rule
CATEGORIZED_BOOKS_RULE_STRING = """
(defrule categorize-books
    "Categorize books that have a category."
    (declare (salience 50))
    (book (title ?title) (category ?category))
    =>
    (assert (categorized-book (title ?title) (category ?category))))
"""
env.build(CATEGORIZED_BOOKS_RULE_STRING)

# Count Category Rule
COUNT_CATEGORY_RULE_STRING = """
(defrule count-category
    "Counter for each category."
    (declare (salience 40))
    ?categorized-book-adr <- (categorized-book (title ?title) (category ?category))
    ?category-counter-adr <- (category-counter (count ?count) (category ?category-title&:(eq ?category-title ?category)))
    ?uncategorized-book-adr <- (uncategorized-book (title ?uncategorized-title&:(eq ?uncategorized-title ?title)))
    =>
    (retract ?categorized-book-adr ?category-counter-adr)
    (assert (category-counter (category ?category) (count (+ ?count 1)))))
"""
env.build(COUNT_CATEGORY_RULE_STRING)

# Highest Category Count Rule
HIGHEST_CATEGORY_COUNT_RULE_STRING = """
(defrule highest-category-count
    "Find the category with the highest count."
    (declare (salience 30))
    (category-counter (count ?n1) (category ?category))
    (not (category-counter (count ?n2&:(> ?n2 ?n1))))
    =>
    (assert (highest-category ?category)))
"""
env.build(HIGHEST_CATEGORY_COUNT_RULE_STRING)

# Rule that picks only one "highest-category"
PICK_ONE_HIGHEST_CATEGORY_RULE_STRING = """
(defrule pick-one-highest-category
    "Picks only one 'highest category', just in case there are two categories with the same count."
    (declare (salience 20))
    (highest-category ?category)
    (not (highest-category-picked))
    =>
    (assert (most-popular-category ?category))
    (assert (highest-category-picked)))
"""
env.build(PICK_ONE_HIGHEST_CATEGORY_RULE_STRING)

# Return Recommended Books Rule
RETURN_RECOMMENDED_BOOKS_RULE_STRING = """
(defrule return-recommended-books
    "Return recommended books."
    (declare (salience 10))
    (book (title ?title) (category ?category) (w_rating ?w_rating))
    (most-popular-category ?most-popular-category&:(eq ?most-popular-category ?category))
    =>
    (assert (recommended-book (title ?title) (w_rating ?w_rating))))
"""
env.build(RETURN_RECOMMENDED_BOOKS_RULE_STRING)

book_template = env.find_template('book')
uncategorized_book_template = env.find_template('uncategorized-book')
category_counter_template = env.find_template('category-counter')

# Assert each category and set its count to 0
for category in categories:
    category_counter_template.assert_fact(category=category, count=0)

# Assert each book and its respective category
for _, row in data_df.iterrows():
    for category in row['categories']:
        book_template.assert_fact(
            title=row['title'],
            category=category,
            w_rating=row['weighted_rating']
        )

for title in random_10_titles:
    uncategorized_book_template.assert_fact(title=title)

env.run()

4542

## Recommended Books

In [141]:
most_popular_category = None
recommended_books = []

for fact in env.facts():
    if fact.template.name == 'most-popular-category':
        most_popular_category = str(fact)[24:-2]
    if fact.template.name == 'recommended-book':
        fact_str = str(fact)
        book_title = fact_str[26:fact_str.index('")')]
        w_rating = float(fact_str[fact_str.index('(w_rating') + 10:-2])

        recommended_books.append({
            'book_title': book_title,
            'w_rating': w_rating
        })

# Sort by weighted rating
recommended_books = map(
    lambda book: book['book_title'],
    sorted(recommended_books, key=lambda book: book['w_rating'], reverse=True))

# # Remove books that are in the original chosen 10 book titles
recommended_books = list(set(recommended_books) - set(random_10_titles))[:10]

print('Most Popular Category: ')
print(most_popular_category)
print()

print('Top 10 Recommended Books: ')
recommended_books

Most Popular Category: 
Politics & Social Sciences

Top 10 Recommended Books: 


['American Marxism',
 'Blackout: How Black America Can Make Its Second Escape from the Democrat Plantation',
 'Live Free or Die: America (and the World) on the Brink',
 '12 Years A Slave',
 'The New Jim Crow: Mass Incarceration in the Age of Colorblindness',
 'Peril',
 'Live Free Or Die: America (and the World) on the Brink',
 'Of Mice and Men (Penguin Great Books of the 20th Century)',
 '1984',
 'Permanent Record']