In [4]:
# Import all necessary packages

import pandas as pd 
from mlxtend.frequent_patterns import apriori, association_rules
from sklearn.pipeline import Pipeline

Pyarrow will become a required dependency of pandas in the next major release of pandas (pandas 3.0),
(to allow more performant data types, such as the Arrow string type, and better interoperability with other libraries)
but was not found to be installed on your system.
If this would cause problems for you,
please provide us feedback at https://github.com/pandas-dev/pandas/issues/54466
        
  import pandas as pd


In [5]:
# Load datset

subset = pd.read_csv('subset.csv')

In [6]:
subset.head()

Unnamed: 0,item,user,rating,timestamp
0,1713353,A1REUF3A1YCPHM,5.0,1112140800
1,1713353,A1YRBRK2XM5D5,5.0,1081036800
2,1713353,A1V8ZR5P78P4ZU,5.0,1077321600
3,1713353,A2ZB06582NXCIV,5.0,1475452800
4,1713353,ACPQVNRD3Z09X,5.0,1469750400


In [12]:
book_id = subset['item'].unique()

In [10]:
import requests

def get_all_book_info_by_isbn(isbn):
    api_key = 'AIzaSyDvliA-0qJMm-fDsCc9gaIxaJwoJdbOR4k'  # Replace this with your actual Google API key
    url = f'https://www.googleapis.com/books/v1/volumes?q=isbn:{isbn}&key={api_key}'
    
    response = requests.get(url)
    if response.status_code == 200:
        data = response.json()
        if data['totalItems'] > 0:
            book_info = data['items'][0]['volumeInfo']
            sale_info = data['items'][0].get('saleInfo', {})
            access_info = data['items'][0].get('accessInfo', {})
            # Extracting all available information
            full_info = {
                'title': book_info.get('title'),
                'authors': book_info.get('authors', 'N/A'),
                'publisher': book_info.get('publisher', 'N/A'),
                'publishedDate': book_info.get('publishedDate', 'N/A'),
                'description': book_info.get('description', 'N/A'),
                'industryIdentifiers': book_info.get('industryIdentifiers', 'N/A'),
                'pageCount': book_info.get('pageCount', 'N/A'),
                'dimensions': book_info.get('dimensions', 'N/A'),
                'printType': book_info.get('printType', 'N/A'),
                'categories': book_info.get('categories', 'N/A'),
                'averageRating': book_info.get('averageRating', 'N/A'),
                'ratingsCount': book_info.get('ratingsCount', 'N/A'),
                'contentVersion': book_info.get('contentVersion', 'N/A'),
                'imageLinks': book_info.get('imageLinks', 'N/A'),
                'language': book_info.get('language', 'N/A'),
                'previewLink': book_info.get('previewLink', 'N/A'),
                'infoLink': book_info.get('infoLink', 'N/A'),
                'canonicalVolumeLink': book_info.get('canonicalVolumeLink', 'N/A'),
                'saleability': sale_info.get('saleability', 'N/A'),
                'isEbook': sale_info.get('isEbook', 'N/A'),
                'listPrice': sale_info.get('listPrice', 'N/A'),
                'retailPrice': sale_info.get('retailPrice', 'N/A'),
                'buyLink': sale_info.get('buyLink', 'N/A'),
                'ebookInfo': access_info.get('epub', 'N/A'),
                'webReaderLink': access_info.get('webReaderLink', 'N/A'),
                'accessViewStatus': access_info.get('accessViewStatus', 'N/A'),
                'quoteSharingAllowed': access_info.get('quoteSharingAllowed', 'N/A')
            }
            return full_info
        else:
            return "No book found with that ISBN."
    else:
        return "Failed to retrieve data."

# Example usage
isbn = '0001713353'
all_book_info = get_all_book_info_by_isbn(isbn)
all_book_info


{'title': 'The King, the Mice and the Cheese',
 'authors': ['Nancy Gurney', 'Eric Gurney'],
 'publisher': 'Harpercollins Pub Limited',
 'publishedDate': '1986-03-01',
 'description': 'Lower primary In the style of Dr. Seuss.',
 'industryIdentifiers': [{'type': 'ISBN_10', 'identifier': '0001713353'},
  {'type': 'ISBN_13', 'identifier': '9780001713352'}],
 'pageCount': 64,
 'dimensions': 'N/A',
 'printType': 'BOOK',
 'categories': ['Animals'],
 'averageRating': 'N/A',
 'ratingsCount': 'N/A',
 'contentVersion': 'preview-1.0.0',
 'imageLinks': {'smallThumbnail': 'http://books.google.com/books/content?id=T1_iGwAACAAJ&printsec=frontcover&img=1&zoom=5&source=gbs_api',
  'thumbnail': 'http://books.google.com/books/content?id=T1_iGwAACAAJ&printsec=frontcover&img=1&zoom=1&source=gbs_api'},
 'language': 'en',
 'previewLink': 'http://books.google.com/books?id=T1_iGwAACAAJ&dq=isbn:0001713353&hl=&cd=1&source=gbs_api',
 'infoLink': 'http://books.google.com/books?id=T1_iGwAACAAJ&dq=isbn:0001713353&hl=

In [38]:
# Revised pipeline with class

# convert timestamp
class TimeStamp_convert:
    def __init__(self) -> None:
        pass

    def fit(self, X, y=None):
        return self
    
    def transform(self, dataset):
        dataset.loc[:, 'date'] = pd.to_datetime(dataset['timestamp'], unit='s')
        return dataset

# onehot encode
class Onehot:
    def __init__(self, recommend_threshold) -> None:
        self.recommend_threshold = recommend_threshold

    def fit(self, X, y=None):
        return self
    
    def transform(self, dataset):
        agg = dataset.groupby(['user', 'item']).rating.mean().reset_index()
        pivot = agg.pivot(index='user', columns='item', values='rating')
        onehot = pivot >= self.recommend_threshold
        onehot.fillna(False, inplace=True)
        return onehot       

# apriori
class Apriori:
    def __init__(self, min_support) -> None:
        self.min_support = min_support

    def fit(self, X, y=None):
        return self
    
    def transform(self, dataset):
        frequent_items = apriori(dataset, min_support=self.min_support, use_colnames=True).sort_values(by='support', ascending=False)
        return frequent_items    

# generate rules
class Rules:
    def __init__(self, metric, min_threshold) -> None:
        self.metric = metric
        self.min_threshold = min_threshold

    def fit(self, X, y=None):
        return self
    
    def transform(self, dataset):
        rules = association_rules(dataset, metric='support', min_threshold=0.0)
        return rules    

In [39]:
# Make pipeline

from sklearn.pipeline import Pipeline

pipeline = Pipeline(steps=[
    ('convert timestamp', TimeStamp_convert()),

    ('onehot encode', Onehot(recommend_threshold=0.8)),
    # Default: normalized ratings lower than 0.8/1 (4/5) traslate into 'do not recommend'

    ('apriori prunning', Apriori(min_support = 0.001)),
    # Default: items with support lower than 0.001 prunned 
    
    ('generate rules', Rules(metric='confidence', min_threshold=0.0))
    # Default: rules with confidence higher than 0.0 showed
    # Adjustment options (metric): 'support', 'confidence', 'lift', 'conviction', 'leverage', 'zhangs_metric'
])

In [40]:
pipeline.fit(subset)
rules = pipeline.transform(subset)
rules

Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,leverage,conviction,zhangs_metric
0,(0002006995),(0002005379),0.008452,0.019171,0.002577,0.304878,15.902832,0.002415,1.411017,0.945106
1,(0002005379),(0002006995),0.019171,0.008452,0.002577,0.134409,15.902832,0.002415,1.145515,0.955435
2,(0001950061),(0001720392),0.022263,0.149041,0.00134,0.060185,0.403815,-0.001978,0.905453,-0.601593
3,(0001720392),(0001950061),0.149041,0.022263,0.00134,0.00899,0.403815,-0.001978,0.986606,-0.634364
4,(0001384198),(0001720392),0.094104,0.149041,0.001237,0.013143,0.088187,-0.012789,0.862292,-0.919443
5,(0001720392),(0001384198),0.149041,0.094104,0.001237,0.008299,0.088187,-0.012789,0.913476,-0.923957


After getting the rules, you can apply filters to further narrow down to the results you need.

Below is an example how to do that.

In [24]:
filtered_rules = rules[(rules['confidence']>=0.01) & (rules['zhangs_metric']>=0)]
filtered_rules

Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,leverage,conviction,zhangs_metric
0,(B017V4IPPO),(B017WJ5PR4),0.007398,0.007504,0.007398,1.0,133.267606,0.007343,inf,0.999894
1,(B017WJ5PR4),(B017V4IPPO),0.007504,0.007398,0.007398,0.985915,133.267606,0.007343,70.474741,1.0
2,(1720016763),(1718118562),0.003382,0.004016,0.001374,0.40625,101.15625,0.00136,1.677447,0.993474
3,(1718118562),(1720016763),0.004016,0.003382,0.001374,0.342105,101.15625,0.00136,1.514859,0.994107
