## Importing the required modules

In [248]:
!pip install stemming

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/


In [249]:
import string
import re
from nltk.corpus import stopwords
from stemming.porter2 import stem
import networkx 
from operator import itemgetter

## Accessing the meta-data

In [250]:
Metadata_File = open('/content/amazon-meta.txt', 'r', encoding = 'utf-8')
Amazon_Products = {}

In [251]:
import nltk
nltk.download('stopwords')

[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


True

In [252]:
import re
import string
from nltk.corpus import stopwords
from nltk.stem import PorterStemmer

ps = PorterStemmer()
stopwords = set(stopwords.words("english"))
Amazon_Products = {}

def process_categories(categories):
    categories = re.compile('[%s]' % re.escape(string.digits + string.punctuation)).sub(' ', categories)
    categories = ' '.join(set(categories.lower().split()) - set(stopwords))
    categories = ' '.join(ps.stem(word) for word in categories.split())
    return categories

def add_metadata_to_products():
    global Amazon_Products
    with open('/content/amazon-meta.txt', 'r') as metadata_file:
      meta_data = {}
      for line in metadata_file:
        line = line.strip()

        if line.startswith('Id'):
            meta_data['Id'] = line[3:].strip()
        elif line.startswith('ASIN'):
            meta_data['ASIN'] = line[5:].strip()
        elif line.startswith('title'):
            meta_data['Title'] = ' '.join(line[6:].strip().split())
        elif line.startswith('group'):
            meta_data['Group'] = line[6:].strip()
        elif line.startswith('salesrank'):
            meta_data['SalesRank'] = int(line[10:].strip())
        elif line.startswith('similar'):
            meta_data['Copurchased'] = ' '.join(line.split()[2:])
        elif line.startswith('categories'):
            categories = ''
            num_categories = int(line.split()[1].strip())
            for i in range(num_categories):
                categories += metadata_file.readline().strip()
            meta_data['Categories'] = process_categories(categories)
        elif line.startswith('reviews'):
            total_reviews, avg_rating = 0, 0.0
            for review_line in metadata_file:
                review_line = review_line.strip()
                if not review_line:
                    break
                if review_line.startswith('total'):
                    total_reviews = int(review_line.split()[2].strip())
                    avg_rating = float(review_line.split()[7].strip())
            meta_data['TotalReviews'] = total_reviews
            meta_data['AvgRating'] = avg_rating

            # reset for next product
            Amazon_Products[meta_data['ASIN']] = meta_data
            meta_data = {}
add_metadata_to_products()

In [253]:
# Amazon_Products

In [254]:
# Amazon_Products.items()

## Filtering out data related to book records

In [255]:
# Amazon_Books.keys()

In [256]:
Amazon_Books = {}
for asin, metadata in Amazon_Products.items():
    if (metadata['Group'] == 'Book'):
        Amazon_Books[asin] = Amazon_Products[asin]

In [257]:
for asin, metadata in Amazon_Books.items():
    Amazon_Books[asin]['Copurchased'] = ' '.join([cp for cp in metadata['Copurchased'].split() if cp in Amazon_Books.keys()])

In [258]:
Amazon_Books

{'0827229534': {'Id': '1',
  'ASIN': '0827229534',
  'Title': 'Patterns of Preaching: A Sermon Sampler',
  'Group': 'Book',
  'SalesRank': 396585,
  'Copurchased': '0804215715 0687023955',
  'Categories': 'religion sermon clergi book spiritu subject preach christian',
  'TotalReviews': 0,
  'AvgRating': 0.0},
 '0738700797': {'Id': '2',
  'ASIN': '0738700797',
  'Title': 'Candlemas: Feast of Flames',
  'Group': 'Book',
  'SalesRank': 168596,
  'Copurchased': '1567184960',
  'Categories': 'religion book base spiritu wicca religion witchcraft subject earth',
  'TotalReviews': 0,
  'AvgRating': 0.0},
 '0486287785': {'Id': '3',
  'ASIN': '0486287785',
  'Title': 'World War II Allied Fighter Planes Trading Cards',
  'Group': 'Book',
  'SalesRank': 1270652,
  'Copurchased': '',
  'Categories': 'garden home craft book subject hobbi gener',
  'TotalReviews': 0,
  'AvgRating': 0.0},
 '0842328327': {'Id': '4',
  'ASIN': '0842328327',
  'Title': 'Life Application Bible Commentary: 1 and 2 Timothy 

## Adding recommendation property

In [259]:
Copurchase_Graph = networkx.Graph() 
for asin, metadata in Amazon_Books.items():
    Copurchase_Graph.add_node(asin) 
    for a in metadata['Copurchased'].split():
        Copurchase_Graph.add_node(a.strip()) 
        similarity = 0 
        n1 = set((Amazon_Books[asin]['Categories']).split())
        n2 = set((Amazon_Books[a]['Categories']).split()) 
        n1In2 = n1 & n2           
        n1Un2 = n1 | n2
        if(len(n1In2)) > 0:
            similarity = round(len(n1In2) / len(n1Un2), 2) 
            Copurchase_Graph.add_edge(asin, a.strip(), weight = similarity)

In [260]:
dc = networkx.degree(Copurchase_Graph) 
for asin in networkx.nodes(Copurchase_Graph):
    metadata = Amazon_Books[asin] 
    metadata['DegreeCentrality'] = int(dc[asin]) 
    ego = networkx.ego_graph(Copurchase_Graph, asin, radius = 1) 
    metadata['ClusteringCoeff'] = round(networkx.average_clustering(ego), 2)
    Amazon_Books[asin] = metadata 

In [261]:
Amazon_Books_File = open('amazon-books.txt', 'w', encoding = 'utf-8', errors = 'ignore')

Amazon_Books_File.write("Id\t" + "ASIN\t" + "Title\t" + "Categories\t" + "Group\t" + "Copurchased\t" + "SalesRank\t" + "TotalReviews\t" + "AvgRating\t" "DegreeCentrality\t" + 
                        "ClusteringCoeff\n") 

for asin, metadata in Amazon_Books.items(): # converting the meta-data into txt file
     Amazon_Books_File.write(metadata['Id'] + "\t" + \
                             asin + "\t" +  \
                             metadata['Title'] + "\t" + \
                             metadata['Categories'] + "\t" + \
                             metadata['Group'] + "\t" +  \
                             metadata['Copurchased'] + "\t" + \
                             str(metadata['SalesRank']) + "\t" + \
                             str(metadata['TotalReviews']) + "\t" + 
                             str(metadata['AvgRating']) + "\t" + \
                             str(metadata['DegreeCentrality']) + "\t" + \
                             str(metadata['ClusteringCoeff']) + "\n")

Amazon_Books_File.close()

In [262]:
# writing the adjacency edge list
Amazon_Books_File = open("amazon-books-copurchase.edgelist", 'wb') 
networkx.write_weighted_edgelist(Copurchase_Graph, Amazon_Books_File) 
Amazon_Books_File.close() 

## Reading the text file

In [263]:
Books_File = open('amazon-books.txt', 'r', encoding = 'utf-8', errors = 'ignore')
Books = {}
Books_File.readline()
for line in Books_File:
    cell = line.split("\t") 
    MetaData = {}
    MetaData['Id'] = cell[0].strip() 
    ASIN = cell[1].strip()
    MetaData['Title'] = cell[2].strip() 
    MetaData['Categories'] = cell[3].strip() 
    MetaData['Group'] = cell[4].strip() 
    MetaData['Copurchased'] = cell[5].strip()
    MetaData['SalesRank'] = int(cell[6].strip())
    MetaData['TotalReviews'] = int(cell[7].strip())
    MetaData['AvgRating'] = float(cell[8].strip())
    MetaData['DegreeCentrality'] = int(cell[9].strip())
    MetaData['ClusteringCoeff'] = float(cell[10].strip())
    Books[ASIN] = MetaData 
Books_File.close()

In [264]:
Books_File = open("amazon-books-copurchase.edgelist", "rb") 
Copurchase_Graph = networkx.read_weighted_edgelist(Books_File) 
Books_File.close() 

## Giving Book id as input for recommending books

In [276]:
print("Looking for Recommendations for Customer Purchasing this Book: ")
print("---------------------------------------------------------------")
Purchased_ASIN = '0843939338'
print("ASIN = ", Purchased_ASIN)
print("Title = ", Books[Purchased_ASIN]['Title'])
print("SalesRank = ", Books[Purchased_ASIN]['SalesRank'])
print("TotalReviews = ", Books[Purchased_ASIN]['TotalReviews'])
print("AvgRating = ", Books[Purchased_ASIN]['AvgRating'])
print("DegreeCentrality = ", Books[Purchased_ASIN]['DegreeCentrality'])
print("ClusteringCoeff = ", Books[Purchased_ASIN]['ClusteringCoeff'])

Looking for Recommendations for Customer Purchasing this Book: 
---------------------------------------------------------------
ASIN =  0843939338
Title =  White Wind
SalesRank =  43571
TotalReviews =  0
AvgRating =  0.0
DegreeCentrality =  1
ClusteringCoeff =  0.0


In [277]:
networkx.nodes(Copurchase_Graph)

NodeView(('0827229534', '0804215715', '0687023955', '0805415505', '0687179246', '0687173094', '0687336481', '0801021979', '082720230X', '0830815759', '0687045169', '0896225747', '1568540949', '0664258476', '0738700797', '1567184960', '0835608158', '1573929301', '0806524227', '080652247X', '0842328327', '0842328572', '0842328742', '0842328785', '0486220125', '0486401960', '0618138498', '0312117000', '0231118597', '080148605X', '0822217805', '0801487277', '0195110250', '0231118589', '0375709363', '0679730672', '0679739882', '0674009975', '0231073194', '0674003020', '0871318237', '0553577514', '0962741817', '1580540392', '0671010034', '0060184086', '1884820581', '0967271207', '0892817895', '0962741809', '0743237919', '0812991966', '0871318296', '1559362022', '1559360968', '1559361247', '0195110382', '1585741485', '0140246967', '0688149472', '0870211463', '1557508003', '0870211897', '0813338697', '1883809096', '0870210971', '1557500320', '0801868491', '0804100039', '0849311012', '157820032

In [278]:
n = Purchased_ASIN
ego = networkx.ego_graph(Copurchase_Graph, n, radius = 1)
Purchased_ASIN_Ego_Graph = networkx.Graph(ego)

In [279]:
threshold = 0.5 # finding the nodes having similarity measure based on category above the threshold value
Purchased_ASIN_Ego_Trim_Graph = networkx.Graph()
for f, t, e in Purchased_ASIN_Ego_Graph.edges(data = True):
    if e['weight'] >= threshold: 
        Purchased_ASIN_Ego_Trim_Graph.add_edge(f, t)

In [280]:
Purchased_ASIN_Ego_Trim_Graph

<networkx.classes.graph.Graph at 0x7fb0fbf9d060>

In [281]:
Purchased_ASIN_Neighbors = Purchased_ASIN_Ego_Trim_Graph.neighbors(Purchased_ASIN)

In [282]:
ASIN_Meta = []
for asin in Purchased_ASIN_Neighbors:
    ASIN = asin
    Title = Amazon_Books[ASIN]['Title']
    SalesRank = Amazon_Books[ASIN]['SalesRank']
    TotalReviews = Amazon_Books[ASIN]['TotalReviews']
    AvgRating = Amazon_Books[ASIN]['AvgRating']
    DegreeCentrality = Amazon_Books[ASIN]['DegreeCentrality']
    ASIN_Meta.append((ASIN, Title, SalesRank, TotalReviews, AvgRating, DegreeCentrality, ClusteringCoeff))

In [283]:
Top5_ByAbgRating_ThenByTotalReviews = sorted(ASIN_Meta, key = lambda x: (x[4], x[3]), reverse = True)[:5]

In [285]:
print()
print("Top Recommendations By AvgRating Then By TotalReviews for Users Purchased The Book: ")
print("--------------------------------------------------------------------------------------")
print('ASIN\t', 'Title\t', 'SalesRank\t', 'TotalReviews\t', 'AvgRating\t', 'DegreeCentrality\t', 'ClusteringCoeff')
for asin in Top5_ByAbgRating_ThenByTotalReviews:
    print(asin)

print()


Top Recommendations By AvgRating Then By TotalReviews for Users Purchased The Book: 
--------------------------------------------------------------------------------------
ASIN	 Title	 SalesRank	 TotalReviews	 AvgRating	 DegreeCentrality	 ClusteringCoeff
('0843947039', 'White Nights', 63959, 0, 0.0, 1, 0.0)

