In [3]:
import pandas as pd
import numpy as np
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

In [74]:
review_data = pd.read_csv(r"C:\Users\eeejo\Downloads\review_data.csv")

In [92]:
user_review_cnts = review_data['User_id'].value_counts()
active_users = user_review_cnts[(user_review_cnts >= 10) & (user_review_cnts <= 50)].index
active_data = review_data[review_data['User_id'].isin(active_users)].dropna()

In [93]:
active_data.shape

(477848, 13)

In [94]:
active_data['User_id'].value_counts()

User_id
AEC46NVS07MJ9     50
A17QPAKQVCORV3    50
A4IJAIG9455PT     50
AYJ22I3NM8ZFR     50
A231N46EIRUF89    50
                  ..
A1CSL3TFTFOTWH    10
A38EYASPLI0G0Z    10
A9YAI11CLYUA7     10
A15XI2BEGGFEOW    10
A31A3X5C2504M5    10
Name: count, Length: 27493, dtype: int64

In this part, we will generate sentiment score for each review/text for following analysis.

In [95]:
from textblob import TextBlob
def sentiment(text):
    blob = TextBlob(text)
    sentiment_score = blob.sentiment.polarity
    return sentiment_score

active_data['sentiment_score'] = active_data['review/text'].apply(sentiment)

active_data.head()

Unnamed: 0,Title,User_id,profileName,review/helpfulness,review/score,review/time,review/summary,review/text,description,authors,publisher,publishedYear,categories
0,Its Only Art If Its Well Hung!,AVCGYZL8FQQTD,"Jim of Oz ""jim-of-oz""",7/7,4.0,1999-10-23,Nice collection of Julie Strain images,This is only for Julie Strain fans. It's a col...,Unknown,Julie Strain,Unknown,1996.0,Fiction
1,Dr. Seuss: American Icon,A30TK6U7DNS82R,Kevin Killian,10/10,5.0,2004-09-21,Really Enjoyed It,I don't care much for Dr. Seuss but after read...,Philip Nel takes a fascinating look into the k...,Philip Nel,A&C Black,2005.0,Biography & Autobiography
2,Dr. Seuss: American Icon,A3UH4UZ4RSVO82,John Granger,10/11,5.0,2004-03-09,Essential for every personal and Public Library,"If people become the books they read and if ""t...",Philip Nel takes a fascinating look into the k...,Philip Nel,A&C Black,2005.0,Biography & Autobiography
3,Dr. Seuss: American Icon,A2MVUWT453QH61,"Roy E. Perry ""amateur philosopher""",7/7,4.0,2004-07-25,Phlip Nel gives silly Seuss a serious treatment,"Theodore Seuss Geisel (1904-1991), aka &quot;D...",Philip Nel takes a fascinating look into the k...,Philip Nel,A&C Black,2005.0,Biography & Autobiography
4,Dr. Seuss: American Icon,A22X4XUPKF66MR,"D. H. Richards ""ninthwavestore""",3/3,4.0,2005-02-10,Good academic overview,Philip Nel - Dr. Seuss: American IconThis is b...,Philip Nel takes a fascinating look into the k...,Philip Nel,A&C Black,2005.0,Biography & Autobiography


In [98]:
target_user = 'A231N46EIRUF89'
target_user_books = set(active_data[(active_data['User_id'] == target_user)]['Title'])

common_users = set()
for book in target_user_books:
    users_marked_book = set(active_data[(active_data['Title'] == book) & (active_data['User_id'] != target_user) & (active_data['sentiment_score'].notna())]['User_id'])
    if len(users_marked_book) >= 5: 
        common_users.update(users_marked_book)
common_users.discard(target_user)

print(common_users)

{'ACKAP8O7E1QN3', 'A2686XXLSTQKK7', 'A36EEROMS3HRHV', 'A2YMPFOTTUE7FV', 'AZ4ESAJ3Y5ARA', 'A23BTLTIHMJN01', 'A3CNEVQOBHI6AT', 'A2FS38D943KX12', 'A2EF93YQ23LA5E', 'A3O2EENHOMRTIU', 'A2ERPJNBF6R23R', 'A2G9MO59TY2Z8L', 'AUCUB12HLJBU8', 'A3LTBD9T6ZO9E5', 'A27G3XOXB02R1C', 'A34NDNNY54LE1', 'A197XEDDNZXGYV', 'A379OQ6THHJKBJ', 'A1XZJ32DJS8YV2', 'A17QPAKQVCORV3', 'A3N6PCOSINNGFF', 'AM6SIH8AKMPRA', 'A1UORGGRUE5Z52', 'AXD81LK9824QK', 'A21XK37YF8Z8EE', 'A2ULBZ4PXSRNUT', 'A3A7TB29Y720EF', 'A5FKH3KH0AMH4', 'A1NO2UEAEBYEF6', 'AUM2E2LE0WGAA', 'A1P2HZ2H4L1BBK', 'A37AOPJUDRGQWK', 'A2P0GQ6HMJX285', 'AEXUTTC2WTN02', 'A3IHRX17E6S4I2', 'A1S70GB3XGCXIF', 'A3SUN80V5VHHPO', 'A3C8ETFYSGDTC', 'A2I1XGVO4EETN0', 'AH88WGWK9PMDL', 'A3IV1P4DO3XX09', 'A3KV4RQE2FVIZ0', 'A3LKP9IORVHTUU', 'A2MDQ0ZISTGZO5', 'A2RVOYWAS4U0HR', 'A65H9J7I56EXO', 'A2J2ZJKD9V8Q6M', 'A1V4JMQF2NQ9AS', 'A2E17AV8YW104C', 'A3V3MITG7EZ955', 'A3DRSOGQJRX10', 'A1Y3KJMVHVOUE1', 'A2JBM4IJ0ZRZ6E', 'AVCRMRB86II1E', 'A2W6PSPHJHT9DW', 'AL5N55UR11Z3M', 'A22U2

In [100]:
# calculate the sentimet score difference between target user and each user in common user list
user_sentiment_diffs = {}
for user_id in common_users:
    common_titles = set(active_data[(active_data['User_id'] == target_user)]['Title']).intersection(set(active_data[(active_data['User_id'] == user_id) & (active_data['sentiment_score'].notna())]['Title']))
    diffs = [abs(active_data[(active_data['User_id'] == target_user) & (active_data['Title'] == title)]['sentiment_score'].values[0] - active_data[(active_data['User_id'] == user_id) & (active_data['Title'] == title)]['sentiment_score'].values[0]) for title in common_titles]
    if diffs:
        user_sentiment_diffs[user_id] = np.mean(diffs)

for user_id, average_diff in user_sentiment_diffs.items():
    print("{} Sentiment Score Difference：{}".format(user_id, average_diff))

ACKAP8O7E1QN3 Sentiment Score Difference：0.040286111727299616
A2686XXLSTQKK7 Sentiment Score Difference：0.07944444444444443
A36EEROMS3HRHV Sentiment Score Difference：0.25745859213250516
A2YMPFOTTUE7FV Sentiment Score Difference：0.2426641414141414
AZ4ESAJ3Y5ARA Sentiment Score Difference：0.28180322966507176
A23BTLTIHMJN01 Sentiment Score Difference：0.2819696969696969
A3CNEVQOBHI6AT Sentiment Score Difference：0.04730769230769233
A2FS38D943KX12 Sentiment Score Difference：0.057628205128205136
A2EF93YQ23LA5E Sentiment Score Difference：0.04594235704308405
A3O2EENHOMRTIU Sentiment Score Difference：0.0
A2ERPJNBF6R23R Sentiment Score Difference：0.10031755683929597
A2G9MO59TY2Z8L Sentiment Score Difference：0.07021739130434779
AUCUB12HLJBU8 Sentiment Score Difference：0.42333333333333334
A3LTBD9T6ZO9E5 Sentiment Score Difference：0.10053451178451173
A27G3XOXB02R1C Sentiment Score Difference：0.2098611111111111
A34NDNNY54LE1 Sentiment Score Difference：0.018722943722943752
A197XEDDNZXGYV Sentiment Sco

In [104]:
three_most_similar_users = sorted(user_sentiment_diffs.items(), key=lambda x: x[1])[:3]
recommended_books = set()

for user_id, _ in three_most_similar_users:
    books_list = set(active_data[(active_data['User_id'] == user_id) & (active_data['review/score'] >= 4)]['Title'])
    # we will remove books that already marked by the target user
    books_list_filtered = books_list.difference(set(active_data[active_data['User_id'] == target_user]['Title']))
    recommended_books.update(books_list_filtered)

print("Recommended Book List")
for i, book in enumerate(recommended_books, 1):
    print("{}. {}".format(i, book))

Recommended Book List
1. Atlas Shrugged
2. FATHER AND CHILD REUNION
3. The Inspired Heart: An Artist's Journey of Transformation
4. Sexual Liberation or Sexual License?: The American Revolt Against Victorianism (American Ways Series)
5. Left to Tell: Discovering God Amidst The Rwandan Holocaust
6. The Right Questions: Truth, Meaning and Public Debate
7. Mysticism in American Literature: Thoreau's Quest and Whitman's Self
8. Captains and the Kings
9. Limitless Mind: A Guide to Remote Viewing and Transformation of Consciousness
10. NEW EARTH
11. Light: Medicine of the Future: How We Can Use It to Heal Ourselves NOW
12. On Ordered Liberty: A Treatise on the Free Society (Religion, Politics, and Society in the New Millennium)
13. The Inside Story on Teen Girls: Experts Answer Parents' Questions (Apa Lifetools)
14. Native Son (G K Hall Large Print Book Series)
15. Earth Under Fire: Humanity's Survival of the Ice Age
16. The Biology of Transcendence: A Blueprint of the Human Spirit
17. Chris