# Import libraries

In [1]:
# Importing necessary libraries
import pandas as pd
import numpy as np
import nltk
nltk.download('stopwords')
from nltk.corpus import stopwords
from sklearn.metrics.pairwise import linear_kernel
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.feature_extraction.text import TfidfVectorizer
from nltk.tokenize import RegexpTokenizer
import re
import string
import random
from PIL import Image
import requests
from io import BytesIO
import matplotlib.pyplot as plt
%matplotlib inline
from sklearn.metrics.pairwise import cosine_similarity
from gensim.models import Word2Vec
from gensim.models.phrases import Phrases, Phraser
from matplotlib import pyplot
from gensim.models import KeyedVectors

[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Unzipping corpora/stopwords.zip.


In [2]:
import warnings
warnings.filterwarnings('ignore')

# Data Transformation

In [3]:
# Read description dataset and assign it to the variable d
d = pd.read_csv("description.csv", encoding="latin_1")
d.head()

Unnamed: 0,book_id,name,description
0,4833,The Glass Castle,"A tender, moving tale of unconditional love in..."
1,590,"Night (The Night Trilogy, #1)","Born into a Jewish ghetto in Hungary, as a chi..."
2,4264,"Angela's Ashes (Frank McCourt, #1)",Imbued on every page with Frank McCourt's asto...
3,3361,"Eat, Pray, Love","A celebrated writer's irresistible, candid, an..."
4,4535,Into Thin Air: A Personal Account of the Mount...,A bank of clouds was assembling on the not-so-...


In [4]:
# Drops all rows that has NaN value
d.dropna(inplace=True)

In [5]:
# print shape of the description dataset
d.shape

(122, 3)

In [6]:
# Read listing dataset and assign it to the variable l
l = pd.read_csv('listing.csv', encoding="latin_1")
print(l.shape)
l.head()

(1089, 4)


Unnamed: 0,book_id,genre,name,author
0,4833,Biographies & Memoirs,The Glass Castle,Jeannette Walls
1,590,Biographies & Memoirs,"Night (The Night Trilogy, #1)",Elie Wiesel
2,4264,Biographies & Memoirs,"Angela's Ashes (Frank McCourt, #1)",Frank McCourt
3,3361,Biographies & Memoirs,"Eat, Pray, Love",Elizabeth Gilbert
4,4535,Biographies & Memoirs,Into Thin Air: A Personal Account of the Mount...,Jon Krakauer


In [7]:
# Drops duplicates rows that has the column 'book_id'
l.drop_duplicates('book_id', inplace=True)

In [8]:
# Merge the dataset description (d) and listing (l) on the columns 'book_id' and 'name'
df = pd.merge(d,
              l,
              on=['book_id', 'name'], 
              how='left')
df.shape

(122, 5)

In [9]:
# print shape of concatenated dataset
df.shape

(122, 5)

In [10]:
# Reset index for the dataset (df)
df.reset_index(inplace=True)

In [11]:
# Print the dataset
df.head()

Unnamed: 0,index,book_id,name,description,genre,author
0,0,4833,The Glass Castle,"A tender, moving tale of unconditional love in...",Biographies & Memoirs,Jeannette Walls
1,1,590,"Night (The Night Trilogy, #1)","Born into a Jewish ghetto in Hungary, as a chi...",Biographies & Memoirs,Elie Wiesel
2,2,4264,"Angela's Ashes (Frank McCourt, #1)",Imbued on every page with Frank McCourt's asto...,Biographies & Memoirs,Frank McCourt
3,3,3361,"Eat, Pray, Love","A celebrated writer's irresistible, candid, an...",Biographies & Memoirs,Elizabeth Gilbert
4,4,4535,Into Thin Air: A Personal Account of the Mount...,A bank of clouds was assembling on the not-so-...,Biographies & Memoirs,Jon Krakauer


In [12]:
# Read books dataset and assign it to the variable b
b = pd.read_csv('books.csv')
print(b.shape)
b.head()

(2312, 5)


Unnamed: 0,book_id,avg_rating,no_of_ratings,user_id,user_rating
0,4833,4.25,7156.0,3466,0
1,590,4.31,7821.0,3466,5
2,4264,4.08,3836.0,3453,5
3,3361,3.52,1245.0,3453,4
4,4535,4.13,3107.0,3453,0


In [13]:
# Check if there is any Null cell in the book dataset
b.isnull().sum()

book_id          0
avg_rating       2
no_of_ratings    2
user_id          0
user_rating      0
dtype: int64

In [14]:
# Print the unique number of books in the dataset book
len(b.book_id.unique())

710

In [15]:
# Drop the colunms 'user_rating' and 'user_id'
b.drop(['user_rating', 'user_id'], axis=1, inplace=True)

In [16]:
# Drops all duplicate rows
b.drop_duplicates(inplace = True)
b.head()

Unnamed: 0,book_id,avg_rating,no_of_ratings
0,4833,4.25,7156.0
1,590,4.31,7821.0
2,4264,4.08,3836.0
3,3361,3.52,1245.0
4,4535,4.13,3107.0


In [17]:
# Reset index for the book dataset (b)
b.reset_index(drop=True, inplace=True)

In [18]:
# Print the unique number of books in the dataset book
len(b.book_id.unique())

710

In [19]:
# Print concatenated dataset
df.head()

Unnamed: 0,index,book_id,name,description,genre,author
0,0,4833,The Glass Castle,"A tender, moving tale of unconditional love in...",Biographies & Memoirs,Jeannette Walls
1,1,590,"Night (The Night Trilogy, #1)","Born into a Jewish ghetto in Hungary, as a chi...",Biographies & Memoirs,Elie Wiesel
2,2,4264,"Angela's Ashes (Frank McCourt, #1)",Imbued on every page with Frank McCourt's asto...,Biographies & Memoirs,Frank McCourt
3,3,3361,"Eat, Pray, Love","A celebrated writer's irresistible, candid, an...",Biographies & Memoirs,Elizabeth Gilbert
4,4,4535,Into Thin Air: A Personal Account of the Mount...,A bank of clouds was assembling on the not-so-...,Biographies & Memoirs,Jon Krakauer


In [20]:
# Print book dataset
b.head()

Unnamed: 0,book_id,avg_rating,no_of_ratings
0,4833,4.25,7156.0
1,590,4.31,7821.0
2,4264,4.08,3836.0
3,3361,3.52,1245.0
4,4535,4.13,3107.0


In [21]:
# Print shape of concatenated dataset
df.shape

(122, 6)

In [22]:
# Merge previous concatenated dataset (description and listing) with book dataset
df = pd.merge(df,
                 b[['book_id', 'avg_rating', 'no_of_ratings']],
                 on='book_id', 
                 how='left')
df.shape

(122, 8)

In [23]:
df.isnull().sum()

index             0
book_id           0
name              0
description       0
genre             0
author            7
avg_rating       20
no_of_ratings    20
dtype: int64

In [24]:
df.dropna(subset=['avg_rating'], inplace=True)

In [25]:
df.isnull().sum()

index            0
book_id          0
name             0
description      0
genre            0
author           6
avg_rating       0
no_of_ratings    0
dtype: int64

In [26]:
df.head()

Unnamed: 0,index,book_id,name,description,genre,author,avg_rating,no_of_ratings
0,0,4833,The Glass Castle,"A tender, moving tale of unconditional love in...",Biographies & Memoirs,Jeannette Walls,4.25,7156.0
1,1,590,"Night (The Night Trilogy, #1)","Born into a Jewish ghetto in Hungary, as a chi...",Biographies & Memoirs,Elie Wiesel,4.31,7821.0
2,2,4264,"Angela's Ashes (Frank McCourt, #1)",Imbued on every page with Frank McCourt's asto...,Biographies & Memoirs,Frank McCourt,4.08,3836.0
3,3,3361,"Eat, Pray, Love","A celebrated writer's irresistible, candid, an...",Biographies & Memoirs,Elizabeth Gilbert,3.52,1245.0
4,4,4535,Into Thin Air: A Personal Account of the Mount...,A bank of clouds was assembling on the not-so-...,Biographies & Memoirs,Jon Krakauer,4.13,3107.0


# Content-based filtering

## Text Preprocessing

In [27]:
#Utitlity functions for removing ASCII characters, converting lower case, removing stop words, html and punctuation from description

def _removeNonAscii(s):
    return "".join(i for i in s if  ord(i)<128)

def make_lower_case(text):
    return text.lower()

def remove_stop_words(text):
    text = text.split()
    stops = set(stopwords.words("english"))
    text = [w for w in text if not w in stops]
    text = " ".join(text)
    return text

def remove_html(text):
    html_pattern = re.compile('<.*?>')
    return html_pattern.sub(r'', text)

def remove_punctuation(text):
    tokenizer = RegexpTokenizer(r'\w+')
    text = tokenizer.tokenize(text)
    text = " ".join(text)
    return text

In [28]:
df['description'] = df['description'].astype(str)

In [29]:
df['cleaned'] = df['description'].apply(_removeNonAscii)

df['cleaned'] = df.cleaned.apply(func = make_lower_case)
df['cleaned'] = df.cleaned.apply(func = remove_stop_words)
df['cleaned'] = df.cleaned.apply(func=remove_punctuation)
df['cleaned'] = df.cleaned.apply(func=remove_html)


In [30]:
df.head()

Unnamed: 0,index,book_id,name,description,genre,author,avg_rating,no_of_ratings,cleaned
0,0,4833,The Glass Castle,"A tender, moving tale of unconditional love in...",Biographies & Memoirs,Jeannette Walls,4.25,7156.0,tender moving tale unconditional love family t...
1,1,590,"Night (The Night Trilogy, #1)","Born into a Jewish ghetto in Hungary, as a chi...",Biographies & Memoirs,Elie Wiesel,4.31,7821.0,born jewish ghetto hungary child elie wiesel s...
2,2,4264,"Angela's Ashes (Frank McCourt, #1)",Imbued on every page with Frank McCourt's asto...,Biographies & Memoirs,Frank McCourt,4.08,3836.0,imbued every page frank mccourt s astounding h...
3,3,3361,"Eat, Pray, Love","A celebrated writer's irresistible, candid, an...",Biographies & Memoirs,Elizabeth Gilbert,3.52,1245.0,celebrated writer s irresistible candid eloque...
4,4,4535,Into Thin Air: A Personal Account of the Mount...,A bank of clouds was assembling on the not-so-...,Biographies & Memoirs,Jon Krakauer,4.13,3107.0,bank clouds assembling not so distant horizon ...


In [31]:
# Reset index for the  dataset (df)
df.reset_index(inplace=True)

## Building Average Word2Vec Model

In [32]:
# Downloading the Google pretrained Word2Vec Model
!wget -P /root/input/ -c "https://s3.amazonaws.com/dl4j-distribution/GoogleNews-vectors-negative300.bin.gz"

--2021-05-25 09:02:40--  https://s3.amazonaws.com/dl4j-distribution/GoogleNews-vectors-negative300.bin.gz
Resolving s3.amazonaws.com (s3.amazonaws.com)... 52.216.81.219
Connecting to s3.amazonaws.com (s3.amazonaws.com)|52.216.81.219|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 1647046227 (1.5G) [application/x-gzip]
Saving to: ‘/root/input/GoogleNews-vectors-negative300.bin.gz’


2021-05-25 09:03:00 (76.8 MB/s) - ‘/root/input/GoogleNews-vectors-negative300.bin.gz’ saved [1647046227/1647046227]



In [33]:
EMBEDDING_FILE = '/root/input/GoogleNews-vectors-negative300.bin.gz'
google_word2vec = KeyedVectors.load_word2vec_format(EMBEDDING_FILE, binary=True)

In [34]:
#splitting the description into words
corpus = []
for words in df['cleaned']:
    corpus.append(words.split())

In [35]:
# Training our corpus with Google Pretrained Model
%%time
google_model = Word2Vec(size = 300, window=5, min_count = 2, workers = -1)
google_model.build_vocab(corpus)

#model.intersect_word2vec_format('./word2vec/GoogleNews-vectors-negative300.bin', lockf=1.0, binary=True)
google_model.intersect_word2vec_format(EMBEDDING_FILE, lockf=1.0, binary=True)
google_model.train(corpus, total_examples=google_model.corpus_count, epochs = 5)

CPU times: user 1min 30s, sys: 702 ms, total: 1min 31s
Wall time: 1min 31s


##  Building TF-IDF Word2Vec Model

In [36]:
#Building TFIDF model and calculate TFIDF score
tfidf = TfidfVectorizer(analyzer='word', ngram_range=(1, 3), min_df = 5, stop_words='english')
tfidf.fit(df['cleaned'])

# Getting the words from the TF-IDF model
tfidf_list = dict(zip(tfidf.get_feature_names(), list(tfidf.idf_)))
tfidf_feature = tfidf.get_feature_names() # tfidf words/col-names

In [39]:
# Building TF-IDF Word2Vec 

# Storing the TFIDF Word2Vec embeddings
tfidf_vectors = []; 
line = 0;
# for each book description
for desc in corpus: 
  # Word vectors are of zero length (Used 300 dimensions)
    sent_vec = np.zeros(300) 
    # num of words with a valid vector in the book description
    weight_sum =0; 
    # for each word in the book description
    for word in desc: 
        if word in google_model.wv.vocab and word in tfidf_feature:
            vec = google_model.wv[word]
            tf_idf = tfidf_list[word] * (desc.count(word) / len(desc))
            sent_vec += (vec * tf_idf)
            weight_sum += tf_idf
    if weight_sum != 0:
        sent_vec /= weight_sum
    tfidf_vectors.append(sent_vec)
    line += 1

In [40]:
len(tfidf_vectors)

102

In [41]:
#Recommending top 20 similar books

def recommendations(title):    
    #Reverse mapping of the index
    indices = pd.Series(df.index, index = df['name']).drop_duplicates()
    idx = indices[title]
    idc = df.index.to_list()

    # Handling right index
    if idx in idc:
      val1 = idc.index(idx)
    else:
      idc.append(idx)
      val1 = -1
    
    print('Genre: ', df['genre'][idx])
    temp = [tfidf_vectors[i] for i in idc]
    
    # finding cosine similarity for the vectors
    cosine_similarities = cosine_similarity(temp,  temp)

    # Considering metadata (book genre and book's author) in recommendations
    for i in idc:
      if df.iloc[i]['genre'] != df['genre'][idx]:
        cosine_similarities[val1][idc.index(i)] /= 2
      elif df.iloc[i]['author'] == df['author'][idx]:
        cosine_similarities[val1][idc.index(i)] *= 1.1


    books = df['name']
    sim_scores = list(enumerate(cosine_similarities[val1]))
    sim_scores = sorted(sim_scores,
    key = lambda x: x[1], reverse = True)
    sim_scores = sim_scores[1:21]

    book_indices = [i[0] for i in sim_scores]
    recommend = books.iloc[book_indices]
    return recommend

In [42]:
# Content based recommemdation for the "Fantasy League"
ls = recommendations("Fantasy League")
ls

Genre:  Children's Books


96    Beyond Courage: The Untold Story of Jewish Res...
83    It's St. Patrick's Day (Turtleback School & Li...
90                Ready, Freddy! #22: Science Fair Flop
65                                             Shortcut
7                                        Into the Wild 
26                                          Bossypants 
38                                   A Handful of Stars
73                                      If You're Lucky
27    Wild: From Lost to Found on the Pacific Crest ...
5                                 Tuesdays with Morrie 
91    On the Backroad to Heaven: Old Order Hutterite...
67                                 Reasons Mommy Drinks
16                              Me Talk Pretty One Day 
17    Marley and Me: Life and Love With the World's ...
8                       I Know Why the Caged Bird Sings
22           A Long Way Gone: Memoirs of a Boy Soldier 
95    God Made All of Me: A Book to Help Children Pr...
50                Eat the Rich: A Treatise on Ec

In [43]:
# We represented recommendations as a pandas dataframe
list_of_indexes = ls.index.to_list()
ls = df.iloc[list_of_indexes]
ls

Unnamed: 0,level_0,index,book_id,name,description,genre,author,avg_rating,no_of_ratings,cleaned
96,116,116,2441,Beyond Courage: The Untold Story of Jewish Res...,"Under the noses of the military, Georges Loing...",Children's Books,Doreen Rappaport,4.15,809.0,noses military georges loinger smuggles thousa...
83,103,103,3075,It's St. Patrick's Day (Turtleback School & Li...,This Level 1 rhyming reader introduces young c...,Children's Books,Rebecca Gomez,3.12,67.0,level 1 rhyming reader introduces young childr...
90,110,110,2709,"Ready, Freddy! #22: Science Fair Flop",Everyone's favorite first-grade shark expert i...,Children's Books,Abby Klein,3.95,96.0,everyone s favorite first grade shark expert b...
65,65,65,322,Shortcut,The train tracks ran right by Bigmama's house ...,Children's Books,Donald Crews,4.0,362.0,train tracks ran right bigmama s house cottond...
7,7,7,3190,Into the Wild,In April 1992 a young man from a well-to-do fa...,Biographies & Memoirs,Jon Krakauer,3.96,7275.0,april 1992 young man well to do family hitchhi...
26,26,26,4414,Bossypants,"Before Liz Lemon, before ""Weekend Update,"" bef...",Biographies & Memoirs,Tina Fey,3.95,6768.0,liz lemon weekend update sarah palin tina fey ...
38,38,38,3196,A Handful of Stars,This powerful middle-grade novel from the Newb...,Children's Books,Cynthia Lord,4.16,3611.0,powerful middle grade novel newbery honor auth...
73,93,93,3574,If You're Lucky,When seventeen-year-old Georgias brother drow...,Teen & Young Adult,Yvonne Prinz,3.49,455.0,seventeen year old georgias brother drowns sur...
27,27,27,239,Wild: From Lost to Found on the Pacific Crest ...,"At twenty-two, Cheryl Strayed thought she had ...",Biographies & Memoirs,Cheryl Strayed,3.97,4701.0,twenty two cheryl strayed thought lost everyth...
5,5,5,518,Tuesdays with Morrie,"Maybe it was a grandparent, or a teacher or a ...",Biographies & Memoirs,Mitch Albom,4.08,6250.0,maybe grandparent teacher colleague someone ol...


In [44]:
# Read book dataset and assign it to the variable b
b = pd.read_csv('books.csv')

In [45]:
# There is no need for the columns "no_of_ratings" and "avg_rating"
# in our implementation therefore we drop these columns
# we assigned remain data to the variable ratings
ratings = b.drop(['no_of_ratings', 'avg_rating'], axis=1)

In [46]:
# We group rows boy user_id column and remove the groups which has only one element
# We split using scikit-leanr's train_test_split function and set stratify property 
# of that function to user_id, therefore it isn't possible to split rows that belongs
# to exactly one user
ratings = ratings.groupby(['user_id']).filter(lambda x: len(x) > 1)

In [47]:
# If there is no NaN value in any row then remove that row
ratings.dropna(inplace=True)

In [48]:
# Drop duplicate rows from the dataset ratings
ratings.drop_duplicates(subset=['book_id', 'user_rating', 'user_id'], inplace=True)

In [49]:
# We take the user with id 3466
temp1 = ratings[ratings['user_id'] == 3466]

In [50]:
# Find all books that the user rated
l_of_rate = temp1['book_id'].to_list()

In [51]:
# Remove the rows that the user already rated
ls = ls[~ls.book_id.isin(l_of_rate)]

In [52]:
ls

Unnamed: 0,level_0,index,book_id,name,description,genre,author,avg_rating,no_of_ratings,cleaned
96,116,116,2441,Beyond Courage: The Untold Story of Jewish Res...,"Under the noses of the military, Georges Loing...",Children's Books,Doreen Rappaport,4.15,809.0,noses military georges loinger smuggles thousa...
83,103,103,3075,It's St. Patrick's Day (Turtleback School & Li...,This Level 1 rhyming reader introduces young c...,Children's Books,Rebecca Gomez,3.12,67.0,level 1 rhyming reader introduces young childr...
90,110,110,2709,"Ready, Freddy! #22: Science Fair Flop",Everyone's favorite first-grade shark expert i...,Children's Books,Abby Klein,3.95,96.0,everyone s favorite first grade shark expert b...
65,65,65,322,Shortcut,The train tracks ran right by Bigmama's house ...,Children's Books,Donald Crews,4.0,362.0,train tracks ran right bigmama s house cottond...
38,38,38,3196,A Handful of Stars,This powerful middle-grade novel from the Newb...,Children's Books,Cynthia Lord,4.16,3611.0,powerful middle grade novel newbery honor auth...
73,93,93,3574,If You're Lucky,When seventeen-year-old Georgias brother drow...,Teen & Young Adult,Yvonne Prinz,3.49,455.0,seventeen year old georgias brother drowns sur...
91,111,111,2181,On the Backroad to Heaven: Old Order Hutterite...,On the Backroad to Heaven is a unique guide to...,Christian Books & Bibles,Donald B. Kraybill,3.85,61.0,backroad heaven unique guide world old order a...
67,67,67,888,Reasons Mommy Drinks,Being a new mom is AHHHH! WHAT HAVE I DONE? ...,Humor & Entertainment,Lyranda Martin-Evans,3.77,188.0,new mom ahhhh done hard tiny clothes expensive...
95,115,115,2276,God Made All of Me: A Book to Help Children Pr...,It's easy to convey the message to children th...,Parenting & Relationships,Justin S. Holcomb,4.46,225.0,easy convey message children bodies particular...
50,50,50,4968,Eat the Rich: A Treatise on Economics,America's favorite political humorist leads re...,Humor & Entertainment,P. J. O'Rourke,3.88,2100.0,america s favorite political humorist leads re...


In [53]:
ls.shape

(11, 10)

# Collaborative filtering

In [54]:
# Number of rated books
len(ratings.book_id.unique())

710

In [55]:
# Number of users that rated books
len(ratings.user_id.unique())

84

In [None]:
ratings.isnull().sum()

book_id        0
user_id        0
user_rating    0
dtype: int64

## User-Based Collaorative Filtering

In [56]:
from sklearn.model_selection import train_test_split

#Assign X as the original ratings dataframe and y as the user_id column of ratings.
X = ratings.copy()
y = ratings['user_id']

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.25, stratify=y, random_state=42)

In [57]:
print(X.shape)
print(y.shape)

(2189, 3)
(2189,)


In [58]:
X_test.head()

Unnamed: 0,book_id,user_id,user_rating
2167,2610,3466,1
1919,152,3482,3
1970,3900,3482,5
1973,2653,3483,2
615,2766,3403,3


In [59]:
X_train.head()

Unnamed: 0,book_id,user_id,user_rating
217,3093,3471,1
184,4744,3461,0
911,3082,3483,3
1896,4619,2953,1
2053,2266,3469,5


In [60]:
X_train.shape

(1641, 3)

In [61]:
len(X_train.user_id.unique())

84

In [62]:
len(X_test.user_id.unique())

63

In [74]:
#Import the mean_squared_error function
from sklearn.metrics import mean_squared_error

#Function that computes the root mean squared error (or RMSE)
def rmse(y_true, y_pred):
  return np.sqrt(mean_squared_error(y_true, y_pred))

In [75]:
#Function to compute the RMSE score obtained on the testing set by a model
def score(cf_model):
  #Construct a list of user-movie tuples from the testing dataset
  id_pairs = zip(X_test['user_id'], X_test['book_id'])
  #Predict the rating for every user-movie tuple
  y_pred = np.array([cf_model(user, book) for (user, book) in id_pairs])
  #Extract the actual ratings given by the users in the test data
  y_true = np.array(X_test['user_rating'])
  #Return the final RMSE score
  return rmse(y_true, y_pred)

In [76]:
#Build the ratings matrix using pivot_table function
r_matrix = X_train.pivot_table(values='user_rating', index='user_id', columns='book_id')

r_matrix.sample(10)

book_id,6,7,9,15,21,29,43,47,61,72,74,81,84,89,90,91,93,99,104,107,110,119,130,144,145,149,152,154,189,196,200,202,203,208,209,225,226,239,241,263,...,4744,4751,4755,4758,4771,4772,4777,4780,4805,4827,4830,4832,4833,4845,4852,4853,4854,4868,4881,4882,4885,4886,4889,4897,4901,4902,4904,4907,4919,4921,4923,4925,4941,4942,4968,4971,4975,4978,4991,4995
user_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1,Unnamed: 31_level_1,Unnamed: 32_level_1,Unnamed: 33_level_1,Unnamed: 34_level_1,Unnamed: 35_level_1,Unnamed: 36_level_1,Unnamed: 37_level_1,Unnamed: 38_level_1,Unnamed: 39_level_1,Unnamed: 40_level_1,Unnamed: 41_level_1,Unnamed: 42_level_1,Unnamed: 43_level_1,Unnamed: 44_level_1,Unnamed: 45_level_1,Unnamed: 46_level_1,Unnamed: 47_level_1,Unnamed: 48_level_1,Unnamed: 49_level_1,Unnamed: 50_level_1,Unnamed: 51_level_1,Unnamed: 52_level_1,Unnamed: 53_level_1,Unnamed: 54_level_1,Unnamed: 55_level_1,Unnamed: 56_level_1,Unnamed: 57_level_1,Unnamed: 58_level_1,Unnamed: 59_level_1,Unnamed: 60_level_1,Unnamed: 61_level_1,Unnamed: 62_level_1,Unnamed: 63_level_1,Unnamed: 64_level_1,Unnamed: 65_level_1,Unnamed: 66_level_1,Unnamed: 67_level_1,Unnamed: 68_level_1,Unnamed: 69_level_1,Unnamed: 70_level_1,Unnamed: 71_level_1,Unnamed: 72_level_1,Unnamed: 73_level_1,Unnamed: 74_level_1,Unnamed: 75_level_1,Unnamed: 76_level_1,Unnamed: 77_level_1,Unnamed: 78_level_1,Unnamed: 79_level_1,Unnamed: 80_level_1,Unnamed: 81_level_1
3221,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,...,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
2928,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,...,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
3470,,,,,,,,,,,,,,,,,1.0,,3.5,2.5,,,,,,1.0,,,,4.0,4.0,,,,,,,,,,...,,3.0,,2.0,,,,,,,,,,,,,,,4.0,,,,,,,,,,,,,,,,,,,,1.0,
1482,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,...,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
176,,,,,,,,5.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,...,,,,,,,,,,1.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
3426,,4.0,,,,,,,,,,,,,,,,,1.0,,,,,,,,,,,,,,,,,,,,,,...,,,,,,,,,,,,,,,,,,,3.0,,,,,,,,,,,,2.0,,,,,1.0,,,,
3483,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,...,,,3.0,,,,,,,,,,,2.0,,,,,,,,,,,,,,,,,,,3.0,,,,,,,
3466,,,,,,,,,,,,,1.0,,,,,1.0,,,,,,,,,,,,,,,,,,,,0.0,,,...,,,,,,,2.0,,,,,,0.0,,,,,,,,,,,,,,,,,,,,,,,0.0,,,,
232,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,...,,,,,,,,,,,,,,,,,,,,,,,,,,,1.0,,,,,,,,,,,,,
3453,,,,,,,,,,,,,,,,,,,4.0,,5.0,,,,,,,,,,,,,,,,,,,,...,,,,,,,,,,,,,,,,,,,,,,,,5.0,,,,,,,,,,,,,,,,3.0


In [77]:
r_matrix.shape

(84, 677)

In [78]:
#User Based Collaborative Filter using Mean Ratings
def cf_user_mean(user_id, book_id):
  #Check if movie_id exists in r_matrix
  if book_id in r_matrix:
    #Compute the mean of all the ratings given to the movie
    mean_rating = r_matrix[book_id].mean()
  else:
    #Default to a rating of 3.0 in the absence of any information
    mean_rating = 3.0
  return mean_rating
#Compute RMSE for the Mean model
score(cf_user_mean)

1.8547286184709564

In [79]:
# Import cosine_score
from sklearn.metrics.pairwise import cosine_similarity

#Create a dummy ratings matrix with all null values imputed to 0
r_matrix_dummy = r_matrix.copy().fillna(0)

#Compute the cosine similarity matrix using the dummy ratings matrix
cosine_sim = cosine_similarity(r_matrix_dummy, r_matrix_dummy)

#Convert into pandas dataframe
cosine_sim = pd.DataFrame(cosine_sim, index=r_matrix.index, columns=r_matrix.index)

cosine_sim.head(10)

user_id,117,176,232,295,330,397,484,703,853,1083,1125,1183,1221,1283,1328,1412,1460,1482,1496,1586,1689,1784,2061,2078,2108,2122,2215,2222,2283,2460,2474,2478,2547,2549,2624,2672,2689,2760,2928,2931,...,3066,3115,3207,3217,3221,3229,3262,3270,3291,3292,3331,3364,3393,3403,3421,3426,3430,3444,3449,3453,3461,3466,3468,3469,3470,3471,3472,3474,3475,3476,3478,3479,3480,3482,3483,3484,3486,3497,7130,7131
user_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1,Unnamed: 31_level_1,Unnamed: 32_level_1,Unnamed: 33_level_1,Unnamed: 34_level_1,Unnamed: 35_level_1,Unnamed: 36_level_1,Unnamed: 37_level_1,Unnamed: 38_level_1,Unnamed: 39_level_1,Unnamed: 40_level_1,Unnamed: 41_level_1,Unnamed: 42_level_1,Unnamed: 43_level_1,Unnamed: 44_level_1,Unnamed: 45_level_1,Unnamed: 46_level_1,Unnamed: 47_level_1,Unnamed: 48_level_1,Unnamed: 49_level_1,Unnamed: 50_level_1,Unnamed: 51_level_1,Unnamed: 52_level_1,Unnamed: 53_level_1,Unnamed: 54_level_1,Unnamed: 55_level_1,Unnamed: 56_level_1,Unnamed: 57_level_1,Unnamed: 58_level_1,Unnamed: 59_level_1,Unnamed: 60_level_1,Unnamed: 61_level_1,Unnamed: 62_level_1,Unnamed: 63_level_1,Unnamed: 64_level_1,Unnamed: 65_level_1,Unnamed: 66_level_1,Unnamed: 67_level_1,Unnamed: 68_level_1,Unnamed: 69_level_1,Unnamed: 70_level_1,Unnamed: 71_level_1,Unnamed: 72_level_1,Unnamed: 73_level_1,Unnamed: 74_level_1,Unnamed: 75_level_1,Unnamed: 76_level_1,Unnamed: 77_level_1,Unnamed: 78_level_1,Unnamed: 79_level_1,Unnamed: 80_level_1,Unnamed: 81_level_1
117,1.0,0.0,0.0,0.025962,0.0,0.0,0.0,0.084509,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.238976,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.167248,0.0,0.0,0.023682,0.0,0.0,0.0,0.0,0.030184,0.029509,0.0,0.0,0.09754,0.0,0.084374,0.161522,0.118858,0.069701,0.088929,0.083798,0.0,0.0,0.0,0.0
176,0.0,1.0,0.0,0.181356,0.0,0.0,0.0,0.209076,0.089245,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.059032,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.09179,0.0,0.0,0.0,0.0,0.0,0.0,0.087255,0.0,0.0,0.0,0.0,0.082611,0.147046,0.0,0.0,0.0,0.0,0.0,0.008589,0.0,0.0,0.0,0.0,0.0,0.061055,0.242164,0.046371,0.0,0.124168,0.0,0.0,0.0,0.0
232,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.177043,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
295,0.025962,0.181356,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.012146,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.074214,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.175623,0.097689,0.0,0.0,0.0,0.044522,0.070971,0.0,0.040787,0.0,0.0,0.0,0.0,0.031151,0.06619,0.0,0.0,0.0,0.0,0.0,0.0,0.0
330,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.081574,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.04473,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.068945,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.011266,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
397,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.031364,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.05595,0.0,0.0,0.0,0.0,0.151296,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
484,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.025901,0.029292,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.100125
703,0.084509,0.209076,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.076847,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.063375,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
853,0.0,0.089245,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.172919,0.057166,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.31427,0.0,0.0,0.0,0.0,0.0,0.14072,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.067647,0.0,0.0,0.0,0.052137,0.0,0.079374,0.0,0.0,0.0,0.0,0.05435,0.0,0.158763,0.0,0.174604,0.0,0.0,0.0,0.0
1083,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.244266,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [80]:
#User Based Collaborative Filter using Weighted Mean Ratings
def cf_user_wmean(user_id, book_id):
  #Check if movie_id exists in r_matrix
  if book_id in r_matrix:
    #Get the similarity scores for the user in question with every other user
    sim_scores = cosine_sim[user_id]
    #Get the user ratings for the movie in question
    m_ratings = r_matrix[book_id]

    #Extract the indices containing NaN in the m_ratings series
    idx = m_ratings[m_ratings.isnull()].index
    #Drop the NaN values from the m_ratings Series
    m_ratings = m_ratings.dropna()
    m_ratings.reset_index()
    #Drop the corresponding cosine scores from the sim_scores series
    sim_scores = sim_scores.drop(idx)

    wmean_rating = np.dot(sim_scores, m_ratings)/ (sim_scores.sum()  + 0.000000000000001)
  else:
    #Default to a rating of 3.0 in the absence of any information
    wmean_rating = 3.0
  return wmean_rating

# Recommendations of Hybrid Approach

In [81]:
# Get Hybrid recommendation for the user 3466
user_id = 3466
ls['prediction'] = -1
for index, row in ls.iterrows():
  book_id = int(row['book_id'])
  y_pred = cf_user_wmean(user_id, book_id)
  ls.loc[ls['book_id'] == book_id, 'prediction'] = y_pred 

In [82]:
ls

Unnamed: 0,level_0,index,book_id,name,description,genre,author,avg_rating,no_of_ratings,cleaned,prediction
96,116,116,2441,Beyond Courage: The Untold Story of Jewish Res...,"Under the noses of the military, Georges Loing...",Children's Books,Doreen Rappaport,4.15,809.0,noses military georges loinger smuggles thousa...,2.993671
83,103,103,3075,It's St. Patrick's Day (Turtleback School & Li...,This Level 1 rhyming reader introduces young c...,Children's Books,Rebecca Gomez,3.12,67.0,level 1 rhyming reader introduces young childr...,4.0
90,110,110,2709,"Ready, Freddy! #22: Science Fair Flop",Everyone's favorite first-grade shark expert i...,Children's Books,Abby Klein,3.95,96.0,everyone s favorite first grade shark expert b...,1.977907
65,65,65,322,Shortcut,The train tracks ran right by Bigmama's house ...,Children's Books,Donald Crews,4.0,362.0,train tracks ran right bigmama s house cottond...,0.0
38,38,38,3196,A Handful of Stars,This powerful middle-grade novel from the Newb...,Children's Books,Cynthia Lord,4.16,3611.0,powerful middle grade novel newbery honor auth...,0.0
73,93,93,3574,If You're Lucky,When seventeen-year-old Georgias brother drow...,Teen & Young Adult,Yvonne Prinz,3.49,455.0,seventeen year old georgias brother drowns sur...,4.0
91,111,111,2181,On the Backroad to Heaven: Old Order Hutterite...,On the Backroad to Heaven is a unique guide to...,Christian Books & Bibles,Donald B. Kraybill,3.85,61.0,backroad heaven unique guide world old order a...,4.507884
67,67,67,888,Reasons Mommy Drinks,Being a new mom is AHHHH! WHAT HAVE I DONE? ...,Humor & Entertainment,Lyranda Martin-Evans,3.77,188.0,new mom ahhhh done hard tiny clothes expensive...,0.0
95,115,115,2276,God Made All of Me: A Book to Help Children Pr...,It's easy to convey the message to children th...,Parenting & Relationships,Justin S. Holcomb,4.46,225.0,easy convey message children bodies particular...,4.0
50,50,50,4968,Eat the Rich: A Treatise on Economics,America's favorite political humorist leads re...,Humor & Entertainment,P. J. O'Rourke,3.88,2100.0,america s favorite political humorist leads re...,0.0


In [83]:
# Get top 5 predictions with the highest rating
ls.sort_values('prediction', ascending=False)[:5]

Unnamed: 0,level_0,index,book_id,name,description,genre,author,avg_rating,no_of_ratings,cleaned,prediction
91,111,111,2181,On the Backroad to Heaven: Old Order Hutterite...,On the Backroad to Heaven is a unique guide to...,Christian Books & Bibles,Donald B. Kraybill,3.85,61.0,backroad heaven unique guide world old order a...,4.507884
95,115,115,2276,God Made All of Me: A Book to Help Children Pr...,It's easy to convey the message to children th...,Parenting & Relationships,Justin S. Holcomb,4.46,225.0,easy convey message children bodies particular...,4.0
83,103,103,3075,It's St. Patrick's Day (Turtleback School & Li...,This Level 1 rhyming reader introduces young c...,Children's Books,Rebecca Gomez,3.12,67.0,level 1 rhyming reader introduces young childr...,4.0
73,93,93,3574,If You're Lucky,When seventeen-year-old Georgias brother drow...,Teen & Young Adult,Yvonne Prinz,3.49,455.0,seventeen year old georgias brother drowns sur...,4.0
96,116,116,2441,Beyond Courage: The Untold Story of Jewish Res...,"Under the noses of the military, Georges Loing...",Children's Books,Doreen Rappaport,4.15,809.0,noses military georges loinger smuggles thousa...,2.993671


#Testing

In [84]:
# Get all books that rated by user 3466
ratings[ratings['user_id'] == 3466]

Unnamed: 0,book_id,user_id,user_rating
0,4833,3466,0
1,590,3466,5
5,518,3466,4
7,3190,3466,0
8,3147,3466,1
...,...,...,...
2169,962,3466,2
2170,4619,3466,2
2171,4707,3466,5
2172,4991,3466,1


In [90]:
# Compare the user's high rated books with the recommendation of Hybrid system
l = pd.read_csv("listing.csv", encoding="latin_1")
temp = pd.merge(temp1, l, on=["book_id"], how="left")
temp = temp.sort_values('user_rating', ascending=False)[:10]
temp

Unnamed: 0,book_id,user_id,user_rating,genre,name,author
40,3324,3466,5,Science & Math,"Glencoe Life Science, Student Edition",McGraw-Hill Education
36,72,3466,5,"Health, Fitness & Dieting",Pregnancy the Natural Way,Zita West
21,4414,3466,5,Biographies & Memoirs,Bossypants,Tina Fey
48,2409,3466,5,History,In Defense of History,Richard J. Evans
20,4222,3466,5,Biographies & Memoirs,Infidel,Ayaan Hirsi Ali
58,4707,3466,5,Humor & Entertainment,Ring of Bright Water (Nonpareil Books),Gavin Maxwell
33,3288,3466,5,Calendars,"Montana, Wild & Scenic 2016 Square 12x12",Browntrout Publishers
1,590,3466,5,Biographies & Memoirs,"Night (The Night Trilogy, #1)",Elie Wiesel
13,2786,3466,5,Biographies & Memoirs,John Adams,David McCullough
37,226,3466,4,Children's Books,Medieval Fashions Coloring Book (Dover Fashion...,Tom Tierney


In [94]:
ls = ls.sort_values('prediction', ascending=False)[:5]
ls = ls.drop(['level_0', 'index'], axis=1, inplace=False)
ls

Unnamed: 0,book_id,name,description,genre,author,avg_rating,no_of_ratings,cleaned,prediction
91,2181,On the Backroad to Heaven: Old Order Hutterite...,On the Backroad to Heaven is a unique guide to...,Christian Books & Bibles,Donald B. Kraybill,3.85,61.0,backroad heaven unique guide world old order a...,4.507884
95,2276,God Made All of Me: A Book to Help Children Pr...,It's easy to convey the message to children th...,Parenting & Relationships,Justin S. Holcomb,4.46,225.0,easy convey message children bodies particular...,4.0
83,3075,It's St. Patrick's Day (Turtleback School & Li...,This Level 1 rhyming reader introduces young c...,Children's Books,Rebecca Gomez,3.12,67.0,level 1 rhyming reader introduces young childr...,4.0
73,3574,If You're Lucky,When seventeen-year-old Georgias brother drow...,Teen & Young Adult,Yvonne Prinz,3.49,455.0,seventeen year old georgias brother drowns sur...,4.0
96,2441,Beyond Courage: The Untold Story of Jewish Res...,"Under the noses of the military, Georges Loing...",Children's Books,Doreen Rappaport,4.15,809.0,noses military georges loinger smuggles thousa...,2.993671
