In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from scipy.sparse import coo_matrix, csr_matrix
from sklearn.preprocessing import normalize, LabelEncoder
from numpy import transpose
import scipy
from sklearn.metrics.pairwise import cosine_similarity
from collections import defaultdict
from IPython.core import display as ICD
from gensim.models.word2vec import Word2Vec
from gensim.utils import simple_preprocess
import re
from scipy.sparse.linalg import svds

Загружаем датасеты и смотрим какие поля они содержат

In [2]:
books = pd.read_csv("data/BX-Books.csv")
books

  interactivity=interactivity, compiler=compiler, result=result)


Unnamed: 0,ISBN,Book-Title,Book-Author,Year-Of-Publication,Publisher,Image-URL-S,Image-URL-M,Image-URL-L
0,0195153448,Classical Mythology,Mark P. O. Morford,2002,Oxford University Press,http://images.amazon.com/images/P/0195153448.0...,http://images.amazon.com/images/P/0195153448.0...,http://images.amazon.com/images/P/0195153448.0...
1,0002005018,Clara Callan,Richard Bruce Wright,2001,HarperFlamingo Canada,http://images.amazon.com/images/P/0002005018.0...,http://images.amazon.com/images/P/0002005018.0...,http://images.amazon.com/images/P/0002005018.0...
2,0060973129,Decision in Normandy,Carlo D'Este,1991,HarperPerennial,http://images.amazon.com/images/P/0060973129.0...,http://images.amazon.com/images/P/0060973129.0...,http://images.amazon.com/images/P/0060973129.0...
3,0374157065,Flu: The Story of the Great Influenza Pandemic...,Gina Bari Kolata,1999,Farrar Straus Giroux,http://images.amazon.com/images/P/0374157065.0...,http://images.amazon.com/images/P/0374157065.0...,http://images.amazon.com/images/P/0374157065.0...
4,0393045218,The Mummies of Urumchi,E. J. W. Barber,1999,W. W. Norton &amp; Company,http://images.amazon.com/images/P/0393045218.0...,http://images.amazon.com/images/P/0393045218.0...,http://images.amazon.com/images/P/0393045218.0...
...,...,...,...,...,...,...,...,...
271355,0440400988,There's a Bat in Bunk Five,Paula Danziger,1988,Random House Childrens Pub (Mm),http://images.amazon.com/images/P/0440400988.0...,http://images.amazon.com/images/P/0440400988.0...,http://images.amazon.com/images/P/0440400988.0...
271356,0525447644,From One to One Hundred,Teri Sloat,1991,Dutton Books,http://images.amazon.com/images/P/0525447644.0...,http://images.amazon.com/images/P/0525447644.0...,http://images.amazon.com/images/P/0525447644.0...
271357,006008667X,Lily Dale : The True Story of the Town that Ta...,Christine Wicker,2004,HarperSanFrancisco,http://images.amazon.com/images/P/006008667X.0...,http://images.amazon.com/images/P/006008667X.0...,http://images.amazon.com/images/P/006008667X.0...
271358,0192126040,Republic (World's Classics),Plato,1996,Oxford University Press,http://images.amazon.com/images/P/0192126040.0...,http://images.amazon.com/images/P/0192126040.0...,http://images.amazon.com/images/P/0192126040.0...


In [3]:
interactions = pd.read_csv("data/BX-Book-Ratings.csv", sep=";", encoding = "ISO-8859-1")
interactions = interactions[interactions["Book-Rating"] != 0]
interactions

Unnamed: 0,User-ID,ISBN,Book-Rating
1,276726,0155061224,5
3,276729,052165615X,3
4,276729,0521795028,6
6,276736,3257224281,8
7,276737,0600570967,6
...,...,...,...
1149773,276704,0806917695,5
1149775,276704,1563526298,9
1149777,276709,0515107662,10
1149778,276721,0590442449,10


In [4]:
users = pd.read_csv('data/BX-Users.csv', delimiter=';', encoding = 'ISO-8859-1')
users

Unnamed: 0,User-ID,Location,Age
0,1,"nyc, new york, usa",
1,2,"stockton, california, usa",18.0
2,3,"moscow, yukon territory, russia",
3,4,"porto, v.n.gaia, portugal",17.0
4,5,"farnborough, hants, united kingdom",
...,...,...,...
278853,278854,"portland, oregon, usa",
278854,278855,"tacoma, washington, united kingdom",50.0
278855,278856,"brampton, ontario, canada",
278856,278857,"knoxville, tennessee, usa",


Заполним пропущенные значения в таблице пользователей средним

In [5]:
users.fillna(users.Age.mean(), inplace=True)

# Задание 1 (Персональные рекомендации)

Создадим персональные рекомендации на основе возраста и локации. В качестве локации можно взять например страну пользователя, поэтому добавим в таблицу пользователей отдельное поле `Country`. По возрасту разобьем на три группы: $0\ -\ 18$, $19\ -\ 64$, $65$ и старше.

In [6]:
users["Country"] = users.apply(lambda x: x['Location'][x['Location'].rfind(",") + 2:], axis = 1)

Разбиваем пользователей по возрасту

In [7]:
young_users = users[users["Age"] < 19].drop(columns=["Location"])
adult_users = users[(users["Age"] > 18) & (users["Age"] < 65)].drop(columns=["Location"])
older_users = users[users["Age"] > 64].drop(columns=["Location"])

Функция, реализующая персональные рекомендации, выглядит следующим образом

In [8]:
def personal_recommendation(age, location):
    country = location[location.rfind(",") + 2:]
    
    if age < 19:
        same_users = pd.DataFrame(young_users[young_users["Country"] == country]["User-ID"])
    elif age < 65:
        same_users = pd.DataFrame(adult_users[adult_users["Country"] == country]["User-ID"])
    else: 
        same_users = pd.DataFrame(older_users[older_users["Country"] == country]["User-ID"])

    current_interactions = interactions.merge(same_users, on="User-ID", how="inner")
    current_interactions = (
        current_interactions
        .groupby("ISBN")
        .agg({'User-ID': 'count', 'Book-Rating': 'mean'})
        .reset_index()
        .rename(columns={"Book-Rating": "Mean-Rating", "User-ID": "Users-Num"})
        .sort_values(["Mean-Rating", "Users-Num"], ascending=False)
    )
    current_interactions = current_interactions[current_interactions["Users-Num"] > 5]
    print("Для ", str(age), "- летнего пользователя из", location, "такие рекомендации")
    ICD.display((current_interactions[:10]
            .merge(books, on="ISBN", how="inner")
            .drop(columns=["ISBN", "Users-Num", "Mean-Rating", "Image-URL-S", "Image-URL-M", "Image-URL-L"])
           ))

Выведем рекомендации

In [9]:
personal_recommendation(70, 'nyc, new york, usa')

Для  70 - летнего пользователя из nyc, new york, usa такие рекомендации


Unnamed: 0,Book-Title,Book-Author,Year-Of-Publication,Publisher
0,The Da Vinci Code,Dan Brown,2003,Doubleday
1,A Painted House,John Grisham,2001,Dell Publishing Company
2,Angels &amp; Demons,Dan Brown,2001,Pocket Star
3,Cold Mountain,Charles Frazier,1997,Atlantic Monthly Press
4,Timeline,MICHAEL CRICHTON,2000,Ballantine Books
5,1st to Die: A Novel,James Patterson,2002,Warner Vision
6,The Summons,John Grisham,2002,Dell Publishing Company
7,When the Wind Blows,James Patterson,1999,Warner Vision
8,The Lovely Bones: A Novel,Alice Sebold,2002,"Little, Brown"
9,The Red Tent (Bestselling Backlist),Anita Diamant,1998,Picador USA


In [10]:
personal_recommendation(42, 'farnborough, hants, united kingdom')

Для  42 - летнего пользователя из farnborough, hants, united kingdom такие рекомендации


Unnamed: 0,Book-Title,Book-Author,Year-Of-Publication,Publisher
0,The Little Prince (Wordsworth Collection),Antoine de Saint-ExupÃ©ry,1998,NTC/Contemporary Publishing Company
1,To the Edge of the Sky,Anhua Gao,2001,"Penguin Books, Limited (UK)"
2,Fingersmith,Sarah Waters,2002,Riverhead Books
3,Jonathan Livingston Seagull,Richard Bach,1976,Avon
4,Notes from a Small Island,Bill Bryson,1997,Perennial
5,Pride and Prejudice (Penguin Popular Classics),Jane Austen,1994,Penguin Books Ltd
6,Tess of the D'Urbervilles (Penguin Classics),Thomas Hardy,1978,Penguin Books
7,Name of the Rose,Umberto Eco,0,Pan Books Ltd
8,Fried Green Tomatoes at the Whistle Stop Cafe,Fannie Flagg,1992,Vintage Books USA


In [11]:
personal_recommendation(22, 'barcelona, barcelona, spain')

Для  22 - летнего пользователя из barcelona, barcelona, spain такие рекомендации


Unnamed: 0,Book-Title,Book-Author,Year-Of-Publication,Publisher
0,La Sombra del Viento,Carlos Ruiz Zafon,2003,Planeta
1,El Senor De Los Anillos: El Retorno Del Rey (T...,J. R. R. Tolkien,2001,Distribooks
2,New Vegetarian: Bold and Beautiful Recipes for...,Celia Brooks Brown,2001,Ryland Peters &amp; Small Ltd
3,1984 (Spanish Language Edition),George Orwell,1984,Destino Ediciones
4,El Guardian Entre El Centeno,J. D. Salinger,1999,AIMS International Books
5,El Hobbit,J. R. R. Tolkien,1991,Minotauro
6,El Palacio de La Luna,Paul Auster,1996,Anagrama
7,El Senor De Los Anillos: Las DOS Torres (Lord ...,J. R. R. Tolkien,2001,Minotauro
8,Harry Potter y el cÃ¡liz de fuego,J. K. Rowling,2001,Lectorum Publications
9,Un tranvÃ­a en SP (Sprako Tranbia),Unai Elorriaga,2003,Alfaguara/Santillana USA


In [12]:
personal_recommendation(14, 'appleton, wisconsin, usa')

Для  14 - летнего пользователя из appleton, wisconsin, usa такие рекомендации


Unnamed: 0,Book-Title,Book-Author,Year-Of-Publication,Publisher
0,Harry Potter and the Prisoner of Azkaban (Book 3),J. K. Rowling,2001,Scholastic
1,"The Return of the King (The Lord of the Rings,...",J.R.R. TOLKIEN,1986,Del Rey
2,Harry Potter and the Goblet of Fire (Book 4),J. K. Rowling,2000,Scholastic
3,The Bad Beginning (A Series of Unfortunate Eve...,Lemony Snicket,1999,HarperCollins
4,Harry Potter and the Order of the Phoenix (Boo...,J. K. Rowling,2003,Scholastic
5,The Giver (Readers Circle),LOIS LOWRY,2002,Laurel Leaf
6,Harry Potter and the Prisoner of Azkaban (Book 3),J. K. Rowling,1999,Scholastic
7,The Thief Lord,Cornelia Funke,2002,Scholastic
8,Harry Potter and the Chamber of Secrets (Book 2),J. K. Rowling,2000,Scholastic
9,Anthem,Ayn Rand,1996,Signet Book


# Задание 2 (Кластеризация пользователей)

Разобьем пользователей на кластеры, используя в качестве меры близости косинусное расстояние. Для начала составим матрицу взаимодействий

In [13]:
books_meets = interactions.groupby("ISBN")["User-ID"].count().reset_index().rename(columns={"User-ID": "Users-Num"})

user_meets = interactions.groupby("User-ID")["ISBN"].count().reset_index().rename(columns={"ISBN": "Books-Num"})

interactions = interactions.merge(books_meets, on=["ISBN"]).merge(user_meets, on=["User-ID"])

interactions = interactions[(interactions["Users-Num"] > 5) & 
                            (interactions["Books-Num"] > 5) &
                            (interactions["Books-Num"] < 200)]

interactions = interactions.merge(books[["ISBN", "Image-URL-M", "Book-Title"]].rename(
    columns={"Image-URL-M": "picture_url"}), on=["ISBN"])

In [14]:
le_users = LabelEncoder()
le_books = LabelEncoder()
interactions["U_ID"] = le_users.fit_transform(interactions["User-ID"])
interactions["B_ID"] = le_books.fit_transform(interactions["ISBN"])
interactions

Unnamed: 0,User-ID,ISBN,Book-Rating,Users-Num,Books-Num,picture_url,Book-Title,U_ID,B_ID
0,86583,3404139178,9,8,18,http://images.amazon.com/images/P/3404139178.0...,Das Lacheln der Fortuna: Historischer Roman,3445,10397
1,132500,3404139178,10,8,43,http://images.amazon.com/images/P/3404139178.0...,Das Lacheln der Fortuna: Historischer Roman,5248,10397
2,66483,3404139178,10,8,83,http://images.amazon.com/images/P/3404139178.0...,Das Lacheln der Fortuna: Historischer Roman,2622,10397
3,276866,3404139178,9,8,11,http://images.amazon.com/images/P/3404139178.0...,Das Lacheln der Fortuna: Historischer Roman,10887,10397
4,106534,3404139178,6,8,6,http://images.amazon.com/images/P/3404139178.0...,Das Lacheln der Fortuna: Historischer Roman,4228,10397
...,...,...,...,...,...,...,...,...,...
110514,27551,0849912970,10,6,6,http://images.amazon.com/images/P/0849912970.0...,Traveling Light: Releasing the Burdens You Wer...,1035,9478
110515,206652,1860465811,9,8,10,http://images.amazon.com/images/P/1860465811.0...,Wind-Up Bird Chronicle,8112,10209
110516,223525,1588720284,8,6,13,http://images.amazon.com/images/P/1588720284.0...,Why Your Life Sucks: And What You Can Do Abou...,8754,10138
110517,94153,0060652926,9,7,14,http://images.amazon.com/images/P/0060652926.0...,Mere Christianity,3740,276


In [15]:
matrix = coo_matrix((interactions["Book-Rating"], (interactions["U_ID"], interactions["B_ID"])), 
                            shape=(len(set(interactions["U_ID"])), len(set(interactions["B_ID"]))))
matrix = normalize(matrix)
# matrix.shape

Заметим, что для нормализованных векторов $u$ и $v$ удвоенное косинусное расстояние и квадрат евклидова расстояния совпадают:
$$||x\ -\ y||^2\ =\ (x\ -\ y)(x\ -\ y)\ =\ x^2\ +\ y^2\ -\ 2xy\ =\ 2\ -\ 2xy\ =\ 2(1\ -\ xy)\ =\ 2\cdot cos\_dist(x,\ y)$$
Поэтому, можно использовать стандартную KMeans кластеризацию, в которой используется евклидово расстояние. Делить будем на 10 кластеров

In [20]:
from sklearn.cluster import KMeans

model = KMeans(n_clusters=10, 
               init="k-means++", 
               n_jobs=8, 
               verbose=2,
              )

In [21]:
model.fit(matrix)

KMeans(algorithm='auto', copy_x=True, init='k-means++', max_iter=300,
       n_clusters=10, n_init=10, n_jobs=8, precompute_distances='auto',
       random_state=None, tol=0.0001, verbose=2)

Распределение по кластерам получилось такое

In [22]:
clusters, counts = np.unique(model.labels_, return_counts=True)
counts = pd.DataFrame({"Cluster": clusters, "Users-Num": counts})
counts

Unnamed: 0,Cluster,Users-Num
0,0,1132
1,1,257
2,2,8000
3,3,237
4,4,265
5,5,156
6,6,235
7,7,191
8,8,265
9,9,222


Теперь для каждого кластера выведем топ-10 книг

In [23]:
users_clusters = defaultdict(list)
for id, label in enumerate(model.labels_):
    users_clusters[label].append(id)
    
def show_top(users):
    users = pd.DataFrame(data=users, columns=["U_ID"])
    current_interactions = (
        interactions
        .merge(users, on="U_ID", how="inner")
        .groupby("ISBN")
        .agg({"Book-Rating": "mean", "User-ID": "count"})
        .reset_index()
        .rename(columns={"Book-Rating": "Mean-Rating", "User-ID": "Users-Num"})
        .sort_values(["Mean-Rating", "Users-Num"], ascending=False)
    )
    return (current_interactions[:10]
     .merge(books, on="ISBN", how="inner")
     .drop(columns=["ISBN", "Mean-Rating", "Users-Num", "Image-URL-S", "Image-URL-M", "Image-URL-L"])
    )

In [24]:
dfs = defaultdict(pd.DataFrame)

for clust, users in users_clusters.items():
    count = counts[counts.Cluster == clust]["Users-Num"].values[0]
    dfs["Top-10 for cluster number " + str(clust) + " with " + str(count) + " users"] = show_top(users)
    
for name, top in dfs.items():
    print(name)
    ICD.display(top)
    print("\n\n")    

Top-10 for cluster number 2 with 8000 users


Unnamed: 0,Book-Title,Book-Author,Year-Of-Publication,Publisher
0,Harry Potter and the Chamber of Secrets Postca...,J. K. Rowling,2002,Scholastic
1,Postmarked Yesteryear: 30 Rare Holiday Postcards,Pamela E. Apkarian-Russell,2001,Collectors Press
2,Sherlock Holmes: The Complete Novels and Stori...,Arthur Conan Doyle,1986,Bantam
3,Swan Song,Robert McCammon,1987,Pocket
4,Dilbert: A Book of Postcards,Scott Adams,1996,Andrews McMeel Pub
5,Natural California: A Postcard Book,Not Applicable (Na ),1990,Running Pr
6,"The Lord of the rings,",J. R. R Tolkien,1968,Allen and Unwin
7,Complete Chronicles of Narnia,C. S. Lewis,2001,Harpercollins Juvenile Books
8,Three Men in a Boat: To Say Nothing of the Dog!,Jerome Klapka Jerome,1978,Penguin Books
9,"These High, Green Hills (The Mitford Years)",Jan Karon,1997,Viking Books





Top-10 for cluster number 7 with 191 users


Unnamed: 0,Book-Title,Book-Author,Year-Of-Publication,Publisher
0,Harry Potter and the Prisoner of Azkaban (Book 3),J. K. Rowling,2001,Scholastic
1,Dune (Remembering Tomorrow),Frank Herbert,1996,ACE Charter
2,"Tuesdays with Morrie: An Old Man, a Young Man,...",MITCH ALBOM,1997,Doubleday
3,"House Atreides (Dune: House Trilogy, Book 1)",Brian Herbert,2000,Spectra Books
4,Desecration: Antichrist Takes the Throne (Left...,Tim Lahaye,2001,Tyndale House Publishers
5,The Unbearable Lightness of Being,Milan Kundera,1988,Harpercollins
6,The Bean Trees,Barbara Kingsolver,1989,Perennial
7,Pride and Prejudice (The Penguin English Libra...,Jane Austen,1985,Penguin Books
8,The Book of Lost Tales 1 (The History of Middl...,J. R. R. Tolkien,1992,Del Rey Books
9,The Book of Lost Tales 2 (The History of Middl...,J.R.R. TOLKIEN,1992,Del Rey





Top-10 for cluster number 3 with 237 users


Unnamed: 0,Book-Title,Book-Author,Year-Of-Publication,Publisher
0,Charlotte's Web (Trophy Newbery),E. B. White,1974,HarperTrophy
1,"A Game of Thrones (A Song of Ice and Fire, Boo...",George R.R. Martin,1997,Spectra Books
2,The Boy Next Door,Meggin Cabot,2002,Avon Trade
3,The Napping House,Audrey Wood,1984,Harcourt Children's Books
4,Blindness (Harvest Book),Jose Saramago,1999,Harvest Books
5,Running with Scissors: A Memoir,Augusten Burroughs,2003,Picador USA
6,Seabiscuit,LAURA HILLENBRAND,2003,Ballantine Books
7,The Truth Is... My Life in Love and Music,Melissa Etheridge,2001,Villard Books
8,Harry Potter and the Chamber of Secrets Postca...,J. K. Rowling,2002,Scholastic
9,Inkheart,Cornelia Funke,2003,Chicken House





Top-10 for cluster number 8 with 265 users


Unnamed: 0,Book-Title,Book-Author,Year-Of-Publication,Publisher
0,Calvin and Hobbes,Bill Watterson,1987,Andrews McMeel Publishing
1,The Giving Tree,Shel Silverstein,1964,HarperCollins Publishers
2,The Tao of Pooh,Benjamin Hoff,1983,Penguin Books
3,The Talisman,STEPHEN KING,2001,Ballantine Books
4,One Fish Two Fish Red Fish Blue Fish (I Can Re...,DR SEUSS,1960,Random House Books for Young Readers
5,"The Two Towers (The Lord of the Rings, Part 2)",J. R. R. Tolkien,1999,Houghton Mifflin Company
6,"The Return of the King (The Lord of The Rings,...",J. R. R. Tolkien,1999,Houghton Mifflin Company
7,Scientific Progress Goes 'Boink': A Calvin an...,Bill Watterson,1991,Andrews McMeel Publishing
8,"Lion, the Witch and the Wardrobe",C.S. Lewis,1970,MacMillan Publishing Company.
9,Politically Correct Bedtime Stories: Modern Ta...,James Finn Garner,1994,John Wiley &amp; Sons Inc





Top-10 for cluster number 0 with 1132 users


Unnamed: 0,Book-Title,Book-Author,Year-Of-Publication,Publisher
0,Anne of Green Gables (Anne of Green Gables Nov...,L.M. MONTGOMERY,1982,Bantam Classics
1,The Giving Tree,Shel Silverstein,1964,HarperCollins Publishers
2,The Blue Day Book,Bradley Trevor Greive,2000,Andrews McMeel Publishing
3,The Neverending Story,Michael Ende,1984,Penguin Books
4,Cold Comfort Farm,Stella Gibbons,1995,Penguin Books
5,All Things Wise and Wonderful (All Things Wise...,James Herriot,1998,St. Martin's Press
6,"The Return of the King (The Lord of the Rings,...",J.R.R. TOLKIEN,1986,Del Rey
7,Childhood's End,Arthur C. Clarke,1987,Del Rey Books
8,Green Eggs and Ham (I Can Read It All by Mysel...,Dr. Seuss,1960,Random House Books for Young Readers
9,Fox in Socks (I Can Read It All by Myself Begi...,Dr. Seuss,1965,Random House Children's Books





Top-10 for cluster number 4 with 265 users


Unnamed: 0,Book-Title,Book-Author,Year-Of-Publication,Publisher
0,"Tuesdays with Morrie: An Old Man, a Young Man,...",MITCH ALBOM,1997,Doubleday
1,The Color Purple,Alice Walker,1985,Pocket Books
2,Where the Sidewalk Ends : Poems and Drawings,Shel Silverstein,1974,HarperCollins
3,I Know This Much Is True,Wally Lamb,1999,Regan Books
4,Bridge to Terabithia,Katherine Paterson,1987,HarperTrophy
5,The Eyre Affair: A Novel,Jasper Fforde,2003,Penguin Books
6,Snow Falling on Cedars,David Guterson,1994,Harcourt
7,Cats and Their Women,Louise Taylor,1992,"Little, Brown"
8,Autobiography of a Fat Bride : True Tales of a...,LAURIE NOTARO,2003,Villard
9,Birthright,Nora Roberts,2003,Putnam Publishing Group





Top-10 for cluster number 9 with 222 users


Unnamed: 0,Book-Title,Book-Author,Year-Of-Publication,Publisher
0,Watership Down,Richard Adams,1976,Avon
1,The God of Small Things,Arundhati Roy,1998,Perennial
2,My Year of Meats,Ruth L. Ozeki,1999,Penguin Books
3,Ender's Shadow (Ender Wiggins Saga (Hardcover)),Orson Scott Card,1999,Tor Books
4,Dragonflight (Dragonriders of Pern Trilogy (Pa...,Anne McCaffrey,1991,Del Rey Books
5,The Princess Bride: S Morgenstern's Classic Ta...,WILLIAM GOLDMAN,1987,Del Rey
6,The Hitchhiker's Guide to the Galaxy,Douglas Adams,1995,Ballantine Books
7,Monkeewrench,P. J. Tracy,2003,Putnam Publishing Group
8,"Remember When (Roberts, Nora)",Nora Roberts,2003,Putnam Publishing Group
9,We the Living,Ayn Rand,1996,Signet Book





Top-10 for cluster number 6 with 235 users


Unnamed: 0,Book-Title,Book-Author,Year-Of-Publication,Publisher
0,The Bell Jar : A Novel (Perennial Classics),Sylvia Plath,2000,Perennial
1,The Pillars of the Earth,Ken Follett,1996,Signet Book
2,Up Island: A Novel,Anne Rivers Siddons,1998,HarperTorch
3,"The Lion, the Witch, and the Wardrobe (The Chr...",C. S. Lewis,1994,HarperCollins
4,The Little Prince,Antoine de Saint-ExupÃ©ry,1968,Harcourt
5,The Land of Laughs : A Novel,Jonathan Carroll,2001,Orb Books
6,The Diving Bell and the Butterfly : A Memoir o...,JEAN-DOMINIQUE BAUBY,1998,Vintage
7,Perfume: The Story of a Murderer (Vintage Inte...,Patrick Suskind,2001,Vintage Books USA
8,Notes from a Small Island,Bill Bryson,1997,Perennial
9,Sister of My Heart,Chitra Banerjee Divakaruni,2000,Anchor Pub





Top-10 for cluster number 1 with 257 users


Unnamed: 0,Book-Title,Book-Author,Year-Of-Publication,Publisher
0,Outlander,DIANA GABALDON,1992,Dell
1,The Neverending Story,Michael Ende,1996,Puffin Books
2,So Worthy My Love,Kathleen E. Woodiwiss,1990,Avon
3,The Runaway Jury,JOHN GRISHAM,1996,Doubleday
4,"Tuesdays with Morrie: An Old Man, a Young Man,...",MITCH ALBOM,1997,Doubleday
5,Sudden Prey,John Sandford,1997,Berkley Publishing Group
6,The Rescue,Nicholas Sparks,2000,Warner Books
7,Four Blind Mice,James Patterson,2003,Warner Books
8,The Handmaid's Tale,Margaret Atwood,1989,Fawcett Books
9,Ishmael,Daniel Quinn,1993,Bantam Books





Top-10 for cluster number 5 with 156 users


Unnamed: 0,Book-Title,Book-Author,Year-Of-Publication,Publisher
0,The Purpose-Driven Life: What on Earth Am I He...,Rick Warren,2002,Zondervan Publishing Company
1,Seabiscuit,LAURA HILLENBRAND,2003,Ballantine Books
2,Red Dragon,Thomas Harris,2000,Dell Publishing Company
3,To Kill a Mockingbird,Harper Lee,1985,Warner Books
4,The Stand: Complete and Uncut,Stephen King,1991,Signet Book
5,A Midsummer Nights Dream (Bantam Classic),William Shakespeare,1988,Bantam
6,The Secret Life of Bees,Sue Monk Kidd,2002,Viking Books
7,Politically Correct Bedtime Stories: Modern Ta...,James Finn Garner,1994,John Wiley &amp; Sons Inc
8,Divine Secrets of the Ya-Ya Sisterhood : A Novel,Rebecca Wells,1996,HarperCollins
9,The Poisonwood Bible,Barbara Kingsolver,1999,HarperCollins







# Задание 3

## Пункт 1 (Совстречаемость)
Для начала каждому пользователю сопоставим список книг, с которыми он взаимодействовал

In [25]:
cooc_interactions = interactions[["U_ID", "B_ID"]]
user_products = (cooc_interactions
                 .groupby(["U_ID"])["B_ID"]
                 .apply(list).reset_index()
                 .rename(columns={"B_ID": "Users-Books"}))
books_num = [len(i) for i in user_products["Users-Books"]]
user_products["Books-Num"] = books_num
user_products = user_products[user_products["Books-Num"] > 1]
user_products

Unnamed: 0,U_ID,Users-Books,Books-Num
1,1,"[5415, 5105, 1386, 1393, 1710]",5
2,2,"[7386, 5064, 5014, 5628, 6956, 5067]",6
3,3,"[6529, 10379, 10526, 10354]",4
4,4,"[1684, 415, 1743, 1715, 294, 4544, 9916, 2546,...",16
5,5,"[415, 421, 9916, 9068, 1198, 9923, 6249, 2661]",8
...,...,...,...
10954,10954,"[1690, 426, 1743, 7418, 1633, 5113, 4487, 2647...",28
10956,10956,"[8634, 6111, 8568, 5685]",4
10957,10957,"[7036, 111, 8937, 1151, 240, 3747, 8804, 981, ...",12
10958,10958,"[4607, 9663, 9907, 4214]",4


Составим табличку со средними оценками для каждой книги, она нам пригодится

In [26]:
ratings = dict(interactions[["B_ID", "Book-Rating"]]
           .groupby("B_ID")["Book-Rating"]
           .mean()
           .reset_index()
           .rename(columns={"Book-Rating" : "Mean-Rating"})
           .values
        )

Составми таблицу совстречаемости

In [27]:
cooc = defaultdict(float)
for i in user_products.values:
    l = len(i[1])
    for j in range(l):
        for k in range(l):
            if j == k: continue
            cooc[str(i[1][j]) + "_" + str(i[1][k])] += (ratings[i[1][j]] + ratings[i[1][k]]) / 2
cooc_list = []
for i, j in cooc.items():
    cooc_list.append(i.split("_") + [j])
cooc_rec = pd.DataFrame(cooc_list, columns=["Book-1", "Book-2", "Measure"])
cooc_rec

Unnamed: 0,Book-1,Book-2,Measure
0,5415,5105,8.368005
1,5415,1386,8.040816
2,5415,1393,7.755102
3,5415,1710,7.755102
4,5105,5415,8.368005
...,...,...,...
2562945,4214,9663,7.750000
2562946,6880,2647,6.910823
2562947,2647,6880,6.910823
2562948,2647,4019,6.696970


Функция для рекомендаций, основанных на совстречаемости, выглядит следующим образом

In [28]:
def cooc_recommendation(book_id):
    rec_books = (cooc_rec[cooc_rec["Book-1"] == str(book_id)]
                 .sort_values("Measure", ascending=False)
                )
    rec_books["Book-2"] = rec_books.apply(lambda x: le_books.inverse_transform([int(x[1])])[0], axis=1)
    book = books[books["ISBN"] == le_books.inverse_transform([int(book_id)])[0]].values[0]
    print("Пользователи, купившие книгу", book[1], "автора", str(book[2]) + ", так же покупают:")
    ICD.display((rec_books[:10]
            .merge(books, left_on='Book-2', right_on="ISBN", how="inner")
            .drop(columns=["ISBN", "Image-URL-S", "Image-URL-M", "Image-URL-L", "Book-1", "Book-2"])
           ))

Выведем несколько рекомендаций

In [29]:
cooc_recommendation(le_books.transform(["059035342X"])[0])

Пользователи, купившие книгу Harry Potter and the Sorcerer's Stone (Harry Potter (Paperback)) автора J. K. Rowling, так же покупают:


Unnamed: 0,Measure,Book-Title,Book-Author,Year-Of-Publication,Publisher
0,500.427323,Harry Potter and the Chamber of Secrets (Book 2),J. K. Rowling,2000,Scholastic
1,323.437469,Harry Potter and the Prisoner of Azkaban (Book 3),J. K. Rowling,2001,Scholastic
2,289.627577,Harry Potter and the Order of the Phoenix (Boo...,J. K. Rowling,2003,Scholastic
3,228.865933,Harry Potter and the Goblet of Fire (Book 4),J. K. Rowling,2000,Scholastic
4,207.187584,Harry Potter and the Goblet of Fire (Book 4),J. K. Rowling,2002,Scholastic Paperbacks
5,190.058084,Harry Potter and the Prisoner of Azkaban (Book 3),J. K. Rowling,1999,Scholastic
6,151.907168,Harry Potter and the Chamber of Secrets (Book 2),J. K. Rowling,1999,Scholastic
7,113.943461,The Da Vinci Code,Dan Brown,2003,Doubleday
8,106.905951,The Hobbit : The Enchanting Prelude to The Lor...,J.R.R. TOLKIEN,1986,Del Rey
9,97.000296,Fahrenheit 451,RAY BRADBURY,1987,Del Rey


In [30]:
cooc_recommendation(le_books.transform(["0385504209"])[0])

Пользователи, купившие книгу The Da Vinci Code автора Dan Brown, так же покупают:


Unnamed: 0,Measure,Book-Title,Book-Author,Year-Of-Publication,Publisher
0,377.755312,Angels &amp; Demons,Dan Brown,2001,Pocket Star
1,328.651414,The Lovely Bones: A Novel,Alice Sebold,2002,"Little, Brown"
2,195.728578,The Secret Life of Bees,Sue Monk Kidd,2003,Penguin Books
3,186.0319,The Red Tent (Bestselling Backlist),Anita Diamant,1998,Picador USA
4,159.056438,Harry Potter and the Order of the Phoenix (Boo...,J. K. Rowling,2003,Scholastic
5,141.606426,Girl with a Pearl Earring,Tracy Chevalier,2001,Plume Books
6,132.930225,Life of Pi,Yann Martel,2003,Harvest Books
7,131.759613,The Five People You Meet in Heaven,Mitch Albom,2003,Hyperion
8,123.316655,To Kill a Mockingbird,Harper Lee,1988,Little Brown &amp; Company
9,121.645906,Snow Falling on Cedars,David Guterson,1995,Vintage Books USA


In [31]:
cooc_recommendation(le_books.transform(["0064471047"])[0])

Пользователи, купившие книгу The Lion, the Witch, and the Wardrobe (The Chronicles of Narnia, Book 2) автора C. S. Lewis, так же покупают:


Unnamed: 0,Measure,Book-Title,Book-Author,Year-Of-Publication,Publisher
0,104.364656,The Horse and His Boy,C. S. Lewis,1994,HarperCollins
1,96.205645,The Magician's Nephew (rack) (Narnia),C. S. Lewis,2002,HarperCollins
2,87.638249,The Voyage of the Dawn Treader (rack) (Narnia),C. S. Lewis,1994,HarperCollins
3,87.043011,Prince Caspian (rack) : The Return to Narnia (...,C. S. Lewis,1994,HarperCollins
4,79.55746,The Last Battle,C. S. Lewis,1994,HarperCollins
5,68.967742,The Silver Chair,C. S. Lewis,1994,HarperCollins
6,44.354839,Harry Potter and the Sorcerer's Stone (Book 1),J. K. Rowling,1998,Scholastic
7,44.085231,Harry Potter and the Chamber of Secrets (Book 2),J. K. Rowling,1999,Scholastic
8,44.010011,The Fellowship of the Ring (The Lord of the Ri...,J. R. R. Tolkien,1999,Houghton Mifflin Company
9,35.726862,Harry Potter and the Prisoner of Azkaban (Book 3),J. K. Rowling,1999,Scholastic


## Пункт 2 (Content-based)

In [32]:
texts = books.apply(lambda x: str(x[1]) + " " + str(x[2]) + " " + str(x[3]) + " " + str(x[4]), axis=1)

In [33]:
book_description = [simple_preprocess(re.sub("[^A-Za-z]+", " ", i.lower())) for i in texts]

In [34]:
num_features = 100
model = Word2Vec(book_description, size=num_features, min_count=1, workers=10, iter=10)

In [35]:
def avg_vector(words, model):
    feature_vec = np.zeros((num_features, ), dtype='float32')
    n_words = 0
    for word in words:
        if word in model.wv:
            n_words += 1
            feature_vec = np.add(feature_vec, model.wv[word])
    if (n_words > 0):
        feature_vec = feature_vec / n_words
    return feature_vec

books_embs = np.zeros((len(book_description), num_features))
for i in range(books_embs.shape[0]):
    books_embs[i] = avg_vector(book_description[i], model)

In [36]:
def get_rec_I2I(i):
    book = books.iloc[i]
    metrics = cosine_similarity([books_embs[i]], books_embs)
    print("Для книги", book["Book-Title"], "автора", book["Book-Author"], "такие рекомендации:")
    ICD.display((books
            .iloc[np.argsort(metrics)[0][::-1][1:11]]
            .reset_index()
            .drop(columns=["index", "ISBN", "Image-URL-S", "Image-URL-M", "Image-URL-L"])
           ))
    
def get_rec_U2I(i):
        user = user_products.iloc[i]
        user_books = user["Users-Books"]
        user_books = (pd.DataFrame({"ISBN": le_books.inverse_transform(user_books)})
                      .merge(books, on="ISBN", how="inner")
                      .drop(columns=["ISBN", "Image-URL-S", "Image-URL-M", "Image-URL-L"])
                     )
        user_vector = []
        texts = user_books.apply(lambda x: str(x[0]) + " " + str(x[1]) + " " + str(x[2]) + " " + str(x[3]), axis=1)
        for i in texts:
            user_vector = user_vector + simple_preprocess(re.sub("[^A-Za-z]+", " ", i.lower()))
        user_emb = avg_vector(user_vector, model)
        metrics = cosine_similarity([user_emb], books_embs)
        print("Для пользователя, который купил книги")
        ICD.display(user_books)
        print("\n\n\nТакие рекомендации")
        ICD.display((books
            .iloc[np.argsort(metrics)[0][::-1][1:11]]
            .reset_index()
            .drop(columns=["ISBN", "Image-URL-S", "Image-URL-M", "Image-URL-L", "index"])
           ))

In [37]:
get_rec_I2I(2809)

Для книги Harry Potter and the Sorcerer's Stone (Book 1) автора J. K. Rowling такие рекомендации:


Unnamed: 0,Book-Title,Book-Author,Year-Of-Publication,Publisher
0,Harry Potter and the Sorcerer's Stone (Book 1),J. K. Rowling,2001,Scholastic
1,Harry Potter and the Sorcerer's Stone (Book 1),J. K. Rowling,2001,Scholastic
2,Harry Potter and the Sorcerer's Stone (Book 1),J. K. Rowling,1998,Scholastic
3,Harry Potter and the Prisoner of Azkaban (Book 3),J. K. Rowling,1999,Scholastic
4,Harry Potter and the Prisoner of Azkaban (Book 3),J. K. Rowling,2001,Scholastic
5,Harry Potter and the Goblet of Fire (Book 4),J. K. Rowling,2000,Scholastic
6,Harry Potter and the Chamber of Secrets (Book 2),J. K. Rowling,1999,Scholastic
7,Harry Potter and the Chamber of Secrets (Book 2),J. K. Rowling,2002,Scholastic
8,Harry Potter and the Chamber of Secrets (Book 2),J. K. Rowling,2000,Scholastic
9,Harry Potter and the Chamber of Secrets Postca...,J. K. Rowling,2002,Scholastic


In [38]:
get_rec_I2I(9727)

Для книги The Lion, the Witch, and the Wardrobe (The Chronicles of Narnia, Book 2) автора C. S. Lewis такие рекомендации:


Unnamed: 0,Book-Title,Book-Author,Year-Of-Publication,Publisher
0,"The Lion, the Witch and the Wardrobe (The Chro...",C. S. Lewis,1994,HarperCollins
1,"The Lion, the Witch and the Wardrobe (The Chro...",C. S. Lewis,2000,HarperChildrensAudio
2,The Voyage of the 'Dawn Treader' (The Chronicl...,C. S. Lewis,1994,HarperCollins
3,"The Lion, the Witch and the Wardrobe (rpkg) (N...",C. S. Lewis,1994,HarperCollins
4,The Chronicles of Narnia: The Magician's Nephe...,C. S. Lewis,1994,HarperCollins
5,"The Lion, the Witch and the Wardrobe (Book 1 i...",C. S. Lewis,1987,Scholastic
6,"The Lion, the Witch and the Wardrobe (rpkg) (N...",C. S. Lewis,1994,HarperTrophy
7,The Horse and His Boy (The Chronicles of Narni...,C. S. Lewis,1994,HarperTrophy
8,"The Lion, the Witch and the Wardrobe",C.S. Lewis,1998,Collins
9,"The Lion, the Witch and the Wardrobe (Chronicl...",C. S. Lewis,1986,Collier Books


In [39]:
get_rec_U2I(1999)

Для пользователя, который купил книги


Unnamed: 0,Book-Title,Book-Author,Year-Of-Publication,Publisher
0,"The Return of the King (The Lord of the Rings,...",J.R.R. TOLKIEN,1986,Del Rey
1,The Acid House,Irvine Welsh,1995,W. W. Norton &amp; Company





Такие рекомендации


Unnamed: 0,Book-Title,Book-Author,Year-Of-Publication,Publisher
0,The Fellowship of the Ring (The Lord of the Ri...,J.R.R. TOLKIEN,1986,Del Rey
1,The Hobbit and The Lord of the Rings,J.R.R. TOLKIEN,1986,Del Rey
2,"The Return of the King (The Lord of The Rings,...",J. R. R. Tolkien,1999,Houghton Mifflin Company
3,"The Return of the King (The Lord of the Rings,...",J. R. R. Tolkien,2001,Houghton Mifflin Company
4,"The Return of the King (The Lord of the Rings,...",J. R. R. Tolkien,2001,Houghton Mifflin Company
5,"The Return of the King (The Lord of the Rings,...",J. R. R. Tolkien,2003,Houghton Mifflin Company
6,"The Return of the King (The Lord of the Rings,...",J. R. R. Tolkien,2002,Houghton Mifflin Company
7,"The Return of the King (The Lord of the Rings,...",J. R. R. Tolkien,1988,Houghton Mifflin Company
8,"The Two Towers (The Lord of the Rings, Part 2)",J.R.R. TOLKIEN,1986,Del Rey
9,The Hobbit : The Enchanting Prelude to The Lor...,J.R.R. TOLKIEN,1986,Del Rey


In [40]:
get_rec_U2I(5000)

Для пользователя, который купил книги


Unnamed: 0,Book-Title,Book-Author,Year-Of-Publication,Publisher
0,"\O\"" Is for Outlaw""",Sue Grafton,1999,Henry Holt &amp; Company
1,Red Rabbit,Tom Clancy,2003,Berkley Publishing Group
2,Tom Clancy's Op-Center: Divide and Conquer (To...,Tom Clancy,2000,Berkley Publishing Group
3,The Greatest Generation,TOM BROKAW,1998,Random House
4,Hush Money,Robert B. Parker,1999,Putnam Pub Group





Такие рекомендации


Unnamed: 0,Book-Title,Book-Author,Year-Of-Publication,Publisher
0,Acts of War (Tom Clancy's Op Center (Paperback)),Tom Clancy,1997,Berkley Publishing Group
1,Shadow Watch (Tom Clancy's Power Plays (Paperb...,Tom Clancy,1999,Berkley Publishing Group
2,Call to Treason (Tom Clancy's Op Center (Paper...,Tom Clancy,2004,Berkley Publishing Group
3,The Hunt for Red October,Tom Clancy,1985,Berkley Publishing Group
4,The Hunt for Red October,Tom Clancy,1990,Berkley Publishing Group
5,Politika (Tom Clancy's Power Plays (Paperback)),Tom Clancy,1999,Berkley Publishing Group
6,Tom Clancy's Op-Center: Line of Control (Tom C...,Jeff Rovin,2001,Berkley Publishing Group
7,Tom Clancy's Op-Center: Mission of Honor (Tom ...,Jeff Rovin,2002,Berkley Publishing Group
8,Safe House (Tom Clancy's Net Force; Young Adul...,Tom Clancy,2000,Berkley Publishing Group
9,Tom Clancy's Op-Center: Divide and Conquer (To...,Tom Clancy,2000,Berkley Publishing Group


## Пункт 3 (Коллаборативная фильтрация)

In [41]:
matrix = coo_matrix((interactions["Book-Rating"], (interactions["U_ID"], interactions["B_ID"])), 
                            shape=(len(set(interactions["U_ID"])), len(set(interactions["B_ID"]))))
books_num = np.array(books_num).reshape(-1, 1)
avg_rates = np.array(np.sum(matrix, axis=1)/books_num)
shifted_rating_users = interactions.apply(lambda x: x["Book-Rating"] - avg_rates[x["U_ID"]][0], axis=1)
matrix_shifted_users = coo_matrix((shifted_rating_users, (interactions["U_ID"], interactions["B_ID"])), 
                            shape=(len(set(interactions["U_ID"])), len(set(interactions["B_ID"]))))
shifted_rating_books = interactions.apply(lambda x: x["Book-Rating"] - ratings[x["B_ID"]], axis=1)
matrix_shifted_books = coo_matrix((shifted_rating_books, (interactions["U_ID"], interactions["B_ID"])), 
                            shape=(len(set(interactions["U_ID"])), len(set(interactions["B_ID"]))))

In [42]:
def collab_U2I_user_based(idx):
    user_rates = matrix.getrow(idx).toarray()[0]
    watched_books = np.where(user_rates != 0)[0]
    metrics = cosine_similarity([user_rates], matrix).reshape(-1, 1)
    rates = csr_matrix.multiply(matrix_shifted_users.copy(), metrics)
    rates = (1 - user_rates.astype(bool)) * np.array(np.sum(rates, axis=0))[0]
    print("Для пользователя, который покупал такие книги:")
    ICD.display(books
                .merge(pd.DataFrame({"ISBN": le_books.inverse_transform(watched_books)}), on="ISBN")
                .reset_index()
                .drop(columns=["index", "ISBN", "Image-URL-S", "Image-URL-M", "Image-URL-L"])
               )
    print("Такие рекомендации: ")
    ICD.display(books
                .merge(pd.DataFrame({"ISBN": le_books.inverse_transform(np.argsort(rates)[::-1][0:10])}), on="ISBN")
                .reset_index()
                .drop(columns=["index", "ISBN", "Image-URL-S", "Image-URL-M", "Image-URL-L"])
               )
    
def collab_U2I_item_based(idx):
    user_rates = matrix_shifted_books.getrow(idx).toarray()[0]
    watched_books = np.where(user_rates != 0)[0]
    metrics = cosine_similarity(matrix.T, matrix.T)
    rates = np.matmul(user_rates,  metrics)
    rates = np.array(list(ratings.values())) + rates/np.sum(metrics, axis=1)
#     for i in watched_books:
#         rates[i] = 0.0
    np.argsort(rates)[::-1][:10], np.sort(rates)[::-1][:10]
    print("Для пользователя, который покупал такие книги:")
    ICD.display(books
                .merge(pd.DataFrame({"ISBN": le_books.inverse_transform(watched_books)}), on="ISBN")
                .reset_index()
                .drop(columns=["index", "ISBN", "Image-URL-S", "Image-URL-M", "Image-URL-L"])
               )
    print("Такие рекомендации: ")
    ICD.display(books
                .merge(pd.DataFrame({"ISBN": le_books.inverse_transform(np.argsort(rates)[::-1][:10])}), on="ISBN")
                .reset_index()
                .drop(columns=["index", "ISBN", "Image-URL-S", "Image-URL-M", "Image-URL-L"])
               )

In [46]:
collab_U2I_item_based(453)

Для пользователя, который покупал такие книги:


Unnamed: 0,Book-Title,Book-Author,Year-Of-Publication,Publisher
0,The Red Tent (Bestselling Backlist),Anita Diamant,1998,Picador USA
1,Cien AÃ±os de Soledad,Gabriel GarcÃ­a MÃ¡rquez,1994,Ediciones Catedra S.A.
2,The Ghost of Hannah Mendes,Naomi Ragen,2001,St. Martin's Griffin
3,Portrait in Sepia : A Novel,Isabel Allende,2002,Perennial
4,Daughter of Fortune: A Novel,Isabel Allende,2001,HarperTorch


Такие рекомендации: 


Unnamed: 0,Book-Title,Book-Author,Year-Of-Publication,Publisher
0,Winterdance: The Fine Madness of Running the I...,Gary Paulsen,1995,Harvest Books
1,Captain's Verses (New Directions Paperbook),Pablo Neruda,1972,New Directions Publishing Corporation
2,More Than Complete Hitchhiker's Guide: Complet...,Douglas Adams,1987,Longmeadow Press
3,More Random Acts of Kindness,Conari Press,1994,Conari Press
4,Before I Say Goodbye,Ruth Picardie,1999,Penguin Uk
5,The Hobbit,J. R. R. Tolkien,1938,Houghton Mifflin Company
6,The Holy Man,Susan Trott,1996,Riverhead Books
7,Natural California: A Postcard Book,Not Applicable (Na ),1990,Running Pr
8,Marine Sniper: 93 Confirmed Kills,Charles Henderson,1991,Berkley Publishing Group
9,Fix-It and Forget-It Cookbook: Feasting with Y...,Dawn J. Ranck,2001,Good Books


In [44]:
collab_U2I_item_based(6776)

Для пользователя, который покупал такие книги:


Unnamed: 0,Book-Title,Book-Author,Year-Of-Publication,Publisher
0,Harry Potter and the Chamber of Secrets (Book 2),J. K. Rowling,2000,Scholastic
1,Harry Potter and the Goblet of Fire (Book 4),J. K. Rowling,2000,Scholastic
2,Harry Potter and the Order of the Phoenix (Boo...,J. K. Rowling,2003,Scholastic
3,Harry Potter and the Prisoner of Azkaban (Book 3),J. K. Rowling,2001,Scholastic
4,Harry Potter and the Sorcerer's Stone (Book 1),J. K. Rowling,2001,Scholastic
5,The Neverending Story,Michael Ende,1996,Puffin Books


Такие рекомендации: 


Unnamed: 0,Book-Title,Book-Author,Year-Of-Publication,Publisher
0,Postmarked Yesteryear: 30 Rare Holiday Postcards,Pamela E. Apkarian-Russell,2001,Collectors Press
1,The University of Chicago Spanish - English En...,Carlos Castillo,1991,Pocket
2,Charlie and the Chocolate Factory,ROALD DAHL,2001,Knopf Books for Young Readers
3,Uncle John's Supremely Satisfying Bathroom Rea...,Bathroom Readers Institute,2001,Bathroom Reader Press
4,The Lord of the Rings,J. R. R. Tolkien,2002,Houghton Mifflin Company
5,The Hobbit,J. R. R. Tolkien,1988,Houghton Mifflin Co
6,Six by Seuss: A Treasury of Dr. Seuss Classics,Seuss,1991,Random House Children's Books
7,Our Bodies Ourselves For The New Century (A To...,Boston Women's Health Book Collective,1998,Touchstone
8,The Hitchhiker's Guide to the Galaxy,Douglas Adams,1997,Ballantine Books
9,Western Garden Book,Sunset Editors,1995,Sunset Books Inc


In [45]:
collab_U2I_user_based(6776)

Для пользователя, который покупал такие книги:


Unnamed: 0,Book-Title,Book-Author,Year-Of-Publication,Publisher
0,Harry Potter and the Chamber of Secrets (Book 2),J. K. Rowling,2000,Scholastic
1,Harry Potter and the Goblet of Fire (Book 4),J. K. Rowling,2000,Scholastic
2,Harry Potter and the Order of the Phoenix (Boo...,J. K. Rowling,2003,Scholastic
3,Harry Potter and the Prisoner of Azkaban (Book 3),J. K. Rowling,2001,Scholastic
4,Harry Potter and the Sorcerer's Stone (Book 1),J. K. Rowling,2001,Scholastic
5,The Neverending Story,Michael Ende,1996,Puffin Books


Такие рекомендации: 


Unnamed: 0,Book-Title,Book-Author,Year-Of-Publication,Publisher
0,To Kill a Mockingbird,Harper Lee,1988,Little Brown &amp; Company
1,The Lovely Bones: A Novel,Alice Sebold,2002,"Little, Brown"
2,The Red Tent (Bestselling Backlist),Anita Diamant,1998,Picador USA
3,The Da Vinci Code,Dan Brown,2003,Doubleday
4,Harry Potter and the Sorcerer's Stone (Harry P...,J. K. Rowling,1999,Arthur A. Levine Books
5,Harry Potter and the Sorcerer's Stone (Book 1),J. K. Rowling,1998,Scholastic
6,The Hobbit : The Enchanting Prelude to The Lor...,J.R.R. TOLKIEN,1986,Del Rey
7,Harry Potter and the Prisoner of Azkaban (Book 3),J. K. Rowling,1999,Scholastic
8,Harry Potter and the Chamber of Secrets (Book 2),J. K. Rowling,1999,Scholastic
9,Harry Potter and the Goblet of Fire (Book 4),J. K. Rowling,2002,Scholastic Paperbacks


## Пункт 4 (Матричная факторизация)

In [103]:
k = 60
U, S, V = svds(matrix.astype(float), k)
user_embs = U
items_embs = V.T


def get_MF_I2I(idx):
    book = (books[books["ISBN"] == le_books.inverse_transform([idx])[0]]
     .iloc[0]
    )
    metrics = cosine_similarity([items_embs[idx]], items_embs)
    print("Для книги", book["Book-Title"], "автора", book["Book-Author"], "такие рекомендации:")
    ICD.display(books
                .merge(pd.DataFrame({"ISBN": le_books.inverse_transform(np.argsort(metrics)[0][::-1][1:11])}), on="ISBN")
                .reset_index()
                .drop(columns=["index", "ISBN", "Image-URL-S", "Image-URL-M", "Image-URL-L"])
               )

In [104]:
get_MF_I2I(7035)

Для книги Harry Potter and the Sorcerer's Stone (Book 1) автора J. K. Rowling такие рекомендации:


Unnamed: 0,Book-Title,Book-Author,Year-Of-Publication,Publisher
0,Harry Potter and the Prisoner of Azkaban (Book 3),J. K. Rowling,1999,Scholastic
1,Harry Potter and the Goblet of Fire (Book 4),J. K. Rowling,2000,Scholastic
2,Harry Potter and the Chamber of Secrets (Book 2),J. K. Rowling,1999,Scholastic
3,An Isolated Incident,Susan R. Sloan,1999,Warner Books
4,Great Expectations,Charles Dickens,1998,Penguin USA (Paper)
5,"Assassin's Quest (The Farseer Trilogy, Book 3)",ROBIN HOBB,1998,Spectra
6,Ausweitung der Kampfzone.,Michel Houellebecq,2000,Rowohlt Tb.
7,Velvet Song,Jude Deveraux,1991,Pocket
8,Nightingale's Lament: A Novel of the Nightside,Simon R. Green,2004,Ace Books
9,Drums of Autumn,DIANA GABALDON,2001,Delta


In [105]:
get_MF_I2I(736)

Для книги The Lion, the Witch, and the Wardrobe (The Chronicles of Narnia, Book 2) автора C. S. Lewis такие рекомендации:


Unnamed: 0,Book-Title,Book-Author,Year-Of-Publication,Publisher
0,The Dark Wind (Jim Chee Novels),Tony Hillerman,1990,HarperTorch
1,The Last Battle,C. S. Lewis,1994,HarperCollins
2,The Silver Chair,C. S. Lewis,1994,HarperCollins
3,The Horse and His Boy,C. S. Lewis,1994,HarperCollins
4,Prince Caspian (rack) : The Return to Narnia (...,C. S. Lewis,1994,HarperCollins
5,The Voyage of the Dawn Treader (rack) (Narnia),C. S. Lewis,1994,HarperCollins
6,The Sword in the Stone,T. H. White,1978,Laure Leaf
7,The Magician's Nephew (rack) (Narnia),C. S. Lewis,2002,HarperCollins
8,The Foot Book (Bright and Early Books for Begi...,Dr. Seuss,1968,Random House Children's Books
9,The Crystal Cave : Book One of the Arthurian S...,Mary Stewart,2003,Eos (Trade)
