## Description Based Recommender using Word Embeddings

In [1]:
import os
import re

import numpy as np
import pandas as pd
from gensim import models
from gensim.models import fasttext as ft
from IPython.display import Image
from sklearn.feature_extraction.text import CountVectorizer, TfidfVectorizer
from sklearn.metrics.pairwise import (
    cosine_similarity,
    euclidean_distances,
    manhattan_distances,
)

Dataset: `Rec_sys_content.csv`, taken from https://github.com/Apress/applied-recommender-systems-python/tree/main/Data

In [2]:
# Read the product catalogue used throughout the notebook.
df = pd.read_csv(filepath_or_buffer="../../data/Rec_sys_content.csv")

In [3]:
# Preview the first five rows of the catalogue.
df.head(n=5)

Unnamed: 0,StockCode,Product Name,Description,Category,Brand,Unit Price
0,22629,Ganma Superheroes Ordinary Life Case For Samsu...,"New unique design, great gift.High quality pla...",Cell Phones|Cellphone Accessories|Cases & Prot...,Ganma,13.99
1,21238,Eye Buy Express Prescription Glasses Mens Wome...,Rounded rectangular cat-eye reading glasses. T...,Health|Home Health Care|Daily Living Aids,Eye Buy Express,19.22
2,22181,MightySkins Skin Decal Wrap Compatible with Ni...,Each Nintendo 2DS kit is printed with super-hi...,Video Games|Video Game Accessories|Accessories...,Mightyskins,14.99
3,84879,Mediven Sheer and Soft 15-20 mmHg Thigh w/ Lac...,The sheerest compression stocking in its class...,Health|Medicine Cabinet|Braces & Supports,Medi,62.38
4,84836,Stupell Industries Chevron Initial Wall D cor,Features: -Made in the USA. -Sawtooth hanger o...,Home Improvement|Paint|Wall Decals|All Wall De...,Stupell Industries,35.99


Pretrained word2vec vectors (GoogleNews, 300 dimensions): https://www.kaggle.com/datasets/leadbest/googlenewsvectorsnegative300

In [7]:
# Load the GoogleNews word2vec vectors from their binary word2vec-format file.
word2vecModel = models.KeyedVectors.load_word2vec_format(
    fname="../../models/GoogleNews-vectors-negative300.bin.gz",
    binary=True,
)

Pretrained fastText vectors (Common Crawl, English, 300 dimensions): https://dl.fbaipublicfiles.com/fasttext/vectors-crawl/cc.en.300.bin.gz

In [230]:
# Load only the word vectors of the Facebook fastText binary model.
fasttext_model = ft.load_facebook_vectors(path="../../models/cc.en.300.bin.gz")

Pretrained GloVe vectors (6B tokens, 300 dimensions): https://www.kaggle.com/datasets/thanakomsn/glove6b300dtxt

In [246]:
# Parse the GloVe text file: the first space-separated field of each line is
# used as the row label (the token), the remaining fields are its vector.
glove_df = pd.read_csv(
    "../../models/glove.6B.300d.txt",
    sep=" ",
    quoting=3,  # 3 == csv.QUOTE_NONE: quote characters are ordinary text
    header=None,
    index_col=0,
    # Keep tokens such as "null", "nan" or "n/a" as words; with the default
    # NA handling pandas would read them as missing values.
    keep_default_na=False,
)
# Map every token to its vector (one row of the frame). Zipping the index with
# the underlying array avoids transposing the whole frame just to iterate rows.
glove_model = dict(zip(glove_df.index, glove_df.to_numpy()))

In [11]:
# Sparse text featurizers used by `vectorize`; both drop English stop words.
# The TF-IDF variant works on word n-grams of length one to three.
tfidf = TfidfVectorizer(ngram_range=(1, 3), analyzer="word", stop_words="english")
vectorizer = CountVectorizer(stop_words="english")

In [8]:
# Build one free-text field per product (name + description), drop products
# whose combined text is identical, then normalise the text: lower-case it and
# strip every character that is neither a word character nor whitespace.
df = (
    df.assign(
        # A missing name or description is treated as empty text; otherwise the
        # concatenation would be NaN and the string clean-up below would fail.
        description=lambda _df: _df["Product Name"].fillna("")
        + " "
        + _df["Description"].fillna("")
    )
    .drop_duplicates(subset=["description"], keep="first")
    .assign(
        description=lambda _df: _df["description"]
        .str.lower()
        .str.replace(r"[^\w\s]", "", regex=True)
    )
    # Row labels become 0..n-1 so they line up with positions in `desc_list`.
    .reset_index(drop=True)
)

# Cleaned descriptions in row order; this is the corpus every vectorizer sees.
desc_list = df["description"].tolist()

In [279]:
def get_model(model_name):
    """Return the pretrained embedding model registered under ``model_name``.

    Parameters
    ----------
    model_name : str
        One of ``"word2vec"``, ``"fasttext"`` or ``"glove"``.

    Returns
    -------
    The notebook-level ``word2vecModel``, ``fasttext_model`` or ``glove_model``
    object, respectively.

    Raises
    ------
    ValueError
        If ``model_name`` is not one of the supported names. Failing loudly
        keeps a misspelled name from silently selecting the wrong embeddings.
    """
    # The globals are looked up lazily, branch by branch, so only the model
    # that is actually requested has to be loaded.
    if model_name == "word2vec":
        return word2vecModel
    if model_name == "fasttext":
        return fasttext_model
    if model_name == "glove":
        return glove_model
    raise ValueError(
        f"Unknown embedding model {model_name!r}; "
        "expected 'word2vec', 'fasttext' or 'glove'"
    )

In [280]:
def vectorize(model_name):
    """Turn every description in ``desc_list`` into a feature vector.

    Parameters
    ----------
    model_name : str
        ``"CountVectorizer"`` or ``"TFIDF"`` fit the matching notebook-level
        scikit-learn vectorizer on ``desc_list``. Any other value is passed to
        ``get_model`` to select a pretrained word-embedding model.

    Returns
    -------
    The matrix returned by ``fit_transform`` for the two scikit-learn
    vectorizers; otherwise a ``(len(desc_list), 300)`` float array in which
    each row is the sum of the embeddings of the description's tokens.
    Tokens the model has no vector for contribute nothing, so a description
    without any known token is an all-zero row.
    """
    if model_name == "CountVectorizer":
        return vectorizer.fit_transform(desc_list)
    if model_name == "TFIDF":
        return tfidf.fit_transform(desc_list)

    # The array below is allocated with this width; the model file names in
    # this notebook suggest 300-dimensional vectors.
    embedding_dim = 300
    model = get_model(model_name)

    # Choose the membership test once, up front. A plain dict (GloVe) supports
    # `in`; the other models are queried through `has_index_for`.
    if isinstance(model, dict):
        has_vector = model.__contains__
    else:
        has_vector = model.has_index_for

    X = np.zeros((len(desc_list), embedding_dim))
    for index, description in enumerate(desc_list):
        known = [model[token] for token in description.split() if has_vector(token)]
        if known:
            # Accumulate in float64 regardless of the model's storage dtype.
            X[index] = np.sum(known, axis=0, dtype=np.float64)

    return X

In [273]:
def similarity(X, sim_metric):
    """Compute the pairwise comparison matrix between all rows of ``X``.

    ``"cosine"`` yields cosine similarities and ``"manhattan"`` yields
    Manhattan distances; every other value of ``sim_metric`` yields
    Euclidean distances.
    """
    if sim_metric == "cosine":
        return cosine_similarity(X)
    if sim_metric == "manhattan":
        return manhattan_distances(X)
    # Fallback for any other metric name.
    return euclidean_distances(X)

In [274]:
def rank(sim_vector, sim_metric, data=None):
    """Order products from best to worst match for one query product.

    Parameters
    ----------
    sim_vector : array-like
        One score per product, in the same order as the rows of the frame.
    sim_metric : str
        ``"cosine"`` treats scores as similarities (largest first); any other
        value treats them as distances (smallest first).
    data : pandas.DataFrame, optional
        Frame holding a ``"Product Name"`` column. Defaults to the
        notebook-level ``df`` when omitted.

    Returns
    -------
    pandas.DataFrame
        Columns ``"Product Name"`` and ``"Score"``, sorted best match first.
    """
    frame = df if data is None else data
    scores = np.asarray(sim_vector)

    # One stable permutation drives both columns, so names and scores always
    # stay aligned and tied scores keep their original row order.
    sort_key = -scores if sim_metric == "cosine" else scores
    order = np.argsort(sort_key, kind="stable")

    return pd.concat(
        [
            frame.iloc[order][["Product Name"]].reset_index(drop=True),
            pd.Series(scores[order], name="Score"),
        ],
        axis=1,
    )

In [275]:
def recommend(df, product_id, vectorizer, sim_metric):
    """Rank every product by how closely it matches ``product_id``.

    Parameters
    ----------
    df : pandas.DataFrame
        Catalogue with a ``"Product Name"`` column.
    product_id : str
        Exact product name to look up; the first matching row is used.
    vectorizer : str
        Name of the text representation, forwarded to ``vectorize``.
    sim_metric : str
        Name of the comparison metric, forwarded to ``similarity`` and ``rank``.

    Returns
    -------
    The ranking produced by ``rank`` for the matched product's row of the
    pairwise matrix.

    Raises
    ------
    IndexError
        If no row of ``df`` has ``product_id`` as its product name.
    """
    # Use the row *position* rather than the index label: the pairwise matrix
    # is addressed by position.
    positions = np.flatnonzero((df["Product Name"] == product_id).to_numpy())
    if positions.size == 0:
        raise IndexError(f"No product named {product_id!r} in the catalogue")
    index = positions[0]

    X = vectorize(vectorizer)
    matrix = similarity(X, sim_metric)
    return rank(matrix[index], sim_metric)

In [276]:
# Query product: its exact "Product Name" value, reused by every recommend call below.
product_id = 'Vickerman 14" Finial Drop Christmas Ornaments, Pack of 2'

In [277]:
# Raw term counts compared by cosine similarity; most similar products first.
recommend(df, product_id, vectorizer="CountVectorizer", sim_metric="cosine")

Unnamed: 0,Product Name,Score
0,"Vickerman 14"" Finial Drop Christmas Ornaments,...",1.000000
1,Fancyleo Christmas Glasses Frames 2 Pack Glitt...,0.282491
2,storefront christmas LED Decoration Light Gold...,0.196228
3,"16 inches 40 inches ""MERRY CHRISTMAS"" Letter F...",0.187070
4,12ct Light Gunmetal Gray Shatterproof 4-Finish...,0.185669
...,...,...
797,Sosoft 20 - 30 Knee High Brocade Closed Toe Co...,0.000000
798,Superior 1500 Series Brushed Microfiber Soft a...,0.000000
799,"Wet n Wild Wild Shine Nail Color, Night Prowl,...",0.000000
800,"Gillette Mach3 Turbo Refill Blade Cartridges, ...",0.000000


In [108]:
# Raw term counts compared by Manhattan distance; closest products first.
recommend(df, product_id, vectorizer="CountVectorizer", sim_metric="manhattan")

Unnamed: 0,Product Name,Score
0,Polo Blue by Ralph Lauren,43.0
1,Global Portuguese,43.0
2,Stepping Stones,43.0
3,Drunken Monkeys,45.0
4,Leftover Salmon,45.0
5,Auburn Leathercrafters Tuscany Leather Dog Collar,45.0
6,Good (Vinyl),45.0
7,Amerlite Niche Sealing Ring,47.0
8,Learning and Performance in Corrections,47.0
9,Always in My Heart,47.0


In [109]:
# Raw term counts compared by Euclidean distance; closest products first.
recommend(df, product_id, vectorizer="CountVectorizer", sim_metric="euclidean")

Unnamed: 0,Product Name,Score
0,Polo Blue by Ralph Lauren,9.0
1,Global Portuguese,9.110434
2,Auburn Leathercrafters Tuscany Leather Dog Collar,9.110434
3,Leftover Salmon,9.219544
4,Always in My Heart,9.219544
5,Stepping Stones,9.219544
6,Good (Vinyl),9.219544
7,Drunken Monkeys,9.219544
8,Chasing Hamburg (Vinyl),9.433981
9,Learning and Performance in Corrections,9.433981


In [226]:
# TF-IDF features compared by cosine similarity; most similar products first.
recommend(df, product_id, vectorizer="TFIDF", sim_metric="cosine")

Unnamed: 0,Product Name,Score
0,"Vickerman 14"" Finial Drop Christmas Ornaments,...",1.000000
1,Fancyleo Christmas Glasses Frames 2 Pack Glitt...,0.065141
2,storefront christmas LED Decoration Light Gold...,0.052748
3,12ct Light Gunmetal Gray Shatterproof 4-Finish...,0.051223
4,"16 inches 40 inches ""MERRY CHRISTMAS"" Letter F...",0.045654
...,...,...
797,1-1/2 X 50 Yards Red Trevia Taffeta Wired Ribb...,0.000000
798,HP Storageworks P2000 Lff Drive Enclosure I/o ...,0.000000
799,Laptop AC Power Adapter Charger for HP Pavilio...,0.000000
800,Men's 212 Vip By Carolina Herrera,0.000000


In [142]:
# TF-IDF features compared by Manhattan distance; closest products first.
recommend(df, product_id, vectorizer="TFIDF", sim_metric="manhattan")

Unnamed: 0,Product Name,Score
0,Stepping Stones,11.047069
1,Global Portuguese,11.417914
2,Polo Blue by Ralph Lauren,11.682247
3,DuraTech Roof Support Trim,12.064566
4,Silver Metallic Foil Streamers,12.068484
5,Pro Pinstripe Jersey Sleeveles,12.068616
6,Amerlite Niche Sealing Ring,12.071506
7,Leftover Salmon,12.296584
8,Drunken Monkeys,12.319899
9,"Alfred's Drum Method, Book 1",12.407311


In [143]:
# TF-IDF features compared by Euclidean distance; closest products first.
recommend(df, product_id, vectorizer="TFIDF", sim_metric="euclidean")

Unnamed: 0,Product Name,Score
0,Fancyleo Christmas Glasses Frames 2 Pack Glitt...,1.367376
1,storefront christmas LED Decoration Light Gold...,1.37641
2,12ct Light Gunmetal Gray Shatterproof 4-Finish...,1.377517
3,"16 inches 40 inches ""MERRY CHRISTMAS"" Letter F...",1.381554
4,Is It To Late To Be Good Grinch Christmas Mens...,1.397235
5,Christopher Radko Glass Plum Frosty Snowman Ch...,1.397252
6,CMFUN Watercolor Brush Creative Flower Made wi...,1.398271
7,SKIN DECAL FOR OtterBox Symmetry Samsung Galax...,1.399823
8,Santa's Workshop Illinois Mascot and Flag Nutc...,1.400943
9,The Holiday Aisle LED C7 Faceted Christmas Lig...,1.400947


In [227]:
# Summed word2vec embeddings compared by cosine similarity; most similar first.
recommend(df, product_id, vectorizer="word2vec", sim_metric="cosine")

Unnamed: 0,Product Name,Score
0,"Vickerman 14"" Finial Drop Christmas Ornaments,...",1.000000
1,Fancyleo Christmas Glasses Frames 2 Pack Glitt...,0.850988
2,Red Gift Wrap Bag by K-Kraft (Festive Red Stri...,0.848707
3,12 Pcs 3D Butterfly Stickers DIY Mural Art Dec...,0.839869
4,Cat Mod Garden Complex - Unfinished/Black,0.834148
...,...,...
797,Stepping Stones,0.284019
798,Introduction to Policing 3rd Ed. + Law Enforce...,0.281768
799,NORTHBOUND INTERFACE OPTION,0.229122
800,"Mexico En La Obra de Octavio Paz, I. El Peregr...",0.223108


In [228]:
# Summed word2vec embeddings compared by Manhattan distance; closest first.
recommend(df, product_id, vectorizer="word2vec", sim_metric="manhattan")

Unnamed: 0,Product Name,Score
0,"Vickerman 14"" Finial Drop Christmas Ornaments,...",0.000000
1,storefront christmas LED Decoration Light Gold...,458.132740
2,8 1/2 x 14 Cardstock - Crystal Metallic (500 Q...,488.185658
3,Cavalier Spaniel St. Patricks Day Shamrock Mou...,497.004947
4,Call of the Wild Howling the Full Moon Women's...,509.222198
...,...,...
797,Emerald Scallops PRINT DESIGN - Wallet Phone C...,6176.043731
798,"Frankincense (Boswellia Serrata) Tincture, Org...",6337.300662
799,Purity Home 100% Combed Compact Cotton Sheet S...,6396.809744
800,"Partridge Berry (Mitchella Repens) Tincture, D...",6500.401748


In [229]:
# Summed word2vec embeddings compared by Euclidean distance; closest first.
recommend(df, product_id, vectorizer="word2vec", sim_metric="euclidean")

Unnamed: 0,Product Name,Score
0,"Vickerman 14"" Finial Drop Christmas Ornaments,...",0.000000
1,storefront christmas LED Decoration Light Gold...,32.756993
2,8 1/2 x 14 Cardstock - Crystal Metallic (500 Q...,34.820335
3,Cavalier Spaniel St. Patricks Day Shamrock Mou...,35.776462
4,Call of the Wild Howling the Full Moon Women's...,36.093413
...,...,...
797,Emerald Scallops PRINT DESIGN - Wallet Phone C...,448.028209
798,"Frankincense (Boswellia Serrata) Tincture, Org...",448.582209
799,Purity Home 100% Combed Compact Cotton Sheet S...,455.209673
800,"Partridge Berry (Mitchella Repens) Tincture, D...",460.851464


In [239]:
# Summed fastText embeddings compared by cosine similarity; most similar first.
recommend(df, product_id, vectorizer="fasttext", sim_metric="cosine")

Unnamed: 0,Product Name,Score
0,"Vickerman 14"" Finial Drop Christmas Ornaments,...",1.000000
1,All Weather Cornhole Bags - Set of 8,0.953890
2,American Foxhound Christmas Sticky Note Holder...,0.949669
3,"94"" Bottom Width x 96 1/2"" Top Width x 5 1/2""H...",0.942710
4,"Efavormart Pack of 5 Premium 17"" x 17"" Washabl...",0.942074
...,...,...
797,NORTHBOUND INTERFACE OPTION,0.301081
798,Stepping Stones,0.293017
799,Pro Pinstripe Jersey Sleeveles,0.289420
800,Auburn Leathercrafters Tuscany Leather Dog Collar,0.287685


In [240]:
# Summed fastText embeddings compared by Manhattan distance; closest first.
recommend(df, product_id, vectorizer="fasttext", sim_metric="manhattan")

Unnamed: 0,Product Name,Score
0,"Vickerman 14"" Finial Drop Christmas Ornaments,...",0.000000
1,"Spiral Birthday Candles, 36 Count",251.727758
2,Just Artifacts Gold Glitter Letter B,273.160419
3,Giant 36in. Purple Balloons (Set of 2),283.718795
4,(2-Pack) StealthShields Tablet Screen Protecto...,291.827587
...,...,...
797,Dr. Mercola Natural Flea and Tick Defense - A ...,3538.149722
798,Fleur De Lis Living Boxwood Ball Topiary in Pot,3673.912797
799,AARCO Enclosed Wall Mounted Bulletin Board,3805.954353
800,Emerald Scallops PRINT DESIGN - Wallet Phone C...,4028.059723


In [241]:
# Summed fastText embeddings compared by Euclidean distance; closest first.
recommend(df, product_id, vectorizer="fasttext", sim_metric="euclidean")

Unnamed: 0,Product Name,Score
0,"Vickerman 14"" Finial Drop Christmas Ornaments,...",0.000000
1,"Spiral Birthday Candles, 36 Count",18.887096
2,Just Artifacts Gold Glitter Letter B,19.716557
3,Giant 36in. Purple Balloons (Set of 2),20.874181
4,(2-Pack) StealthShields Tablet Screen Protecto...,22.924846
...,...,...
797,"Foxnut (Euryale Ferox) Glycerite, Organic Seed...",371.322067
798,Dr. Mercola Natural Flea and Tick Defense - A ...,376.407371
799,Purity Home 100% Combed Compact Cotton Sheet S...,376.653247
800,"Partridge Berry (Mitchella Repens) Tincture, D...",385.909349


In [263]:
# Summed GloVe embeddings compared by cosine similarity; most similar first.
recommend(df, product_id, vectorizer="glove", sim_metric="cosine")

Unnamed: 0,Product Name,Score
0,"Vickerman 14"" Finial Drop Christmas Ornaments,...",1.000000
1,Red Gift Wrap Bag by K-Kraft (Festive Red Stri...,0.935554
2,Cat Mod Garden Complex - Unfinished/Black,0.934786
3,CafePress - FIN Nuts Squirrel - Cute Infant Ba...,0.934468
4,American Foxhound Christmas Sticky Note Holder...,0.933823
...,...,...
797,Babor Fluids FP Pure Intense Balancing Fluid 2...,0.174355
798,"Motorcraft Engine Fuel Filter, MTCFG 1-A",0.171479
799,Classique 766 Post Mastectomy Fashion Bra-Beig...,0.126747
800,Sports Parts Inc 03-110-03 Chaincase Oil Seal ...,0.109979


In [264]:
# Summed GloVe embeddings compared by Manhattan distance; closest first.
recommend(df, product_id, vectorizer="glove", sim_metric="manhattan")

Unnamed: 0,Product Name,Score
0,"Vickerman 14"" Finial Drop Christmas Ornaments,...",0.000000
1,Is It To Late To Be Good Grinch Christmas Mens...,1187.123031
2,Platinum 5 mm Comfort Fit Half Round Wedding B...,1191.951746
3,New Way 075 - Men's Sleeveless Fbi Female Body...,1202.318795
4,storefront christmas LED Decoration Light Gold...,1211.601535
...,...,...
797,"Partridge Berry (Mitchella Repens) Tincture, D...",14281.898896
798,"Foxnut (Euryale Ferox) Glycerite, Organic Seed...",14442.235805
799,Purity Home 100% Combed Compact Cotton Sheet S...,14881.429090
800,Dr. Mercola Natural Flea and Tick Defense - A ...,14909.292740


In [281]:
# Summed GloVe embeddings compared by Euclidean distance; closest first.
# The metric name is spelled "euclidean", matching the other cells.
recommend(df, product_id, "glove", "euclidean")

Unnamed: 0,Product Name,Score
0,"Vickerman 14"" Finial Drop Christmas Ornaments,...",0.000000
1,Is It To Late To Be Good Grinch Christmas Mens...,87.295300
2,New Way 075 - Men's Sleeveless Fbi Female Body...,88.325768
3,8 1/2 x 14 Cardstock - Crystal Metallic (500 Q...,89.956751
4,Allwitty 1039 - Women's T-Shirt Ipac Pistol Gu...,90.287862
...,...,...
797,"Partridge Berry (Mitchella Repens) Tincture, D...",1430.569354
798,"Foxnut (Euryale Ferox) Glycerite, Organic Seed...",1469.177690
799,Rayne Mirrors American Made Rayne Stitched Bla...,1485.157065
800,Purity Home 100% Combed Compact Cotton Sheet S...,1527.232180
