In [1]:
import numpy as np
import pandas as pd
import mysql_reviews


In [2]:
class MF():

    def __init__(self, R, K, alpha, beta, iterations):
        """
        Perform matrix factorization to predict empty
        entries in a matrix.

        Arguments
        - R (ndarray)   : user-item rating matrix
        - K (int)       : number of latent dimensions
        - alpha (float) : learning rate
        - beta (float)  : regularization parameter
        """

        self.R = R
        self.num_users, self.num_items = R.shape
        self.K = K
        self.alpha = alpha
        self.beta = beta
        self.iterations = iterations

    def train(self):
        # Initialize user and item latent feature matrice
        self.P = np.random.normal(scale=1./self.K, size=(self.num_users, self.K))
        self.Q = np.random.normal(scale=1./self.K, size=(self.num_items, self.K))

        # Initialize the biases
        self.b_u = np.zeros(self.num_users)
        self.b_i = np.zeros(self.num_items)
        self.b = np.mean(self.R[np.where(self.R != 0)])

        # Create a list of training samples
        self.samples = [
            (i, j, self.R[i, j])
            for i in range(self.num_users)
            for j in range(self.num_items)
            if self.R[i, j] > 0
        ]

        # Perform stochastic gradient descent for number of iterations
        training_process = []
        for i in range(self.iterations):
            np.random.shuffle(self.samples)
            self.sgd()
            mse = self.mse()
            training_process.append((i, mse))
            if (i+1) % 10 == 0:
                print("Iteration: %d ; error = %.4f" % (i+1, mse))

        return training_process

    def mse(self):
        """
        A function to compute the total mean square error
        """
        xs, ys = self.R.nonzero()
        predicted = self.full_matrix()
        error = 0
        for x, y in zip(xs, ys):
            error += pow(self.R[x, y] - predicted[x, y], 2)
        return np.sqrt(error)

    def sgd(self):
        """
        Perform stochastic graident descent
        """
        for i, j, r in self.samples:
            # Computer prediction and error
            prediction = self.get_rating(i, j)
            e = (r - prediction)

            # Update biases
            self.b_u[i] += self.alpha * (e - self.beta * self.b_u[i])
            self.b_i[j] += self.alpha * (e - self.beta * self.b_i[j])

            # Update user and item latent feature matrices
            self.P[i, :] += self.alpha * (e * self.Q[j, :] - self.beta * self.P[i,:])
            self.Q[j, :] += self.alpha * (e * self.P[i, :] - self.beta * self.Q[j,:])

    def get_rating(self, i, j):
        """
        Get the predicted rating of user i and item j
        """
        prediction = self.b + self.b_u[i] + self.b_i[j] + self.P[i, :].dot(self.Q[j, :].T)
        return prediction

    def full_matrix(self):
        """
        Computer the full matrix using the resultant biases, P and Q
        """
        return self.b + self.b_u[:,np.newaxis] + self.b_i[np.newaxis:,] + self.P.dot(self.Q.T)

In [3]:
def get_user_perfume_data():
    cursor = mysql_reviews.get_cursor()

    reviews_dict = mysql_reviews.get_reviews(cursor)

    user_perfume_data = pd.DataFrame(reviews_dict)

    user_perfume_data["title"] = (
        user_perfume_data["en_name"] + "/" + user_perfume_data["brand"]
    )

    user_perfume_data["rating"] = user_perfume_data["stars"].apply(
        pd.to_numeric)
    user_perfume_data["userId"] = user_perfume_data["UserNick"]
    nich = user_perfume_data[["userId", "title", "category"]]

    user_perfume_data.drop(
        [
            "id",
            "kr_brand",
            "kr_name",
            "longevity",
            "mood",
            "comment",
            "FragranceBrand",
            "brand",
            "en_name",
            "stars",
            "UserNick",
            "category"
        ],
        axis=1,
        inplace=True,
    )
    return user_perfume_data, nich

def get_perfume_matrix():
    user_perfume_data, nich = get_user_perfume_data()
    df_user_perfume_rating = user_perfume_data.pivot_table(
        "rating", index="userId", columns="title"
    ).fillna(0)
    user_list = df_user_perfume_rating.index.values
    user_row_dict = dict(zip(list(user_list), range(len(user_list))))
    matrix = df_user_perfume_rating.values
    return matrix, user_row_dict, df_user_perfume_rating

In [4]:
def predict(k=20, a=0.01, b=0.001, iter=1000):
    matrix, user_row_dict, df_user_perfume_rating = get_perfume_matrix()
    mf = MF(matrix, K=k, alpha=a, beta = b, iterations=iter)
    mf.train()
    prediction = mf.full_matrix()
    df_preds = pd.DataFrame(prediction, columns = df_user_perfume_rating.columns)
    return df_preds, user_row_dict, df_preds



In [5]:
df_preds, user_row_dict, df_user_perfume_rating = predict(k=30,iter=1000)

Iteration: 10 ; error = 19.1945
Iteration: 20 ; error = 17.5388
Iteration: 30 ; error = 16.0976
Iteration: 40 ; error = 14.6010
Iteration: 50 ; error = 12.8819
Iteration: 60 ; error = 10.9218
Iteration: 70 ; error = 8.9016
Iteration: 80 ; error = 7.0618
Iteration: 90 ; error = 5.5358
Iteration: 100 ; error = 4.3326
Iteration: 110 ; error = 3.4016
Iteration: 120 ; error = 2.6832
Iteration: 130 ; error = 2.1283
Iteration: 140 ; error = 1.6989
Iteration: 150 ; error = 1.3665
Iteration: 160 ; error = 1.1087
Iteration: 170 ; error = 0.9081
Iteration: 180 ; error = 0.7511
Iteration: 190 ; error = 0.6273
Iteration: 200 ; error = 0.5286
Iteration: 210 ; error = 0.4493
Iteration: 220 ; error = 0.3847
Iteration: 230 ; error = 0.3316
Iteration: 240 ; error = 0.2875
Iteration: 250 ; error = 0.2506
Iteration: 260 ; error = 0.2195
Iteration: 270 ; error = 0.1932
Iteration: 280 ; error = 0.1708
Iteration: 290 ; error = 0.1517
Iteration: 300 ; error = 0.1354
Iteration: 310 ; error = 0.1214
Iteration: 

In [6]:
df_preds.head(10)

title,34boulevardsaintgermain/diptyque,4711acquacoloniabloodorange&basil/4711,agarmusk/ramonmonegal,alarose/maisonfranciskurkdjian,alizarin/penhaligon's,amazinggrace/philosophy,amazinggraceballetrose/philosophy,amber&lavender/jomalone,ambermusk/aerinlauder,americancream/lush,...,whitesuede/tomford,whitetea/elizabetharden,wildbluebell/jomalone,woodsage&seasalt/jomalone,yuzu/acquadiparma,레이디고디바/향기의미술관,은은한비누/paffem,잔느사마리의초상/향기의미술관,코튼키스/포맨트,코튼허그/포맨트
0,3.27498,3.555338,3.854367,4.381592,3.246367,3.892197,3.130593,3.188143,3.294121,3.954702,...,4.313595,3.291398,4.829637,4.998207,3.526353,3.488072,3.396205,3.531452,3.968536,3.501741
1,3.289074,3.454463,3.825594,4.404728,3.001016,3.576846,3.341267,3.053783,3.100627,3.44722,...,4.089474,3.217824,4.710239,4.664342,3.446294,3.469649,3.468742,3.507086,4.00633,3.385396
2,3.095099,3.001072,3.651921,4.338051,2.972171,2.003458,3.998919,2.751387,2.916848,3.332963,...,3.893987,3.009563,4.460086,4.151593,3.306512,3.357834,3.253124,3.396034,3.999876,3.001046
3,3.193381,3.343667,3.630514,4.288847,3.144774,3.602055,3.171007,2.976619,3.060068,3.824481,...,4.191738,3.034609,4.509775,4.299033,3.386416,3.355565,3.236202,3.433404,3.791945,3.358163
4,3.129611,3.423567,3.695295,4.537913,3.379348,4.4207,2.613588,3.050166,3.230479,4.997803,...,4.656554,3.055625,4.514597,3.99907,3.533057,3.363209,3.069241,3.445011,3.82003,3.642708
5,3.036549,3.18869,3.694313,4.12779,2.974551,2.944841,3.342104,2.82017,2.816244,3.365812,...,3.909563,2.97319,4.529195,4.439605,3.287235,3.368529,3.194035,3.398661,3.897871,3.061284
6,3.10624,3.289103,3.5806,4.314155,2.933108,3.627379,2.948056,2.707435,2.852633,3.469452,...,3.998745,2.787089,4.421515,4.45573,3.269947,3.172146,3.140314,3.259624,3.746524,3.316229
7,2.901839,3.233436,3.618259,4.062176,2.750773,3.380386,3.093662,2.839342,2.754284,3.182021,...,3.831181,2.981575,4.316925,4.401045,3.305395,3.344574,3.215463,3.244187,3.765703,3.145746
8,3.00162,3.630476,3.999952,4.522069,3.40209,3.64481,3.448226,3.2448,3.357142,4.083643,...,4.451491,3.550779,4.811581,4.801396,3.657731,3.677895,3.465354,3.766101,4.200276,3.534441
9,3.798868,3.900032,4.262627,4.852102,3.857702,4.076422,3.60898,3.492521,3.579588,4.359097,...,4.922413,3.559009,5.14174,4.988114,3.955441,3.944605,3.893815,3.956967,4.391429,3.903612


In [21]:
def recommend_perfumes(user_id, df_preds, user_row_dict, df_user_perfume_rating,  num_recommendations=5):
    # df_preds, user_row_dict, df_user_perfume_rating = predict(iter=1000)
    user_perfume_data, nich = get_user_perfume_data()

    user_row_number = user_row_dict[user_id]
    sorted_user_predictions = df_preds.iloc[user_row_number].sort_values(
        ascending=False
    )
    print(sorted_user_predictions)

    user_history = user_perfume_data[user_perfume_data.userId == user_id].sort_values(
        ["rating"], ascending=False
    )
    user_perfume_data = user_perfume_data.loc[nich["category"].isin([1])]
    recommendations = user_perfume_data[
        ~user_perfume_data["title"].isin(
            user_history["title"])
    ]
    recommendations = recommendations.merge(
        pd.DataFrame(sorted_user_predictions).reset_index(), on="title"
    )
    recommendations = recommendations.rename(
        columns={user_row_number: "Predictions"}
    ).sort_values("Predictions", ascending=False)
    recommendations.reset_index(inplace=True)
    recommendations.drop(["userId", "rating", "index"], axis=1, inplace=True)
    recommendations.drop_duplicates(inplace=True)
    recommendations = recommendations.iloc[:num_recommendations, :]
    return recommendations, user_history

In [22]:
recommend_perfumes('0',df_preds, user_row_dict, df_user_perfume_rating, 5)


title
limebasil&mandarin/jomalone                 4.999037
woodsage&seasalt/jomalone                   4.998207
gypsywater/byredo                           4.997419
jasminperledethé/fragonard                  4.903491
l'eau de néroli eau de toilette/diptyque    4.844277
                                              ...   
mademoisellerochas/rochas                   2.406147
jimmychooman/jimmychoo                      2.332229
lolitalempicka/lolitalempicka               2.219293
ferrariblack/ferrari                        2.036873
mojaveghost/byredo                          2.003000
Name: 0, Length: 260, dtype: float64


(                                       title  Predictions
 0   l'eau de néroli eau de toilette/diptyque     4.844277
 1                      wildbluebell/jomalone     4.829637
 3               englishpear&fressia/jomalone     4.667810
 12                      fleurdepeau/diptyque     4.609243
 13                        whiteflowers/creed     4.523116,
                          title  rating userId
 0  limebasil&mandarin/jomalone     5.0      0
 3            gypsywater/byredo     5.0      0
 6    woodsage&seasalt/jomalone     5.0      0
 1             warmcotton/clean     4.0      0
 2      blackberry&bay/jomalone     4.0      0
 5             slowdance/byredo     3.0      0
 4           mojaveghost/byredo     2.0      0)

In [9]:
recommend_perfumes('1',df_preds, user_row_dict, df_user_perfume_rating, 100)


(                                     title  Predictions
 0              limebasil&mandarin/jomalone     4.786388
 3                    wildbluebell/jomalone     4.710239
 5                woodsage&seasalt/jomalone     4.664342
 15                       gypsywater/byredo     4.613399
 18                    fleurdepeau/diptyque     4.572302
 19                      whiteflowers/creed     4.471826
 20              tamdaoeaudeparfum/diptyque     4.435623
 23        eaudessenseaudetoilette/diptyque     4.428974
 27              creedoriginalvétiver/creed     4.425333
 28         vetyverioeaudetoilette/diptyque     4.394026
 30                  creedgreenneroli/creed     4.383511
 31                   creedroyalwater/creed     4.356001
 32            creedmillésimeimpérial/creed     4.340342
 33     l'ombredansl'eaudetoilette/diptyque     4.305574
 34   geraniumodorataeaudetoilette/diptyque     4.274918
 35         eaucapitaleeaudeparfum/diptyque     4.241017
 36                roseofnoman'

In [10]:
recommend_perfumes('2',df_preds, user_row_dict, df_user_perfume_rating, 100)


(                                        title  Predictions
 0                 limebasil&mandarin/jomalone     5.383874
 3                        fleurdepeau/diptyque     5.280240
 4    l'eau de néroli eau de toilette/diptyque     5.199245
 5                          whiteflowers/creed     5.167651
 6                            orphéon/diptyque     5.131113
 7                           gypsywater/byredo     5.102048
 10                      wildbluebell/jomalone     5.089552
 12        l'ombredansl'eaudetoilette/diptyque     5.080535
 13           eaudessenseaudetoilette/diptyque     5.066516
 17            eaudeminthéeaudeparfum/diptyque     5.049941
 18               creedmillésimeimpérial/creed     5.046937
 19            vetyverioeaudetoilette/diptyque     5.042049
 21                 creedoriginalvétiver/creed     5.021687
 22                      creedroyalwater/creed     5.016662
 23                  woodsage&seasalt/jomalone     4.979225
 33            eaucapitaleeaudeparfum/di

In [11]:
ho, ho_h = recommend_perfumes('36',df_preds, user_row_dict, df_user_perfume_rating, 10)
ho

Unnamed: 0,title,Predictions
0,whiteflowers/creed,5.353811
1,englishpear&fressia/jomalone,5.222828
10,wildbluebell/jomalone,5.212257
12,limebasil&mandarin/jomalone,5.174818
15,fleurdepeau/diptyque,5.11361
16,gypsywater/byredo,5.019084
19,eaucapitaleeaudeparfum/diptyque,4.992167
20,l'eau de néroli eau de toilette/diptyque,4.98774
21,woodsage&seasalt/jomalone,4.940776
31,orphéon/diptyque,4.928724


In [12]:
hyun, hun_h = recommend_perfumes('37',df_preds, user_row_dict, df_user_perfume_rating, 10)
hyun

Unnamed: 0,title,Predictions
0,fleurdepeau/diptyque,5.069138
1,limebasil&mandarin/jomalone,4.838951
4,whiteflowers/creed,4.658116
5,wildbluebell/jomalone,4.613012
7,l'eau de néroli eau de toilette/diptyque,4.563233
8,l'ombredansl'eaudetoilette/diptyque,4.481524
9,gypsywater/byredo,4.47374
12,creedoriginalvétiver/creed,4.467891
13,vetyverioeaudetoilette/diptyque,4.433893
15,eaudeminthéeaudeparfum/diptyque,4.414922


In [13]:
yj, yj_h = recommend_perfumes('38',df_preds, user_row_dict, df_user_perfume_rating, 10)
yj


Unnamed: 0,title,Predictions
0,limebasil&mandarin/jomalone,5.116344
3,fleurdepeau/diptyque,5.053914
4,wildbluebell/jomalone,4.983584
6,whiteflowers/creed,4.902052
7,l'eau de néroli eau de toilette/diptyque,4.799445
8,englishpear&fressia/jomalone,4.786034
17,gypsywater/byredo,4.773225
20,orphéon/diptyque,4.704042
21,eaucapitaleeaudeparfum/diptyque,4.649403
22,creedoriginalvétiver/creed,4.631744


In [14]:
bowon, bowon_h = recommend_perfumes('35',df_preds, user_row_dict, df_user_perfume_rating, 10)
bowon

Unnamed: 0,title,Predictions
0,englishpear&fressia/jomalone,4.90192
9,fleurdepeau/diptyque,4.767961
10,wildbluebell/jomalone,4.563825
12,whiteflowers/creed,4.528865
13,orphéon/diptyque,4.474456
14,l'eau de néroli eau de toilette/diptyque,4.403809
15,gypsywater/byredo,4.400195
18,eaucapitaleeaudeparfum/diptyque,4.379103
19,nectarineblossom&honey/jomalone,4.333386
21,blackberry&bay/jomalone,4.325368
