In [68]:
import numpy as np
import pandas as pd
import mysql_reviews


In [69]:
class MF():
    """
    Biased matrix factorization trained with stochastic gradient descent.

    A rating is modelled as ``b + b_u[i] + b_i[j] + P[i] . Q[j]`` where ``b`` is
    the global mean of the observed ratings, ``b_u`` / ``b_i`` are per-user and
    per-item biases and ``P`` / ``Q`` are the latent factor matrices.
    """

    def __init__(self, R, K, alpha, beta, iterations):
        """
        Store the rating matrix and the hyper-parameters; no training happens here.

        Arguments
        - R (ndarray)      : user-item rating matrix; entries equal to 0 are
                             treated as missing (only positive entries are
                             used as training samples)
        - K (int)          : number of latent dimensions
        - alpha (float)    : learning rate
        - beta (float)     : regularization parameter
        - iterations (int) : number of SGD passes over the training samples
        """

        self.R = R
        self.num_users, self.num_items = R.shape
        self.K = K
        self.alpha = alpha
        self.beta = beta
        self.iterations = iterations

    def train(self):
        """
        Fit the biases and latent factors with SGD.

        Returns a list of ``(iteration_index, error)`` tuples, one per pass,
        where ``error`` is the value returned by ``mse()`` after that pass.
        """
        # Initialize user and item latent feature matrices
        self.P = np.random.normal(scale=1./self.K, size=(self.num_users, self.K))
        self.Q = np.random.normal(scale=1./self.K, size=(self.num_items, self.K))

        # Initialize the biases; the global bias is the mean observed rating
        self.b_u = np.zeros(self.num_users)
        self.b_i = np.zeros(self.num_items)
        self.b = np.mean(self.R[self.R != 0])

        # Create the list of training samples (user, item, rating).
        # np.nonzero walks the mask in row-major order, i.e. the same order
        # as a nested "for user / for item" loop, without the Python overhead.
        rows, cols = np.nonzero(self.R > 0)
        self.samples = list(zip(rows, cols, self.R[rows, cols]))

        # Perform stochastic gradient descent for number of iterations
        training_process = []
        for iteration in range(self.iterations):
            np.random.shuffle(self.samples)
            self.sgd()
            mse = self.mse()
            training_process.append((iteration, mse))
            if (iteration + 1) % 10 == 0:
                print("Iteration: %d ; error = %.4f" % (iteration + 1, mse))

        return training_process

    def mse(self):
        """
        Compute the training error over the observed (non-zero) ratings.

        Note: despite the method name, the returned value is the square root
        of the *summed* squared error (not divided by the number of ratings).
        That definition is kept so logged values stay comparable across runs.
        """
        observed = self.R != 0
        residuals = self.R[observed] - self.full_matrix()[observed]
        return np.sqrt(np.sum(residuals ** 2))

    def sgd(self):
        """
        Perform one pass of stochastic gradient descent over ``self.samples``.
        """
        for i, j, r in self.samples:
            # Compute prediction and error
            prediction = self.get_rating(i, j)
            e = (r - prediction)

            # Update biases
            self.b_u[i] += self.alpha * (e - self.beta * self.b_u[i])
            self.b_i[j] += self.alpha * (e - self.beta * self.b_i[j])

            # Update user and item latent feature matrices.
            # Keep a copy of the user's factors from before the update: the
            # item step must use the same P[i] the error was computed with,
            # not the row that was just modified.
            p_i = self.P[i, :].copy()
            self.P[i, :] += self.alpha * (e * self.Q[j, :] - self.beta * self.P[i, :])
            self.Q[j, :] += self.alpha * (e * p_i - self.beta * self.Q[j, :])

    def get_rating(self, i, j):
        """
        Get the predicted rating of user i and item j
        """
        return self.b + self.b_u[i] + self.b_i[j] + self.P[i, :].dot(self.Q[j, :])

    def full_matrix(self):
        """
        Compute the full (num_users x num_items) prediction matrix using the
        resultant biases, P and Q
        """
        # b_u varies down the rows, b_i across the columns
        return self.b + self.b_u[:, np.newaxis] + self.b_i[np.newaxis, :] + self.P.dot(self.Q.T)

In [70]:
def get_user_perfume_data():
    """
    Load the reviews via ``mysql_reviews`` and shape them for the recommender.

    Returns a tuple ``(ratings, nich)``:
    - ratings : DataFrame with the columns ``title`` ("<en_name>/<brand>"),
                ``rating`` (``stars`` converted with ``pd.to_numeric``) and
                ``userId`` (copy of ``UserNick``); all raw columns are dropped.
    - nich    : DataFrame with ``userId``, ``title`` and ``category``, taken
                before ``category`` is dropped from the ratings frame.
    """
    # NOTE(review): the cursor is never closed here -- confirm whether
    # mysql_reviews manages its lifetime.
    reviews = pd.DataFrame(mysql_reviews.get_reviews(mysql_reviews.get_cursor()))

    # Derived columns used by the rest of the notebook.
    reviews["title"] = reviews["en_name"] + "/" + reviews["brand"]
    reviews["rating"] = reviews["stars"].apply(pd.to_numeric)
    reviews["userId"] = reviews["UserNick"]

    # Snapshot that still carries the category of every review.
    nich = reviews[["userId", "title", "category"]]

    raw_columns = [
        "id",
        "kr_brand",
        "kr_name",
        "longevity",
        "mood",
        "comment",
        "FragranceBrand",
        "brand",
        "en_name",
        "stars",
        "UserNick",
        "category",
    ]
    ratings = reviews.drop(columns=raw_columns)
    return ratings, nich

def get_perfume_matrix():
    """
    Build the user x perfume rating matrix from the review data.

    Returns a tuple ``(matrix, user_row_dict, df_user_perfume_rating)``:
    - matrix                 : the pivoted ratings as a plain ndarray
    - user_row_dict          : maps each userId to its row position in ``matrix``
    - df_user_perfume_rating : the pivoted DataFrame (index ``userId``,
                               columns ``title``); missing ratings are 0
    """
    ratings, _ = get_user_perfume_data()

    # pivot_table aggregates repeated (user, title) pairs with its default
    # aggregation; pairs without any review become 0 via fillna.
    pivoted = ratings.pivot_table(values="rating", index="userId", columns="title")
    df_user_perfume_rating = pivoted.fillna(0)

    user_row_dict = {
        user: row for row, user in enumerate(df_user_perfume_rating.index.values)
    }
    return df_user_perfume_rating.values, user_row_dict, df_user_perfume_rating

In [71]:
def predict(k=20, a=0.01, b=0.001, iter=1000):
    """
    Train the matrix-factorization model on the perfume ratings and return
    the predicted ratings for every (user, perfume) pair.

    Arguments
    - k (int)    : number of latent dimensions (passed to MF as ``K``)
    - a (float)  : learning rate (``alpha``)
    - b (float)  : regularization parameter (``beta``)
    - iter (int) : number of SGD iterations; the name shadows the builtin but
                   is kept because callers pass it by keyword

    Returns a tuple ``(df_preds, user_row_dict, df_user_perfume_rating)``:
    - df_preds               : predicted ratings, one row per user in the row
                               order of the rating matrix (default integer
                               index), columns are the perfume titles
    - user_row_dict          : maps userId to the row position in ``df_preds``
    - df_user_perfume_rating : the pivoted frame of observed ratings
    """
    matrix, user_row_dict, df_user_perfume_rating = get_perfume_matrix()
    mf = MF(matrix, K=k, alpha=a, beta=b, iterations=iter)
    mf.train()
    prediction = mf.full_matrix()
    df_preds = pd.DataFrame(prediction, columns=df_user_perfume_rating.columns)
    # The third element is the observed-rating frame, not df_preds a second time.
    return df_preds, user_row_dict, df_user_perfume_rating



In [120]:
# Train the model with 30 latent dimensions for 1000 iterations and keep
# predict's three return values for the cells below.
df_preds, user_row_dict, df_user_perfume_rating = predict(k=30,iter=1000)

Iteration: 10 ; error = 19.1211
Iteration: 20 ; error = 17.4714
Iteration: 30 ; error = 16.0384
Iteration: 40 ; error = 14.5517
Iteration: 50 ; error = 12.8464
Iteration: 60 ; error = 10.9018
Iteration: 70 ; error = 8.8923
Iteration: 80 ; error = 7.0570
Iteration: 90 ; error = 5.5279
Iteration: 100 ; error = 4.3173
Iteration: 110 ; error = 3.3810
Iteration: 120 ; error = 2.6604
Iteration: 130 ; error = 2.1035
Iteration: 140 ; error = 1.6710
Iteration: 150 ; error = 1.3345
Iteration: 160 ; error = 1.0724
Iteration: 170 ; error = 0.8684
Iteration: 180 ; error = 0.7093
Iteration: 190 ; error = 0.5845
Iteration: 200 ; error = 0.4860
Iteration: 210 ; error = 0.4076
Iteration: 220 ; error = 0.3447
Iteration: 230 ; error = 0.2937
Iteration: 240 ; error = 0.2520
Iteration: 250 ; error = 0.2177
Iteration: 260 ; error = 0.1892
Iteration: 270 ; error = 0.1655
Iteration: 280 ; error = 0.1456
Iteration: 290 ; error = 0.1289
Iteration: 300 ; error = 0.1148
Iteration: 310 ; error = 0.1030
Iteration: 

In [121]:
# Preview the first 10 rows of the predicted-rating frame.
df_preds.head(10)

title,34 boulevard saint germain/diptyque,4711 acqua colonia blood orange & basil/4711,a la rose/maison francis kurkdjian,agar musk/ramon monegal,alizarin/penhaligon's,amazing grace ballet rose/philosophy,amazing grace/philosophy,amber musk/aerin lauder,american cream/lush,angeli di firenze/santa maria novella,...,white shirts/loe,white suede/tom ford,white tea/elizabeth arden,wild bluebell/jomalone,wood sage & sea salt/jomalone,레이디 고디바/향기의미술관,은은한 비누/paffem,잔느 사마리의 초상/향기의미술관,코튼키스/포맨트,코튼허그/포맨트
0,3.18212,3.52854,4.299488,3.739769,3.335239,3.17096,3.876316,3.085033,3.885432,4.227394,...,4.177066,4.354567,3.149374,4.713196,4.998967,3.451798,3.321116,3.483174,3.930587,3.456371
1,3.096312,3.207844,4.192811,3.592271,3.000974,3.334436,3.046643,2.977521,3.326327,3.974421,...,4.179452,4.116038,2.887679,4.525974,4.637458,3.371652,3.251943,3.30825,3.800904,3.098592
2,2.927086,3.001046,4.070946,3.630664,3.245343,3.998835,2.003636,3.184653,3.327613,3.770696,...,4.214115,4.088099,2.990468,4.588792,4.588657,3.456784,3.318261,3.308396,3.999842,3.000986
3,3.123418,3.330694,4.387077,3.596123,3.221292,3.159011,3.613936,2.988979,3.960904,4.070042,...,4.276243,4.134012,2.950033,4.49974,4.301261,3.362837,3.099662,3.29222,3.760726,3.278353
4,3.123934,3.513636,4.474342,3.667954,3.309955,2.703043,4.370145,3.064392,4.997834,4.379209,...,4.243673,4.127727,3.081587,4.515599,3.99947,3.455323,3.062756,3.329074,3.688909,3.477937
5,3.138814,3.121964,4.306996,3.540023,3.16951,3.155005,3.294847,3.016281,3.038529,4.075277,...,4.04348,4.00352,2.919131,4.501821,4.481232,3.177648,3.156434,3.272606,3.739603,3.181782
6,2.927037,3.144489,4.072098,3.584517,3.188196,3.211372,3.127331,3.08407,3.592654,3.947265,...,3.891769,4.003188,2.955694,4.246927,4.206012,3.341918,3.190586,3.2724,3.775377,3.173341
7,3.070811,3.221529,4.176521,3.621387,3.298804,3.290673,3.198796,3.058552,3.174195,3.971426,...,3.967452,4.177737,3.133722,4.498166,4.63748,3.331567,3.209019,3.33669,3.775622,3.257715
8,3.001705,3.536256,4.421829,3.999952,3.591962,3.812607,3.225384,3.433203,4.083904,4.34739,...,4.505851,4.372722,3.382191,4.704021,4.830914,3.775465,3.600096,3.645613,4.269579,3.534022
9,3.760626,3.919773,5.026134,4.207454,3.874965,3.687116,4.268609,3.664995,4.611776,4.77776,...,4.749026,4.694178,3.596163,5.032791,4.792179,3.983126,3.791462,3.925171,4.339965,3.915398


In [122]:
def recommend_perfumes(user_id, df_preds, user_row_dict, df_user_perfume_rating, num_recommendations=5):
    """
    Recommend perfumes a user has not reviewed yet, ranked by predicted rating.

    Arguments
    - user_id                : id of the user, as used in ``user_row_dict``
    - df_preds               : predicted ratings; row position = user row,
                               columns = perfume titles
    - user_row_dict          : maps user id to the row position in ``df_preds``
    - df_user_perfume_rating : accepted for call compatibility; not read here
    - num_recommendations    : maximum number of rows to return

    Returns ``(recommendations, user_history)``: a frame with the columns
    ``title`` and ``Predictions`` sorted by descending prediction, and the
    user's own reviews sorted by descending rating.
    """
    reviews, nich = get_user_perfume_data()
    row = user_row_dict[user_id]

    # This user's predicted ratings, best first.
    ranked = df_preds.iloc[row].sort_values(ascending=False)

    # What the user already reviewed, best rated first.
    is_own_review = reviews["userId"] == user_id
    user_history = reviews[is_own_review].sort_values(["rating"], ascending=False)

    # Candidates: reviews whose category is 1 and whose title is new to the user.
    in_category = nich["category"].isin([1])
    candidates = reviews.loc[in_category]
    already_reviewed = candidates["title"].isin(user_history["title"])
    unseen = candidates[~already_reviewed]

    # Attach the prediction to every candidate row; the prediction column is
    # named after the row label taken from df_preds, hence the rename.
    ranked_frame = pd.DataFrame(ranked).reset_index()
    recommendations = (
        unseen.merge(ranked_frame, on="title")
        .rename(columns={row: "Predictions"})
        .sort_values("Predictions", ascending=False)
        .reset_index()
        .drop(["userId", "rating", "index"], axis=1)
        .drop_duplicates()
        .iloc[:num_recommendations, :]
    )
    return recommendations, user_history

In [123]:
# Up to 100 recommendations for user id '0'; the cell output is the returned
# (recommendations, user_history) tuple.
recommend_perfumes('0',df_preds, user_row_dict, df_user_perfume_rating, 100)


(                              title  Predictions
 0            wild bluebell/jomalone     4.713196
 2            fleur de peau/diptyque     4.658123
 3               white flowers/creed     4.585515
 4             black orchid/tom ford     4.558685
 6    volutes eau de parfum/diptyque     4.487672
 ..                              ...          ...
 119                la tulipe/byredo     2.845697
 121                 do son/diptyque     2.729034
 122                  oyedo/diptyque     2.678916
 124                orpheon/diptyque     2.590186
 125      waterlily cologne/jomalone     2.571122
 
 [70 rows x 2 columns],
                             title  rating userId
 0  lime basil & mandarin/jomalone     5.0      0
 3              gypsy water/byredo     5.0      0
 6   wood sage & sea salt/jomalone     5.0      0
 1               warm cotton/clean     4.0      0
 2       blackberry & bay/jomalone     4.0      0
 5               slow dance/byredo     3.0      0
 4             mojave gh

In [124]:
# Up to 100 recommendations for user id '1'; the cell output is the returned
# (recommendations, user_history) tuple.
recommend_perfumes('1',df_preds, user_row_dict, df_user_perfume_rating, 100)


(                               title  Predictions
 0      wood sage & sea salt/jomalone     4.637458
 10   english pear & fressia/jomalone     4.574371
 19    lime basil & mandarin/jomalone     4.565208
 22               white flowers/creed     4.530036
 23            wild bluebell/jomalone     4.525974
 ..                               ...          ...
 131                  do son/diptyque     2.737788
 132                   oyedo/diptyque     2.636810
 134                 orpheon/diptyque     2.580011
 135      creed aventus for her/creed     2.465705
 136       waterlily cologne/jomalone     2.354392
 
 [73 rows x 2 columns],
                                           title  rating userId
 12             eclat d'arpege pour homme/lanvin     5.0      1
 13                       modern princess/lanvin     5.0      1
 8                     blackberry & bay/jomalone     4.0      1
 9                   peony & blushsuede/jomalone     4.0      1
 11  l'ombre dans l'eau eau de toilette/di

In [125]:
# Up to 100 recommendations for user id '2'; the cell output is the returned
# (recommendations, user_history) tuple.
recommend_perfumes('2',df_preds, user_row_dict, df_user_perfume_rating, 100)


(                              title  Predictions
 0               white flowers/creed     5.361563
 1    lime basil & mandarin/jomalone     5.356931
 4            fleur de peau/diptyque     5.282991
 5     wood sage & sea salt/jomalone     5.211637
 15           wild bluebell/jomalone     5.200169
 ..                              ...          ...
 140                  oyedo/diptyque     3.428239
 142     creed aventus for her/creed     3.392338
 143                orpheon/diptyque     3.332427
 144                 do son/diptyque     3.278963
 145      waterlily cologne/jomalone     2.962076
 
 [75 rows x 2 columns],
                               title  rating userId
 18            eclat d'arpege/lanvin     5.0      2
 19             jeanne lanvin/lanvin     5.0      2
 20  english pear & fressia/jomalone     5.0      2
 21        illicit flower/jimmy choo     5.0      2
 22            jimmy choo/jimmy choo     3.0      2
 23         flight of fancy/anna sui     3.0      2)

In [126]:
# Top 10 recommendations for user id '36'; the rating history is kept in
# ho_h and only the recommendation frame is displayed.
ho, ho_h = recommend_perfumes('36',df_preds, user_row_dict, df_user_perfume_rating, 10)
ho

Unnamed: 0,title,Predictions
0,wood sage & sea salt/jomalone,5.397189
10,english pear & fressia/jomalone,5.214933
19,lime basil & mandarin/jomalone,5.179691
22,black orchid/tom ford,5.174812
24,white flowers/creed,5.128583
25,creed millésime impérial/creed,5.087192
26,gypsy water/byredo,5.080983
29,eau de minthé eau de parfum/diptyque,5.05539
30,velvet orchid/tom ford,5.046695
31,volutes eau de parfum/diptyque,5.036142


In [127]:
# Top 10 recommendations for user id '37'; the rating history is kept in
# hun_h and only the recommendation frame is displayed.
hyun, hun_h = recommend_perfumes('37',df_preds, user_row_dict, df_user_perfume_rating, 10)
hyun

Unnamed: 0,title,Predictions
0,fleur de peau/diptyque,5.008716
1,lime basil & mandarin/jomalone,4.853955
4,wild bluebell/jomalone,4.654641
6,gypsy water/byredo,4.594886
9,black orchid/tom ford,4.516858
11,white flowers/creed,4.460124
12,volutes eau de parfum/diptyque,4.448409
13,orphéon/diptyque,4.447166
14,geranium odorata eau de toilette/diptyque,4.436527
15,melograno/santa maria novella,4.432282


In [128]:
# Top 10 recommendations for user id '38'; the rating history is kept in
# yj_h and only the recommendation frame is displayed.
yj, yj_h = recommend_perfumes('38',df_preds, user_row_dict, df_user_perfume_rating, 10)
yj


Unnamed: 0,title,Predictions
0,fleur de peau/diptyque,5.204353
1,lime basil & mandarin/jomalone,5.16529
4,english pear & fressia/jomalone,4.905699
13,wild bluebell/jomalone,4.871728
15,gypsy water/byredo,4.820174
18,white flowers/creed,4.81901
19,orphéon/diptyque,4.784488
20,melograno/santa maria novella,4.776569
23,black orchid/tom ford,4.741816
25,creed green neroli/creed,4.666675


In [129]:
# Top 10 recommendations for user id '35'; the rating history is kept in
# bowon_h and only the recommendation frame is displayed.
bowon, bowon_h = recommend_perfumes('35',df_preds, user_row_dict, df_user_perfume_rating, 10)
bowon

Unnamed: 0,title,Predictions
0,fleur de peau/diptyque,4.889114
1,english pear & fressia/jomalone,4.780513
10,white flowers/creed,4.675277
11,creed original vétiver/creed,4.511982
12,wild bluebell/jomalone,4.496841
14,black orchid/tom ford,4.454596
16,gypsy water/byredo,4.403648
19,volutes eau de parfum/diptyque,4.360212
20,grey vetiver/tom ford,4.339533
22,eau duelle eau de toilette/diptyque,4.330955
