In [536]:
import pandas as pd
import turicreate as tc
import mysql.connector as db
from sklearn.model_selection import train_test_split

In [537]:
mydb = db.connect(
  host = "localhost",
  user = "root",
  passwd = "",
  database = "ecomm_ml"
)

lineItems = mydb.cursor()
lineItems.execute("SELECT `customer_id`, `product_id`, `quantity` FROM line_items")
lineItems = lineItems.fetchall()

customers = mydb.cursor()
customers.execute("SELECT `id` from `customers`")
customers = customers.fetchall()

In [538]:
lineItems = pd.DataFrame(lineItems,
                         columns=['customer_id', 'product_id', 'quantity'])

In [539]:
customers_list = []
for i in customers:
    customers_list.append(i[0])

In [540]:
lineItems = lineItems.groupby(['customer_id', 'product_id'],
                              as_index=False)['quantity'].sum()

In [541]:
def minmax_normalisation(data):
    # Before we use the formula it would be easier to pivot the table to a matrix
    matrix = pd.pivot_table(data,
                            values='quantity',
                            index='customer_id',
                            columns='product_id')
    matrix_normalised = (matrix - matrix.min()) / (matrix.max() - matrix.min())
    # Reset the index
    matrix_normalised = matrix_normalised.reset_index()
    # Create a new index column in the data for the minmax quantity
    matrix_normalised.index.names = ['minmax_quantity']
    # Use pandas melt to change the matrix back to a list
    matrix_normalised = pd.melt(matrix_normalised,
                                id_vars=['customer_id'],
                                value_name='minmax_quantity').dropna()
    return matrix_normalised

In [542]:
def const_quantity(data):
    '''
    The constant data had to be created
    to be able to run RMSE tests
    :param data:
    :return data:
    '''
    data['quantity_one'] = 1
    return data

In [543]:
def transform_bins(s):
    '''
    1 = 0
    2 = 1
    2-3 = 3
    4-8 = 4
    < 9 = 5
    '''
    if s == 1:
        s = 1
    elif s == 2:
        s = 2
    elif s > 2 and s <= 4:
        s = 3
    elif s > 4 and s <= 8:
        s = 4
    elif s > 8:
        s = 5
    return s

# frame.apply(f, axis=0)

def binning_normalisation(data):
    '''
    Group Data into Bins
    :param data:
    :return normalised:
    '''
    data['quantity'] = data['quantity'].transform(transform_bins)
    return data

In [544]:
def ml_split(items):
    # Split the data for ML model 80:20 ratio
    train, test = train_test_split(items, test_size=.2)
    # Use the data as a Turi SFrame
    train_data = tc.SFrame(train)
    test_data = tc.SFrame(test)
    return train_data, test_data

In [545]:
def generate_recommendations(data, customers, alg, target=None, recommend=True):
    if alg == "popularity":
        model = tc.popularity_recommender.create(data,
                                                 user_id="customer_id",
                                                 item_id="product_id",
                                                 target=target,
                                                 verbose=False)

    elif alg == "similarity":
        model = tc.item_similarity_recommender.create(data,
                                                      user_id="customer_id",
                                                      item_id="product_id",
                                                      target=target,
                                                      similarity_type='cosine',
                                                      verbose=False)

    if recommend:
        recommendations = model.recommend(users=customers, k=5, verbose=False)
        return recommendations
    else:
        return model

In [546]:
lineItemsConst = const_quantity(lineItems)
lineItemsMinMax = minmax_normalisation(lineItems)
lineItemsBins = binning_normalisation(lineItems)

In [547]:
items_normal_train, items_normal_test = ml_split(lineItemsConst)
items_minmax_train, items_minmax_test = ml_split(lineItemsMinMax)
items_bins_train, items_bins_test = ml_split(lineItemsBins)

# Popularity search

## Normal quantity

In [548]:
popularity_recommendations_normal = generate_recommendations(items_normal_train,
                                                             customers_list,
                                                             "popularity",
                                                             "quantity")
popularity_recommendations_normal.print_rows(20)

+-------------+---------------+--------------------+------+
| customer_id |   product_id  |       score        | rank |
+-------------+---------------+--------------------+------+
|      1      | 1466318520419 | 2.161073825503356  |  1   |
|      1      | 1466096451683 | 1.7588235294117647 |  2   |
|      1      | 1466096418915 | 1.5869565217391304 |  3   |
|      1      | 1466095894627 | 1.5867549668874172 |  4   |
|      1      | 1466095927395 | 1.5324947589098532 |  5   |
|      2      | 1466318520419 | 2.161073825503356  |  1   |
|      2      | 1466096451683 | 1.7588235294117647 |  2   |
|      2      | 1466096418915 | 1.5869565217391304 |  3   |
|      2      | 1466095927395 | 1.5324947589098532 |  4   |
|      2      | 1466096189539 | 1.4973110670818002 |  5   |
|      3      | 1466318520419 | 2.161073825503356  |  1   |
|      3      | 1466096451683 | 1.7588235294117647 |  2   |
|      3      | 1466096418915 | 1.5869565217391304 |  3   |
|      3      | 1466095894627 | 1.586754

## Min-Max Normalisation

In [549]:
popularity_recommendations_minmax = generate_recommendations(items_minmax_train,
                                                             customers_list,
                                                             "popularity",
                                                             "minmax_quantity")
popularity_recommendations_minmax.print_rows(20)

+-------------+---------------+---------------------+------+
| customer_id |   product_id  |        score        | rank |
+-------------+---------------+---------------------+------+
|      1      | 1466096451683 | 0.11492063492063484 |  1   |
|      1      | 1466096418915 | 0.09313725490196072 |  2   |
|      1      |  15571187078  | 0.07575757575757576 |  3   |
|      1      | 1239867981830 | 0.07017543859649122 |  4   |
|      1      | 1466096386147 | 0.06346555323590802 |  5   |
|      2      | 1466096451683 | 0.11492063492063484 |  1   |
|      2      | 1466096418915 | 0.09313725490196072 |  2   |
|      2      |  15571187078  | 0.07575757575757576 |  3   |
|      2      | 1239867981830 | 0.07017543859649122 |  4   |
|      2      | 1466096386147 | 0.06346555323590802 |  5   |
|      3      | 1466096451683 | 0.11492063492063484 |  1   |
|      3      | 1466096418915 | 0.09313725490196072 |  2   |
|      3      |  15571187078  | 0.07575757575757576 |  3   |
|      3      | 12398679

## Grouping data into Bins

In [550]:
popularity_recommendations_binning = generate_recommendations(items_bins_train,
                                                             customers_list,
                                                             "popularity",
                                                             "quantity")
popularity_recommendations_binning.print_rows(20)

+-------------+---------------+--------------------+------+
| customer_id |   product_id  |       score        | rank |
+-------------+---------------+--------------------+------+
|      1      | 1466318520419 | 2.1390728476821192 |  1   |
|      1      | 1466096451683 |  1.69364161849711  |  2   |
|      1      | 1466096418915 | 1.6074074074074074 |  3   |
|      1      | 1466095894627 | 1.6025974025974026 |  4   |
|      1      | 1466095927395 | 1.5229166666666667 |  5   |
|      2      | 1466318520419 | 2.1390728476821192 |  1   |
|      2      | 1466096451683 |  1.69364161849711  |  2   |
|      2      | 1466096418915 | 1.6074074074074074 |  3   |
|      2      | 1466095894627 | 1.6025974025974026 |  4   |
|      2      | 1466095927395 | 1.5229166666666667 |  5   |
|      3      | 1466318520419 | 2.1390728476821192 |  1   |
|      3      | 1466096451683 |  1.69364161849711  |  2   |
|      3      | 1466096418915 | 1.6074074074074074 |  3   |
|      3      | 1466095894627 | 1.602597

## Constant Quantity

In [551]:
popularity_recommendations_const = generate_recommendations(items_normal_train,
                                                            customers_list,
                                                            "popularity",
                                                            "quantity_one")
popularity_recommendations_const.print_rows(20)

+-------------+---------------+-------+------+
| customer_id |   product_id  | score | rank |
+-------------+---------------+-------+------+
|      1      |  33141385926  |  1.0  |  1   |
|      1      | 1466095861859 |  1.0  |  2   |
|      1      | 1466096353379 |  1.0  |  3   |
|      1      | 1466096386147 |  1.0  |  4   |
|      1      | 1466095894627 |  1.0  |  5   |
|      2      | 1466095861859 |  1.0  |  1   |
|      2      | 1466096353379 |  1.0  |  2   |
|      2      | 1466096189539 |  1.0  |  3   |
|      2      | 1466096386147 |  1.0  |  4   |
|      2      | 1466095829091 |  1.0  |  5   |
|      3      | 1466095861859 |  1.0  |  1   |
|      3      | 1466096353379 |  1.0  |  2   |
|      3      | 1466096189539 |  1.0  |  3   |
|      3      | 1466096386147 |  1.0  |  4   |
|      3      | 1466095894627 |  1.0  |  5   |
|      4      |  33141385926  |  1.0  |  1   |
|      4      | 1466096353379 |  1.0  |  2   |
|      4      | 1466096189539 |  1.0  |  3   |
|      4     

## Implicit Data

In [565]:
popularity_recommendations_const = generate_recommendations(items_normal_train,
                                                            customers_list,
                                                            "popularity")
popularity_recommendations_const.print_rows(20)

+-------------+---------------+--------+------+
| customer_id |   product_id  | score  | rank |
+-------------+---------------+--------+------+
|      1      | 1466095861859 | 901.0  |  1   |
|      1      | 1466095894627 | 755.0  |  2   |
|      1      | 1466096386147 | 484.0  |  3   |
|      1      | 1466095927395 | 477.0  |  4   |
|      1      | 1466096353379 | 443.0  |  5   |
|      2      | 1466096189539 | 3533.0 |  1   |
|      2      | 1466095960163 | 1610.0 |  2   |
|      2      | 1466095829091 | 1448.0 |  3   |
|      2      | 1466095861859 | 901.0  |  4   |
|      2      | 1466096386147 | 484.0  |  5   |
|      3      | 1466096189539 | 3533.0 |  1   |
|      3      | 1466095960163 | 1610.0 |  2   |
|      3      | 1466095861859 | 901.0  |  3   |
|      3      | 1466095894627 | 755.0  |  4   |
|      3      | 1466096386147 | 484.0  |  5   |
|      4      | 1466096189539 | 3533.0 |  1   |
|      4      | 1466095960163 | 1610.0 |  2   |
|      4      | 1466095829091 | 1448.0 |

# Similarity Search (Collaborative Filtering)

## Normal quantity

In [552]:
similarity_recommendations_normal = generate_recommendations(items_normal_train,
                                                             customers_list,
                                                             "similarity",
                                                             "quantity")
similarity_recommendations_normal.print_rows(20)

+-------------+---------------+----------------------+------+
| customer_id |   product_id  |        score         | rank |
+-------------+---------------+----------------------+------+
|      1      | 1466095861859 | 0.12028110027313232  |  1   |
|      1      | 1466095894627 | 0.04642679293950399  |  2   |
|      1      | 1466095927395 | 0.03547161817550659  |  3   |
|      1      |  15571182086  | 0.03263006607691447  |  4   |
|      1      | 1466096320611 |  0.0321167508761088  |  5   |
|      2      | 1466095927395 | 0.057584524154663086 |  1   |
|      2      | 1466095829091 | 0.05057936906814575  |  2   |
|      2      | 1466095861859 | 0.048455655574798584 |  3   |
|      2      | 1466095960163 |  0.0461689829826355  |  4   |
|      2      | 1466096189539 | 0.04253202676773071  |  5   |
|      3      | 1466096189539 |  0.3347851037979126  |  1   |
|      3      | 1466095960163 |  0.3214147090911865  |  2   |
|      3      | 1466095861859 | 0.23949110507965088  |  3   |
|      3

## Min-Max Normalisation

In [574]:
similarity_recommendations_minmax = generate_recommendations(items_minmax_train,
                                                             customers_list,
                                                             "similarity",
                                                             "minmax_quantity")
similarity_recommendations_minmax.print_rows(20)

+-------------+---------------+-----------------------+------+
| customer_id |   product_id  |         score         | rank |
+-------------+---------------+-----------------------+------+
|      1      | 1466096386147 |          0.0          |  1   |
|      1      | 1466096353379 |          0.0          |  2   |
|      1      |   9672405955  |          0.0          |  3   |
|      1      | 1466095861859 |          0.0          |  4   |
|      1      | 1466095927395 |          0.0          |  5   |
|      2      | 1466096353379 |          0.0          |  1   |
|      2      |   9672405955  |          0.0          |  2   |
|      2      | 1466095861859 |          0.0          |  3   |
|      2      | 1466095927395 |          0.0          |  4   |
|      2      | 1466096189539 |          0.0          |  5   |
|      3      | 1466095960163 |  0.01810455322265625  |  1   |
|      3      | 1466096189539 |  0.006688117980957031 |  2   |
|      3      | 1466096451683 |  0.005774974822998047 |

In [554]:
sim_zeros = pd.DataFrame(similarity_recommendations_minmax)
len(sim_zeros[sim_zeros.score == 0])

21785

## Grouping into Bins

In [555]:
similarity_recommendations_binning = generate_recommendations(items_bins_train,
                                                             customers_list,
                                                             "similarity",
                                                             "quantity")
similarity_recommendations_binning.print_rows(20)

+-------------+---------------+----------------------+------+
| customer_id |   product_id  |        score         | rank |
+-------------+---------------+----------------------+------+
|      1      | 1466095861859 |  0.1184834639231364  |  1   |
|      1      | 1466095894627 | 0.042665998140970864 |  2   |
|      1      |  15571187078  | 0.032188733418782554 |  3   |
|      1      |  15571182086  | 0.031768461068471275 |  4   |
|      1      | 1466095927395 |  0.0315625270207723  |  5   |
|      2      | 1466096189539 | 0.19822579622268677  |  1   |
|      2      | 1466095829091 | 0.14990448951721191  |  2   |
|      2      | 1466095861859 | 0.13272696733474731  |  3   |
|      2      | 1466095894627 | 0.051471710205078125 |  4   |
|      2      | 1466096418915 | 0.028578221797943115 |  5   |
|      3      | 1466096418915 | 0.010383253097534179 |  1   |
|      3      | 1466096451683 | 0.010099828243255615 |  2   |
|      3      | 1466096320611 | 0.009865729808807374 |  3   |
|      3

## Constant Quantity

In [556]:
similarity_recommendations_const = generate_recommendations(items_normal_train,
                                                            customers_list,
                                                            "similarity",
                                                            "quantity_one")
similarity_recommendations_const.print_rows(20)

+-------------+---------------+----------------------+------+
| customer_id |   product_id  |        score         | rank |
+-------------+---------------+----------------------+------+
|      1      | 1466095861859 | 0.16687522331873575  |  1   |
|      1      | 1466095894627 | 0.07578690846761067  |  2   |
|      1      | 1466095927395 | 0.06043688456217448  |  3   |
|      1      | 1466096320611 | 0.03969768683115641  |  4   |
|      1      |  15571182086  | 0.03475914398829142  |  5   |
|      2      | 1466095927395 | 0.09498220682144165  |  1   |
|      2      | 1466096189539 | 0.09184300899505615  |  2   |
|      2      | 1466095861859 | 0.08608400821685791  |  3   |
|      2      | 1466095829091 |  0.0774688720703125  |  4   |
|      2      | 1466095960163 | 0.05804884433746338  |  5   |
|      3      | 1466096189539 | 0.27323251962661743  |  1   |
|      3      | 1466095960163 | 0.19844740629196167  |  2   |
|      3      | 1466095861859 | 0.14533203840255737  |  3   |
|      3

## Implicit Data

In [573]:
similarity_recommendations_implicit = generate_recommendations(items_normal_train,
                                                               customers_list,
                                                               "similarity")
similarity_recommendations_implicit.print_rows(20)

+-------------+---------------+----------------------+------+
| customer_id |   product_id  |        score         | rank |
+-------------+---------------+----------------------+------+
|      1      | 1466095861859 | 0.16687522331873575  |  1   |
|      1      | 1466095894627 | 0.07578690846761067  |  2   |
|      1      | 1466095927395 | 0.06043688456217448  |  3   |
|      1      | 1466096320611 | 0.03969768683115641  |  4   |
|      1      |  15571182086  | 0.03475914398829142  |  5   |
|      2      | 1466095927395 | 0.09498220682144165  |  1   |
|      2      | 1466096189539 | 0.09184300899505615  |  2   |
|      2      | 1466095861859 | 0.08608400821685791  |  3   |
|      2      | 1466095829091 |  0.0774688720703125  |  4   |
|      2      | 1466095960163 | 0.05804884433746338  |  5   |
|      3      | 1466096189539 | 0.27323251962661743  |  1   |
|      3      | 1466095960163 | 0.19844740629196167  |  2   |
|      3      | 1466095861859 | 0.14533203840255737  |  3   |
|      3

# Testing the Models

In [568]:
popularity_model_normal = generate_recommendations(items_normal_train,
                                                   customers_list,
                                                   "popularity",
                                                   "quantity",
                                                   False)
popularity_model_minmax = generate_recommendations(items_minmax_train,
                                                   customers_list,
                                                   "popularity",
                                                   "minmax_quantity",
                                                   False)
popularity_model_binning = generate_recommendations(items_bins_train,
                                                    customers_list,
                                                    "popularity",
                                                    "quantity",
                                                    False)
popularity_model_const = generate_recommendations(items_normal_train,
                                                       customers_list,
                                                       "popularity",
                                                       "quantity_one",
                                                       False)
popularity_model_implicit = generate_recommendations(items_normal_train,
                                                       customers_list,
                                                       "popularity",
                                                       recommend=False)

In [570]:
similarity_model_normal = generate_recommendations(items_normal_train,
                                                   customers_list,
                                                   "similarity",
                                                   "quantity",
                                                   False)
similarity_model_minmax = generate_recommendations(items_minmax_train,
                                                   customers_list,
                                                   "similarity",
                                                   "minmax_quantity",
                                                   False)
similarity_model_binning = generate_recommendations(items_bins_train,
                                                    customers_list,
                                                    "similarity",
                                                    "quantity",
                                                    False)
similarity_model_const = generate_recommendations(items_normal_train,
                                                       customers_list,
                                                       "similarity",
                                                       "quantity_one",
                                                       False)
similarity_model_implicit = generate_recommendations(items_normal_train,
                                                    customers_list,
                                                    "similarity",
                                                    recommend=False)

In [571]:
models_normal = [popularity_model_normal, similarity_model_normal]
models_minmax = [popularity_model_minmax, similarity_model_minmax]
models_bins = [popularity_model_binning, similarity_model_binning]
models_const = [popularity_model_const, similarity_model_const]
models_implicit = [popularity_model_implicit, similarity_model_implicit]

names_normal = ['Popularity Model without Normalisation',
                'Cosine Similarity without Normalisation']
names_minmax = ['Popularity Model with MinMax Normalisation',
                'Cosine Similarity with MinMax Normalisation']
names_bins = ['Popularity Model with Grouped Data into Bins',
              'Cosine Similarity with Grouped Data into Bins']
names_const = ['Popularity Model with Constant Quantity',
                    'Cosine Similarity with Constant Quantity']
names_implicit = ['Popularity Model with Implicit Data',
                  'Cosine Similarity with Implicit Data']

In [560]:
test_normal = tc.recommender.util.compare_models(items_normal_test,
                                                 models_normal,
                                                 model_names=names_normal)

PROGRESS: Evaluate model Popularity Model without Normalisation



Precision and recall summary statistics by cutoff
+--------+----------------------+----------------------+
| cutoff |    mean_precision    |     mean_recall      |
+--------+----------------------+----------------------+
|   1    | 0.016028146989835856 | 0.015832681782642688 |
|   2    | 0.018960125097732615 | 0.03456476413864989  |
|   3    | 0.014855355746677115 | 0.039614281991138915 |
|   4    | 0.03176309616888185  | 0.11630179827990623  |
|   5    | 0.03917122752150126  | 0.17800364868386723  |
|   6    | 0.08698201720093833  |  0.4760555121188429  |
|   7    | 0.08109013738411701  |  0.5195790982538422  |
|   8    | 0.07564503518373732  |  0.5558704717227009  |
|   9    | 0.07054122144036132  |   0.58401746155851   |
|   10   | 0.07208756841282236  |  0.6662431587177486  |
+--------+----------------------+----------------------+
[10 rows x 3 columns]


Overall RMSE: 0.5808631514596044

Per User RMSE (best)
+-------------+------+-------+
| customer_id | rmse | count |
+---------


Precision and recall summary statistics by cutoff
+--------+---------------------+--------------------+
| cutoff |    mean_precision   |    mean_recall     |
+--------+---------------------+--------------------+
|   1    |  0.3025801407349488 | 0.2908848058378946 |
|   2    | 0.22869429241594985 | 0.4386565024758931 |
|   3    |  0.1862131873859783 | 0.5330336200156367 |
|   4    | 0.15510164190774073 | 0.5830075579880104 |
|   5    | 0.15879593432369016 | 0.7368712535835289 |
|   6    | 0.14119103466249683 | 0.7780818347667449 |
|   7    | 0.12671730146319699 | 0.8135261923377664 |
|   8    | 0.11332095387021095 | 0.830531665363567  |
|   9    | 0.10324906611067684 | 0.851055512118842  |
|   10   | 0.09503518373729479 | 0.8698853270784465 |
+--------+---------------------+--------------------+
[10 rows x 3 columns]


Overall RMSE: 1.409195095278454

Per User RMSE (best)
+-------------+---------------------+-------+
| customer_id |         rmse        | count |
+-------------+--------

In [561]:
test_minmax = tc.recommender.util.compare_models(items_minmax_test,
                                                 models_minmax,
                                                 model_names=names_minmax)

PROGRESS: Evaluate model Popularity Model with MinMax Normalisation



Precision and recall summary statistics by cutoff
+--------+----------------------+----------------------+
| cutoff |    mean_precision    |     mean_recall      |
+--------+----------------------+----------------------+
|   1    | 0.020270270270270285 | 0.016593799682034956 |
|   2    | 0.01311605723370431  | 0.020634605193428666 |
|   3    | 0.012586115527291997 | 0.03043852676205613  |
|   4    | 0.01192368839427663  | 0.03871886592474827  |
|   5    | 0.019157392686804452 | 0.08141229464758878  |
|   6    | 0.020204027556968707 | 0.10504107048224684  |
|   7    | 0.023733817851464882 |  0.145131160572337   |
|   8    | 0.02344992050874404  |  0.1620561738208797  |
|   9    | 0.02128599187422717  |  0.165434552199258   |
|   10   | 0.020826709062003203 |  0.1795111287758346  |
+--------+----------------------+----------------------+
[10 rows x 3 columns]


Overall RMSE: 0.10339553774523909

Per User RMSE (best)
+-------------+----------------------+-------+
| customer_id |         


Precision and recall summary statistics by cutoff
+--------+---------------------+---------------------+
| cutoff |    mean_precision   |     mean_recall     |
+--------+---------------------+---------------------+
|   1    |  0.2225755166931637 |  0.2113606253312135 |
|   2    | 0.14069952305246416 |  0.2653285638579753 |
|   3    |  0.1083730789613143 |  0.3053391626921047 |
|   4    | 0.08555246422893512 |  0.3198131955484895 |
|   5    | 0.08076311605723373 |  0.375900900900901  |
|   6    |  0.0792262851086379 |  0.443514838367779  |
|   7    | 0.07284805814217601 | 0.47385400105988373 |
|   8    | 0.08490659777424478 |  0.6215421303656603 |
|   9    | 0.09066419360537022 |  0.743561208267092  |
|   10   | 0.08286963434022263 |  0.7532988871224168 |
+--------+---------------------+---------------------+
[10 rows x 3 columns]


Overall RMSE: 0.11047988028921633

Per User RMSE (best)
+-------------+------+-------+
| customer_id | rmse | count |
+-------------+------+-------+
|     

In [562]:
test_bins = tc.recommender.util.compare_models(items_bins_test,
                                               models_bins,
                                               model_names=names_bins)

PROGRESS: Evaluate model Popularity Model with Grouped Data into Bins



Precision and recall summary statistics by cutoff
+--------+----------------------+----------------------+
| cutoff |    mean_precision    |     mean_recall      |
+--------+----------------------+----------------------+
|   1    | 0.015772870662460574 | 0.015772870662460574 |
|   2    | 0.01892744479495269  | 0.034634595162986344 |
|   3    | 0.014852786540483696 | 0.039563617245005324 |
|   4    | 0.029672712933753953 | 0.10738696109358574  |
|   5    | 0.03817034700315454  | 0.17435594111461603  |
|   6    |  0.0895110410094638  |  0.4941508937960045  |
|   7    |  0.079202343397927   |  0.5087407991587816  |
|   8    | 0.07689274447949523  |  0.5652602523659302  |
|   9    | 0.07189800210304953  |  0.5935199789695054  |
|   10   | 0.07275236593059947  |  0.6693611987381719  |
+--------+----------------------+----------------------+
[10 rows x 3 columns]


Overall RMSE: 0.5668790302202669

Per User RMSE (best)
+-------------+------+-------+
| customer_id | rmse | count |
+---------


Precision and recall summary statistics by cutoff
+--------+---------------------+---------------------+
| cutoff |    mean_precision   |     mean_recall     |
+--------+---------------------+---------------------+
|   1    | 0.30835962145110424 |  0.296004206098843  |
|   2    | 0.23935331230283916 | 0.45491587802313355 |
|   3    | 0.19045741324921117 |  0.5405494216614095 |
|   4    |  0.1594045741324921 |  0.5903654048370152 |
|   5    |  0.1638012618296531 |  0.7503943217665611 |
|   6    | 0.14550473186119864 |  0.7932439537329112 |
|   7    | 0.12916854438936423 |  0.8203864353312298 |
|   8    |  0.1146983438485805 |  0.8318874868559405 |
|   9    | 0.10418857343147572 |  0.850157728706624  |
|   10   | 0.09617507886435334 |  0.8711882229232389 |
+--------+---------------------+---------------------+
[10 rows x 3 columns]


Overall RMSE: 1.4206465534616832

Per User RMSE (best)
+-------------+---------------------+-------+
| customer_id |         rmse        | count |
+-------

In [563]:
test_const = tc.recommender.util.compare_models(items_normal_test,
                                                     models_const,
                                                     model_names=names_const)

PROGRESS: Evaluate model Popularity Model with Constant Quantity



Precision and recall summary statistics by cutoff
+--------+---------------------+---------------------+
| cutoff |    mean_precision   |     mean_recall     |
+--------+---------------------+---------------------+
|   1    | 0.08131352619233796 | 0.07597081052905923 |
|   2    | 0.11180609851446452 | 0.20448918425853527 |
|   3    | 0.11467292155329668 | 0.31929241594996044 |
|   4    | 0.16184519155590285 |  0.5956476413865003 |
|   5    | 0.14347146207975014 |  0.660802710450872  |
|   6    | 0.12744331508991436 |  0.6985926505082096 |
|   7    | 0.12453926058304461 |  0.8015376596299186 |
|   8    | 0.11571540265832694 |  0.8423247328642177 |
|   9    | 0.10628963600034753 |  0.8694943966640597 |
|   10   | 0.09761532447224386 |  0.8881287464164698 |
+--------+---------------------+---------------------+
[10 rows x 3 columns]


Overall RMSE: 0.0

Per User RMSE (best)
+-------------+------+-------+
| customer_id | rmse | count |
+-------------+------+-------+
|     2936    | 0.0  |


Precision and recall summary statistics by cutoff
+--------+---------------------+--------------------+
| cutoff |    mean_precision   |    mean_recall     |
+--------+---------------------+--------------------+
|   1    |  0.3526192337763882 | 0.3380244983059692 |
|   2    |  0.2718921032056292 | 0.5098058378941883 |
|   3    | 0.21318738597862943 | 0.6009577795152472 |
|   4    |  0.2093432369038307 | 0.7774302840761014 |
|   5    | 0.17623143080531642 | 0.8182825123794616 |
|   6    |  0.1525931717487623 | 0.8431717487620539 |
|   7    |  0.1328046464872111 | 0.8546390409173841 |
|   8    | 0.12128616106333068 | 0.8833724263747731 |
|   9    | 0.11171922508904517 |  0.91445139431848  |
|   10   | 0.10289288506645819 | 0.9362783424550433 |
+--------+---------------------+--------------------+
[10 rows x 3 columns]


Overall RMSE: 0.8785995981722208

Per User RMSE (best)
+-------------+---------------------+-------+
| customer_id |         rmse        | count |
+-------------+-------

In [572]:
test_const = tc.recommender.util.compare_models(items_normal_test,
                                                     models_implicit,
                                                     model_names=names_implicit)

PROGRESS: Evaluate model Popularity Model with Implicit Data





Precision and recall summary statistics by cutoff
+--------+---------------------+---------------------+
| cutoff |    mean_precision   |     mean_recall     |
+--------+---------------------+---------------------+
|   1    | 0.43197810789679514 | 0.39982408131352615 |
|   2    |  0.3010164190774044 |  0.5532642689601273 |
|   3    |  0.2367735209799323 |  0.6477065415689351 |
|   4    | 0.19224003127443293 |  0.6966379984362773 |
|   5    | 0.16309616888193895 |  0.7383372426374784 |
|   6    | 0.14685952567109764 |  0.8003648683867609 |
|   7    | 0.13375404892214907 |  0.8515767526713571 |
|   8    | 0.11982017200938229 |  0.8716445139431852 |
|   9    | 0.10915645903917993 |  0.8930153765962984 |
|   10   | 0.09956997654417517 |  0.9037659629919216 |
+--------+---------------------+---------------------+
[10 rows x 3 columns]

PROGRESS: Evaluate model Cosine Similarity with Implicit Data





Precision and recall summary statistics by cutoff
+--------+---------------------+---------------------+
| cutoff |    mean_precision   |     mean_recall     |
+--------+---------------------+---------------------+
|   1    | 0.34636434714620856 | 0.32577534532186614 |
|   2    | 0.26876465989053994 | 0.49755668491008664 |
|   3    | 0.21175397445921304 |  0.5836916862131866 |
|   4    | 0.20826817826426888 |  0.760164190774042  |
|   5    | 0.17951524628616092 |  0.8193249934844931 |
|   6    | 0.15604639040917384 |  0.8533359395360964 |
|   7    |  0.1367139506310731 |  0.870471722700026  |
|   8    | 0.12289874902267414 |  0.8931456867344281 |
|   9    | 0.11045956042046716 |  0.9028212144904879 |
|   10   | 0.10054730258014068 |  0.9124315871774816 |
+--------+---------------------+---------------------+
[10 rows x 3 columns]

