In [1]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
%matplotlib inline
from sklearn.feature_extraction.text import TfidfVectorizer, CountVectorizer
from surprise import Reader, Dataset
from surprise.model_selection import cross_validate
from surprise import accuracy
from surprise.model_selection import train_test_split
from surprise import SVD, evaluate
from surprise import NMF
from surprise import BaselineOnly
from surprise import KNNBasic

In [2]:
def mask(column,threshold):
    counts = pd.value_counts(column)
    mask = column.isin(counts[counts > threshold].index)
    column[~mask] = "others"
    return column

In [3]:
bookings_data = pd.read_csv('BookingsData.csv')

bookings_data = bookings_data.drop_duplicates()
bookings_data["Customer_Name"] = bookings_data["Customer_Name"].str.lower()
bookings_data["Product_Family"] = bookings_data["Product_Family"].str.lower()
bookings_data["BOARD_GEO"] = bookings_data["BOARD_GEO"].str.lower()
bookings_data["Customer_Name"] = bookings_data["Customer_Name"].replace('\s+', ' ', regex=True)
bookings_data["TX_Date_mod"] = pd.to_datetime(bookings_data['TX_Date'])
bookings_data = bookings_data[(bookings_data["TX_Date_mod"] > '2012')]
bookings_data.loc[(bookings_data['Product_Family'] == "lan desk (other)"),"Product_Family"] = 'others'

# if trying classification
bookings_data['Product_Family'] = mask(bookings_data['Product_Family'], 50)
bookings_data['PRODUCTCODE'] = mask(bookings_data['PRODUCTCODE'], 20)
bookings_data.count()

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  after removing the cwd from sys.path.


Customer_Name      141799
Account_ID          14265
Bookings Amount    141799
TX_Date            141799
Seats              141799
GEO                 98553
BOARD_GEO          141799
VERTICAL           125251
Product_Family     141799
Deal Type          141799
Product Type       141799
Product Segment    135956
Contract Start     113938
Contract End        89477
PRODUCTCODE        141799
FAMILY             139802
NAME               141799
DESCRIPTION         87502
TX_Date_mod        141799
dtype: int64

In [4]:
bookings_data['Seats_mod'] = np.where(bookings_data['Seats'] > 0, 1, (np.where(bookings_data['Seats'] == 0, 0, -1)))

In [5]:
company_standards = pd.read_csv('standardization2.csv')
company_standards_2 = pd.read_csv('standardization2_2.csv')
print(company_standards.shape)

bookings_data = bookings_data.merge(company_standards, left_on='Customer_Name', right_on='Similar_Name', how='outer')
bookings_data = bookings_data.rename(index=str, columns={"Customer_Name_x": "Customer_Name_old1", 
                                                         "Customer_Name_y": "Customer_Name"})

bookings_data = bookings_data.merge(company_standards_2, left_on='Customer_Name', right_on='Similar_Name', how='outer')
bookings_data = bookings_data.rename(index=str, columns={"Customer_Name_x": "Customer_Name_old2", 
                                                         "Customer_Name_y": "Customer_Name"})

bookings_data["Customer_Name"] = np.where(bookings_data.Customer_Name.isna(), 
                    np.where(bookings_data.Customer_Name_old2.isna(), bookings_data.Customer_Name_old1 , bookings_data.Customer_Name_old2),
                                       bookings_data.Customer_Name)   

print(len(bookings_data.Customer_Name.unique()))
print(len(bookings_data.Customer_Name_old2.unique()))
print(len(bookings_data.Customer_Name_old1.unique()))
print(bookings_data.shape)

(2572, 3)
21762
1308
23664
(149892, 26)


In [6]:
table = bookings_data.pivot_table(values=['Seats_mod'], index=['Customer_Name'], columns=['Product_Family'],
                     aggfunc='sum').fillna(0)
table

Unnamed: 0_level_0,Seats_mod,Seats_mod,Seats_mod,Seats_mod,Seats_mod,Seats_mod,Seats_mod,Seats_mod,Seats_mod,Seats_mod,Seats_mod,Seats_mod,Seats_mod,Seats_mod,Seats_mod,Seats_mod,Seats_mod,Seats_mod,Seats_mod,Seats_mod,Seats_mod
Product_Family,anti virus,application manager,application virtualization,asset manager,datanow,desktopnow,discovery,dsm,emss,environment manager,...,security suite,server manager,service desk,service management,shavlik oem royalties,shavlik patch,uem other,user management,xtraction,xtraction oem royalty
Customer_Name,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2,Unnamed: 16_level_2,Unnamed: 17_level_2,Unnamed: 18_level_2,Unnamed: 19_level_2,Unnamed: 20_level_2,Unnamed: 21_level_2
interceramic,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,5.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
movitex,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
oao bank petrocommerz,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
##(##)############,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,3.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
#eská lékárna a.s.,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,2.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
's heeren loo,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
't sit,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
(bet365) hillside (new media) ltd,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
(dhhs) us department of health &,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,3.0,0.0,2.0,0.0,0.0,0.0,0.0,3.0,0.0,0.0
(dhhs) us department of health & human services,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,0.0


In [40]:
# from scipy.sparse.linalg import *
# U, sigma, Vt = svds(table)

# sigma = np.diag(sigma)

# all_user_predicted_ratings = np.dot(np.dot(U, sigma), Vt)
# preds_df = pd.DataFrame(all_user_predicted_ratings, columns = table.columns, index= table.index)

# preds_df

In [41]:
# user_data = table.loc["amundi"].unstack(-1)

# prod = np.dot(sigma, Vt)

# print(U.shape)
# print(sigma.shape)
# print(Vt.shape)

# print(table.shape)
# print(preds_df.shape)
# prod.shape

In [7]:
data = bookings_data[["Customer_Name","Product_Family","Seats_mod"]]
#data.rename(columns={'Seats_mod': 'count'},inplace=True)
data = pd.DataFrame({'count' : data.groupby( ["Customer_Name", "Product_Family" ] ).size()}).reset_index()
# print(data)
data["count"].describe()

count    33942.000000
mean         4.416122
std         18.417743
min          1.000000
25%          1.000000
50%          2.000000
75%          4.000000
max       1468.000000
Name: count, dtype: float64

In [8]:
sum(data.groupby(["Customer_Name"])["Product_Family"].count() < 2)

14628

In [9]:
import turicreate
data1 = turicreate.SFrame(data)
train, test = turicreate.recommender.util.random_split_by_user(data1, 
                    user_id='Customer_Name', item_id='Product_Family', item_test_proportion = 1)
popularity_model = turicreate.popularity_recommender.create(train, 
                                    user_id='Customer_Name', item_id='Product_Family', target='count')
item_sim_model_cosine = turicreate.item_similarity_recommender.create(train, 
                        user_id='Customer_Name', item_id='Product_Family', target='count', similarity_type='cosine')
item_sim_model_jaccard = turicreate.item_similarity_recommender.create(train, 
                        user_id='Customer_Name', item_id='Product_Family', target='count', similarity_type='jaccard')
item_sim_model_pearson = turicreate.item_similarity_recommender.create(train, 
                        user_id='Customer_Name', item_id='Product_Family', target='count', similarity_type="pearson")
factorization_model = turicreate.factorization_recommender.create(train, 
                                    user_id='Customer_Name', item_id='Product_Family', target='count')
factorization_model_als = turicreate.factorization_recommender.create(train, 
                                    user_id='Customer_Name', item_id='Product_Family', target='count',solver = 'als')
ranking_factorization_model = turicreate.ranking_factorization_recommender.create(train, 
                                    user_id='Customer_Name', item_id='Product_Family', target='count')

models = [popularity_model, item_sim_model_cosine, item_sim_model_jaccard
                                                 ,item_sim_model_pearson, factorization_model, factorization_model_als,
                                                ranking_factorization_model ]

# popularity_model.evaluate(train_data)
# popularity_recomm = popularity_model.recommend(users=["amundi","acm","axians"],k=15)
# popularity_recomm.print_rows(num_rows=45)

In [10]:
turicreate.recommender.util.compare_models(test, models, metric='precision_recall')

PROGRESS: Evaluate model M0



Precision and recall summary statistics by cutoff
+--------+-----------------------+-----------------------+
| cutoff |      mean_recall      |     mean_precision    |
+--------+-----------------------+-----------------------+
|   1    |          0.0          |          0.0          |
|   2    | 0.0020000000000000018 | 0.0015000000000000013 |
|   3    |         0.0025        | 0.0013333333333333344 |
|   4    |  0.06295238095238083  |         0.0315        |
|   5    |  0.08101190476190469  |  0.02979999999999999  |
|   6    |  0.08101190476190469  |  0.024833333333333357 |
|   7    |  0.09766190476190482  |  0.027285714285714302 |
|   8    |  0.11016190476190471  |  0.025499999999999988 |
|   9    |  0.14138809523809526  |  0.027222222222222214 |
|   10   |  0.14238809523809504  |  0.024600000000000018 |
+--------+-----------------------+-----------------------+
[10 rows x 3 columns]

PROGRESS: Evaluate model M1



Precision and recall summary statistics by cutoff
+--------+---------------------+---------------------+
| cutoff |     mean_recall     |    mean_precision   |
+--------+---------------------+---------------------+
|   1    | 0.07981904761904761 | 0.17400000000000004 |
|   2    | 0.09760476190476187 | 0.11400000000000005 |
|   3    | 0.10475714285714278 | 0.08400000000000003 |
|   4    | 0.11172380952380949 | 0.06774999999999987 |
|   5    | 0.17217619047619037 | 0.07860000000000003 |
|   6    |  0.2299428571428574 | 0.08433333333333334 |
|   7    | 0.24659285714285722 | 0.07828571428571422 |
|   8    | 0.27809285714285714 | 0.07499999999999987 |
|   9    | 0.29642619047619057 | 0.07088888888888888 |
|   10   |  0.3285928571428575 | 0.06860000000000004 |
+--------+---------------------+---------------------+
[10 rows x 3 columns]

PROGRESS: Evaluate model M2



Precision and recall summary statistics by cutoff
+--------+---------------------+---------------------+
| cutoff |     mean_recall     |    mean_precision   |
+--------+---------------------+---------------------+
|   1    | 0.07981904761904766 | 0.17399999999999996 |
|   2    | 0.09760476190476182 | 0.11399999999999992 |
|   3    |  0.1580571428571428 | 0.11666666666666667 |
|   4    | 0.21582380952380956 | 0.11574999999999996 |
|   5    | 0.22297619047619024 |  0.0974000000000001 |
|   6    | 0.23962619047619033 |  0.0881666666666667 |
|   7    |  0.2711261904761903 | 0.08299999999999996 |
|   8    | 0.27809285714285703 | 0.07499999999999989 |
|   9    |  0.2964261904761905 | 0.07088888888888896 |
|   10   |  0.3171261904761906 | 0.06720000000000001 |
+--------+---------------------+---------------------+
[10 rows x 3 columns]

PROGRESS: Evaluate model M3



Precision and recall summary statistics by cutoff
+--------+-----------------------+-----------------------+
| cutoff |      mean_recall      |     mean_precision    |
+--------+-----------------------+-----------------------+
|   1    |          0.0          |          0.0          |
|   2    | 0.0020000000000000018 | 0.0015000000000000011 |
|   3    | 0.0024999999999999988 | 0.0013333333333333346 |
|   4    |  0.06295238095238095  |  0.03150000000000002  |
|   5    |  0.08101190476190481  |  0.029800000000000004 |
|   6    |  0.08101190476190481  |  0.02483333333333333  |
|   7    |   0.0976619047619047  |  0.02728571428571426  |
|   8    |  0.11016190476190482  |  0.025499999999999984 |
|   9    |  0.14138809523809523  |  0.02722222222222221  |
|   10   |  0.14238809523809506  |  0.024599999999999986 |
+--------+-----------------------+-----------------------+
[10 rows x 3 columns]

PROGRESS: Evaluate model M4



Precision and recall summary statistics by cutoff
+--------+----------------------+----------------------+
| cutoff |     mean_recall      |    mean_precision    |
+--------+----------------------+----------------------+
|   1    | 0.060452380952380924 | 0.12200000000000004  |
|   2    | 0.060952380952381015 |  0.0615000000000001  |
|   3    | 0.06295238095238101  | 0.04199999999999999  |
|   4    | 0.06295238095238101  | 0.03149999999999996  |
|   5    | 0.08101190476190477  | 0.02980000000000001  |
|   6    |  0.0935119047619047  | 0.027000000000000003 |
|   7    | 0.11016190476190477  | 0.02914285714285714  |
|   8    |  0.2798642857142857  | 0.04937499999999996  |
|   9    | 0.31109047619047536  |  0.0484444444444445  |
|   10   | 0.31109047619047536  | 0.043600000000000035 |
+--------+----------------------+----------------------+
[10 rows x 3 columns]

PROGRESS: Evaluate model M5



Precision and recall summary statistics by cutoff
+--------+---------------------+---------------------+
| cutoff |     mean_recall     |    mean_precision   |
+--------+---------------------+---------------------+
|   1    | 0.05776666666666666 | 0.11300000000000007 |
|   2    | 0.07441666666666671 | 0.07750000000000008 |
|   3    | 0.15423571428571445 | 0.10966666666666669 |
|   4    |  0.2146880952380952 | 0.11274999999999996 |
|   5    | 0.23247380952380955 | 0.10100000000000002 |
|   6    | 0.23944047619047615 | 0.08733333333333329 |
|   7    |  0.2716071428571434 | 0.08171428571428567 |
|   8    |  0.3731904761904761 | 0.08650000000000008 |
|   9    |  0.5428928571428574 | 0.09811111111111105 |
|   10   |  0.5743928571428573 | 0.09349999999999999 |
+--------+---------------------+---------------------+
[10 rows x 3 columns]

PROGRESS: Evaluate model M6



Precision and recall summary statistics by cutoff
+--------+---------------------+---------------------+
| cutoff |     mean_recall     |    mean_precision   |
+--------+---------------------+---------------------+
|   1    | 0.07981904761904764 |  0.1739999999999999 |
|   2    |  0.2495214285714289 | 0.18250000000000002 |
|   3    |  0.3099738095238091 | 0.16233333333333344 |
|   4    |  0.3677404761904763 | 0.15000000000000008 |
|   5    | 0.46932380952380987 |  0.1440000000000002 |
|   6    | 0.46932380952380987 | 0.12000000000000009 |
|   7    |  0.5416071428571425 | 0.11557142857142859 |
|   8    |  0.5593928571428571 | 0.10787499999999993 |
|   9    |  0.6005095238095236 | 0.10188888888888888 |
|   10   |  0.6005095238095236 | 0.09170000000000003 |
+--------+---------------------+---------------------+
[10 rows x 3 columns]



[{'precision_recall_by_user': Columns:
  	Customer_Name	str
  	cutoff	int
  	precision	float
  	recall	float
  	count	int
  
  Rows: 18000
  
  Data:
  +----------------------------+--------+---------------------+
  |       Customer_Name        | cutoff |      precision      |
  +----------------------------+--------+---------------------+
  | 121 financial credit union |   1    |         0.0         |
  | 121 financial credit union |   2    |         0.0         |
  | 121 financial credit union |   3    |         0.0         |
  | 121 financial credit union |   4    |         0.25        |
  | 121 financial credit union |   5    |         0.2         |
  | 121 financial credit union |   6    | 0.16666666666666666 |
  | 121 financial credit union |   7    | 0.14285714285714285 |
  | 121 financial credit union |   8    |        0.125        |
  | 121 financial credit union |   9    |  0.1111111111111111 |
  | 121 financial credit union |   10   |         0.1         |
  +---------------

In [11]:
turicreate.recommender.util.compare_models(test, models, metric='rmse')

PROGRESS: Evaluate model M0

Overall RMSE: 27.778382946097285

Per User RMSE (best)
+--------------------+-------+---------------------+
|   Customer_Name    | count |         rmse        |
+--------------------+-------+---------------------+
| suncoast solutions |   1   | 0.06743002544529242 |
+--------------------+-------+---------------------+
[1 rows x 3 columns]


Per User RMSE (worst)
+---------------+-------+-------------------+
| Customer_Name | count |        rmse       |
+---------------+-------+-------------------+
|     mapfre    |   3   | 616.1265997902863 |
+---------------+-------+-------------------+
[1 rows x 3 columns]


Per Item RMSE (best)
+----------------+-------+--------+
| Product_Family | count |  rmse  |
+----------------+-------+--------+
|       go       |   2   | 0.3125 |
+----------------+-------+--------+
[1 rows x 3 columns]


Per Item RMSE (worst)
+----------------+-------+--------------------+
| Product_Family | count |        rmse        |
+----------

[{'rmse_by_item': Columns:
  	Product_Family	str
  	count	int
  	rmse	float
  
  Rows: 42
  
  Data:
  +----------------+-------+---------------------+
  | Product_Family | count |         rmse        |
  +----------------+-------+---------------------+
  |    protect     |  191  |  4.0187967897649814 |
  | patch manager  |  122  |   9.92596707284377  |
  |   patchlink    |   6   |  0.5416666666666667 |
  |      sccm      |   2   |  0.8955223880597014 |
  |      dsm       |   41  |  5.8361207616589965 |
  | server manager |   5   |  1.6182356962131053 |
  | managed planet |   24  |  2.241071117936272  |
  |    datanow     |   10  |  2.279957740191351  |
  | shavlik patch  |   89  |  1.9605214827981594 |
  |   scupdates    |   7   | 0.35714285714285715 |
  +----------------+-------+---------------------+
  [42 rows x 3 columns]
  Note: Only the head of the SFrame is printed.
  You can use print_rows(num_rows=m, num_columns=n) to print more rows and columns.,
  'rmse_by_user': Columns:
 

In [None]:
# from turicreate.toolkits.recommender.util import precision_recall_by_user
# recs = popularity_model.recommend()
# precision_recall_by_user(test, recs)

In [12]:
ranking_factorization_model.predict(test)

dtype: float
Rows: 1536
[-43.048351499730174, -24.694973203831736, -45.24988195627314, -46.689132902318065, -36.68563863962275, -45.0952455732165, -46.89804098337275, -48.558014127904, -31.480182859593455, -31.480182859593455, -46.11708090036494, -42.3574526044665, -39.937301847630565, -43.87170812814814, -46.025505277806346, -43.048351499730174, -37.66322347848994, -45.089748594456736, -39.937301847630565, -31.480182859593455, -46.76623937814814, -43.87170812814814, -43.296291563206736, -36.68563863962275, -31.480182859593455, -37.66322347848994, -31.480182859593455, -42.3574526044665, -31.480182859593455, -43.048351499730174, -24.694973203831736, -37.66322347848994, -45.0952455732165, -31.480182859593455, -31.480182859593455, -48.558014127904, -24.694973203831736, -46.57019445627314, -45.089748594456736, -39.937301847630565, -31.480182859593455, -31.480182859593455, -42.3574526044665, -37.66322347848994, -39.937301847630565, -43.048351499730174, -45.089748594456736, -43.8717081281481

In [13]:
ranking_factorization_model.recommend()

In [133]:
ranking_factorization_model.summary

<bound method Model.summary of Class                            : RankingFactorizationRecommender

Schema
------
User ID                          : Customer_Name
Item ID                          : Product_Family
Target                           : count
Additional observation features  : 0
User side features               : []
Item side features               : []

Statistics
----------
Number of observations           : 32406
Number of users                  : 20762
Number of items                  : 47

Training summary
----------------
Training time                    : 6.1474

Model Parameters
----------------
Model class                      : RankingFactorizationRecommender
num_factors                      : 32
binary_target                    : 0
side_data_factorization          : 1
solver                           : auto
nmf                              : 0
max_iterations                   : 25

Regularization Settings
-----------------------
regularization                   : 0

In [134]:
ranking_factorization_model.evaluate(test)


Precision and recall summary statistics by cutoff
+--------+---------------------+---------------------+
| cutoff |    mean_precision   |     mean_recall     |
+--------+---------------------+---------------------+
|   1    | 0.17400000000000032 | 0.07981904761904754 |
|   2    | 0.18250000000000013 | 0.24952142857142856 |
|   3    | 0.16233333333333347 | 0.30997380952380954 |
|   4    | 0.14999999999999997 |  0.3677404761904765 |
|   5    | 0.14399999999999996 | 0.46932380952380937 |
|   6    | 0.11999999999999998 | 0.46932380952380937 |
|   7    | 0.11557142857142866 |  0.5416071428571427 |
|   8    | 0.10787499999999994 |  0.5593928571428579 |
|   9    | 0.10188888888888878 |  0.6005095238095246 |
|   10   | 0.09170000000000006 |  0.6005095238095246 |
+--------+---------------------+---------------------+
[10 rows x 3 columns]


Overall RMSE: 52.42981670284759

Per User RMSE (best)
+-------------------------------+-------+-------------------+
|         Customer_Name         | count

{'precision_recall_by_user': Columns:
 	Customer_Name	str
 	cutoff	int
 	precision	float
 	recall	float
 	count	int
 
 Rows: 18000
 
 Data:
 +----------------------------+--------+--------------------+---------------------+
 |       Customer_Name        | cutoff |     precision      |        recall       |
 +----------------------------+--------+--------------------+---------------------+
 | 121 financial credit union |   1    |        1.0         | 0.16666666666666666 |
 | 121 financial credit union |   2    |        0.5         | 0.16666666666666666 |
 | 121 financial credit union |   3    | 0.6666666666666666 |  0.3333333333333333 |
 | 121 financial credit union |   4    |        0.5         |  0.3333333333333333 |
 | 121 financial credit union |   5    |        0.4         |  0.3333333333333333 |
 | 121 financial credit union |   6    | 0.3333333333333333 |  0.3333333333333333 |
 | 121 financial credit union |   7    | 0.2857142857142857 |  0.3333333333333333 |
 | 121 financial cre

In [146]:
ranking_factorization_model.recommend(["labcorp"],k=5)

Customer_Name,Product_Family,score,rank
labcorp,protect,0.7432290278512946,1
labcorp,res uem,0.5579525889340352,2
labcorp,shavlik patch,0.477814263902876,3
labcorp,desktopnow,0.4619537880050164,4
labcorp,patch manager,0.457939933925193,5


In [147]:
ranking_factorization_model.recommend(["labcorp","new_customer"],k=10)

Customer_Name,Product_Family,score,rank
labcorp,protect,0.7432290278512946,1
labcorp,res uem,0.5579525889340352,2
labcorp,shavlik patch,0.477814263902876,3
labcorp,desktopnow,0.4619537880050164,4
labcorp,patch manager,0.457939933925193,5
labcorp,ldms,0.4512761000423464,6
labcorp,security suite,0.4208169526902028,7
labcorp,emss,0.4120341516602677,8
labcorp,dsm,0.4035170362062038,9
labcorp,res itsm,0.3814781804843055,10


In [149]:
popularity_model.recommend(["labcorp","new_customer"],k=5)

Customer_Name,Product_Family,score,rank
labcorp,xtraction oem royalty,139.5,1
labcorp,ht oem,24.11764705882353,2
labcorp,shavlik oem royalties,23.916666666666668,3
labcorp,patch manager,11.826443202979515,4
labcorp,oem licenced royalties,8.125,5
new_customer,xtraction oem royalty,139.5,1
new_customer,ht oem,24.11764705882353,2
new_customer,shavlik oem royalties,23.916666666666668,3
new_customer,patch manager,11.826443202979515,4
new_customer,heat 2014 saas,11.041666666666666,5


In [151]:
ranking_factorization_model.get_similar_items(["heat 2014 saas"],k=5)

Product_Family,similar,score,rank
heat 2014 saas,application manager,0.296579897403717,1
heat 2014 saas,itsm core,0.2599488794803619,2
heat 2014 saas,uem other,0.2178467214107513,3
heat 2014 saas,performance manager,0.2022912204265594,4
heat 2014 saas,letmobile,0.1915725767612457,5


In [152]:
popularity_model.get_similar_users(["labcorp"],k=10)

Customer_Name,similar,score,rank
labcorp,interceramic,1.0,1
labcorp,movitex,1.0,2
labcorp,landessportbund niedersachsen e. v. ...,1.0,3
labcorp,##(##)############,1.0,4
labcorp,landesklinikum thermenregion ...,1.0,5
labcorp,'s heeren loo,1.0,6
labcorp,'t sit,1.0,7
labcorp,(bet365) hillside (new media) ltd ...,1.0,8
labcorp,landesk software corporation ...,1.0,9
labcorp,landeshauptstadt stuttgart ...,1.0,10


In [15]:
ranking_factorization_model = turicreate.ranking_factorization_recommender.create(train, 
                                    user_id='Customer_Name', item_id='Product_Family',
                                    num_factors=47, regularization=1e-4)
ranking_factorization_model.evaluate(test)




Precision and recall summary statistics by cutoff
+--------+---------------------+---------------------+
| cutoff |     mean_recall     |    mean_precision   |
+--------+---------------------+---------------------+
|   1    |  0.169702380952381  | 0.19100000000000011 |
|   2    | 0.24952142857142828 |  0.1825000000000001 |
|   3    | 0.30997380952380976 | 0.16233333333333352 |
|   4    |  0.367740476190476  | 0.15000000000000005 |
|   5    |  0.4693238095238095 |  0.1440000000000001 |
|   6    |  0.515607142857143  | 0.12916666666666662 |
|   7    |  0.5567238095238106 | 0.11842857142857142 |
|   8    |  0.5879500000000004 |       0.10875       |
|   9    |  0.6602333333333333 | 0.10655555555555554 |
|   10   |  0.676883333333334  | 0.10010000000000006 |
+--------+---------------------+---------------------+
[10 rows x 3 columns]



{'precision_recall_by_user': Columns:
 	Customer_Name	str
 	cutoff	int
 	precision	float
 	recall	float
 	count	int
 
 Rows: 18000
 
 Data:
 +----------------------------+--------+--------------------+---------------------+
 |       Customer_Name        | cutoff |     precision      |        recall       |
 +----------------------------+--------+--------------------+---------------------+
 | 121 financial credit union |   1    |        0.0         |         0.0         |
 | 121 financial credit union |   2    |        0.5         | 0.16666666666666666 |
 | 121 financial credit union |   3    | 0.6666666666666666 |  0.3333333333333333 |
 | 121 financial credit union |   4    |        0.5         |  0.3333333333333333 |
 | 121 financial credit union |   5    |        0.4         |  0.3333333333333333 |
 | 121 financial credit union |   6    | 0.3333333333333333 |  0.3333333333333333 |
 | 121 financial credit union |   7    | 0.2857142857142857 |  0.3333333333333333 |
 | 121 financial cre

In [19]:
b = ranking_factorization_model.recommend(data.Customer_Name.unique(), k=10)

In [20]:
b

Customer_Name,Product_Family,score,rank
interceramic,ldms,0.7658251538057307,1
interceramic,anti virus,0.5073603173226029,2
interceramic,patch manager,0.3794770946089673,3
interceramic,desktopnow,0.3579375326974204,4
interceramic,managed planet,0.2672887922967776,5
interceramic,service desk,0.2631351189517417,6
interceramic,shavlik patch,0.2556236334574495,7
interceramic,others,0.2504824132619376,8
interceramic,protect,0.2439095709341365,9
interceramic,datanow,0.2014662011622051,10


In [22]:
b.export_csv("Recommended Products.csv")

In [27]:
c = ranking_factorization_model.get_similar_items(turicreate.SArray(data.Product_Family.unique()),k=5)

In [28]:
c

Product_Family,similar,score,rank
security suite,heat classic,3.4024985421834377e+38,1
security suite,application manager,2.28003287790778e+38,2
security suite,protect,1.5719323677346077e+20,3
security suite,emss,7.739758776250112e+19,4
security suite,managed planet,4.359883781917848e+19,5
anti virus,ldms,8.856625975168231e+19,1
anti virus,dsm,2.8911675844555964e+19,2
anti virus,xtraction,1.7787075579950924e+19,3
anti virus,ldim (inventory manager),1.659177449916308e+19,4
anti virus,ht oem,7.129859016615789e+18,5


In [29]:
c.export_csv("Similar Products.csv")

In [31]:
d = ranking_factorization_model.get_similar_users(turicreate.SArray(data.Customer_Name.unique()),k=10)

In [32]:
d

Customer_Name,similar,score,rank
interceramic,aerovironment inc.,1.0246274642209668e+18,1
interceramic,nfi interactive logistics llc ...,1.0024566368805848e+18,2
interceramic,university of north georgia ...,9.927185372712796e+17,3
interceramic,uga-college of veterinary medicine of georgia ...,9.847624711326924e+17,4
interceramic,university of west georgia ...,9.740440194682716e+17,5
interceramic,university of georgia- oit/college of education ...,9.640168170203382e+17,6
interceramic,health resources and services ...,9.491523131660042e+17,7
interceramic,owatonna public schools- use owa501 ...,9.481389757620552e+17,8
interceramic,board of regents,9.194040640038502e+17,9
interceramic,system administrative services ...,9.083675098816184e+17,10


In [33]:
d.export_csv("Similar Users.csv")