In [37]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
%matplotlib inline
from sklearn.feature_extraction.text import TfidfVectorizer, CountVectorizer
from surprise import Reader, Dataset
from surprise.model_selection import cross_validate
from surprise import accuracy
from surprise.model_selection import train_test_split
from surprise import SVD, evaluate
from surprise import NMF
from surprise import BaselineOnly
from surprise import KNNBasic

In [38]:
def mask(column,threshold):
    counts = pd.value_counts(column)
    mask = column.isin(counts[counts > threshold].index)
    column[~mask] = "others"
    return column

In [39]:
bookings_data = pd.read_csv('BookingsData.csv')

In [40]:
bookings_data = bookings_data.drop_duplicates()
bookings_data["Customer_Name"] = bookings_data["Customer_Name"].str.lower()
bookings_data["Product_Family"] = bookings_data["Product_Family"].str.lower()
bookings_data["BOARD_GEO"] = bookings_data["BOARD_GEO"].str.lower()
bookings_data["Customer_Name"] = bookings_data["Customer_Name"].replace('\s+', ' ', regex=True)
bookings_data["TX_Date_mod"] = pd.to_datetime(bookings_data['TX_Date'])
bookings_data = bookings_data[(bookings_data["TX_Date_mod"] > '2012')]
bookings_data.loc[(bookings_data['Product_Family'] == "lan desk (other)"),"Product_Family"] = 'others'

# if trying classification
bookings_data['Product_Family'] = mask(bookings_data['Product_Family'], 50)
bookings_data['PRODUCTCODE'] = mask(bookings_data['PRODUCTCODE'], 20)
bookings_data.count()

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  after removing the cwd from sys.path.


Customer_Name      141799
Account_ID          14265
Bookings Amount    141799
TX_Date            141799
Seats              141799
GEO                 98553
BOARD_GEO          141799
VERTICAL           125251
Product_Family     141799
Deal Type          141799
Product Type       141799
Product Segment    135956
Contract Start     113938
Contract End        89477
PRODUCTCODE        141799
FAMILY             139802
NAME               141799
DESCRIPTION         87502
TX_Date_mod        141799
dtype: int64

In [41]:
bookings_data['Seats_mod'] = np.where(bookings_data['Seats'] > 0, 1, (np.where(bookings_data['Seats'] == 0, 0, -1)))

In [42]:
company_standards = pd.read_csv('standardization2.csv')
company_standards_2 = pd.read_csv('standardization2_2.csv')
print(company_standards.shape)

bookings_data = bookings_data.merge(company_standards, left_on='Customer_Name', right_on='Similar_Name', how='inner')
bookings_data = bookings_data.rename(index=str, columns={"Customer_Name_x": "Customer_Name_old1", 
                                                         "Customer_Name_y": "Customer_Name"})

bookings_data = bookings_data.merge(company_standards_2, left_on='Customer_Name', right_on='Similar_Name', how='inner')
bookings_data = bookings_data.rename(index=str, columns={"Customer_Name_x": "Customer_Name_old2", 
                                                         "Customer_Name_y": "Customer_Name"})

print(len(bookings_data.Customer_Name.unique()))
print(len(bookings_data.Customer_Name_old2.unique()))

(2572, 3)
113
149


In [43]:
table = bookings_data.pivot_table(values=['Seats_mod'], index=['Customer_Name'], columns=['Product_Family'],
                     aggfunc='sum').fillna(0)
table

Unnamed: 0_level_0,Seats_mod,Seats_mod,Seats_mod,Seats_mod,Seats_mod,Seats_mod,Seats_mod,Seats_mod,Seats_mod,Seats_mod,Seats_mod,Seats_mod,Seats_mod,Seats_mod,Seats_mod,Seats_mod,Seats_mod,Seats_mod,Seats_mod,Seats_mod,Seats_mod
Product_Family,anti virus,application manager,application virtualization,asset manager,datanow,desktopnow,discovery,dsm,emss,environment manager,...,res uem,sccm,security suite,server manager,service desk,shavlik oem royalties,shavlik patch,uem other,user management,xtraction
Customer_Name,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2,Unnamed: 16_level_2,Unnamed: 17_level_2,Unnamed: 18_level_2,Unnamed: 19_level_2,Unnamed: 20_level_2,Unnamed: 21_level_2
a-katsastus,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
acm,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
american express,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2.0,0.0
amundi,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,3.0,0.0
axians,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,36.0
baxter ag,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,2.0,0.0,0.0,0.0,0.0,2.0
bcd travel,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
bitdefender srl,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,2.0,0.0,0.0,0.0,0.0
bnp paribas,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,36.0,6.0,...,24.0,0.0,12.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
canada life,0.0,0.0,0.0,0.0,0.0,28.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [44]:
bookings_data["TX_Date_mod"] = pd.to_datetime(bookings_data['TX_Date'])

In [45]:
bookings_data.loc[(bookings_data['Product_Family'] == "lan desk (other)"),"Product_Family"] = 'others'

In [46]:
from scipy.sparse.linalg import *
U, sigma, Vt = svds(table)

sigma = np.diag(sigma)

all_user_predicted_ratings = np.dot(np.dot(U, sigma), Vt)
preds_df = pd.DataFrame(all_user_predicted_ratings, columns = table.columns, index= table.index)

preds_df

Unnamed: 0_level_0,Seats_mod,Seats_mod,Seats_mod,Seats_mod,Seats_mod,Seats_mod,Seats_mod,Seats_mod,Seats_mod,Seats_mod,Seats_mod,Seats_mod,Seats_mod,Seats_mod,Seats_mod,Seats_mod,Seats_mod,Seats_mod,Seats_mod,Seats_mod,Seats_mod
Product_Family,anti virus,application manager,application virtualization,asset manager,datanow,desktopnow,discovery,dsm,emss,environment manager,...,res uem,sccm,security suite,server manager,service desk,shavlik oem royalties,shavlik patch,uem other,user management,xtraction
Customer_Name,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2,Unnamed: 16_level_2,Unnamed: 17_level_2,Unnamed: 18_level_2,Unnamed: 19_level_2,Unnamed: 20_level_2,Unnamed: 21_level_2
a-katsastus,-2.436551e-04,-1.466241e-02,4.838733e-05,-1.970031e-06,-5.538204e-03,1.292342e-02,-1.347190e-04,1.373383e-04,-2.187377e-04,4.545986e-02,...,1.626631e-02,-1.032804e-02,4.081362e-04,-8.502938e-06,3.040352e-03,-1.509783e-06,-2.797620e-03,-1.206456e-04,9.285145e-03,-3.489866e-02
acm,-2.086257e-04,-4.460397e-03,1.200570e-04,-2.131721e-06,-4.294201e-03,1.217583e-01,-5.697503e-06,4.157986e-03,5.954898e-02,6.866486e-02,...,4.468154e-02,-4.457877e-03,7.023761e-04,-9.127119e-06,6.340753e-03,2.162579e-05,1.588585e-02,-1.122845e-04,1.648060e-02,-1.938316e-02
american express,8.514908e-04,1.417642e-01,3.627169e-04,3.561403e-07,7.345572e-03,8.545619e-01,1.366657e-03,1.853300e-02,-1.779608e-02,-4.414102e-04,...,1.105129e-01,9.356360e-02,4.732981e-02,2.235399e-06,-1.408974e-03,1.480006e-05,1.424309e-01,1.499355e-04,1.738257e-03,2.636058e-01
amundi,6.054399e-02,-2.915530e-02,7.022469e-04,9.274888e-04,2.050842e+00,-8.492438e-01,1.715449e-02,-1.424303e-02,-2.504585e-01,3.228219e-01,...,3.274816e+00,-1.075823e-01,1.836894e-02,3.933613e-03,1.460887e-01,-4.095351e-04,3.251179e+00,4.718626e-02,2.495303e+00,-1.955813e-01
axians,7.988711e-03,1.565555e+00,3.970252e-03,-1.500706e-05,5.726875e-02,9.370584e+00,1.487203e-02,2.262581e-01,6.424684e-01,-2.838914e-02,...,1.188662e+00,1.029295e+00,-8.516727e-03,-5.533758e-05,4.508458e-03,1.851236e-03,1.493012e+00,7.687298e-04,-3.525838e-02,2.924043e+00
baxter ag,2.070374e-02,4.687530e-02,5.567930e-04,3.128594e-04,6.957245e-01,2.466815e-01,6.436168e-03,9.346125e-03,6.104959e-03,1.993943e-01,...,1.222447e+00,-1.250015e-03,3.958139e-02,1.327115e-03,5.798189e-02,-2.162904e-04,1.189480e+00,1.597560e-02,8.711660e-01,2.475295e-02
bcd travel,4.190957e-02,-1.888199e-02,4.891517e-04,6.399616e-04,1.415363e+00,-5.818003e-01,1.185521e-02,-9.932747e-03,-1.810188e-01,2.211962e-01,...,2.259266e+00,-7.336632e-02,3.065121e-01,2.714390e-03,1.004622e-01,-4.399457e-04,2.244434e+00,3.260102e-02,1.721390e+00,-1.325345e-01
bitdefender srl,2.261331e-06,-2.852984e-06,-3.692841e-08,-2.354044e-09,-5.452307e-06,-1.191838e-05,4.415226e-07,-3.451320e-06,-7.847491e-05,3.824893e-06,...,4.778093e-05,-7.544940e-07,5.657072e-07,-1.601903e-08,-6.818911e-06,1.990843e+00,2.922434e-05,3.029981e-06,1.854542e-05,4.759770e-06
bnp paribas,9.258485e-03,6.020613e-01,2.800657e-03,-1.869951e-05,8.432113e-01,1.790668e+00,1.355515e-02,1.085780e+00,3.761549e+01,6.919208e-02,...,2.681827e+00,1.183614e-01,1.195506e+01,-3.711833e-05,1.094583e+00,4.504958e-03,3.518937e-01,8.183863e-03,8.191220e-02,1.423040e+00
canada life,1.943427e-01,3.851957e+00,1.202029e-02,2.632091e-03,6.000963e+00,2.129894e+01,8.638720e-02,4.740465e-01,-1.187061e+00,8.924590e-01,...,1.231926e+01,2.292693e+00,2.877508e-01,1.118242e-02,3.737196e-01,-1.531562e-03,1.313952e+01,1.374131e-01,7.099931e+00,6.764691e+00


In [47]:
user_data = table.loc["amundi"].unstack(-1)

prod = np.dot(sigma, Vt)

print(U.shape)
print(sigma.shape)
print(Vt.shape)

print(table.shape)
print(preds_df.shape)
prod.shape

(113, 6)
(6, 6)
(6, 40)
(113, 40)
(113, 40)


(6, 40)

In [48]:
data = bookings_data[["Customer_Name","Product_Family","Seats_mod"]]
data = pd.DataFrame({'count' : data.groupby( ["Customer_Name", "Product_Family" ] ).size()}).reset_index()
data["count"].describe()

count     399.000000
mean       22.644110
std        97.220925
min         1.000000
25%         2.000000
50%         4.000000
75%        15.000000
max      1440.000000
Name: count, dtype: float64

In [49]:
data1 = Dataset.load_from_df(data, Reader(rating_scale=(1,5000)))

In [22]:
#data1.split(n_folds=5)
#trainset, testset = train_test_split(data1, test_size=.25)
trainset = data1.build_full_trainset()

# svd
algo = SVD()
algo.fit(trainset)
evaluate(algo, data1, measures=['RMSE'])
#predictions_svd = algo.test(testset)
# print("-----break----")
# print(accuracy.rmse(predictions_svd))

# nmf
algo = NMF()
algo.fit(trainset)
evaluate(algo, data1, measures=['RMSE'])
#predictions_nmf = algo.test(testset)
# print("-----break----")
# print(accuracy.rmse(predictions_nmf))



Evaluating RMSE of algorithm SVD.

------------
Fold 1
RMSE: 4980.7380
------------
Fold 2
RMSE: 4908.2729
------------
Fold 3
RMSE: 4933.2311
------------
Fold 4
RMSE: 4985.5726
------------
Fold 5
RMSE: 4989.2406
------------
------------
Mean RMSE: 4959.4110
------------
------------
Evaluating RMSE of algorithm NMF.

------------
Fold 1
RMSE: 56.4959
------------
Fold 2
RMSE: 123.8676
------------
Fold 3
RMSE: 166.9318
------------
Fold 4
RMSE: 23.4266
------------
Fold 5
RMSE: 21.8171
------------
------------
Mean RMSE: 78.5078
------------
------------


CaseInsensitiveDefaultDict(list,
                           {'rmse': [56.495877143472754,
                             123.86764496600887,
                             166.93178198949647,
                             23.42664103433405,
                             21.817098749789547]})

In [23]:
algo.predict("amundi","desktopnow")

Prediction(uid='amundi', iid='desktopnow', r_ui=None, est=5.406726246206629, details={'was_impossible': False})

In [24]:
print('Using ALS')
bsl_options = {'method': 'als',
               'n_epochs': 5,
               'reg_u': 12,
               'reg_i': 5
               }
algo = BaselineOnly(bsl_options=bsl_options)

algo.fit(trainset)
evaluate(algo, data1, measures=['RMSE'])

Using ALS
Estimating biases using als...
Evaluating RMSE of algorithm BaselineOnly.

------------
Fold 1
Estimating biases using als...
RMSE: 54.9155
------------
Fold 2
Estimating biases using als...
RMSE: 125.4009
------------
Fold 3
Estimating biases using als...
RMSE: 165.1699
------------
Fold 4
Estimating biases using als...
RMSE: 32.4536
------------
Fold 5
Estimating biases using als...
RMSE: 30.9854
------------
------------
Mean RMSE: 81.7851
------------
------------




CaseInsensitiveDefaultDict(list,
                           {'rmse': [54.9154619503457,
                             125.40094398759113,
                             165.16994548834552,
                             32.45358861567655,
                             30.985448621999787]})

In [25]:
print('Using SGD')
bsl_options = {'method': 'sgd',
               'learning_rate': .00005,
               }
algo = BaselineOnly(bsl_options=bsl_options)
algo.fit(trainset)
evaluate(algo, data1, measures=['RMSE'])

Using SGD
Estimating biases using sgd...
Evaluating RMSE of algorithm BaselineOnly.

------------
Fold 1
Estimating biases using sgd...
RMSE: 54.7921
------------
Fold 2
Estimating biases using sgd...
RMSE: 124.7364
------------
Fold 3
Estimating biases using sgd...
RMSE: 165.6402
------------
Fold 4
Estimating biases using sgd...
RMSE: 24.1050
------------
Fold 5
Estimating biases using sgd...
RMSE: 26.9300
------------
------------
Mean RMSE: 79.2408
------------
------------




CaseInsensitiveDefaultDict(list,
                           {'rmse': [54.792104497222844,
                             124.73643242061354,
                             165.64022937404408,
                             24.105004111259408,
                             26.930015320656373]})

In [26]:
bsl_options = {'method': 'als',
               'n_epochs': 20,
               }
sim_options = {'name': 'pearson_baseline'}
algo = KNNBasic(bsl_options=bsl_options, sim_options=sim_options)
algo.fit(trainset)
evaluate(algo, data1, measures=['RMSE'])

Estimating biases using als...
Computing the pearson_baseline similarity matrix...
Done computing similarity matrix.
Evaluating RMSE of algorithm KNNBasic.

------------
Fold 1
Estimating biases using als...
Computing the pearson_baseline similarity matrix...
Done computing similarity matrix.
RMSE: 74.4616
------------
Fold 2
Estimating biases using als...
Computing the pearson_baseline similarity matrix...
Done computing similarity matrix.
RMSE: 131.0814
------------
Fold 3
Estimating biases using als...
Computing the pearson_baseline similarity matrix...
Done computing similarity matrix.
RMSE: 166.5846
------------
Fold 4
Estimating biases using als...
Computing the pearson_baseline similarity matrix...
Done computing similarity matrix.
RMSE: 81.0758
------------
Fold 5
Estimating biases using als...
Computing the pearson_baseline similarity matrix...
Done computing similarity matrix.
RMSE: 165.8602
------------
------------
Mean RMSE: 123.8127
------------
------------




CaseInsensitiveDefaultDict(list,
                           {'rmse': [74.4616315770626,
                             131.08139903816772,
                             166.58458281273184,
                             81.07581862469257,
                             165.86022503124033]})

In [27]:
sim_options = {'name': 'cosine',
               'user_based': False  # compute  similarities between items
               }
algo = KNNBasic(sim_options=sim_options)
algo.fit(trainset)
evaluate(algo, data1, measures=['RMSE'])

Computing the cosine similarity matrix...
Done computing similarity matrix.
Evaluating RMSE of algorithm KNNBasic.

------------
Fold 1
Computing the cosine similarity matrix...
Done computing similarity matrix.
RMSE: 51.4653
------------
Fold 2
Computing the cosine similarity matrix...
Done computing similarity matrix.
RMSE: 124.1930
------------
Fold 3
Computing the cosine similarity matrix...
Done computing similarity matrix.
RMSE: 172.4939
------------
Fold 4
Computing the cosine similarity matrix...
Done computing similarity matrix.
RMSE: 51.4566
------------
Fold 5
Computing the cosine similarity matrix...
Done computing similarity matrix.
RMSE: 48.3069
------------
------------
Mean RMSE: 89.5831
------------
------------




CaseInsensitiveDefaultDict(list,
                           {'rmse': [51.46533307332846,
                             124.19298778136104,
                             172.49387055636132,
                             51.45663064886058,
                             48.30686165334842]})

In [28]:
sim_options = {'name': 'pearson_baseline',
               'shrinkage': 0  # no shrinkage
               }
algo = KNNBasic(sim_options=sim_options)
algo.fit(trainset)
evaluate(algo, data1, measures=['RMSE'])

Estimating biases using als...
Computing the pearson_baseline similarity matrix...
Done computing similarity matrix.
Evaluating RMSE of algorithm KNNBasic.

------------
Fold 1
Estimating biases using als...
Computing the pearson_baseline similarity matrix...
Done computing similarity matrix.
RMSE: 76.6489
------------
Fold 2
Estimating biases using als...
Computing the pearson_baseline similarity matrix...
Done computing similarity matrix.
RMSE: 130.7782
------------
Fold 3
Estimating biases using als...
Computing the pearson_baseline similarity matrix...
Done computing similarity matrix.
RMSE: 166.8858
------------
Fold 4
Estimating biases using als...
Computing the pearson_baseline similarity matrix...
Done computing similarity matrix.
RMSE: 81.1376
------------
Fold 5
Estimating biases using als...
Computing the pearson_baseline similarity matrix...
Done computing similarity matrix.
RMSE: 164.6801
------------
------------
Mean RMSE: 124.0261
------------
------------




CaseInsensitiveDefaultDict(list,
                           {'rmse': [76.64889625950909,
                             130.7782322955981,
                             166.88579101716164,
                             81.13763358650851,
                             164.6800671352279]})

In [29]:
import turicreate
train_data = turicreate.SFrame(data)
test_data = turicreate.SFrame(data)
popularity_model = turicreate.popularity_recommender.create(train_data, 
                                    user_id='Customer_Name', item_id='Product_Family', target='count')

In [30]:
popularity_recomm = popularity_model.recommend(users=["amundi","acm","axians"],k=15)
popularity_recomm.print_rows(num_rows=45)

+---------------+-----------------------+--------------------+------+
| Customer_Name |     Product_Family    |       score        | rank |
+---------------+-----------------------+--------------------+------+
|     amundi    | shavlik oem royalties | 60.666666666666664 |  1   |
|     amundi    |        protect        |        57.1        |  2   |
|     amundi    |     security suite    |        50.0        |  3   |
|     amundi    |         ht oem        |        41.5        |  4   |
|     amundi    |          sccm         |        36.0        |  5   |
|     amundi    |       desktopnow      | 28.708333333333332 |  6   |
|     amundi    |       xtraction       | 27.09090909090909  |  7   |
|     amundi    |     heat 2014 saas    | 23.166666666666668 |  8   |
|     amundi    |        datanow        |        20.0        |  9   |
|     amundi    |          emss         | 17.72222222222222  |  10  |
|     amundi    |     managed planet    | 15.333333333333334 |  11  |
|     amundi    |   

In [31]:
#Training the model
item_sim_model = turicreate.item_similarity_recommender.create(train_data, 
                        user_id='Customer_Name', item_id='Product_Family', target='count', similarity_type='cosine')

#Making recommendations
item_sim_model.evaluate(train_data)
item_sim_recomm = item_sim_model.recommend(users=["amundi","acm","axians"],k=5)
item_sim_recomm.print_rows(num_rows=25)


Precision and recall summary statistics by cutoff
+--------+-------------+----------------+
| cutoff | mean_recall | mean_precision |
+--------+-------------+----------------+
|   1    |     0.0     |      0.0       |
|   2    |     0.0     |      0.0       |
|   3    |     0.0     |      0.0       |
|   4    |     0.0     |      0.0       |
|   5    |     0.0     |      0.0       |
|   6    |     0.0     |      0.0       |
|   7    |     0.0     |      0.0       |
|   8    |     0.0     |      0.0       |
|   9    |     0.0     |      0.0       |
|   10   |     0.0     |      0.0       |
+--------+-------------+----------------+
[10 rows x 3 columns]


Overall RMSE: 99.56748915873104

Per User RMSE (best)
+---------------+--------------------+-------+
| Customer_Name |        rmse        | count |
+---------------+--------------------+-------+
|    parexel    | 0.9535553914419275 |   2   |
+---------------+--------------------+-------+
[1 rows x 3 columns]


Per User RMSE (worst)
+--

In [32]:
#Training the model
item_sim_model = turicreate.item_similarity_recommender.create(train_data, 
                        user_id='Customer_Name', item_id='Product_Family', target='count', similarity_type="pearson")

#Making recommendations
item_sim_recomm = item_sim_model.recommend(users=["amundi","acm","axians"],k=5)
item_sim_recomm.print_rows(num_rows=25)

+---------------+-----------------------+--------------------+------+
| Customer_Name |     Product_Family    |       score        | rank |
+---------------+-----------------------+--------------------+------+
|     amundi    | shavlik oem royalties | 60.666666666666664 |  1   |
|     amundi    |        protect        | 57.01071621576942  |  2   |
|     amundi    |     security suite    | 49.92975507179896  |  3   |
|     amundi    |         ht oem        |        41.5        |  4   |
|     amundi    |          sccm         |        36.0        |  5   |
|      acm      | shavlik oem royalties | 60.666666666666664 |  1   |
|      acm      |     security suite    | 49.976753771305084 |  2   |
|      acm      |     patch manager     | 43.78454810651867  |  3   |
|      acm      |         ht oem        |        41.5        |  4   |
|      acm      |          sccm         |        36.0        |  5   |
|     axians    | shavlik oem royalties | 60.61738050977389  |  1   |
|     axians    |   

In [33]:
#Training the model
item_sim_model = turicreate.item_similarity_recommender.create(train_data, 
                        user_id='Customer_Name', item_id='Product_Family', target='count', similarity_type='jaccard')

#Making recommendations
item_sim_recomm = item_sim_model.recommend(users=["amundi","acm","axians"],k=5)
item_sim_recomm.print_rows(num_rows=25)

+---------------+---------------------------+---------------------+------+
| Customer_Name |       Product_Family      |        score        | rank |
+---------------+---------------------------+---------------------+------+
|     amundi    | ldmg (management gateway) | 0.25212518374125165 |  1   |
|     amundi    |       security suite      |  0.226819376150767  |  2   |
|     amundi    |       managed planet      |  0.2013666033744812 |  3   |
|     amundi    |          res itsm         | 0.11883117755254109 |  4   |
|     amundi    |         desktopnow        | 0.11540679136912028 |  5   |
|      acm      |         desktopnow        | 0.25749388337135315 |  1   |
|      acm      |             es            | 0.18823528289794922 |  2   |
|      acm      |       shavlik patch       |  0.1842857003211975 |  3   |
|      acm      |          res itsm         | 0.18015822768211365 |  4   |
|      acm      |       security suite      | 0.15465116500854492 |  5   |
|     axians    |        