In [1]:
# Enable IPython's autoreload extension in mode 2 for this kernel.
%load_ext autoreload
%autoreload 2
# Installs Turi Create into the runtime; no version is pinned here.
!pip install turicreate

import pandas as pd
import time
import turicreate as tc
from sklearn.model_selection import train_test_split

# Google Colab only: mount Drive under /content/shared, then copy the
# transactions CSV from Drive to /content so later cells read a local file.
from google.colab import drive
drive.mount('/content/shared', force_remount=True)
%cp /content/shared/MyDrive/KaDo.csv /content

ModuleNotFoundError: ignored

In [None]:
# dtype=str loads every column as text, so pandas does no numeric type inference.
transactions = pd.read_csv('/content/KaDo.csv', dtype=str)
print(transactions)

In [None]:
# Count how many times each customer (CLI_ID) appears with each product label (LIBELLE).
# Rows missing either field are dropped before counting. The result has one row per
# (CLI_ID, LIBELLE) pair, sorted by those keys, with an integer `purchase_count` column.
# This yields the same table as the previous apply(pd.Series)/melt pipeline without
# building one Series object per transaction row.
data = (
    transactions[['CLI_ID', 'LIBELLE']]
    .dropna()
    .groupby(['CLI_ID', 'LIBELLE'])
    .size()
    .reset_index(name='purchase_count')
)

In [None]:
# Sanity check of the aggregated table: dimensions first, then the first rows.
print(data.shape)
# head must be *called*; a bare `data.head` only displays the bound method.
data.head()

In [None]:
def create_data_dummy(data):
    """Return a copy of ``data`` with a constant ``purchase_dummy`` column equal to 1.

    The input frame is not modified; every row of the returned frame carries
    ``purchase_dummy == 1``.
    """
    return data.assign(purchase_dummy=1)
    
# Variant of the purchase table whose target is the constant `purchase_dummy` flag.
data_dummy = create_data_dummy(data)
print(data_dummy)

In [None]:
# Wide matrix: one row per CLI_ID, one column per LIBELLE, cells hold purchase_count
# (NaN where the pair does not occur in `data`).
df_matrix = data.pivot_table(values='purchase_count', index='CLI_ID', columns='LIBELLE')

In [None]:
# Min-max scale each LIBELLE column: (value - column min) / (column max - column min).
# A column whose min equals its max divides 0 by 0 and therefore becomes NaN.
df_matrix_norm = df_matrix.sub(df_matrix.min()).div(df_matrix.max().sub(df_matrix.min()))

In [None]:
# Inspect the scaled customer x product matrix.
print(df_matrix_norm)

In [None]:
# Reshape the scaled matrix back to long form: one row per (CLI_ID, LIBELLE) pair.
# dropna() is required here: without it the melted frame keeps a NaN row for every
# customer/product pair that has no value in the pivot, and those rows would flow
# into the train/test split as missing targets.
d = df_matrix_norm.reset_index()
data_norm = pd.melt(d, id_vars=['CLI_ID'], value_name='scaled_purchase_freq').dropna()

print(data_norm.shape)
data_norm.head()

In [None]:
def normalize_data(data):
    """Min-max scale purchase counts per product and return them in long form.

    ``data`` is pivoted to a CLI_ID x LIBELLE matrix of ``purchase_count``, each
    LIBELLE column is scaled with (value - min) / (max - min), and the matrix is
    melted back to rows of (CLI_ID, LIBELLE, scaled_purchase_freq). Rows whose
    scaled value is NaN are dropped; this includes every row of a column whose
    min equals its max, because that column divides 0 by 0.
    """
    counts = data.pivot_table(values='purchase_count', index='CLI_ID', columns='LIBELLE')
    lowest = counts.min()
    span = counts.max() - lowest
    scaled = counts.sub(lowest).div(span)

    wide = scaled.reset_index()
    wide.index.names = ['scaled_purchase_freq']
    long_form = wide.melt(id_vars=['CLI_ID'], value_name='scaled_purchase_freq')
    return long_form.dropna()

In [None]:
def split_data(data, test_size=.2, random_state=None):
    """Split a DataFrame row-wise into train and test Turi Create SFrames.

    Args:
        data: pandas DataFrame to split.
        test_size: Forwarded to ``train_test_split``; defaults to the 0.2 that
            was previously hard-coded.
        random_state: Seed forwarded to ``train_test_split``. ``None`` (the
            default) keeps the original unseeded behavior; pass an int to get
            the same split on every run.

    Returns:
        Tuple ``(train_data, test_data)`` of ``tc.SFrame`` objects.
    """
    train, test = train_test_split(data, test_size=test_size, random_state=random_state)
    return tc.SFrame(train), tc.SFrame(test)

In [None]:
# Each dataset variant (raw counts, dummy flag, scaled frequency) is split by its
# own separate call to split_data.
train_data, test_data = split_data(data)
train_data_dummy, test_data_dummy = split_data(data_dummy)
train_data_norm, test_data_norm = split_data(data_norm)

In [None]:
# constant variables to define field names include:
user_id = 'CLI_ID'
item_id = 'LIBELLE'
# The raw transactions column can list the same customer on many rows (the counting
# step groups by CLI_ID), so ask for recommendations once per distinct customer.
# Missing IDs are skipped and first-seen order is kept.
users_to_recommend = transactions[user_id].dropna().unique().tolist()
n_rec = 10 # number of items to recommend
n_display = 30 # to display the first few rows in an output dataset

In [None]:
def model(train_data, name, user_id, item_id, target, users_to_recommend, n_rec, n_display):
    """Train a Turi Create recommender, print sample recommendations, return the model.

    Args:
        train_data: Training interactions passed to the recommender's ``create``.
        name: ``'popularity'`` for a popularity recommender, or ``'cosine'`` /
            ``'pearson'`` for an item-similarity recommender using that
            similarity type.
        user_id: Name of the user column.
        item_id: Name of the item column.
        target: Name of the target column.
        users_to_recommend: Users passed to ``recommend``.
        n_rec: Number of items to recommend per user (``k``).
        n_display: Number of recommendation rows to print.

    Returns:
        The trained recommender.

    Raises:
        ValueError: If ``name`` is not one of the supported model names.
            (Previously an unknown name fell through every branch and failed
            later with an UnboundLocalError.)
    """
    if name == 'popularity':
        fitted = tc.popularity_recommender.create(
            train_data, user_id=user_id, item_id=item_id, target=target)
    elif name in ('cosine', 'pearson'):
        # Both similarity models differ only in similarity_type, which equals `name`.
        fitted = tc.item_similarity_recommender.create(
            train_data,
            user_id=user_id,
            item_id=item_id,
            target=target,
            similarity_type=name)
    else:
        raise ValueError(
            "Unknown model name %r; expected 'popularity', 'cosine' or 'pearson'." % (name,))

    recom = fitted.recommend(users=users_to_recommend, k=n_rec)
    recom.print_rows(n_display)
    return fitted

In [None]:
# Popularity recommender trained on raw purchase counts.
name = 'popularity'
target = 'purchase_count'
popularity = model(
    train_data=train_data, name=name, user_id=user_id, item_id=item_id, target=target,
    users_to_recommend=users_to_recommend, n_rec=n_rec, n_display=n_display,
)

In [None]:
# Popularity recommender trained on the constant purchase_dummy target.
name = 'popularity'
target = 'purchase_dummy'
pop_dummy = model(
    train_data=train_data_dummy, name=name, user_id=user_id, item_id=item_id, target=target,
    users_to_recommend=users_to_recommend, n_rec=n_rec, n_display=n_display,
)

In [None]:
# Popularity recommender trained on the scaled purchase frequency.
name = 'popularity'
target = 'scaled_purchase_freq'
pop_norm = model(
    train_data=train_data_norm, name=name, user_id=user_id, item_id=item_id, target=target,
    users_to_recommend=users_to_recommend, n_rec=n_rec, n_display=n_display,
)

In [None]:
# Collaborative filtering: item-similarity recommender (cosine) on raw purchase counts.
name = 'cosine'
target = 'purchase_count'
cos = model(
    train_data=train_data, name=name, user_id=user_id, item_id=item_id, target=target,
    users_to_recommend=users_to_recommend, n_rec=n_rec, n_display=n_display,
)

In [None]:
# Item-similarity recommender (cosine) on the constant purchase_dummy target.
name = 'cosine'
target = 'purchase_dummy'
cos_dummy = model(
    train_data=train_data_dummy, name=name, user_id=user_id, item_id=item_id, target=target,
    users_to_recommend=users_to_recommend, n_rec=n_rec, n_display=n_display,
)

In [None]:
# Item-similarity recommender (cosine) on the scaled purchase frequency.
name = 'cosine'
target = 'scaled_purchase_freq'
cos_norm = model(
    train_data=train_data_norm, name=name, user_id=user_id, item_id=item_id, target=target,
    users_to_recommend=users_to_recommend, n_rec=n_rec, n_display=n_display,
)

In [None]:
# Item-similarity recommender (Pearson) on raw purchase counts.
name = 'pearson'
target = 'purchase_count'
pear = model(
    train_data=train_data, name=name, user_id=user_id, item_id=item_id, target=target,
    users_to_recommend=users_to_recommend, n_rec=n_rec, n_display=n_display,
)

In [None]:
# Item-similarity recommender (Pearson) on the constant purchase_dummy target.
name = 'pearson'
target = 'purchase_dummy'
pear_dummy = model(
    train_data=train_data_dummy, name=name, user_id=user_id, item_id=item_id, target=target,
    users_to_recommend=users_to_recommend, n_rec=n_rec, n_display=n_display,
)

In [None]:
# Item-similarity recommender (Pearson) on the scaled purchase frequency.
name = 'pearson'
target = 'scaled_purchase_freq'
pear_norm = model(
    train_data=train_data_norm, name=name, user_id=user_id, item_id=item_id, target=target,
    users_to_recommend=users_to_recommend, n_rec=n_rec, n_display=n_display,
)

In [None]:
def create_output(model, users_to_recommend, n_rec, print_csv=True, user_col=None, item_col=None):
    """Build a one-row-per-user table of recommended items, optionally saved as CSV.

    Args:
        model: Object exposing ``recommend(users=..., k=...)`` whose result has
            a ``to_dataframe()`` method.
        users_to_recommend: Users passed to ``model.recommend``.
        n_rec: Number of items requested per user (``k``).
        print_csv: When true, write the table to ``./recommendation.csv`` and
            print a confirmation message.
        user_col: Name of the user column in the recommendations. Defaults to
            the module-level ``user_id``.
        item_col: Name of the item column in the recommendations. Defaults to
            the module-level ``item_id``.

    Returns:
        DataFrame indexed by the user column (sorted), with a single
        ``recommendedLIBELLE`` column holding that user's items joined by
        ``' | '`` in the order the recommender returned them.
    """
    # Use the same column names everywhere instead of mixing the globals with a
    # hard-coded 'CLI_ID' literal.
    user_col = user_id if user_col is None else user_col
    item_col = item_id if item_col is None else item_col

    recommendation = model.recommend(users=users_to_recommend, k=n_rec)
    df_rec = recommendation.to_dataframe()

    # One aggregation per user replaces the transform + drop_duplicates + sort chain;
    # groupby(sort=True) already orders the result by user.
    items_as_text = df_rec[item_col].astype(str)
    joined = items_as_text.groupby(df_rec[user_col], sort=True).agg(' | '.join)
    df_output = joined.to_frame('recommendedLIBELLE')

    if print_csv:
        df_output.to_csv('./recommendation.csv')
        print("An output file can be found in folder with name 'recommendation.csv'")
    return df_output


# Export recommendations from the Pearson model trained on scaled purchase frequency.
df_output = create_output(
    pear_norm,
    users_to_recommend,
    n_rec=n_rec,
    print_csv=True,
)

In [None]:
def customer_recomendation(customer_id):
    """Look up one customer's recommendations in the module-level ``df_output``.

    Returns ``df_output.loc[customer_id]`` when the ID is in the table's index.
    Otherwise prints 'Customer not found.' and returns ``customer_id`` unchanged.
    """
    is_known = customer_id in df_output.index
    if is_known:
        return df_output.loc[customer_id]

    print('Customer not found.')
    return customer_id