In [16]:
import pandas as pd
import pickle
import numpy as np
from sklearn.feature_extraction.text import CountVectorizer
import recmetrics as rec

# import  eval_metrics.evaluation as eval
# from recommenders.evaluation.python_evaluation import map_at_k as MAP

import preprocessing.sale_preprocessing as sales_prep

In [18]:
# Prepared training data; the first CSV column is used as the index.
# Later cells read its 'User Key', 'Sale ID', 'interaction', 'Recency',
# 'Frequency' and 'Monetary' columns.
scored_train_data = pd.read_csv('preped_train_data_wth_dummies.csv', index_col=0)
# Raw sales extract (the file name suggests January-April 2023 -- TODO confirm).
extracted_sales = pd.read_csv('sales_jan23_avr23.csv', index_col=0)
# Run the project's sales preprocessing helper on the extract. What it does
# exactly is defined in preprocessing.sale_preprocessing, not in this notebook.
all_sales = sales_prep.sales_pre_pocessing(extracted_sales)

In [3]:
# User for whom the example ranking below is produced.
user_id = 'zzWUNSYQ2CWwopiILtUU4kaBEm13'
# Per-user columns taken from the prepared training data.
user_information = scored_train_data[['User Key', 'Recency', 'Frequency', 'Monetary']]
# Only sales whose 'Start At' value is one of these dates are kept for scoring.
available_dates = ['2023-03-26', '2023-04-18', '2023-04-08']
available_sales = all_sales[all_sales['Start At'].isin(available_dates)]
# Load the trained model. The context manager closes the file handle as soon as
# the model is read (the bare open() call used previously never closed it).
# NOTE(review): pickle.load can execute arbitrary code -- only load trusted files.
with open('reg_model.pkl', 'rb') as model_file:
    model = pickle.load(model_file)
# Columns handed to model.predict, in this order.
# NOTE(review): presumably this must match the column order used at training
# time -- confirm against the training notebook.
training_columns = ['log_Followers', 'log_Brand Appearance', 'log_Avg Price',
       'Avg Discount', 'log_First Day Revenue', 'Conversion', 'Recency',
       'Frequency', 'Monetary', 'Artisanal', 'Bien-être', 'Bio',
       'Biodégradable', 'Cadeau idéal', 'Concept original', 'Durable',
       'Exclusivité Choose', 'Fabrication à la demande', 'Fait main',
       'Gluten Free', 'Iconique', 'Inclusive', 'Innovation',
       'La Fournée Ulule', 'Made in Europe', 'Made in France', 'Naturel',
       'Oeko-Tex', 'Premium', 'Recyclable', 'Savoir-faire',
       'Socialement engagée', 'Série limitée', 'Tendance', 'Upcycling',
       'Vegan', 'ZéroDéchet', 'Éco-friendly', 'Accessoires', 'Beauté',
       'Bibliothèque', 'Bien-Être', 'Bijoux', 'Buanderie', 'Chambre',
       'Chaussures', 'Cuisine', 'Cures', 'Expériences', 'Hygiène', 'Lingerie',
       'Maroquinerie', 'Outdoor', 'Prêt-à-porter', 'Salon', 'Soins',
       'Sportswear']

In [20]:
def creating_entry_data(user_id, available_sales: pd.DataFrame, user_information = user_information):
    """ 
    Build the model input for one user: a copy of the sales to score with the
    user's feature columns added to every row.

    :param user_id: a string containing the user id to predict for
    :param available_sales: pd.DataFrame containing the information of the sales 
    to score that are needed for the model; it is not modified
    :param user_information: pd.DataFrame with a 'User Key' column plus the 
    user feature columns needed by the model
    :return: pd.DataFrame with a fresh 0..n-1 index, one row per sale, holding 
    the sale columns and the user feature columns (an empty ``available_sales`` 
    gives an empty frame that still carries the user feature columns)
    :raises ValueError: if ``user_information`` has no row for ``user_id``
    """

    user_rows = user_information.loc[user_information['User Key'] == user_id].drop(columns='User Key')
    if user_rows.empty:
        # Without this guard the user columns would silently be filled with NaN.
        raise ValueError(f"No user information found for user id {user_id!r}")

    df_for_predictions = available_sales.copy().reset_index(drop=True)

    # Broadcast the user's values to every sale. When several rows match the
    # user, the first one is used so that every sale gets the same values.
    # NOTE(review): assumes a user's feature values are identical across rows -- confirm.
    for feature in user_rows.columns:
        # Read column by column so each feature keeps its own dtype.
        df_for_predictions[feature] = user_rows[feature].iloc[0]

    return df_for_predictions


In [21]:
def ranking(df_for_prediction: pd.DataFrame, model, prediction_cols = training_columns):
    """ 
    Score the given sales with the model and return them ordered from the 
    highest to the lowest score.

    :param df_for_prediction: a pd.DataFrame containing the sales to rate 
    (with a 'Sale ID' column), their information and the user information 
    needed by the model
    :param model: model used for the scoring; must expose ``predict``
    :param prediction_cols: columns passed to the model for predicting, kept 
    explicit to limit bugs due to potentially changing categories or badges
    :return: pd.DataFrame with columns 'Sale ID' and 'score', sorted by 
    descending score; the index still refers to the rows of ``df_for_prediction``
    """
    # dataframe created to store the sales and their score
    predicted = df_for_prediction[['Sale ID']].copy()

    # Predict with the columns requested by the caller (the argument used to be
    # ignored in favour of the global list), then flatten the output so that
    # both (n,) and (n, 1) shaped predictions give one score per sale.
    predicted_score = model.predict(df_for_prediction[prediction_cols])
    predicted['score'] = np.ravel(predicted_score)

    # sorting the sales by score with the best ones at the top
    ordered_sales = predicted.sort_values('score', ascending=False)

    return ordered_sales

In [34]:
def make_preditions(user_id, available_sales, model):
    """ 
    Rank the given sales for one user with the given model.

    :param user_id: a string containing the user id to predict for
    :param available_sales: pd.DataFrame containing the information of the sales 
    to rank
    :param model: model used for the scoring
    :return: the output of ``ranking`` for this user, re-indexed from 0 so that 
    the row position is the rank
    """
    # Sales enriched with the user's features, ready for the model.
    model_input = creating_entry_data(user_id, available_sales)

    # Score and order the sales, then drop the original row labels.
    ranked_sales = ranking(model_input, model)
    return ranked_sales.reset_index(drop=True)

In [38]:
def predicted_and_truth_for_user(user_id, available_sales, model, scored_train_data):
    """ 
    Return the ranking predicted by the model for one user, together with a 
    flag telling which of the ranked sales the user really interacted with.

    :param user_id: a string containing the user id to predict for
    :param available_sales: pd.DataFrame containing the information of the sales 
    to rank
    :param model: model used for the scoring
    :param scored_train_data: pd.DataFrame containing the known interactions of 
    users ('User Key', 'Sale ID' and 'interaction' columns are read)
    :return: the ranking from ``make_preditions`` with an extra float 'truth' 
    column (1.0 when the user has an interaction equal to 1 with the sale, 
    0.0 otherwise)
    """
    ranked = make_preditions(user_id, available_sales, model)

    # Rows of the interaction data for this user where an interaction happened.
    is_user = scored_train_data['User Key'] == user_id
    is_positive = scored_train_data['interaction'] == 1
    interacted = scored_train_data[is_user & is_positive].reset_index(drop=True)

    # Flag every ranked sale that appears among the user's interactions.
    truth = ranked['Sale ID'].isin(interacted['Sale ID']).astype(float)
    ranked['truth'] = truth.reset_index(drop=True)

    return ranked

In [39]:
# Example: ranked sales for the sample user, with the model score and the
# 0/1 'truth' flag of known interactions.
predicted_and_truth_for_user(user_id, available_sales, model, scored_train_data)

Unnamed: 0,Sale ID,score,truth
0,590f281cb3b3400f8618139e0,0.726315,0.0
1,85f9e8d14af441b7bb0e7c41d,0.688841,1.0
2,971aed4f3ce34ac1a1001f059,0.639169,0.0
3,64b5a333ede74c0eb29063d5c,0.573591,0.0
4,c5cd2e7473bd46e49f4c779fd,0.484101,0.0
5,9aacf646468e47c0a589c7f95,0.458241,0.0
6,b8eb5e7441184bb7bc11a9738,0.45644,0.0
7,d67665620e1f46068ae8ad210,0.453126,0.0
8,be9cef9056264c86a015bb6fc,0.420072,0.0
9,8c9408d775df4cf592a3a6c0e,0.410938,0.0


In [13]:
# Print rec.mark for every cutoff k from 1 to 29, comparing the 'Sale ID' list
# of `user_specific` with the 'Sale ID' list of `predict`.
# NOTE(review): `user_specific` and `predict` are not assigned in any cell
# visible here, so this cell depends on leftover kernel state and is likely to
# fail after Restart & Run All -- define them explicitly before this cell.
# NOTE(review): rec.mark is presumably recmetrics' mean average recall at k -- confirm.
for i in range(1, 30):
    print(rec.mark([user_specific['Sale ID'].to_list()], [predict['Sale ID'].to_list()] , k = i))

0.0
0.25
0.25
0.25
0.25
0.25
0.25
0.25
0.25
0.25
0.25
0.3333333333333333
0.3333333333333333
0.3333333333333333
0.3333333333333333
0.3333333333333333
0.3333333333333333
0.3333333333333333
0.3333333333333333
0.3333333333333333
0.3333333333333333
0.3333333333333333
0.3333333333333333
0.3333333333333333
0.3333333333333333
0.3333333333333333
0.3333333333333333
0.3333333333333333
0.3333333333333333


In [42]:
# NOTE(review): `predict` is not assigned in any cell visible here (hidden
# kernel state); assign it explicitly before displaying it.
predict

Unnamed: 0,Sale ID,score,User Key,truth
7,590f281cb3b3400f8618139e0,0.726315,zzWUNSYQ2CWwopiILtUU4kaBEm13,0.0
10,85f9e8d14af441b7bb0e7c41d,0.688841,zzWUNSYQ2CWwopiILtUU4kaBEm13,1.0
12,971aed4f3ce34ac1a1001f059,0.639169,zzWUNSYQ2CWwopiILtUU4kaBEm13,0.0
8,64b5a333ede74c0eb29063d5c,0.573591,zzWUNSYQ2CWwopiILtUU4kaBEm13,0.0
18,c5cd2e7473bd46e49f4c779fd,0.484101,zzWUNSYQ2CWwopiILtUU4kaBEm13,0.0
13,9aacf646468e47c0a589c7f95,0.458241,zzWUNSYQ2CWwopiILtUU4kaBEm13,0.0
15,b8eb5e7441184bb7bc11a9738,0.45644,zzWUNSYQ2CWwopiILtUU4kaBEm13,0.0
19,d67665620e1f46068ae8ad210,0.453126,zzWUNSYQ2CWwopiILtUU4kaBEm13,0.0
16,be9cef9056264c86a015bb6fc,0.420072,zzWUNSYQ2CWwopiILtUU4kaBEm13,0.0
11,8c9408d775df4cf592a3a6c0e,0.410938,zzWUNSYQ2CWwopiILtUU4kaBEm13,0.0
