In [None]:
import pandas as pd
from google.cloud import storage
from io import BytesIO
from scipy.sparse.linalg import svds
import numpy as np
from sklearn.model_selection import train_test_split

In [312]:
pd.set_option('display.max_columns', 500)

In [183]:
client = storage.Client()
bucket_name = "meet-fresh-recommendation-system-data"
file_name = "meetfresh_customer_item_rating_matrix.csv"
bucket = client.get_bucket(bucket_name)
blob = bucket.blob(file_name)
content = blob.download_as_string()
matrix = pd.read_csv(BytesIO(content))
matrix = matrix.rename(columns={"Unnamed: 0": "customer_id"}).astype({'customer_id': int})

In [184]:
matrix.head()

Unnamed: 0,customer_id,Tofu Pudding,Rice Balls,Boba,Mini Q (Mini Taro Ball),Red Beans,Grass Jelly,Peanuts,Black Sugar Boba,Melon Jelly,...,Potaro Balls,Caramel Pudding,Sesame Rice Balls,Taro Balls,Almond Pudding,Taro Paste,Q Mochi,Coco Mochi,Sago,Almond Flakes
0,49237,1.0,1.0,,,,,,,,...,,,,3.0,,2.0,,,,
1,172644,1.0,,3.0,3.0,2.0,3.0,3.0,,,...,2.0,,3.0,1.0,2.0,2.0,,,3.0,
2,39930,1.0,,,,,3.0,,,,...,,,,2.0,,1.0,3.0,,,
3,207442,2.0,,2.0,,,,,,,...,2.0,,3.0,3.0,,,,,,
4,78835,1.0,,,,,,,,,...,,,,,,,,,,


In [199]:
matrix_train, matrix_test = train_test_split(matrix,
                                   test_size=0.20,
                                   random_state=42)

In [200]:
print(matrix_train.shape)
print(matrix_test.shape)

(124475, 23)
(31119, 23)


In [201]:
train_df = matrix_train.set_index("customer_id")
train_df = train_df.fillna(0)
test_df = matrix_test.set_index("customer_id")
test_df = test_df.fillna(0)
train_df.head()

Unnamed: 0_level_0,Tofu Pudding,Rice Balls,Boba,Mini Q (Mini Taro Ball),Red Beans,Grass Jelly,Peanuts,Black Sugar Boba,Melon Jelly,Lychee Jelly,...,Potaro Balls,Caramel Pudding,Sesame Rice Balls,Taro Balls,Almond Pudding,Taro Paste,Q Mochi,Coco Mochi,Sago,Almond Flakes
customer_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
40920,0.0,0.0,0.0,0.0,0.0,2.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,2.0,0.0,0.0,0.0,0.0,0.0,0.0
175469,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
117348,0.0,1.0,2.0,2.0,1.0,3.0,0.0,0.0,2.0,0.0,...,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
60973,0.0,0.0,2.0,0.0,0.0,3.0,0.0,0.0,3.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
146008,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0


In [233]:
U, sigma, Vt = svds(train_df, k = 5)
sigma = np.diag(sigma)
all_user_predicted_ratings = np.dot(np.dot(U, sigma), Vt)
preds_df = pd.DataFrame(all_user_predicted_ratings, columns = train_df.columns, index=train_df.index)

In [234]:
sorted_user_predictions = preds_df.loc[40920].sort_values(ascending=False)
sorted_user_predictions

Taro Balls                 1.999372
Grass Jelly                1.998946
Rice Balls                 0.024642
Tofu Pudding               0.020538
Caramel Pudding            0.019857
Melon Jelly                0.016761
Potaro Balls               0.016297
Sesame Rice Balls          0.013750
Almond Pudding             0.011453
Peanuts                    0.011206
Q Mochi                    0.006941
Black Sugar Boba           0.005378
Lychee Jelly               0.004643
Taro                       0.002816
Almond Flakes              0.001266
Sago                       0.000659
Coco Mochi                -0.000171
Boba                      -0.001617
Ice Cream                 -0.006235
Red Beans                 -0.006405
Taro Paste                -0.012664
Mini Q (Mini Taro Ball)   -0.024417
Name: 40920, dtype: float64

In [191]:
sorted_user_predictions["Boba"]

2.9911636168216913

In [235]:
def recommend_items(predictions_df, userID, num_recommendations=5):
    
    # Get and sort the user's predictions
    sorted_user_predictions = predictions_df.loc[userID].sort_values(ascending=False)
    res = []
    for item in sorted_user_predictions.index:
        if len(res) >= num_recommendations:
            break
        if train_df.loc[userID, item] == 0:
            res.append((sorted_user_predictions[item], item))
    return res

reco = recommend_items(preds_df, 40920, 5)
reco

[(0.024641623691538293, 'Rice Balls'),
 (0.020537986020375376, 'Tofu Pudding'),
 (0.019857440626053064, 'Caramel Pudding'),
 (0.016760834495385245, 'Melon Jelly'),
 (0.016296669486365586, 'Potaro Balls')]

# Modeling

In [232]:
class CFModel:

    def __init__(self, items_df=None):
        self.MODEL_NAME = 'Collaborative Filtering #1'
        self._preds_df = None
        self._train = items_df


    def get_model_name(self):
        return self.MODEL_NAME


    def get_svd(self):
        if self._preds_df is None:
            U, sigma, Vt = svds(self._train, k = 5)
            sigma = np.diag(sigma)
            all_user_predicted_ratings = np.dot(np.dot(U, sigma), Vt)
            preds_df = pd.DataFrame(all_user_predicted_ratings, columns = self._train.columns, index=self._train.index)
            self._preds_df = preds_df
        return True

    
    def recommend_items(self, user_id, topk=5):
        self.get_svd()
        sorted_user_predictions = self._preds_df.loc[user_id].sort_values(ascending=False)
        res = []
        for item in sorted_user_predictions.index:
            if len(res) >= topk:
                break
            if df.loc[user_id, item] == 0:
                res.append((sorted_user_predictions[item], item))
        return res

    
cf_model = CFModel(train_df)
cf_model.recommend_items(user_id=40920, topk=5)

[(0.02464162369153845, 'Rice Balls'),
 (0.020537986020375636, 'Tofu Pudding'),
 (0.0198574406260532, 'Caramel Pudding'),
 (0.016760834495385717, 'Melon Jelly'),
 (0.016296669486365878, 'Potaro Balls')]

# Evaluate:

In [238]:
class Evaluator:
    EVALUATOR_NAME = "eva"
    
    def __init__(self, model, test_data):
        self._test = test_data
        self.model = model
    
    def draw_user(self):
        user = self._test.sample(1, random_state=43) 
        uid = user.index[0]
        user = user.iloc[0, :].sort_values(ascending=False)
        return uid, user

    def generate_score(self, topk=5):
        uid, user = self.draw_user()
        pred = self.model.recommend_items(uid, topk)
        obt = 0
        # pred = [(0, item) for score, item in pred]
        # pred = [(3, item) for score, item in pred]
        print(user)
        print(pred)
        for score, item in pred:
            target = user[item]
            obt += 1 - abs(score-target) / 3
        # this is some kind of Error, not Precision
        
        # TODO: another evaluation: ADD RECALL
        # TODO: should do more calculations and take average (MAP)
        
        ret = obt/topk
        print("The model \'{}\' has {:.2f}% score".format(self.model.MODEL_NAME, ret * 100))
        return ret

evaluator = Evaluator(cf_model, train_df) # How to use test set?
g = evaluator.generate_score()


Taro                       3.0
Potaro Balls               3.0
Boba                       3.0
Ice Cream                  3.0
Sago                       0.0
Coco Mochi                 0.0
Q Mochi                    0.0
Taro Paste                 0.0
Almond Pudding             0.0
Taro Balls                 0.0
Sesame Rice Balls          0.0
Caramel Pudding            0.0
Tofu Pudding               0.0
Rice Balls                 0.0
Lychee Jelly               0.0
Melon Jelly                0.0
Black Sugar Boba           0.0
Peanuts                    0.0
Grass Jelly                0.0
Red Beans                  0.0
Mini Q (Mini Taro Ball)    0.0
Almond Flakes              0.0
Name: 14868, dtype: float64
[(1.5967230123041107, 'Mini Q (Mini Taro Ball)'), (1.4026677183666412, 'Taro Paste'), (0.6013658796358277, 'Sesame Rice Balls'), (0.48659831183052077, 'Melon Jelly'), (0.4617067285456262, 'Rice Balls')]
The model 'Collaborative Filtering #1' has 69.67% precision


In [258]:
class Evaluator:
    EVALUATOR_NAME = "MAP"
    
    def __init__(self, model, test_data):
        self._test = test_data
        self.model = model
    
    def draw_user(self):
        user = self._test.sample(1, random_state=42) 
        uid = user.index[0]
        user = user.iloc[0, :].sort_values(ascending=False)
        return uid, user

    def generate_score(self, topk=5):
        tot_obt = 0
        sample_size = 100
        for index, row in self._test.sample(sample_size, random_state=123).iterrows():
            user = self._test.loc[index, :]
            uid = user.name
            pred = self.model.recommend_items(uid, topk)
            obt = 0
            # pred = [(0, item) for score, item in pred]
            # pred = [(3, item) for score, item in pred]
            for score, item in pred:
                target = user[item]
                obt += 1 - abs(score-target) / 3
            # TODO: another evaluation: ADD RECALL
            tot_obt += obt
        
        ret = obt/topk
        print("The model \'{}\' has {:.2f}% Precision".format(self.model.MODEL_NAME, ret * 100))
        print("The model \'{}\' has {:.2f}% Average Precision".format(self.model.MODEL_NAME, tot_obt / topk / sample_size * 100))
        return ret

evaluator = Evaluator(cf_model, train_df) # How to use test set?
g = evaluator.generate_score()

The model 'Collaborative Filtering #1' has 86.76% Precision
The model 'Collaborative Filtering #1' has 91.49% Average Precision


In [260]:
%load_ext google.cloud.bigquery

In [265]:
%%bigquery prod
select * from meetfresh.dim_product

Query is running:   0%|          |

Downloading:   0%|          |

In [266]:
prod

Unnamed: 0,product_id,product_name,product_group_name,product_ingredient_name,seasonal,gpt_assisted,product_long_description,product_image_url
0,1017,Icy Taro Ball Signature,SIGNATURE SERIES,"Taro Balls, Taro Paste, Potaro Balls, Boba, Sh...",False,False,Our refreshing Icy Taro Ball Signature is made...,https://meetfresh.us/wp-content/uploads/2022/1...
1,2000,Icy Grass Jelly Signature,SIGNATURE SERIES,"Taro Balls, Grass Jelly Shaved Ice, Grass Jelly",False,False,The Icy Grass Jelly Signature is a customer fa...,https://meetfresh.us/wp-content/uploads/2019/1...
2,21007,Hot Red Bean Soup Signature,SIGNATURE SERIES,"Taro Balls, Sesame Rice Balls, Taro, Red Beans...",False,False,Treat yourself to a bowl of our Hot Red Bean S...,https://meetfresh.us/wp-content/uploads/2021/0...
3,2013,Hot Grass Jelly Soup Signature,SIGNATURE SERIES,"Mini Q, Red Beans, Boba, Grass Jelly Soup",False,False,The Hot Grass Jelly Soup Signature is a great ...,https://meetfresh.us/wp-content/uploads/2021/0...
4,1601,Cold Coco Sago Soup Signature,SIGNATURE SERIES,"Mini Q, Taro, Boba, Coco Mochi, Sago, Coco Sag...",False,False,Treat yourself to a bowl of our Cold Coco Sago...,https://meetfresh.us/wp-content/uploads/2023/0...
...,...,...,...,...,...,...,...,...
123,1111,Hot Almond Drink,SEASONAL DRINKS,,True,False,,https://meetfresh.us/wp-content/uploads/2019/1...
124,1112,Boba Hot Almond Drink,SEASONAL DRINKS,,True,False,,https://meetfresh.us/wp-content/uploads/2019/1...
125,1114,Mini Q Hot Almond Drink,SEASONAL DRINKS,,True,False,,https://meetfresh.us/wp-content/uploads/2019/1...
126,1110,Hot Almond Purple Rice Soup,PURPLE RICE SERIES,"Purple Rice Soup, Almond Flakes",True,True,Warm your soul with our comforting Hot Almond ...,https://meetfresh.us/wp-content/uploads/2019/1...


In [271]:
useful_prod = prod.dropna(subset=['product_ingredient_name'], axis=0)
useful_prod.loc[:, 'product_ingredient_name'] = useful_prod.loc[:, 'product_ingredient_name'].str.lower()
useful_prod.loc[:, 'product_ingredient_name'] = useful_prod['product_ingredient_name'].str.split(",")

In [290]:
useful_prod

Unnamed: 0,product_id,product_name,product_group_name,product_ingredient_name,seasonal,gpt_assisted,product_long_description,product_image_url
0,1017,Icy Taro Ball Signature,SIGNATURE SERIES,"[taro balls, taro paste, potaro balls, boba...",False,False,Our refreshing Icy Taro Ball Signature is made...,https://meetfresh.us/wp-content/uploads/2022/1...
1,2000,Icy Grass Jelly Signature,SIGNATURE SERIES,"[taro balls, grass jelly shaved ice, grass j...",False,False,The Icy Grass Jelly Signature is a customer fa...,https://meetfresh.us/wp-content/uploads/2019/1...
2,21007,Hot Red Bean Soup Signature,SIGNATURE SERIES,"[taro balls, sesame rice balls, taro, red b...",False,False,Treat yourself to a bowl of our Hot Red Bean S...,https://meetfresh.us/wp-content/uploads/2021/0...
3,2013,Hot Grass Jelly Soup Signature,SIGNATURE SERIES,"[mini q, red beans, boba, grass jelly soup]",False,False,The Hot Grass Jelly Soup Signature is a great ...,https://meetfresh.us/wp-content/uploads/2021/0...
4,1601,Cold Coco Sago Soup Signature,SIGNATURE SERIES,"[mini q, taro, boba, coco mochi, sago, co...",False,False,Treat yourself to a bowl of our Cold Coco Sago...,https://meetfresh.us/wp-content/uploads/2023/0...
...,...,...,...,...,...,...,...,...
120,4008,Strawberry Milk Shaved Ice,SHAVED ICE,"[strawberry, strawberry syrup, condensed mil...",True,False,The Strawberry Milk Shaved Ice features fresh ...,https://meetfresh.us/wp-content/uploads/2021/0...
121,4007,Mango Milk Shaved Ice,SHAVED ICE,"[mango, mango syrup, condensed milk, ice cr...",True,False,The Mango Milk Shaved Ice features fresh seaso...,https://meetfresh.us/wp-content/uploads/2021/0...
122,4011,Mangoberry Milk Shaved Ice,SHAVED ICE,"[mango, strawberry, mango syrup, strawberry...",True,False,The Mangoberry Milk Shaved Ice is a delectable...,https://meetfresh.us/wp-content/uploads/2021/0...
126,1110,Hot Almond Purple Rice Soup,PURPLE RICE SERIES,"[purple rice soup, almond flakes]",True,True,Warm your soul with our comforting Hot Almond ...,https://meetfresh.us/wp-content/uploads/2019/1...


In [323]:
train_df

Unnamed: 0_level_0,Tofu Pudding,Rice Balls,Boba,Mini Q (Mini Taro Ball),Red Beans,Grass Jelly,Peanuts,Black Sugar Boba,Melon Jelly,Lychee Jelly,Ice Cream,Taro,Potaro Balls,Caramel Pudding,Sesame Rice Balls,Taro Balls,Almond Pudding,Taro Paste,Q Mochi,Coco Mochi,Sago,Almond Flakes
customer_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1
40920,0.0,0.0,0.0,0.0,0.0,2.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2.0,0.0,0.0,0.0,0.0,0.0,0.0
175469,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
117348,0.0,1.0,2.0,2.0,1.0,3.0,0.0,0.0,2.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
60973,0.0,0.0,2.0,0.0,0.0,3.0,0.0,0.0,3.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
146008,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
59263,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2.0,0.0,0.0,0.0,2.0,2.0,0.0,0.0,0.0,0.0,0.0,0.0
51650,0.0,0.0,0.0,0.0,0.0,3.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
229705,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,2.0,0.0,0.0,0.0,0.0,0.0,0.0
71512,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,3.0,0.0,0.0,0.0,0.0,0.0,0.0


In [324]:
rec_ls = cf_model.recommend_items(user_id=117348, topk=5)
rec_ls

[(0.8518726624070347, 'Taro Paste'),
 (0.8264262992162967, 'Ice Cream'),
 (0.5228141544384187, 'Potaro Balls'),
 (0.41896907453862603, 'Sesame Rice Balls'),
 (0.3239980902762125, 'Tofu Pudding')]

In [325]:
from collections import defaultdict
store = defaultdict(float)
for score, item in rec_ls:
    store[item.lower()] = score

In [326]:
import heapq
prod_ls = []
REC_NUM = 5 
for ind, row in useful_prod.iterrows():
    sc = 0
    for item in row['product_ingredient_name']:
        sc += store[item]
    heapq.heappush(prod_ls, (sc, row['product_name']))
    if len(prod_ls) > REC_NUM:
        heapq.heappop(prod_ls)

print(prod_ls)

[(0.5228141544384187, 'Potaro Ball Tofu Pudding'), (0.8264262992162967, 'Creamy Milk'), (0.8264262992162967, 'Boba & Caramel Pudding'), (0.8264262992162967, 'Mini Q & Melon Jelly'), (0.8518726624070347, 'Cold Purple Rice Soup Signature')]


In [327]:
train_df.loc[117348, :].sort_values()

Tofu Pudding               0.0
Coco Mochi                 0.0
Q Mochi                    0.0
Taro Paste                 0.0
Almond Pudding             0.0
Taro Balls                 0.0
Sesame Rice Balls          0.0
Potaro Balls               0.0
Sago                       0.0
Ice Cream                  0.0
Black Sugar Boba           0.0
Peanuts                    0.0
Lychee Jelly               0.0
Almond Flakes              0.0
Taro                       1.0
Caramel Pudding            1.0
Red Beans                  1.0
Rice Balls                 1.0
Melon Jelly                2.0
Mini Q (Mini Taro Ball)    2.0
Boba                       2.0
Grass Jelly                3.0
Name: 117348, dtype: float64