In [1]:
import pandas as pd

In [2]:
import numpy as np

In [3]:
from sklearn.decomposition import NMF

In [4]:
df_flatten = pd.read_csv("flatten.csv",date_parser=["date"])

In [14]:
df_flatten["quantity"] = df_flatten["salesquantity"].map(np.ceil).astype('uint')

In [7]:
number_of_customer = df_flatten["csn"].nunique()
number_of_items = df_flatten["article"].nunique()

In [10]:
matrix_features = np.zeros((number_of_customer,number_of_items),dtype=np.uint)

In [11]:
matrix_features

array([[0, 0, 0, ..., 0, 0, 0],
       [0, 0, 0, ..., 0, 0, 0],
       [0, 0, 0, ..., 0, 0, 0],
       ...,
       [0, 0, 0, ..., 0, 0, 0],
       [0, 0, 0, ..., 0, 0, 0],
       [0, 0, 0, ..., 0, 0, 0]], dtype=uint64)

In [13]:
def get_user_idx(csn):
    return hash(csn) % number_of_customer
def get_article_idx(csn):
    return hash(csn) % number_of_items

In [15]:
for idx, item in df_flatten.iterrows():
    user_id = get_user_idx(item["csn"])
    item_id = get_article_idx(item["article"])
    matrix_features[user_id][item_id] = item["quantity"]    

In [19]:
nmf = NMF(n_components=25).fit(matrix_features)

In [20]:
nmf.reconstruction_err_

2237.4480420013792

In [21]:
nmf.components_

array([[0.00000000e+00, 1.62410179e-07, 1.19793344e-08, ...,
        0.00000000e+00, 0.00000000e+00, 0.00000000e+00],
       [2.05118542e-07, 2.62188737e-06, 0.00000000e+00, ...,
        1.13525997e-06, 0.00000000e+00, 0.00000000e+00],
       [0.00000000e+00, 0.00000000e+00, 0.00000000e+00, ...,
        0.00000000e+00, 0.00000000e+00, 0.00000000e+00],
       ...,
       [0.00000000e+00, 1.70815263e-03, 0.00000000e+00, ...,
        0.00000000e+00, 0.00000000e+00, 0.00000000e+00],
       [0.00000000e+00, 0.00000000e+00, 8.66649957e-04, ...,
        0.00000000e+00, 1.21010659e-02, 0.00000000e+00],
       [0.00000000e+00, 0.00000000e+00, 0.00000000e+00, ...,
        0.00000000e+00, 4.61754020e-03, 0.00000000e+00]])

In [23]:
X_reduce = nmf.transform(matrix_features)

In [24]:
X_reduce.shape

(20000, 25)

In [25]:
df_flatten

Unnamed: 0,csn,date,article,salesquantity,price,quantity
0,Y2NgaWJoYw==,2018-03-02,10020163,2.000,18250.00,2
1,Y2NgaWJoYw==,2018-03-04,10026562,3.000,13000.00,3
2,Y2NgaWJoYw==,2018-03-04,10320883,2.000,43000.00,2
3,Y2NgaWlpYA==,2018-02-27,10013531,1.000,17800.00,1
4,Y2NgaWlpYA==,2018-02-27,10015613,1.000,5600.00,1
5,Y2NgaWlpYA==,2018-02-27,10320578,1.000,5600.00,1
6,Y2NgaWlpYA==,2018-02-27,10005247,4.000,6000.00,4
7,Y2NgaWlpYA==,2018-02-27,10319762,1.000,228000.00,1
8,Y2NgaWlpYA==,2018-02-27,10005236,4.000,13325.00,4
9,Y2NgaWlpYA==,2018-02-27,10005249,4.000,6200.00,4
