In [1]:
import numpy as np
import pandas as pd

from scipy.sparse.linalg import svds


In [2]:
# Read the raw sales transactions (first input file) and preview the top rows.
SALES_DATA_CSV = "File 01 - Sales Data.csv"
df = pd.read_csv(SALES_DATA_CSV)
df.head()

Unnamed: 0,Retailer Code,Rep,Product Code,Date,Net Sales Qty,Net Sales Value,Sales Return Qty,Sales Return Value
0,BORE0001,BORELLA - R1,500004,"Wednesday, June 20 2012",4,1014.0,0,0.0
1,BORE0001,BORELLA - R1,500053,"Wednesday, June 20 2012",3,349.14,0,0.0
2,BORE0001,BORELLA - R1,500053,"Tuesday, August 28 2012",3,349.14,0,0.0
3,BORE0001,BORELLA - R1,500053,"Saturday, March 23 2013",-6,-775.86,6,775.86
4,BORE0001,BORELLA - R1,500069,"Monday, April 02 2012",3,590.04,0,0.0


#### Preparing the pivot table

In [3]:
# Retailer x product matrix holding the summed net sales quantity;
# retailer/product pairs with no transactions are filled with 0.
df_pvt = (
    df.pivot_table(
        index='Retailer Code',
        columns='Product Code',
        values='Net Sales Qty',
        aggfunc='sum',
    )
    .fillna(0)
)
df_pvt.head()

Product Code,500003,500004,500031,500034,500047,500053,500069,500070,500071,500084,...,605469,605470,605471,605472,605473,605474,605621,605636,605691,605854
Retailer Code,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
BORE0001,0.0,4.0,0.0,0.0,0.0,0.0,10.0,4.0,11.0,2.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
BORE0002,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
BORE0004,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
BORE0006,0.0,0.0,0.0,0.0,0.0,-1.0,0.0,0.0,1.0,0.0,...,0.0,0.0,-2.0,-2.0,0.0,0.0,0.0,0.0,0.0,0.0
BORE0007,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [4]:
# Extract the pivot table as a plain 2-D NumPy array for the factorization.
# DataFrame.as_matrix() was deprecated in pandas 0.23 and removed in 1.0;
# to_numpy() is the supported replacement and returns the same values.
df_matrix = df_pvt.to_numpy()
df_matrix[:5]

array([[0., 4., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.]])

In [5]:
# Row labels of the pivot table (one retailer code per matrix row), kept so the
# reconstructed matrix can be re-labelled later.
Retailer_Code = df_pvt.index.tolist()
Retailer_Code[:5]

['BORE0001', 'BORE0002', 'BORE0004', 'BORE0006', 'BORE0007']

### Singular Value Decomposition (SVD)

In [6]:
# Number of latent factors kept by the truncated SVD.
NUMBER_OF_FACTORS_MF = 15

# Factorize the retailer-by-product matrix into U, the singular values and Vt.
U, sigma, Vt = svds(df_matrix, k=NUMBER_OF_FACTORS_MF)

In [7]:
# Inspect the shape of U, the left singular vectors returned by svds.
U.shape

(3771, 15)

In [8]:
# Inspect the shape of Vt, the right singular vectors (transposed) returned by svds.
Vt.shape

(15, 446)

In [9]:
# svds returns the singular values as a 1-D vector; expand it into a diagonal
# matrix so it can take part in the matrix product U . sigma . Vt.
# np.diag applied to a 2-D array extracts its diagonal instead, so an
# unconditional assignment would collapse sigma back to 1-D if this cell were
# re-run. The ndim guard keeps the cell idempotent.
if sigma.ndim == 1:
    sigma = np.diag(sigma)
sigma.shape

(15, 15)

After the factorization, we try to reconstruct the original matrix by multiplying its factors. The resulting matrix is no longer sparse: it now contains predicted values for items the user has not yet interacted with, which we will exploit for recommendations.

In [10]:
# Low-rank reconstruction of the retailer-by-product matrix: (U . sigma) . Vt.
all_retailer_predicted_ratings = U @ sigma @ Vt
all_retailer_predicted_ratings

array([[ 1.09700765e+00,  7.85601517e-01, -1.44940367e-03, ...,
        -1.05149941e-02,  1.06998055e-01,  1.81080622e-03],
       [ 1.08033282e+00,  1.81171398e-01, -2.15691004e-03, ...,
         1.44226344e-02,  7.43826075e-02,  1.67192362e-03],
       [ 2.87463872e-01,  9.82386050e-03,  1.01295472e-03, ...,
        -1.77519221e-03,  5.62429959e-03,  1.60167477e-04],
       ...,
       [-4.81094799e-02, -2.41600246e-01, -6.22263259e-04, ...,
        -1.55725882e-03,  6.61464057e-02,  1.90719024e-04],
       [ 1.63940444e-01, -2.12223218e-01, -3.88381865e-04, ...,
        -9.49392307e-04,  6.36945565e-02, -8.95627182e-04],
       [ 6.01784937e-02,  5.76504907e-02, -9.50626521e-04, ...,
         4.85033090e-03,  2.50011220e-02,  6.92705496e-04]])

In [12]:
#Converting the reconstructed matrix back to a Pandas dataframe
cf_preds_df = pd.DataFrame(all_retailer_predicted_ratings, columns = df_pvt.columns, index=Retailer_Code)
cf_preds_df.head()

Product Code,500003,500004,500031,500034,500047,500053,500069,500070,500071,500084,...,605469,605470,605471,605472,605473,605474,605621,605636,605691,605854
BORE0001,1.097008,0.785602,-0.001449,0.575582,-0.065828,1.843564,0.736611,0.20786,1.5864,0.609598,...,0.01403,0.002875,-0.007668,0.027126,0.038967,-0.004584,0.010072,-0.010515,0.106998,0.001811
BORE0002,1.080333,0.181171,-0.002157,0.296377,-0.04548,1.013091,0.368195,0.082349,1.037599,0.483546,...,0.048552,0.014282,-0.014115,-0.03358,-0.012693,-0.001829,0.000811,0.014423,0.074383,0.001672
BORE0004,0.287464,0.009824,0.001013,0.135079,-0.007684,0.318131,0.059713,0.01841,0.118103,0.053734,...,0.01833,-0.005963,-0.005102,-0.003046,-0.002624,-0.000516,-0.001156,-0.001775,0.005624,0.00016
BORE0006,0.332288,1.664588,0.00076,0.500841,-0.037599,0.500223,0.31698,0.064278,0.173631,0.079116,...,0.08472,-0.009962,-0.014881,-0.004223,0.006328,-0.003464,0.002417,0.016129,0.041853,0.000765
BORE0007,0.010572,-0.008054,-0.000299,0.006295,-0.0002,0.053506,0.015693,0.007347,0.035634,0.013321,...,0.002802,-0.001321,0.000206,0.003469,0.000928,0.000433,-0.000127,-0.000939,0.00558,-1.6e-05
