# Collaborative Filtering

**Item Based**: which takes similarities between items’ consumption histories

**User Based**: that considers similarities between user consumption histories and item similarities

In [1]:
#Import libraries
import pandas as pd
from scipy.spatial.distance import cosine

In [2]:
data = pd.read_csv("../data/groceries.csv")

In [3]:
data.head(100)

Unnamed: 0,Person,item
0,1,citrus fruit
1,1,semi-finished bread
2,1,margarine
3,1,ready soups
4,2,tropical fruit
5,2,yogurt
6,2,coffee
7,3,whole milk
8,4,pip fruit
9,4,yogurt


In [4]:
#Assume that for all items only one quantity was bought 

**Exercise 1** Add a column to data : `Quantity` that has value 1 

In [5]:
data["Quantity"] = 1

In [6]:
data.head()

Unnamed: 0,Person,item,Quantity
0,1,citrus fruit,1
1,1,semi-finished bread,1
2,1,margarine,1
3,1,ready soups,1
4,2,tropical fruit,1


In [7]:
len(pd.unique(data.item))

169

In [8]:
#This particular view isn't very helpful for us for analysis.
#This way of data being arranged is called LONG
#We need it in wide format

In [9]:
#Converting data from long to wide format
dataWide = data.pivot("Person", "item", "Quantity")

In [10]:
dataWide.head()

item,Instant food products,UHT-milk,abrasive cleaner,artif. sweetener,baby cosmetics,baby food,bags,baking powder,bathroom cleaner,beef,...,turkey,vinegar,waffles,whipped/sour cream,whisky,white bread,white wine,whole milk,yogurt,zwieback
Person,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1,,,,,,,,,,,...,,,,,,,,,,
2,,,,,,,,,,,...,,,,,,,,,1.0,
3,,,,,,,,,,,...,,,,,,,,1.0,,
4,,,,,,,,,,,...,,,,,,,,,1.0,
5,,,,,,,,,,,...,,,,,,,,1.0,,


**Exercise 2**
Print the data for Person number 2

In [11]:
dataWide[dataWide.index==2]

item,Instant food products,UHT-milk,abrasive cleaner,artif. sweetener,baby cosmetics,baby food,bags,baking powder,bathroom cleaner,beef,...,turkey,vinegar,waffles,whipped/sour cream,whisky,white bread,white wine,whole milk,yogurt,zwieback
Person,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2,,,,,,,,,,,...,,,,,,,,,1,


In [12]:
dataWide.iloc[1:2,:]

item,Instant food products,UHT-milk,abrasive cleaner,artif. sweetener,baby cosmetics,baby food,bags,baking powder,bathroom cleaner,beef,...,turkey,vinegar,waffles,whipped/sour cream,whisky,white bread,white wine,whole milk,yogurt,zwieback
Person,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2,,,,,,,,,,,...,,,,,,,,,1,


In [13]:
dataWide.loc[2,:]

item
Instant food products   NaN
UHT-milk                NaN
abrasive cleaner        NaN
artif. sweetener        NaN
baby cosmetics          NaN
baby food               NaN
bags                    NaN
baking powder           NaN
bathroom cleaner        NaN
beef                    NaN
berries                 NaN
beverages               NaN
bottled beer            NaN
bottled water           NaN
brandy                  NaN
brown bread             NaN
butter                  NaN
butter milk             NaN
cake bar                NaN
candles                 NaN
candy                   NaN
canned beer             NaN
canned fish             NaN
canned fruit            NaN
canned vegetables       NaN
cat food                NaN
cereals                 NaN
chewing gum             NaN
chicken                 NaN
chocolate               NaN
                         ..
soda                    NaN
soft cheese             NaN
softener                NaN
sound storage medium    NaN
soups          

**Exercise 3** Print the data for row number 2

In [14]:
dataWide.iloc[1,:]

item
Instant food products   NaN
UHT-milk                NaN
abrasive cleaner        NaN
artif. sweetener        NaN
baby cosmetics          NaN
baby food               NaN
bags                    NaN
baking powder           NaN
bathroom cleaner        NaN
beef                    NaN
berries                 NaN
beverages               NaN
bottled beer            NaN
bottled water           NaN
brandy                  NaN
brown bread             NaN
butter                  NaN
butter milk             NaN
cake bar                NaN
candles                 NaN
candy                   NaN
canned beer             NaN
canned fish             NaN
canned fruit            NaN
canned vegetables       NaN
cat food                NaN
cereals                 NaN
chewing gum             NaN
chicken                 NaN
chocolate               NaN
                         ..
soda                    NaN
soft cheese             NaN
softener                NaN
sound storage medium    NaN
soups          

In [15]:
#Replace NA with 0 
dataWide.fillna(0, inplace=True)

In [16]:
dataWide.head()

item,Instant food products,UHT-milk,abrasive cleaner,artif. sweetener,baby cosmetics,baby food,bags,baking powder,bathroom cleaner,beef,...,turkey,vinegar,waffles,whipped/sour cream,whisky,white bread,white wine,whole milk,yogurt,zwieback
Person,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,1,0
3,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,1,0,0
4,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,1,0
5,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,1,0,0


# Item-based Collaborative Filtering

In item based collaborative filtering we do not care about the user column

In [17]:
#Drop the Person column
data_ib = dataWide.copy()

In [18]:
data_ib.head()

item,Instant food products,UHT-milk,abrasive cleaner,artif. sweetener,baby cosmetics,baby food,bags,baking powder,bathroom cleaner,beef,...,turkey,vinegar,waffles,whipped/sour cream,whisky,white bread,white wine,whole milk,yogurt,zwieback
Person,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,1,0
3,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,1,0,0
4,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,1,0
5,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,1,0,0


In [19]:
data_ib = data_ib.reset_index()

In [20]:
data_ib.head()


item,Person,Instant food products,UHT-milk,abrasive cleaner,artif. sweetener,baby cosmetics,baby food,bags,baking powder,bathroom cleaner,...,turkey,vinegar,waffles,whipped/sour cream,whisky,white bread,white wine,whole milk,yogurt,zwieback
0,1,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,2,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,1,0
2,3,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,1,0,0
3,4,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,1,0
4,5,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,1,0,0


In [21]:
#Drop the Person column
#data_ib = data_ib.iloc[:,1:]
data_ib = data_ib.drop("Person", axis=1)

In [22]:
data_ib.head()

item,Instant food products,UHT-milk,abrasive cleaner,artif. sweetener,baby cosmetics,baby food,bags,baking powder,bathroom cleaner,beef,...,turkey,vinegar,waffles,whipped/sour cream,whisky,white bread,white wine,whole milk,yogurt,zwieback
0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,1,0
2,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,1,0,0
3,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,1,0
4,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,1,0,0


In [23]:
# Create a placeholder dataframe listing item vs. item
data_ibs = pd.DataFrame(index=data_ib.columns,
                        columns=data_ib.columns)

In [24]:
data_ibs.head()

item,Instant food products,UHT-milk,abrasive cleaner,artif. sweetener,baby cosmetics,baby food,bags,baking powder,bathroom cleaner,beef,...,turkey,vinegar,waffles,whipped/sour cream,whisky,white bread,white wine,whole milk,yogurt,zwieback
item,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
Instant food products,,,,,,,,,,,...,,,,,,,,,,
UHT-milk,,,,,,,,,,,...,,,,,,,,,,
abrasive cleaner,,,,,,,,,,,...,,,,,,,,,,
artif. sweetener,,,,,,,,,,,...,,,,,,,,,,
baby cosmetics,,,,,,,,,,,...,,,,,,,,,,


## Similarity Measure 

We will now find similarities.

We will use `cosine similarity`

<img src="img/cosine.png" >

The resulting similarity ranges from −1 meaning exactly opposite, to 1 meaning exactly the same, with 0 indicating orthogonality (decorrelation), and in-between values indicating intermediate similarity or dissimilarity.

*src* https://en.wikipedia.org/wiki/Cosine_similarity

In essense the cosine similarity takes the sum product of the first and second column, then divides that by the product of the square root of the sum of squares of each column.

In [25]:
for i in range(0,len(data_ibs.columns)) :
    # Loop through the columns for each column
    for j in range(0,len(data_ibs.columns)) :
      # Fill in placeholder with cosine similarities
      data_ibs.ix[i,j] = 1-cosine(data_ib.ix[:,i],data_ib.ix[:,j])

In [26]:
data_ibs.head()

item,Instant food products,UHT-milk,abrasive cleaner,artif. sweetener,baby cosmetics,baby food,bags,baking powder,bathroom cleaner,beef,...,turkey,vinegar,waffles,whipped/sour cream,whisky,white bread,white wine,whole milk,yogurt,zwieback
item,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
Instant food products,1.0,0.0248112,0.0,0.0,0,0,0,0.0255878,0.0,0.0396234,...,0.0251577,0.0140636,0.0173605,0.0296613,0,0.044236,0.0,0.0673304,0.0425243,0.0409311
UHT-milk,0.0248112,1.0,0.0,0.0389841,0,0,0,0.0376158,0.0212202,0.0339786,...,0.0308196,0.0275659,0.0595491,0.0622915,0,0.0352245,0.0120949,0.0428914,0.108655,0.0401143
abrasive cleaner,0.0,0.0,1.0,0.0298807,0,0,0,0.0,0.03253,0.044647,...,0.0,0.0422577,0.017388,0.0509286,0,0.0249222,0.0123608,0.0539498,0.045634,0.0
artif. sweetener,0.0,0.0389841,0.0298807,1.0,0,0,0,0.0402042,0.0,0.00778216,...,0.0,0.0220971,0.00909241,0.0066578,0,0.0173762,0.0,0.0387901,0.0524977,0.0
baby cosmetics,0.0,0.0,0.0,0.0,1,0,0,0.0309492,0.0,0.0,...,0.0,0.0,0.020998,0.0153755,0,0.0200643,0.0,0.0244315,0.0,0.0


With our similarity matrix filled out we can look for each items “neighbour” by looping through ‘data_ibs’, sorting each column in descending order, and grabbing the name of each of the top 3 products.

In [27]:
data_neighbours = pd.DataFrame(index=data_ibs.columns,columns=range(1,4))
 
# Loop through our similarity dataframe and fill in neighbouring item names
for i in range(0,len(data_ibs.columns)):
    data_neighbours.ix[i,:3] = data_ibs.ix[0:,i].sort_values(ascending=False)[:3].index

In [28]:
data_neighbours

Unnamed: 0_level_0,1,2,3
item,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Instant food products,Instant food products,hamburger meat,canned fish
UHT-milk,UHT-milk,bottled water,yogurt
abrasive cleaner,abrasive cleaner,preservation products,cleaner
artif. sweetener,artif. sweetener,potato products,salad dressing
baby cosmetics,baby cosmetics,cream,dish cleaner
baby food,baby food,finished products,soups
bags,bags,tidbits,frozen potato products
baking powder,baking powder,whole milk,sugar
bathroom cleaner,bathroom cleaner,cleaner,liver loaf
beef,beef,root vegetables,other vegetables


**Exercise 4** Modify the above code to print the top 10 similar products for each product

In [29]:
data_neighbours = pd.DataFrame(index=data_ibs.columns,columns=range(1,11))
 
# Loop through our similarity dataframe and fill in neighbouring item names
for i in range(0,len(data_ibs.columns)):
    data_neighbours.ix[i,:10] = data_ibs.ix[0:,i].sort_values(ascending=False)[:10].index
    
data_neighbours

Unnamed: 0_level_0,1,2,3,4,5,6,7,8,9,10
item,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
Instant food products,Instant food products,hamburger meat,canned fish,other vegetables,whole milk,root vegetables,curd,rolls/buns,butter,kitchen utensil
UHT-milk,UHT-milk,bottled water,yogurt,other vegetables,soda,coffee,margarine,domestic eggs,brown bread,citrus fruit
abrasive cleaner,abrasive cleaner,preservation products,cleaner,curd cheese,root vegetables,dish cleaner,other vegetables,salad dressing,rice,berries
artif. sweetener,artif. sweetener,potato products,salad dressing,skin care,candles,flour,rum,yogurt,frankfurter,mustard
baby cosmetics,baby cosmetics,cream,dish cleaner,cookware,syrup,kitchen towels,soups,butter milk,oil,sweet spreads
baby food,baby food,finished products,soups,cake bar,pasta,soft cheese,butter milk,dessert,salty snack,waffles
bags,bags,tidbits,frozen potato products,pickled vegetables,frozen vegetables,napkins,pork,fruit/vegetable juice,pip fruit,pastry
baking powder,baking powder,whole milk,sugar,whipped/sour cream,other vegetables,cooking chocolate,flour,margarine,domestic eggs,yogurt
bathroom cleaner,bathroom cleaner,cleaner,liver loaf,decalcifier,root vegetables,soda,other vegetables,liquor (appetizer),berries,napkins
beef,beef,root vegetables,other vegetables,whole milk,rolls/buns,pork,yogurt,citrus fruit,margarine,whipped/sour cream


# User Based collaborative Filtering

The process for creating a User Based recommendation system is as follows:

1. Have Item-Based similarity matrix
2. Check which items the user has consumed
3. For each item the user has consumed, get the top X neighbours
4. Get the consumption record of the user for each neighbour.
5. Compute similarity score
6. Recommend the items with the highest score

In [30]:
#Helper function to get similarity scores
def getScore(history, similarities):
   return sum(history*similarities)/sum(similarities)

#Understand what this function does ! 

In [31]:
data_sims1 = dataWide.reset_index()

In [32]:
data_sims1.head()

item,Person,Instant food products,UHT-milk,abrasive cleaner,artif. sweetener,baby cosmetics,baby food,bags,baking powder,bathroom cleaner,...,turkey,vinegar,waffles,whipped/sour cream,whisky,white bread,white wine,whole milk,yogurt,zwieback
0,1,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,2,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,1,0
2,3,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,1,0,0
3,4,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,1,0
4,5,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,1,0,0


In [33]:
# Create a place holder matrix for similarities, and fill in the user name column
data_sims = pd.DataFrame(index=data_sims1.index,columns=data_sims1.columns)
data_sims.ix[:,:1] = data_sims1.ix[:,:1]

In [34]:
#This is the same as our original data but with nothing filled in except the headers
data_sims.head()

item,Person,Instant food products,UHT-milk,abrasive cleaner,artif. sweetener,baby cosmetics,baby food,bags,baking powder,bathroom cleaner,...,turkey,vinegar,waffles,whipped/sour cream,whisky,white bread,white wine,whole milk,yogurt,zwieback
0,1,,,,,,,,,,...,,,,,,,,,,
1,2,,,,,,,,,,...,,,,,,,,,,
2,3,,,,,,,,,,...,,,,,,,,,,
3,4,,,,,,,,,,...,,,,,,,,,,
4,5,,,,,,,,,,...,,,,,,,,,,


In [35]:
data_sims12 = data_sims1.iloc[:500,:]

In [36]:
data_sims11 = data_sims.iloc[:500,:]

In [37]:
for i in range(0,len(data_sims11.index)):
    for j in range(1,len(data_sims11.columns)):
        user = data_sims11.index[i]
        product = data_sims11.columns[j]
 
        if data_sims12.ix[i][j] == 1:
            data_sims11.ix[i][j] = 0
        else:
            product_top_names = data_neighbours.ix[product][1:10]
            product_top_sims = data_ibs.ix[product].sort_values(ascending=False)[1:10]
            user_purchases = data_ib.ix[user,product_top_names]
            data_sims11.ix[i][j] = getScore(user_purchases,product_top_sims)
    print i

0
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
27

In [38]:
# Get the top products
data_recommend = pd.DataFrame(index=data_sims.index, columns=['Person','1','2','3','4','5','6'])
data_recommend.ix[0:,0] = data_sims.ix[:,0]


In [39]:
# Instead of top product scores, we want to see names
for i in range(0,len(data_sims.index)):
    data_recommend.ix[i,1:] = data_sims.ix[i,:].sort_values(ascending=False).ix[1:7,].index.transpose()

In [40]:
# Print a sample
data_recommend.ix[:10,:4]

Unnamed: 0,Person,1,2,3
0,1,candles,hair spray,oil
1,2,seasonal products,pip fruit,beverages
2,3,cereals,curd,domestic eggs
3,4,butter milk,tropical fruit,canned fish
4,5,coffee,cereals,chocolate
5,6,curd,cleaner,domestic eggs
6,7,frankfurter,sausage,spread cheese
7,8,red/blush wine,dishes,bottled water
8,9,prosecco,dog food,finished products
9,10,soap,curd,domestic eggs


This case/code was inspired from
http://www.salemmarafi.com/code/collaborative-filtering-with-python/

Look into that link for more information

More links:
http://blogs.gartner.com/martin-kihn/how-to-build-a-recommender-system-in-python/