# Recommendation engine

In [1]:
import pandas

In [2]:
import networkx

In [3]:
# Load the shopper/store table. The path is relative, so it resolves against the
# directory the notebook kernel was started in. Later cells read the 'ID' and
# 'Store' columns of this frame.
user_brand = pandas.read_csv('../../SYD_DAT_7/data/user_brand.csv')

In [4]:
user_brand

Unnamed: 0,ID,Store
0,80002,Target
1,80002,Home Depot
2,80010,Levi's
3,80010,Puma
4,80010,Cuisinart
5,80010,Converse
6,80010,DKNY
7,80010,Express
8,80010,Kohl's
9,80010,Old Navy


In [6]:
# networkx.read_edgelist('../../SYD_DAT_7/data/user_brand.csv')

In [17]:
# Smoke test of row iteration: print the ID and Store of the first row only.
for index, row in user_brand.head(1).iterrows():
    print(row['ID'], row['Store'])

80002 Target


In [32]:
# Build one undirected graph in which every row of user_brand becomes an edge
# between a store name and a shopper ID.
#
# Graph.add_edges_from creates any endpoint that is not yet a node and stores a
# repeated edge only once, so explicit add_node calls and manual "seen"
# bookkeeping are unnecessary. Zipping the two columns also avoids building a
# Series per row, which is what iterrows() does.
graph = networkx.Graph()
graph.add_edges_from(zip(user_brand['Store'], user_brand['ID']))

# Retained so that any cell still referring to this name sees the same contents
# as before: the set of every store and shopper present in the graph.
nodes_already_added = set(graph.nodes)

In [33]:
user_brand.Store.nunique()

198

In [34]:
user_brand.ID.unique()

array([80002, 80010, 80011, ..., 91946, 91955, 91957])

In [35]:
# Degree centrality for every node. Stores and shopper IDs live in the same
# graph, so the resulting dict mixes both kinds of key.
networkx.degree_centrality(graph)

{'Target': 0.47168857431749245,
 80002: 0.0005055611729019212,
 'Home Depot': 0.29979777553083925,
 "Levi's": 0.11931243680485339,
 80010: 0.0025278058645096056,
 'Puma': 0.0884732052578362,
 'Cuisinart': 0.12790697674418605,
 'Converse': 0.11526794742163803,
 'DKNY': 0.08291203235591507,
 'Express': 0.19843276036400406,
 "Kohl's": 0.2924671385237614,
 'Old Navy': 0.3033367037411527,
 'Container Store': 0.14256825075834176,
 'Nordstrom': 0.22851365015166836,
 'Kenneth Cole': 0.09251769464105157,
 80011: 0.0035389282103134483,
 'Calvin Klein': 0.12032355915065723,
 'French Connection': 0.05763397371081901,
 'BCBGMAXAZRIA': 0.10844287158746209,
 'Nine West': 0.12360970677451973,
 'Steve Madden': 0.13624873609706775,
 'Diesel': 0.06268958543983823,
 'Guess': 0.12866531850353893,
 'Banana Republic': 0.23559150657229525,
 'Gap': 0.2173913043478261,
 'Restoration Hardware': 0.10364004044489383,
 'Crate & Barrel': 0.20626895854398383,
 80015: 0.0010111223458038423,
 'Tommy Hilfiger': 0.064964

In [36]:
# Keep the node -> degree-centrality mapping so later cells can reuse it.
degs = networkx.degree_centrality(graph)

In [42]:
# Convert the centrality mapping into a Series labelled by node. The dict views
# are materialised as lists before being handed to pandas.
deg_series = pandas.Series(list(degs.values()), index=list(degs.keys()))

In [43]:
deg_series

Target                  0.471689
80002                   0.000506
Home Depot              0.299798
Levi's                  0.119312
80010                   0.002528
Puma                    0.088473
Cuisinart               0.127907
Converse                0.115268
DKNY                    0.082912
Express                 0.198433
Kohl's                  0.292467
Old Navy                0.303337
Container Store         0.142568
Nordstrom               0.228514
Kenneth Cole            0.092518
80011                   0.003539
Calvin Klein            0.120324
French Connection       0.057634
BCBGMAXAZRIA            0.108443
Nine West               0.123610
Steve Madden            0.136249
Diesel                  0.062690
Guess                   0.128665
Banana Republic         0.235592
Gap                     0.217391
Restoration Hardware    0.103640
Crate & Barrel          0.206269
80015                   0.001011
Tommy Hilfiger          0.064965
80020                   0.000253
          

In [44]:
# Nodes with the highest degree centrality; with no argument nlargest() returns
# the five largest values.
deg_series.nlargest()

Target             0.471689
Old Navy           0.303337
Home Depot         0.299798
Kohl's             0.292467
Banana Republic    0.235592
dtype: float64

In [47]:
# Jaccard coefficient of two stores: shared neighbours divided by the union of
# their neighbours. The call yields (u, v, score) tuples lazily, hence list().
list(networkx.jaccard_coefficient(graph, [('Target', "Kohl's")]))

[('Target', "Kohl's", 0.4008341056533828)]

In [49]:
list(networkx.jaccard_coefficient(graph, [('Target', "Tommy Hilfiger")]))

[('Target', 'Tommy Hilfiger', 0.086489252814739)]

In [52]:
list(networkx.jaccard_coefficient(graph, [('Target', "Michael Kors")]))

[('Target', 'Michael Kors', 0.0021413276231263384)]

In [53]:
# Distinct shopper IDs; used below as the candidate set for shopper similarity.
all_shoppers = user_brand.ID.unique()

In [58]:
# Rows for shopper 91946, the shopper used as the running example in the cells below.
user_brand[user_brand.ID==91946]

Unnamed: 0,ID,Store
23794,91946,Levi's
23795,91946,Old Navy
23796,91946,Target
23797,91946,Nordstrom


In [71]:
# Jaccard similarity between shopper 91946 and every other shopper.
#
# jaccard_coefficient returns a one-shot iterator. It is materialised right away
# so the scores can be inspected and then consumed by a later cell (or that cell
# re-run on its own) without silently finding the iterator already exhausted.
similarity = list(
    networkx.jaccard_coefficient(
        graph,
        [(91946, x) for x in all_shoppers if x != 91946],
    )
)

In [72]:
# list(similarity)

In [73]:
import numpy

# Stack the (self, other, score) tuples into one 2-D array and label its columns.
# NOTE: routing the tuples through a single numeric array stores the shopper IDs
# as floats, which is why a later cell casts other_shoppers back with astype(int).
similarity_rows = numpy.array(list(similarity))
like_91946 = pandas.DataFrame(data=similarity_rows, columns=['self', 'other_shoppers', 'jaccard'])
like_91946

Unnamed: 0,self,other_shoppers,jaccard
0,91946.0,80002.0,0.200000
1,91946.0,80010.0,0.272727
2,91946.0,80011.0,0.125000
3,91946.0,80015.0,0.142857
4,91946.0,80020.0,0.000000
5,91946.0,80021.0,0.076923
6,91946.0,80027.0,0.000000
7,91946.0,80032.0,0.375000
8,91946.0,80042.0,0.000000
9,91946.0,80043.0,0.000000


In [74]:
# Rank the other shoppers from most to least similar to shopper 91946.
like_91946.sort_values('jaccard', ascending=False)

Unnamed: 0,self,other_shoppers,jaccard
632,91946.0,82031.0,0.600000
1921,91946.0,86215.0,0.600000
2443,91946.0,88045.0,0.600000
2521,91946.0,88283.0,0.600000
2594,91946.0,88525.0,0.600000
2835,91946.0,89227.0,0.600000
2437,91946.0,88026.0,0.600000
3181,91946.0,90228.0,0.600000
581,91946.0,81886.0,0.600000
3190,91946.0,90259.0,0.571429


In [81]:
# Shoppers whose Jaccard score against 91946 exceeds 0.55, as plain integer IDs.
above_cutoff = like_91946.jaccard > 0.55
most_similar_shoppers = like_91946.loc[above_cutoff, 'other_shoppers'].astype(int).tolist()
most_similar_shoppers

[81886, 82031, 86215, 88026, 88045, 88283, 88525, 89227, 90228, 90259]

In [82]:
def is_similar_shopper(x):
    """Return True when shopper ID ``x`` is listed in ``most_similar_shoppers``."""
    return x in most_similar_shoppers


# Count how often each store occurs among the rows belonging to similar shoppers.
from_similar_shopper = user_brand.ID.map(is_similar_shopper)
recommended_shops = user_brand.loc[from_similar_shopper, 'Store'].value_counts()
recommended_shops

Target          9
Levi's          8
Old Navy        8
Kohl's          6
Nordstrom       6
BCBGMAXAZRIA    1
Gap             1
Puma            1
Converse        1
Nine West       1
Home Depot      1
Name: Store, dtype: int64

In [83]:
# Enumerate the maximal cliques. Every edge added to this graph joins a store to
# a shopper, and the cliques reported here are two-node shopper/store pairs.
list(networkx.clique.find_cliques(graph))

[[81932, 'Express'],
 [81932, 'Kenneth Cole'],
 [81932, 'Nambe'],
 [81932, 'Nine West'],
 [90126, 'Guess'],
 [90128, 'Diesel'],
 [90128, 'Banana Republic'],
 [81937, 'Nordstrom'],
 [81937, 'BCBGMAXAZRIA'],
 [81937, 'Calvin Klein'],
 [81937, 'Tommy Hilfiger'],
 [81937, 'New Balance'],
 [90129, 'Nordstrom'],
 [90129, 'BCBGMAXAZRIA'],
 [81942, 'J.Crew'],
 [81942, 'Express'],
 [81942, 'Banana Republic'],
 [90135, 'Home Depot'],
 [90135, 'Calvin Klein'],
 [90135, 'Elie Tahari'],
 [90135, "Kohl's"],
 [81944, 'Nine West'],
 [81948, 'J.Crew'],
 [81949, 'Cuisinart'],
 [81952, 'Kenneth Cole'],
 [81956, 'Restoration Hardware'],
 [90150, "Levi's"],
 [90152, 'Banana Republic'],
 [81961, 'Home Depot'],
 [81965, 'Home Depot'],
 [90164, 'Steve Madden'],
 [90165, 'Kenneth Cole'],
 [90165, 'BCBGMAXAZRIA'],
 [90165, 'Guess'],
 [90165, 'Columbia'],
 [90175, 'Nordstrom'],
 [81985, 'J.Crew'],
 [81985, 'Lacoste'],
 [81985, 'BCBGMAXAZRIA'],
 [81985, 'Converse'],
 [81985, 'Columbia'],
 [81985, "Levi's"],
 [819

In [89]:
import math


def amazon_weight(store):
    """Return the square root of the degree of ``store`` in the global ``graph``.

    The degree is the number of edges attached to the node; its square root is
    the normalising weight used by ``amazon_cosine``.
    """
    node_degree = graph.degree(store)
    return math.sqrt(node_degree)

In [90]:
amazon_weight('Apple')

1.4142135623730951

In [91]:
amazon_weight('Target')

43.197222132910355

In [100]:
def amazon_cosine(store1, store2):
    """Return a cosine-style similarity between two nodes of the global ``graph``.

    The score is the number of neighbours the two nodes share, divided first by
    ``amazon_weight(store1)`` and then by ``amazon_weight(store2)``.
    """
    shared_count = sum(1 for _ in networkx.common_neighbors(graph, store1, store2))
    return shared_count / amazon_weight(store1) / amazon_weight(store2)

In [101]:
amazon_cosine('Gap', 'Target')

0.5344211329155709

In [102]:
amazon_cosine('Gap', 'Kohl\'s')

0.39999723622690947

In [105]:
# Score every ordered pair of distinct stores as (score, store1, store2).
#
# The distinct-store array is computed once up front. Written inline as the inner
# iterable of the comprehension, it would be recomputed on every pass of the
# outer loop. Both (a, b) and (b, a) are produced, so each unordered pair of
# stores appears twice in the result.
all_stores = user_brand.Store.unique()
similar_stores = [
    (amazon_cosine(store1, store2), store1, store2)
    for store1 in all_stores
    for store2 in all_stores
    if store1 != store2
]

In [106]:
# Highest score first; equal scores are ordered by the store names that follow
# in each tuple, also descending.
similar_stores = sorted(similar_stores, reverse=True)
similar_stores

[(1.0, 'Yves Saint Laurent', 'Billionaire Boys Club'),
 (1.0, 'YSL', 'Lancome'),
 (1.0, 'YSL', 'Charles David'),
 (1.0, 'YSL', 'Bali'),
 (1.0, 'YSL', 'Armani Exchange'),
 (1.0, 'Vince Camuto', 'BCBGeneration'),
 (1.0, 'TOMS Shoes', 'Keds'),
 (1.0, 'Stuart Weitzman', 'Rachel Zoe'),
 (1.0, 'Roxy', 'Rip Curl'),
 (1.0, 'Roxy', "O'Neill"),
 (1.0, 'Roxy', 'Billabong'),
 (1.0, 'Rip Curl', 'Roxy'),
 (1.0, 'Rip Curl', "O'Neill"),
 (1.0, 'Rip Curl', 'Billabong'),
 (1.0, 'Rachel Zoe', 'Stuart Weitzman'),
 (1.0, "O'Neill", 'Roxy'),
 (1.0, "O'Neill", 'Rip Curl'),
 (1.0, "O'Neill", 'Billabong'),
 (1.0, 'Marmot', 'La Sportiva'),
 (1.0, 'Marmot', 'Five Ten'),
 (1.0, 'Marmot', 'Black Diamond'),
 (1.0, 'MINKPINK', "Bloomingdale's"),
 (1.0, 'Lord & Taylor', 'Boden'),
 (1.0, 'Lancome', 'YSL'),
 (1.0, 'Lancome', 'Charles David'),
 (1.0, 'Lancome', 'Bali'),
 (1.0, 'Lancome', 'Armani Exchange'),
 (1.0, 'La Sportiva', 'Marmot'),
 (1.0, 'La Sportiva', 'Five Ten'),
 (1.0, 'La Sportiva', 'Black Diamond'),
 (1.0,