In [1]:
import pandas as pd
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.feature_extraction.text import CountVectorizer

In [7]:
# Load the six input CSV tables in one pass; the order of the names on the
# left matches the order of the file paths on the right.
aisles, departments, prior, train, orders, products = map(
    pd.read_csv,
    [
        '../input/aisles.csv',
        '../input/departments.csv',
        '../input/order_products__prior.csv',
        '../input/order_products__train.csv',
        '../input/orders.csv',
        '../input/products.csv',
    ],
)

In [8]:
# Attach the product attributes to every row of the train order lines.
# Inner join (the pandas default): rows without a matching product_id are dropped.
df_orders = train.merge(products, how='inner', on='product_id')
df_orders.head()

Unnamed: 0,order_id,product_id,add_to_cart_order,reordered,product_name,aisle_id,department_id
0,1,49302,1,1,Bulgarian Yogurt,120,16
1,816049,49302,7,1,Bulgarian Yogurt,120,16
2,1242203,49302,1,1,Bulgarian Yogurt,120,16
3,1383349,49302,11,1,Bulgarian Yogurt,120,16
4,1787378,49302,8,0,Bulgarian Yogurt,120,16


In [20]:
# Enrich each product with its aisle name, then with its department name.
# Left joins keep every row of `products`, even when no aisle/department matches.
products_aisles = products.merge(aisles, how='left', on='aisle_id')
products_description = products_aisles.merge(departments, how='left', on='department_id')
products_description.head()

Unnamed: 0,product_id,product_name,aisle_id,department_id,aisle,department
0,1,Chocolate Sandwich Cookies,61,19,cookies cakes,snacks
1,2,All-Seasons Salt,104,13,spices seasonings,pantry
2,3,Robust Golden Unsweetened Oolong Tea,94,7,tea,beverages
3,4,Smart Ones Classic Favorites Mini Rigatoni Wit...,38,1,frozen meals,frozen
4,5,Green Chile Anytime Sauce,5,13,marinades meat preparation,pantry


In [21]:
# Build one free-text "metadata" string per product: "<aisle> <department> <product_name>".
# Vectorised column concatenation replaces the row-wise apply(); fillna('') keeps a
# product whose aisle/department is missing after the left joins from raising a
# TypeError (NaN + str) and lets it fall back to whatever text it does have.
products_description['metadata'] = (
    products_description['aisle'].fillna('')
    + ' '
    + products_description['department'].fillna('')
    + ' '
    + products_description['product_name'].fillna('')
)

In [25]:
# Preview the first rows to check the 'metadata' column.
products_description.head()

Unnamed: 0,product_id,product_name,aisle_id,department_id,aisle,department,metadata
0,1,Chocolate Sandwich Cookies,61,19,cookies cakes,snacks,cookies cakes snacks Chocolate Sandwich Cookies
1,2,All-Seasons Salt,104,13,spices seasonings,pantry,spices seasonings pantry All-Seasons Salt
2,3,Robust Golden Unsweetened Oolong Tea,94,7,tea,beverages,tea beverages Robust Golden Unsweetened Oolong...
3,4,Smart Ones Classic Favorites Mini Rigatoni Wit...,38,1,frozen meals,frozen,frozen meals frozen Smart Ones Classic Favorit...
4,5,Green Chile Anytime Sauce,5,13,marinades meat preparation,pantry,marinades meat preparation pantry Green Chile ...


In [22]:
# Token-count vectorizer over the metadata strings, configured with the built-in English stop-word list.
count_vec = CountVectorizer(stop_words='english')
# Learn the vocabulary and build the count matrix in one pass
# (one matrix row per row of products_description).
count_vec_matrix = count_vec.fit_transform(products_description['metadata'])

In [23]:
# Finds the rows of count_vec_matrix most similar to a query. This is helpful for
# generating recommendations for new products that do not exist in the association rules.

def vectorize_products_based_on_metadata(product_input, top_n=10):
    """Return the names of the products whose metadata best matches a query.

    The query is vectorized with the already-fitted ``count_vec`` and compared
    by cosine similarity against every row of ``count_vec_matrix``.

    Parameters
    ----------
    product_input : str
        Free-text description of the product to look up (e.g. ``'Bubble Bath'``).
    top_n : int, default 10
        Maximum number of products to return. Fewer are returned when fewer
        products have a similarity score above zero.

    Returns
    -------
    pandas.Series or None
        ``product_name`` values of the best matches, highest score first and
        indexed by their ``products_description`` index labels. ``None`` (after
        printing a message) when no product has a positive similarity score.
    """
    query_vec = count_vec.transform(pd.Series(product_input))

    # One score per row of count_vec_matrix. ravel() flattens the (1, n) result
    # without hard-coding the catalogue size.
    scores = pd.Series(
        cosine_similarity(query_vec, count_vec_matrix).ravel(),
        index=products_description.index,
        name='score',
    )

    # Never recommend products with a score of zero.
    positive_scores = scores[scores > 0]

    # Happens when none of the query's tokens are in the fitted vocabulary.
    if positive_scores.empty:
        print('No similar products found.  Please refine your search terms and try again')
        return

    # nlargest caps the result at top_n and already copes with fewer candidates.
    best = positive_scores.nlargest(top_n)

    # Label-based lookup: `best` is indexed by products_description's own labels,
    # so .loc stays correct even if that index is not a default RangeIndex.
    return products_description.loc[best.index, 'product_name']

In [24]:
# Example query: look up products whose metadata is similar to the text 'Bubble Bath'.
vectorize_products_based_on_metadata('Bubble Bath')

42098                 Eucalyptus Ease Bubble Bath
42324                 Eucalyptus Mint Bubble Bath
10036           Over Tired and Cranky Bubble Bath
11065                    Bubble Bath Extra Gentle
42859    Super Sensitive No Fragrance Bubble Bath
18225                            Baby Bubble Bath
3490           Everyday Shea Lavender Bubble Bath
16699                      Baby Bee Bubble Bath -
7544                     Baby Bedtime Bubble Bath
35662                         Calming Bubble Bath
Name: product_name, dtype: object