### This notebook will create a product recommendation engine

#### Import dependencies 

In [56]:
import pandas as pd
import numpy as np
import requests
import json
from re import search
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.feature_extraction.text import CountVectorizer

### Get setup variables

In [57]:
# Load the API settings from the local config file; the handle is closed as soon as
# the JSON has been parsed.
with open('config.json') as json_file:
    data = json.load(json_file)

# Base URL of the API and the token used to authenticate requests.
BASE_URL = data['EC2_API_ENDPOINT']
MY_TOKEN = data['GUEST_TOKEN']

#### Import the dataset by making a GET request to the REST API

In [113]:
# Lower-case alias of BASE_URL; the URL-building code below reads `base_url`.
base_url = BASE_URL

In [59]:
# Authorization header passed to every requests.get call in this notebook.
HEADER = {'Authorization':f'Token {MY_TOKEN}'}

In [64]:
# URL of the product list endpoint (base URL + "product/").
query_url = f"{base_url}product/"

In [61]:
# Fetch the product list from the API.
# The timeout keeps the cell from hanging indefinitely on an unreachable host, and
# raise_for_status() surfaces HTTP errors (e.g. a rejected token) here instead of
# letting an error payload flow into the DataFrame built from `db_prods`.
response = requests.get(url=query_url, headers=HEADER, timeout=10)
response.raise_for_status()
db_prods = response.json()

#### Create dataframe

In [62]:
# Flatten the JSON payload returned by the API into a tabular DataFrame.
df = pd.json_normalize(db_prods)

#### View data

In [65]:
# Preview the first rows of the product table.
df.head()

Unnamed: 0,id,name,slug,category,brand,retail_price,sale_price,qty,release_date,is_available
0,1,adidas yeezy boost 350 low v2 beluga,adidas-yeezy-boost-350-low-v2-beluga,1,1,220.0,1097.0,498,2016-09-24,True
1,2,adidas yeezy boost 350 v2 core black copper,adidas-yeezy-boost-350-v2-core-black-copper,1,1,220.0,685.0,225,2016-11-23,True
2,3,adidas yeezy boost 350 v2 core black green,adidas-yeezy-boost-350-v2-core-black-green,1,1,220.0,690.0,196,2016-11-23,True
3,4,adidas yeezy boost 350 v2 core black red,adidas-yeezy-boost-350-v2-core-black-red,1,1,220.0,1075.0,302,2016-11-23,True
4,5,adidas yeezy boost 350 v2 core black red 2017,adidas-yeezy-boost-350-v2-core-black-red-2017,1,1,220.0,828.0,969,2017-02-11,True


### Get count of products

In [66]:
# Number of rows, i.e. products, in the table.
len(df)

50

### Convert the text to a matrix of token counts

In [67]:
# Build a token-count matrix from the product slugs (one row per slug).
# NOTE(review): CountVectorizer runs with its defaults here; per the scikit-learn docs the
# default token pattern keeps only tokens of 2+ alphanumeric characters, so single-character
# slug parts would be ignored — confirm that is acceptable for this catalogue.
cm = CountVectorizer().fit_transform(df['slug'])

In [70]:
# Show the sparse matrix summary (shape and number of stored elements).
cm

<50x77 sparse matrix of type '<class 'numpy.int64'>'
	with 345 stored elements in Compressed Sparse Row format>

### Get the cosine similarity matrix from the count matrix

In [68]:
# Pairwise cosine similarity between the rows of the count matrix;
# entry [i, j] compares the products in rows i and j.
cs = cosine_similarity(cm)

#### View the cosine similarity matrix

In [71]:
# Display the similarity matrix.
cs

array([[1.        , 0.6681531 , 0.6681531 , ..., 0.        , 0.        ,
        0.        ],
       [0.6681531 , 1.        , 0.875     , ..., 0.        , 0.        ,
        0.        ],
       [0.6681531 , 0.875     , 1.        , ..., 0.        , 0.        ,
        0.        ],
       ...,
       [0.        , 0.        , 0.        , ..., 1.        , 0.8819171 ,
        0.80178373],
       [0.        , 0.        , 0.        , ..., 0.8819171 , 1.        ,
        0.82495791],
       [0.        , 0.        , 0.        , ..., 0.80178373, 0.82495791,
        1.        ]])

#### Get the shape of the cosine similarity matrix

In [72]:
# The matrix is square: one row and one column per product.
cs.shape

(50, 50)

#### Get a recently purchased or viewed product

In [181]:
# Slug of the example product to generate recommendations for.
product = 'nike-blazer-mid-off-white-all-hallows-eve'

#### product id

In [182]:
# Look up the id of the first row whose slug matches the selected product.
product_id = df.loc[df['slug'] == product, 'id'].values[0]
product_id

37

### Create a list of similarity scores [(row index, similarity score), (...)]
#### Note: We subtract 1 because the row for a product sits at position (product id - 1) in the matrix

In [183]:
# Pair each similarity score in the selected product's row with its position in that row.
scores = [(row_idx, score) for row_idx, score in enumerate(cs[product_id - 1])]
scores[:5]

[(0, 0.0), (1, 0.0), (2, 0.0), (3, 0.0), (4, 0.0)]

#### Sort list

In [184]:
# Rank the (index, score) pairs from most to least similar; the sort is stable,
# so tied scores keep their original index order.
scored_scores = list(scores)
scored_scores.sort(key=lambda pair: pair[1], reverse=True)
scored_scores[:5]

[(36, 0.9999999999999999),
 (23, 0.7905694150420948),
 (37, 0.6681531047810608),
 (38, 0.6681531047810608),
 (21, 0.47434164902525683)]

#### Exclude the current product from the list (a product is always its own most similar item, so it sorts first)

In [185]:
# Remove the current product by its row index rather than by position in the ranking.
# A product with identical token counts ties with it, and with a lower row index would
# sort ahead of it; dropping the first entry would then remove the wrong product and
# leave the current one in the recommendations.
sorted_scores = [pair for pair in scored_scores if pair[0] != product_id - 1]
sorted_scores[:5]

[(23, 0.7905694150420948),
 (37, 0.6681531047810608),
 (38, 0.6681531047810608),
 (21, 0.47434164902525683),
 (22, 0.47434164902525683)]

### Recommend 3 products by running a GET request for those product ids
#### Note: we add 1 because the row index is 1 lower than the actual product id

In [144]:
#base_url = query_url

In [186]:
# Fetch and print the names of the three best-ranked recommendations.
# Slicing (instead of indexing with range(3)) avoids an IndexError when fewer than
# three candidates are available.
for row_idx, _score in sorted_scores[:3]:
    # Row indices are zero-based while product ids are one higher
    # (assumes ids are contiguous, as in the table shown earlier).
    recommend_prod_id = row_idx + 1

    # Use a dedicated name so the list-endpoint `query_url` is not overwritten;
    # re-running the product-list request afterwards still hits the right URL.
    product_url = f'{base_url}product/{recommend_prod_id}'
    response = requests.get(url=product_url, headers=HEADER, timeout=10)
    # Fail on an HTTP error here instead of with a KeyError on 'name' below.
    response.raise_for_status()
    recommend_product = response.json()
    print(recommend_product['name'])
    

nike blazer mid off white
nike blazer mid off white grim reaper
nike blazer mid off white wolf grey


#### Save the cosine similarity matrix as a pickle file

In [146]:
import pickle

In [153]:
#pickle.dump(cm, open("countmatrix.pickle", "wb"))

In [154]:
# Persist the similarity matrix. The context manager closes the file handle
# (flushing the data to disk) even if pickling fails.
with open("simscores.pickle", "wb") as pickle_file:
    pickle.dump(cs, pickle_file)

### Test the pickle file by loading it back

In [152]:
# Reload the similarity matrix from disk; the context manager closes the file handle.
# pickle.load can execute arbitrary code, so only load files written by this notebook.
with open('simscores.pickle', 'rb') as pickle_file:
    sim_scores_model = pickle.load(pickle_file)
sim_scores_model

array([[1.        , 0.6681531 , 0.6681531 , ..., 0.        , 0.        ,
        0.        ],
       [0.6681531 , 1.        , 0.875     , ..., 0.        , 0.        ,
        0.        ],
       [0.6681531 , 0.875     , 1.        , ..., 0.        , 0.        ,
        0.        ],
       ...,
       [0.        , 0.        , 0.        , ..., 1.        , 0.8819171 ,
        0.80178373],
       [0.        , 0.        , 0.        , ..., 0.8819171 , 1.        ,
        0.82495791],
       [0.        , 0.        , 0.        , ..., 0.80178373, 0.82495791,
        1.        ]])

In [188]:
def recommend_products(prod_id, n=3, timeout=10):
    """Return the API records of the products most similar to ``prod_id``.

    Similarities are read from the module-level ``sim_scores_model`` matrix, whose
    row ``i`` is taken to describe the product with id ``i + 1``. Each recommended
    product is then fetched from ``{base_url}product/<id>`` using ``HEADER``.

    Args:
        prod_id: 1-based product id to recommend for.
        n: Maximum number of recommendations to return (default 3).
        timeout: Seconds to wait for each API request (default 10).

    Returns:
        A list of up to ``n`` decoded JSON product records, most similar first.

    Raises:
        IndexError: If ``prod_id`` does not map to a row of the matrix.
        requests.HTTPError: If the API answers a product request with an error status.
    """
    row = prod_id - 1
    # Reject ids below 1 explicitly: a negative row index would wrap around and
    # silently produce recommendations for a different product.
    if not 0 <= row < len(sim_scores_model):
        raise IndexError(f'product id {prod_id} is outside the similarity matrix')

    # Exclude the product itself by index rather than by dropping the top-ranked
    # entry, which is only correct when no other product ties with it.
    candidates = [
        (idx, score)
        for idx, score in enumerate(sim_scores_model[row])
        if idx != row
    ]
    # Stable sort: equally similar products keep their index order.
    candidates.sort(key=lambda pair: pair[1], reverse=True)

    recommended_products = []
    # Slicing tolerates catalogues with fewer than ``n`` other products.
    for idx, _score in candidates[:max(n, 0)]:
        # Matrix indices are zero-based; product ids are one higher.
        query_url = f'{base_url}product/{idx + 1}'
        response = requests.get(url=query_url, headers=HEADER, timeout=timeout)
        # Do not return an error payload as if it were a product.
        response.raise_for_status()
        recommended_products.append(response.json())
    return recommended_products

    

In [187]:
# Example input for recommend_products: the same slug used in the walkthrough above.
new_product = 'nike-blazer-mid-off-white-all-hallows-eve'
# Id of that slug; matches the value the id lookup returned earlier in the notebook.
new_prod_id = 37

In [189]:
# Request recommendations for the example product and display the returned records.
recommend_products(new_prod_id)

[{'id': 24,
  'name': 'nike blazer mid off white',
  'slug': 'nike-blazer-mid-off-white',
  'category': 1,
  'brand': 2,
  'retail_price': '130.00',
  'sale_price': '720.00',
  'qty': 501,
  'release_date': '2017-09-09',
  'is_available': True},
 {'id': 38,
  'name': 'nike blazer mid off white grim reaper',
  'slug': 'nike-blazer-mid-off-white-grim-reaper',
  'category': 1,
  'brand': 2,
  'retail_price': '130.00',
  'sale_price': '908.00',
  'qty': 1398,
  'release_date': '2018-10-03',
  'is_available': True},
 {'id': 39,
  'name': 'nike blazer mid off white wolf grey',
  'slug': 'nike-blazer-mid-off-white-wolf-grey',
  'category': 1,
  'brand': 2,
  'retail_price': '130.00',
  'sale_price': '1150.00',
  'qty': 288,
  'release_date': '2018-08-25',
  'is_available': True}]