<a href="https://colab.research.google.com/github/JRSmiffy/recommendationSystem/blob/master/eCommerce2.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# eCommerce Basket Recommendations

This notebook complements an eCommerce website demo. In the demo, items of various brands are sold. In order to return recommendations to customers, based on what is in their basket, I have built this recommendation system. It relies on KNN Item Based filtering which finds similar items to ones in the customer's basket, based on dummy purchase history data that I created. The dummy data was designed to reflect patterns across the brands and products.


## Import the modules

In [0]:
import pandas as pd # Handling data (data frame) - switching rows and columns, changing rows titles, randomly swapping rows, viewing csv table, adding random noise to the data.
import random # Adding random noise to the data.

from scipy.sparse import csr_matrix # Allows us to perform kNN on the matrix using scipy lib.
from sklearn.neighbors import NearestNeighbors # importing kNN function

## Load the data

In [0]:
url = 'https://raw.githubusercontent.com/JRSmiffy/recommendationSystem/master/seed240.csv'
seed = pd.read_csv(url)

In [0]:
seed.tail() 

Unnamed: 0,1,1.1,1.2,0,0.1,0.2,0.3,0.4,0.5
202,0,0,0,0,1,0,0,1,0
203,0,1,0,0,0,0,0,1,0
204,0,0,1,0,0,1,0,0,0
205,0,0,0,0,0,1,0,0,1
206,0,0,1,0,0,0,0,0,1


In [0]:
# Viewing the first row
rowOne = seed.loc[ 0 , : ] # The first line (1,1,1,0,0,0,0,0,0) is missing; although the repetition should solve this...
print(rowOne)

print('\n\n')

# Viewing the first row, first column
itemOneRowOne = seed.iloc[0, 0]
print(itemOneRowOne)

1      0
1.1    0
1.2    0
0      1
0.1    1
0.2    1
0.3    0
0.4    0
0.5    0
Name: 0, dtype: int64



0


## Prepare the data

Add some noise to the data
###1 - apply random number of changes per row.
###2 - apply random number of purchases per item.

In [0]:
# For each row in seed:
# Create an arr[] with a random choice of randint(0,2) items from the row
for i in range(206):
  num_items = random.randint(0,2)
  items_to_change = []
  for j in range(num_items):
    items_to_change.append(random.randint(0,8))
  # Remove duplicates
  items_to_change = list(dict.fromkeys(items_to_change))
  
  
  # for each item in arr[]:
  # Calculate a random number between (-1,2)
  # Take purchase amount for this item and add this random number
  # If this number < 0, number = 0  
  for k in range(num_items-1):
    num_purchases = random.randint(-1,2)
    seed.iloc[i,items_to_change[k]] = abs(seed.iloc[i,items_to_change[k]] + num_purchases)
  

In [0]:
seed = seed.transpose() # Switch rows and columns
seed = seed.sample(frac=1) # Randomize rows
seed

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,...,167,168,169,170,171,172,173,174,175,176,177,178,179,180,181,182,183,184,185,186,187,188,189,190,191,192,193,194,195,196,197,198,199,200,201,202,203,204,205,206
0.5,0,1,0,0,1,0,0,0,0,0,0,0,1,1,0,2,0,0,0,0,2,1,1,0,1,0,0,1,0,0,0,0,0,0,0,1,1,0,0,0,...,0,0,0,0,0,0,1,1,1,0,0,0,0,0,0,1,1,0,1,0,0,1,0,0,0,0,0,0,0,1,1,0,0,0,0,0,0,0,1,1
0.3,0,1,1,0,0,0,0,0,0,0,0,1,0,1,0,1,1,0,0,0,0,0,1,0,1,1,0,0,0,0,0,0,0,0,1,0,3,0,1,1,...,0,0,0,0,0,1,0,1,0,1,1,0,0,0,0,0,0,0,1,1,0,0,0,0,0,0,0,0,1,0,1,0,1,1,0,0,0,0,0,0
0.1,1,0,0,1,0,0,0,1,1,1,0,0,0,0,0,0,0,0,1,0,0,0,0,1,0,0,1,0,0,0,0,1,1,0,1,0,0,0,0,0,...,0,0,1,1,0,0,0,0,0,0,0,0,2,0,0,0,0,1,0,0,1,0,0,0,0,1,1,0,0,0,0,0,0,0,0,1,0,0,0,0
1.1,0,0,0,1,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,1,2,0,0,0,2,0,0,0,0,0,0,...,1,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0
1.2,0,0,0,0,1,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,1,0,1,0,0,0,0,1,0,1,1,0,0,0,0,0,0,1,0,0,...,1,1,0,0,0,0,0,0,0,0,0,0,0,0,1,0,1,0,0,2,0,1,0,1,1,0,0,0,0,1,0,0,0,0,0,0,0,1,0,1
0.0,1,0,1,0,0,0,0,0,0,0,1,0,0,0,1,1,0,1,0,0,0,0,0,1,0,1,0,0,0,0,0,1,0,1,0,0,0,1,1,0,...,0,0,1,0,1,0,0,0,1,1,0,1,0,0,0,0,0,1,0,1,0,0,0,0,0,1,0,1,0,0,0,1,1,0,1,0,0,0,0,0
0.2,1,0,0,0,1,0,0,0,0,2,1,0,0,0,0,0,0,0,0,0,1,1,0,0,0,0,0,1,0,0,0,0,1,1,0,0,0,0,0,0,...,0,0,0,1,1,0,0,0,0,0,0,0,0,0,1,1,0,1,0,0,0,1,0,0,0,0,1,1,0,0,0,0,0,0,0,0,0,1,1,0
0.4,0,1,0,0,0,0,1,0,0,0,0,1,1,0,0,0,0,0,1,1,0,0,0,0,1,0,1,0,0,0,0,0,0,0,1,1,0,0,0,0,...,0,0,0,0,0,1,1,0,0,0,0,0,1,1,0,0,0,0,1,0,1,0,0,1,0,0,0,0,1,1,0,0,0,0,0,1,1,1,0,0
1.0,0,0,1,0,0,1,0,1,0,0,0,0,0,0,1,0,1,1,0,0,0,0,0,0,0,1,0,0,1,0,1,0,0,0,0,0,0,1,0,1,...,0,1,0,0,0,0,0,0,1,0,1,1,0,0,0,0,0,0,0,1,0,0,1,0,1,0,0,0,0,0,0,1,0,1,1,0,0,0,0,0


In [0]:
# Rename the rows with product name
seed.index.values[0] = ".NET Mug"
seed.index.values[1] = ".NET Sheet"
seed.index.values[2] = ".NET Hoodie"

seed.index.values[3] = "Java Mug"
seed.index.values[4] = "Java Sheet"
seed.index.values[5] = "Java Hoodie"

seed.index.values[6] = "Python Mug"
seed.index.values[7] = "Python Sheet"
seed.index.values[8] = "Python Hoodie"

seed


Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,...,167,168,169,170,171,172,173,174,175,176,177,178,179,180,181,182,183,184,185,186,187,188,189,190,191,192,193,194,195,196,197,198,199,200,201,202,203,204,205,206
.NET Mug,0,1,0,0,1,0,0,0,0,0,0,0,1,1,0,2,0,0,0,0,2,1,1,0,1,0,0,1,0,0,0,0,0,0,0,1,1,0,0,0,...,0,0,0,0,0,0,1,1,1,0,0,0,0,0,0,1,1,0,1,0,0,1,0,0,0,0,0,0,0,1,1,0,0,0,0,0,0,0,1,1
.NET Sheet,0,1,1,0,0,0,0,0,0,0,0,1,0,1,0,1,1,0,0,0,0,0,1,0,1,1,0,0,0,0,0,0,0,0,1,0,3,0,1,1,...,0,0,0,0,0,1,0,1,0,1,1,0,0,0,0,0,0,0,1,1,0,0,0,0,0,0,0,0,1,0,1,0,1,1,0,0,0,0,0,0
.NET Hoodie,1,0,0,1,0,0,0,1,1,1,0,0,0,0,0,0,0,0,1,0,0,0,0,1,0,0,1,0,0,0,0,1,1,0,1,0,0,0,0,0,...,0,0,1,1,0,0,0,0,0,0,0,0,2,0,0,0,0,1,0,0,1,0,0,0,0,1,1,0,0,0,0,0,0,0,0,1,0,0,0,0
Java Mug,0,0,0,1,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,1,2,0,0,0,2,0,0,0,0,0,0,...,1,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0
Java Sheet,0,0,0,0,1,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,1,0,1,0,0,0,0,1,0,1,1,0,0,0,0,0,0,1,0,0,...,1,1,0,0,0,0,0,0,0,0,0,0,0,0,1,0,1,0,0,2,0,1,0,1,1,0,0,0,0,1,0,0,0,0,0,0,0,1,0,1
Java Hoodie,1,0,1,0,0,0,0,0,0,0,1,0,0,0,1,1,0,1,0,0,0,0,0,1,0,1,0,0,0,0,0,1,0,1,0,0,0,1,1,0,...,0,0,1,0,1,0,0,0,1,1,0,1,0,0,0,0,0,1,0,1,0,0,0,0,0,1,0,1,0,0,0,1,1,0,1,0,0,0,0,0
Python Mug,1,0,0,0,1,0,0,0,0,2,1,0,0,0,0,0,0,0,0,0,1,1,0,0,0,0,0,1,0,0,0,0,1,1,0,0,0,0,0,0,...,0,0,0,1,1,0,0,0,0,0,0,0,0,0,1,1,0,1,0,0,0,1,0,0,0,0,1,1,0,0,0,0,0,0,0,0,0,1,1,0
Python Sheet,0,1,0,0,0,0,1,0,0,0,0,1,1,0,0,0,0,0,1,1,0,0,0,0,1,0,1,0,0,0,0,0,0,0,1,1,0,0,0,0,...,0,0,0,0,0,1,1,0,0,0,0,0,1,1,0,0,0,0,1,0,1,0,0,1,0,0,0,0,1,1,0,0,0,0,0,1,1,1,0,0
Python Hoodie,0,0,1,0,0,1,0,1,0,0,0,0,0,0,1,0,1,1,0,0,0,0,0,0,0,1,0,0,1,0,1,0,0,0,0,0,0,1,0,1,...,0,1,0,0,0,0,0,0,1,0,1,1,0,0,0,0,0,0,0,1,0,0,1,0,1,0,0,0,0,0,0,1,0,1,1,0,0,0,0,0


## Compute Item Similarity - Train KNN Model for item-based collaborative filtering

In [0]:
seed_pivot = seed
seed_matrix = csr_matrix(seed)


model_knn = NearestNeighbors(metric = 'cosine', algorithm = 'brute') # How does this work?
model_knn.fit(seed) # How does this work?

NearestNeighbors(algorithm='brute', leaf_size=30, metric='cosine',
                 metric_params=None, n_jobs=None, n_neighbors=5, p=2,
                 radius=1.0)

## Calculate the most appropriate recommendations for the basket

In [0]:
def basket_rec(listing):
  
  # Keep a track of which items are the best to recommend
  track_keys = []
  track_values = [] # Having a split dictionary like this prevents values from being overwritten as duplicates are added to the dictionary

  for item in listing:
    query_index = item
    distances, indices = model_knn.kneighbors(seed.iloc[query_index, :].values.reshape(1, -1), n_neighbors = 4) # How does this work?
    # n_neighbours = 4 - returns the 3 nearest neighbours, plus itself

    for i in range(0, len(distances.flatten())):
        if i == 0:
            print('Recommendations for {0}:\n'.format(seed_pivot.index[query_index]))
        else:
            print('{0}: {1}, with distance of {2}:'.format(i, seed_pivot.index[indices.flatten()[i]], distances.flatten()[i]))

        track_values.append(distances.flatten()[i]) # Keep a track of all the distances from the 3 nearest neighbours
        track_keys.append(indices.flatten()[i]) # Keep a track of all the indices linked to the distances from the 3 nearest neighours


    print('\n\n\n')

  
  
  def find_label(value):
    return track_keys[track_values.index(value)]
    # for a given value (cosine similariy to basket item), return the unique index referring to an item {0: .NET Mug, 1: .NET Sheet, 2: .NET Hoodie, 3: Java Mug, 4: Java Sheet, 5: Java Hoodie, 6: Python Mug, 7: Python Sheet, 8: Python Hoodie}
  
  
  
  # Remove all items already in the basket from both lists
  # find the lowest (if it is not is the basket or rec), append to rec - delete key and value from lists. Repeat until rec.length = 3.
  rec = []
  recName = []
  while len(rec) < 3:
      if (find_label(min(track_values)) in rec or find_label(min(track_values)) in listing):
        track_keys.remove(find_label(min(track_values)))
        track_values.remove(min(track_values))
      else:
        rec.append(find_label(min(track_values)))
        recName.append(seed_pivot.index[find_label(min(track_values))])
        
        
  print(rec)
  print(recName)


    
  

In [0]:
basket_rec([1,4,5]) 

# Pass in an array of the unique index positions of each item in the basket {0: .NET Mug, 1: .NET Sheet, 2: .NET Hoodie, 3: Java Mug, 4: Java Sheet, 5: Java Hoodie, 6: Python Mug, 7: Python Sheet, 8: Python Hoodie}
# Return an array of 3 unique index positions for the items to be recommended

# Extend to return tuples with the probabilities - softmax/sigmoid... (plot these tuples?) - items that would get recommended twice or more should have an unusually high rating...

Recommendations for .NET Sheet:

1: .NET Mug, with distance of 0.6637944259323054:
2: Python Hoodie, with distance of 0.7103628214292061:
3: Java Hoodie, with distance of 0.7123557544335283:




Recommendations for Java Sheet:

1: .NET Mug, with distance of 0.618614964301763:
2: Python Mug, with distance of 0.6980701800722291:
3: Python Hoodie, with distance of 0.7592179693625932:




Recommendations for Java Hoodie:

1: Python Hoodie, with distance of 0.5752697724180522:
2: .NET Hoodie, with distance of 0.6863537283182894:
3: .NET Sheet, with distance of 0.7123557544335283:




[8, 0, 2]
['Python Hoodie', '.NET Mug', '.NET Hoodie']
