In [3]:
import numpy as np

# Amazon Ranking Algo

### Aim:
To rank all the items in a certain category, taking into account both the ratings and the number of ratings given to a product. This is achieved by applying a Bayesian adjustment to the dataset. 

Further optimizations to the algorithm can include:
- Factor in time period of review
- Factor in reputation score of reviews
- Quality of review
- Stock remaining of item, demand, etc?

### Algorithm Description:
Assign a ranking based on the ratings and number of reviews provided to a certain product relative to the average rating and total number of reviews in that category of products.

Assume a total of $N$ reviews with an average rating of $R$. For each product $i$ with average rating $ r_i $ across the $ n_i $ reviews received, the Bayesian ranking is calculated as:
$$ ranking = (N*R + n_i*r_i) / (N+n_i) $$

Input Parameters:
- Total number of ratings (optional)
- Total average rating (optional)
- A dictionary of all products and the ratings received by each:
    - Key : product name/id
    - Value : list of ratings given

Output Parameters:
- A dictionary of all the products and their ranking:
    - Key : product name/id
    - Value: Bayesian ranking
    

In [17]:
def Bayseian_Ranking(item_ratings, total_num_rating=None, total_avg_rating=None):
    """Score every item with a Bayesian-adjusted average rating.

    Each item's own ratings are blended with a category-wide prior of
    ``total_num_rating`` reviews averaging ``total_avg_rating``::

        score_i = (N * R + n_i * r_i) / (N + n_i)

    where ``n_i * r_i`` is simply the sum of the item's ratings.

    Parameters
    ----------
    item_ratings : dict
        Maps a product name/id to the list of ratings given to it.
    total_num_rating : number, optional
        Prior review count ``N``.
    total_avg_rating : number, optional
        Prior average rating ``R``.

        If *either* of the two totals is omitted, *both* are recomputed
        from ``item_ratings`` (any value passed for the other is ignored).

    Returns
    -------
    dict
        Maps each product name/id to its Bayesian-adjusted score. An item
        with no ratings scores exactly the prior average.

    Notes
    -----
    The two totals are printed to stdout as a side effect.
    """
    if total_num_rating is None or total_avg_rating is None:
        total_num_rating = sum(len(ratings) for ratings in item_ratings.values())
        total_rating = sum(sum(ratings) for ratings in item_ratings.values())
        # With no ratings at all there is no meaningful average; fall back to
        # 0.0 rather than raising ZeroDivisionError.
        total_avg_rating = total_rating / total_num_rating if total_num_rating else 0.0

    print(" Total number of revies: " + str(total_num_rating))
    print(" Total average rating :  " + str(total_avg_rating))

    # N * R is the same for every item, so compute it once.
    prior_mass = total_num_rating * total_avg_rating

    item_ranking = {}
    for item, ratings in item_ratings.items():
        num_ratings = len(ratings)
        denominator = total_num_rating + num_ratings

        if denominator == 0:
            # Neither prior reviews nor item reviews: nothing to average, so
            # report the prior average as-is.
            item_ranking[item] = float(total_avg_rating)
        else:
            # sum(ratings) == n_i * r_i, and unlike a mean it is well defined
            # (zero) for an item that has no ratings yet.
            item_ranking[item] = (prior_mass + sum(ratings)) / denominator

    return item_ranking

In [18]:
# Worked example: two products scored against an explicit category prior
# of 50 reviews averaging 4 stars. The cell's last expression is the
# resulting {product: score} dictionary.
item_ratings = {
    "Canon": [5, 5, 5],
    "HP": [4.5, 5, 4, 5, 4, 5, 3, 5, 4.5, 3.5],
}

Bayseian_Ranking(item_ratings, total_num_rating=50, total_avg_rating=4)

 Total number of revies: 50
 Total average rating :  4


{'Canon': 4.056603773584905, 'HP': 4.058333333333334}