In [None]:
"""
Importing all required libraries.
"""

import numpy as np
import pandas as pd
import sklearn
import matplotlib.pyplot as plt
from sklearn.metrics import mean_squared_error
from sklearn.metrics import median_absolute_error
from collections import OrderedDict
import random
from scipy.sparse import csr_matrix
from sklearn.neighbors import NearestNeighbors
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
    
# Load the review data. The code below reads its customer_id, product_id,
# star_rating and product_title columns.
ratings = pd.read_csv("Amazon_Reviews.csv")  

# Creating user-item matrix to be used in collaborative system

def create_matrix(df):
    """Build the sparse product-by-customer rating matrix and its id/index lookups.

    Args:
        df: DataFrame with ``customer_id``, ``product_id`` and ``star_rating``
            columns.

    Returns:
        A tuple ``(X, customer_mapper, product_mapper, customer_inv_mapper,
        product_inv_mapper)``:

        * ``X`` -- ``csr_matrix`` of shape ``(n_products, n_customers)`` holding
          the star ratings. ``csr_matrix`` sums entries that share the same
          (product, customer) coordinates, so repeated reviews add up.
        * ``customer_mapper`` / ``product_mapper`` -- id -> column / row index.
        * ``customer_inv_mapper`` / ``product_inv_mapper`` -- index -> id.
    """
    # np.unique returns the sorted distinct ids. Compute it once per column and
    # derive the sizes and all four lookups from the same arrays.
    customer_ids = np.unique(df["customer_id"])
    product_ids = np.unique(df["product_id"])
    C = len(customer_ids)
    P = len(product_ids)
## Mapping product, customer Ids to indices
    customer_mapper = dict(zip(customer_ids, range(C)))
    product_mapper = dict(zip(product_ids, range(P)))
## Mapping indices to product, customer IDs
    customer_inv_mapper = dict(zip(range(C), customer_ids))
    product_inv_mapper = dict(zip(range(P), product_ids))
    # Translate every review row into its (row, column) position in X.
    customer_index = [customer_mapper[i] for i in df['customer_id']]
    product_index = [product_mapper[i] for i in df['product_id']]
    X = csr_matrix((df["star_rating"], (product_index, customer_index)), shape=(P, C))
    return X, customer_mapper, product_mapper, customer_inv_mapper, product_inv_mapper
  
# Rating matrix and id/index lookups for the full review set; used by the
# collaborative (KNN) recommender.
X, customer_mapper, product_mapper, customer_inv_mapper, product_inv_mapper = create_matrix(ratings)
  
"""
Creating recommendations by KNN
"""
def knn_product(product,product_mapper, X, k, metric='cosine', show_distance=False):
    """Return the ids of the ``k`` products nearest to ``product`` in ``X``.

    Args:
        product: Id of the query product; must be a key of ``product_mapper``.
        product_mapper: Mapping product id -> row index of ``X``.
        X: Rating matrix with one row per product.
        k: Number of neighbours wanted. Fewer are returned when ``X`` has
            fewer than ``k + 1`` rows.
        metric: Distance metric handed to ``NearestNeighbors``.
        show_distance: When true, return ``(product_id, distance)`` pairs
            instead of bare product ids.

    Returns:
        A list of product ids (or ``(product_id, distance)`` pairs), nearest
        first, never containing ``product`` itself.

    Raises:
        KeyError: If ``product`` is not in ``product_mapper``.
    """
    product_ind = product_mapper[product]
    # Invert the mapping that was passed in, so row indices of X are translated
    # with the same mapping that built X (not with an unrelated global one).
    inv_mapper = {ind: pid for pid, ind in product_mapper.items()}
    # One extra neighbour is requested because the query product is normally
    # returned as its own nearest neighbour; never ask for more rows than exist.
    n_neighbors = min(k + 1, X.shape[0])
    kNN = NearestNeighbors(n_neighbors=n_neighbors, algorithm="brute", metric=metric)
    kNN.fit(X)
    product_vec = X[product_ind].reshape(1,-1)
    distances, indices = kNN.kneighbors(product_vec, return_distance=True)
    # Drop the query product by index rather than by position: with tied
    # distances it is not guaranteed to be ranked first.
    neighbours = [
        (int(ind), float(dist))
        for dist, ind in zip(distances[0], indices[0])
        if int(ind) != product_ind
    ][:k]
    if show_distance:
        return [(inv_mapper[ind], dist) for ind, dist in neighbours]
    return [inv_mapper[ind] for ind, _ in neighbours]
  
# Lookup product_id -> product_title. When a product_id appears in several
# rows, the title from the last such row is the one kept.
product_titles = dict(zip(ratings['product_id'], ratings['product_title']))


def process_text(text):
    """Normalise a string: collapse whitespace runs, trim the ends, lower-case.

    Args:
        text: The string to normalise.

    Returns:
        The words of ``text`` joined by single spaces, in lower case.
    """
    words = text.split()
    return ' '.join(words).lower()
  
## Function of Hybrid recommendation system    
def hybrid_recommendation(product,ratings_hybrid,hybrid_data,Actual_data):
    """Run the KNN recommender on a pre-filtered review subset and plot all results.

    Besides its arguments, this function reads the module-level ``ratings``
    frame and appends to the module-level result lists ``Predicted``,
    ``Actual``, ``MSE``, ``MAE``, ``Precision``, ``Recall`` and ``F1``, which
    must already exist when it is called.

    Args:
        product: Id of the product to find neighbours for; it is looked up in
            the mapping built from ``ratings_hybrid``.
        ratings_hybrid: Review subset handed to ``create_matrix``.
        hybrid_data: Reference value(s) the predicted rating is compared with
            for the two error scores.
        Actual_data: Sequence whose first element is appended to ``Actual``.

    Returns:
        None. Recommendations are printed and three charts are shown.
    """
    # Only the matrix and the product -> row mapping of the subset are needed.
    X1, _, product_mapper1, _, _ = create_matrix(ratings_hybrid)
    product_titles1 = dict(zip(ratings['product_id'], ratings['product_title']))
    similar_ids1 = knn_product(product,product_mapper1, X1, k=5)

    print("Hybrid System Recommends:")
    for item_no, neighbour_id in enumerate(similar_ids1, start=1):
        print("Item",item_no,":",product_titles1[neighbour_id])

    # Mean star rating of every recommended product, taken over the full review set.
    rating_list_hybrid = []
    for neighbour_id in similar_ids1:
        stars = list(ratings['star_rating'][ratings['product_id'] == neighbour_id])
        rating_list_hybrid.append(sum(stars)/len(stars))

    ## Predicted
    print("  ")
    Predicted_hybrid = [sum(rating_list_hybrid)/len(rating_list_hybrid)]

    Predicted.append(Predicted_hybrid[0])
    Actual.append(Actual_data[0])
    MSE.append(mean_squared_error(Predicted_hybrid,hybrid_data))
    # NOTE: despite the list's name, this stores the *median* absolute error.
    MAE.append(median_absolute_error(Predicted_hybrid,hybrid_data))

    precision_hybrid, recall_hybrid, f1_hybrid = metrics(Predicted_hybrid,rating_list_hybrid)
    Precision.append(precision_hybrid)
    Recall.append(recall_hybrid)
    F1.append(f1_hybrid)

    # One x-axis label per entry that the result lists are expected to hold by now.
    Type = ['Collab','Content','Hybrid']
    plotting(Predicted,Actual,F1,Type,'Predicted','Actual')
    plotting(MAE,MSE,F1,Type,'MAE','MSE')
    plotting(Precision,Recall,F1,Type,'Precision','Recall')
     
## Function to plot multiple graphs  
def plotting(Predicted,Actual,F_score,Type,Name1,Name2):
    """Draw two (optionally three) dashed line series over the recommender types.

    The chart title is taken from the module-level ``product`` variable.

    Args:
        Predicted: Values of the first series (drawn in red, labelled ``Name1``).
        Actual: Values of the second series (drawn in blue, labelled ``Name2``).
        F_score: Values of the green "F-Score" series; only drawn when
            ``Name1`` is ``'Precision'``.
        Type: X-axis categories, one per value in each series.
        Name1: Legend label of the first series.
        Name2: Legend label of the second series.
    """
    series = [(Predicted, 'r', Name1), (Actual, 'b', Name2)]
    if Name1 == 'Precision':
        series.append((F_score, 'g', 'F-Score'))

    for values, colour, label in series:
        plt.plot(Type, values, marker='o', linestyle='--', color=colour, label=label)

    plt.title(product)
    plt.xlabel('Recommendation Type')
    plt.ylabel('Value')
    plt.legend()
    plt.show()
    
## Function to calculate evaluation metrics
def metrics(Predicted,type_list,threshold=3.5):
    """Score one predicted rating against a list of ratings as a binary classifier.

    A rating is "positive" when it is ``>= threshold``. Only ``Predicted[0]``
    is used: it is compared with every entry of ``type_list`` in turn.

    Args:
        Predicted: Sequence whose first element is the predicted rating.
        type_list: Ratings to compare the prediction against.
        threshold: Smallest rating that counts as positive (default 3.5).

    Returns:
        ``(precision, recall, f1)``. All three are ``0`` when there is no true
        positive, which includes an empty ``type_list``.
    """
    tp = fn = fp = 0
    for rating in type_list:
        actual_positive = rating >= threshold
        predicted_positive = Predicted[0] >= threshold
        if actual_positive and predicted_positive:
            tp += 1
        elif actual_positive:
            fn += 1
        elif predicted_positive:
            fp += 1
        # True negatives are not needed by any of the returned scores.

    # Without a true positive every score is 0; this also avoids dividing by zero.
    if tp == 0:
        return 0, 0, 0

    precision = tp/(tp+fp)
    recall = tp/(tp+fn)
    f1 = (2*precision*recall)/(precision+recall)
    return precision,recall,f1
    

In [None]:
## Text pre-processing: normalise the product titles before they are matched
## and vectorised further down.

# Series.apply calls process_text once per title; a row-wise DataFrame.apply
# would build a full row object for every review just to read one column.
ratings['product_title'] = ratings['product_title'].apply(process_text)

# Plain lists in row order: position i in both lists refers to row i of `ratings`.
product_list = list(ratings['product_title'])
product_id = list(ratings['product_id'])

## Selecting random products to be used in the recommendation systems
# Seed once, before the loop, so the selection is reproducible; re-seeding on
# every iteration would make each iteration draw the same values again.
random.seed(170)
candidate_ids = list(ratings['product_id'])
list_input = []
for _ in range(0, 1):
    # Draw once and test that same value, so the duplicate check applies to
    # the product that is actually added.
    # NOTE: this keeps the first draw after seeding. The previous code threw
    # that draw away and appended a second one, so the selected product can
    # differ from earlier runs.
    candidate = random.choice(candidate_ids)
    if candidate not in list_input:
        list_input.append(candidate)

def _mean_star_rating(mask):
    """Return the mean ``star_rating`` of the ``ratings`` rows selected by ``mask``."""
    stars = list(ratings['star_rating'][mask])
    return sum(stars)/len(stars)


## Content based system
## Converting text of product title/description to a vector for the content based system.
## Fitted once, before the loop: the vectors do not depend on the selected product.
tf_idf = TfidfVectorizer(stop_words='english')
tf_idf_matrix = tf_idf.fit_transform(ratings['product_title'])
product_titles = dict(zip(ratings['product_id'], ratings['product_title']))

for productid in list_input:
    ## Collaborative system
    similar_ids = knn_product(productid,product_mapper, X, k=5)
    product_title = product_titles[productid]
    # `product` is read as a global by plotting() for the chart title.
    product = productid
    # Module-level result lists: hybrid_recommendation() appends its own
    # scores to them and plots them. Order of entries: collab, content, hybrid.
    Predicted,Actual, MSE, MAE =[],[],[],[]
    Precision, Recall, F1 =[],[],[]

    print(f"Since you purchased {product_titles[productid]}")
    print(" ")
    print("Collaborative System recommends:")
    for item_no, neighbour_id in enumerate(similar_ids, start=1):
        print("Item",item_no,":",product_titles[neighbour_id])

    # Mean rating of each recommendation, matched by (normalised) title.
    rating_list = [
        _mean_star_rating(ratings['product_title'] == product_titles[neighbour_id])
        for neighbour_id in similar_ids
    ]
    ## Predicted
    print("  ")
    Predicted_collab = [sum(rating_list)/len(rating_list)]

    ## Actual
    Actual_collab = [_mean_star_rating(ratings['product_id'] == productid)]

    Predicted.append(Predicted_collab[0])
    Actual.append(Actual_collab[0])
    MSE.append(mean_squared_error(Predicted_collab,Actual_collab))
    # NOTE: despite the list's name, this stores the *median* absolute error.
    MAE.append(median_absolute_error(Predicted_collab,Actual_collab))
    precision_collab, recall_collab, f1_collab = metrics(Predicted_collab,rating_list)
    Precision.append(precision_collab)
    Recall.append(recall_collab)
    F1.append(f1_collab)

    ## Content based system
    ## Calculating cosine similarity between the selected item and all items.
    # First review row of the selected product.
    item = product_id.index(productid)
    # Only this one row of the similarity matrix is ever read, so compute just
    # that row instead of the full N x N matrix.
    similarity_row = cosine_similarity(tf_idf_matrix[item], tf_idf_matrix).ravel()
    similarity_scores = pd.DataFrame(similarity_row, columns=["score"])
    # Row positions of the 1000 most similar reviews, best first.
    product_indices = similarity_scores.sort_values("score", ascending=False)[0:1000].index

    # Titles of those reviews, de-duplicated while keeping the ranking order.
    recommend_list = list(dict.fromkeys(product_list[i] for i in product_indices))
    print("Content System recommeds:")
    # Position 0 is skipped: presumably it is the selected product's own title
    # (it is maximally similar to itself) -- TODO confirm for all-stop-word titles.
    for item_no, title in enumerate(recommend_list[1:6], start=1):
        print("Item",item_no,":",title)

    rating_list_content = [
        _mean_star_rating(ratings['product_title'] == title)
        for title in recommend_list[:10]
    ]
    ## Predicted
    print("  ")
    Predicted_content = [sum(rating_list_content)/len(rating_list_content)]

    precision_content, recall_content, f1_content = metrics(Predicted_content,rating_list_content)
    Precision.append(precision_content)
    Recall.append(recall_content)
    F1.append(f1_content)

    Predicted.append(Predicted_content[0])
    Actual.append(Actual_collab[0])
    MSE.append(mean_squared_error(Predicted_content,Actual_collab))
    MAE.append(median_absolute_error(Predicted_content,Actual_collab))

    ## Hybrid system: collaborative filtering restricted to the content matches
    product_indices_hybrid = product_indices
    # One positional selection replaces the row-by-row DataFrame.append, which
    # is quadratic and no longer exists in pandas 2.x.
    hybrid_dataset = ratings.iloc[list(product_indices_hybrid)]
    # Take the seed product from the hybrid subset itself (first match in
    # ranking order), so it is deterministic and guaranteed to be present in
    # the mapping that hybrid_recommendation() builds from that subset.
    # Assumes recommend_list holds at least two titles.
    hybrid_product = list(dict.fromkeys(
        hybrid_dataset['product_id'][hybrid_dataset['product_title'] == recommend_list[1]]
    ))
    Actual_hybrid = [_mean_star_rating(ratings['product_id'] == hybrid_product[0])]
    hybrid_recommendation(hybrid_product[0],hybrid_dataset,Actual_hybrid,Actual_collab)
    

    