In [None]:
import numpy as np
import pandas as pd
from sklearn.metrics.pairwise import pairwise_distances 

In [None]:
# Load the raw click/view export into the working frame.
# NOTE(review): absolute, machine-specific path; the notebook only runs where this file exists.
excel_path = r'C:\Users\Ravideep\Desktop\expdata.xlsx'
df = pd.read_excel(excel_path)

In [None]:
# Drop the 'Unnamed: 0' column (presumably the index saved into the spreadsheet; confirm).
# Reassigning with errors='ignore' keeps this cell safe to re-run: the original
# in-place drop raised KeyError the second time because the column was already gone.
df = df.drop(columns=['Unnamed: 0'], errors='ignore')
df

Unnamed: 0,User,Metric,Total clicks,Total Views,Click_by_View
0,1,SalestoDate__Number,24,57,0.421053
1,1,NumberOfTransactionProduct__Barchart,34,64,0.531250
2,1,GrowthVsTime__Barchart,29,39,0.743590
3,1,SalesVsTime__Linechart,12,50,0.240000
4,1,AverageOrderValueVsTime__Linechart,44,66,0.666667
...,...,...,...,...,...
165,30,NumberOfTransactionMarket__Linechart,39,54,0.722222
166,30,MostAnomalousMarkets__Piechart,56,71,0.788732
167,30,SalestoDate__Number,33,41,0.804878
168,30,LeastAnomalousProducts__Barchart,15,18,0.833333


In [None]:
# Label-encode the Metric column while keeping a code -> metric-name dictionary.
# The guard makes the cell idempotent: once Metric holds integer codes, re-running
# would otherwise rebuild cat_mapping as code -> code and lose the metric names.
if not pd.api.types.is_integer_dtype(df["Metric"]):
    metric_categorical = df["Metric"].astype('category')
    # Position i in .cat.categories is exactly the category that receives code i.
    cat_mapping = dict(enumerate(metric_categorical.cat.categories))
    df["Metric"] = metric_categorical.cat.codes

In [None]:
#function for predicting user-user and item-item collaborative engine
def predict(feedbacks, similarity, type='user'):
    """Predict a (users x items) score matrix by similarity-weighted averaging.

    Parameters
    ----------
    feedbacks : array-like, shape (n_users, n_items)
        Observed feedback value for every user/item pair.
    similarity : array-like
        Square weight matrix: (n_users, n_users) when ``type='user'``,
        (n_items, n_items) when ``type='item'``.
    type : {'user', 'item'}
        'user' adds the similarity-weighted deviations of the other users to
        each user's own mean feedback; 'item' takes the similarity-weighted
        average of the user's feedback over items.

    Returns
    -------
    numpy.ndarray, shape (n_users, n_items)
        Predicted scores.

    Raises
    ------
    ValueError
        If ``type`` is neither 'user' nor 'item'.
    """
    # Accept lists / DataFrames as well as ndarrays.
    feedbacks = np.asarray(feedbacks, dtype=float)
    similarity = np.asarray(similarity, dtype=float)

    # Total absolute weight per similarity row. A row that sums to zero would
    # produce 0/0 = NaN, so divide by 1 instead and let that row contribute 0.
    weight_totals = np.abs(similarity).sum(axis=1)
    weight_totals = np.where(weight_totals == 0, 1.0, weight_totals)

    if type == 'user':
        # Centre every user on their own mean so rating-scale differences
        # between users do not leak into the prediction.
        mean_user_feedback = feedbacks.mean(axis=1)[:, np.newaxis]
        feedbacks_diff = feedbacks - mean_user_feedback
        pred = mean_user_feedback + similarity.dot(feedbacks_diff) / weight_totals[:, np.newaxis]
    elif type == 'item':
        pred = feedbacks.dot(similarity) / weight_totals[np.newaxis, :]
    else:
        raise ValueError("type must be 'user' or 'item', got {!r}".format(type))
    return pred

In [None]:
def get_recommendations(col_number,no_of_recomm):
    """Build hybrid collaborative-filtering recommendations and export them to Excel.

    Reads the module-level ``df`` (columns User, Metric, then the three value
    columns), ``cat_mapping`` (metric code -> metric name) and ``predict``.

    Parameters
    ----------
    col_number : int
        Position of the value column inside ``df.itertuples()`` rows:
        3 = total clicks, 4 = total views, 5 = click-by-view ratio.
        Any other value prints a message and returns without doing anything.
    no_of_recomm : int
        Number of metrics to recommend per user. Values below 0 are treated as 0.

    Side effects
    ------------
    Writes six workbooks to the current directory: the user and item
    similarity matrices, the user-user and item-item prediction matrices,
    their average, and the final per-user recommendations.

    Returns
    -------
    None
    """
    # Labels are only used in the exported file names; kept exactly as before
    # so existing output files keep their names.
    column_labels = {3: "Total click", 4: "Total View", 5: "Click_by_View"}
    if col_number not in column_labels:
        print("Column entered is invalid!!")
        return
    column = column_labels[col_number]
    top_n = max(no_of_recomm, 0)

    # Map the actual user ids / metric codes onto contiguous matrix positions.
    # Metric codes are 0-based, so the former ``line[2]-1`` shifted every metric
    # one column to the left (code 0 wrapped around to the last column) and the
    # names looked up through cat_mapping were those of the neighbouring metric.
    user_ids = np.sort(df.User.unique()).tolist()
    metric_codes = np.sort(df.Metric.unique()).tolist()
    user_row = {user: row for row, user in enumerate(user_ids)}
    metric_col = {code: col for col, code in enumerate(metric_codes)}
    n_users = len(user_ids)
    n_items = len(metric_codes)

    # creating User X Item matrix, plus a mask of the pairs that actually occur in df
    data_matrix = np.zeros((n_users, n_items))
    seen = np.zeros((n_users, n_items), dtype=bool)
    for line in df.itertuples():
        row = user_row[line[1]]
        col = metric_col[line[2]]
        data_matrix[row, col] = line[col_number]
        seen[row, col] = True

    # User-User and Item-Item collaborative prediction.
    # pairwise_distances(metric='cosine') yields cosine *distance* (1 - similarity);
    # convert it so that similar users/items receive the larger weights.
    user_similarity = 1 - pairwise_distances(data_matrix, metric='cosine')
    item_similarity = 1 - pairwise_distances(data_matrix.T, metric='cosine')
    pd.DataFrame(data = user_similarity).to_excel("User similarity Matrix using column- "+column+".xlsx")
    pd.DataFrame(data = item_similarity).to_excel("Item similarity Matrix using column- "+column+".xlsx")

    user_prediction = predict(data_matrix, user_similarity, type='user')
    item_prediction = predict(data_matrix, item_similarity, type='item')
    pd.DataFrame(data = user_prediction).to_excel("User-User prediction Matrix (User X Item) using column- "+column+".xlsx")
    pd.DataFrame(data = item_prediction).to_excel("Item-Item prediction Matrix (User X Item) using column- "+column+".xlsx")

    # averaging User-User and Item-Item prediction scores
    prediction_matrix = (user_prediction + item_prediction)/2
    pd.DataFrame(data = prediction_matrix).to_excel("Prediction Matrix using column- "+column+".xlsx")

    # Generating the top ``no_of_recomm`` recommendations for each user
    metric_columns = ["Metric " + str(rank + 1) for rank in range(top_n)]
    records = []
    for row in range(n_users):
        # Metrics the user already has must never be recommended. The previous
        # mask only zeroed values > 1, so ratios in (0, 1] slipped through.
        scores = np.where(seen[row], -np.inf, prediction_matrix[row])
        # Highest score first; the stable sort keeps ties in column order.
        ranked = np.argsort(-scores, kind="stable")[:top_n]
        names = [cat_mapping[metric_codes[k]] for k in ranked if np.isfinite(scores[k])]
        # Pad when the user has fewer unseen metrics than requested.
        names += [None] * (top_n - len(names))
        record = {'UserID': user_ids[row]}
        record.update(zip(metric_columns, names))
        records.append(record)

    # Build the frame once; DataFrame.append was removed in pandas 2.0 and
    # growing a frame row by row is quadratic anyway.
    recommendations = pd.DataFrame(records, columns=['UserID'] + metric_columns)
    recommendations.to_excel("Recommendations using column- "+column+".xlsx")
    return


In [None]:
# Ask which value column drives the recommendations and how many to produce.
command = input("Select\n0 for Total clicks\n1 for Total Views\n2 for Click by View\n")
no_of_recomm = input("Enter number of recommendations required")
# Menu choices 0/1/2 correspond to itertuples positions 3/4/5 expected by get_recommendations.
selected_column = int(command) + 3
requested_count = int(no_of_recomm)
get_recommendations(selected_column, requested_count)

Select
0 for Total clicks
1 for Total Views
2 for Click by View
2
Enter number of recommendations required5
