# In [None]:
# from collections import OrderedDict
from collections import Counter
import numpy as np
import pandas
import heapq

# Popularity tables built from the training sessions: how many times each
# item id occurs in the "visits" part and in the "purchases" part of a line.
visit_popularity = Counter()
purchase_popularity = Counter()

# Each line of train.txt has the form "<visits>;<purchases>", where both
# parts are comma-separated item ids (the purchases part may be empty).
with open('train.txt', 'r') as train_file:
    for raw_line in train_file:
        viewed_part, bought_part = raw_line.strip().split(';')
        # Blank ids (e.g. from an empty purchases part) are not counted.
        visit_popularity.update(
            item for item in viewed_part.split(",") if item.strip()
        )
        purchase_popularity.update(
            item for item in bought_part.split(",") if item.strip()
        )

def recommend_by_purchase(items, max_count, *, popularity=None):
    """Return up to ``max_count`` distinct items, most purchased first.

    Duplicates in ``items`` are dropped, keeping the first occurrence.  The
    remaining items are ranked by their count in ``popularity`` (items that
    are missing from it count as 0), highest first.  Items with equal counts
    keep the order in which they first appear in ``items``.

    Args:
        items: Iterable of hashable item ids (e.g. the items visited in a
            session).
        max_count: Maximum number of items to return.
        popularity: Optional mapping ``item -> count`` used for ranking.
            Defaults to the module-level ``purchase_popularity`` counter.

    Returns:
        A list with at most ``max_count`` item ids.
    """
    if popularity is None:
        popularity = purchase_popularity
    # dict.fromkeys de-duplicates while preserving insertion order
    # (guaranteed since Python 3.7), so OrderedDict -- whose import is
    # commented out in this file -- is not needed.
    unique_items = dict.fromkeys(items)
    return heapq.nlargest(
        max_count, unique_items, key=lambda item: popularity.get(item, 0)
    )

def recommend_by_visit(items, max_count, *, popularity=None):
    """Return up to ``max_count`` distinct items, most visited first.

    Duplicates in ``items`` are dropped, keeping the first occurrence.  The
    remaining items are ranked by their count in ``popularity`` (items that
    are missing from it count as 0), highest first.  Items with equal counts
    keep the order in which they first appear in ``items``.

    Args:
        items: Iterable of hashable item ids (e.g. the items visited in a
            session).
        max_count: Maximum number of items to return.
        popularity: Optional mapping ``item -> count`` used for ranking.
            Defaults to the module-level ``visit_popularity`` counter.

    Returns:
        A list with at most ``max_count`` item ids.
    """
    if popularity is None:
        popularity = visit_popularity
    # dict.fromkeys de-duplicates while preserving insertion order
    # (guaranteed since Python 3.7), so OrderedDict -- whose import is
    # commented out in this file -- is not needed.
    unique_items = dict.fromkeys(items)
    return heapq.nlargest(
        max_count, unique_items, key=lambda item: popularity.get(item, 0)
    )

def calculate_metrics(recommend, max_count, sessions_file):
    """Compute average recall@k and precision@k for k = 1..max_count.

    Every line of ``sessions_file`` must have the form
    ``<visits>;<purchases>``, both parts being comma-separated item ids.
    Sessions whose purchases part is empty are skipped.

    For each remaining session ``recommend(visits, max_count)`` is called
    once and, for every cutoff ``k``, its first ``k`` results are scored:

    * recall@k    = purchased items among the top k / number of purchases
    * precision@k = purchased items among the top k / number of items
      actually present in the top k (fewer than ``k`` when the recommender
      returned a shorter list)

    A cutoff for which the recommender returned nothing contributes 0 to
    both sums.

    Args:
        recommend: Callable ``(items, max_count) -> sequence`` returning
            recommendations ordered best-first, so that the first ``k``
            entries are its top-k answer.
        max_count: Largest cutoff ``k`` to evaluate.
        sessions_file: Path of the sessions file to read.

    Returns:
        A ``pandas.DataFrame`` indexed by ``k`` with the columns
        ``avg_recall@k`` and ``avg_precision@k``, rounded to 2 decimals.
        If no session contains purchases the averages are 0/0, i.e. NaN.
    """
    recall_sum = np.zeros(max_count)
    precision_sum = np.zeros(max_count)
    sessions_count = 0

    with open(sessions_file, 'r') as f:
        for line in f:
            visits, purchases = line.strip().split(';')
            if purchases == '':
                continue

            purchases = purchases.split(',')
            purchased = set(purchases)
            rec = recommend(visits.split(','), max_count)

            # Score every cutoff, not only the bucket matching len(rec):
            # the row labelled "@k" must describe the top-k recommendations.
            for k in range(1, max_count + 1):
                top_k = rec[:k]
                if len(top_k) == 0:
                    # Nothing recommended: contributes 0 and avoids 0-division.
                    continue
                hits = len(purchased.intersection(top_k))
                recall_sum[k - 1] += hits / len(purchases)
                precision_sum[k - 1] += hits / len(top_k)
            sessions_count += 1

    return pandas.DataFrame({
            'k': np.arange(max_count) + 1,
            'avg_recall@k': [round(x, 2) for x in recall_sum / sessions_count],
            'avg_precision@k': [round(x, 2) for x in precision_sum / sessions_count]
    }).set_index('k')

# Report metrics for k = 1..5 for both popularity baselines, each evaluated
# first on the training sessions and then on the test sessions.
for recommender in (recommend_by_purchase, recommend_by_visit):
    for sessions_path in ('train.txt', 'test.txt'):
        print(calculate_metrics(recommender, 5, sessions_path))
    
