In [169]:
import pandas as pd
from collections import Counter, OrderedDict
from __future__ import division

In [2]:
class OrderedCounter(Counter, OrderedDict):
    """Counter that remembers the order elements are first encountered."""

    def __repr__(self):
        # Render the counts through an OrderedDict so the insertion order
        # is what shows up in the printed form.
        class_name = self.__class__.__name__
        ordered_counts = OrderedDict(self)
        return '%s(%r)' % (class_name, ordered_counts)

    def __reduce__(self):
        # Rebuild from (class, (OrderedDict of the counts,)) so that a copy
        # or pickle round trip keeps the element order.
        ordered_counts = OrderedDict(self)
        return (self.__class__, (ordered_counts,))

In [168]:
def id_freq(X, order=True):
    """Count how often each id occurs in a Series of comma-separated id strings.

    Missing values in ``X`` are dropped, the remaining strings are joined
    with ',' and split again on ',', and every resulting token is counted.

    Parameters
    ----------
    X : pandas.Series
        Series of strings such as ``"1,2,3"``; may contain missing values.
    order : bool, default True
        If true, return an ``OrderedCounter``; otherwise a plain ``Counter``.

    Returns
    -------
    OrderedCounter or Counter
        Mapping from id (as a string) to its number of occurrences.
    """
    all_ids = X.dropna().str.cat(sep=',').split(',')
    counter_cls = OrderedCounter if order else Counter
    return counter_cls(all_ids)

def sort_by_pop(row, freq_dict):
    """Return the distinct ids of one session, most frequent first.

    Parameters
    ----------
    row : str
        Comma-separated ids, e.g. ``"3,1,3,2"``.
    freq_dict : mapping
        Maps an id (as a string) to its frequency. Ids that are missing
        from the mapping are treated as having frequency 0.

    Returns
    -------
    list of str
        Every id of ``row`` exactly once, sorted by descending frequency.
        Ids with equal frequency keep their first-appearance order.
    """
    # Order-preserving de-duplication: keep only the first occurrence of
    # each id. Done in plain Python so the function needs neither numpy
    # nor pandas.
    seen = set()
    unique_ids = []
    for item in row.split(","):
        if item not in seen:
            seen.add(item)
            unique_ids.append(item)

    # sorted() is stable, also with reverse=True, so ties stay in
    # first-appearance order. The explicit default of 0 avoids comparing
    # None with integers, which raises TypeError on Python 3.
    return sorted(unique_ids, key=lambda item: freq_dict.get(item, 0), reverse=True)


def metrics_count(view, bought):
    """Compute recall/precision at 1 and at 5 for every session.

    Parameters
    ----------
    view : iterable of list of str
        Per session, the recommended ids in ranked order.
    bought : iterable of str
        Per session, the purchased ids as one comma-separated string.

    Returns
    -------
    list of list
        One ``[recall_1, precision_1, recall_5, precision_5]`` entry per
        session, paired positionally between ``view`` and ``bought``.
    """

    def metrics(v, b):
        b = b.split(',')
        n_bought = len(b)  # str.split always yields at least one element

        # Hits among the first five recommendations (distinct ids only).
        hits_5 = len(set(v[:5]).intersection(b))
        # Guard against an empty recommendation list instead of raising.
        hit_1 = int(len(v) > 0 and v[0] in b)

        # Explicit float division: the result must not depend on whether
        # `from __future__ import division` is active in the kernel.
        precision_5 = hits_5 / 5.0
        # NOTE(review): recall@5 divides by min(5, n_bought) while recall@1
        # divides by n_bought, and n_bought counts repeated ids -- confirm
        # that both choices are intended.
        recall_5 = hits_5 / float(min(5, n_bought))
        precision_1 = hit_1
        recall_1 = hit_1 / float(n_bought)

        return [recall_1, precision_1, recall_5, precision_5]

    # A real list (not a lazy map object), so the result can be reused.
    return [metrics(v, b) for v, b in zip(view, bought)]

In [158]:
# Both session files are read as header-less, ';'-separated tables whose two
# fields are named "view" and "bought".
train = pd.read_csv(
    "coursera_sessions_train.txt",
    sep=";",
    header=None,
    names=["view", "bought"],
)
test = pd.read_csv(
    "coursera_sessions_test.txt",
    sep=";",
    header=None,
    names=["view", "bought"],
)

In [159]:
# Id frequencies are taken from the TRAINING sessions only. They are computed
# before the dropna() below, so sessions without a purchase still contribute
# to the view counts.
train_vfo = id_freq(train["view"])
train_bfo = id_freq(train["bought"])

# Plain-Counter variants of the same training frequencies. These were
# previously built from `test`, which ranked the test recommendations with
# test-set statistics (including the test purchases themselves).
train_vf = id_freq(train["view"], order=False)
train_bf = id_freq(train["bought"], order=False)

# Keep only sessions that have both a view and a bought value.
train.dropna(inplace=True)
test.dropna(inplace=True)

# Series.map returns an index-aligned Series on both Python 2 and 3, whereas
# the builtin map() is a lazy iterator on Python 3.
# "view_sorted": viewed ids ranked by how often they were viewed.
train["view_sorted"] = train["view"].map(lambda x: sort_by_pop(x, train_vfo))
test["view_sorted"] = test["view"].map(lambda x: sort_by_pop(x, train_vf))
# "bought_sorted": the same viewed ids ranked by how often they were bought.
train["bought_sorted"] = train["view"].map(lambda x: sort_by_pop(x, train_bfo))
test["bought_sorted"] = test["view"].map(lambda x: sort_by_pop(x, train_bf))

train.head()

Unnamed: 0,view,bought,view_sorted,bought_sorted
7,59606162606364656661676867,676063,"[63, 64, 60, 61, 65, 66, 67, 68, 59, 62]","[60, 63, 67, 59, 61, 62, 64, 65, 66, 68]"
10,848586878889849091929386,86,"[85, 93, 89, 90, 84, 92, 86, 87, 91, 88]","[86, 85, 93, 84, 87, 88, 89, 90, 91, 92]"
19,138198199127,199,"[127, 138, 198, 199]","[138, 199, 127, 198]"
30,303304305306307308309310311312,303,"[303, 306, 304, 307, 309, 310, 305, 308, 311, ...","[303, 304, 305, 306, 307, 308, 309, 310, 311, ..."
33,352353352,352,"[352, 353]","[352, 353]"


In [165]:
# Score NaN-free copies so this step never modifies `train` / `test`.
train_cls = train.copy().dropna()
test_cls = test.copy().dropna()

# Suffix "_vs" scores the "view_sorted" ranking, "_bs" the "bought_sorted"
# ranking; both are compared against the "bought" column of the same frame.
train_bought = train_cls["bought"]
test_bought = test_cls["bought"]

train_metrics_vs = metrics_count(train_cls["view_sorted"], train_bought)
train_metrics_bs = metrics_count(train_cls["bought_sorted"], train_bought)
test_metrics_vs = metrics_count(test_cls["view_sorted"], test_bought)
test_metrics_bs = metrics_count(test_cls["bought_sorted"], test_bought)

In [161]:
def _mean_metrics_as_text(session_metrics):
    """Average per-session metric rows column-wise and format the means.

    Parameters
    ----------
    session_metrics : iterable of sequences
        One row of metric values per session.

    Returns
    -------
    list of str
        The column means, each rounded to two decimals and converted with
        ``str``. A real list, so it can be joined or inspected repeatedly
        (the builtin ``map`` is a one-shot iterator on Python 3).
    """
    column_means = pd.DataFrame(session_metrics).mean()
    return [str(round(x, 2)) for x in column_means]


answer1 = _mean_metrics_as_text(train_metrics_vs)
answer2 = _mean_metrics_as_text(test_metrics_vs)
answer3 = _mean_metrics_as_text(train_metrics_bs)
answer4 = _mean_metrics_as_text(test_metrics_bs)

In [163]:
# Write each answer as one space-separated line to ans1.txt ... ans4.txt.
# The path variable is deliberately not called `file`, which would shadow
# the Python 2 builtin of that name for every later cell.
for i, a in enumerate([answer1, answer2, answer3, answer4], 1):
    out_path = "ans" + str(i) + ".txt"
    with open(out_path, "w") as f:
        f.write(" ".join(a))