# 基于用户标签的推荐

In [1]:
# 导入包
import random
import math
import time
from tqdm import tqdm

## 一. 通用函数定义

In [2]:
# 定义装饰器，监控运行时间
def timmer(func):
    """Decorator that reports how long each call to ``func`` takes.

    After every call it prints ``Func <name>, run time: <seconds>`` where the
    duration is the difference of two ``time.time()`` readings, then returns
    whatever ``func`` returned.

    :params: func, the callable to wrap
    :return: the wrapping callable, carrying ``func``'s name and docstring
    """
    # Imported locally so the decorator does not rely on a file-level import.
    import functools

    # functools.wraps keeps __name__/__doc__ of the wrapped function; without
    # it every decorated function would introspect as "wrapper".
    @functools.wraps(func)
    def wrapper(*args, **kwargs):
        start_time = time.time()
        res = func(*args, **kwargs)
        stop_time = time.time()
        print('Func %s, run time: %s' % (func.__name__, stop_time - start_time))
        return res
    return wrapper

### 1. 数据处理相关
Delicious-2k数据集
1. load data
2. split data

In [14]:
class Dataset():
    """Loads tagging records from a tab-separated file and splits them.

    ``self.data`` is a list of ``(user, item, tags)`` tuples, one per distinct
    (user, item) pair, where ``tags`` is the list of distinct tags that user
    attached to that item.
    """

    def __init__(self, fp):
        # fp: data file path
        self.data = self.loadData(fp)

    @timmer
    def loadData(self, fp):
        """Read the file at ``fp`` and group its tags by (user, item).

        The first line is skipped as a header. From every other line only the
        first three tab-separated fields (user, item, tag) are used.

        :params: fp, data file path
        :return: list of (user, item, tags) tuples
        """
        new_data = {}
        # Use a context manager so the file handle is always closed.
        with open(fp) as f:
            next(f, None)  # skip the header line (no-op on an empty file)
            for line in f:
                line = line.strip()
                if not line:
                    # Tolerate blank lines (e.g. a trailing newline at EOF).
                    continue
                user, item, tag = line.split('\t')[:3]
                new_data.setdefault(user, {}).setdefault(item, set()).add(tag)
        # Sort the tags: iteration order of a set of strings can differ from
        # one interpreter run to the next, which would make results that
        # depend on tag order non-reproducible.
        return [(user, item, sorted(tags))
                for user, items in new_data.items()
                for item, tags in items.items()]

    @timmer
    def splitData(self, M, k, seed=1):
        '''
        Split ``self.data`` into a train part and a test part.

        :params: M, number of folds; the final result should average over M folds
        :params: k, index of the fold used as test set, k in [0, M)
        :params: seed, random seed; must be the same for every k so that the
                 folds partition the data consistently
        :return: train, test -- each a dict user -> {item: tags}
        '''
        # The split is done per (user, item) record.
        train, test = [], []
        random.seed(seed)
        for record in self.data:
            # Differs from the book: M-1 is used as the upper bound because
            # random.randint includes both end points.
            if random.randint(0, M - 1) == k:
                test.append(record)
            else:
                train.append(record)

        # Convert the record list into the nested form user -> {item: tags}.
        def convert_dict(data):
            data_dict = {}
            for user, item, tags in data:
                data_dict.setdefault(user, {})[item] = tags
            return data_dict

        return convert_dict(train), convert_dict(test)

### 2. 评价指标
1. Precision
2. Recall
3. Coverage
4. Diversity
5. Popularity(Novelty)

In [32]:
class Metric():
    """Evaluates a recommender on a train/test split.

    ``train`` and ``test`` are dicts of the form user -> {item: tags}.
    Recommendations for every test user are computed once in ``__init__`` and
    reused by all metrics. Every metric returns 0 (instead of raising
    ``ZeroDivisionError``) when its denominator is empty.
    """

    def __init__(self, train, test, GetRecommendation):
        '''
        :params: train, training data
        :params: test, test data
        :params: GetRecommendation, callable user -> list of (item, score)
        '''
        self.train = train
        self.test = test
        self.GetRecommendation = GetRecommendation
        self.recs = self.getRec()

    def getRec(self):
        """Return {user: recommendation list} for every user in the test set."""
        return {user: self.GetRecommendation(user) for user in self.test}

    def precision(self):
        """Percentage of recommended items that are in the user's test items."""
        total, hit = 0, 0
        for user in self.test:
            test_items = set(self.test[user])
            rank = self.recs[user]
            hit += sum(1 for item, _ in rank if item in test_items)
            total += len(rank)
        # No recommendation at all -> nothing was hit.
        return round(hit / total * 100, 2) if total else 0.0

    def recall(self):
        """Percentage of test items that were recommended."""
        total, hit = 0, 0
        for user in self.test:
            test_items = set(self.test[user])
            rank = self.recs[user]
            hit += sum(1 for item, _ in rank if item in test_items)
            total += len(test_items)
        return round(hit / total * 100, 2) if total else 0.0

    def coverage(self):
        """Percentage of training items that appear in some recommendation."""
        all_item = {item for items in self.train.values() for item in items}
        recom_item = {item for user in self.test for item, _ in self.recs[user]}
        if not all_item:
            return 0.0
        return round(len(recom_item) / len(all_item) * 100, 2)

    def diversity(self):
        """Mean over test users of 1 - average pairwise item similarity.

        Items are compared by the cosine similarity of their tag-count vectors
        built from the training data. A user with fewer than two recommended
        items contributes a diversity of 1.
        """
        # Tag-count vector of every training item.
        item_tags = {}
        for items in self.train.values():
            for item, tags in items.items():
                counts = item_tags.setdefault(item, {})
                for tag in tags:
                    counts[tag] = counts.get(tag, 0) + 1

        # Squared norms are computed once instead of once per compared pair.
        sq_norm = {item: sum(c ** 2 for c in counts.values())
                   for item, counts in item_tags.items()}

        def CosineSim(u, v):
            tags_v = item_tags[v]
            dot = sum(cnt * tags_v[tag]
                      for tag, cnt in item_tags[u].items() if tag in tags_v)
            denom = math.sqrt(sq_norm[u] * sq_norm[v])
            # An item without tags has a zero vector; treat it as dissimilar.
            return dot / denom if denom else 0.0

        div = []
        for user in self.test:
            rank = self.recs[user]
            sim, cnt = 0, 0
            for u, _ in rank:
                for v, _ in rank:
                    if u == v:
                        continue
                    sim += CosineSim(u, v)
                    cnt += 1
            # Guard on the pair count, which is the actual divisor.
            sim = sim / cnt if cnt else 0
            div.append(1 - sim)
        return sum(div) / len(div) if div else 0.0

    def popularity(self):
        """Average log-popularity of the recommended items (lower = more novel).

        An item's popularity is the number of training users who tagged it.
        """
        item_pop = {}
        for items in self.train.values():
            for item in items:
                item_pop[item] = item_pop.get(item, 0) + 1

        num, pop = 0, 0
        for user in self.test:
            for item, _ in self.recs[user]:
                # The log damps the long tail so popular items do not dominate.
                # Items that never occur in train count as popularity 0.
                pop += math.log(1 + item_pop.get(item, 0))
                num += 1
        return round(pop / num, 6) if num else 0.0

    def eval(self):
        """Compute all metrics, print them and return them as a dict."""
        metric = {'Precision': self.precision(),
                  'Recall': self.recall(),
                  'Coverage': self.coverage(),
                  'Diversity': self.diversity(),
                  'Popularity': self.popularity()}
        print('Metric:', metric)
        return metric

## 二. 算法实现
1. SimpleTagBased
2. TagBasedTFIDF
3. TagBasedTFIDF++
4. TagExtend

In [21]:
# 1. 基于热门标签的推荐
def SimpleTagBased(train, N):
    '''
    Build a recommender that scores items through the tags a user has used.

    :params: train, training data, user -> {item: tags}
    :params: N, hyper-parameter, number of items in the TopN recommendation
    :return: GetRecommendation, the recommendation interface function
    '''
    # user_tags[user][tag]: how often the user applied the tag
    # tag_items[tag][item]: how often the tag was applied to the item
    user_tags, tag_items = {}, {}
    for user, tagged in train.items():
        own = user_tags[user] = {}
        for item, tags in tagged.items():
            for tag in tags:
                own[tag] = own.get(tag, 0) + 1
                per_item = tag_items.setdefault(tag, {})
                per_item[item] = per_item.get(item, 0) + 1

    def GetRecommendation(user):
        # Recommend the N best-scoring items the user has not tagged yet.
        personal = user_tags.get(user)
        if personal is None:
            return []
        seen = set(train[user])
        scores = {}
        for tag, weight in personal.items():
            for item, count in tag_items[tag].items():
                if item not in seen:
                    scores[item] = scores.get(item, 0) + weight * count
        ranked = sorted(scores.items(), key=lambda pair: pair[1], reverse=True)
        return ranked[:N]

    return GetRecommendation

In [22]:
# 2. 改进一：为热门标签加入惩罚项
def TagBasedTFIDF(train, N):
    '''
    Tag-based recommender that penalises popular tags.

    Each tag's contribution is divided by the number of distinct users who
    used that tag.

    :params: train, training data, user -> {item: tags}
    :params: N, hyper-parameter, number of items in the TopN recommendation
    :return: GetRecommendation, the recommendation interface function
    '''
    # user_tags[user][tag]: how often the user applied the tag
    # tag_items[tag][item]: how often the tag was applied to the item
    # tag_users[tag]: set of distinct users who applied the tag
    user_tags, tag_items, tag_users = {}, {}, {}
    for user, tagged in train.items():
        own = user_tags[user] = {}
        for item, tags in tagged.items():
            for tag in tags:
                own[tag] = own.get(tag, 0) + 1
                per_item = tag_items.setdefault(tag, {})
                per_item[item] = per_item.get(item, 0) + 1
                tag_users.setdefault(tag, set()).add(user)
    tag_pop = {tag: len(users) for tag, users in tag_users.items()}

    def GetRecommendation(user):
        # Recommend the N best-scoring items the user has not tagged yet.
        personal = user_tags.get(user)
        if personal is None:
            return []
        seen = set(train[user])
        scores = {}
        for tag, weight in personal.items():
            popularity = tag_pop[tag]
            for item, count in tag_items[tag].items():
                if item not in seen:
                    scores[item] = scores.get(item, 0) + weight * count / popularity
        ranked = sorted(scores.items(), key=lambda pair: pair[1], reverse=True)
        return ranked[:N]

    return GetRecommendation

In [23]:
# 3. 改进二：同时也为热门商品加入惩罚项
def TagBasedTFIDF_Improved(train, N):
    '''
    :params: train, 训练数据集
    :params: N, 超参数，设置取TopN推荐物品数目
    :return: GetRecommendation，推荐接口函数
    '''
    # 统计user_tags和tag_items
    user_tags, tag_items = {}, {}
    # 统计标签和物品的热门程度，即打过此标签的不同用户数，和物品对应的不同用户数
    tag_pop, item_pop = {}, {}
    for user in train:
        user_tags[user] = {}
        for item in train[user]:
            if item not in item_pop:
                item_pop[item] = 0
            item_pop[item] += 1
            for tag in train[user][item]:
                if tag not in user_tags[user]:
                    user_tags[user][tag] = 0
                user_tags[user][tag] += 1
                if tag not in tag_items:
                    tag_items[tag] = {}
                if item not in tag_items[tag]:
                    tag_items[tag][item] = 0
                tag_items[tag][item] += 1
                if tag not in tag_pop:
                    tag_pop[tag] = set()
                tag_pop[tag].add(user)
    tag_pop = {k: len(v) for k, v in tag_pop.items()}
    
    def GetRecommendation(user):
        # 按照打分推荐N个未见过的
        if user not in user_tags:
            return []
        seen_items = set(train[user])
        item_score = {}
        for tag in user_tags[user]:
            for item in tag_items[tag]:
                if item in seen_items:
                    continue
                if item not in item_score:
                    item_score[item] = 0
                item_score[item] += user_tags[user][tag] * tag_items[tag][item] / tag_pop[tag] / item_pop[item]
        item_score = list(sorted(item_score.items(), key=lambda x: x[1], reverse=True))
        return item_score[:N]
    
    return GetRecommendation

In [44]:
# 4. 基于标签扩展的推荐（ExpandTagBased）
def ExpandTagBased(train, N, M=20):
    '''
    Tag-based recommender that first expands sparse user tag profiles.

    Users with fewer than M distinct tags get additional tags that co-occur
    on items with their own tags; the profile is then cut to its M highest
    weighted tags and used for SimpleTagBased-style scoring.

    :params: train, training data, user -> {item: tags}
    :params: N, hyper-parameter, number of items in the TopN recommendation
    :params: M, hyper-parameter, profile size up to which users with fewer
             than M tags are expanded
    :return: GetRecommendation, the recommendation interface function
    '''

    # 1. Tag-to-tag similarity from co-occurrence on the same item.
    item_tag = {}
    for tagged in train.values():
        for item, tags in tagged.items():
            tag_set = item_tag.setdefault(item, set())
            for tag in tags:
                tag_set.add(tag)
    tag_sim, tag_cnt = {}, {}
    for tags in item_tag.values():
        for u in tags:
            tag_cnt[u] = tag_cnt.get(u, 0) + 1
            row = tag_sim.setdefault(u, {})
            for v in tags:
                if u != v:
                    row[v] = row.get(v, 0) + 1
    # Normalise co-occurrence counts by the geometric mean of both tag counts.
    for u, row in tag_sim.items():
        for v in row:
            row[v] = row[v] / math.sqrt(tag_cnt[u] * tag_cnt[v])

    # 2. Expand the tag profile of every user.
    user_tags = {}
    for user, tagged in train.items():
        own = user_tags.setdefault(user, {})
        for tags in tagged.values():
            for tag in tags:
                own[tag] = own.get(tag, 0) + 1
    expand_tags = {}
    for user, own in user_tags.items():
        if len(own) >= M:
            # Profile is already large enough; use it unchanged.
            expand_tags[user] = own
            continue
        # Fewer than M tags: add similar tags weighted by similarity.
        candidates = {}
        for tag, weight in own.items():
            for other, sim in tag_sim[tag].items():
                if other in own:
                    continue
                candidates[other] = candidates.get(other, 0) + weight * sim
        # The user's own tags are appended after the expanded ones.
        candidates.update(own)
        top = sorted(candidates.items(), key=lambda pair: pair[1], reverse=True)[:M]
        expand_tags[user] = dict(top)

    # 3. SimpleTagBased scoring on the expanded profiles.
    tag_items = {}
    for tagged in train.values():
        for item, tags in tagged.items():
            for tag in tags:
                per_item = tag_items.setdefault(tag, {})
                per_item[item] = per_item.get(item, 0) + 1

    def GetRecommendation(user):
        # Recommend the N best-scoring items the user has not tagged yet.
        if user not in user_tags:
            return []
        seen = set(train[user])
        scores = {}
        for tag, weight in expand_tags[user].items():
            for item, count in tag_items[tag].items():
                if item in seen:
                    continue
                scores[item] = scores.get(item, 0) + weight * count
        ranked = sorted(scores.items(), key=lambda pair: pair[1], reverse=True)
        return ranked[:N]

    return GetRecommendation

## 三. 实验
1. SimpleTagBased实验
2. TagBasedTFIDF实验
3. TagBasedTFIDF++实验
4. TagExtend实验

M=10, N=10

In [38]:
class Experiment():
    """Runs an M-fold evaluation of one of the tag-based recommenders."""

    def __init__(self, M, N, fp='../dataset/delicious-2k/user_taggedbookmarks.dat', rt='SimpleTagBased'):
        '''
        :params: M, number of experiments (folds) to run
        :params: N, number of items in the TopN recommendation
        :params: fp, data file path
        :params: rt, recommender type, a key of ``self.alg``
        '''
        self.M = M
        self.N = N
        self.fp = fp
        self.rt = rt
        self.alg = {
            'SimpleTagBased': SimpleTagBased,
            'TagBasedTFIDF': TagBasedTFIDF,
            'TagBasedTFIDF_Improved': TagBasedTFIDF_Improved,
            'ExtendTagBased': ExpandTagBased,
            # Alias matching the function's actual name.
            'ExpandTagBased': ExpandTagBased,
        }

    # A single experiment
    @timmer
    def worker(self, train, test):
        '''
        :params: train, training data set
        :params: test, test data set
        :return: dict with the value of every metric
        '''
        getRecommendation = self.alg[self.rt](train, self.N)
        metric = Metric(train, test, getRecommendation)
        return metric.eval()

    # Average over several experiments
    @timmer
    def run(self):
        '''
        Load the data, evaluate every one of the M folds and print the
        per-metric average.

        :return: dict with the averaged value of every metric
        '''
        metrics = {'Precision': 0, 'Recall': 0,
                   'Coverage': 0, 'Diversity': 0,
                   'Popularity': 0}
        dataset = Dataset(self.fp)
        for ii in range(self.M):
            train, test = dataset.splitData(self.M, ii)
            print('Experiment {}:'.format(ii))
            metric = self.worker(train, test)
            metrics = {k: metrics[k] + metric[k] for k in metrics}
        metrics = {k: metrics[k] / self.M for k in metrics}
        print('Average Result (M={}, N={}): {}'.format(
            self.M, self.N, metrics))
        # Hand the averages back so callers can use them programmatically.
        return metrics

In [33]:
# 1. SimpleTagBased实验
# 10 folds, Top-10 recommendation lists.
M = 10
N = 10
exp = Experiment(M, N, rt='SimpleTagBased')
exp.run()

Func loadData, run time: 1.6280088424682617
Func splitData, run time: 0.30851316452026367
Experiment 0:
Metric: {'Precision': 0.33, 'Recall': 0.54, 'Coverage': 3.33, 'Diversity': 0.7889366782206686, 'Popularity': 2.341392}
Func worker, run time: 37.870625019073486
Func splitData, run time: 0.3097972869873047
Experiment 1:
Metric: {'Precision': 0.36, 'Recall': 0.59, 'Coverage': 3.37, 'Diversity': 0.789191306584079, 'Popularity': 2.326798}
Func worker, run time: 38.06450700759888
Func splitData, run time: 0.32140111923217773
Experiment 2:
Metric: {'Precision': 0.36, 'Recall': 0.59, 'Coverage': 3.37, 'Diversity': 0.7930642205047819, 'Popularity': 2.327752}
Func worker, run time: 43.02850008010864
Func splitData, run time: 0.32935285568237305
Experiment 3:
Metric: {'Precision': 0.29, 'Recall': 0.48, 'Coverage': 3.35, 'Diversity': 0.7980044140029352, 'Popularity': 2.3653}
Func worker, run time: 39.16614294052124
Func splitData, run time: 0.1974170207977295
Experiment 4:
Metric: {'Precision'

In [35]:
# 2. TagBasedTFIDF实验
# 10 folds, Top-10 recommendation lists.
M = 10
N = 10
exp = Experiment(M, N, rt='TagBasedTFIDF')
exp.run()

Func loadData, run time: 1.6277968883514404
Func splitData, run time: 0.27590298652648926
Experiment 0:
Metric: {'Precision': 0.38, 'Recall': 0.62, 'Coverage': 16.84, 'Diversity': 0.8817864660115259, 'Popularity': 1.324191}
Func worker, run time: 46.15612602233887
Func splitData, run time: 0.31597304344177246
Experiment 1:
Metric: {'Precision': 0.39, 'Recall': 0.64, 'Coverage': 16.95, 'Diversity': 0.8826858063646551, 'Popularity': 1.316902}
Func worker, run time: 43.69584107398987
Func splitData, run time: 0.24825787544250488
Experiment 2:
Metric: {'Precision': 0.35, 'Recall': 0.58, 'Coverage': 16.95, 'Diversity': 0.8810856212597441, 'Popularity': 1.32838}
Func worker, run time: 43.3360550403595
Func splitData, run time: 0.26052021980285645
Experiment 3:
Metric: {'Precision': 0.3, 'Recall': 0.5, 'Coverage': 16.98, 'Diversity': 0.8852701028022301, 'Popularity': 1.324043}
Func worker, run time: 43.02037310600281
Func splitData, run time: 0.26059913635253906
Experiment 4:
Metric: {'Precis

In [36]:
# 3. TagBasedTFIDF++实验
# 10 folds, Top-10 recommendation lists.
M = 10
N = 10
exp = Experiment(M, N, rt='TagBasedTFIDF_Improved')
exp.run()

Func loadData, run time: 1.2623248100280762
Func splitData, run time: 0.2863779067993164
Experiment 0:
Metric: {'Precision': 0.14, 'Recall': 0.23, 'Coverage': 19.4, 'Diversity': 0.859877838307336, 'Popularity': 0.786183}
Func worker, run time: 54.93890690803528
Func splitData, run time: 0.2523970603942871
Experiment 1:
Metric: {'Precision': 0.16, 'Recall': 0.27, 'Coverage': 19.36, 'Diversity': 0.8617994094261496, 'Popularity': 0.785819}
Func worker, run time: 54.65705108642578
Func splitData, run time: 0.26293516159057617
Experiment 2:
Metric: {'Precision': 0.18, 'Recall': 0.3, 'Coverage': 19.48, 'Diversity': 0.861349178757724, 'Popularity': 0.787125}
Func worker, run time: 54.77145004272461
Func splitData, run time: 0.2572140693664551
Experiment 3:
Metric: {'Precision': 0.15, 'Recall': 0.24, 'Coverage': 19.32, 'Diversity': 0.8633524800153738, 'Popularity': 0.78599}
Func worker, run time: 54.72025799751282
Func splitData, run time: 0.2647433280944824
Experiment 4:
Metric: {'Precision':

In [45]:
# 4. TagExtend实验
# 10 folds, Top-10 recommendation lists.
M = 10
N = 10
exp = Experiment(M, N, rt='ExtendTagBased')
exp.run()

Func loadData, run time: 1.888315200805664
Func splitData, run time: 0.18341422080993652
Experiment 0:
Metric: {'Precision': 0.33, 'Recall': 0.54, 'Coverage': 3.37, 'Diversity': 0.7882770482685956, 'Popularity': 2.338341}
Func worker, run time: 45.58587598800659
Func splitData, run time: 0.342771053314209
Experiment 1:
Metric: {'Precision': 0.37, 'Recall': 0.61, 'Coverage': 3.45, 'Diversity': 0.7884184200805971, 'Popularity': 2.323208}
Func worker, run time: 43.79095387458801
Func splitData, run time: 0.18767595291137695
Experiment 2:
Metric: {'Precision': 0.36, 'Recall': 0.6, 'Coverage': 3.47, 'Diversity': 0.7920836566910633, 'Popularity': 2.323179}
Func worker, run time: 45.01177382469177
Func splitData, run time: 0.3437650203704834
Experiment 3:
Metric: {'Precision': 0.29, 'Recall': 0.47, 'Coverage': 3.39, 'Diversity': 0.7975400160363582, 'Popularity': 2.361645}
Func worker, run time: 40.95514512062073
Func splitData, run time: 0.3429849147796631
Experiment 4:
Metric: {'Precision': 

## 四. 实验结果
1. SimpleTagBased实验

    Running time: 404.8816478252411
    
    Average Result (M=10, N=10): {'Precision': 0.33699999999999997, 'Recall': 0.5529999999999999, 'Coverage': 3.3609999999999998, 'Diversity': 0.7913794301955859, 'Popularity': 2.3396786}
     
2. TagBasedTFIDF实验
    
    Running time: 443.55260705947876
    
    Average Result (M=10, N=10): {'Precision': 0.352, 'Recall': 0.5799999999999998, 'Coverage': 16.952, 'Diversity': 0.8829974324199723, 'Popularity': 1.3243864}
     
3. TagBasedTFIDF_Improved实验
    
    Running time: 551.4401750564575
    
    Average Result (M=10, N=10): {'Precision': 0.16299999999999998, 'Recall': 0.267, 'Coverage': 19.410999999999998, 'Diversity': 0.8612131974012064, 'Popularity': 0.7858693999999999}

4. ExtendTagBased实验

    Running time: 430.87147402763367
    
    Average Result (M=10, N=10): {'Precision': 0.34400000000000003, 'Recall': 0.5660000000000001, 'Coverage': 3.4150000000000005, 'Diversity': 0.7904256291985878, 'Popularity': 2.336292}

## 附：运行日志（请双击看）

1. SimpleTagBased实验
Func loadData, run time: 1.6280088424682617
Func splitData, run time: 0.30851316452026367
Experiment 0:
Metric: {'Precision': 0.33, 'Recall': 0.54, 'Coverage': 3.33, 'Diversity': 0.7889366782206686, 'Popularity': 2.341392}
Func worker, run time: 37.870625019073486
Func splitData, run time: 0.3097972869873047
Experiment 1:
Metric: {'Precision': 0.36, 'Recall': 0.59, 'Coverage': 3.37, 'Diversity': 0.789191306584079, 'Popularity': 2.326798}
Func worker, run time: 38.06450700759888
Func splitData, run time: 0.32140111923217773
Experiment 2:
Metric: {'Precision': 0.36, 'Recall': 0.59, 'Coverage': 3.37, 'Diversity': 0.7930642205047819, 'Popularity': 2.327752}
Func worker, run time: 43.02850008010864
Func splitData, run time: 0.32935285568237305
Experiment 3:
Metric: {'Precision': 0.29, 'Recall': 0.48, 'Coverage': 3.35, 'Diversity': 0.7980044140029352, 'Popularity': 2.3653}
Func worker, run time: 39.16614294052124
Func splitData, run time: 0.1974170207977295
Experiment 4:
Metric: {'Precision': 0.34, 'Recall': 0.56, 'Coverage': 3.33, 'Diversity': 0.7913038648261218, 'Popularity': 2.33633}
Func worker, run time: 41.13529896736145
Func splitData, run time: 0.19643640518188477
Experiment 5:
Metric: {'Precision': 0.33, 'Recall': 0.55, 'Coverage': 3.29, 'Diversity': 0.7897780704681152, 'Popularity': 2.346427}
Func worker, run time: 38.96295094490051
Func splitData, run time: 0.19998574256896973
Experiment 6:
Metric: {'Precision': 0.35, 'Recall': 0.56, 'Coverage': 3.48, 'Diversity': 0.7947467303677718, 'Popularity': 2.305821}
Func worker, run time: 40.37690997123718
Func splitData, run time: 0.19191503524780273
Experiment 7:
Metric: {'Precision': 0.33, 'Recall': 0.55, 'Coverage': 3.39, 'Diversity': 0.7909845940006351, 'Popularity': 2.362614}
Func worker, run time: 41.105441093444824
Func splitData, run time: 0.1934211254119873
Experiment 8:
Metric: {'Precision': 0.34, 'Recall': 0.55, 'Coverage': 3.37, 'Diversity': 0.7895494174800041, 'Popularity': 2.343617}
Func worker, run time: 39.65980076789856
Func splitData, run time: 0.1929779052734375
Experiment 9:
Metric: {'Precision': 0.34, 'Recall': 0.56, 'Coverage': 3.33, 'Diversity': 0.7882350055007459, 'Popularity': 2.340735}
Func worker, run time: 41.376152992248535
Average Result (M=10, N=10): {'Precision': 0.33699999999999997, 'Recall': 0.5529999999999999, 'Coverage': 3.3609999999999998, 'Diversity': 0.7913794301955859, 'Popularity': 2.3396786}
Func run, run time: 404.8816478252411

2. TagBasedTFIDF实验
Func loadData, run time: 1.6277968883514404
Func splitData, run time: 0.27590298652648926
Experiment 0:
Metric: {'Precision': 0.38, 'Recall': 0.62, 'Coverage': 16.84, 'Diversity': 0.8817864660115259, 'Popularity': 1.324191}
Func worker, run time: 46.15612602233887
Func splitData, run time: 0.31597304344177246
Experiment 1:
Metric: {'Precision': 0.39, 'Recall': 0.64, 'Coverage': 16.95, 'Diversity': 0.8826858063646551, 'Popularity': 1.316902}
Func worker, run time: 43.69584107398987
Func splitData, run time: 0.24825787544250488
Experiment 2:
Metric: {'Precision': 0.35, 'Recall': 0.58, 'Coverage': 16.95, 'Diversity': 0.8810856212597441, 'Popularity': 1.32838}
Func worker, run time: 43.3360550403595
Func splitData, run time: 0.26052021980285645
Experiment 3:
Metric: {'Precision': 0.3, 'Recall': 0.5, 'Coverage': 16.98, 'Diversity': 0.8852701028022301, 'Popularity': 1.324043}
Func worker, run time: 43.02037310600281
Func splitData, run time: 0.26059913635253906
Experiment 4:
Metric: {'Precision': 0.39, 'Recall': 0.65, 'Coverage': 16.93, 'Diversity': 0.8839700173444075, 'Popularity': 1.318708}
Func worker, run time: 44.03740382194519
Func splitData, run time: 0.25109100341796875
Experiment 5:
Metric: {'Precision': 0.36, 'Recall': 0.59, 'Coverage': 16.86, 'Diversity': 0.8819926728499792, 'Popularity': 1.332067}
Func worker, run time: 43.196900844573975
Func splitData, run time: 0.26158785820007324
Experiment 6:
Metric: {'Precision': 0.36, 'Recall': 0.58, 'Coverage': 17.06, 'Diversity': 0.8857461664078716, 'Popularity': 1.317056}
Func worker, run time: 43.58964991569519
Func splitData, run time: 0.26162195205688477
Experiment 7:
Metric: {'Precision': 0.35, 'Recall': 0.58, 'Coverage': 17.08, 'Diversity': 0.8821745724171214, 'Popularity': 1.331707}
Func worker, run time: 43.189525842666626
Func splitData, run time: 0.23992609977722168
Experiment 8:
Metric: {'Precision': 0.31, 'Recall': 0.51, 'Coverage': 16.89, 'Diversity': 0.8827909053583793, 'Popularity': 1.327498}
Func worker, run time: 45.02846622467041
Func splitData, run time: 0.25911593437194824
Experiment 9:
Metric: {'Precision': 0.33, 'Recall': 0.55, 'Coverage': 16.98, 'Diversity': 0.8824719933838076, 'Popularity': 1.323312}
Func worker, run time: 43.965688705444336
Average Result (M=10, N=10): {'Precision': 0.352, 'Recall': 0.5799999999999998, 'Coverage': 16.952, 'Diversity': 0.8829974324199723, 'Popularity': 1.3243864}
Func run, run time: 443.55260705947876

3. TagBasedTFIDF++实验
Func loadData, run time: 1.2623248100280762
Func splitData, run time: 0.2863779067993164
Experiment 0:
Metric: {'Precision': 0.14, 'Recall': 0.23, 'Coverage': 19.4, 'Diversity': 0.859877838307336, 'Popularity': 0.786183}
Func worker, run time: 54.93890690803528
Func splitData, run time: 0.2523970603942871
Experiment 1:
Metric: {'Precision': 0.16, 'Recall': 0.27, 'Coverage': 19.36, 'Diversity': 0.8617994094261496, 'Popularity': 0.785819}
Func worker, run time: 54.65705108642578
Func splitData, run time: 0.26293516159057617
Experiment 2:
Metric: {'Precision': 0.18, 'Recall': 0.3, 'Coverage': 19.48, 'Diversity': 0.861349178757724, 'Popularity': 0.787125}
Func worker, run time: 54.77145004272461
Func splitData, run time: 0.2572140693664551
Experiment 3:
Metric: {'Precision': 0.15, 'Recall': 0.24, 'Coverage': 19.32, 'Diversity': 0.8633524800153738, 'Popularity': 0.78599}
Func worker, run time: 54.72025799751282
Func splitData, run time: 0.2647433280944824
Experiment 4:
Metric: {'Precision': 0.21, 'Recall': 0.34, 'Coverage': 19.38, 'Diversity': 0.8611766478285409, 'Popularity': 0.786397}
Func worker, run time: 54.61092400550842
Func splitData, run time: 0.2570078372955322
Experiment 5:
Metric: {'Precision': 0.16, 'Recall': 0.27, 'Coverage': 19.36, 'Diversity': 0.8607577942073997, 'Popularity': 0.786923}
Func worker, run time: 54.64287829399109
Func splitData, run time: 0.25312089920043945
Experiment 6:
Metric: {'Precision': 0.16, 'Recall': 0.26, 'Coverage': 19.43, 'Diversity': 0.8622121035638752, 'Popularity': 0.784275}
Func worker, run time: 54.19543790817261
Func splitData, run time: 0.25305795669555664
Experiment 7:
Metric: {'Precision': 0.16, 'Recall': 0.26, 'Coverage': 19.57, 'Diversity': 0.8625286276619254, 'Popularity': 0.785651}
Func worker, run time: 54.9225959777832
Func splitData, run time: 0.24744105339050293
Experiment 8:
Metric: {'Precision': 0.15, 'Recall': 0.24, 'Coverage': 19.41, 'Diversity': 0.8605756591696193, 'Popularity': 0.784442}
Func worker, run time: 56.0502827167511
Func splitData, run time: 0.25081896781921387
Experiment 9:
Metric: {'Precision': 0.16, 'Recall': 0.26, 'Coverage': 19.4, 'Diversity': 0.8585022350741194, 'Popularity': 0.785889}
Func worker, run time: 54.0096640586853
Average Result (M=10, N=10): {'Precision': 0.16299999999999998, 'Recall': 0.267, 'Coverage': 19.410999999999998, 'Diversity': 0.8612131974012064, 'Popularity': 0.7858693999999999}
Func run, run time: 551.4401750564575

4. ExtendTagBased实验
Func loadData, run time: 1.888315200805664
Func splitData, run time: 0.18341422080993652
Experiment 0:
Metric: {'Precision': 0.33, 'Recall': 0.54, 'Coverage': 3.37, 'Diversity': 0.7882770482685956, 'Popularity': 2.338341}
Func worker, run time: 45.58587598800659
Func splitData, run time: 0.342771053314209
Experiment 1:
Metric: {'Precision': 0.37, 'Recall': 0.61, 'Coverage': 3.45, 'Diversity': 0.7884184200805971, 'Popularity': 2.323208}
Func worker, run time: 43.79095387458801
Func splitData, run time: 0.18767595291137695
Experiment 2:
Metric: {'Precision': 0.36, 'Recall': 0.6, 'Coverage': 3.47, 'Diversity': 0.7920836566910633, 'Popularity': 2.323179}
Func worker, run time: 45.01177382469177
Func splitData, run time: 0.3437650203704834
Experiment 3:
Metric: {'Precision': 0.29, 'Recall': 0.47, 'Coverage': 3.39, 'Diversity': 0.7975400160363582, 'Popularity': 2.361645}
Func worker, run time: 40.95514512062073
Func splitData, run time: 0.3429849147796631
Experiment 4:
Metric: {'Precision': 0.37, 'Recall': 0.62, 'Coverage': 3.4, 'Diversity': 0.7909206637230392, 'Popularity': 2.333121}
Func worker, run time: 41.210543155670166
Func splitData, run time: 0.32721614837646484
Experiment 5:
Metric: {'Precision': 0.34, 'Recall': 0.56, 'Coverage': 3.33, 'Diversity': 0.788432348430914, 'Popularity': 2.344057}
Func worker, run time: 41.1824209690094
Func splitData, run time: 0.19087624549865723
Experiment 6:
Metric: {'Precision': 0.37, 'Recall': 0.6, 'Coverage': 3.52, 'Diversity': 0.7933734279462265, 'Popularity': 2.302654}
Func worker, run time: 44.1220920085907
Func splitData, run time: 0.3399050235748291
Experiment 7:
Metric: {'Precision': 0.33, 'Recall': 0.55, 'Coverage': 3.45, 'Diversity': 0.7899962076862624, 'Popularity': 2.359363}
Func worker, run time: 41.13612079620361
Func splitData, run time: 0.3469550609588623
Experiment 8:
Metric: {'Precision': 0.33, 'Recall': 0.53, 'Coverage': 3.41, 'Diversity': 0.7877338070843662, 'Popularity': 2.340181}
Func worker, run time: 41.45753598213196
Func splitData, run time: 0.34490013122558594
Experiment 9:
Metric: {'Precision': 0.35, 'Recall': 0.58, 'Coverage': 3.36, 'Diversity': 0.7874806960384569, 'Popularity': 2.337171}
Func worker, run time: 41.51561903953552
Average Result (M=10, N=10): {'Precision': 0.34400000000000003, 'Recall': 0.5660000000000001, 'Coverage': 3.4150000000000005, 'Diversity': 0.7904256291985878, 'Popularity': 2.336292}
Func run, run time: 430.87147402763367