In [1]:
import gzip
import random
import csv
from collections import defaultdict
from sklearn import linear_model
import math

In [2]:
path="trainInteractions.csv.gz"

In [3]:
def parse(f):
    for l in gzip.open(f):
        yield eval(l)

In [4]:
def readCSV(path):
    f = gzip.open(path, 'rt')
    c = csv.reader(f)
    header = next(c)
    for l in c:
        d = dict(zip(header,l))
        yield d['user_id'],d['recipe_id'],d

In [5]:
def Jaccard(s1, s2):
    numer = len(s1.intersection(s2))
    denom = len(s1.union(s2))
    if denom == 0:
        return 0
    return numer / denom

In [6]:
def CosineSet(s1, s2):
    # Not a proper implementation, operates on sets so correct for interactions only
    numer = len(s1.intersection(s2))
    denom = math.sqrt(len(s1)) * math.sqrt(len(s2))
    if denom == 0:
        return 0
    return numer / denom

In [7]:
dataset = list(readCSV(path))

In [8]:
dataset[0]

('88348277',
 '03969194',
 {'user_id': '88348277',
  'recipe_id': '03969194',
  'date': '2004-12-23',
  'rating': '5'})

In [9]:
itemsPerUser = defaultdict(set)
usersPerItem=defaultdict(set)
itemSet=set([d[1] for d in dataset])

In [10]:
for d in dataset:
    user,item = d[0], d[1]
    itemsPerUser[user].add(item)
    usersPerItem[item].add(user)

In [32]:
dataset2 = list(parse("trainRecipes.json.gz"))

In [12]:
ingsPerItem = defaultdict(set)
itemsPerIng = defaultdict(set)

In [13]:
for d in dataset2:
    r = d['recipe_id']
    for i in d['ingredients']:
        ingsPerItem[r].add(i)
        itemsPerIng[i].add(r)

1. Popularity and Jaccard similarity

In [14]:
def build_validate_set(dataset):
    validate_set=[]
    random.seed(50)
    for d in dataset:
        positive_entry=[d[0],d[1],1]
        negative_entry_item_set=itemSet.difference(itemsPerUser[d[0]])
        random_item=random.choice(list(negative_entry_item_set))
        negative_entry=[d[0],random_item,0]
        validate_set.append(positive_entry)
        validate_set.append(negative_entry)
    return validate_set

In [15]:
def build_train_set(dataset):
    train_set=[]
    for d in dataset:
        positive_entry=[d[0],d[1],d[2]]
        train_set.append(positive_entry)
    return train_set

In [25]:
def mostSimilar(i):
    ings = ingsPerItem[i]
    similarities = []
    for i2 in ingsPerItem: # For all items
        if i == i2: continue # other than the query
        sim = Jaccard(ings, ingsPerItem[i2])
        similarities.append((sim,i2))
    similarities.sort(key = lambda x: (-x[0], x[1]),reverse=False)
    for d in similarities:
        if d[1] in itemSet:
            return d[1]
    return dataset[0][1]

In [20]:
# def Pearson(i1, i2):
#     # Between two items
#     iBar1 = itemAverages[i1]
#     iBar2 = itemAverages[i2]
#     inter = usersPerItem[i1].intersection(usersPerItem[i2])
#     numer = 0
#     denom1 = 0
#     denom2 = 0
#     for u in inter:
#         numer += (ratingDict[(u,i1)] - iBar1)*(ratingDict[(u,i2)] - iBar2)
#     for u in usersPerItem[i1]:
#         denom1 += (ratingDict[(u,i1)] - iBar1)**2
#     for u in usersPerItem[i2]:
#         denom2 += (ratingDict[(u,i2)] - iBar2)**2
#     denom = math.sqrt(denom1) * math.sqrt(denom2)
#     if denom == 0: return 0
#     return numer / denom

In [14]:
train_set=build_train_set(dataset[:499000])
validate_set=build_validate_set(dataset[499000:500000])

In [21]:
def jaccardPopularityModel(train_set, test_set, pt=0.6):
    # calculate most popular set in train_set
    recipeCount = defaultdict(int)
    totalCooked = 0
    for d in train_set:
        recipeCount[d[1]] += 1
        totalCooked += 1

    mostPopular = [(recipeCount[x], x) for x in recipeCount]
    mostPopular.sort()
    mostPopular.reverse()

    return1 = set()
    count = 0
    for ic, i in mostPopular:
        count += ic
        return1.add(i)
        if count > totalCooked*pt: break

    # evalute on test_set
    total_size=len(test_set)
    correct_size=0
    for i in range(total_size):
        sample=test_set[i]
        item=sample[1]
        user=sample[0]
        predict=0

        if item in return1:
            predict=1
        else:
            
#             maxSim1=0
#             for u in usersPerItem[item]:
#                 sim=Jaccard(itemsPerUser[u],itemsPerUser[user])
#                 maxSim1=max(maxSim1,sim)
#             print("1:")
#             print(maxSim1)
#             maxSim2=0
#             for d in itemsPerUser[user]:
#                 sim=Jaccard(usersPerItem[d],usersPerItem[item])
#                 maxSim2=max(maxSim2,sim)
#             print("2:")
#             print(maxSim2)
#             if maxSim2>0.1 and maxSim1>0.01:
#                 predict=1
#             maxSim1=0
#             for d in itemsPerUser[user]:
#                 sim=Pearson1(d,item)
#                 maxSim1=max(maxSim1,sim)
#             print("1:")
#             print(maxSim1)
            
            maxSim=0
            for d in itemsPerUser[user]:
                sim=Pearson(d,item)
                maxSim2=max(maxSim,sim)
            print("pearson:")
            print(maxSim)
            
            if maxSim2>0.1:
                predict=1
                
                
        if predict==sample[2]:
            correct_size+=1
    accuracy=correct_size/total_size
    print([accuracy])

In [22]:
jaccardPopularityModel(train_set,validate_set,0.694)

pearson:
0
pearson:
0
pearson:
0
pearson:
0
pearson:
0
pearson:
0
pearson:
0
pearson:
0
pearson:
0
pearson:
0
pearson:
0
pearson:
0.5477225575051661
pearson:
0
pearson:
0
pearson:
0
pearson:
0.059085806446138964
pearson:
0
pearson:
0
pearson:
0
pearson:
0
pearson:
0
pearson:
0
pearson:
0.10368527442368827
pearson:
0
pearson:
0
pearson:
0
pearson:
0
pearson:
0.3333333333333332
pearson:
0
pearson:
0
pearson:
0
pearson:
0
pearson:
0
pearson:
0
pearson:
0
pearson:
0
pearson:
0
pearson:
0.6123724356957945
pearson:
0.8333333333333337
pearson:
0
pearson:
0
pearson:
0
pearson:
0
pearson:
0
pearson:
0
pearson:
0
pearson:
0
pearson:
0.8164965809277259
pearson:
0
pearson:
0
pearson:
0
pearson:
0
pearson:
0
pearson:
0.5193819785289564
pearson:
0
pearson:
0
pearson:
0.20412414523193148
pearson:
0
pearson:
0
pearson:
0
pearson:
0
pearson:
0
pearson:
0
pearson:
0.6846531968814575
pearson:
0
pearson:
0
pearson:
0
pearson:
0.6123724356957945
pearson:
0
pearson:
0
pearson:
0
pearson:
0
pearson:
0
pearso

pearson:
0
pearson:
0
pearson:
0
pearson:
0
pearson:
0
pearson:
0.016835875742536904
pearson:
0
pearson:
0
pearson:
0
pearson:
0
pearson:
0.6149186938124421
pearson:
0
pearson:
0
pearson:
0
pearson:
0
pearson:
0
pearson:
0
pearson:
0.13693063937629132
pearson:
0
pearson:
0
pearson:
0
pearson:
0.4166666666666667
pearson:
0
pearson:
0
pearson:
0
pearson:
0.28867513459481287
pearson:
0
pearson:
0
pearson:
0
pearson:
0
pearson:
0
pearson:
0
pearson:
0
pearson:
0.3091734712004211
pearson:
0
pearson:
0
pearson:
0
pearson:
0
pearson:
0.6546536707079768
pearson:
0
pearson:
0
pearson:
0
pearson:
0
pearson:
0.35355339059327373
pearson:
0
pearson:
0
pearson:
0
pearson:
0
pearson:
0
pearson:
0
pearson:
0.23570226039551598
pearson:
0
pearson:
0
pearson:
0
pearson:
0
pearson:
0
pearson:
0
pearson:
0
pearson:
0
pearson:
0
pearson:
0.6324555320336757
pearson:
0
pearson:
0
pearson:
0
pearson:
0
pearson:
0
pearson:
0
pearson:
0
pearson:
0
pearson:
0
pearson:
0.577350269189626
pearson:
0.1195228609334392

Upload to kaggle:

In [80]:
def jaccardPopularityModel(train_set, pt=0.6):
    print("predicting....")

    # calculate most popular set in train_set
    recipeCount = defaultdict(int)
    totalCooked = 0
    for d in train_set:
        recipeCount[d[1]] += 1
        totalCooked += 1

    mostPopular = [(recipeCount[x], x) for x in recipeCount]
    mostPopular.sort()
    mostPopular.reverse()

    return1 = set()
    count = 0
    for ic, i in mostPopular:
        count += ic
        return1.add(i)
        if count > totalCooked*pt: break

    predictions = open("predictions_Made.txt", 'w')
    for l in open("stub_Made.txt"):
        if l.startswith("user_id"):
            predictions.write(l)
            continue
        user,item = l.strip().split('-')
        item1=item
        predict=0
            

        if item in return1:
            predict=1
        else:
            
            if item not in itemSet:
                predict=0
            else:
#                 maxSim=0
#                 for d in itemsPerUser[user]:
#                     sim=Pearson(d,item)
#                     maxSim=max(maxSim,sim)
#                 print("Pearson:")
#                 print(maxSim)

                maxSim1=0
                for u in usersPerItem[item]:
                    sim=Jaccard(itemsPerUser[u],itemsPerUser[user])
                    maxSim1=max(maxSim1,sim)
                print("1:")
                print(maxSim1)
                maxSim2=0
                for d in itemsPerUser[user]:
                    sim=Jaccard(usersPerItem[d],usersPerItem[item])
                    maxSim2=max(maxSim2,sim)
                print("2:")
                print(maxSim2)
                if maxSim1>0.01 and maxSim2 > 0.05:
                    predict=1 

        if predict==1:
            predictions.write(user + '-' + item1 + ",1\n")
        else:
            predictions.write(user + '-' + item1 + ",0\n")
    predictions.close()
    print("predicting finished!")

In [81]:
train_set=build_train_set(dataset)

In [82]:
jaccardPopularityModel(train_set,0.595)

predicting....
1:
0.0036407766990291263
2:
0.017543859649122806
1:
0.027188081936685288
2:
0.2857142857142857
1:
0
2:
0
1:
0
2:
0
1:
0
2:
0
1:
0.0053388090349075976
2:
0.5
1:
0.03212468193384224
2:
0.25
1:
0.002390438247011952
2:
0.09090909090909091
1:
0.0023584905660377358
2:
0.2
1:
0.005631687629059508
2:
0.3333333333333333
1:
0.01232114467408585
2:
0.5
1:
0.005376344086021506
2:
0.008849557522123894
1:
0
2:
0
1:
0.0065420560747663555
2:
0.25
1:
0.00034083162917518747
2:
0.029411764705882353
1:
0
2:
0
1:
0
2:
0
1:
0
2:
0
1:
0.002207505518763797
2:
0.5
1:
0
2:
0
1:
0.0007423904974016332
2:
0.06666666666666667
1:
0.002824858757062147
2:
0.024390243902439025
1:
0.004629629629629629
2:
0.2
1:
0.00020824656393169514
2:
0.3333333333333333
1:
0.009009009009009009
2:
0.005154639175257732
1:
0
2:
0
1:
0
2:
0
1:
0.01690039554117224
2:
0.25
1:
0.016735028712059064
2:
0.5
1:
0.004098360655737705
2:
0.017857142857142856
1:
0
2:
0
1:
0.002551020408163265
2:
0.011363636363636364
1:
0.00356188780053

0.25
1:
0.020985401459854013
2:
0.5
1:
0.004075846181109339
2:
0.5
1:
0.0013844023996308261
2:
0.3333333333333333
1:
0
2:
0
1:
0.0027548209366391185
2:
0.25
1:
0
2:
0
1:
0
2:
0
1:
0.004392386530014641
2:
0.02702702702702703
1:
0.004461440407903123
2:
0.125
1:
0.0007936507936507937
2:
0.002183406113537118
1:
0
2:
0
1:
0.007470119521912351
2:
0.16666666666666666
1:
0
2:
0
1:
0.004124656278643446
2:
0.1111111111111111
1:
0
2:
0
1:
0.0035211267605633804
2:
0.014492753623188406
1:
0
2:
0
1:
0.0007974481658692185
2:
0.2
1:
0.0031645569620253164
2:
0.002369668246445498
1:
0
2:
0
1:
0
2:
0
1:
0
2:
0
1:
0.0023923444976076554
2:
0.0029498525073746312
1:
0.012631233595800526
2:
0.5
1:
0
2:
0
1:
0.0015015015015015015
2:
0.16666666666666666
1:
0.014150943396226415
2:
0.01098901098901099
1:
0
2:
0
1:
0.0029850746268656717
2:
0.14285714285714285
1:
0.0006906077348066298
2:
0.021739130434782608
1:
0
2:
0
1:
0.0019723865877712033
2:
0.5
1:
0.0011876484560570072
2:
0.058823529411764705
1:
0
2:
0
1:
0.00

2:
0
1:
0.0015408320493066256
2:
0.007936507936507936
1:
0.005009392611145898
2:
0.1
1:
0.009831029185867896
2:
0.25
1:
0
2:
0
1:
0.0015552099533437014
2:
0.2
1:
0
2:
0
1:
0
2:
0
1:
0.001941747572815534
2:
0.08333333333333333
1:
0.00732421875
2:
0.14285714285714285
1:
0.005529953917050691
2:
0.14285714285714285
1:
0.0392156862745098
2:
0.011494252873563218
1:
0
2:
0
1:
0.013107416879795396
2:
0.25
1:
0.005490196078431373
2:
0.3333333333333333
1:
0.006211180124223602
2:
0.2
1:
0
2:
0
1:
0.0015290519877675841
2:
0.05555555555555555
1:
0
2:
0
1:
0
2:
0
1:
0
2:
0
1:
0.0027100271002710027
2:
0.25
1:
0.0018484288354898336
2:
0.05555555555555555
1:
0
2:
0
1:
0.004038772213247173
2:
0.3333333333333333
1:
0.000474158368895211
2:
0.09090909090909091
1:
0.00425531914893617
2:
0.058823529411764705
1:
0.0007215007215007215
2:
0.020833333333333332
1:
0
2:
0
1:
0
2:
0
1:
0.010569852941176471
2:
0.3333333333333333
1:
0
2:
0
1:
0
2:
0
1:
0.01485148514851485
2:
0.058823529411764705
1:
0.0029282576866764

2:
0.1111111111111111
1:
0.006944444444444444
2:
0.02702702702702703
1:
0.023393984404010398
2:
0.5
1:
0
2:
0
1:
0
2:
0
1:
0.011581067472306143
2:
0.3333333333333333
1:
0.01885057471264368
2:
0.25
1:
0.006024096385542169
2:
0.16666666666666666
1:
0
2:
0
1:
0.00487012987012987
2:
0.1
1:
0
2:
0
1:
0
2:
0
1:
0.03352189398701027
2:
0.3333333333333333
1:
0.0016560861164780568
2:
0.06666666666666667
1:
0.0076045627376425855
2:
0.006622516556291391
1:
0
2:
0
1:
0.000687757909215956
2:
0.3333333333333333
1:
0.0015220700152207
2:
0.024390243902439025
1:
0.001182033096926714
2:
0.013513513513513514
1:
0.008869179600886918
2:
0.029411764705882353
1:
0.015518028297581013
2:
0.25
1:
0
2:
0
1:
0.006694720734506504
2:
0.3333333333333333
1:
0
2:
0
1:
0.0120415982484948
2:
0.2
1:
0
2:
0
1:
0
2:
0
1:
0.005680351148980119
2:
0.5
1:
0.0005955926146515784
2:
0.029411764705882353
1:
0.0005903187721369539
2:
0.0196078431372549
1:
0.001148105625717566
2:
0.08333333333333333
1:
0.00842548709847288
2:
0.2
1:
0.

2:
0.045454545454545456
1:
0.002304147465437788
2:
0.014705882352941176
1:
0.001669449081803005
2:
0.2
1:
0.0028694404591104736
2:
0.058823529411764705
1:
0.0032442748091603053
2:
0.5
1:
0
2:
0
1:
0
2:
0
1:
0.0026455026455026454
2:
0.1
1:
0.003303964757709251
2:
0.16666666666666666
1:
0.0006598482349059716
2:
0.125
1:
0
2:
0
1:
0.006097560975609756
2:
0.022727272727272728
1:
0
2:
0
1:
0
2:
0
1:
0.00849673202614379
2:
0.5
1:
0
2:
0
1:
0.012048192771084338
2:
0.023809523809523808
1:
0.0015313935681470138
2:
0.16666666666666666
1:
0.003157894736842105
2:
0.014492753623188406
1:
0
2:
0
1:
0.016537467700258397
2:
0.2
1:
0.0011001100110011
2:
0.010526315789473684
1:
0
2:
0
1:
0
2:
0
1:
0
2:
0
1:
0
2:
0
1:
0.011661807580174927
2:
0.4
1:
0
2:
0
1:
0
2:
0
1:
0
2:
0
1:
0
2:
0
1:
0.006493506493506494
2:
0.3333333333333333
1:
0.005092761004001455
2:
0.5
1:
0.004646840148698885
2:
0.03333333333333333
1:
0
2:
0
1:
0
2:
0
1:
0.030303030303030304
2:
0.006329113924050633
1:
0
2:
0
1:
0.0072405470635559

2:
0
1:
0.0014866204162537165
2:
0.14285714285714285
1:
0.000949667616334283
2:
0.02702702702702703
1:
0
2:
0
1:
0
2:
0
1:
0.008016032064128256
2:
0.08333333333333333
1:
0.002785515320334262
2:
0.25
1:
0.007173601147776184
2:
0.3333333333333333
1:
0
2:
0
1:
0
2:
0
1:
0.006436955698599016
2:
0.2
1:
0.007570543702684102
2:
0.3333333333333333
1:
0.00759914509617668
2:
0.5
1:
0.0074875207986688855
2:
0.25
1:
0.0008936550491510277
2:
0.0625
1:
0
2:
0
1:
0.0037533512064343165
2:
0.2
1:
0
2:
0
1:
0
2:
0
1:
0.0026041666666666665
2:
0.25
1:
0
2:
0
1:
0.004612546125461255
2:
0.5
1:
0
2:
0
1:
0.018367961457392352
2:
0.3333333333333333
1:
0
2:
0
1:
0.00641025641025641
2:
0.00909090909090909
1:
0
2:
0
1:
0
2:
0
1:
0.011886503067484663
2:
0.3333333333333333
1:
0.0012232415902140672
2:
0.16666666666666666
1:
0.009615384615384616
2:
0.034482758620689655
1:
0.0015527950310559005
2:
0.010416666666666666
1:
0.002967359050445104
2:
0.0017035775127768314
1:
0.0022099447513812156
2:
0.3333333333333333
1:
0.

1:
0.006820566631689402
2:
0.3333333333333333
1:
0.0014347202295552368
2:
0.09090909090909091
1:
0
2:
0
1:
0
2:
0
1:
0.0033333333333333335
2:
0.14285714285714285
1:
0.011144578313253013
2:
0.25
1:
0.004524886877828055
2:
0.010869565217391304
1:
0
2:
0
1:
0.0008896797153024911
2:
0.05555555555555555
1:
0.0007911392405063291
2:
0.027777777777777776
1:
0.0002050440844781628
2:
0.00819672131147541
1:
0.010596026490066225
2:
0.25
1:
0.0005422993492407809
2:
0.3333333333333333
1:
0.01588447653429603
2:
0.3333333333333333
1:
0.007751937984496124
2:
0.25
1:
0.00099601593625498
2:
0.2
1:
0.006787330316742082
2:
0.25
1:
0
2:
0
1:
0
2:
0
1:
0
2:
0
1:
0.0009478672985781991
2:
0.125
1:
0.0017152658662092624
2:
0.007407407407407408
1:
0
2:
0
1:
0.009134406263592866
2:
0.25
1:
0.021533811862485833
2:
0.25
1:
0
2:
0
1:
0.009208103130755065
2:
0.041666666666666664
1:
0
2:
0
1:
0.000992063492063492
2:
0.01282051282051282
1:
0
2:
0
1:
0
2:
0
1:
0
2:
0
1:
0.007547169811320755
2:
0.02631578947368421
1:
0
2

0.04
1:
0
2:
0
1:
0
2:
0
1:
0.0006839945280437756
2:
0.016666666666666666
1:
0
2:
0
1:
0
2:
0
1:
0.0006807351940095302
2:
0.05555555555555555
1:
0.007874015748031496
2:
0.2
1:
0.0009596928982725527
2:
0.05555555555555555
1:
0
2:
0
1:
0.00784313725490196
2:
0.004
1:
0.013513513513513514
2:
0.045454545454545456
1:
0.010326311441553077
2:
0.3333333333333333
1:
0
2:
0
1:
0
2:
0
1:
0
2:
0
1:
0
2:
0
1:
0.0012406947890818859
2:
0.002183406113537118
1:
0.0022471910112359553
2:
0.014925373134328358
1:
0.0034079844206426485
2:
0.1111111111111111
1:
0
2:
0
1:
0.0009606147934678194
2:
0.3333333333333333
1:
0.010582010582010581
2:
0.125
1:
0.009021512838306732
2:
0.08333333333333333
1:
0.003952569169960474
2:
0.003367003367003367
1:
0
2:
0
1:
0.0033112582781456954
2:
0.03333333333333333
1:
0.0012239902080783353
2:
0.008849557522123894
1:
0
2:
0
1:
0
2:
0
1:
0
2:
0
1:
0
2:
0
1:
0.002793296089385475
2:
0.06666666666666667
1:
0.009644670050761422
2:
0.3333333333333333
1:
0.008425145819831496
2:
0.1666

2:
0.14285714285714285
1:
0.0012158054711246201
2:
0.3333333333333333
1:
0.0014184397163120568
2:
0.022222222222222223
1:
0
2:
0
1:
0.006153846153846154
2:
0.2
1:
0
2:
0
1:
0.0028208744710860366
2:
0.006289308176100629
1:
0.001392757660167131
2:
0.2
1:
0
2:
0
1:
0.0019801980198019802
2:
0.2
1:
0
2:
0
1:
0.00101010101010101
2:
0.038461538461538464
1:
0
2:
0
1:
0
2:
0
1:
0
2:
0
1:
0.0007908264136022143
2:
0.1111111111111111
1:
0.007477906186267845
2:
0.25
1:
0.004310344827586207
2:
0.2
1:
0.0034542314335060447
2:
0.125
1:
0
2:
0
1:
0.003683241252302026
2:
0.047619047619047616
1:
0.013231981981981982
2:
0.3333333333333333
1:
0
2:
0
1:
0.0029806259314456036
2:
0.3333333333333333
1:
0.0012779552715654952
2:
0.045454545454545456
1:
0
2:
0
1:
0
2:
0
1:
0.003389830508474576
2:
0.023255813953488372
1:
0.0016142050040355124
2:
0.14285714285714285
1:
0
2:
0
1:
0.008048289738430584
2:
0.07142857142857142
1:
0.016181229773462782
2:
0.14285714285714285
1:
0
2:
0
1:
0
2:
0
1:
0.021956087824351298
2:


2:
0
1:
0.004333694474539545
2:
0.5
1:
0.008097165991902834
2:
0.019230769230769232
1:
0
2:
0
1:
0.011731843575418994
2:
0.3333333333333333
1:
0.0049813200498132005
2:
0.16666666666666666
1:
0
2:
0
1:
0.0005567928730512249
2:
0.1
1:
0
2:
0
1:
0
2:
0
1:
0
2:
0
1:
0.002260738507912585
2:
0.25
1:
0
2:
0
1:
0.001466275659824047
2:
0.047619047619047616
1:
0
2:
0
1:
0
2:
0
1:
0.0017953321364452424
2:
0.014705882352941176
1:
0.00510204081632653
2:
0.043478260869565216
1:
0
2:
0
1:
0.0038119440914866584
2:
0.25
1:
0
2:
0
1:
0.0009573958831977022
2:
0.4
1:
0.0030303030303030303
2:
0.012048192771084338
1:
0
2:
0
1:
0.01025236593059937
2:
0.3333333333333333
1:
0
2:
0
1:
0.0010626992561105207
2:
0.16666666666666666
1:
0.006065200909780136
2:
0.1
1:
0.004512874966817096
2:
0.25
1:
0.0004166666666666667
2:
0.16666666666666666
1:
0
2:
0
1:
0.009297520661157025
2:
0.2222222222222222
1:
0.0014104372355430183
2:
0.3333333333333333
1:
0
2:
0
1:
0
2:
0
1:
0.012418687167356593
2:
0.2
1:
0
2:
0
1:
0.0046082

2:
0.25
1:
0.00851581508515815
2:
0.25
1:
0.013157894736842105
2:
0.14285714285714285
1:
0.0016891891891891893
2:
0.058823529411764705
1:
0
2:
0
1:
0
2:
0
1:
0.0028653295128939827
2:
0.0625
1:
0
2:
0
1:
0.003929273084479371
2:
0.08333333333333333
1:
0.004344048653344918
2:
0.25
1:
0.006125574272588055
2:
0.16666666666666666
1:
0
2:
0
1:
0.008830022075055188
2:
0.2
1:
0.0036496350364963502
2:
0.002369668246445498
1:
0
2:
0
1:
0.03212468193384224
2:
0.3333333333333333
1:
0
2:
0
1:
0.0030721966205837174
2:
0.3333333333333333
1:
0.006329113924050633
2:
0.125
1:
0.00020627062706270627
2:
0.14285714285714285
1:
0.0019582245430809398
2:
0.037037037037037035
1:
0.011764705882352941
2:
0.05555555555555555
1:
0.0045871559633027525
2:
0.16666666666666666
1:
0
2:
0
1:
0.03
2:
0.25
1:
0
2:
0
1:
0.0014695077149155032
2:
0.05555555555555555
1:
0
2:
0
1:
0.002805049088359046
2:
0.2
1:
0
2:
0
1:
0
2:
0
1:
0.002052334530528476
2:
0.3333333333333333
1:
0
2:
0
1:
0.001091703056768559
2:
0.125
1:
0.0044510

2:
0.2
1:
0.0029411764705882353
2:
0.5
1:
0
2:
0
1:
0
2:
0
1:
0
2:
0
1:
0
2:
0
1:
0.0017316017316017316
2:
0.043478260869565216
1:
0
2:
0
1:
0.000946969696969697
2:
0.1
1:
0.006269592476489028
2:
0.008695652173913044
1:
0.0034602076124567475
2:
0.011235955056179775
1:
0.000513347022587269
2:
0.008547008547008548
1:
0.005040713454827452
2:
0.5
1:
0
2:
0
1:
0.002932551319648094
2:
0.0045045045045045045
1:
0.00625
2:
0.058823529411764705
1:
0.0004137360364087712
2:
0.06666666666666667
1:
0
2:
0
1:
0.009943652635067949
2:
0.5
1:
0
2:
0
1:
0
2:
0
1:
0
2:
0
1:
0.006756756756756757
2:
0.038461538461538464
1:
0.006731287022078622
2:
0.3333333333333333
1:
0
2:
0
1:
0.0037735849056603774
2:
0.00819672131147541
1:
0
2:
0
1:
0
2:
0
1:
0
2:
0
1:
0.004415011037527594
2:
0.25
1:
0
2:
0
1:
0
2:
0
1:
0.008333333333333333
2:
0.5
1:
0.008333333333333333
2:
0.017543859649122806
1:
0
2:
0
1:
0
2:
0
1:
0.00205761316872428
2:
0.058823529411764705
1:
0.009602501116569897
2:
0.3333333333333333
1:
0
2:
0
1:
0.0

1:
0.006470696986503975
2:
0.25
1:
0
2:
0
1:
0
2:
0
1:
0.0015552099533437014
2:
0.013513513513513514
1:
0.005089058524173028
2:
0.1
1:
0.0038022813688212928
2:
0.1
1:
0.0035087719298245615
2:
0.1
1:
0
2:
0
1:
0.0022701475595913734
2:
0.024390243902439025
1:
0
2:
0
1:
0.0028116213683223993
2:
0.0625
1:
0.0032751091703056767
2:
0.25
1:
0.017190278601066984
2:
0.2
1:
0
2:
0
1:
0.0024390243902439024
2:
0.006578947368421052
1:
0.015151515151515152
2:
0.058823529411764705
1:
0
2:
0
1:
0
2:
0
1:
0
2:
0
1:
0
2:
0
1:
0.018694653984913087
2:
0.4
1:
0.015776081424936386
2:
0.25
1:
0.005747126436781609
2:
0.3333333333333333
1:
0.0012234910277324632
2:
0.14285714285714285
1:
0.019600052973116143
2:
0.3333333333333333
1:
0.024170274170274172
2:
0.3333333333333333
1:
0.002257336343115124
2:
0.015151515151515152
1:
0.004107981220657277
2:
0.2
1:
0.002320185614849188
2:
0.06666666666666667
1:
0
2:
0
1:
0
2:
0
1:
0.002881844380403458
2:
0.017543859649122806
1:
0.00929368029739777
2:
0.25
1:
0.0034542314

1:
0.004608294930875576
2:
0.14285714285714285
1:
0.0017953321364452424
2:
0.1
1:
0
2:
0
1:
0
2:
0
1:
0.00847457627118644
2:
0.0034602076124567475
1:
0
2:
0
1:
0.01357042583060365
2:
0.5
1:
0
2:
0
1:
0
2:
0
1:
0
2:
0
1:
0.0016545334215751159
2:
0.25
1:
0
2:
0
1:
0.004310344827586207
2:
0.027777777777777776
1:
0
2:
0
1:
0.011029411764705883
2:
0.3333333333333333
1:
0
2:
0
1:
0.002570694087403599
2:
0.023809523809523808
1:
0
2:
0
1:
0.003289473684210526
2:
0.025
1:
0.023826208829712685
2:
0.058823529411764705
1:
0
2:
0
1:
0.0022354694485842027
2:
0.027777777777777776
1:
0.002150537634408602
2:
0.0625
1:
0
2:
0
1:
0.016241776315789474
2:
0.5
1:
0.004663557628247834
2:
0.2
1:
0
2:
0
1:
0
2:
0
1:
0.0013245033112582781
2:
0.037037037037037035
1:
0.003051881993896236
2:
0.125
1:
0
2:
0
1:
0
2:
0
1:
0
2:
0
1:
0
2:
0
1:
0.0013363028953229399
2:
0.058823529411764705
1:
0.005089058524173028
2:
0.007751937984496124
1:
0.01098901098901099
2:
0.02857142857142857
1:
0.001451378809869376
2:
0.33333333

2:
0.16666666666666666
1:
0.004608294930875576
2:
0.008130081300813009
1:
0
2:
0
1:
0.01634512325830654
2:
0.4
1:
0.001452081316553727
2:
0.07142857142857142
1:
0
2:
0
1:
0
2:
0
1:
0
2:
0
1:
0.008403361344537815
2:
0.0036101083032490976
1:
0
2:
0
1:
0.0014278914802475012
2:
0.05263157894736842
1:
0
2:
0
1:
0.007308684436801376
2:
0.25
1:
0
2:
0
1:
0.00641025641025641
2:
0.07692307692307693
1:
0
2:
0
1:
0.0008156606851549756
2:
0.3333333333333333
1:
0.00253592561284869
2:
0.3333333333333333
1:
0
2:
0
1:
0.0030633735401110473
2:
0.25
1:
0
2:
0
1:
0
2:
0
1:
0.006389776357827476
2:
0.023255813953488372
1:
0.009059233449477353
2:
0.06666666666666667
1:
0
2:
0
1:
0
2:
0
1:
0
2:
0
1:
0
2:
0
1:
0.0030120481927710845
2:
0.2
1:
0
2:
0
1:
0.007964601769911504
2:
0.25
1:
0
2:
0
1:
0.005434782608695652
2:
0.045454545454545456
1:
0
2:
0
1:
0
2:
0
1:
0.008902077151335312
2:
0.16666666666666666
1:
0
2:
0
1:
0.011764705882352941
2:
0.25
1:
0.0008382229673093043
2:
0.0049504950495049506
1:
0.00453367875

2:
0.14285714285714285
1:
0.00034246575342465754
2:
0.1
1:
0
2:
0
1:
0.0018281535648994515
2:
0.14285714285714285
1:
0.004395604395604396
2:
0.02631578947368421
1:
0
2:
0
1:
0
2:
0
1:
0.018425460636515914
2:
0.16666666666666666
1:
0
2:
0
1:
0.019211324570273004
2:
0.16666666666666666
1:
0.004234026173979984
2:
0.5
1:
0.008285862247540134
2:
0.25
1:
0.005633802816901409
2:
0.25
1:
0.016551724137931035
2:
0.25
1:
0
2:
0
1:
0.004551365409622887
2:
0.07142857142857142
1:
0.010309278350515464
2:
0.007352941176470588
1:
0
2:
0
1:
0.0032763014197306154
2:
0.5
1:
0.004219409282700422
2:
0.2
1:
0.0011494252873563218
2:
0.09090909090909091
1:
0.002074688796680498
2:
0.25
1:
0.0024330900243309003
2:
0.2
1:
0.023521351906828043
2:
0.5
1:
0
2:
0
1:
0.004288777698355968
2:
0.14285714285714285
1:
0
2:
0
1:
0.005302695536897923
2:
0.25
1:
0.005511022044088177
2:
0.25
1:
0
2:
0
1:
0.016713091922005572
2:
0.0625
1:
0.007731958762886598
2:
0.08333333333333333
1:
0
2:
0
1:
0.00487012987012987
2:
0.1428571

2:
0.5
1:
0.0015048908954100827
2:
0.02631578947368421
1:
0
2:
0
1:
0.008591885441527447
2:
0.2
1:
0.0017793594306049821
2:
0.04
1:
0.0020060180541624875
2:
0.011363636363636364
1:
0.002053388090349076
2:
0.007142857142857143
1:
0
2:
0
1:
0.024170274170274172
2:
0.5
1:
0.005221932114882507
2:
0.5
1:
0.004962779156327543
2:
0.07692307692307693
1:
0.0070921985815602835
2:
0.25
1:
0
2:
0
1:
0.0020151133501259445
2:
0.1
1:
0
2:
0
1:
0
2:
0
1:
0.0009606147934678194
2:
0.07692307692307693
1:
0
2:
0
1:
0.004480286738351254
2:
0.3333333333333333
1:
0.006289308176100629
2:
0.125
1:
0.0015698587127158557
2:
0.14285714285714285
1:
0
2:
0
1:
0.0057306590257879654
2:
0.006369426751592357
1:
0
2:
0
1:
0.005
2:
0.0196078431372549
1:
0.005581395348837209
2:
0.3333333333333333
1:
0.0020646937370956643
2:
0.1
1:
0
2:
0
1:
0.0013089005235602095
2:
0.5
1:
0
2:
0
1:
0.0005151983513652757
2:
0.3333333333333333
1:
0
2:
0
1:
0
2:
0
1:
0.014473684210526316
2:
0.16666666666666666
1:
0.001566579634464752
2:
0.2


2:
0.2
1:
0.011379800853485065
2:
0.5
1:
0.0008143322475570033
2:
0.07692307692307693
1:
0.006993006993006993
2:
0.004310344827586207
1:
0.003367003367003367
2:
0.1111111111111111
1:
0.01332398316970547
2:
0.2
1:
0
2:
0
1:
0.0039177277179236044
2:
0.5
1:
0.008445945945945946
2:
0.045454545454545456
1:
0.0214190093708166
2:
0.16666666666666666
1:
0
2:
0
1:
0
2:
0
1:
0
2:
0
1:
0.0017387944358578052
2:
0.5
1:
0.004885993485342019
2:
0.3333333333333333
1:
0.013745704467353952
2:
0.014084507042253521
1:
0.005417118093174431
2:
0.03225806451612903
1:
0
2:
0
1:
0
2:
0
1:
0.006364922206506365
2:
0.2
1:
0.004581901489117984
2:
0.25
1:
0.0015337423312883436
2:
0.05555555555555555
1:
0
2:
0
1:
0.001282051282051282
2:
0.1
1:
0
2:
0
1:
0
2:
0
1:
0
2:
0
1:
0.005484460694698354
2:
0.06666666666666667
1:
0.0022396416573348264
2:
0.3333333333333333
1:
0.017994858611825194
2:
0.3333333333333333
1:
0.00020271639975674033
2:
0.038461538461538464
1:
0
2:
0
1:
0.0013504388926401081
2:
0.2
1:
0.0176181602574

2:
0.14285714285714285
1:
0
2:
0
1:
0.0049488617617947876
2:
0.3333333333333333
1:
0
2:
0
1:
0.0020325203252032522
2:
0.2
1:
0
2:
0
1:
0
2:
0
1:
0
2:
0
1:
0
2:
0
1:
0.00267379679144385
2:
0.1
1:
0
2:
0
1:
0.011565836298932384
2:
0.0625
1:
0.0053475935828877
2:
0.5
1:
0
2:
0
1:
0
2:
0
1:
0
2:
0
1:
0.0018315018315018315
2:
0.005714285714285714
1:
0
2:
0
1:
0
2:
0
1:
0.012048192771084338
2:
0.004524886877828055
1:
0.004647560030983733
2:
0.25
1:
0
2:
0
1:
0
2:
0
1:
0.009900990099009901
2:
0.0020408163265306124
1:
0.003
2:
0.14285714285714285
1:
0.003105590062111801
2:
0.25
1:
0
2:
0
1:
0.0023148148148148147
2:
0.16666666666666666
1:
0.0014947683109118087
2:
0.14285714285714285
1:
0
2:
0
1:
0.0008285004142502071
2:
0.047619047619047616
1:
0.0021897810218978104
2:
0.16666666666666666
1:
0.001949317738791423
2:
0.14285714285714285
1:
0.0024429967426710096
2:
0.14285714285714285
1:
0
2:
0
1:
0.005380476556495004
2:
0.25
1:
0.0008288437629506838
2:
0.0125
1:
0.005642633228840125
2:
0.166666666

0.16666666666666666
1:
0
2:
0
1:
0
2:
0
1:
0.013676588897827836
2:
0.2
1:
0.003885003885003885
2:
0.2
1:
0.0009573958831977022
2:
0.043478260869565216
1:
0
2:
0
1:
0
2:
0
1:
0.005131494547787043
2:
0.25
1:
0.004048582995951417
2:
0.14285714285714285
1:
0.0019966722129783694
2:
0.027777777777777776
1:
0.01335113484646195
2:
0.08333333333333333
1:
0.005076142131979695
2:
0.2
1:
0.00047938638542665386
2:
0.0017006802721088435
1:
0
2:
0
1:
0
2:
0
1:
0
2:
0
1:
0.004965243296921549
2:
0.125
1:
0
2:
0
1:
0.0008771929824561404
2:
0.2
1:
0
2:
0
1:
0
2:
0
1:
0
2:
0
1:
0
2:
0
1:
0
2:
0
1:
0
2:
0
1:
0.004524886877828055
2:
0.019230769230769232
1:
0.01359977796280877
2:
0.5
1:
0.005399568034557235
2:
0.25
1:
0.0018281535648994515
2:
0.037037037037037035
1:
0
2:
0
1:
0.002365464222353637
2:
0.14285714285714285
1:
0
2:
0
1:
0.004405286343612335
2:
0.08333333333333333
1:
0
2:
0
1:
0.012773722627737226
2:
0.0625
1:
0.0017667844522968198
2:
0.01
1:
0
2:
0
1:
0
2:
0
1:
0.0011198208286674132
2:
0.00588235

0.5
1:
0.004048582995951417
2:
0.015625
1:
0.00047778308647873863
2:
0.002369668246445498
1:
0
2:
0
1:
0.0005652911249293386
2:
0.007936507936507936
1:
0.011976047904191617
2:
0.25
1:
0
2:
0
1:
0
2:
0
1:
0.003861003861003861
2:
0.014084507042253521
1:
0
2:
0
1:
0
2:
0
1:
0.0006825938566552901
2:
0.09090909090909091
1:
0
2:
0
1:
0.0019672131147540984
2:
0.16666666666666666
1:
0
2:
0
1:
0.013032581453634085
2:
0.25
1:
0
2:
0
1:
0.009218289085545723
2:
0.2
1:
0.0007971303308090873
2:
0.019230769230769232
1:
0
2:
0
1:
0.004629629629629629
2:
0.017857142857142856
1:
0.010341261633919338
2:
0.25
1:
0
2:
0
1:
0.0005973715651135006
2:
0.007751937984496124
1:
0
2:
0
1:
0
2:
0
1:
0
2:
0
1:
0
2:
0
1:
0.014184397163120567
2:
0.1111111111111111
1:
0.0022038567493112946
2:
0.5
1:
0.008130081300813009
2:
0.034482758620689655
1:
0.013265704252828716
2:
0.5
1:
0.004219409282700422
2:
0.1111111111111111
1:
0.0020597322348094747
2:
0.01639344262295082
1:
0.003709714815673545
2:
0.25
1:
0.0026058631921824

2:
0.3333333333333333
1:
0
2:
0
1:
0.027777777777777776
2:
0.2
1:
0.006361323155216285
2:
0.14285714285714285
1:
0.0036809815950920245
2:
0.25
1:
0.004572299485616308
2:
0.3333333333333333
1:
0
2:
0
1:
0.004016064257028112
2:
0.1111111111111111
1:
0.00535475234270415
2:
0.3333333333333333
1:
0.002183406113537118
2:
0.16666666666666666
1:
0.0014527845036319612
2:
0.1111111111111111
1:
0.0008080808080808081
2:
0.038461538461538464
1:
0.0034522439585730723
2:
0.125
1:
0
2:
0
1:
0.004662004662004662
2:
0.2
1:
0.010752688172043012
2:
0.014705882352941176
1:
0.025657071339173967
2:
0.25
1:
0.008547008547008548
2:
0.1
1:
0
2:
0
1:
0.0013422818791946308
2:
0.0029585798816568047
1:
0
2:
0
1:
0
2:
0
1:
0.004032258064516129
2:
0.008695652173913044
1:
0
2:
0
1:
0.002311111111111111
2:
0.5
1:
0.0044742729306487695
2:
0.006993006993006993
1:
0
2:
0
1:
0
2:
0
1:
0.003281378178835111
2:
0.25
1:
0.01687289088863892
2:
0.16666666666666666
1:
0.002927400468384075
2:
0.5
1:
0.002807862013638187
2:
0.14285

2:
0.2
1:
0
2:
0
1:
0.0012195121951219512
2:
0.006289308176100629
1:
0.015562248995983935
2:
0.25
1:
0
2:
0
1:
0
2:
0
1:
0.006124234470691163
2:
0.3333333333333333
1:
0.007462686567164179
2:
0.023255813953488372
1:
0.001996007984031936
2:
0.07142857142857142
1:
0
2:
0
1:
0.004024144869215292
2:
0.05555555555555555
1:
0
2:
0
1:
0.004283965728274173
2:
0.16666666666666666
1:
0.012290502793296089
2:
0.25
1:
0
2:
0
1:
0.020915896081021578
2:
0.4
1:
0
2:
0
1:
0
2:
0
1:
0.002777777777777778
2:
0.5
1:
0
2:
0
1:
0
2:
0
1:
0.0009337068160597573
2:
0.07692307692307693
1:
0.005763688760806916
2:
0.034482758620689655
1:
0
2:
0
1:
0
2:
0
1:
0
2:
0
1:
0
2:
0
1:
0
2:
0
1:
0.015329125338142471
2:
0.25
1:
0
2:
0
1:
0
2:
0
1:
0.003616636528028933
2:
0.022727272727272728
1:
0
2:
0
1:
0.005128205128205128
2:
0.006134969325153374
1:
0.002857142857142857
2:
0.08333333333333333
1:
0.0012315270935960591
2:
0.1111111111111111
1:
0
2:
0
1:
0.004243281471004243
2:
0.038461538461538464
1:
0
2:
0
1:
0
2:
0
1:
0.02

2:
0
1:
0.0042451578668081715
2:
0.3333333333333333
1:
0.00130718954248366
2:
0.16666666666666666
1:
0.003432796408766834
2:
0.5
1:
0.006887052341597796
2:
0.25
1:
0.012987012987012988
2:
0.00546448087431694
1:
0
2:
0
1:
0
2:
0
1:
0
2:
0
1:
0
2:
0
1:
0.0017211703958691911
2:
0.005714285714285714
1:
0
2:
0
1:
0
2:
0
1:
0.0031466331025802393
2:
0.25
1:
0
2:
0
1:
0.00646551724137931
2:
0.02040816326530612
1:
0.00046948356807511736
2:
0.0136986301369863
1:
0.004591836734693878
2:
0.2
1:
0.000779423226812159
2:
0.05
1:
0.0007468259895444362
2:
0.007633587786259542
1:
0
2:
0
1:
0
2:
0
1:
0.017801590044936054
2:
0.5
1:
0.004807692307692308
2:
0.16666666666666666
1:
0.0036529680365296802
2:
0.25
1:
0
2:
0
1:
0
2:
0
1:
0.0019815059445178335
2:
0.14285714285714285
1:
0.0014388489208633094
2:
0.008695652173913044
1:
0
2:
0
1:
0.00881057268722467
2:
0.03571428571428571
1:
0.007023230686115613
2:
0.2
1:
0
2:
0
1:
0
2:
0
1:
0
2:
0
1:
0
2:
0
1:
0.008778346744696415
2:
0.16666666666666666
1:
0.0185983

2:
0.047619047619047616
1:
0
2:
0
1:
0.0020768431983385254
2:
0.1
1:
0.0034812880765883376
2:
0.2
1:
0.0056179775280898875
2:
0.0064516129032258064
1:
0.0013679890560875513
2:
0.0016155088852988692
1:
0.0008203445447087777
2:
0.06666666666666667
1:
0.0003429355281207133
2:
0.125
1:
0
2:
0
1:
0.006557377049180328
2:
0.034482758620689655
1:
0.006952491309385863
2:
0.3333333333333333
1:
0
2:
0
1:
0
2:
0
1:
0.005192107995846314
2:
0.0625
1:
0
2:
0
1:
0
2:
0
1:
0.008600469116497263
2:
0.2
1:
0.0027247956403269754
2:
0.1111111111111111
1:
0.002379535990481856
2:
0.25
1:
0
2:
0
1:
0.000931098696461825
2:
0.25
1:
0.0014947683109118087
2:
0.05555555555555555
1:
0
2:
0
1:
0
2:
0
1:
0
2:
0
1:
0
2:
0
1:
0
2:
0
1:
0.000594883997620464
2:
0.08333333333333333
1:
0
2:
0
1:
0.004545454545454545
2:
0.01694915254237288
1:
0.01753393665158371
2:
0.3333333333333333
1:
0
2:
0
1:
0.01282051282051282
2:
0.02702702702702703
1:
0.001201923076923077
2:
0.16666666666666666
1:
0
2:
0
1:
0.0025974025974025974
2:
0.

2:
0.017241379310344827
1:
0
2:
0
1:
0
2:
0
1:
0.005104408352668214
2:
0.25
1:
0
2:
0
1:
0.004
2:
0.25
1:
0.0016750418760469012
2:
0.05
1:
0.00410958904109589
2:
0.25
1:
0
2:
0
1:
0
2:
0
1:
0
2:
0
1:
0.0029644268774703555
2:
0.16666666666666666
1:
0.0030911901081916537
2:
0.3333333333333333
1:
0.006772009029345372
2:
0.047619047619047616
1:
0
2:
0
1:
0
2:
0
1:
0
2:
0
1:
0.007780659503519822
2:
0.25
1:
0
2:
0
1:
0.004971002485501243
2:
0.16666666666666666
1:
0
2:
0
1:
0
2:
0
1:
0
2:
0
1:
0.004833375731366065
2:
0.2857142857142857
1:
0.0027472527472527475
2:
0.14285714285714285
1:
0.001984126984126984
2:
0.02127659574468085
1:
0.0024096385542168677
2:
0.004739336492890996
1:
0.008849557522123894
2:
0.012195121951219513
1:
0
2:
0
1:
0.0026980150317980345
2:
0.3333333333333333
1:
0.0035714285714285713
2:
0.00546448087431694
1:
0
2:
0
1:
0.004050405040504051
2:
0.25
1:
0.004878048780487805
2:
0.010416666666666666
1:
0.0004051863857374392
2:
0.06666666666666667
1:
0.00345489443378119
2:
0.16

2. Classification

In [None]:
# import sklearn
# from sklearn import linear_model

In [None]:
# def build_train_set(dataset):
#     itemsPerUser = defaultdict(set)
#     usersPerItem=defaultdict(set)
#     for d in dataset:
#         user,item = d[0], d[1]
#         itemsPerUser[user].add(item)
#         usersPerItem[item].add(user)
#     train_set=[]
#     random.seed(50)
#     for d in dataset:
#         positive_entry=[d[0],d[1],1]
#         negative_entry_item_set=itemSet.difference(itemsPerUser[d[0]])
#         random_item=random.choice(list(negative_entry_item_set))
#         negative_entry=[d[0],random_item,0]
#         train_set.append(positive_entry)
#         train_set.append(negative_entry)
#     return train_set

In [None]:
# train_set=build_train_set(dataset)

In [None]:
# itemsPerUser = defaultdict(set)
# usersPerItem=defaultdict(set)
# for d in train_set:
#     user,item = d[0], d[1]
#     itemsPerUser[user].add(item)
#     usersPerItem[item].add(user)

# itemSet=set([d[1] for d in train_set])
# userSet=set([d[0] for d in train_set])

In [None]:
# # calculate most popular set in train_set
# recipeCount = defaultdict(int)
# totalCooked = 0
# for d in train_set:
#     recipeCount[d[1]] += 1
#     totalCooked += 1

# mostPopular = [(recipeCount[x], x) for x in recipeCount]
# mostPopular.sort()
# mostPopular.reverse()

# return1 = set()
# count = 0
# for ic, i in mostPopular:
#     count += ic
#     return1.add(i)
#     if count > totalCooked*0.6: break

In [None]:
# def feature(user, item):
#     feat = [1]
#     if item in return1:
#         feat.append(1)
#     else:
#         feat.append(0)    
#     maxSim=0
#     for d in itemsPerUser[user]:
#         sim=Jaccard(usersPerItem[d],usersPerItem[item])
#         maxSim=max(maxSim,sim)
#     feat.append(maxSim-0.01)
#     return feat

In [None]:
# X=[]
# y=[]

In [None]:
# for d in train_set:
#     user=d[0]
#     item=d[1]
#     X.append(feature(user,item))
#     y.append(d[2]==1)

In [None]:
# model = sklearn.linear_model.LogisticRegression()
# model.fit(X, y)

In [None]:
# X_test=[]

In [None]:
# predictions = open("predictions_Made.txt", 'w')
# for l in open("stub_Made.txt"):
#     if l.startswith("user_id"):
#         predictions.write(l)
#         continue
#     user,item = l.strip().split('-')
#     X_test.append(feature(user,item))
# y_predict=model.predict(X_test)

# i=0
# for l in open("stub_Made.txt"):
#     if l.startswith("user_id"):
#         continue
#     user,item = l.strip().split('-')
#     predict=y_predict[i]
#     if predict==True:
#         predictions.write(user + '-' + item + ",1\n")
#     else:
#         predictions.write(user + '-' + item + ",0\n")
#     i+=1
# predictions.close()
# print("predicting finished!")