In [1]:
import os
import zipfile
import csv

import requests

def get_data():
    return csv.DictReader((x for x in open("data/trim_fac/FacEneMar2020.csv",encoding = "utf-8-sig")), delimiter=";",)

def get_ratings():
    return get_data()

def get_book_features():
    return get_data()

In [2]:
import json
from itertools import islice

ratings = get_data()

In [3]:
for line in islice(ratings, 2):
    print(json.dumps(line, indent=4))

{
    "NIT": "1000183234",
    "ARTCOD": "173164",
    "NUM_VECES_COMP": "1",
    "TRIMESTRE": "1"
}
{
    "NIT": "1000183234",
    "ARTCOD": "173203",
    "NUM_VECES_COMP": "1",
    "TRIMESTRE": "1"
}


In [4]:
from lightfm.data import Dataset

dataset = Dataset()
dataset.fit((x['NIT'] for x in get_ratings()),
            (x['ARTCOD'] for x in get_ratings()))

In [5]:
num_users, num_items = dataset.interactions_shape()
print('Num users: {}, num_items {}.'.format(num_users, num_items))

Num users: 11875, num_items 3384.


In [6]:
(interactions, weights) = dataset.build_interactions(((x['NIT'], x['ARTCOD'],float(x['NUM_VECES_COMP']))
                                                      for x in get_ratings()))

print(repr(interactions))
print(repr(weights))

<11875x3384 sparse matrix of type '<class 'numpy.int32'>'
	with 651552 stored elements in COOrdinate format>
<11875x3384 sparse matrix of type '<class 'numpy.float32'>'
	with 651552 stored elements in COOrdinate format>


In [7]:
print(interactions)

  (0, 0)	1
  (0, 1)	1
  (0, 2)	1
  (0, 3)	1
  (0, 4)	1
  (0, 5)	1
  (0, 6)	1
  (0, 7)	1
  (0, 8)	1
  (0, 9)	1
  (0, 10)	1
  (0, 11)	1
  (0, 12)	1
  (0, 13)	1
  (0, 14)	1
  (0, 15)	1
  (0, 16)	1
  (0, 17)	1
  (0, 18)	1
  (0, 19)	1
  (0, 20)	1
  (0, 21)	1
  (0, 22)	1
  (0, 23)	1
  (0, 24)	1
  :	:
  (11874, 165)	1
  (11874, 63)	1
  (11874, 480)	1
  (11874, 1272)	1
  (11874, 577)	1
  (11874, 472)	1
  (11874, 148)	1
  (11874, 2148)	1
  (11874, 557)	1
  (11874, 71)	1
  (11874, 164)	1
  (11874, 589)	1
  (11874, 360)	1
  (11874, 588)	1
  (11874, 392)	1
  (11874, 2508)	1
  (11874, 1810)	1
  (11874, 359)	1
  (11874, 1114)	1
  (11874, 1550)	1
  (11874, 173)	1
  (11874, 409)	1
  (11874, 852)	1
  (11874, 479)	1
  (11874, 203)	1


In [8]:
print(weights)

  (0, 0)	1.0
  (0, 1)	1.0
  (0, 2)	1.0
  (0, 3)	1.0
  (0, 4)	1.0
  (0, 5)	1.0
  (0, 6)	1.0
  (0, 7)	1.0
  (0, 8)	1.0
  (0, 9)	1.0
  (0, 10)	1.0
  (0, 11)	1.0
  (0, 12)	1.0
  (0, 13)	1.0
  (0, 14)	1.0
  (0, 15)	1.0
  (0, 16)	1.0
  (0, 17)	1.0
  (0, 18)	1.0
  (0, 19)	1.0
  (0, 20)	1.0
  (0, 21)	1.0
  (0, 22)	1.0
  (0, 23)	1.0
  (0, 24)	1.0
  :	:
  (11874, 165)	2.0
  (11874, 63)	2.0
  (11874, 480)	3.0
  (11874, 1272)	1.0
  (11874, 577)	6.0
  (11874, 472)	2.0
  (11874, 148)	4.0
  (11874, 2148)	1.0
  (11874, 557)	1.0
  (11874, 71)	1.0
  (11874, 164)	1.0
  (11874, 589)	4.0
  (11874, 360)	1.0
  (11874, 588)	4.0
  (11874, 392)	2.0
  (11874, 2508)	2.0
  (11874, 1810)	1.0
  (11874, 359)	3.0
  (11874, 1114)	1.0
  (11874, 1550)	7.0
  (11874, 173)	4.0
  (11874, 409)	7.0
  (11874, 852)	1.0
  (11874, 479)	3.0
  (11874, 203)	3.0


In [9]:
from lightfm import LightFM

model = LightFM(loss='bpr')
model.fit(weights)

<lightfm.lightfm.LightFM at 0x7fe4e77f40d0>

In [12]:
from lightfm.evaluation import precision_at_k
from lightfm.evaluation import auc_score

train_precision = precision_at_k(model, weights, k=3).mean()

print('Precision: train %.2f' % (train_precision))

Precision: train 0.51


In [1]:
import os
import zipfile
import csv

dict_csv1 = csv.DictReader((x for x in open("data/NcAbr2020.csv",encoding = "utf-8-sig")), delimiter=";",)
dict_csv2 = csv.DictReader((x for x in open("data/FacAgo2020_f.csv",encoding = "utf-8-sig")), delimiter=";",)

In [2]:
import json
from itertools import islice

for line in islice(dict_csv1, 2):
    print(json.dumps(line, indent=4))

{
    "\ufeff1000805531": "1002807191",
    "135033": "108298",
    "1": "1"
}
{
    "\ufeff1000805531": "1002807191",
    "135033": "151321",
    "1": "1"
}


In [13]:
from lightfm.data import Dataset

dataset = Dataset()
dataset.fit((x['User-ID'] for x in dict_csv1),
            (x['PRODUCT-ID'] for x in dict_csv1))

In [14]:
num_users, num_items = dataset1.interactions_shape()
print('Num users: {}, num_items {}.'.format(num_users, num_items))

Num users: 6608, num_items 0.


In [10]:
(print(x) for x in dict_csv1)

<generator object <genexpr> at 0x7f2626e3c8d0>