In [1]:
import os
import zipfile
import csv

import requests

def get_data(path="data/trim_fac/FacEneMar2020.csv"):
    """Yield the rows of a semicolon-delimited CSV file as dicts.

    The file is decoded as ``utf-8-sig`` so a leading byte-order mark does
    not end up glued to the first column name.

    Args:
        path: Location of the CSV file. Defaults to the file this notebook
            has always read, so existing ``get_data()`` calls are unchanged.

    Yields:
        One mapping per data row, keyed by the names in the header row.

    Raises:
        OSError: If the file cannot be opened. Because this is a generator,
            the error surfaces when iteration starts, not at call time.

    Note:
        The file is closed as soon as the rows are exhausted, or when a
        partially consumed generator is closed or garbage collected.
    """
    # newline="" is what the csv module documents for files handed to a
    # reader; the `with` block guarantees the handle is released.
    with open(path, encoding="utf-8-sig", newline="") as csv_file:
        yield from csv.DictReader(csv_file, delimiter=";")

def get_ratings():
    """Return a fresh row iterator over the CSV by delegating to get_data().

    Each call re-reads the file from the start, so callers can make several
    independent passes over the rows.
    """
    rows = get_data()
    return rows

def get_book_features():
    """Return a fresh row iterator from get_data().

    NOTE(review): despite the name, this yields exactly the same rows as
    get_ratings() -- both simply call get_data(). The name looks like a
    leftover from another example; confirm whether it is still needed.
    """
    rows = get_data()
    return rows

In [2]:
import json
from itertools import islice

# Row iterator over the CSV. It is single-pass: once rows have been consumed
# they are not replayed, so call get_data()/get_ratings() again for a new pass.
ratings = get_data()

In [3]:
# Peek at the first two rows, pretty-printed as JSON, to check the column
# names. This consumes those two rows from the single-pass `ratings` iterator.
for line in islice(ratings, 2):
    print(json.dumps(line, indent=4))

{
    "NIT": "1000183234",
    "ARTCOD": "173164",
    "NUM_VECES_COMP": "1",
    "TRIMESTRE": "1"
}
{
    "NIT": "1000183234",
    "ARTCOD": "173203",
    "NUM_VECES_COMP": "1",
    "TRIMESTRE": "1"
}


In [4]:
from lightfm.data import Dataset

# Create the LightFM dataset and register the id vocabularies: NIT values are
# the users and ARTCOD values are the items. Each generator makes its own pass
# over the CSV through a separate get_ratings() call.
dataset = Dataset()
dataset.fit(
    users=(row['NIT'] for row in get_ratings()),
    items=(row['ARTCOD'] for row in get_ratings()),
)

In [5]:
# Sanity check: how many distinct users and items the dataset has registered.
num_users, num_items = dataset.interactions_shape()
print('Num users: {}, num_items {}.'.format(num_users, num_items))

Num users: 11875, num_items 3384.


In [6]:
# Extend the existing mappings: pass the ARTCOD values again as items and
# register every NUM_VECES_COMP value as an item feature name.
# NOTE(review): NUM_VECES_COMP is read from the same rows that supply the
# (NIT, ARTCOD) interactions, so it looks like a per-purchase count rather
# than a property of the article -- confirm it is meant to be an item feature.
dataset.fit_partial(
    items=(row['ARTCOD'] for row in get_ratings()),
    item_features=(row['NUM_VECES_COMP'] for row in get_ratings()),
)

In [7]:
# Build the interaction matrices from (user, item, weight) triples:
# NIT is the user, ARTCOD the item and NUM_VECES_COMP (cast to float) the weight.
(interactions, weights) = dataset.build_interactions(
    (row['NIT'], row['ARTCOD'], float(row['NUM_VECES_COMP']))
    for row in get_ratings()
)

print(repr(interactions))

<11875x3384 sparse matrix of type '<class 'numpy.int32'>'
	with 651552 stored elements in COOrdinate format>


In [8]:
# Show the weight entries as "(row, col) value" lines; the values come from
# float(NUM_VECES_COMP) in the build_interactions call.
print(weights)

  (0, 0)	1.0
  (0, 1)	1.0
  (0, 2)	1.0
  (0, 3)	1.0
  (0, 4)	1.0
  (0, 5)	1.0
  (0, 6)	1.0
  (0, 7)	1.0
  (0, 8)	1.0
  (0, 9)	1.0
  (0, 10)	1.0
  (0, 11)	1.0
  (0, 12)	1.0
  (0, 13)	1.0
  (0, 14)	1.0
  (0, 15)	1.0
  (0, 16)	1.0
  (0, 17)	1.0
  (0, 18)	1.0
  (0, 19)	1.0
  (0, 20)	1.0
  (0, 21)	1.0
  (0, 22)	1.0
  (0, 23)	1.0
  (0, 24)	1.0
  :	:
  (11874, 165)	2.0
  (11874, 63)	2.0
  (11874, 480)	3.0
  (11874, 1272)	1.0
  (11874, 577)	6.0
  (11874, 472)	2.0
  (11874, 148)	4.0
  (11874, 2148)	1.0
  (11874, 557)	1.0
  (11874, 71)	1.0
  (11874, 164)	1.0
  (11874, 589)	4.0
  (11874, 360)	1.0
  (11874, 588)	4.0
  (11874, 392)	2.0
  (11874, 2508)	2.0
  (11874, 1810)	1.0
  (11874, 359)	3.0
  (11874, 1114)	1.0
  (11874, 1550)	7.0
  (11874, 173)	4.0
  (11874, 409)	7.0
  (11874, 852)	1.0
  (11874, 479)	3.0
  (11874, 203)	3.0


In [9]:
# Build the item-feature matrix from (item, [feature names]) pairs: each CSV
# row contributes its ARTCOD together with a one-element list holding that
# row's NUM_VECES_COMP value.
item_features = dataset.build_item_features(
    (row['ARTCOD'], [row['NUM_VECES_COMP']])
    for row in get_ratings()
)
print(repr(item_features))

<3384x3514 sparse matrix of type '<class 'numpy.float32'>'
	with 26530 stored elements in Compressed Sparse Row format>


In [10]:
# Show the stored entries of the item-feature matrix as "(row, col) value" lines.
print(item_features)

  (0, 0)	0.09090909
  (0, 3384)	0.6363636
  (0, 3385)	0.09090909
  (0, 3386)	0.18181819
  (1, 1)	0.1
  (1, 3384)	0.8
  (1, 3386)	0.1
  (2, 2)	0.024390243
  (2, 3384)	0.6097561
  (2, 3385)	0.07317073
  (2, 3386)	0.14634146
  (2, 3390)	0.048780486
  (2, 3392)	0.048780486
  (2, 3394)	0.024390243
  (2, 3395)	0.024390243
  (3, 3)	0.0068965517
  (3, 3384)	0.7034483
  (3, 3385)	0.06896552
  (3, 3386)	0.18620689
  (3, 3390)	0.0068965517
  (3, 3391)	0.0068965517
  (3, 3392)	0.020689655
  (4, 4)	0.14285715
  (4, 3384)	0.85714287
  (5, 5)	0.003875969
  :	:
  (3371, 3384)	0.5
  (3372, 3372)	0.5
  (3372, 3384)	0.5
  (3373, 3373)	0.5
  (3373, 3384)	0.5
  (3374, 3374)	0.5
  (3374, 3384)	0.5
  (3375, 3375)	0.5
  (3375, 3384)	0.5
  (3376, 3376)	0.5
  (3376, 3384)	0.5
  (3377, 3377)	0.5
  (3377, 3384)	0.5
  (3378, 3378)	0.5
  (3378, 3384)	0.5
  (3379, 3379)	0.5
  (3379, 3384)	0.5
  (3380, 3380)	0.5
  (3380, 3384)	0.5
  (3381, 3381)	0.5
  (3381, 3384)	0.5
  (3382, 3382)	0.5
  (3382, 3384)	0.5
  (3383, 33

In [11]:
from lightfm import LightFM

# Fixed seed so that Restart & Run All reproduces the same trained model;
# without it LightFM's initialisation and sampling differ from run to run.
RANDOM_SEED = 42

# Train a LightFM model with the BPR loss on the interaction matrix, using the
# item-feature matrix built above.
# NOTE(review): the `weights` matrix returned by build_interactions is not
# passed to fit() (no sample_weight argument), so the NUM_VECES_COMP counts do
# not act as interaction weights here -- confirm that this is intended.
model = LightFM(loss='bpr', random_state=RANDOM_SEED)
model.fit(interactions, item_features=item_features)

<lightfm.lightfm.LightFM at 0x7f4ff0955490>