In [1]:
import os, csv, zipfile, json
import pandas as pd
from lightfm import LightFM
from itertools import islice



In [2]:
def get_data():
    return(
        csv.DictReader(
        (x for x in open('data/User-Features.csv')), delimiter = ","
        ),
        csv.DictReader(
        (x for x in open('data/Item-Features.csv')), delimiter = ","
        ),
        csv.DictReader(
        (x for x in open('data/Ratings.csv')), delimiter = ","
        )
    )

def get_user_data():
    return get_data()[0]

def get_item_data():
    return get_data()[1]

def get_ratings():
    return get_data()[2]

In [None]:
print(csv.DictReader(x for x in open('data/Ratings.csv')))

In [3]:
user_features, item_features, ratings = get_data()

# Visualize the Dataset

In [4]:
print("_______________________USER FEATURES_________________________")
for line in islice(user_features, 3):
    print(json.dumps(line, indent = 4))
    print("_____________________________")
    
print("______________________ITEM FEATURES_________________________")
for line in islice(item_features, 3):
    print(json.dumps(line, indent = 4))
    print("___________________________")
    
print("__________________________RATINGS____________________________")
for line in islice(ratings, 3):
    print(json.dumps(line, indent = 4))
    print("_____________________________")

_______________________USER FEATURES_________________________
{
    "User_ID": "0",
    "Age": "18"
}
_____________________________
{
    "User_ID": "1",
    "Age": "18"
}
_____________________________
{
    "User_ID": "2",
    "Age": "18"
}
_____________________________
______________________ITEM FEATURES_________________________
{
    "Item_ID": "1",
    "Names": "Banaras Sada Paan",
    "Banaras": "1.0",
    "Calcutta": "",
    "Maghai": "",
    "Sada": "1.0",
    "Meetha": "",
    "Chocolate": "",
    "Dry Fruit": "",
    "Mango": "",
    "Strawberry": "",
    "Pineapple": "",
    "Gold": "",
    "Kaju": "",
    "Jelly": "",
    "Rose": "",
    "Shahi": "",
    "Kesar": "",
    "Vanilla": "",
    "Masala": "1.0",
    "Khatta": "1.0",
    "Orange": "",
    "White": "",
    "Silver": "",
    "RaatRani": "",
    "Nutella": "",
    "Special": ""
}
___________________________
{
    "Item_ID": "2",
    "Names": "Calcutta Sada Paan",
    "Banaras": "",
    "Calcutta": "1.0",
    "Maghai":

In [5]:
import pandas as pd
df = pd.read_csv('data/Item-Features.csv')

# item_features = []
# for i in df.columns:
#     if i != 'Item_ID' and i != 'Names':
#         item_features.append(i)
df

Unnamed: 0,Item_ID,Names,Banaras,Calcutta,Maghai,Sada,Meetha,Chocolate,Dry Fruit,Mango,...,Kesar,Vanilla,Masala,Khatta,Orange,White,Silver,RaatRani,Nutella,Special
0,1,Banaras Sada Paan,1.0,,,1.0,,,,,...,,,1.0,1.0,,,,,,
1,2,Calcutta Sada Paan,,1.0,,1.0,,,,,...,,,1.0,1.0,,,,,,
2,3,Banaras Meetha Paan,1.0,,,,1.0,,,,...,,,,,,,,,,
3,4,Calcutta Meetha Paan,,1.0,,,1.0,,,,...,,,,,,,,,,
4,5,Maghai Meetha Paan,,,1.0,,1.0,,,,...,,,,,,,,,,
5,6,Calcutta Chocolate Paan,,1.0,,,1.0,1.0,,,...,,,,,,,,,,
6,7,Maghai Chocolate Paan,,,1.0,,1.0,1.0,,,...,,,,,,,,,,
7,8,Calcutta Dry Fruit Paan,,1.0,,1.0,,,1.0,,...,,,,,,,,,,
8,9,Maghai Dry Fruit Paan,,,1.0,1.0,,,1.0,,...,,,,,,,,,,
9,10,Calcutta Mango Paan,,1.0,,,1.0,,,1.0,...,,,,,,,,,,


# BUILD ID MAPPINGS

In [6]:
# create a dataset
from lightfm import LightFM
from lightfm.data import Dataset

dataset = Dataset()

dataset.fit((x['User_ID'] for x in get_ratings()),
           (x['Item_ID'] for x in get_ratings()))

In [7]:
num_users, num_items = dataset.interactions_shape()
print("Num Users: {}, Num Items: {}".format(num_users, num_items))

Num Users: 15, Num Items: 50


In [8]:
dataset.fit_partial(items=(x['Item_ID'] for x in get_item_data()),
                    users = (x['User_ID'] for x in get_user_data()),
                    item_features=(x['Calcutta'] for x in get_item_data()),
                    user_features=(x['Age'] for x in get_user_data()))


In [9]:
dataset.fit_partial(items = (x['Item_ID'] for x in get_item_data()),
                   item_features = (x['Banaras'] for x in get_item_data()))

In [10]:
dataset.fit_partial(users = (x['User_ID'] for x in get_user_data()),
                   user_features=(x['Age'] for x in get_user_data()))

# BUILD INTERACTIONS

In [11]:
(interactions, weights) = dataset.build_interactions(((x['User_ID'], x['Item_ID']) for x in get_ratings()))
print(repr(interactions))

<17x50 sparse matrix of type '<class 'numpy.int32'>'
	with 250 stored elements in COOrdinate format>


In [12]:
item_features = dataset.build_item_features(((x['Item_ID'], [x['Calcutta'], x['Banaras']])
                                              for x in get_item_data()))

In [13]:
user_features = dataset.build_user_features(((x['User_ID'], [x['Age']])
                                              for x in get_user_data()))

In [14]:
from lightfm import LightFM
from lightfm.cross_validation import random_train_test_split



model = LightFM(loss='warp')
model.fit(interactions, item_features=item_features, user_features=user_features)

<lightfm.lightfm.LightFM at 0x217f0dc99c8>

In [15]:
import numpy as np

labels = np.array([x['Item_ID'] for x in get_ratings()])
labels.shape

(250,)

In [16]:
positives = []
recommended = []
def sample_recommendation(model, data, user_ids):
    n_users, n_items = data.shape
    for user_id in user_ids:
        known_positives = labels[data.tocsr()[user_id].indices]
        
        scores = model.predict(user_id, np.arange(n_items))
        
        top_items = labels[np.argsort(-scores)]
        
        print("USER ID: {}".format(user_id))
        print("   Known Positives:")
        
        for x in known_positives[:3]:
            positives.append(x)
            print("      {}".format(x))
            
        print("   Recommmended:")
        
        for x in top_items[:3]:
            recommended.append(x)
            print("      {}".format(x))
            
        positives.append(user_id)
        recommended.append(user_id)
    return positives, recommended

In [17]:
sample_recommendation(model, interactions, [1, 3, 4])

USER ID: 1
   Known Positives:
      1
      7
      1
   Recommmended:
      1
      7
      2
USER ID: 3
   Known Positives:
      1
      5
      7
   Recommmended:
      7
      1
      2
USER ID: 4
   Known Positives:
      1
      46
   Recommmended:
      1
      7
      2


(['1', '7', '1', 1, '1', '5', '7', 3, '1', '46', 4],
 ['1', '7', '2', 1, '7', '1', '2', 3, '1', '7', '2', 4])