In [21]:
import numpy as np

from lightfm.datasets import fetch_movielens

# Load the MovieLens data through LightFM's helper with min_rating set to 3.0.
movielens = fetch_movielens(min_rating=3.0)

In [25]:
# One summary line per dataset entry: its key, container type and shape.
for key, value in movielens.items():
    print(f"{key} {type(value)} {value.shape}")

train <class 'scipy.sparse.coo.coo_matrix'> (943, 1682)
test <class 'scipy.sparse.coo.coo_matrix'> (943, 1682)
item_features <class 'scipy.sparse.csr.csr_matrix'> (1682, 1682)
item_feature_labels <class 'numpy.ndarray'> (1682,)
item_labels <class 'numpy.ndarray'> (1682,)


In [26]:
# Bind the three matrices that the model cells work with to short names.
train, test, item_features = (
    movielens['train'],
    movielens['test'],
    movielens['item_features'],
)

In [23]:
# Print the str() of every dataset entry. For the sparse matrices this is a raw
# listing of stored "(row, col) value" entries rather than a compact summary.
for key in movielens:
    value = movielens[key]
    print(key, value)

train   (0, 0)	5
  (0, 1)	3
  (0, 2)	4
  (0, 3)	3
  (0, 4)	3
  (0, 5)	5
  (0, 6)	4
  (0, 8)	5
  (0, 9)	3
  (0, 11)	5
  (0, 12)	5
  (0, 13)	5
  (0, 14)	5
  (0, 15)	5
  (0, 16)	3
  (0, 17)	4
  (0, 18)	5
  (0, 21)	4
  (0, 22)	4
  (0, 23)	3
  (0, 24)	4
  (0, 25)	3
  (0, 27)	4
  (0, 29)	3
  (0, 30)	3
  :	:
  (942, 613)	5
  (942, 624)	3
  (942, 654)	4
  (942, 671)	5
  (942, 684)	4
  (942, 716)	4
  (942, 720)	5
  (942, 721)	3
  (942, 731)	4
  (942, 738)	4
  (942, 762)	4
  (942, 764)	3
  (942, 793)	3
  (942, 795)	3
  (942, 815)	4
  (942, 823)	4
  (942, 824)	3
  (942, 839)	4
  (942, 927)	5
  (942, 942)	5
  (942, 1043)	3
  (942, 1073)	4
  (942, 1187)	3
  (942, 1227)	3
  (942, 1329)	3
test   (0, 19)	4
  (0, 32)	4
  (0, 60)	4
  (0, 116)	3
  (0, 159)	4
  (0, 170)	5
  (0, 188)	3
  (0, 201)	5
  (0, 264)	4
  (1, 12)	4
  (1, 49)	5
  (1, 250)	5
  (1, 279)	3
  (1, 280)	3
  (1, 289)	3
  (1, 291)	4
  (1, 296)	4
  (1, 311)	3
  (2, 327)	5
  (2, 330)	4
  (2, 333)	3
  (2, 342)	3
  (3, 49)	5
  (3, 259)	4
  (3, 

In [7]:
from lightfm import LightFM
from lightfm.evaluation import precision_at_k
from lightfm.evaluation import auc_score

# BPR-loss model fitted on the interaction matrix only (no feature matrices passed).
model = LightFM(loss='bpr', learning_rate=0.05)
model.fit(train, epochs=10)

# Each metric is computed for the train split first, then for the test split,
# and averaged over the per-user values returned by the evaluation helper.
train_precision, test_precision = (
    precision_at_k(model, split, k=10).mean() for split in (train, test)
)
train_auc, test_auc = (
    auc_score(model, split).mean() for split in (train, test)
)

print('Precision: train %.2f, test %.2f.' % (train_precision, test_precision))
print('AUC: train %.2f, test %.2f.' % (train_auc, test_auc))

Precision: train 0.30, test 0.03.
AUC: train 0.83, test 0.74.


In [27]:
# WARP-loss model trained on the interactions together with the item feature matrix.
model = LightFM(learning_rate=0.05, loss='warp')

# The model object was created on the line above, so a plain `fit` is the right
# call here; `fit_partial` is intended for resuming training of an existing model.
model.fit(train, item_features=item_features, epochs=10)

# A model trained with item features has to be scored with the same feature
# matrix. Omitting it makes LightFM fall back to per-item identity features,
# which only lines up when the feature matrix happens to be square.
train_precision = precision_at_k(
    model, train, k=10, item_features=item_features
).mean()
test_precision = precision_at_k(
    model, test, k=10, item_features=item_features
).mean()

train_auc = auc_score(model, train, item_features=item_features).mean()
test_auc = auc_score(model, test, item_features=item_features).mean()

print('Precision: train %.2f, test %.2f.' % (train_precision, test_precision))
print('AUC: train %.2f, test %.2f.' % (train_auc, test_auc))

Precision: train 0.57, test 0.10.
AUC: train 0.94, test 0.91.


In [13]:
def sample_recommendation(model, data, user_ids):
    """Print known positives and top-scored items for each requested user.

    For every user the report lists up to three items the user interacted with
    in the training matrix, followed by the five items with the highest
    predicted score. Items the user already interacted with are not filtered
    out of the recommendation list.

    Args:
        model: Object exposing ``predict(user_id, item_ids)`` that returns one
            score per entry of ``item_ids``; higher scores rank first.
        data: Mapping with a ``'train'`` sparse matrix of shape
            (n_users, n_items) supporting ``.tocsr()``, and an
            ``'item_labels'`` array indexable by item position.
        user_ids: Iterable of user row positions to report on.

    Returns:
        None. The report is written to stdout.
    """
    # The CSR conversion and the item id range do not depend on the user, so
    # build them once instead of once per loop iteration.
    train_csr = data['train'].tocsr()
    item_labels = data['item_labels']
    item_ids = np.arange(train_csr.shape[1])

    for user_id in user_ids:
        # Column positions of the stored entries in this user's training row.
        known_positives = item_labels[train_csr[user_id].indices]

        scores = model.predict(user_id, item_ids)
        # Negate so that argsort's ascending order yields best-first.
        top_items = item_labels[np.argsort(-scores)]

        print("User %s" % user_id)
        print("     Known positives:")

        for x in known_positives[:3]:
            print("        %s" % x)

        print("     Recommended:")

        for x in top_items[:5]:
            print("        %s" % x)

In [15]:
# Print the recommendation report for three users of the fitted model.
sample_recommendation(model, movielens, user_ids=[10, 150, 35])

User 10
     Known positives:
        Dead Man Walking (1995)
        Mr. Holland's Opus (1995)
        Apollo 13 (1995)
        Much Ado About Nothing (1993)
        Princess Bride, The (1987)
     Recommended:
        Fargo (1996)
        Godfather, The (1972)
        Schindler's List (1993)
        Star Wars (1977)
        English Patient, The (1996)
User 150
     Known positives:
        Toy Story (1995)
        Get Shorty (1995)
        Postino, Il (1994)
        Desperado (1995)
        Star Wars (1977)
     Recommended:
        Casablanca (1942)
        Schindler's List (1993)
        Amadeus (1984)
        Star Wars (1977)
        One Flew Over the Cuckoo's Nest (1975)
User 35
     Known positives:
        Air Bud (1997)
        Spawn (1997)
        That Darn Cat! (1997)
        Lay of the Land, The (1997)
     Recommended:
        Air Force One (1997)
        My Best Friend's Wedding (1997)
        Kiss the Girls (1997)
        Postman, The (1997)
        G.I. Jane (1997)


In [1]:
import sqlite3
import pandas as pd
from scipy.sparse import coo_matrix
import numpy as np
# Open the SQLite database file and show the column definitions of Movies.
conn = sqlite3.connect("jjmovie.db")
c = conn.cursor()
# Cursor.execute returns the cursor itself, so the rows can be fetched in one go.
c.execute("PRAGMA table_info(Movies)").fetchall()

[(0, 'MovieId', 'INTEGER', 1, None, 1),
 (1, 'Title', 'TEXT', 1, None, 0),
 (2, 'Price', 'REAL', 1, None, 0),
 (3, 'Adult', 'NUMERIC', 0, None, 0),
 (4, 'Budget', 'INTEGER', 0, None, 0),
 (5, 'ImdbId', 'INTEGER', 0, None, 0),
 (6, 'Popularity', 'INTEGER', 0, None, 0),
 (7, 'ReleaseDate', 'NUMERIC', 0, None, 0),
 (8, 'Revenue', 'INTEGER', 0, None, 0),
 (9, 'Runtime', 'INTEGER', 0, None, 0),
 (10, 'VoteAverage', 'REAL', 0, None, 0),
 (11, 'VoteCount', 'INTEGER', 0, None, 0)]

In [73]:
# Read the id columns and the movie/genre link table into DataFrames.
# The LEFT JOIN keeps movies without any MovieGenres row; those rows carry a
# NULL GenreId.
movies = pd.read_sql_query(sql="SELECT MovieId FROM Movies ", con=conn)
genres = pd.read_sql_query(sql="SELECT GenreId FROM Genres ", con=conn)
tbl = pd.read_sql_query(
    sql="SELECT m.MovieId, mg.GenreId FROM Movies AS m LEFT JOIN MovieGenres AS mg ON m.MovieId=mg.MovieId ",
    con=conn,
)
tbl2 = tbl.pivot(columns="GenreId")

In [91]:
# Raw id arrays shifted down by one.
# NOTE(review): presumably meant as zero-based matrix positions — confirm the
# ids are contiguous before using them that way.
row = np.subtract(movies['MovieId'].values, 1)
col = np.subtract(genres['GenreId'].values, 1)

In [74]:
# Preview the first five rows of the movies frame.
movies.head(n=5)

Unnamed: 0,MovieId
0,2
1,3
2,5
3,11
4,12


In [75]:
# Show the MovieId stored under index label 0.
# NOTE(review): the name `id` shadows the Python builtin for the rest of the session.
i = 0
id = movies.at[i, 'MovieId']
print(id)

2


In [78]:
# Number of rows in the movies frame.
length1 = movies.shape[0]
print(length1)

43467


In [79]:
# Number of rows in the genres frame.
length2 = genres.shape[0]
print(length2)

20


In [80]:
# All-zero frame with length1 rows and length2 columns; preview the top rows.
d = pd.DataFrame(data=np.zeros(shape=(length1, length2)))
d.head()

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19
0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [83]:
# Row count of tbl minus a fixed offset.
# NOTE(review): 89000 is an unexplained constant — presumably chosen to leave a
# small preview size for this particular database; confirm or parameterise.
length3 = tbl.shape[0] - 89000
print(length3)

685


In [90]:
# Print the first length3 (movie, genre) pairs of tbl, each id reduced by one.
for i in range(length3):
    a = int(tbl.at[i, 'MovieId']) - 1
    b = int(tbl.at[i, 'GenreId']) - 1
    print("Movie: ", a, " ,Genre: ", b, sep="")

Movie: 1 ,Genre: 17
Movie: 1 ,Genre: 79
Movie: 2 ,Genre: 17
Movie: 2 ,Genre: 34
Movie: 4 ,Genre: 34
Movie: 4 ,Genre: 79
Movie: 10 ,Genre: 11
Movie: 10 ,Genre: 27
Movie: 10 ,Genre: 877
Movie: 11 ,Genre: 15
Movie: 11 ,Genre: 10750
Movie: 12 ,Genre: 17
Movie: 12 ,Genre: 34
Movie: 12 ,Genre: 10748
Movie: 13 ,Genre: 17
Movie: 14 ,Genre: 17
Movie: 14 ,Genre: 9647
Movie: 15 ,Genre: 17
Movie: 15 ,Genre: 79
Movie: 15 ,Genre: 10401
Movie: 16 ,Genre: 26
Movie: 16 ,Genre: 52
Movie: 16 ,Genre: 9647
Movie: 17 ,Genre: 11
Movie: 17 ,Genre: 13
Movie: 17 ,Genre: 27
Movie: 17 ,Genre: 52
Movie: 17 ,Genre: 877
Movie: 18 ,Genre: 17
Movie: 18 ,Genre: 877
Movie: 19 ,Genre: 17
Movie: 19 ,Genre: 10748
Movie: 20 ,Genre: 98
Movie: 21 ,Genre: 11
Movie: 21 ,Genre: 13
Movie: 21 ,Genre: 27
Movie: 23 ,Genre: 27
Movie: 23 ,Genre: 79
Movie: 24 ,Genre: 17
Movie: 24 ,Genre: 10751
Movie: 25 ,Genre: 17
Movie: 26 ,Genre: 17
Movie: 26 ,Genre: 10401
Movie: 26 ,Genre: 10748
Movie: 27 ,Genre: 17
Movie: 27 ,Genre: 10751
Movie: 29

In [70]:
# Preview the first five rows of d.
d.head(n=5)

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,3457,3458,3459,3460,3461,3462,3463,3464,3465,3466
0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,0.0,1.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,0.0,1.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,0.0,1.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [None]:
# Marks cells of `d` for every pair of `tbl` rows that carry the same GenreId.
# NOTE(review): `dlugosc` is not defined in any cell shown here, so this cell
# raises NameError on a fresh kernel — presumably a row count of `tbl`; confirm.
for i in range(dlugosc):
    genre = tbl.loc[i, 'GenreId']
    # Row i is compared against every row j, so the work grows with dlugosc squared.
    for j in range(dlugosc):
        if tbl.loc[j, 'GenreId']== genre:
            # MovieId values of the two matching rows, each reduced by one.
            a = tbl.loc[i, 'MovieId']-1
            b = tbl.loc[j, 'MovieId']-1
            # NOTE(review): i is a row label of tbl, not a movie id — confirm
            # that marking position (i, i) of d is intended.
            d.iloc[[i], [i]] = 1
            # iloc is positional: a and b must both be valid row/column
            # positions of d. NOTE(review): verify d's shape supports this.
            d.iloc[[a], [b]] = 1