In [1]:
# 라이브러리 임포트
import numpy as np
import pandas as pd

from sklearn.linear_model import LogisticRegression

In [2]:
# Build the toy matrix used by the collaborative-filtering example.
# Each inner list is one row; np.nan marks a missing entry.
UserItemMatrix = np.array(
    [
        [5, np.nan, 4, np.nan, 1, np.nan, 3],
        [4, 4, 4, np.nan, np.nan, np.nan, 1],
        [5, 4, np.nan, 1, 2, np.nan, 3],
        [1, 2, 1, 4, 3, 5, 2],
        [np.nan, 1, np.nan, 3, 5, 5, np.nan],
        [np.nan, 2, np.nan, np.nan, 4, 4, 2],
        [5, np.nan, np.nan, 1, np.nan, np.nan, 2],
    ],
    dtype=float,
)

In [3]:
# Wrap the raw matrix in a DataFrame with columns item1..item7, then
# record each row's index label in an extra 'user_id' column.
df = pd.DataFrame(data=UserItemMatrix,
                  columns=[f'item{k}' for k in range(1, 8)])
df['user_id'] = df.index.tolist()
df

Unnamed: 0,item1,item2,item3,item4,item5,item6,item7,user_id
0,5.0,,4.0,,1.0,,3.0,0
1,4.0,4.0,4.0,,,,1.0,1
2,5.0,4.0,,1.0,2.0,,3.0,2
3,1.0,2.0,1.0,4.0,3.0,5.0,2.0,3
4,,1.0,,3.0,5.0,5.0,,4
5,,2.0,,,4.0,4.0,2.0,5
6,5.0,,,1.0,,,2.0,6


In [6]:
# Zero-filled copy of df; df itself is left untouched by this call.
# NOTE(review): df2 is not referenced again in the visible cells.
df2 = df.fillna(value=0)

In [7]:
# Replace every missing entry (NaN) with 0.
# Rebind the name instead of using inplace=True: the result is the same, but
# the cell no longer mutates the object that earlier cells displayed, and the
# call stays chainable.
df = df.fillna(0)

In [8]:
# Show df after the fillna(0) step: previously missing entries are now 0.0
df

Unnamed: 0,item1,item2,item3,item4,item5,item6,item7,user_id
0,5.0,0.0,4.0,0.0,1.0,0.0,3.0,0
1,4.0,4.0,4.0,0.0,0.0,0.0,1.0,1
2,5.0,4.0,0.0,1.0,2.0,0.0,3.0,2
3,1.0,2.0,1.0,4.0,3.0,5.0,2.0,3
4,0.0,1.0,0.0,3.0,5.0,5.0,0.0,4
5,0.0,2.0,0.0,0.0,4.0,4.0,2.0,5
6,5.0,0.0,0.0,1.0,0.0,0.0,2.0,6


In [27]:
# Remove the helper 'user_id' column so only the item columns remain.
# errors='ignore' plus rebinding (instead of inplace=True) makes the cell
# idempotent: running it a second time no longer raises KeyError because the
# column has already been dropped.
df = df.drop(columns='user_id', errors='ignore')

In [28]:
# Extract the frame's contents as a plain 2-D NumPy array and display it
uimat = df.to_numpy()
uimat

array([[5., 0., 4., 0., 1., 0., 3.],
       [4., 4., 4., 0., 0., 0., 1.],
       [5., 4., 0., 1., 2., 0., 3.],
       [1., 2., 1., 4., 3., 5., 2.],
       [0., 1., 0., 3., 5., 5., 0.],
       [0., 2., 0., 0., 4., 4., 2.],
       [5., 0., 0., 1., 0., 0., 2.]])

In [29]:
# Binarise the matrix: every positive entry becomes 1, zeros stay 0.
# Write into a private copy: the array obtained from df.values can share
# memory with the DataFrame, so assigning into it directly would also modify
# df, and under pandas Copy-on-Write that array is read-only, so the
# assignment would raise ValueError.
uimat = uimat.copy()
uimat[uimat > 0] = 1

In [30]:
# Show the binarised matrix: every positive entry has been set to 1
uimat

array([[1., 0., 1., 0., 1., 0., 1.],
       [1., 1., 1., 0., 0., 0., 1.],
       [1., 1., 0., 1., 1., 0., 1.],
       [1., 1., 1., 1., 1., 1., 1.],
       [0., 1., 0., 1., 1., 1., 0.],
       [0., 1., 0., 0., 1., 1., 1.],
       [1., 0., 0., 1., 0., 0., 1.]])

In [31]:
# Unpack the matrix dimensions: row count -> n_user, column count -> n_item.
# The two-name unpacking also fails loudly if uimat is not 2-D.
n_user, n_item = uimat.shape

In [23]:
# Scratch check: all column indices 0..n_item-1 except index 1.
# NOTE(review): the saved output below contains index 7, so it was produced
# while n_item was 8 (execution count 23 precedes the cell that drops
# 'user_id'). With the 7 item columns the result is [0, 2, 3, 4, 5, 6];
# re-run this cell after the shape cell to refresh the output.
np.setdiff1d(np.arange(n_item), 1)

array([0, 2, 3, 4, 5, 6, 7])

In [32]:
# Train one logistic-regression classifier per item column.
# For item i the target is column i of uimat and the features are all of the
# remaining columns, kept in their original order.
models = []
for i in range(n_item):
    feature_mask = np.arange(n_item) != i  # True everywhere except the target column
    x = uimat[:, feature_mask]
    y = uimat[:, i]
    models.append(LogisticRegression().fit(x, y))



In [33]:
# Allocate the prediction matrix: same shape as uimat, all zeros (float64)
uimat_pred = np.zeros(shape=uimat.shape, dtype=np.float64)
uimat_pred

array([[0., 0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0., 0., 0.]])

In [37]:
# Spot-check one fitted model: predict column 1 for every row of uimat,
# using all other columns as features.
i = 1
x = uimat[:, np.arange(n_item) != i]

models[i].predict(x)

array([1., 1., 1., 1., 1., 1., 1.])

In [38]:
# Fill the prediction matrix column by column: model i predicts column i
# from all other columns of uimat (the same rows the models were fitted on).
for i in range(n_item):
    other_items = np.arange(n_item) != i  # boolean mask excluding the target column
    uimat_pred[:, i] = models[i].predict(uimat[:, other_items])

print(uimat_pred)

[[1. 1. 1. 0. 1. 0. 1.]
 [1. 1. 1. 0. 1. 0. 1.]
 [1. 1. 0. 1. 1. 0. 1.]
 [1. 1. 0. 1. 1. 0. 1.]
 [0. 1. 0. 1. 1. 1. 1.]
 [0. 1. 0. 1. 1. 1. 1.]
 [1. 1. 0. 1. 1. 0. 1.]]
