# 레스토랑 메뉴 추천 시스템
- 레스토랑 추천
- 메뉴 추천

In [1]:
from numpy import *
from numpy import linalg as la

In [2]:
def ecludSim(inA, inB):
    """Return a Euclidean-distance based similarity in the range (0, 1].

    The norm of ``inA - inB`` is mapped through ``1 / (1 + distance)``, so
    identical inputs score 1.0 and the score shrinks toward 0 as the
    inputs move further apart.
    """
    distance = la.norm(inA - inB)
    return 1.0 / (distance + 1.0)

def pearsSim(inA, inB):
    """Return the Pearson correlation of two vectors rescaled to [0, 1].

    The correlation coefficient (in [-1, 1]) is mapped through
    ``0.5 + 0.5 * r``.  Inputs with fewer than three entries are reported
    as perfectly similar (1.0) without computing a correlation.
    """
    if len(inA) >= 3:
        # rowvar=0: each column is a variable, each row an observation.
        correlation = corrcoef(inA, inB, rowvar=0)[0][1]
        return 0.5 + 0.5 * correlation
    return 1.0

def cosSim(inA, inB):
    """Return the cosine similarity of two vectors rescaled to [0, 1].

    The cosine (in [-1, 1]) is mapped through ``0.5 + 0.5 * cos``.

    Args:
        inA: First vector; a column matrix, row matrix or 1-D array.
        inB: Second vector with the same number of elements as ``inA``.

    Returns:
        The rescaled cosine similarity, or 0.0 when either vector has zero
        norm (the cosine is undefined there; 0 is the value this file's
        estimators already use for "no similarity").
    """
    # Flatten to 1-D so the dot product is a true scalar; calling float()
    # on a 1x1 matrix relies on a deprecated array-to-scalar conversion.
    vecA = asarray(inA).ravel()
    vecB = asarray(inB).ravel()
    denom = la.norm(vecA) * la.norm(vecB)
    if denom == 0:
        # Avoid 0/0 -> NaN, which would poison the weighted rating average.
        return 0.0
    num = float(dot(vecA, vecB))
    return 0.5 + 0.5 * (num / denom)

## 1. 맛보지 않은 음식 추천하기
- item-based Collaborative Filtering


1. 사용자가 아직 점수를 매기지 않은 item(값이 0인 것)을 찾음
2. 1에서 찾은 각 item에 대해 예상 점수를 구함(item 간 유사도 이용)
3. 예상 점수를 기준으로 목록을 내림차순 정렬하여 상위 N개의 item을 추천

In [3]:
def standEst(dataMat, user, simMeas, item):
    """Estimate ``user``'s rating for ``item`` from item-to-item similarity.

    For every other item the user has rated (non-zero entry), the similarity
    between that item and ``item`` is measured over the users who rated
    both, and the user's ratings are averaged using those similarities as
    weights.

    Args:
        dataMat: Ratings matrix (rows are users, columns are items); must
            support the ``.A`` attribute of ``numpy.matrix``.
        user: Row index of the user.
        simMeas: Callable taking two column vectors and returning a number.
        item: Column index of the item to estimate.

    Returns:
        The similarity-weighted average rating, or 0 when the similarities
        sum to zero.
    """
    numItems = shape(dataMat)[1]
    weightSum = 0.0
    weightedRatings = 0.0
    for other in range(numItems):
        rating = dataMat[user, other]
        if rating == 0:
            continue
        # Row indices of the users who rated both `item` and `other`.
        bothRated = logical_and(dataMat[:, item].A > 0,
                                dataMat[:, other].A > 0)
        raters = nonzero(bothRated)[0]
        if len(raters) > 0:
            similarity = simMeas(dataMat[raters, item],
                                 dataMat[raters, other])
        else:
            similarity = 0
        print('the %d and %d similarity is: %f' % (item, other, similarity))
        weightSum += similarity
        weightedRatings += similarity * rating
    return weightedRatings / weightSum if weightSum != 0 else 0

In [4]:
def recommend(dataMat, user, N=3, simMeas=cosSim, estMethod=standEst):
    """Recommend up to ``N`` items the user has not rated yet.

    Args:
        dataMat: Ratings matrix (rows are users, columns are items); a 0
            entry means "not rated".
        user: Row index of the user to recommend for.
        N: Maximum number of recommendations to return.
        simMeas: Similarity function forwarded to ``estMethod``.
        estMethod: Callable ``(dataMat, user, simMeas, item)`` returning an
            estimated rating.

    Returns:
        A list of ``(item, estimatedScore)`` tuples sorted by score in
        descending order, or the string ``'you rated everything'`` when the
        user has no unrated items.
    """
    # 1. Column indices of the items this user has not rated (value 0).
    unrated = nonzero(dataMat[user, :].A == 0)[1]
    if len(unrated) == 0:
        return 'you rated everything'

    # 2. Estimate a score for every unrated item.
    scored = [(item, estMethod(dataMat, user, simMeas, item))
              for item in unrated]

    # 3. Highest estimate first; keep the top N.
    scored.sort(key=lambda pair: pair[1], reverse=True)
    return scored[:N]

In [5]:
def loadExData():
    """Return the small example ratings table as a list of rows.

    The table has seven rows of five integer ratings each; the estimators
    in this file treat a 0 entry as "not rated".
    """
    ratings = [
        [4, 4, 0, 2, 2],
        [4, 0, 0, 3, 3],
        [4, 0, 0, 1, 1],
        [1, 1, 1, 2, 0],
        [2, 2, 2, 0, 0],
        [5, 5, 5, 0, 0],
        [1, 1, 1, 0, 0],
    ]
    return ratings

In [6]:
# Wrap the example ratings in a NumPy matrix.  asmatrix is used instead of
# mat because the mat alias was removed from the main namespace in NumPy 2.0.
myMat = asmatrix(loadExData())

In [7]:
import pdb
# Recommendations for user 2 using recommend's default N, similarity and estimator.
recommend(myMat, user=2)

the 1 and 0 similarity is: 1.000000
the 1 and 3 similarity is: 0.928746
the 1 and 4 similarity is: 1.000000
the 2 and 0 similarity is: 1.000000
the 2 and 3 similarity is: 1.000000
the 2 and 4 similarity is: 0.000000


[(2, 2.5), (1, 2.0243290220056256)]

## 2. SVD로 추천하기

In [14]:
def loadExData2():
    """Return the larger, sparser example ratings table as a list of rows.

    The table has eleven rows of eleven integer ratings each; the
    estimators in this file treat a 0 entry as "not rated".
    """
    ratings = [
        [2, 0, 0, 4, 4, 0, 0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 5],
        [0, 0, 0, 0, 0, 0, 0, 1, 0, 4, 0],
        [3, 3, 4, 0, 3, 0, 0, 2, 2, 0, 0],
        [5, 5, 5, 0, 0, 0, 0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0, 0, 5, 0, 0, 5, 0],
        [4, 0, 4, 0, 0, 0, 0, 0, 0, 0, 5],
        [0, 0, 0, 0, 0, 4, 0, 0, 0, 0, 4],
        [0, 0, 0, 0, 0, 0, 5, 0, 0, 5, 0],
        [0, 0, 0, 3, 0, 0, 0, 0, 4, 5, 0],
        [1, 1, 2, 1, 1, 2, 1, 0, 4, 5, 0],
    ]
    return ratings

In [15]:
# Wrap the larger example in a NumPy matrix.  asmatrix is used instead of
# mat because the mat alias was removed from the main namespace in NumPy 2.0.
myMat2 = asmatrix(loadExData2())
myMat2

matrix([[2, 0, 0, 4, 4, 0, 0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 5],
        [0, 0, 0, 0, 0, 0, 0, 1, 0, 4, 0],
        [3, 3, 4, 0, 3, 0, 0, 2, 2, 0, 0],
        [5, 5, 5, 0, 0, 0, 0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0, 0, 5, 0, 0, 5, 0],
        [4, 0, 4, 0, 0, 0, 0, 0, 0, 0, 5],
        [0, 0, 0, 0, 0, 4, 0, 0, 0, 0, 4],
        [0, 0, 0, 0, 0, 0, 5, 0, 0, 5, 0],
        [0, 0, 0, 3, 0, 0, 0, 0, 4, 5, 0],
        [1, 1, 2, 1, 1, 2, 1, 0, 4, 5, 0]])

In [16]:
# Full SVD of the ratings matrix; Sigma holds the singular values.
U, Sigma, VT = la.svd(myMat2)

In [17]:
Sigma

array([1.34342819e+01, 1.18190832e+01, 8.20176076e+00, 6.86912480e+00,
       5.29063022e+00, 3.91213561e+00, 2.94562509e+00, 2.35486137e+00,
       2.08702082e+00, 7.08715931e-01, 1.36040486e-16])

- 특이값 제곱의 전체 합(에너지)의 90% 이상을 보존하는 최소 개수의 특이값을 선정

In [18]:
# Squared singular values, used below to measure how much "energy" each keeps.
Sig2 = Sigma * Sigma

In [19]:
# Total of the squared singular values.
Sig2.sum()

497.0

In [20]:
# 90% of the total: the threshold the retained singular values must reach.
0.9 * Sig2.sum()

447.3

In [23]:
# Sum of the first five squared singular values, to compare with the threshold.
Sig2[:5].sum()

462.6151815287942

In [28]:
def svdEst(dataMat, user, simMeas, item):
    """Estimate ``user``'s rating for ``item`` using SVD-reduced item vectors.

    The ratings matrix is decomposed with SVD and every item is projected
    onto the leading singular directions (at most 5).  The user's existing
    ratings are then averaged, weighted by the similarity between each
    rated item and ``item`` in that reduced space.

    Args:
        dataMat: Ratings matrix as a ``numpy.matrix`` (rows are users,
            columns are items); a 0 entry means "not rated".
        user: Row index of the user.
        simMeas: Callable taking two column vectors and returning a number.
        item: Column index of the item to estimate.

    Returns:
        The similarity-weighted average rating, or 0 when the similarities
        sum to zero.
    """
    n = shape(dataMat)[1]
    simTotal = 0.0
    ratSimTotal = 0.0
    U, Sigma, _ = la.svd(dataMat)
    # Slicing clamps automatically, so matrices with fewer than 5 singular
    # values no longer trigger a broadcasting error.
    topSigma = Sigma[:5]
    k = len(topSigma)
    # Arrange the kept singular values into a diagonal matrix (asmatrix
    # rather than mat, which NumPy 2.0 removed).
    SigK = asmatrix(eye(k) * topSigma)
    # Row i of xformedItems is item i expressed in the reduced space.
    xformedItems = dataMat.T * U[:, :k] * SigK.I
    for j in range(n):
        userRating = dataMat[user, j]
        if userRating == 0 or j == item:
            continue
        similarity = simMeas(xformedItems[item, :].T,
                             xformedItems[j, :].T)
        print('the %d and %d similarity is: %f' % (item, j, similarity))
        simTotal += similarity
        ratSimTotal += similarity * userRating
    if simTotal == 0:
        return 0
    return ratSimTotal / simTotal

In [29]:
# Recommendations for user 2, estimating ratings with the SVD-based estimator.
recommend(myMat2, user=2, estMethod=svdEst)

the 0 and 7 similarity is: 0.858762
the 0 and 9 similarity is: 0.443178
the 1 and 7 similarity is: 0.912336
the 1 and 9 similarity is: 0.509364
the 2 and 7 similarity is: 0.871203
the 2 and 9 similarity is: 0.516620
the 3 and 7 similarity is: 0.631617
the 3 and 9 similarity is: 0.548647
the 4 and 7 similarity is: 0.686696
the 4 and 9 similarity is: 0.448957
the 5 and 7 similarity is: 0.422636
the 5 and 9 similarity is: 0.647437
the 6 and 7 similarity is: 0.418756
the 6 and 9 similarity is: 0.724403
the 8 and 7 similarity is: 0.728176
the 8 and 9 similarity is: 0.738655
the 10 and 7 similarity is: 0.320211
the 10 and 9 similarity is: 0.489873


[(6, 2.9010549617145442), (5, 2.815120365684119), (10, 2.8141556010824655)]