# 감성 점수를 기반으로 선수 추천 시스템 개발
KNU 감성사전에서 도출한 댓글의 감성 점수를 통해 새로운 감성 점수 변수를 만들고 이를 기반으로 새로운 평점 행렬을 만들었다. 유사한 선수를 추천해주는 알고리즘은 KNN을 사용하여 사용자가 함수의
인자에 원하는 선수를 입력하면 4명의 관련 선수를 추천해주는 시스템이다.


In [1]:
import numpy as np
import pandas as pd

In [2]:
DATA_PATH = '..../AI_camp/Section4/project/data/'

In [3]:
df = pd.read_csv(f'{DATA_PATH}sentiment_scores.csv')
df.tail()

Unnamed: 0,userId,player_name,position,comment,rating,score
199876,클롭광팬,박지수,df,선수체감 자체가 엄청 가볍고 민첩함,4.0,1
199877,클롭광팬,박지수,df,주력이랑 방향전환 모두 빨라가지고 수비커버 범위도 넓게 가져갈수 있음.,4.0,0
199878,클롭광팬,박지수,df,5카 기준 2천만bp 정도로 대장급 공격수들을 스피드로 제압가능.. 우리집 센터백은...,4.0,-1
199879,레알의라장군,카르바할,df,좋기만한데....,4.04,0
199880,자기엘카,조 고메스,df,단점 헤딩 실존이랑 똑같음,3.83,0


In [72]:
df['score'].describe()

count    199881.000000
mean          0.456422
std           1.530735
min         -11.000000
25%           0.000000
50%           0.000000
75%           0.000000
max          44.000000
Name: score, dtype: float64

In [74]:
from sklearn.preprocessing import MinMaxScaler
s = MinMaxScaler()

# 감성점수 정규화
df['new_score'] = s.fit_transform(df[['score']])
df.tail()

Unnamed: 0,userId,player_name,position,comment,rating,score,new_score
199876,클롭광팬,박지수,df,선수체감 자체가 엄청 가볍고 민첩함,4.0,1,0.218182
199877,클롭광팬,박지수,df,주력이랑 방향전환 모두 빨라가지고 수비커버 범위도 넓게 가져갈수 있음.,4.0,0,0.2
199878,클롭광팬,박지수,df,5카 기준 2천만bp 정도로 대장급 공격수들을 스피드로 제압가능.. 우리집 센터백은...,4.0,-1,0.181818
199879,레알의라장군,카르바할,df,좋기만한데....,4.04,0,0.2
199880,자기엘카,조 고메스,df,단점 헤딩 실존이랑 똑같음,3.83,0,0.2


In [75]:
df['new_score'].describe()

count    199881.000000
mean          0.208299
std           0.027832
min           0.000000
25%           0.200000
50%           0.200000
75%           0.200000
max           1.000000
Name: new_score, dtype: float64

In [76]:
#긍정이면 정규화된 감성점수에 + 1을 해서 점수가 더 커지게 하고,
#부정이면 정규화된 감성점수를 1에서 빼서 점수가 더 작아지게 바꿈.

df.loc[df['score'] > 0, 'new_rating'] = df['rating'] * (df['new_score'] + 1)
df.loc[df['score'] <= 0, 'new_rating'] = df['rating'] * (1 - df['new_score'])

In [77]:
df['rating'].describe()

count    199881.000000
mean          4.033958
std           0.783179
min           0.000000
25%           3.880000
50%           4.180000
75%           4.420000
max           5.000000
Name: rating, dtype: float64

In [78]:
df['new_rating'].describe()

count    199881.000000
mean          3.631418
std           1.045098
min           0.000000
25%           3.184000
50%           3.448000
75%           3.872000
max           8.660000
Name: new_rating, dtype: float64

In [80]:
df = df[['userId', 'player_name', 'new_rating']]
display(df.tail())

Unnamed: 0,userId,player_name,new_rating
199876,클롭광팬,박지수,4.872727
199877,클롭광팬,박지수,3.2
199878,클롭광팬,박지수,3.272727
199879,레알의라장군,카르바할,3.232
199880,자기엘카,조 고메스,3.064


In [81]:
n_users = df.userId.unique().shape[0]
n_players = df.player_name.unique().shape[0]

#평점행렬 만들기
ratings = df.pivot_table('new_rating', index = 'player_name', columns = 'userId')
ratings.fillna(0, inplace = True)
ratings.head()

userId,0228,09캐릭7카팜,1002lmw,100이니,10u드로,11231ㅁㅁ,123123asdsa,1231241521,123321a,123asd5,...,힐조우로,힐피거,힘내라힘내,힘들면박카스,힘들엉힘들엉,힙찌리,힙프짝,힙합민수루,힝빙닝밍팅,힝항홍
player_name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
A. 그낭둘리에,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
A. 그랑크비스트,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
A. 단주마,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
A. 델피에로,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
A. 드라고비치,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [82]:
ratings.shape

(2906, 28003)

In [85]:
# 아이템 행렬간 코사인 유사도
from sklearn.metrics.pairwise import cosine_similarity
item_based_collabor = cosine_similarity(ratings)
item_based_collabor

array([[0.        , 0.        , 0.        , ..., 0.        , 0.        ,
        0.        ],
       [0.        , 1.        , 0.        , ..., 0.06738011, 0.        ,
        0.        ],
       [0.        , 0.        , 1.        , ..., 0.02392136, 0.        ,
        0.        ],
       ...,
       [0.        , 0.06738011, 0.02392136, ..., 1.        , 0.        ,
        0.        ],
       [0.        , 0.        , 0.        , ..., 0.        , 1.        ,
        0.        ],
       [0.        , 0.        , 0.        , ..., 0.        , 0.        ,
        1.        ]])

In [86]:
#KNN 알고리즘을 사용한 유사 선수 추천 모델.
#유사도는 cosine 함수를 사용한다.
from sklearn.neighbors import NearestNeighbors

n = 5
cosine_knn = NearestNeighbors(n_neighbors=n, algorithm='brute', metric = 'cosine')
item_cosine_knn_fit = cosine_knn.fit(ratings.values)
item_distances, item_indices = item_cosine_knn_fit.kneighbors(ratings.values)
#유사한 거리와, 가까운 선수들의 index

In [96]:
item_indices[0]

array([1937, 1935, 1939, 1938, 1941])

In [110]:
def get_recommendation(name):
  print(name + ' 선수와 가장 연관있는 선수리스트: \n')
  for idx, val in enumerate(items_dic[name][1:]):
    print(f'{idx+1}. ' + val)

In [112]:
get_recommendation('박지성')

박지성 선수와 가장 연관있는 선수리스트: 

1. 기성용
2. 홍명보
3. 손흥민
4. 유상철


In [113]:
get_recommendation('리오넬 메시')

리오넬 메시 선수와 가장 연관있는 선수리스트: 

1. 네이마르 Jr.
2. 킬리안 음바페
3. 루이스 수아레스
4. 피케
