In [1]:
import pandas as pd
from scipy.sparse import csr_matrix
from sklearn.neighbors import NearestNeighbors

# Load the raw inputs. Paths are relative, so they resolve against the
# notebook's current working directory.
view_log_train = pd.read_csv('../data/view_log.csv')
# NOTE(review): article_info is not referenced again in the cells visible here.
article_info = pd.read_csv('../data/article_info.csv')
# NOTE(review): `submission` is rebound to a brand-new DataFrame in the final
# cell, so the contents of this sample file are not used there.
submission = pd.read_csv('../submission/sample_submission.csv')

In [2]:
# Preview the first 100 rows of the view log (displayed as the cell's output).
view_log_train.head(100)

Unnamed: 0,userID,articleID,userRegion,userCountry
0,USER_0000,ARTICLE_0661,NY,US
1,USER_0000,ARTICLE_2316,NY,US
2,USER_0000,ARTICLE_1345,NY,US
3,USER_0000,ARTICLE_1089,NY,US
4,USER_0000,ARTICLE_1484,NY,US
...,...,...,...,...
95,USER_0005,ARTICLE_0564,MG,BR
96,USER_0005,ARTICLE_0931,MG,BR
97,USER_0005,ARTICLE_2223,MG,BR
98,USER_0005,ARTICLE_1577,MG,BR


In [3]:
# Drop the location columns, which are not used to build the user-item matrix.
# The drop is non-mutating on purpose: `view_log_train` keeps its original
# columns, so this cell can be re-run without a KeyError and `df` is an
# independent frame instead of an alias of the mutated input.
df = view_log_train.drop(columns=['userRegion', 'userCountry'])

In [4]:
# Build the user-item matrix: one row per userID, one column per articleID,
# each cell holding how many times that pair occurs in the log (0 if never).
user_item_matrix = df.pivot_table(
    index='userID',
    columns='articleID',
    aggfunc='size',
    fill_value=0,
)

In [5]:
# Convert the user-item counts to a SciPy CSR sparse matrix.
user_item_sparse_matrix = csr_matrix(user_item_matrix.values)

# Create the nearest-neighbour model (brute-force search, cosine metric) and
# fit it on the user rows; fit() returns the estimator itself.
model_knn = NearestNeighbors(algorithm='brute', metric='cosine').fit(user_item_sparse_matrix)

# Generate recommendations for a single user
def get_recommendations(user_id, n_recommendations=3):
    """Recommend articles for ``user_id`` from the views of its nearest users.

    Relies on the module-level ``user_item_matrix`` (users x articles counts),
    ``user_item_sparse_matrix`` (the same data in sparse form) and the fitted
    ``model_knn``.

    Up to ``n_recommendations`` neighbouring users are looked up, their view
    counts are summed per article, and the articles with the highest totals
    are returned. Articles the user has already viewed are NOT filtered out.

    Args:
        user_id: A label present in ``user_item_matrix.index``.
        n_recommendations: Number of articles to return; also used as the
            number of neighbouring users to aggregate over.

    Returns:
        A list of at most ``n_recommendations`` article IDs (column labels of
        ``user_item_matrix``), ordered by descending summed view count.

    Raises:
        ValueError: If ``user_id`` is not in ``user_item_matrix.index``.
    """
    # Locate the user's row position via the index's own lookup instead of
    # rebuilding a Python list on every call.
    try:
        user_index = user_item_matrix.index.get_loc(user_id)
    except KeyError:
        raise ValueError(f"{user_id!r} is not in user_item_matrix.index") from None

    # Fetch the user's row vector
    user_vector = user_item_sparse_matrix[user_index]

    # Find similar users. One extra neighbour is requested to leave room for
    # the query user itself; the request is capped at the number of fitted
    # rows because kneighbors rejects n_neighbors larger than that.
    n_users = user_item_sparse_matrix.shape[0]
    n_neighbors = min(n_recommendations + 1, n_users)
    _, indices = model_knn.kneighbors(user_vector, n_neighbors=n_neighbors)

    # Exclude the query user by row index rather than by position: another
    # user at distance 0 (e.g. a proportional view vector under the cosine
    # metric used in this notebook) can be listed before the user itself.
    similar_users = [int(i) for i in indices.flatten() if i != user_index]
    similar_users = similar_users[:n_recommendations]

    # Sum the view counts of the similar users per article
    articles_seen = user_item_matrix.iloc[similar_users].sum(axis=0)

    # Rank articles by total views among the similar users
    recommendations = articles_seen.sort_values(ascending=False)

    return recommendations.head(n_recommendations).index.tolist()



In [7]:
# Example: recommendations for a single user. The user ID lives in one
# variable so the call and the printed label cannot disagree.
example_user = 'USER_0001'
recommended_articles = get_recommendations(example_user, n_recommendations=5)
print(f"Recommended articles for {example_user}: {recommended_articles}")

Recommended articles for USER_0000: ['ARTICLE_2868', 'ARTICLE_2493', 'ARTICLE_2087', 'ARTICLE_1418', 'ARTICLE_1536']


In [19]:
# Generate recommendations for every user: one [userID, articleID] pair per
# recommended article, five articles requested per user.
recommendations = [
    [user, article]
    for user in user_item_matrix.index
    for article in get_recommendations(user, n_recommendations=5)
]

In [20]:
# Convert the recommendation pairs into a DataFrame
top_recommendations = pd.DataFrame(recommendations, columns=['userID', 'articleID'])

# Build the submission frame from the two recommendation columns
submission = top_recommendations[['userID', 'articleID']].copy()

# Save to disk without the row index
submission.to_csv('../submission/baseline_knearst.csv', index=False)