<a href="https://colab.research.google.com/github/KJM94/Single_project/blob/main/%EC%9B%B9%20%EA%B8%B0%EC%82%AC%20%EC%B6%94%EC%B2%9C%20AI%20%EA%B2%BD%EC%A7%84%EB%8C%80%ED%9A%8C/Untitled5.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import pandas as pd
import numpy as np
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

# Load datasets.
# article_info: the code below reads its 'articleID', 'Title' and 'Content' columns.
# view_log: the code further down reads its 'userID' and 'articleID' columns.
article_info = pd.read_csv('./article_info.csv')
view_log = pd.read_csv('./view_log.csv')

# Combine title and content for TF-IDF vectorization.
# Missing values are replaced by empty strings first: NaN + str stays NaN, and
# TfidfVectorizer refuses NaN documents, so one incomplete row would abort the fit.
article_info['text'] = article_info['Title'].fillna('') + " " + article_info['Content'].fillna('')

# Compute the TF-IDF matrix (one row per article, English stop words removed)
tfidf = TfidfVectorizer(stop_words='english')
tfidf_matrix = tfidf.fit_transform(article_info['text'])

# Compute cosine similarity matrix; entry [i, j] compares article rows i and j
cosine_sim = cosine_similarity(tfidf_matrix, tfidf_matrix)

# Create a mapping of articleID to row position in cosine_sim.
# Duplicates must be removed on the *labels* (articleID): Series.drop_duplicates()
# works on the values, which are unique positions, so it would remove nothing and a
# repeated articleID would make .get() return a Series instead of a single position.
article_indices = pd.Series(np.arange(len(article_info)), index=article_info['articleID'])
article_indices = article_indices[~article_indices.index.duplicated(keep='first')]

# Function to get article recommendations based on content similarity
def get_content_recommendations(article_id, num_recommendations=5):
    """Return the IDs of the articles most similar in content to ``article_id``.

    Reads the module-level ``article_indices`` (articleID -> row position),
    ``cosine_sim`` (pairwise similarity matrix) and ``article_info``.

    Args:
        article_id: Identifier looked up in ``article_indices``.
        num_recommendations: Maximum number of article IDs to return.

    Returns:
        A list of ``articleID`` values ordered by decreasing similarity (ties
        broken by ascending row position). The queried article itself is never
        included. An empty list is returned when ``article_id`` is unknown.
    """
    idx = article_indices.get(article_id, None)
    if idx is None:
        return []
    # A repeated articleID label makes the lookup return several positions;
    # fall back to the first one instead of failing further down.
    if isinstance(idx, pd.Series):
        idx = idx.iloc[0]

    scores = np.asarray(cosine_sim[idx])
    # Stable sort on the negated scores: descending similarity, ties in row order.
    ranked = np.argsort(-scores, kind='stable')
    # Drop the query article by position rather than assuming it is ranked first:
    # an article with identical text (or an all-zero vector) can outrank it.
    ranked = ranked[ranked != idx][:num_recommendations]
    return article_info['articleID'].iloc[ranked].values.tolist()

# Generate user-article interaction matrix
# (rows: userID, columns: articleID, values: number of logged views, 0 when none)
user_article_matrix = view_log.pivot_table(index='userID', columns='articleID', aggfunc='size', fill_value=0)

# Compute user similarity matrix using collaborative filtering
user_similarity = cosine_similarity(user_article_matrix)

# Generate content-based recommendations.
# A single groupby pass replaces re-filtering the whole log once per user;
# sort=False keeps users in order of first appearance in the log.
content_recommendations = []
for user, viewed_articles in view_log.groupby('userID', sort=False)['articleID']:
    recommendations = []
    # Each distinct viewed article contributes its single closest neighbour.
    for article in viewed_articles.unique():
        recommendations.extend(get_content_recommendations(article, num_recommendations=1))
    # dict.fromkeys de-duplicates while keeping viewing order, so the five kept
    # candidates are reproducible (a set would keep an arbitrary five).
    for rec in list(dict.fromkeys(recommendations))[:5]:
        content_recommendations.append([user, rec])

# Convert content-based recommendations to DataFrame
content_recommendations_df = pd.DataFrame(content_recommendations, columns=['userID', 'articleID'])

# Hybrid approach: combining collaborative filtering with content-based filtering
def hybrid_recommendation(user_id, num_recommendations=5):
    """Recommend article IDs for a user by blending collaborative and content-based results.

    Reads the module-level ``user_article_matrix``, ``user_similarity`` and
    ``view_log`` and calls ``get_content_recommendations``.

    Args:
        user_id: Label of the user's row in ``user_article_matrix``.
        num_recommendations: Maximum number of article IDs to return.

    Returns:
        A list of at most ``num_recommendations`` distinct article IDs. The
        collaborative and content-based rankings are interleaved, starting
        with the best collaborative candidate.

    Raises:
        KeyError: If ``user_id`` is not in ``user_article_matrix.index``.
    """
    # Collaborative filtering recommendations: score every article by the
    # similarity-weighted view counts of all users.
    user_idx = user_article_matrix.index.get_loc(user_id)
    weights = np.asarray(user_similarity[user_idx], dtype=float)
    cf_scores = weights.dot(user_article_matrix.to_numpy())
    weight_total = np.abs(weights).sum()
    if weight_total > 0:  # avoid 0/0 when the user is similar to nobody
        cf_scores = cf_scores / weight_total
    # Stable sort on the negated scores: best first, ties in column order.
    top_columns = np.argsort(-cf_scores, kind='stable')[:num_recommendations]
    # argsort yields column positions; translate them to the article IDs they stand
    # for, otherwise bare integers end up next to real IDs in the result.
    cf_recommendations = user_article_matrix.columns[top_columns].tolist()

    # Content-based filtering recommendations: closest neighbour of each viewed article.
    viewed_articles = view_log.loc[view_log['userID'] == user_id, 'articleID']
    cb_candidates = []
    for article in viewed_articles.unique():
        cb_candidates.extend(get_content_recommendations(article, num_recommendations=1))
    cb_recommendations = list(dict.fromkeys(cb_candidates))[:num_recommendations]

    # Combine recommendations: alternate between both rankings so neither one is
    # dropped wholesale. One-element slices are empty once a list is exhausted.
    merged = []
    for rank in range(max(len(cf_recommendations), len(cb_recommendations))):
        merged.extend(cf_recommendations[rank:rank + 1])
        merged.extend(cb_recommendations[rank:rank + 1])
    # Order-preserving de-duplication keeps the result deterministic.
    return list(dict.fromkeys(merged))[:num_recommendations]

# Build one [userID, articleID] row per hybrid recommendation,
# visiting users in the order they first appear in the view log
hybrid_recommendations = []
for user in view_log['userID'].unique():
    hybrid_recommendations.extend(
        [user, rec] for rec in hybrid_recommendation(user, num_recommendations=5)
    )

# Tabulate the collected rows
hybrid_recommendations_df = pd.DataFrame(
    hybrid_recommendations,
    columns=['userID', 'articleID'],
)

# Persist the table without the positional index column
hybrid_recommendations_df.to_csv('./hybrid_recommendations.csv', index=False)

# Preview the first rows
print(hybrid_recommendations_df.head())


      userID     articleID
0  USER_0000  ARTICLE_2806
1  USER_0000           390
2  USER_0000  ARTICLE_1053
3  USER_0000          2156
4  USER_0000  ARTICLE_2642
