In [None]:
## Content-based Recommendations

#vogue-content

In [1]:
#1.Prepare a dataset / TF-IDF
import pandas as pd

# Sample catalogue, one (article_id, title, content) row per article.
# NOTE(review): article 1's title says 2023 while its content says 2024 — confirm which is intended.
article_rows = [
    (
        1,
        '2023 Fashion Trends: What to Wear',
        'Discover the latest fashion trends for 2024 including bold colors, oversized coats, and statement accessories.',
    ),
    (
        2,
        'Best Skincare Routines for Glowing Skin',
        'A guide to the best skincare routines to achieve glowing skin, from daily cleansers to moisturizers.',
    ),
    (
        3,
        'Celebrity Style: Red Carpet Looks',
        'A look at the best celebrity outfits from the latest red carpet events, including designer gowns and accessories.',
    ),
    (
        4,
        'Top 10 Summer Makeup Tips',
        'Get ready for summer with these top 10 makeup tips, including bronzers, waterproof mascara, and bright lipstick.',
    ),
    (
        5,
        'Exclusive Interview with Fashion Icon',
        'An exclusive interview with a fashion icon, discussing personal style, career, and inspirations.',
    ),
]

# Pivot the rows into the column-oriented mapping {column name: list of values}.
data = {
    column: list(values)
    for column, values in zip(('article_id', 'title', 'content'), zip(*article_rows))
}

# One DataFrame row per article, columns in the order given above.
df = pd.DataFrame.from_dict(data)

In [2]:
from sklearn.feature_extraction.text import TfidfVectorizer

# 2. Vectorize the article text with TF-IDF
# English stop words are removed before the vocabulary is built.
tfidf = TfidfVectorizer(stop_words='english')

# Learn the vocabulary and IDF weights from the article bodies, then encode
# those same bodies as rows of the TF-IDF matrix.
article_text = df['content']
tfidf_matrix = tfidf.fit(article_text).transform(article_text)

# Shape is (5, N): 5 articles and N distinct terms.
print(tfidf_matrix.shape)



(5, 48)


In [3]:
#3. Compute the Cosine Similarity Matrix

from sklearn.metrics.pairwise import linear_kernel

# Pairwise similarity of every article against every other article.
# linear_kernel is a plain dot product; when the second argument is omitted it
# compares the matrix with itself. The rows behave as unit-length vectors
# (the diagonal of the printed matrix is 1), so the dot product is the cosine
# similarity.
cosine_sim = linear_kernel(tfidf_matrix)

# Entry [i][j] is the similarity score between article i and article j.
print(cosine_sim)

[[1.         0.         0.16828207 0.04302347 0.06862003]
 [0.         1.         0.06496773 0.         0.        ]
 [0.16828207 0.06496773 1.         0.04302347 0.        ]
 [0.04302347 0.         0.04302347 1.         0.        ]
 [0.06862003 0.         0.         0.         1.        ]]


In [4]:
# Function to get recommendations based on an article title
def get_recommendations(article_title, cosine_sim=cosine_sim, top_n=5):
    """Return the titles of the articles most similar to ``article_title``.

    Reads the module-level ``df`` for titles; ``cosine_sim`` must have one
    row/column per row of ``df``, in the same order.

    Parameters
    ----------
    article_title : str
        Exact title of the query article. If several rows share the title,
        the first one is used.
    cosine_sim : 2-D array-like
        Pairwise similarity matrix; ``cosine_sim[i][j]`` is the score between
        rows ``i`` and ``j`` of ``df``.
    top_n : int, default 5
        Maximum number of recommendations to return.

    Returns
    -------
    pandas.Series
        Titles of up to ``top_n`` other articles, most similar first. The
        query article itself is never included.

    Raises
    ------
    IndexError
        If no article has the given title.
    ValueError
        If ``top_n`` is negative.
    """
    if top_n < 0:
        raise ValueError(f"top_n must be non-negative, got {top_n}")

    # Locate the query by row *position* (not index label), because the
    # similarity matrix is addressed positionally.
    match_positions = (df['title'] == article_title).to_numpy().nonzero()[0]
    if match_positions.size == 0:
        raise IndexError(f"No article found with title {article_title!r}")
    idx = int(match_positions[0])

    # Drop the query article explicitly instead of assuming it sorts first:
    # a duplicate article ties at the top score, and an article with no
    # non-stop-word terms has self-similarity 0.
    scored = [
        (position, score)
        for position, score in enumerate(cosine_sim[idx])
        if position != idx
    ]

    # Highest similarity first; the sort is stable, so ties keep row order.
    scored.sort(key=lambda pair: pair[1], reverse=True)

    article_indices = [position for position, _ in scored[:top_n]]

    # Return the titles of the recommended articles
    return df['title'].iloc[article_indices]

# Smoke-test the recommender with one of the sample article titles
sample_title = 'Celebrity Style: Red Carpet Looks'
print(get_recommendations(sample_title))


0          2023 Fashion Trends: What to Wear
1    Best Skincare Routines for Glowing Skin
3                  Top 10 Summer Makeup Tips
4      Exclusive Interview with Fashion Icon
Name: title, dtype: object
