In [1]:
from datasets import load_dataset
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

# Load the WikiHowNFQA dataset and keep the first 300 rows of its 'train'
# split as the working set of articles.
dataset = load_dataset("Lurunchik/WikiHowNFQA")
df = pd.DataFrame(dataset['train'])
articles = df.iloc[:300]

# Learn a TF-IDF vocabulary (English stop words dropped) from the answer text,
# then encode every answer as one row of the resulting matrix.
vectorizer = TfidfVectorizer(stop_words='english')
vectorizer.fit(articles['answer'])
article_vectors = vectorizer.transform(articles['answer'])

# Catalogue of interests. An article is filed under an interest when the
# lower-cased interest string occurs in its lower-cased question + answer text.
# NOTE(review): this is a plain substring test, so e.g. 'reading' also matches
# inside 'spreading'.
interests = [
    'Cooking', 'Sports', 'Music', 'Reading', 'Traveling', 'Photography',
    'Gaming', 'Writing', 'Painting', 'Dancing', 'Hiking', 'Yoga', 'Film',
    'Fashion', 'Technology', 'Politics', 'Fitness', 'Meditation', 'Gardening',
    'Astronomy', 'Playing musical instruments', 'Drawing',
    'Collecting unique items', 'Learning new languages', 'Volunteering',
    'Hunting', 'Fishing', 'Camping', 'Surfing', 'Scuba diving', 'Snowboarding',
    'Skiing', 'Mountain biking', 'Skateboarding', 'Writing poetry',
    'Reading philosophy', 'Playing board games', 'Watching documentaries',
    'Learning history', 'Playing chess',
]

# How many of the most similar articles are inspected for each article.
TOP_K = 5

# interest -> list of {'title': ..., 'text': ...} entries.
recommended_articles = {interest: [] for interest in interests}

# Index labels of every article filed under at least one interest.
recommended_ids = set()

# Build the "question answer" text once per article; it serves both as the
# similarity query and (lower-cased) as the haystack for interest matching.
combined_text = articles['question'] + ' ' + articles['answer']
searchable_text = combined_text.str.lower().tolist()
interest_keywords = [(interest, interest.lower()) for interest in interests]

# Vectorise all queries in a single call and score them against every answer
# vector at once. The result is an (n_articles x n_articles) dense matrix,
# which is fine for the few hundred rows used here.
similarity_matrix = cosine_similarity(
    vectorizer.transform(combined_text), article_vectors
)

# Positions whose interests have already been resolved. Matching depends only
# on the neighbour itself, so each one needs to be examined just once.
processed_positions = set()

for similarity_scores in similarity_matrix:
    # Positions of the TOP_K highest scores, best first.
    top_similar_articles = similarity_scores.argsort()[::-1][:TOP_K]
    for j in top_similar_articles.tolist():
        if j in processed_positions:
            continue
        processed_positions.add(j)

        haystack = searchable_text[j]
        matched_interests = [
            interest
            for interest, keyword in interest_keywords
            if keyword in haystack
        ]
        if not matched_interests:
            continue

        # argsort yields positions, so read the row with .iloc rather than
        # label-based lookup; the two only coincide for a 0..n-1 index.
        title = articles['question'].iloc[j]
        text = articles['answer'].iloc[j]

        # File the article under every interest it matches, not only the
        # first, so no interest silently loses an article to an earlier one.
        for interest in matched_interests:
            recommended_articles[interest].append({'title': title, 'text': text})

        # Store the index label, since later code compares against iterrows()
        # labels.
        recommended_ids.add(articles.index[j])

user_interests = ['Cooking', 'Traveling', 'Writing']

# Gather the user's recommendations in interest order. An article can sit
# under several interests, so skip entries that were already collected.
user_recommendations = []
already_listed = set()
for interest in user_interests:
    # .get() tolerates a user interest that is not in the catalogue.
    for entry in recommended_articles.get(interest, []):
        entry_key = (entry['title'], entry['text'])
        if entry_key in already_listed:
            continue
        already_listed.add(entry_key)
        user_recommendations.append(entry)

for article in user_recommendations:
    print(article['title'])
    print(article['text'])
    print('---')
    
# List every article whose index label is absent from recommended_ids.
print('All Articles:')
for i, article in articles.iterrows():
    if i in recommended_ids:
        continue
    # One call with a newline separator emits the same three lines as three
    # separate print() calls would.
    print(article['question'], article['answer'], '---', sep='\n')

Found cached dataset wiki_how_nfqa (C:/Users/Anyone/.cache/huggingface/datasets/wiki_how_nfqa/default/1.0.0)
Found cached dataset wiki_how_nfqa (C:/Users/Anyone/.cache/huggingface/datasets/Lurunchik___wiki_how_nfqa/default/1.0.0/12df8b210c4fb268372b5099e58e7efedb7d8ee11380186981c96c4839e56426)


  0%|          | 0/3 [00:00<?, ?it/s]

How To Cook Pork Tenderloin
To cook pork tenderloin, put it in a roasting pan and cook it in the oven for 55 minutes at 400 degrees Fahrenheit, turning it over halfway through. You can also sear the pork tenderloin on both sides in a skillet before putting it in the oven, which will reduce the cooking time to 15 minutes. If you want to grill pork tenderloin, start by preheating the grill to medium-high heat. Then, cook the tenderloin on the grill for 30-40 minutes over indirect heat, flipping it occasionally.
---
How To Grill Filet Mignon
To get the best flavor from your filet mignon, brush it with a bit of oil before grilling. Then, if you have a charcoal grill, create a hot side and a cool side with your coals. Cook the filet over high heat for 2-5 minutes. Alternatively, to cook a filet mignon on a gas grill, cook the filet until the internal temperature reaches 120-160 degrees Fahrenheit. Either way, once the filet is done cooking allow it to rest for 5-10 minutes before cutting in