In [42]:
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.decomposition import NMF, TruncatedSVD
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.datasets import fetch_20newsgroups

remove = ('headers', 'footers', 'quotes')
# Load the 20 Newsgroups dataset (test split only); `remove` is forwarded so
# the listed parts of each post are stripped before the text is used.
newsgroups_data = fetch_20newsgroups(subset='test', remove=remove)

# Build a DataFrame from the data: one row per post, with its raw text and
# its numeric target label.
df = pd.DataFrame({'Text': newsgroups_data.data, 'Target': newsgroups_data.target})

# Build the TF-IDF matrix of the documents. English stop words are dropped and
# terms are filtered by document frequency via max_df / min_df.
vectorizer = TfidfVectorizer(max_df=0.8, min_df=2, stop_words='english')
tfidf_matrix = vectorizer.fit_transform(df['Text'])

# Apply Non-negative Matrix Factorization (NMF) to get a num_topics-component
# representation of every document. random_state is fixed for repeatability.
num_topics = 5
nmf_model = NMF(n_components=num_topics, random_state=42)
nmf_matrix = nmf_model.fit_transform(tfidf_matrix)

# Apply truncated Singular Value Decomposition (SVD) with the same number of
# components, fitted on the same TF-IDF matrix.
svd_model = TruncatedSVD(n_components=num_topics, random_state=42)
svd_matrix = svd_model.fit_transform(tfidf_matrix)

# Compute the similarities between the documents in each reduced space.
# NOTE(review): nothing in the visible code reads these two module-level
# matrices (recommend_articles computes its own query-vs-document
# similarities). Presumably they are all-pairs document matrices and therefore
# large -- confirm whether later cells need them before keeping this step.
nmf_similarities = cosine_similarity(nmf_matrix)
svd_similarities = cosine_similarity(svd_matrix)

def _top_matches(model, doc_topics, query_vec, top_n):
    """Return positional indices of the ``top_n`` documents closest to the query.

    The TF-IDF query vector is projected with the already-fitted ``model`` and
    compared by cosine similarity against ``doc_topics`` (the documents
    projected with that same model).
    """
    query_topics = model.transform(query_vec)
    # cosine_similarity returns one row per query; there is exactly one query.
    scores = cosine_similarity(query_topics, doc_topics)[0]
    # Ascending argsort reversed -> highest similarity first.
    return scores.argsort()[::-1][:top_n]


def recommend_articles(interest, num_recommendations=5):
    """Recommend articles for a user's interest using the NMF and SVD models.

    Relies on the module-level ``vectorizer``, ``nmf_model``, ``svd_model``,
    ``nmf_matrix``, ``svd_matrix`` and ``df`` built earlier in the file.

    Args:
        interest: Free-text description of what the user wants to read about.
        num_recommendations: Maximum number of articles to return per model.
            Must be non-negative.

    Returns:
        A tuple ``(nmf_recommendations, svd_recommendations)``; each element
        is the ``'Text'`` column of ``df`` restricted to the selected rows,
        most similar first. Both are empty when ``interest`` contains no term
        known to the fitted vectorizer.

    Raises:
        ValueError: If ``num_recommendations`` is negative.
    """
    if num_recommendations < 0:
        # A negative bound would slice as [:-k] and return almost every
        # document instead of the k best ones.
        raise ValueError("num_recommendations must be non-negative")

    # Vectorize the user's interest with the fitted TF-IDF vocabulary.
    interest_vec = vectorizer.transform([interest])

    if interest_vec.nnz == 0:
        # No known vocabulary term: every similarity would tie at zero and the
        # "top" results would be arbitrary documents, so return nothing.
        no_match = df.iloc[0:0]['Text']
        return no_match, no_match.copy()

    # Rank the documents independently in each reduced space.
    nmf_top = _top_matches(nmf_model, nmf_matrix, interest_vec, num_recommendations)
    svd_top = _top_matches(svd_model, svd_matrix, interest_vec, num_recommendations)

    nmf_recommendations = df.iloc[nmf_top]['Text']
    svd_recommendations = df.iloc[svd_top]['Text']

    return nmf_recommendations, svd_recommendations

# Example run: ask both models for articles matching a sample interest.
interest = "Sports"
nmf_recommended_articles, svd_recommended_articles = recommend_articles(interest)

# Each call prints the heading and the recommendations on separate lines.
print("Recomendações de notícias com base no NMF:", nmf_recommended_articles, sep="\n")
print("\nRecomendações de notícias com base no SVD:", svd_recommended_articles, sep="\n")


Recomendações de notícias com base no NMF:
5906    I agree thouroughly!!  Screw the damn contract...
295     \n\n\nHow much do you watch and follow hockey?...
258     -=> Quoting Bill Gregory to All <=-\n\n \n\n B...
7037    Hey,guess what's coming to ESPN for a change? ...
1403    If I were Pat Burns I'd throw in the towel. Th...
Name: Text, dtype: object

Recomendações de notícias com base no SVD:
4698    \n\n\n\nThe top 11 teams of this tournament wi...
454     \n\n\n\n\n\nHow is that possible?  He was on t...
2871    Tuesday's game of Beloved Yakult Swallows\n\n(...
6239    Who holds the record for most career strikeout...
3996    \nGee, they lost to St. Louis twice this year....
Name: Text, dtype: object
