In [24]:
import os

import pandas as pd
from sklearn.decomposition import NMF, TruncatedSVD
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

In [26]:
# Load the BBC News dataset.
# The CSV location can be overridden with the BBC_NEWS_CSV environment variable
# so the notebook also runs on machines other than the author's. When the
# variable is unset or empty, the original hard-coded path is used unchanged.
df = pd.read_csv(
    os.environ.get('BBC_NEWS_CSV')
    or 'C:/Users/jacks/OneDrive/Área de Trabalho/metodos/BBC News Test.csv'
)

In [36]:
# Show the first rows of the DataFrame, a blank line, then its column index.
# A single print with a two-newline separator emits the same text as three
# consecutive print calls (rows, empty line, columns).
print(df.head(), df.columns, sep='\n\n')

   ArticleId                                               Text
0       1018  qpr keeper day heads for preston queens park r...
1       1319  software watching while you work software that...
2       1138  d arcy injury adds to ireland woe gordon d arc...
3        459  india s reliance family feud heats up the ongo...
4       1020  boro suffer morrison injury blow middlesbrough...

Index(['ArticleId', 'Text'], dtype='object')


In [37]:
# Data pre-processing: build TF-IDF features from the article text, dropping
# scikit-learn's built-in English stop words.
vectorizer = TfidfVectorizer(
    stop_words='english',
)
# Fit the vocabulary and vectorize the corpus in a single pass; `vectorizer`
# is reused later to vectorize user queries with the same vocabulary.
X = vectorizer.fit_transform(df['Text'])

In [38]:
# Topic modelling with NMF, using a larger number of iterations.
num_topics, max_iter = 10, 1000
nmf_model = NMF(
    n_components=num_topics,
    max_iter=max_iter,
    random_state=42,  # fixed seed so the factorization is reproducible
)
# Document-topic weights for the corpus; compared against query projections
# when recommending articles.
nmf_topics = nmf_model.fit_transform(X)

In [39]:
# Dimensionality reduction with truncated SVD (100 components).
svd_model = TruncatedSVD(
    n_components=100,
    random_state=42,  # fixed seed so the decomposition is reproducible
)
# Reduced representation of the corpus; compared against query projections
# when recommending articles.
svd_features = svd_model.fit_transform(X)

In [40]:
# Recommend articles based on the user's interest
def recommend_articles(interest, num_recommendations=5):
    """Return the articles most similar to a free-text interest.

    The interest is vectorized with the fitted TF-IDF ``vectorizer`` and then
    projected with both ``nmf_model`` and ``svd_model``. In each of the two
    spaces the articles of ``df`` are ranked by cosine similarity to the
    projected interest, independently of the other space.

    Parameters
    ----------
    interest : str
        Free-text description of what the user wants to read about.
    num_recommendations : int, default 5
        Maximum number of articles returned per model.

    Returns
    -------
    tuple of (pandas.Series, pandas.Series)
        ``(nmf_recommendations, svd_recommendations)``: the ``Text`` values of
        the best-ranked rows of ``df``, most similar first. Both Series are
        empty when ``interest`` contains no term known to the vectorizer.
    """
    # Vectorize the user's interest with the vocabulary learned from the corpus.
    interest_vec = vectorizer.transform([interest])

    # An interest made only of stop words or unknown terms becomes an all-zero
    # vector. Its cosine similarity to every article is 0, so any ranking would
    # be arbitrary; report "no recommendation" instead of unrelated articles.
    if interest_vec.nnz == 0:
        no_match = df['Text'].iloc[:0]
        return no_match, no_match.copy()

    def top_texts(query_features, corpus_features):
        """Rank the corpus rows against the single query row; return top texts."""
        similarities = cosine_similarity(query_features, corpus_features)[0]
        # argsort is ascending, so reverse it to get the most similar first.
        ranked = similarities.argsort()[::-1]
        return df.iloc[ranked[:num_recommendations]]['Text']

    nmf_recommendations = top_texts(nmf_model.transform(interest_vec), nmf_topics)
    svd_recommendations = top_texts(svd_model.transform(interest_vec), svd_features)

    return nmf_recommendations, svd_recommendations

In [44]:
# Example: recommend articles for a sample interest.
interest = "games"
(nmf_recommended_articles,
 svd_recommended_articles) = recommend_articles(interest)

In [45]:
# Print the heading and the NMF-based recommendations, one per line.
print("Recomendações baseadas em NMF:", nmf_recommended_articles, sep="\n")

Recomendações baseadas em NMF:
195    nintendo ds makes its euro debut nintendo s ds...
689    sony psp console hits us in march us gamers wi...
423    games win for blu-ray dvd format the next-gene...
392    disney backs sony dvd technology a next genera...
93     disney backs sony dvd technology a next genera...
Name: Text, dtype: object


In [46]:
# Print the heading (preceded by a blank line) and the SVD-based recommendations.
print("\nRecomendações baseadas em SVD:", svd_recommended_articles, sep="\n")


Recomendações baseadas em SVD:
558    games  deserve a place in class  computer game...
596    parents face video game lessons ways of ensuri...
602    games enter the classroom video games could so...
525    ea to take on film and tv giants video game gi...
70     news corp eyes video games market news corp  t...
Name: Text, dtype: object
