# 2. Non-Negative Matrix Factorization (NMF)

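A linear-algebraic model that factorizes a non-negative document-term matrix into two smaller non-negative matrices (document-topic and topic-term weights), giving each document a low-dimensional topic representation.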

## Use Case
Often produces more coherent topics than LDA for short texts.

In [None]:
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.decomposition import NMF

# Load Data
# Read the semicolon-separated training file. `names` supplies the column
# labels explicitly: 'text' (presumably the raw sentence) and 'emotion'
# (presumably its label -- confirm against the dataset).
# NOTE: the path is relative, so it resolves against the current working
# directory of the notebook kernel.
df_train = pd.read_csv('../Dataset/train.txt', sep=';', names=['text', 'emotion'])

# Preprocess (TF-IDF is better for NMF)
# Build the TF-IDF document-term matrix from the 'text' column, capping the
# vocabulary at 5000 terms and dropping scikit-learn's built-in English stop
# words. TF-IDF values are non-negative, which NMF requires of its input.
vectorizer = TfidfVectorizer(max_features=5000, stop_words='english')
X = vectorizer.fit_transform(df_train['text'])

# Train NMF
# Factor X into 5 components (topics). random_state is fixed so that the
# initialization, and therefore the resulting topics, are reproducible.
nmf = NMF(n_components=5, random_state=42)
# W: per-document topic weights returned by fit_transform (one row per document).
W = nmf.fit_transform(X)
# H: per-topic term weights learned by the model (one row per topic).
H = nmf.components_

# Display Topics
def print_top_words(model, feature_names, n_top_words):
    """Print the highest-weighted terms of every topic in a fitted model.

    One line is printed per topic, in the form ``Topic #<index>: <terms>``,
    with the terms separated by single spaces and ordered from the largest
    weight to the smallest. A single blank line is printed after the last
    topic.

    Args:
        model: Fitted decomposition model exposing ``components_``, an
            iterable of per-topic weight arrays that support ``argsort()``.
        feature_names: Indexable collection mapping a term index to its
            string; must be aligned with the columns of ``components_``.
        n_top_words: Number of terms to show for each topic.
    """
    for topic_idx, weights in enumerate(model.components_):
        # argsort() ranks indices by ascending weight, so a reversed slice
        # taken from the end yields the n_top_words heaviest terms first.
        top_indices = weights.argsort()[:-n_top_words - 1:-1]
        top_terms = " ".join(feature_names[idx] for idx in top_indices)
        print(f"Topic #{topic_idx}: {top_terms}")
    print()

# Show the 10 highest-weighted vocabulary terms for each topic of the fitted
# NMF model, using the vectorizer's feature names to turn indices into words.
print_top_words(nmf, vectorizer.get_feature_names_out(), 10)