In [1]:
%pip install langchain langchain-community langchain-ollama pandas

Collecting langchain-ollama
  Downloading langchain_ollama-0.3.2-py3-none-any.whl.metadata (1.5 kB)
Collecting ollama<1,>=0.4.4 (from langchain-ollama)
  Using cached ollama-0.4.8-py3-none-any.whl.metadata (4.7 kB)
Collecting langchain-core<1.0.0,>=0.3.51 (from langchain)
  Using cached langchain_core-0.3.58-py3-none-any.whl.metadata (5.9 kB)
Downloading langchain_ollama-0.3.2-py3-none-any.whl (20 kB)
Using cached langchain_core-0.3.58-py3-none-any.whl (437 kB)
Using cached ollama-0.4.8-py3-none-any.whl (13 kB)
Installing collected packages: ollama, langchain-core, langchain-ollama
  Attempting uninstall: langchain-core
    Found existing installation: langchain-core 0.3.51
    Uninstalling langchain-core-0.3.51:
      Successfully uninstalled langchain-core-0.3.51
Successfully installed langchain-core-0.3.58 langchain-ollama-0.3.2 ollama-0.4.8
Note: you may need to restart the kernel to use updated packages.



[notice] A new release of pip is available: 24.3.1 -> 25.1.1
[notice] To update, run: python.exe -m pip install --upgrade pip


In [2]:
import pandas as pd
import os
# Load the topic table from CSV. It is later passed to
# rename_topics_with_langchain, which reads its 'Topic', 'Representation'
# and 'Representative_Docs' columns.
df = pd.read_csv("memento_data/bertopic_topics.csv")

In [10]:
from langchain_ollama import ChatOllama
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.output_parsers import StrOutputParser


import pandas as pd
from tqdm import tqdm
from langchain_ollama import ChatOllama
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.output_parsers import StrOutputParser


def _first_doc_excerpt(raw_docs, max_chars):
    """Return the first representative document, truncated to ``max_chars``.

    ``raw_docs`` may be a real list/tuple of documents or the string repr of
    one (which is what the column holds after a CSV round-trip). Anything
    else -- NaN, an empty list, an unparsable string, a non-string first
    element -- yields an empty excerpt.
    """
    import ast  # local import: only needed to parse the stringified list

    if isinstance(raw_docs, str):
        try:
            # literal_eval accepts Python literals only, so text read from the
            # CSV can no longer execute arbitrary code as it could with eval().
            raw_docs = ast.literal_eval(raw_docs)
        except (ValueError, SyntaxError, TypeError, MemoryError, RecursionError):
            return ""
    if isinstance(raw_docs, (list, tuple)) and raw_docs and isinstance(raw_docs[0], str):
        return raw_docs[0][:max_chars]
    return ""


def rename_topics_with_langchain(df, model_name="llama3", max_chars=500):
    """
    Rename BERTopic topics using LangChain and Ollama with a progress bar.

    Parameters:
    - df: pandas.DataFrame with BERTopic output, containing 'Topic', 'Representation', and 'Representative_Docs'.
      'Representative_Docs' may hold lists or their string repr (as read back from CSV).
    - model_name: Name of the Ollama model to use (e.g., 'llama3').
    - max_chars: Maximum number of characters to use from the representative document.

    Returns:
    - A copy of the DataFrame with a new column 'Generated_Name'. Rows whose
      'Topic' is -1 get the fixed name "Outlier"; rows for which the model call
      fails get the placeholder "Error" (a warning describing the failure is
      emitted for each of them).
    """
    import warnings  # local import: used only to report failed model calls

    llm = ChatOllama(model=model_name)

    prompt_template = ChatPromptTemplate.from_template(
        "Given the topic keywords: {keywords}\n"
        'And this document excerpt:\n"{doc}"\n\n'
        "That is generated from BertTopic, a topic modelling method\n"
        "Suggest a short and descriptive topic name (1–5 words). Output only the name, no other text.\n"
    )

    # prompt -> chat model -> plain string
    chain = prompt_template | llm | StrOutputParser()

    df_copy = df.copy()
    generated_names = []

    for _, row in tqdm(df_copy.iterrows(), total=len(df_copy), desc="Renaming topics"):
        if row["Topic"] == -1:
            # Topic -1 is never sent to the model.
            generated_names.append("Outlier")
            continue

        doc_excerpt = _first_doc_excerpt(row["Representative_Docs"], max_chars)
        try:
            name = chain.invoke({"keywords": row["Representation"], "doc": doc_excerpt}).strip()
        except Exception as exc:
            # Best effort: one failed call must not abort the whole run, but
            # the failure should be visible instead of silently swallowed.
            warnings.warn(
                f"Topic {row['Topic']}: name generation failed ({exc!r}); using 'Error'.",
                stacklevel=2,
            )
            name = "Error"
        generated_names.append(name)

    df_copy["Generated_Name"] = generated_names
    return df_copy

In [11]:
# Generate a name for every topic row via the Ollama model "llama3"; the
# result is a copy of df with an added 'Generated_Name' column.
df_renamed = rename_topics_with_langchain(df, model_name="llama3", max_chars=500)

Renaming topics: 100%|██████████| 247/247 [00:50<00:00,  4.84it/s]


In [20]:
# Remove quotation marks from the newly generated names.
# Only double-quote characters are removed (literal match, regex disabled).
df_renamed["Generated_Name"] = df_renamed["Generated_Name"].str.replace('"', '', regex=False)

In [21]:
# Persist the renamed topic table; the DataFrame index is not written.
df_renamed.to_csv("memento_data/bertopic_topics_renamed.csv", index=False)

In [14]:
# Reload the renamed topic table from disk.
# NOTE(review): this cell's execution count (14) is lower than that of the
# cell that writes this file (21) -- presumably it lets the cells below run
# without repeating the LLM step; confirm the intended run order.
df_renamed = pd.read_csv("memento_data/bertopic_topics_renamed.csv")

In [15]:
# NOTE: this rebinds `df`. From here on it holds the table read from
# "topic_results copy.csv" (which has an 'assigned_topic_name' column), not
# the topic table loaded from bertopic_topics.csv earlier.
df = pd.read_csv("memento_data/topic_results copy.csv")

In [5]:
# Preview the first 10 rows, including the current assigned_topic_name values.
df.head(10)

Unnamed: 0,source,channel_title,channel_description,title,description,link,pubDate,author,html,assigned_topic_name
0,https://machinelearningmastery.com/feed,MachineLearningMastery.com,Making developers awesome at machine learning,3 Ways Vibe Coding and AI-Assisted Development...,Vibe coding and AI-assisted development are tw...,https://machinelearningmastery.com/3-ways-vibe...,"Mon, 31 Mar 2025 11:00:41 +0000",Iván Palomares Carrascosa,,27_adversarial_adversarially_adversary_adversa...
1,https://machinelearningmastery.com/feed,MachineLearningMastery.com,Making developers awesome at machine learning,Advanced Q&A Features with DistilBERT,This post is divided into three parts; they ar...,https://machinelearningmastery.com/advanced-qa...,"Sat, 29 Mar 2025 18:33:57 +0000",Muhammad Asad Iqbal Khan,,244_dropout_underfitting_overfitting_regulariz...
2,https://machinelearningmastery.com/feed,MachineLearningMastery.com,Making developers awesome at machine learning,A Gentle Introduction to Attention and Transfo...,This post is divided into three parts; they ar...,https://machinelearningmastery.com/a-gentle-in...,"Fri, 28 Mar 2025 14:38:37 +0000",Adrian Tam,,235_memory_attention_memoryefficient_decoding
3,https://machinelearningmastery.com/feed,MachineLearningMastery.com,Making developers awesome at machine learning,Building a Recommender System From Scratch wit...,"In this article, we will build step by step a ...",https://machinelearningmastery.com/building-a-...,"Fri, 28 Mar 2025 12:00:08 +0000",Iván Palomares Carrascosa,,50_ranking_rankings_ranked_rank
4,https://machinelearningmastery.com/feed,MachineLearningMastery.com,Making developers awesome at machine learning,The Beginner’s Guide to Machine Learning with ...,Machine learning has become an essential tool ...,https://machinelearningmastery.com/the-beginne...,"Wed, 26 Mar 2025 16:20:41 +0000",Jayita Gulati,,10_learning_theory_algorithmic_papers
5,https://machinelearningmastery.com/feed,MachineLearningMastery.com,Making developers awesome at machine learning,A Gentle Introduction to Graph Neural Networks...,Graph neural networks (GNNs) can be pictured a...,https://machinelearningmastery.com/a-gentle-in...,"Tue, 25 Mar 2025 15:26:06 +0000",Iván Palomares Carrascosa,,100_reinforcement_policies_learning_policy
6,https://machinelearningmastery.com/feed,MachineLearningMastery.com,Making developers awesome at machine learning,10 Must-Know Python Libraries for LLMs in 2025,Large language models (LLMs) are changing the ...,https://machinelearningmastery.com/10-must-kno...,"Mon, 24 Mar 2025 14:43:17 +0000",Jayita Gulati,,16_microsoft_technologies_researchers_technolo...
7,https://machinelearningmastery.com/feed,MachineLearningMastery.com,Making developers awesome at machine learning,Implementing Multilingual Translation with T5 ...,This post is divided into three parts; they ar...,https://machinelearningmastery.com/implementin...,"Sun, 23 Mar 2025 16:32:59 +0000",Muhammad Asad Iqbal Khan,,240_adaptivemcmc_mcmc_hamiltonian_monte
8,https://machinelearningmastery.com/feed,MachineLearningMastery.com,Making developers awesome at machine learning,Bias Detection in LLM Outputs: Statistical App...,Natural language processing models including t...,https://machinelearningmastery.com/bias-detect...,"Fri, 21 Mar 2025 16:46:32 +0000",Cornellius Yudha Wijaya,,"219_robust_robustly_outliers_estimation, 244_d..."
9,https://machinelearningmastery.com/feed,MachineLearningMastery.com,Making developers awesome at machine learning,Building Q&A Systems with DistilBERT and Trans...,This post is in three parts; they are: • Build...,https://machinelearningmastery.com/building-qa...,"Thu, 20 Mar 2025 16:11:33 +0000",Muhammad Asad Iqbal Khan,,244_dropout_underfitting_overfitting_regulariz...


In [16]:
# Replace each old topic name (df_renamed.Name) found in df.assigned_topic_name
# with its new name from df_renamed.Generated_Name.
# Rows without a generated name (NaN) are left out of the map so the old name
# is kept instead of breaking the string join below.
topic2name_map = {
    old_name: new_name
    for old_name, new_name in zip(df_renamed.Name, df_renamed.Generated_Name)
    if pd.notna(new_name)
}

# A cell value may list several topics separated by ", ". Topics that are not
# in the map are kept unchanged rather than dropped; this also makes the cell
# safe to re-run, because already-renamed values simply pass through.
df.assigned_topic_name = df.assigned_topic_name.apply(
    lambda x: (
        ", ".join(topic2name_map.get(topic, topic) for topic in str(x).split(", "))
        if pd.notna(x)
        else x  # missing assignments stay missing
    )
)

In [17]:
# Preview the first rows to check the renamed assigned_topic_name values.
df.head()

Unnamed: 0,source,channel_title,channel_description,title,description,link,pubDate,author,html,assigned_topic_name
0,https://machinelearningmastery.com/feed,MachineLearningMastery.com,Making developers awesome at machine learning,3 Ways Vibe Coding and AI-Assisted Development...,Vibe coding and AI-assisted development are tw...,https://machinelearningmastery.com/3-ways-vibe...,"Mon, 31 Mar 2025 11:00:41 +0000",Iván Palomares Carrascosa,,Adversarial Attack Defense Strategies
1,https://machinelearningmastery.com/feed,MachineLearningMastery.com,Making developers awesome at machine learning,Advanced Q&A Features with DistilBERT,This post is divided into three parts; they ar...,https://machinelearningmastery.com/advanced-qa...,"Sat, 29 Mar 2025 18:33:57 +0000",Muhammad Asad Iqbal Khan,,Deep Learning Regularization
2,https://machinelearningmastery.com/feed,MachineLearningMastery.com,Making developers awesome at machine learning,A Gentle Introduction to Attention and Transfo...,This post is divided into three parts; they ar...,https://machinelearningmastery.com/a-gentle-in...,"Fri, 28 Mar 2025 14:38:37 +0000",Adrian Tam,,Efficient Language Processing
3,https://machinelearningmastery.com/feed,MachineLearningMastery.com,Making developers awesome at machine learning,Building a Recommender System From Scratch wit...,"In this article, we will build step by step a ...",https://machinelearningmastery.com/building-a-...,"Fri, 28 Mar 2025 12:00:08 +0000",Iván Palomares Carrascosa,,Pairwise Ranking Algorithms
4,https://machinelearningmastery.com/feed,MachineLearningMastery.com,Making developers awesome at machine learning,The Beginner’s Guide to Machine Learning with ...,Machine learning has become an essential tool ...,https://machinelearningmastery.com/the-beginne...,"Wed, 26 Mar 2025 16:20:41 +0000",Jayita Gulati,,Machine Learning Theory Papers


In [18]:
# Write the table with the renamed topics to data.csv in the parent
# directory; the DataFrame index is not written.
df.to_csv("../data.csv", index=False)