In [3]:
import pandas as pd
from sklearn.feature_extraction.text import CountVectorizer
import torch

import os

# Explicitly configure Hugging Face tokenizers parallelism ("false") so the
# library does not warn about being forked after parallelism was used.
os.environ.update({"TOKENIZERS_PARALLELISM": "false"})

# Load data: read the CSV and keep only the rows that have a value in `doc_clean`
df = pd.read_csv('../data/us_politicians.csv').dropna(subset=['doc_clean'])

# Path setup: make the local GTM sources importable.
# The entry is a relative path, so it resolves against the kernel's current
# working directory.
import sys

GTM_SRC_DIR = '../gtm/'
if GTM_SRC_DIR not in sys.path:
    # Guard keeps the cell idempotent: re-running it no longer appends a
    # duplicate entry to sys.path each time.
    sys.path.append(GTM_SRC_DIR)
from corpus import GTMCorpus
from gtm import GTM
from utils import bert_embeddings_from_list

def embed_fn_bert(texts):
    """Embed `texts` by delegating to `bert_embeddings_from_list`.

    The call is fixed to the "all-MiniLM-L6-v2" model with a batch size of 64
    and a maximum sequence length of 256. It targets "cuda" when
    `torch.cuda.is_available()` is true and "cpu" otherwise.

    Args:
        texts: the documents to embed; forwarded unchanged as `texts=`.

    Returns:
        Whatever `bert_embeddings_from_list` returns (its implementation is
        not part of this notebook).
    """
    # Pick the device at call time so the same cell works with or without a GPU.
    target_device = "cpu"
    if torch.cuda.is_available():
        target_device = "cuda"

    encode_options = dict(
        sbert_model_to_load="all-MiniLM-L6-v2",
        batch_size=64,
        max_seq_length=256,
        device=target_device,
    )
    return bert_embeddings_from_list(texts=texts, **encode_options)

# Bag-of-words vectorizer, constructed with its default arguments
default_vectorizer = CountVectorizer()

# Modalities: one text modality, "lang1", read from the `doc_clean` column.
# It declares two views of that text: an "embedding" view produced by
# `embed_fn_bert` and a "bow" view produced by `default_vectorizer`.
modalities = dict(
    lang1=dict(
        column="doc_clean",
        views=dict(
            embedding=dict(type="embedding", embed_fn=embed_fn_bert),
            bow=dict(type="bow", vectorizer=default_vectorizer),
        ),
    ),
)

# Build the corpus from the cleaned frame and the modality specification
train_dataset = GTMCorpus(df, modalities=modalities)




Batches:   0%|          | 0/16 [00:00<?, ?it/s]

In [5]:
# Train the model: 20 topics on `train_dataset`.
# The encoder/decoder input names presumably refer to the "<modality>_<view>"
# pairs declared in `modalities` -- TODO confirm against GTM's documentation.
gtm_settings = dict(
    n_topics=20,
    encoder_input="lang1_embedding",
    decoder_input="lang1_bow",
)
tm = GTM(train_data=train_dataset, **gtm_settings)

  self.pid = os.fork()
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
  self.pid = os.fork()
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
  self.pid = os.fork()
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
  self.pid = os.fork()
huggingface/tokenizers: The current process just got forked


Epoch   1	Mean Training Loss:11.9951801

Topic_0: ['know', 'tax', 'corporation', 'costly', 'obtain', 'condition', 'heart', 'name']
Topic_1: ['much', 'realize', 'right', 'whole', 'act', 'approach', 'state', 'very']
Topic_2: ['ability', 'exceed', 'enactment', 'approve', 'rise', 'world', 'property', 'bank']
Topic_3: ['pay', 'amendment', 'effect', 'leader', 'most', 'fiscal', 'certain', 'government']
Topic_4: ['first', 'respect', 'page', 'privilege', 'basis', 'conclusion', 'session', 'production']
Topic_5: ['point', 'name', 'promote', 'coal', 'recommendation', 'insure', 'rural', 'repeat']
Topic_6: ['high', 'grow', 'amendment', 'ask', 'sense', 'school', 'tax', 'hand']
Topic_7: ['buy', 'authority', 'die', 'reference', 'situation', 'stand', 'deficit', 'peace']
Topic_8: ['necessity', 'correct', 'economic', 'argument', 'cause', 'soldier', 'yield', 'growth']
Topic_9: ['circumstance', 'detail', 'form', 'federal', 'fail', 'heart', 'draw', 'good']
Topic_10: ['seek', 'get', 'produce', 'start', 'redu

  self.pid = os.fork()
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
  self.pid = os.fork()
  self.pid = os.fork()
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
  self.pid = os.fork()
huggingface/tokenizers: The current process just got forked


Epoch   2	Mean Training Loss:11.2578080

Topic_0: ['know', 'tax', 'obtain', 'corporation', 'condition', 'term', 'name', 'military']
Topic_1: ['much', 'realize', 'whole', 'right', 'act', 'state', 'approach', 'testimony']
Topic_2: ['ability', 'rise', 'world', 'approve', 'try', 'enactment', 'exceed', 'bank']
Topic_3: ['amendment', 'pay', 'most', 'certain', 'effect', 'government', 'one', 'fiscal']
Topic_4: ['first', 'respect', 'page', 'basis', 'privilege', 'session', 'conclusion', 'other']
Topic_5: ['point', 'name', 'call', 'promote', 'percent', 'side', 'people', 'recommendation']
Topic_6: ['high', 'amendment', 'grow', 'ask', 'sense', 'tax', 'fix', 'school']
Topic_7: ['situation', 'authority', 'buy', 'stand', 'reference', 'much', 'die', 'thing']
Topic_8: ['argument', 'correct', 'economic', 'necessity', 'cause', 'yield', 'way', 'proper']
Topic_9: ['circumstance', 'form', 'detail', 'federal', 'good', 'word', 'total', 'grow']
Topic_10: ['get', 'seek', 'produce', 'reduce', 'start', 'talk', 'b

  self.pid = os.fork()
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
  self.pid = os.fork()
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
  self.pid = os.fork()
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
  self.pid = os.fork()
huggingface/tokenizers: The current process just got forked


Epoch   3	Mean Training Loss:10.9072438

Topic_0: ['know', 'obtain', 'tax', 'condition', 'corporation', 'term', 'name', 'military']
Topic_1: ['much', 'whole', 'act', 'state', 'right', 'realize', 'approach', 'million']
Topic_2: ['ability', 'rise', 'world', 'try', 'approve', 'position', 'decision', 'bank']
Topic_3: ['amendment', 'pay', 'certain', 'most', 'effect', 'government', 'one', 'ask']
Topic_4: ['first', 'respect', 'other', 'basis', 'page', 'future', 'session', 'fact']
Topic_5: ['point', 'call', 'percent', 'people', 'name', 'side', 'promote', 'high']
Topic_6: ['high', 'amendment', 'grow', 'ask', 'sense', 'tax', 'order', 'hand']
Topic_7: ['situation', 'authority', 'stand', 'buy', 'much', 'reference', 'thing', 'pay']
Topic_8: ['argument', 'cause', 'economic', 'way', 'yield', 'correct', 'necessity', 'proper']
Topic_9: ['good', 'circumstance', 'form', 'word', 'federal', 'detail', 'total', 'grow']
Topic_10: ['get', 'seek', 'reduce', 'produce', 'start', 'talk', 'basic', 'agree']
Topic_1

  self.pid = os.fork()
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
  self.pid = os.fork()
  self.pid = os.fork()
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
  self.pid = os.fork()
huggingface/tokenizers: The current process just got forked


Epoch   4	Mean Training Loss:10.7743122

Topic_0: ['know', 'obtain', 'tax', 'term', 'condition', 'corporation', 'name', 'military']
Topic_1: ['much', 'whole', 'state', 'act', 'right', 'realize', 'approach', 'million']
Topic_2: ['rise', 'ability', 'world', 'try', 'approve', 'position', 'first', 'decision']
Topic_3: ['amendment', 'pay', 'certain', 'most', 'effect', 'government', 'one', 'ask']
Topic_4: ['first', 'respect', 'other', 'basis', 'future', 'fact', 'page', 'session']
Topic_5: ['point', 'call', 'people', 'percent', 'side', 'name', 'high', 'promote']
Topic_6: ['high', 'amendment', 'ask', 'grow', 'order', 'sense', 'tax', 'least']
Topic_7: ['situation', 'authority', 'stand', 'much', 'thing', 'buy', 'pay', 'see']
Topic_8: ['argument', 'cause', 'way', 'economic', 'yield', 'correct', 'proper', 'necessity']
Topic_9: ['good', 'word', 'circumstance', 'form', 'federal', 'total', 'grow', 'detail']
Topic_10: ['get', 'seek', 'reduce', 'produce', 'start', 'talk', 'basic', 'agree']
Topic_11: [

  self.pid = os.fork()
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
  self.pid = os.fork()
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
  self.pid = os.fork()
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
  self.pid = os.fork()
huggingface/tokenizers: The current process just got forked


Epoch   5	Mean Training Loss:10.5940420

Topic_0: ['know', 'obtain', 'tax', 'term', 'condition', 'corporation', 'name', 'military']
Topic_1: ['much', 'whole', 'state', 'act', 'right', 'realize', 'approach', 'million']
Topic_2: ['rise', 'ability', 'world', 'try', 'first', 'position', 'approve', 'today']
Topic_3: ['amendment', 'certain', 'pay', 'most', 'effect', 'government', 'one', 'ask']
Topic_4: ['first', 'respect', 'other', 'fact', 'future', 'basis', 'able', 'session']
Topic_5: ['point', 'call', 'people', 'percent', 'high', 'side', 'name', 'realize']
Topic_6: ['high', 'amendment', 'ask', 'grow', 'order', 'tax', 'sense', 'least']
Topic_7: ['situation', 'much', 'authority', 'stand', 'thing', 'pay', 'see', 'buy']
Topic_8: ['way', 'cause', 'argument', 'yield', 'economic', 'proper', 'correct', 'necessity']
Topic_9: ['good', 'word', 'circumstance', 'federal', 'form', 'grow', 'total', 'ask']
Topic_10: ['get', 'reduce', 'seek', 'produce', 'talk', 'start', 'agree', 'basic']
Topic_11: ['vote'

  self.pid = os.fork()
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
  self.pid = os.fork()
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
  self.pid = os.fork()
  self.pid = os.fork()
huggingface/tokenizers: The current process just got forked


Epoch   6	Mean Training Loss:10.4164224

Topic_0: ['know', 'obtain', 'tax', 'term', 'condition', 'corporation', 'name', 'military']
Topic_1: ['much', 'whole', 'state', 'act', 'right', 'realize', 'approach', 'million']
Topic_2: ['rise', 'ability', 'world', 'try', 'first', 'position', 'today', 'approve']
Topic_3: ['amendment', 'certain', 'pay', 'most', 'effect', 'government', 'one', 'ask']
Topic_4: ['first', 'respect', 'other', 'fact', 'future', 'basis', 'able', 'session']
Topic_5: ['point', 'people', 'call', 'percent', 'high', 'side', 'name', 'realize']
Topic_6: ['high', 'amendment', 'ask', 'grow', 'order', 'least', 'tax', 'sense']
Topic_7: ['situation', 'much', 'stand', 'thing', 'authority', 'see', 'pay', 'money']
Topic_8: ['way', 'cause', 'argument', 'yield', 'economic', 'proper', 'correct', 'necessity']
Topic_9: ['good', 'word', 'federal', 'circumstance', 'form', 'grow', 'total', 'ask']
Topic_10: ['get', 'reduce', 'seek', 'produce', 'talk', 'start', 'agree', 'basic']
Topic_11: ['vot

  self.pid = os.fork()
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
  self.pid = os.fork()
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
  self.pid = os.fork()
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
  self.pid = os.fork()
huggingface/tokenizers: The current process just got forked


Epoch   7	Mean Training Loss:10.2764396

Topic_0: ['know', 'obtain', 'tax', 'term', 'condition', 'corporation', 'matter', 'life']
Topic_1: ['much', 'whole', 'state', 'act', 'right', 'realize', 'approach', 'million']
Topic_2: ['rise', 'ability', 'world', 'try', 'first', 'today', 'position', 'serve']
Topic_3: ['amendment', 'certain', 'most', 'pay', 'effect', 'government', 'one', 'fact']
Topic_4: ['first', 'respect', 'other', 'fact', 'future', 'basis', 'able', 'consider']
Topic_5: ['point', 'people', 'call', 'percent', 'high', 'side', 'realize', 'name']
Topic_6: ['high', 'amendment', 'ask', 'order', 'grow', 'least', 'tax', 'sense']
Topic_7: ['situation', 'much', 'thing', 'stand', 'see', 'authority', 'pay', 'money']
Topic_8: ['way', 'cause', 'argument', 'yield', 'economic', 'proper', 'correct', 'available']
Topic_9: ['good', 'word', 'federal', 'grow', 'total', 'ask', 'circumstance', 'form']
Topic_10: ['get', 'reduce', 'seek', 'produce', 'talk', 'start', 'agree', 'basic']
Topic_11: ['vote'

  self.pid = os.fork()
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
  self.pid = os.fork()
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
  self.pid = os.fork()
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
  self.pid = os.fork()
huggingface/tokenizers: The current process just got forked


Epoch   8	Mean Training Loss:10.1574749

Topic_0: ['know', 'obtain', 'term', 'tax', 'condition', 'corporation', 'matter', 'life']
Topic_1: ['much', 'whole', 'state', 'act', 'right', 'realize', 'approach', 'million']
Topic_2: ['rise', 'ability', 'world', 'try', 'first', 'today', 'position', 'serve']
Topic_3: ['amendment', 'certain', 'most', 'pay', 'effect', 'government', 'one', 'fact']
Topic_4: ['first', 'other', 'respect', 'fact', 'future', 'basis', 'able', 'consider']
Topic_5: ['point', 'people', 'call', 'percent', 'high', 'side', 'realize', 'work']
Topic_6: ['amendment', 'high', 'ask', 'order', 'grow', 'least', 'today', 'sense']
Topic_7: ['situation', 'much', 'thing', 'see', 'stand', 'pay', 'authority', 'money']
Topic_8: ['way', 'cause', 'argument', 'yield', 'economic', 'proper', 'available', 'order']
Topic_9: ['good', 'word', 'federal', 'ask', 'total', 'grow', 'present', 'circumstance']
Topic_10: ['get', 'reduce', 'seek', 'talk', 'produce', 'agree', 'start', 'today']
Topic_11: ['vo

  self.pid = os.fork()
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
  self.pid = os.fork()
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
  self.pid = os.fork()
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has


Epoch   9	Mean Training Loss:10.0761562

Topic_0: ['know', 'obtain', 'term', 'tax', 'condition', 'matter', 'life', 'corporation']
Topic_1: ['much', 'whole', 'state', 'act', 'right', 'realize', 'approach', 'million']
Topic_2: ['rise', 'world', 'ability', 'try', 'first', 'today', 'position', 'serve']
Topic_3: ['amendment', 'certain', 'most', 'pay', 'government', 'effect', 'one', 'fact']
Topic_4: ['first', 'other', 'respect', 'fact', 'future', 'basis', 'able', 'consider']
Topic_5: ['point', 'people', 'call', 'percent', 'high', 'side', 'work', 'realize']
Topic_6: ['amendment', 'high', 'ask', 'order', 'grow', 'least', 'today', 'sense']
Topic_7: ['situation', 'much', 'see', 'thing', 'stand', 'pay', 'authority', 'go']
Topic_8: ['way', 'cause', 'argument', 'yield', 'economic', 'proper', 'available', 'order']
Topic_9: ['good', 'word', 'ask', 'federal', 'present', 'total', 'grow', 'day']
Topic_10: ['get', 'reduce', 'seek', 'talk', 'produce', 'today', 'agree', 'good']
Topic_11: ['vote', 'leaders

  self.pid = os.fork()
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
  self.pid = os.fork()
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
  self.pid = os.fork()
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
  self.pid = os.fork()
huggingface/tokenizers: The current process just got forked


Epoch  10	Mean Training Loss:9.9946411

Topic_0: ['know', 'obtain', 'term', 'tax', 'condition', 'matter', 'life', 'use']
Topic_1: ['much', 'state', 'whole', 'act', 'right', 'realize', 'approach', 'million']
Topic_2: ['rise', 'world', 'ability', 'try', 'first', 'today', 'position', 'serve']
Topic_3: ['amendment', 'certain', 'most', 'pay', 'government', 'effect', 'one', 'fact']
Topic_4: ['first', 'other', 'respect', 'fact', 'future', 'basis', 'able', 'consider']
Topic_5: ['point', 'people', 'call', 'high', 'percent', 'side', 'work', 'realize']
Topic_6: ['amendment', 'ask', 'high', 'order', 'today', 'least', 'grow', 'sense']
Topic_7: ['situation', 'much', 'see', 'thing', 'pay', 'stand', 'go', 'authority']
Topic_8: ['way', 'cause', 'yield', 'argument', 'economic', 'proper', 'available', 'order']
Topic_9: ['good', 'word', 'ask', 'present', 'federal', 'total', 'day', 'grow']
Topic_10: ['get', 'reduce', 'talk', 'seek', 'today', 'produce', 'good', 'agree']
Topic_11: ['vote', 'leadership', 'di

  self.pid = os.fork()
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
  self.pid = os.fork()
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
  self.pid = os.fork()
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
  self.pid = os.fork()
huggingface/tokenizers: The current process just got forked


Epoch  11	Mean Training Loss:9.9497337

Topic_0: ['know', 'obtain', 'term', 'tax', 'condition', 'matter', 'life', 'use']
Topic_1: ['much', 'state', 'whole', 'act', 'right', 'realize', 'approach', 'million']
Topic_2: ['rise', 'world', 'ability', 'try', 'first', 'today', 'position', 'serve']
Topic_3: ['amendment', 'certain', 'most', 'pay', 'government', 'effect', 'fact', 'one']
Topic_4: ['first', 'other', 'respect', 'fact', 'future', 'basis', 'able', 'consider']
Topic_5: ['point', 'people', 'call', 'high', 'percent', 'side', 'work', 'public']
Topic_6: ['amendment', 'ask', 'order', 'high', 'today', 'least', 'like', 'grow']
Topic_7: ['situation', 'much', 'see', 'thing', 'pay', 'stand', 'go', 'additional']
Topic_8: ['way', 'cause', 'yield', 'argument', 'economic', 'proper', 'available', 'order']
Topic_9: ['good', 'word', 'ask', 'present', 'federal', 'total', 'day', 'system']
Topic_10: ['get', 'reduce', 'talk', 'today', 'good', 'seek', 'produce', 'agree']
Topic_11: ['vote', 'leadership', 'd

  self.pid = os.fork()
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
  self.pid = os.fork()
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
  self.pid = os.fork()
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
  self.pid = os.fork()
huggingface/tokenizers: The current process just got forked


Epoch  12	Mean Training Loss:9.8754466

Topic_0: ['know', 'obtain', 'term', 'tax', 'condition', 'matter', 'life', 'use']
Topic_1: ['much', 'state', 'whole', 'act', 'right', 'approach', 'realize', 'million']
Topic_2: ['rise', 'world', 'try', 'ability', 'first', 'today', 'position', 'serve']
Topic_3: ['amendment', 'certain', 'most', 'pay', 'government', 'fact', 'effect', 'one']
Topic_4: ['first', 'other', 'respect', 'fact', 'future', 'basis', 'able', 'consider']
Topic_5: ['point', 'people', 'call', 'high', 'percent', 'work', 'side', 'public']
Topic_6: ['amendment', 'ask', 'order', 'high', 'today', 'least', 'like', 'question']
Topic_7: ['situation', 'much', 'see', 'thing', 'pay', 'go', 'stand', 'man']
Topic_8: ['way', 'cause', 'yield', 'argument', 'economic', 'proper', 'available', 'order']
Topic_9: ['good', 'word', 'ask', 'present', 'federal', 'total', 'day', 'system']
Topic_10: ['get', 'reduce', 'today', 'good', 'talk', 'seek', 'agree', 'produce']
Topic_11: ['vote', 'leadership', 'dist

  self.pid = os.fork()
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
  self.pid = os.fork()
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
  self.pid = os.fork()
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
  self.pid = os.fork()
huggingface/tokenizers: The current process just got forked


Epoch  13	Mean Training Loss:9.8431755

Topic_0: ['know', 'obtain', 'term', 'tax', 'condition', 'matter', 'life', 'use']
Topic_1: ['much', 'state', 'whole', 'act', 'right', 'approach', 'realize', 'million']
Topic_2: ['rise', 'world', 'try', 'ability', 'first', 'today', 'position', 'serve']
Topic_3: ['amendment', 'certain', 'most', 'pay', 'government', 'fact', 'effect', 'one']
Topic_4: ['first', 'other', 'respect', 'fact', 'future', 'basis', 'able', 'consider']
Topic_5: ['point', 'people', 'call', 'high', 'percent', 'work', 'side', 'public']
Topic_6: ['amendment', 'ask', 'order', 'high', 'today', 'like', 'question', 'least']
Topic_7: ['situation', 'much', 'see', 'thing', 'go', 'pay', 'stand', 'man']
Topic_8: ['way', 'cause', 'yield', 'argument', 'economic', 'proper', 'available', 'order']
Topic_9: ['good', 'word', 'ask', 'present', 'system', 'federal', 'day', 'total']
Topic_10: ['get', 'reduce', 'good', 'today', 'talk', 'seek', 'agree', 'mean']
Topic_11: ['vote', 'leadership', 'distric

  self.pid = os.fork()
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
  self.pid = os.fork()
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
  self.pid = os.fork()
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
  self.pid = os.fork()
huggingface/tokenizers: The current process just got forked


Epoch  14	Mean Training Loss:9.7916347

Topic_0: ['know', 'obtain', 'term', 'tax', 'condition', 'matter', 'life', 'use']
Topic_1: ['much', 'state', 'whole', 'act', 'right', 'approach', 'realize', 'million']
Topic_2: ['rise', 'world', 'try', 'ability', 'first', 'today', 'position', 'serve']
Topic_3: ['amendment', 'certain', 'most', 'pay', 'fact', 'government', 'effect', 'one']
Topic_4: ['first', 'other', 'respect', 'fact', 'future', 'basis', 'able', 'legislation']
Topic_5: ['point', 'people', 'call', 'high', 'percent', 'work', 'side', 'state']
Topic_6: ['amendment', 'ask', 'order', 'high', 'today', 'like', 'question', 'act']
Topic_7: ['much', 'situation', 'see', 'thing', 'go', 'pay', 'man', 'stand']
Topic_8: ['way', 'cause', 'yield', 'argument', 'economic', 'proper', 'available', 'order']
Topic_9: ['good', 'word', 'ask', 'present', 'system', 'day', 'total', 'federal']
Topic_10: ['get', 'reduce', 'good', 'today', 'talk', 'agree', 'mean', 'seek']
Topic_11: ['vote', 'leadership', 'distric

  self.pid = os.fork()
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
  self.pid = os.fork()
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
  self.pid = os.fork()
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
  self.pid = os.fork()
huggingface/tokenizers: The current process just got forked


Epoch  15	Mean Training Loss:9.7583576

Topic_0: ['know', 'obtain', 'term', 'tax', 'condition', 'matter', 'life', 'use']
Topic_1: ['much', 'state', 'whole', 'act', 'right', 'approach', 'realize', 'million']
Topic_2: ['rise', 'world', 'try', 'ability', 'first', 'today', 'position', 'serve']
Topic_3: ['amendment', 'certain', 'most', 'pay', 'fact', 'government', 'effect', 'one']
Topic_4: ['first', 'other', 'respect', 'fact', 'future', 'basis', 'able', 'legislation']
Topic_5: ['point', 'people', 'call', 'high', 'percent', 'work', 'side', 'state']
Topic_6: ['amendment', 'ask', 'order', 'high', 'question', 'like', 'today', 'act']
Topic_7: ['much', 'see', 'situation', 'thing', 'go', 'pay', 'man', 'stand']
Topic_8: ['way', 'cause', 'yield', 'argument', 'economic', 'proper', 'available', 'order']
Topic_9: ['good', 'word', 'ask', 'present', 'system', 'day', 'total', 'federal']
Topic_10: ['get', 'good', 'reduce', 'today', 'talk', 'mean', 'agree', 'report']
Topic_11: ['vote', 'leadership', 'distr

  self.pid = os.fork()
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
  self.pid = os.fork()
  self.pid = os.fork()
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has


Epoch  16	Mean Training Loss:9.7307541

Topic_0: ['know', 'obtain', 'term', 'tax', 'condition', 'matter', 'life', 'use']
Topic_1: ['much', 'state', 'whole', 'act', 'right', 'approach', 'realize', 'million']
Topic_2: ['rise', 'world', 'try', 'ability', 'first', 'today', 'position', 'serve']
Topic_3: ['amendment', 'certain', 'most', 'pay', 'fact', 'government', 'effect', 'one']
Topic_4: ['first', 'other', 'respect', 'fact', 'future', 'basis', 'able', 'legislation']
Topic_5: ['point', 'people', 'call', 'high', 'percent', 'work', 'side', 'state']
Topic_6: ['amendment', 'ask', 'order', 'question', 'like', 'high', 'today', 'act']
Topic_7: ['much', 'see', 'situation', 'thing', 'go', 'pay', 'man', 'stand']
Topic_8: ['way', 'cause', 'yield', 'argument', 'economic', 'proper', 'available', 'order']
Topic_9: ['good', 'word', 'ask', 'present', 'system', 'total', 'day', 'much']
Topic_10: ['get', 'good', 'today', 'reduce', 'talk', 'mean', 'report', 'agree']
Topic_11: ['vote', 'leadership', 'addition

  self.pid = os.fork()
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
  self.pid = os.fork()
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
  self.pid = os.fork()
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
  self.pid = os.fork()
huggingface/tokenizers: The current process just got forked


Epoch  17	Mean Training Loss:9.7099267

Topic_0: ['know', 'obtain', 'term', 'tax', 'condition', 'matter', 'life', 'use']
Topic_1: ['much', 'state', 'whole', 'act', 'right', 'approach', 'realize', 'million']
Topic_2: ['rise', 'world', 'try', 'ability', 'first', 'today', 'position', 'serve']
Topic_3: ['amendment', 'certain', 'most', 'pay', 'fact', 'government', 'effect', 'one']
Topic_4: ['first', 'other', 'fact', 'respect', 'future', 'basis', 'able', 'legislation']
Topic_5: ['point', 'people', 'call', 'high', 'work', 'percent', 'side', 'state']
Topic_6: ['amendment', 'ask', 'order', 'question', 'like', 'today', 'act', 'high']
Topic_7: ['much', 'see', 'situation', 'thing', 'go', 'man', 'pay', 'stand']
Topic_8: ['way', 'cause', 'yield', 'argument', 'economic', 'proper', 'available', 'order']
Topic_9: ['good', 'word', 'ask', 'present', 'system', 'much', 'total', 'day']
Topic_10: ['get', 'good', 'today', 'reduce', 'talk', 'report', 'mean', 'agree']
Topic_11: ['vote', 'leadership', 'addition

  self.pid = os.fork()
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
  self.pid = os.fork()
  self.pid = os.fork()
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
  self.pid = os.fork()
huggingface/tokenizers: The current process just got forked


Epoch  18	Mean Training Loss:9.6757408

Topic_0: ['know', 'obtain', 'term', 'tax', 'condition', 'matter', 'life', 'use']
Topic_1: ['much', 'state', 'whole', 'act', 'right', 'approach', 'realize', 'million']
Topic_2: ['rise', 'world', 'try', 'ability', 'first', 'today', 'position', 'serve']
Topic_3: ['amendment', 'certain', 'most', 'pay', 'fact', 'government', 'one', 'effect']
Topic_4: ['first', 'other', 'fact', 'respect', 'future', 'basis', 'able', 'legislation']
Topic_5: ['point', 'people', 'call', 'high', 'work', 'percent', 'state', 'side']
Topic_6: ['amendment', 'ask', 'order', 'question', 'like', 'act', 'today', 'desire']
Topic_7: ['much', 'see', 'thing', 'situation', 'go', 'man', 'pay', 'additional']
Topic_8: ['way', 'cause', 'yield', 'argument', 'economic', 'proper', 'available', 'order']
Topic_9: ['good', 'word', 'ask', 'present', 'system', 'much', 'effort', 'make']
Topic_10: ['get', 'good', 'today', 'reduce', 'report', 'talk', 'mean', 'agree']
Topic_11: ['vote', 'leadership', 

  self.pid = os.fork()
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
  self.pid = os.fork()
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
  self.pid = os.fork()
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
  self.pid = os.fork()
huggingface/tokenizers: The current process just got forked


Epoch  19	Mean Training Loss:9.6759905

Topic_0: ['know', 'obtain', 'term', 'tax', 'condition', 'matter', 'life', 'use']
Topic_1: ['much', 'state', 'whole', 'act', 'right', 'approach', 'realize', 'million']
Topic_2: ['rise', 'world', 'try', 'ability', 'first', 'today', 'position', 'serve']
Topic_3: ['amendment', 'certain', 'most', 'pay', 'fact', 'government', 'one', 'effect']
Topic_4: ['first', 'other', 'fact', 'respect', 'future', 'basis', 'able', 'legislation']
Topic_5: ['point', 'people', 'call', 'work', 'high', 'percent', 'state', 'side']
Topic_6: ['amendment', 'ask', 'order', 'question', 'like', 'act', 'desire', 'today']
Topic_7: ['much', 'see', 'thing', 'go', 'situation', 'man', 'pay', 'additional']
Topic_8: ['way', 'cause', 'yield', 'argument', 'economic', 'proper', 'available', 'deal']
Topic_9: ['good', 'word', 'ask', 'present', 'system', 'much', 'make', 'effort']
Topic_10: ['get', 'good', 'today', 'reduce', 'report', 'mean', 'talk', 'agree']
Topic_11: ['vote', 'addition', 'le