In [2]:
import os
import pickle
import datetime
import glob
import numpy as np
from collections import defaultdict
from tqdm import tqdm
import logging

# For NLP processing and modeling
import gensim
from gensim.models import Word2Vec
from gensim.models.phrases import Phrases, Phraser

# Configure logging
logging.basicConfig(format='%(asctime)s : %(levelname)s : %(message)s', level=logging.INFO)

class RedditWord2VecBuilder:
    def __init__(self, 
                 base_data_dir="processed_comments",
                 model_dir="models/model_v5",
                 vector_size=300,
                 window=5,
                 min_count=5,
                 epochs=5,
                 workers=16,
                 sg=1,  # Skip-gram (1) vs CBOW (0)
                 min_comments_to_train=100000):
        """
        Initialize the model builder with parameters
        
        Args:
            base_data_dir: Directory containing processed comment pickle files
            model_dir: Directory to save trained models
            vector_size: Dimension of word vectors
            window: Maximum distance between current and predicted word
            min_count: Minimum word count for inclusion in vocabulary
            epochs: Number of training epochs
            workers: Number of worker threads
            sg: Training algorithm: 1 for skip-gram, 0 for CBOW
            min_comments_to_train: Minimum number of comments required for training
        """
        self.base_data_dir = base_data_dir
        self.model_dir = model_dir
        self.vector_size = vector_size
        self.window = window
        self.min_count = min_count
        self.epochs = epochs
        self.workers = workers
        self.sg = sg
        self.min_comments_to_train = min_comments_to_train
        
        # Ensure model directories exist
        os.makedirs(f"{self.model_dir}/interim", exist_ok=True)
        
        # Time periods for analysis
        self.time_periods = ["before_2016", "2017_2020", "2021_2024"]
        
        # Initialize models for each time period
        self.models = {period: None for period in self.time_periods}
        self.bigram_models = {period: None for period in self.time_periods}
        
    def _get_date_from_comment(self, comment):
        """Extract date from a comment dictionary"""
        try:
            return datetime.datetime.strptime(comment["date"], "%Y-%m-%d").date()
        except (KeyError, ValueError):
            # If date cannot be parsed, try using timestamp
            try:
                return datetime.datetime.fromtimestamp(int(comment["timestamp"])).date()
            except (KeyError, ValueError):
                return None
    
    def _get_period(self, date):
        """Determine which time period a date belongs to"""
        if date is None:
            return None
            
        year = date.year
        if year <= 2016:
            return "before_2016"
        elif 2017 <= year <= 2020:
            return "2017_2020"
        elif 2021 <= year <= 2024:
            return "2021_2024"
        return None
    
    def _build_bigram_model(self, comments, period, min_count=5):
        """Build a bigram model for the given comments"""
        print(f"Building bigram model for {period} with {len(comments)} comments...")
        
        # Extract just the processed text from comments
        sentences = [comment["processed_text"] for comment in comments if "processed_text" in comment]
        
        # Create or update bigram model
        if self.bigram_models[period] is None:
            # First time creation
            phrases = Phrases(sentences, min_count=min_count, threshold=0.7, scoring='npmi')
            self.bigram_models[period] = Phraser(phrases)
        else:
            # For updates, we need to create a new Phrases model, train it on all sentences,
            # and then create a new Phraser
            print(f"Creating new bigram model for {period} (cannot update Phraser objects)")
            phrases = Phrases(sentences, min_count=min_count, threshold=0.7, scoring='npmi')
            self.bigram_models[period] = Phraser(phrases)
            
        return self.bigram_models[period]
    
    def _apply_bigrams(self, comments, bigram_model):
        """Apply bigram model to comments"""
        processed_comments = []
        
        for comment in comments:
            if "processed_text" in comment:
                # Apply bigram transformation to processed text
                bigrammed_text = bigram_model[comment["processed_text"]]
                processed_comments.append(bigrammed_text)
                
        return processed_comments
    
    def _create_or_update_model(self, comments, period):
        """Create a new model or update an existing one"""
        if self.models[period] is None:
            # Create new model
            print(f"Creating new Word2Vec model for {period}")
            model = Word2Vec(
                vector_size=self.vector_size,
                window=self.window,
                min_count=self.min_count,
                workers=self.workers,
                sg=self.sg
            )
            model.build_vocab(comments)
        else:
            # Update existing model
            print(f"Updating existing Word2Vec model for {period}")
            model = self.models[period]
            model.build_vocab(comments, update=True)
            
        # Train model
        print(f"Training model on {len(comments)} comments")
        model.train(
            comments,
            total_examples=len(comments),
            epochs=self.epochs
        )
        
        self.models[period] = model
        return model
    
    def _save_model(self, model, subreddit, period, is_interim=False):
        """Save model to disk"""
        if is_interim:
            path = f"{self.model_dir}/interim/{subreddit}_{period}_interim.model"
        else:
            path = f"{self.model_dir}/{subreddit}_{period}.model"
            
        model.save(path)
        print(f"Model saved to {path}")
    
    def find_pickle_files(self, subreddit):
        """Find all pickle files for a subreddit"""
        pattern = f"{self.base_data_dir}/{subreddit}/{subreddit}_batch*.pkl"
        files = sorted(glob.glob(pattern))
        if not files:
            print(f"No pickle files found for {subreddit} in {self.base_data_dir}/{subreddit}/")
        return files
    
    def process_pickle_file(self, file_path):
        """Load and process a pickle file"""
        try:
            with open(file_path, 'rb') as f:
                comments = pickle.load(f)
            print(f"Loaded {len(comments)} comments from {file_path}")
            return comments
        except Exception as e:
            print(f"Error loading {file_path}: {e}")
            return []
    
    def build_models(self, subreddit, chunk_size=1000000):
        """
        Build Word2Vec models for a subreddit by time period
        
        Args:
            subreddit: Name of the subreddit to process
            chunk_size: Number of comments to process in each chunk
        """
        print(f"Building models for {subreddit}")
        
        # Find all pickle files
        pickle_files = self.find_pickle_files(subreddit)
        if not pickle_files:
            return
        
        # Process each file
        comments_by_period = {period: [] for period in self.time_periods}
        
        for file_path in pickle_files:
            print(f"Processing {file_path}")
            comments = self.process_pickle_file(file_path)
            
            # Sort comments by date if not already sorted
            for comment in comments:
                date = self._get_date_from_comment(comment)
                period = self._get_period(date)
                
                if period:
                    comments_by_period[period].append(comment)
            
            # Process each time period separately
            for period in self.time_periods:
                period_comments = comments_by_period[period]
                
                # If we have enough comments, train in chunks
                while len(period_comments) >= chunk_size:
                    print(f"Processing chunk of {chunk_size} comments for {period}")
                    
                    # Take a chunk of comments
                    chunk = period_comments[:chunk_size]
                    period_comments = period_comments[chunk_size:]
                    
                    # Build or update bigram model
                    bigram_model = self._build_bigram_model(chunk, period)
                    
                    # Apply bigrams
                    processed_chunk = self._apply_bigrams(chunk, bigram_model)
                    
                    if len(processed_chunk) > self.min_comments_to_train:
                        # Create or update model
                        model = self._create_or_update_model(processed_chunk, period)
                        
                        # Save interim model
                        self._save_model(model, subreddit, period, is_interim=True)
                
                # Store remaining comments for next file
                comments_by_period[period] = period_comments
        
        # Process any remaining comments that didn't make a full chunk
        for period, remaining_comments in comments_by_period.items():
            if len(remaining_comments) > self.min_comments_to_train:
                print(f"Processing final {len(remaining_comments)} comments for {period}")
                
                # Build or update bigram model
                bigram_model = self._build_bigram_model(remaining_comments, period)
                
                # Apply bigrams
                processed_chunk = self._apply_bigrams(remaining_comments, bigram_model)
                
                # Create or update model
                model = self._create_or_update_model(processed_chunk, period)
                
                # Save final model
                if model is not None:
                    self._save_model(model, subreddit, period, is_interim=False)
            else:
                print(f"Skipping final {len(remaining_comments)} comments for {period} (less than minimum required)")
        
        # Save final models
        for period, model in self.models.items():
            if model is not None:
                self._save_model(model, subreddit, period, is_interim=False)
        
        print(f"Completed building models for {subreddit}")


def main():
    # Initialize the model builder
    builder = RedditWord2VecBuilder(
        base_data_dir="processed_comments",
        model_dir="models/model_v5",
        vector_size=300,
        window=5,
        min_count=10,  # Minimum word frequency
        epochs=5,
        workers=16,
        sg=1,  # Skip-gram model
        min_comments_to_train=100000
    )
    
    # List of subreddits to process
    subreddits = ["democrats", "republican", "conservative", "liberal", "backpacking", "vagabond"]
    
    # Build models for each subreddit
    for subreddit in subreddits:
        builder.build_models(subreddit, chunk_size=1000000)


if __name__ == "__main__":
    main()

Building models for democrats
Processing processed_comments/democrats\democrats_batch1.pkl
Loaded 1000000 comments from processed_comments/democrats\democrats_batch1.pkl
Processing processed_comments/democrats\democrats_batch2.pkl
Loaded 935437 comments from processed_comments/democrats\democrats_batch2.pkl


2025-08-19 17:59:36,820 : INFO : collecting all words and their counts
2025-08-19 17:59:36,821 : INFO : PROGRESS: at sentence #0, processed 0 words and 0 word types
2025-08-19 17:59:36,848 : INFO : PROGRESS: at sentence #10000, processed 75608 words and 62464 word types
2025-08-19 17:59:36,888 : INFO : PROGRESS: at sentence #20000, processed 180000 words and 135361 word types


Processing chunk of 1000000 comments for 2021_2024
Building bigram model for 2021_2024 with 1000000 comments...


2025-08-19 17:59:36,934 : INFO : PROGRESS: at sentence #30000, processed 298263 words and 207549 word types
2025-08-19 17:59:36,976 : INFO : PROGRESS: at sentence #40000, processed 404663 words and 267973 word types
2025-08-19 17:59:37,027 : INFO : PROGRESS: at sentence #50000, processed 531060 words and 335115 word types
2025-08-19 17:59:37,076 : INFO : PROGRESS: at sentence #60000, processed 648297 words and 391050 word types
2025-08-19 17:59:37,125 : INFO : PROGRESS: at sentence #70000, processed 775037 words and 452188 word types
2025-08-19 17:59:37,171 : INFO : PROGRESS: at sentence #80000, processed 889776 words and 505802 word types
2025-08-19 17:59:37,220 : INFO : PROGRESS: at sentence #90000, processed 1010201 words and 559857 word types
2025-08-19 17:59:37,270 : INFO : PROGRESS: at sentence #100000, processed 1129841 words and 610802 word types
2025-08-19 17:59:37,320 : INFO : PROGRESS: at sentence #110000, processed 1249037 words and 662914 word types
2025-08-19 17:59:37,381

Creating new Word2Vec model for 2021_2024


2025-08-19 17:59:49,024 : INFO : PROGRESS: at sentence #230000, processed 2640567 words, keeping 43077 word types
2025-08-19 17:59:49,034 : INFO : PROGRESS: at sentence #240000, processed 2762424 words, keeping 44021 word types
2025-08-19 17:59:49,044 : INFO : PROGRESS: at sentence #250000, processed 2895993 words, keeping 44846 word types
2025-08-19 17:59:49,054 : INFO : PROGRESS: at sentence #260000, processed 3024226 words, keeping 45745 word types
2025-08-19 17:59:49,063 : INFO : PROGRESS: at sentence #270000, processed 3156309 words, keeping 46532 word types
2025-08-19 17:59:49,071 : INFO : PROGRESS: at sentence #280000, processed 3289066 words, keeping 47392 word types
2025-08-19 17:59:49,080 : INFO : PROGRESS: at sentence #290000, processed 3426320 words, keeping 48149 word types
2025-08-19 17:59:49,090 : INFO : PROGRESS: at sentence #300000, processed 3554407 words, keeping 49009 word types
2025-08-19 17:59:49,099 : INFO : PROGRESS: at sentence #310000, processed 3678185 words,

Training model on 1000000 comments


2025-08-19 17:59:51,013 : INFO : EPOCH 0 - PROGRESS: at 29.33% examples, 3156405 words/s, in_qsize 31, out_qsize 0
2025-08-19 17:59:52,015 : INFO : EPOCH 0 - PROGRESS: at 59.02% examples, 3257519 words/s, in_qsize 31, out_qsize 0
2025-08-19 17:59:53,018 : INFO : EPOCH 0 - PROGRESS: at 90.26% examples, 3267229 words/s, in_qsize 31, out_qsize 0
2025-08-19 17:59:53,319 : INFO : EPOCH 0: training on 11899554 raw words (10854287 effective words) took 3.3s, 3279912 effective words/s
2025-08-19 17:59:54,333 : INFO : EPOCH 1 - PROGRESS: at 29.42% examples, 3176116 words/s, in_qsize 31, out_qsize 0
2025-08-19 17:59:55,334 : INFO : EPOCH 1 - PROGRESS: at 59.02% examples, 3264427 words/s, in_qsize 31, out_qsize 1
2025-08-19 17:59:56,335 : INFO : EPOCH 1 - PROGRESS: at 90.50% examples, 3283542 words/s, in_qsize 32, out_qsize 1
2025-08-19 17:59:56,619 : INFO : EPOCH 1: training on 11899554 raw words (10856224 effective words) took 3.3s, 3303598 effective words/s
2025-08-19 17:59:57,633 : INFO : EPO

Model saved to models/word2vec/interim/democrats_2021_2024_interim.model
Processing final 127372 comments for before_2016
Building bigram model for before_2016 with 127372 comments...


2025-08-19 18:00:06,912 : INFO : PROGRESS: at sentence #20000, processed 439266 words and 296485 word types
2025-08-19 18:00:07,026 : INFO : PROGRESS: at sentence #30000, processed 655522 words and 413840 word types
2025-08-19 18:00:07,157 : INFO : PROGRESS: at sentence #40000, processed 875139 words and 524212 word types
2025-08-19 18:00:07,296 : INFO : PROGRESS: at sentence #50000, processed 1081241 words and 618027 word types
2025-08-19 18:00:07,443 : INFO : PROGRESS: at sentence #60000, processed 1320645 words and 711438 word types
2025-08-19 18:00:07,561 : INFO : PROGRESS: at sentence #70000, processed 1535227 words and 795264 word types
2025-08-19 18:00:07,668 : INFO : PROGRESS: at sentence #80000, processed 1731201 words and 869425 word types
2025-08-19 18:00:07,777 : INFO : PROGRESS: at sentence #90000, processed 1906476 words and 933396 word types
2025-08-19 18:00:07,887 : INFO : PROGRESS: at sentence #100000, processed 2087014 words and 997054 word types
2025-08-19 18:00:07,9

Creating new Word2Vec model for before_2016


2025-08-19 18:00:10,073 : INFO : collected 39707 word types from a corpus of 2561192 raw words and 127372 sentences
2025-08-19 18:00:10,074 : INFO : Creating a fresh vocabulary
2025-08-19 18:00:10,092 : INFO : Word2Vec lifecycle event {'msg': 'effective_min_count=10 retains 10956 unique words (27.59% of original 39707, drops 28751)', 'datetime': '2025-08-19T18:00:10.092827', 'gensim': '4.3.3', 'python': '3.11.9 (tags/v3.11.9:de54cf5, Apr  2 2024, 10:12:12) [MSC v.1938 64 bit (AMD64)]', 'platform': 'Windows-10-10.0.26100-SP0', 'event': 'prepare_vocab'}
2025-08-19 18:00:10,092 : INFO : Word2Vec lifecycle event {'msg': 'effective_min_count=10 leaves 2490879 word corpus (97.25% of original 2561192, drops 70313)', 'datetime': '2025-08-19T18:00:10.092827', 'gensim': '4.3.3', 'python': '3.11.9 (tags/v3.11.9:de54cf5, Apr  2 2024, 10:12:12) [MSC v.1938 64 bit (AMD64)]', 'platform': 'Windows-10-10.0.26100-SP0', 'event': 'prepare_vocab'}
2025-08-19 18:00:10,118 : INFO : deleting the raw counts di

Training model on 127372 comments


2025-08-19 18:00:10,967 : INFO : EPOCH 0: training on 2561192 raw words (2332045 effective words) took 0.8s, 2959090 effective words/s
2025-08-19 18:00:11,763 : INFO : EPOCH 1: training on 2561192 raw words (2332208 effective words) took 0.8s, 2962625 effective words/s
2025-08-19 18:00:12,528 : INFO : EPOCH 2: training on 2561192 raw words (2331939 effective words) took 0.8s, 3080926 effective words/s
2025-08-19 18:00:13,288 : INFO : EPOCH 3: training on 2561192 raw words (2332148 effective words) took 0.8s, 3103877 effective words/s
2025-08-19 18:00:14,041 : INFO : EPOCH 4: training on 2561192 raw words (2332063 effective words) took 0.7s, 3123809 effective words/s
2025-08-19 18:00:14,042 : INFO : Word2Vec lifecycle event {'msg': 'training on 12805960 raw words (11660403 effective words) took 3.9s, 3011265 effective words/s', 'datetime': '2025-08-19T18:00:14.042848', 'gensim': '4.3.3', 'python': '3.11.9 (tags/v3.11.9:de54cf5, Apr  2 2024, 10:12:12) [MSC v.1938 64 bit (AMD64)]', 'platf

Model saved to models/word2vec/democrats_before_2016.model
Processing final 473415 comments for 2017_2020
Building bigram model for 2017_2020 with 473415 comments...


2025-08-19 18:00:14,296 : INFO : PROGRESS: at sentence #30000, processed 518498 words and 327222 word types
2025-08-19 18:00:14,373 : INFO : PROGRESS: at sentence #40000, processed 709682 words and 419527 word types
2025-08-19 18:00:14,453 : INFO : PROGRESS: at sentence #50000, processed 909500 words and 511086 word types
2025-08-19 18:00:14,523 : INFO : PROGRESS: at sentence #60000, processed 1084279 words and 589180 word types
2025-08-19 18:00:14,591 : INFO : PROGRESS: at sentence #70000, processed 1242554 words and 657387 word types
2025-08-19 18:00:14,673 : INFO : PROGRESS: at sentence #80000, processed 1406852 words and 726327 word types
2025-08-19 18:00:14,746 : INFO : PROGRESS: at sentence #90000, processed 1579306 words and 796576 word types
2025-08-19 18:00:14,812 : INFO : PROGRESS: at sentence #100000, processed 1735564 words and 859397 word types
2025-08-19 18:00:14,879 : INFO : PROGRESS: at sentence #110000, processed 1895825 words and 920803 word types
2025-08-19 18:00:14,

Creating new Word2Vec model for 2017_2020


2025-08-19 18:00:20,715 : INFO : PROGRESS: at sentence #200000, processed 3202941 words, keeping 45008 word types
2025-08-19 18:00:20,723 : INFO : PROGRESS: at sentence #210000, processed 3341074 words, keeping 45760 word types
2025-08-19 18:00:20,732 : INFO : PROGRESS: at sentence #220000, processed 3477361 words, keeping 46620 word types
2025-08-19 18:00:20,740 : INFO : PROGRESS: at sentence #230000, processed 3611264 words, keeping 47449 word types
2025-08-19 18:00:20,750 : INFO : PROGRESS: at sentence #240000, processed 3764630 words, keeping 48248 word types
2025-08-19 18:00:20,758 : INFO : PROGRESS: at sentence #250000, processed 3894364 words, keeping 48987 word types
2025-08-19 18:00:20,766 : INFO : PROGRESS: at sentence #260000, processed 4017027 words, keeping 49809 word types
2025-08-19 18:00:20,774 : INFO : PROGRESS: at sentence #270000, processed 4137805 words, keeping 50537 word types
2025-08-19 18:00:20,784 : INFO : PROGRESS: at sentence #280000, processed 4263618 words,

Training model on 473415 comments


2025-08-19 18:00:22,054 : INFO : EPOCH 0 - PROGRESS: at 47.28% examples, 3216570 words/s, in_qsize 31, out_qsize 0
2025-08-19 18:00:22,849 : INFO : EPOCH 0: training on 6522858 raw words (5954944 effective words) took 1.8s, 3308882 effective words/s
2025-08-19 18:00:23,858 : INFO : EPOCH 1 - PROGRESS: at 46.95% examples, 3212730 words/s, in_qsize 31, out_qsize 0
2025-08-19 18:00:24,647 : INFO : EPOCH 1: training on 6522858 raw words (5955154 effective words) took 1.8s, 3329264 effective words/s
2025-08-19 18:00:25,656 : INFO : EPOCH 2 - PROGRESS: at 47.73% examples, 3259430 words/s, in_qsize 31, out_qsize 0
2025-08-19 18:00:26,432 : INFO : EPOCH 2: training on 6522858 raw words (5954568 effective words) took 1.8s, 3352022 effective words/s
2025-08-19 18:00:27,441 : INFO : EPOCH 3 - PROGRESS: at 46.81% examples, 3204879 words/s, in_qsize 31, out_qsize 0
2025-08-19 18:00:28,286 : INFO : EPOCH 3: training on 6522858 raw words (5955274 effective words) took 1.8s, 3226854 effective words/s


Model saved to models/word2vec/democrats_2017_2020.model
Processing final 334650 comments for 2021_2024
Building bigram model for 2021_2024 with 334650 comments...
Creating new bigram model for 2021_2024 (cannot update Phraser objects)


2025-08-19 18:00:30,582 : INFO : PROGRESS: at sentence #50000, processed 618170 words and 373596 word types
2025-08-19 18:00:30,631 : INFO : PROGRESS: at sentence #60000, processed 737164 words and 429671 word types
2025-08-19 18:00:30,683 : INFO : PROGRESS: at sentence #70000, processed 852681 words and 481636 word types
2025-08-19 18:00:30,730 : INFO : PROGRESS: at sentence #80000, processed 967109 words and 531127 word types
2025-08-19 18:00:30,782 : INFO : PROGRESS: at sentence #90000, processed 1089181 words and 583021 word types
2025-08-19 18:00:30,833 : INFO : PROGRESS: at sentence #100000, processed 1200311 words and 627058 word types
2025-08-19 18:00:30,890 : INFO : PROGRESS: at sentence #110000, processed 1325245 words and 673494 word types
2025-08-19 18:00:30,953 : INFO : PROGRESS: at sentence #120000, processed 1440894 words and 716112 word types
2025-08-19 18:00:31,015 : INFO : PROGRESS: at sentence #130000, processed 1556594 words and 758837 word types
2025-08-19 18:00:31

Updating existing Word2Vec model for 2021_2024


2025-08-19 18:00:36,253 : INFO : PROGRESS: at sentence #290000, processed 2862720 words, keeping 41570 word types
2025-08-19 18:00:36,259 : INFO : PROGRESS: at sentence #300000, processed 2939672 words, keeping 42082 word types
2025-08-19 18:00:36,264 : INFO : PROGRESS: at sentence #310000, processed 3016944 words, keeping 42568 word types
2025-08-19 18:00:36,270 : INFO : PROGRESS: at sentence #320000, processed 3097637 words, keeping 43126 word types
2025-08-19 18:00:36,277 : INFO : PROGRESS: at sentence #330000, processed 3169090 words, keeping 43697 word types
2025-08-19 18:00:36,280 : INFO : collected 43993 word types from a corpus of 3204711 raw words and 334650 sentences
2025-08-19 18:00:36,280 : INFO : Updating model with new vocabulary
2025-08-19 18:00:36,314 : INFO : Word2Vec lifecycle event {'msg': 'added 282 new unique words (0.64% of original 43993) and increased the count of 11382 pre-existing words (25.87% of original 43993)', 'datetime': '2025-08-19T18:00:36.314492', 'ge

Training model on 334650 comments


2025-08-19 18:00:37,332 : INFO : EPOCH 0: training on 3204711 raw words (2836939 effective words) took 0.9s, 3145597 effective words/s
2025-08-19 18:00:38,254 : INFO : EPOCH 1: training on 3204711 raw words (2837445 effective words) took 0.9s, 3110717 effective words/s
2025-08-19 18:00:39,175 : INFO : EPOCH 2: training on 3204711 raw words (2836907 effective words) took 0.9s, 3118398 effective words/s
2025-08-19 18:00:40,081 : INFO : EPOCH 3: training on 3204711 raw words (2836790 effective words) took 0.9s, 3174278 effective words/s
2025-08-19 18:00:40,975 : INFO : EPOCH 4: training on 3204711 raw words (2837479 effective words) took 0.9s, 3212484 effective words/s
2025-08-19 18:00:40,976 : INFO : Word2Vec lifecycle event {'msg': 'training on 16023555 raw words (14185560 effective words) took 4.6s, 3111911 effective words/s', 'datetime': '2025-08-19T18:00:40.976583', 'gensim': '4.3.3', 'python': '3.11.9 (tags/v3.11.9:de54cf5, Apr  2 2024, 10:12:12) [MSC v.1938 64 bit (AMD64)]', 'platf

Model saved to models/word2vec/democrats_2021_2024.model
Model saved to models/word2vec/democrats_before_2016.model
Model saved to models/word2vec/democrats_2017_2020.model
Model saved to models/word2vec/democrats_2021_2024.model
Completed building models for democrats
Building models for republican
Processing processed_comments/republican\republican_batch1.pkl
Loaded 1000000 comments from processed_comments/republican\republican_batch1.pkl
Processing processed_comments/republican\republican_batch2.pkl
Loaded 292460 comments from processed_comments/republican\republican_batch2.pkl


2025-08-19 18:00:48,152 : INFO : collecting all words and their counts
2025-08-19 18:00:48,153 : INFO : PROGRESS: at sentence #0, processed 0 words and 0 word types
2025-08-19 18:00:48,248 : INFO : PROGRESS: at sentence #10000, processed 248055 words and 178168 word types


Processing final 263823 comments for before_2016
Building bigram model for before_2016 with 263823 comments...
Creating new bigram model for before_2016 (cannot update Phraser objects)


2025-08-19 18:00:48,353 : INFO : PROGRESS: at sentence #20000, processed 524761 words and 329255 word types
2025-08-19 18:00:48,456 : INFO : PROGRESS: at sentence #30000, processed 777709 words and 452375 word types
2025-08-19 18:00:48,561 : INFO : PROGRESS: at sentence #40000, processed 1037093 words and 565600 word types
2025-08-19 18:00:48,664 : INFO : PROGRESS: at sentence #50000, processed 1289747 words and 668379 word types
2025-08-19 18:00:48,779 : INFO : PROGRESS: at sentence #60000, processed 1540869 words and 764650 word types
2025-08-19 18:00:48,883 : INFO : PROGRESS: at sentence #70000, processed 1790672 words and 856919 word types
2025-08-19 18:00:48,998 : INFO : PROGRESS: at sentence #80000, processed 2062473 words and 952107 word types
2025-08-19 18:00:49,112 : INFO : PROGRESS: at sentence #90000, processed 2325846 words and 1044965 word types
2025-08-19 18:00:49,213 : INFO : PROGRESS: at sentence #100000, processed 2563311 words and 1127971 word types
2025-08-19 18:00:4

Updating existing Word2Vec model for before_2016


2025-08-19 18:00:54,122 : INFO : PROGRESS: at sentence #140000, processed 3483080 words, keeping 42774 word types
2025-08-19 18:00:54,136 : INFO : PROGRESS: at sentence #150000, processed 3726180 words, keeping 43999 word types
2025-08-19 18:00:54,148 : INFO : PROGRESS: at sentence #160000, processed 3930868 words, keeping 45108 word types
2025-08-19 18:00:54,160 : INFO : PROGRESS: at sentence #170000, processed 4148593 words, keeping 46147 word types
2025-08-19 18:00:54,171 : INFO : PROGRESS: at sentence #180000, processed 4343114 words, keeping 47093 word types
2025-08-19 18:00:54,182 : INFO : PROGRESS: at sentence #190000, processed 4535274 words, keeping 47954 word types
2025-08-19 18:00:54,192 : INFO : PROGRESS: at sentence #200000, processed 4721373 words, keeping 48812 word types
2025-08-19 18:00:54,206 : INFO : PROGRESS: at sentence #210000, processed 4934346 words, keeping 49742 word types
2025-08-19 18:00:54,217 : INFO : PROGRESS: at sentence #220000, processed 5132324 words,

Training model on 263823 comments


2025-08-19 18:00:55,398 : INFO : EPOCH 0 - PROGRESS: at 52.38% examples, 3172218 words/s, in_qsize 31, out_qsize 0
2025-08-19 18:00:56,103 : INFO : EPOCH 0: training on 6030045 raw words (5557531 effective words) took 1.7s, 3255834 effective words/s
2025-08-19 18:00:57,115 : INFO : EPOCH 1 - PROGRESS: at 51.40% examples, 3107041 words/s, in_qsize 32, out_qsize 0
2025-08-19 18:00:57,841 : INFO : EPOCH 1: training on 6030045 raw words (5556956 effective words) took 1.7s, 3210708 effective words/s
2025-08-19 18:00:58,864 : INFO : EPOCH 2 - PROGRESS: at 51.12% examples, 3053552 words/s, in_qsize 31, out_qsize 0
2025-08-19 18:00:59,605 : INFO : EPOCH 2: training on 6030045 raw words (5557041 effective words) took 1.8s, 3161286 effective words/s
2025-08-19 18:01:00,615 : INFO : EPOCH 3 - PROGRESS: at 51.12% examples, 3096190 words/s, in_qsize 32, out_qsize 0
2025-08-19 18:01:01,351 : INFO : EPOCH 3: training on 6030045 raw words (5557489 effective words) took 1.7s, 3197148 effective words/s


Model saved to models/word2vec/republican_before_2016.model
Processing final 415293 comments for 2017_2020
Building bigram model for 2017_2020 with 415293 comments...
Creating new bigram model for 2017_2020 (cannot update Phraser objects)


2025-08-19 18:01:03,317 : INFO : PROGRESS: at sentence #20000, processed 480069 words and 314121 word types
2025-08-19 18:01:03,435 : INFO : PROGRESS: at sentence #30000, processed 762347 words and 456814 word types
2025-08-19 18:01:03,546 : INFO : PROGRESS: at sentence #40000, processed 1029911 words and 582050 word types
2025-08-19 18:01:03,657 : INFO : PROGRESS: at sentence #50000, processed 1290944 words and 696358 word types
2025-08-19 18:01:03,769 : INFO : PROGRESS: at sentence #60000, processed 1533480 words and 798538 word types
2025-08-19 18:01:03,840 : INFO : PROGRESS: at sentence #70000, processed 1694006 words and 861548 word types
2025-08-19 18:01:03,914 : INFO : PROGRESS: at sentence #80000, processed 1860071 words and 928555 word types
2025-08-19 18:01:03,982 : INFO : PROGRESS: at sentence #90000, processed 2017167 words and 991451 word types
2025-08-19 18:01:04,047 : INFO : PROGRESS: at sentence #100000, processed 2164124 words and 1048506 word types
2025-08-19 18:01:04

Updating existing Word2Vec model for 2017_2020


2025-08-19 18:01:09,669 : INFO : PROGRESS: at sentence #180000, processed 3225101 words, keeping 43036 word types
2025-08-19 18:01:09,678 : INFO : PROGRESS: at sentence #190000, processed 3363028 words, keeping 44039 word types
2025-08-19 18:01:09,687 : INFO : PROGRESS: at sentence #200000, processed 3502287 words, keeping 44911 word types
2025-08-19 18:01:09,695 : INFO : PROGRESS: at sentence #210000, processed 3638765 words, keeping 45914 word types
2025-08-19 18:01:09,704 : INFO : PROGRESS: at sentence #220000, processed 3762185 words, keeping 46779 word types
2025-08-19 18:01:09,716 : INFO : PROGRESS: at sentence #230000, processed 3922600 words, keeping 47686 word types
2025-08-19 18:01:09,726 : INFO : PROGRESS: at sentence #240000, processed 4087414 words, keeping 48540 word types
2025-08-19 18:01:09,735 : INFO : PROGRESS: at sentence #250000, processed 4232588 words, keeping 49347 word types
2025-08-19 18:01:09,744 : INFO : PROGRESS: at sentence #260000, processed 4379182 words,

Training model on 415293 comments


2025-08-19 18:01:11,024 : INFO : EPOCH 0 - PROGRESS: at 46.87% examples, 3164117 words/s, in_qsize 31, out_qsize 0
2025-08-19 18:01:11,821 : INFO : EPOCH 0: training on 6415212 raw words (5922712 effective words) took 1.8s, 3291621 effective words/s
2025-08-19 18:01:12,833 : INFO : EPOCH 1 - PROGRESS: at 46.69% examples, 3149915 words/s, in_qsize 32, out_qsize 0
2025-08-19 18:01:13,635 : INFO : EPOCH 1: training on 6415212 raw words (5922285 effective words) took 1.8s, 3278784 effective words/s
2025-08-19 18:01:14,643 : INFO : EPOCH 2 - PROGRESS: at 46.51% examples, 3147617 words/s, in_qsize 32, out_qsize 0
2025-08-19 18:01:15,462 : INFO : EPOCH 2: training on 6415212 raw words (5922204 effective words) took 1.8s, 3255682 effective words/s
2025-08-19 18:01:16,470 : INFO : EPOCH 3 - PROGRESS: at 46.51% examples, 3145222 words/s, in_qsize 31, out_qsize 0
2025-08-19 18:01:17,323 : INFO : EPOCH 3: training on 6415212 raw words (5922799 effective words) took 1.9s, 3193803 effective words/s


Model saved to models/word2vec/republican_2017_2020.model
Processing final 613344 comments for 2021_2024
Building bigram model for 2021_2024 with 613344 comments...
Creating new bigram model for 2021_2024 (cannot update Phraser objects)


2025-08-19 18:01:19,523 : INFO : PROGRESS: at sentence #40000, processed 555874 words and 348336 word types
2025-08-19 18:01:19,580 : INFO : PROGRESS: at sentence #50000, processed 683413 words and 411507 word types
2025-08-19 18:01:19,636 : INFO : PROGRESS: at sentence #60000, processed 827508 words and 479931 word types
2025-08-19 18:01:19,693 : INFO : PROGRESS: at sentence #70000, processed 964834 words and 542408 word types
2025-08-19 18:01:19,748 : INFO : PROGRESS: at sentence #80000, processed 1094976 words and 599850 word types
2025-08-19 18:01:19,803 : INFO : PROGRESS: at sentence #90000, processed 1227483 words and 658228 word types
2025-08-19 18:01:19,866 : INFO : PROGRESS: at sentence #100000, processed 1345468 words and 708811 word types
2025-08-19 18:01:19,924 : INFO : PROGRESS: at sentence #110000, processed 1480234 words and 764822 word types
2025-08-19 18:01:19,975 : INFO : PROGRESS: at sentence #120000, processed 1604937 words and 814070 word types
2025-08-19 18:01:20,

Updating existing Word2Vec model for 2021_2024


2025-08-19 18:01:27,335 : INFO : PROGRESS: at sentence #240000, processed 3115782 words, keeping 45993 word types
2025-08-19 18:01:27,343 : INFO : PROGRESS: at sentence #250000, processed 3228455 words, keeping 46837 word types
2025-08-19 18:01:27,352 : INFO : PROGRESS: at sentence #260000, processed 3358088 words, keeping 47788 word types
2025-08-19 18:01:27,361 : INFO : PROGRESS: at sentence #270000, processed 3495389 words, keeping 48783 word types
2025-08-19 18:01:27,369 : INFO : PROGRESS: at sentence #280000, processed 3621079 words, keeping 49695 word types
2025-08-19 18:01:27,377 : INFO : PROGRESS: at sentence #290000, processed 3764182 words, keeping 50546 word types
2025-08-19 18:01:27,386 : INFO : PROGRESS: at sentence #300000, processed 3895522 words, keeping 51401 word types
2025-08-19 18:01:27,395 : INFO : PROGRESS: at sentence #310000, processed 4044844 words, keeping 52229 word types
2025-08-19 18:01:27,404 : INFO : PROGRESS: at sentence #320000, processed 4171593 words,

Training model on 613344 comments


2025-08-19 18:01:28,878 : INFO : EPOCH 0 - PROGRESS: at 44.76% examples, 3273459 words/s, in_qsize 31, out_qsize 0
2025-08-19 18:01:29,879 : INFO : EPOCH 0 - PROGRESS: at 90.02% examples, 3268855 words/s, in_qsize 30, out_qsize 1
2025-08-19 18:01:30,042 : INFO : EPOCH 0: training on 7751192 raw words (7155267 effective words) took 2.2s, 3298888 effective words/s
2025-08-19 18:01:31,055 : INFO : EPOCH 1 - PROGRESS: at 44.17% examples, 3240330 words/s, in_qsize 30, out_qsize 1
2025-08-19 18:01:32,056 : INFO : EPOCH 1 - PROGRESS: at 89.89% examples, 3270275 words/s, in_qsize 31, out_qsize 0
2025-08-19 18:01:32,223 : INFO : EPOCH 1: training on 7751192 raw words (7154916 effective words) took 2.2s, 3297680 effective words/s
2025-08-19 18:01:33,235 : INFO : EPOCH 2 - PROGRESS: at 43.88% examples, 3219468 words/s, in_qsize 31, out_qsize 0
2025-08-19 18:01:34,239 : INFO : EPOCH 2 - PROGRESS: at 90.37% examples, 3283038 words/s, in_qsize 31, out_qsize 0
2025-08-19 18:01:34,396 : INFO : EPOCH 2

Model saved to models/word2vec/republican_2021_2024.model
Model saved to models/word2vec/republican_before_2016.model
Model saved to models/word2vec/republican_2017_2020.model
Model saved to models/word2vec/republican_2021_2024.model
Completed building models for republican
Building models for conservative
Processing processed_comments/conservative\conservative_batch1.pkl
Loaded 1000000 comments from processed_comments/conservative\conservative_batch1.pkl


2025-08-19 18:01:45,738 : INFO : collecting all words and their counts
2025-08-19 18:01:45,738 : INFO : PROGRESS: at sentence #0, processed 0 words and 0 word types
2025-08-19 18:01:45,822 : INFO : PROGRESS: at sentence #10000, processed 229214 words and 173826 word types


Processing chunk of 1000000 comments for before_2016
Building bigram model for before_2016 with 1000000 comments...
Creating new bigram model for before_2016 (cannot update Phraser objects)


2025-08-19 18:01:45,914 : INFO : PROGRESS: at sentence #20000, processed 472794 words and 313726 word types
2025-08-19 18:01:46,011 : INFO : PROGRESS: at sentence #30000, processed 709456 words and 431645 word types
2025-08-19 18:01:46,110 : INFO : PROGRESS: at sentence #40000, processed 957528 words and 546326 word types
2025-08-19 18:01:46,207 : INFO : PROGRESS: at sentence #50000, processed 1198094 words and 653046 word types
2025-08-19 18:01:46,316 : INFO : PROGRESS: at sentence #60000, processed 1441200 words and 755267 word types
2025-08-19 18:01:46,417 : INFO : PROGRESS: at sentence #70000, processed 1690764 words and 856761 word types
2025-08-19 18:01:46,524 : INFO : PROGRESS: at sentence #80000, processed 1945852 words and 951864 word types
2025-08-19 18:01:46,623 : INFO : PROGRESS: at sentence #90000, processed 2189764 words and 1040331 word types
2025-08-19 18:01:46,719 : INFO : PROGRESS: at sentence #100000, processed 2423487 words and 1124951 word types
2025-08-19 18:01:46

Updating existing Word2Vec model for before_2016


2025-08-19 18:02:06,715 : INFO : PROGRESS: at sentence #140000, processed 3351918 words, keeping 43380 word types
2025-08-19 18:02:06,729 : INFO : PROGRESS: at sentence #150000, processed 3596968 words, keeping 44664 word types
2025-08-19 18:02:06,743 : INFO : PROGRESS: at sentence #160000, processed 3834609 words, keeping 45797 word types
2025-08-19 18:02:06,755 : INFO : PROGRESS: at sentence #170000, processed 4050308 words, keeping 46801 word types
2025-08-19 18:02:06,767 : INFO : PROGRESS: at sentence #180000, processed 4251648 words, keeping 47882 word types
2025-08-19 18:02:06,779 : INFO : PROGRESS: at sentence #190000, processed 4467156 words, keeping 49038 word types
2025-08-19 18:02:06,790 : INFO : PROGRESS: at sentence #200000, processed 4657393 words, keeping 50051 word types
2025-08-19 18:02:06,801 : INFO : PROGRESS: at sentence #210000, processed 4858883 words, keeping 51056 word types
2025-08-19 18:02:06,812 : INFO : PROGRESS: at sentence #220000, processed 5058904 words,

Training model on 1000000 comments


2025-08-19 18:02:08,877 : INFO : EPOCH 0 - PROGRESS: at 13.64% examples, 3056129 words/s, in_qsize 31, out_qsize 0
2025-08-19 18:02:09,882 : INFO : EPOCH 0 - PROGRESS: at 29.11% examples, 3077475 words/s, in_qsize 31, out_qsize 0
2025-08-19 18:02:10,887 : INFO : EPOCH 0 - PROGRESS: at 46.68% examples, 3124404 words/s, in_qsize 32, out_qsize 0
2025-08-19 18:02:11,892 : INFO : EPOCH 0 - PROGRESS: at 64.69% examples, 3138905 words/s, in_qsize 31, out_qsize 0
2025-08-19 18:02:12,893 : INFO : EPOCH 0 - PROGRESS: at 82.85% examples, 3149562 words/s, in_qsize 31, out_qsize 0
2025-08-19 18:02:13,687 : INFO : EPOCH 0: training on 19645249 raw words (18402339 effective words) took 5.8s, 3166531 effective words/s
2025-08-19 18:02:14,699 : INFO : EPOCH 1 - PROGRESS: at 13.37% examples, 2987292 words/s, in_qsize 31, out_qsize 0
2025-08-19 18:02:15,704 : INFO : EPOCH 1 - PROGRESS: at 28.99% examples, 3060499 words/s, in_qsize 31, out_qsize 0
2025-08-19 18:02:16,705 : INFO : EPOCH 1 - PROGRESS: at 46

Model saved to models/word2vec/interim/conservative_before_2016_interim.model
Processing processed_comments/conservative\conservative_batch10.pkl
Loaded 1000000 comments from processed_comments/conservative\conservative_batch10.pkl
Processing chunk of 1000000 comments for 2021_2024
Building bigram model for 2021_2024 with 1000000 comments...


2025-08-19 18:02:44,486 : INFO : collecting all words and their counts
2025-08-19 18:02:44,487 : INFO : PROGRESS: at sentence #0, processed 0 words and 0 word types
2025-08-19 18:02:44,545 : INFO : PROGRESS: at sentence #10000, processed 135999 words and 109702 word types
2025-08-19 18:02:44,605 : INFO : PROGRESS: at sentence #20000, processed 266146 words and 192461 word types
2025-08-19 18:02:44,666 : INFO : PROGRESS: at sentence #30000, processed 398874 words and 268227 word types


Creating new bigram model for 2021_2024 (cannot update Phraser objects)


2025-08-19 18:02:44,725 : INFO : PROGRESS: at sentence #40000, processed 538979 words and 341661 word types
2025-08-19 18:02:44,783 : INFO : PROGRESS: at sentence #50000, processed 676027 words and 412003 word types
2025-08-19 18:02:44,837 : INFO : PROGRESS: at sentence #60000, processed 809013 words and 477238 word types
2025-08-19 18:02:44,895 : INFO : PROGRESS: at sentence #70000, processed 950017 words and 543883 word types
2025-08-19 18:02:44,954 : INFO : PROGRESS: at sentence #80000, processed 1089113 words and 606872 word types
2025-08-19 18:02:45,007 : INFO : PROGRESS: at sentence #90000, processed 1208887 words and 658730 word types
2025-08-19 18:02:45,076 : INFO : PROGRESS: at sentence #100000, processed 1323218 words and 706912 word types
2025-08-19 18:02:45,128 : INFO : PROGRESS: at sentence #110000, processed 1446537 words and 760024 word types
2025-08-19 18:02:45,186 : INFO : PROGRESS: at sentence #120000, processed 1580802 words and 814469 word types
2025-08-19 18:02:45,

Updating existing Word2Vec model for 2021_2024


2025-08-19 18:02:57,104 : INFO : PROGRESS: at sentence #190000, processed 2483457 words, keeping 40364 word types
2025-08-19 18:02:57,113 : INFO : PROGRESS: at sentence #200000, processed 2620804 words, keeping 41364 word types
2025-08-19 18:02:57,121 : INFO : PROGRESS: at sentence #210000, processed 2738107 words, keeping 42193 word types
2025-08-19 18:02:57,130 : INFO : PROGRESS: at sentence #220000, processed 2872514 words, keeping 43086 word types
2025-08-19 18:02:57,139 : INFO : PROGRESS: at sentence #230000, processed 2993348 words, keeping 43990 word types
2025-08-19 18:02:57,148 : INFO : PROGRESS: at sentence #240000, processed 3127685 words, keeping 44928 word types
2025-08-19 18:02:57,157 : INFO : PROGRESS: at sentence #250000, processed 3271732 words, keeping 45823 word types
2025-08-19 18:02:57,165 : INFO : PROGRESS: at sentence #260000, processed 3398855 words, keeping 46653 word types
2025-08-19 18:02:57,173 : INFO : PROGRESS: at sentence #270000, processed 3526320 words,

Training model on 1000000 comments


2025-08-19 18:02:58,969 : INFO : EPOCH 0 - PROGRESS: at 24.67% examples, 2978880 words/s, in_qsize 30, out_qsize 1
2025-08-19 18:02:59,972 : INFO : EPOCH 0 - PROGRESS: at 53.04% examples, 3127137 words/s, in_qsize 30, out_qsize 1
2025-08-19 18:03:00,972 : INFO : EPOCH 0 - PROGRESS: at 81.93% examples, 3172479 words/s, in_qsize 31, out_qsize 0
2025-08-19 18:03:01,644 : INFO : EPOCH 0: training on 12610719 raw words (11662974 effective words) took 3.7s, 3172321 effective words/s
2025-08-19 18:03:02,657 : INFO : EPOCH 1 - PROGRESS: at 24.74% examples, 2986358 words/s, in_qsize 31, out_qsize 0
2025-08-19 18:03:03,659 : INFO : EPOCH 1 - PROGRESS: at 52.26% examples, 3080789 words/s, in_qsize 30, out_qsize 1
2025-08-19 18:03:04,661 : INFO : EPOCH 1 - PROGRESS: at 80.31% examples, 3106202 words/s, in_qsize 31, out_qsize 0
2025-08-19 18:03:05,358 : INFO : EPOCH 1: training on 12610719 raw words (11663920 effective words) took 3.7s, 3149893 effective words/s
2025-08-19 18:03:06,370 : INFO : EPO

Model saved to models/word2vec/interim/conservative_2021_2024_interim.model
Processing processed_comments/conservative\conservative_batch11.pkl
Loaded 1000000 comments from processed_comments/conservative\conservative_batch11.pkl


2025-08-19 18:03:22,683 : INFO : collecting all words and their counts
2025-08-19 18:03:22,683 : INFO : PROGRESS: at sentence #0, processed 0 words and 0 word types
2025-08-19 18:03:22,735 : INFO : PROGRESS: at sentence #10000, processed 134888 words and 108447 word types
2025-08-19 18:03:22,785 : INFO : PROGRESS: at sentence #20000, processed 250434 words and 182352 word types


Processing chunk of 1000000 comments for 2021_2024
Building bigram model for 2021_2024 with 1000000 comments...
Creating new bigram model for 2021_2024 (cannot update Phraser objects)


2025-08-19 18:03:22,834 : INFO : PROGRESS: at sentence #30000, processed 375640 words and 255779 word types
2025-08-19 18:03:22,880 : INFO : PROGRESS: at sentence #40000, processed 489177 words and 316867 word types
2025-08-19 18:03:22,926 : INFO : PROGRESS: at sentence #50000, processed 589578 words and 368513 word types
2025-08-19 18:03:22,977 : INFO : PROGRESS: at sentence #60000, processed 702876 words and 425092 word types
2025-08-19 18:03:23,032 : INFO : PROGRESS: at sentence #70000, processed 821285 words and 481944 word types
2025-08-19 18:03:23,083 : INFO : PROGRESS: at sentence #80000, processed 939259 words and 538191 word types
2025-08-19 18:03:23,136 : INFO : PROGRESS: at sentence #90000, processed 1062510 words and 595789 word types
2025-08-19 18:03:23,192 : INFO : PROGRESS: at sentence #100000, processed 1195205 words and 653423 word types
2025-08-19 18:03:23,253 : INFO : PROGRESS: at sentence #110000, processed 1312335 words and 702790 word types
2025-08-19 18:03:23,302

Updating existing Word2Vec model for 2021_2024


2025-08-19 18:03:36,855 : INFO : PROGRESS: at sentence #220000, processed 2679213 words, keeping 42923 word types
2025-08-19 18:03:36,864 : INFO : PROGRESS: at sentence #230000, processed 2806152 words, keeping 43801 word types
2025-08-19 18:03:36,872 : INFO : PROGRESS: at sentence #240000, processed 2934444 words, keeping 44657 word types
2025-08-19 18:03:36,880 : INFO : PROGRESS: at sentence #250000, processed 3052984 words, keeping 45517 word types
2025-08-19 18:03:36,888 : INFO : PROGRESS: at sentence #260000, processed 3185942 words, keeping 46332 word types
2025-08-19 18:03:36,897 : INFO : PROGRESS: at sentence #270000, processed 3312681 words, keeping 47168 word types
2025-08-19 18:03:36,904 : INFO : PROGRESS: at sentence #280000, processed 3430735 words, keeping 47988 word types
2025-08-19 18:03:36,913 : INFO : PROGRESS: at sentence #290000, processed 3548760 words, keeping 48725 word types
2025-08-19 18:03:36,922 : INFO : PROGRESS: at sentence #300000, processed 3672283 words,

Training model on 1000000 comments


2025-08-19 18:03:38,708 : INFO : EPOCH 0 - PROGRESS: at 26.74% examples, 3026945 words/s, in_qsize 31, out_qsize 0
2025-08-19 18:03:39,713 : INFO : EPOCH 0 - PROGRESS: at 55.47% examples, 3098029 words/s, in_qsize 31, out_qsize 0
2025-08-19 18:03:40,713 : INFO : EPOCH 0 - PROGRESS: at 86.19% examples, 3156471 words/s, in_qsize 31, out_qsize 0
2025-08-19 18:03:41,100 : INFO : EPOCH 0: training on 11718748 raw words (10829466 effective words) took 3.4s, 3190144 effective words/s
2025-08-19 18:03:42,114 : INFO : EPOCH 1 - PROGRESS: at 27.17% examples, 3072215 words/s, in_qsize 31, out_qsize 0
2025-08-19 18:03:43,117 : INFO : EPOCH 1 - PROGRESS: at 56.15% examples, 3141636 words/s, in_qsize 32, out_qsize 0
2025-08-19 18:03:44,119 : INFO : EPOCH 1 - PROGRESS: at 86.59% examples, 3171980 words/s, in_qsize 31, out_qsize 0
2025-08-19 18:03:44,520 : INFO : EPOCH 1: training on 11718748 raw words (10829677 effective words) took 3.4s, 3175667 effective words/s
2025-08-19 18:03:45,534 : INFO : EPO

Model saved to models/word2vec/interim/conservative_2021_2024_interim.model
Processing processed_comments/conservative\conservative_batch12.pkl
Loaded 1000000 comments from processed_comments/conservative\conservative_batch12.pkl


2025-08-19 18:04:01,571 : INFO : collecting all words and their counts
2025-08-19 18:04:01,572 : INFO : PROGRESS: at sentence #0, processed 0 words and 0 word types
2025-08-19 18:04:01,614 : INFO : PROGRESS: at sentence #10000, processed 96436 words and 79654 word types
2025-08-19 18:04:01,658 : INFO : PROGRESS: at sentence #20000, processed 189739 words and 141495 word types
2025-08-19 18:04:01,701 : INFO : PROGRESS: at sentence #30000, processed 281767 words and 196314 word types


Processing chunk of 1000000 comments for 2021_2024
Building bigram model for 2021_2024 with 1000000 comments...
Creating new bigram model for 2021_2024 (cannot update Phraser objects)


2025-08-19 18:04:01,747 : INFO : PROGRESS: at sentence #40000, processed 374976 words and 249021 word types
2025-08-19 18:04:01,795 : INFO : PROGRESS: at sentence #50000, processed 470873 words and 301142 word types
2025-08-19 18:04:01,852 : INFO : PROGRESS: at sentence #60000, processed 578912 words and 358230 word types
2025-08-19 18:04:01,908 : INFO : PROGRESS: at sentence #70000, processed 697299 words and 416837 word types
2025-08-19 18:04:01,962 : INFO : PROGRESS: at sentence #80000, processed 811332 words and 473364 word types
2025-08-19 18:04:02,020 : INFO : PROGRESS: at sentence #90000, processed 941584 words and 535566 word types
2025-08-19 18:04:02,077 : INFO : PROGRESS: at sentence #100000, processed 1064745 words and 592967 word types
2025-08-19 18:04:02,131 : INFO : PROGRESS: at sentence #110000, processed 1181414 words and 645154 word types
2025-08-19 18:04:02,161 : INFO : PROGRESS: at sentence #120000, processed 1244512 words and 668960 word types
2025-08-19 18:04:02,22

Updating existing Word2Vec model for 2021_2024


2025-08-19 18:04:13,533 : INFO : PROGRESS: at sentence #230000, processed 2506125 words, keeping 41191 word types
2025-08-19 18:04:13,542 : INFO : PROGRESS: at sentence #240000, processed 2622870 words, keeping 42203 word types
2025-08-19 18:04:13,550 : INFO : PROGRESS: at sentence #250000, processed 2732828 words, keeping 43020 word types
2025-08-19 18:04:13,558 : INFO : PROGRESS: at sentence #260000, processed 2840603 words, keeping 43877 word types
2025-08-19 18:04:13,566 : INFO : PROGRESS: at sentence #270000, processed 2948378 words, keeping 44745 word types
2025-08-19 18:04:13,578 : INFO : PROGRESS: at sentence #280000, processed 3065121 words, keeping 45460 word types
2025-08-19 18:04:13,590 : INFO : PROGRESS: at sentence #290000, processed 3181679 words, keeping 46243 word types
2025-08-19 18:04:13,600 : INFO : PROGRESS: at sentence #300000, processed 3298751 words, keeping 47036 word types
2025-08-19 18:04:13,610 : INFO : PROGRESS: at sentence #310000, processed 3415747 words,

Training model on 1000000 comments


2025-08-19 18:04:15,436 : INFO : EPOCH 0 - PROGRESS: at 31.47% examples, 3187491 words/s, in_qsize 32, out_qsize 1
2025-08-19 18:04:16,438 : INFO : EPOCH 0 - PROGRESS: at 64.54% examples, 3286861 words/s, in_qsize 31, out_qsize 0
2025-08-19 18:04:17,441 : INFO : EPOCH 0 - PROGRESS: at 97.61% examples, 3302849 words/s, in_qsize 27, out_qsize 0
2025-08-19 18:04:17,508 : INFO : EPOCH 0: training on 11013409 raw words (10187783 effective words) took 3.1s, 3311227 effective words/s
2025-08-19 18:04:18,528 : INFO : EPOCH 1 - PROGRESS: at 29.79% examples, 3001045 words/s, in_qsize 30, out_qsize 1
2025-08-19 18:04:19,530 : INFO : EPOCH 1 - PROGRESS: at 63.34% examples, 3225011 words/s, in_qsize 31, out_qsize 0
2025-08-19 18:04:20,535 : INFO : EPOCH 1 - PROGRESS: at 96.81% examples, 3273588 words/s, in_qsize 31, out_qsize 0
2025-08-19 18:04:20,624 : INFO : EPOCH 1: training on 11013409 raw words (10188559 effective words) took 3.1s, 3285277 effective words/s
2025-08-19 18:04:21,637 : INFO : EPO

Model saved to models/word2vec/interim/conservative_2021_2024_interim.model
Processing processed_comments/conservative\conservative_batch13.pkl
Loaded 1000000 comments from processed_comments/conservative\conservative_batch13.pkl
Processing chunk of 1000000 comments for 2021_2024
Building bigram model for 2021_2024 with 1000000 comments...


2025-08-19 18:04:36,557 : INFO : collecting all words and their counts
2025-08-19 18:04:36,558 : INFO : PROGRESS: at sentence #0, processed 0 words and 0 word types
2025-08-19 18:04:36,606 : INFO : PROGRESS: at sentence #10000, processed 110314 words and 90475 word types
2025-08-19 18:04:36,656 : INFO : PROGRESS: at sentence #20000, processed 223974 words and 165610 word types
2025-08-19 18:04:36,709 : INFO : PROGRESS: at sentence #30000, processed 339757 words and 236237 word types


Creating new bigram model for 2021_2024 (cannot update Phraser objects)


2025-08-19 18:04:36,761 : INFO : PROGRESS: at sentence #40000, processed 456542 words and 303523 word types
2025-08-19 18:04:36,813 : INFO : PROGRESS: at sentence #50000, processed 553367 words and 355810 word types
2025-08-19 18:04:36,858 : INFO : PROGRESS: at sentence #60000, processed 658658 words and 409072 word types
2025-08-19 18:04:36,911 : INFO : PROGRESS: at sentence #70000, processed 774310 words and 466755 word types
2025-08-19 18:04:36,967 : INFO : PROGRESS: at sentence #80000, processed 893261 words and 522721 word types
2025-08-19 18:04:37,009 : INFO : PROGRESS: at sentence #90000, processed 989773 words and 567638 word types
2025-08-19 18:04:37,066 : INFO : PROGRESS: at sentence #100000, processed 1111671 words and 623168 word types
2025-08-19 18:04:37,119 : INFO : PROGRESS: at sentence #110000, processed 1220260 words and 670880 word types
2025-08-19 18:04:37,191 : INFO : PROGRESS: at sentence #120000, processed 1347683 words and 724992 word types
2025-08-19 18:04:37,24

Updating existing Word2Vec model for 2021_2024


2025-08-19 18:04:48,835 : INFO : PROGRESS: at sentence #220000, processed 2511370 words, keeping 43166 word types
2025-08-19 18:04:48,845 : INFO : PROGRESS: at sentence #230000, processed 2626614 words, keeping 44052 word types
2025-08-19 18:04:48,853 : INFO : PROGRESS: at sentence #240000, processed 2735668 words, keeping 44872 word types
2025-08-19 18:04:48,866 : INFO : PROGRESS: at sentence #250000, processed 2859694 words, keeping 45769 word types
2025-08-19 18:04:48,875 : INFO : PROGRESS: at sentence #260000, processed 2980465 words, keeping 46598 word types
2025-08-19 18:04:48,884 : INFO : PROGRESS: at sentence #270000, processed 3096203 words, keeping 47403 word types
2025-08-19 18:04:48,893 : INFO : PROGRESS: at sentence #280000, processed 3213528 words, keeping 48332 word types
2025-08-19 18:04:48,901 : INFO : PROGRESS: at sentence #290000, processed 3325098 words, keeping 49115 word types
2025-08-19 18:04:48,910 : INFO : PROGRESS: at sentence #300000, processed 3439274 words,

Training model on 1000000 comments


2025-08-19 18:04:50,753 : INFO : EPOCH 0 - PROGRESS: at 29.61% examples, 3134643 words/s, in_qsize 31, out_qsize 0
2025-08-19 18:04:51,758 : INFO : EPOCH 0 - PROGRESS: at 61.20% examples, 3197367 words/s, in_qsize 32, out_qsize 0
2025-08-19 18:04:52,764 : INFO : EPOCH 0 - PROGRESS: at 93.25% examples, 3234932 words/s, in_qsize 31, out_qsize 0
2025-08-19 18:04:52,953 : INFO : EPOCH 0: training on 11221118 raw words (10418452 effective words) took 3.2s, 3249382 effective words/s
2025-08-19 18:04:53,969 : INFO : EPOCH 1 - PROGRESS: at 29.42% examples, 3123357 words/s, in_qsize 31, out_qsize 0
2025-08-19 18:04:54,972 : INFO : EPOCH 1 - PROGRESS: at 61.71% examples, 3238311 words/s, in_qsize 31, out_qsize 0
2025-08-19 18:04:55,973 : INFO : EPOCH 1 - PROGRESS: at 93.98% examples, 3269473 words/s, in_qsize 30, out_qsize 1
2025-08-19 18:04:56,141 : INFO : EPOCH 1: training on 11221118 raw words (10418014 effective words) took 3.2s, 3280848 effective words/s
2025-08-19 18:04:57,154 : INFO : EPO

Model saved to models/word2vec/interim/conservative_2021_2024_interim.model
Processing processed_comments/conservative\conservative_batch14.pkl
Loaded 1000000 comments from processed_comments/conservative\conservative_batch14.pkl
Processing chunk of 1000000 comments for 2021_2024
Building bigram model for 2021_2024 with 1000000 comments...


2025-08-19 18:05:11,930 : INFO : collecting all words and their counts
2025-08-19 18:05:11,930 : INFO : PROGRESS: at sentence #0, processed 0 words and 0 word types
2025-08-19 18:05:11,975 : INFO : PROGRESS: at sentence #10000, processed 99098 words and 83086 word types
2025-08-19 18:05:12,025 : INFO : PROGRESS: at sentence #20000, processed 213308 words and 160041 word types
2025-08-19 18:05:12,074 : INFO : PROGRESS: at sentence #30000, processed 327466 words and 223728 word types


Creating new bigram model for 2021_2024 (cannot update Phraser objects)


2025-08-19 18:05:12,118 : INFO : PROGRESS: at sentence #40000, processed 436030 words and 281044 word types
2025-08-19 18:05:12,169 : INFO : PROGRESS: at sentence #50000, processed 545277 words and 337147 word types
2025-08-19 18:05:12,221 : INFO : PROGRESS: at sentence #60000, processed 655917 words and 390419 word types
2025-08-19 18:05:12,269 : INFO : PROGRESS: at sentence #70000, processed 768096 words and 442998 word types
2025-08-19 18:05:12,321 : INFO : PROGRESS: at sentence #80000, processed 882719 words and 496857 word types
2025-08-19 18:05:12,368 : INFO : PROGRESS: at sentence #90000, processed 992752 words and 548397 word types
2025-08-19 18:05:12,422 : INFO : PROGRESS: at sentence #100000, processed 1102527 words and 598729 word types
2025-08-19 18:05:12,472 : INFO : PROGRESS: at sentence #110000, processed 1211300 words and 646994 word types
2025-08-19 18:05:12,525 : INFO : PROGRESS: at sentence #120000, processed 1326780 words and 697518 word types
2025-08-19 18:05:12,57

Updating existing Word2Vec model for 2021_2024


2025-08-19 18:05:24,768 : INFO : PROGRESS: at sentence #260000, processed 2669462 words, keeping 43169 word types
2025-08-19 18:05:24,775 : INFO : PROGRESS: at sentence #270000, processed 2772746 words, keeping 43930 word types
2025-08-19 18:05:24,783 : INFO : PROGRESS: at sentence #280000, processed 2878501 words, keeping 44782 word types
2025-08-19 18:05:24,790 : INFO : PROGRESS: at sentence #290000, processed 2986113 words, keeping 45508 word types
2025-08-19 18:05:24,797 : INFO : PROGRESS: at sentence #300000, processed 3085310 words, keeping 46259 word types
2025-08-19 18:05:24,808 : INFO : PROGRESS: at sentence #310000, processed 3181929 words, keeping 46951 word types
2025-08-19 18:05:24,820 : INFO : PROGRESS: at sentence #320000, processed 3281308 words, keeping 47672 word types
2025-08-19 18:05:24,828 : INFO : PROGRESS: at sentence #330000, processed 3387690 words, keeping 48395 word types
2025-08-19 18:05:24,835 : INFO : PROGRESS: at sentence #340000, processed 3480281 words,

Training model on 1000000 comments


2025-08-19 18:05:26,505 : INFO : EPOCH 0 - PROGRESS: at 32.02% examples, 3013397 words/s, in_qsize 31, out_qsize 0
2025-08-19 18:05:27,507 : INFO : EPOCH 0 - PROGRESS: at 67.33% examples, 3147786 words/s, in_qsize 31, out_qsize 0
2025-08-19 18:05:28,344 : INFO : EPOCH 0: training on 9816022 raw words (9004848 effective words) took 2.8s, 3168878 effective words/s
2025-08-19 18:05:29,356 : INFO : EPOCH 1 - PROGRESS: at 33.83% examples, 3186621 words/s, in_qsize 31, out_qsize 0
2025-08-19 18:05:30,356 : INFO : EPOCH 1 - PROGRESS: at 69.13% examples, 3246047 words/s, in_qsize 31, out_qsize 0
2025-08-19 18:05:31,099 : INFO : EPOCH 1: training on 9816022 raw words (9004138 effective words) took 2.7s, 3282456 effective words/s
2025-08-19 18:05:32,119 : INFO : EPOCH 2 - PROGRESS: at 32.02% examples, 3015219 words/s, in_qsize 31, out_qsize 0
2025-08-19 18:05:33,121 : INFO : EPOCH 2 - PROGRESS: at 67.86% examples, 3176737 words/s, in_qsize 31, out_qsize 0
2025-08-19 18:05:33,931 : INFO : EPOCH 2

Model saved to models/word2vec/interim/conservative_2021_2024_interim.model
Processing processed_comments/conservative\conservative_batch15.pkl
Loaded 1000000 comments from processed_comments/conservative\conservative_batch15.pkl
Processing chunk of 1000000 comments for 2021_2024
Building bigram model for 2021_2024 with 1000000 comments...


2025-08-19 18:05:45,778 : INFO : collecting all words and their counts
2025-08-19 18:05:45,779 : INFO : PROGRESS: at sentence #0, processed 0 words and 0 word types
2025-08-19 18:05:45,814 : INFO : PROGRESS: at sentence #10000, processed 89825 words and 75557 word types
2025-08-19 18:05:45,847 : INFO : PROGRESS: at sentence #20000, processed 180229 words and 137711 word types
2025-08-19 18:05:45,884 : INFO : PROGRESS: at sentence #30000, processed 262016 words and 188707 word types
2025-08-19 18:05:45,919 : INFO : PROGRESS: at sentence #40000, processed 351835 words and 238946 word types
2025-08-19 18:05:45,951 : INFO : PROGRESS: at sentence #50000, processed 438470 words and 284254 word types


Creating new bigram model for 2021_2024 (cannot update Phraser objects)


2025-08-19 18:05:45,987 : INFO : PROGRESS: at sentence #60000, processed 525554 words and 330044 word types
2025-08-19 18:05:46,029 : INFO : PROGRESS: at sentence #70000, processed 607672 words and 371894 word types
2025-08-19 18:05:46,069 : INFO : PROGRESS: at sentence #80000, processed 700906 words and 419112 word types
2025-08-19 18:05:46,105 : INFO : PROGRESS: at sentence #90000, processed 785462 words and 461351 word types
2025-08-19 18:05:46,138 : INFO : PROGRESS: at sentence #100000, processed 866281 words and 499538 word types
2025-08-19 18:05:46,165 : INFO : PROGRESS: at sentence #110000, processed 930290 words and 527258 word types
2025-08-19 18:05:46,195 : INFO : PROGRESS: at sentence #120000, processed 1004045 words and 560374 word types
2025-08-19 18:05:46,248 : INFO : PROGRESS: at sentence #130000, processed 1106520 words and 607878 word types
2025-08-19 18:05:46,293 : INFO : PROGRESS: at sentence #140000, processed 1206471 words and 654135 word types
2025-08-19 18:05:46,

Updating existing Word2Vec model for 2021_2024


2025-08-19 18:05:55,128 : INFO : PROGRESS: at sentence #280000, processed 2510329 words, keeping 43850 word types
2025-08-19 18:05:55,137 : INFO : PROGRESS: at sentence #290000, processed 2618346 words, keeping 44899 word types
2025-08-19 18:05:55,144 : INFO : PROGRESS: at sentence #300000, processed 2721470 words, keeping 45709 word types
2025-08-19 18:05:55,151 : INFO : PROGRESS: at sentence #310000, processed 2822297 words, keeping 46605 word types
2025-08-19 18:05:55,158 : INFO : PROGRESS: at sentence #320000, processed 2915341 words, keeping 47391 word types
2025-08-19 18:05:55,166 : INFO : PROGRESS: at sentence #330000, processed 3028206 words, keeping 48118 word types
2025-08-19 18:05:55,173 : INFO : PROGRESS: at sentence #340000, processed 3131791 words, keeping 48664 word types
2025-08-19 18:05:55,180 : INFO : PROGRESS: at sentence #350000, processed 3229253 words, keeping 49295 word types
2025-08-19 18:05:55,187 : INFO : PROGRESS: at sentence #360000, processed 3334520 words,

Training model on 1000000 comments


2025-08-19 18:05:56,826 : INFO : EPOCH 0 - PROGRESS: at 35.45% examples, 2977743 words/s, in_qsize 31, out_qsize 0
2025-08-19 18:05:57,826 : INFO : EPOCH 0 - PROGRESS: at 73.56% examples, 3092835 words/s, in_qsize 30, out_qsize 1
2025-08-19 18:05:58,468 : INFO : EPOCH 0: training on 9181667 raw words (8357908 effective words) took 2.6s, 3160282 effective words/s
2025-08-19 18:05:59,482 : INFO : EPOCH 1 - PROGRESS: at 37.15% examples, 3149188 words/s, in_qsize 31, out_qsize 0
2025-08-19 18:06:00,483 : INFO : EPOCH 1 - PROGRESS: at 75.46% examples, 3176008 words/s, in_qsize 31, out_qsize 0
2025-08-19 18:06:01,106 : INFO : EPOCH 1: training on 9181667 raw words (8356323 effective words) took 2.6s, 3184289 effective words/s
2025-08-19 18:06:02,121 : INFO : EPOCH 2 - PROGRESS: at 36.96% examples, 3129719 words/s, in_qsize 31, out_qsize 0
2025-08-19 18:06:03,122 : INFO : EPOCH 2 - PROGRESS: at 76.29% examples, 3207963 words/s, in_qsize 32, out_qsize 0
2025-08-19 18:06:03,699 : INFO : EPOCH 2

Model saved to models/word2vec/interim/conservative_2021_2024_interim.model
Processing processed_comments/conservative\conservative_batch16.pkl
Loaded 1000000 comments from processed_comments/conservative\conservative_batch16.pkl
Processing chunk of 1000000 comments for 2021_2024
Building bigram model for 2021_2024 with 1000000 comments...


2025-08-19 18:06:15,099 : INFO : collecting all words and their counts
2025-08-19 18:06:15,099 : INFO : PROGRESS: at sentence #0, processed 0 words and 0 word types
2025-08-19 18:06:15,138 : INFO : PROGRESS: at sentence #10000, processed 99242 words and 80474 word types
2025-08-19 18:06:15,179 : INFO : PROGRESS: at sentence #20000, processed 205526 words and 154225 word types
2025-08-19 18:06:15,218 : INFO : PROGRESS: at sentence #30000, processed 305974 words and 215494 word types
2025-08-19 18:06:15,256 : INFO : PROGRESS: at sentence #40000, processed 406281 words and 272053 word types


Creating new bigram model for 2021_2024 (cannot update Phraser objects)


2025-08-19 18:06:15,306 : INFO : PROGRESS: at sentence #50000, processed 528286 words and 340157 word types
2025-08-19 18:06:15,355 : INFO : PROGRESS: at sentence #60000, processed 629725 words and 394415 word types
2025-08-19 18:06:15,395 : INFO : PROGRESS: at sentence #70000, processed 723244 words and 442476 word types
2025-08-19 18:06:15,456 : INFO : PROGRESS: at sentence #80000, processed 838377 words and 499731 word types
2025-08-19 18:06:15,506 : INFO : PROGRESS: at sentence #90000, processed 946465 words and 551815 word types
2025-08-19 18:06:15,554 : INFO : PROGRESS: at sentence #100000, processed 1055928 words and 603553 word types
2025-08-19 18:06:15,609 : INFO : PROGRESS: at sentence #110000, processed 1169587 words and 656313 word types
2025-08-19 18:06:15,651 : INFO : PROGRESS: at sentence #120000, processed 1265244 words and 698763 word types
2025-08-19 18:06:15,715 : INFO : PROGRESS: at sentence #130000, processed 1387381 words and 753143 word types
2025-08-19 18:06:15,

Updating existing Word2Vec model for 2021_2024


2025-08-19 18:06:27,769 : INFO : PROGRESS: at sentence #260000, processed 2742145 words, keeping 44933 word types
2025-08-19 18:06:27,776 : INFO : PROGRESS: at sentence #270000, processed 2856226 words, keeping 45942 word types
2025-08-19 18:06:27,783 : INFO : PROGRESS: at sentence #280000, processed 2962236 words, keeping 46645 word types
2025-08-19 18:06:27,790 : INFO : PROGRESS: at sentence #290000, processed 3057699 words, keeping 47334 word types
2025-08-19 18:06:27,799 : INFO : PROGRESS: at sentence #300000, processed 3162604 words, keeping 48111 word types
2025-08-19 18:06:27,807 : INFO : PROGRESS: at sentence #310000, processed 3272639 words, keeping 48771 word types
2025-08-19 18:06:27,813 : INFO : PROGRESS: at sentence #320000, processed 3359951 words, keeping 49323 word types
2025-08-19 18:06:27,820 : INFO : PROGRESS: at sentence #330000, processed 3458842 words, keeping 49901 word types
2025-08-19 18:06:27,827 : INFO : PROGRESS: at sentence #340000, processed 3562168 words,

Training model on 1000000 comments


2025-08-19 18:06:29,528 : INFO : EPOCH 0 - PROGRESS: at 30.75% examples, 2991943 words/s, in_qsize 31, out_qsize 0
2025-08-19 18:06:30,533 : INFO : EPOCH 0 - PROGRESS: at 66.98% examples, 3086156 words/s, in_qsize 31, out_qsize 0
2025-08-19 18:06:31,519 : INFO : EPOCH 0: training on 10088198 raw words (9271668 effective words) took 3.0s, 3099716 effective words/s
2025-08-19 18:06:32,534 : INFO : EPOCH 1 - PROGRESS: at 31.94% examples, 3090803 words/s, in_qsize 31, out_qsize 0
2025-08-19 18:06:33,538 : INFO : EPOCH 1 - PROGRESS: at 67.48% examples, 3107266 words/s, in_qsize 32, out_qsize 0
2025-08-19 18:06:34,450 : INFO : EPOCH 1: training on 10088198 raw words (9271542 effective words) took 2.9s, 3178203 effective words/s
2025-08-19 18:06:35,463 : INFO : EPOCH 2 - PROGRESS: at 31.83% examples, 3083726 words/s, in_qsize 31, out_qsize 0
2025-08-19 18:06:36,468 : INFO : EPOCH 2 - PROGRESS: at 69.31% examples, 3181079 words/s, in_qsize 31, out_qsize 0
2025-08-19 18:06:37,357 : INFO : EPOCH

Model saved to models/word2vec/interim/conservative_2021_2024_interim.model
Processing processed_comments/conservative\conservative_batch17.pkl
Loaded 1000000 comments from processed_comments/conservative\conservative_batch17.pkl
Processing chunk of 1000000 comments for 2021_2024
Building bigram model for 2021_2024 with 1000000 comments...


2025-08-19 18:06:49,798 : INFO : collecting all words and their counts
2025-08-19 18:06:49,799 : INFO : PROGRESS: at sentence #0, processed 0 words and 0 word types
2025-08-19 18:06:49,849 : INFO : PROGRESS: at sentence #10000, processed 106258 words and 88493 word types
2025-08-19 18:06:49,900 : INFO : PROGRESS: at sentence #20000, processed 218017 words and 161486 word types
2025-08-19 18:06:49,945 : INFO : PROGRESS: at sentence #30000, processed 318158 words and 221339 word types
2025-08-19 18:06:49,990 : INFO : PROGRESS: at sentence #40000, processed 426166 words and 286225 word types


Creating new bigram model for 2021_2024 (cannot update Phraser objects)


2025-08-19 18:06:50,037 : INFO : PROGRESS: at sentence #50000, processed 529413 words and 343630 word types
2025-08-19 18:06:50,091 : INFO : PROGRESS: at sentence #60000, processed 644519 words and 404968 word types
2025-08-19 18:06:50,140 : INFO : PROGRESS: at sentence #70000, processed 757425 words and 462580 word types
2025-08-19 18:06:50,188 : INFO : PROGRESS: at sentence #80000, processed 867997 words and 519954 word types
2025-08-19 18:06:50,249 : INFO : PROGRESS: at sentence #90000, processed 980752 words and 574992 word types
2025-08-19 18:06:50,309 : INFO : PROGRESS: at sentence #100000, processed 1101729 words and 630764 word types
2025-08-19 18:06:50,366 : INFO : PROGRESS: at sentence #110000, processed 1217619 words and 682883 word types
2025-08-19 18:06:50,432 : INFO : PROGRESS: at sentence #120000, processed 1328907 words and 732782 word types
2025-08-19 18:06:50,480 : INFO : PROGRESS: at sentence #130000, processed 1438121 words and 781184 word types
2025-08-19 18:06:50,

Updating existing Word2Vec model for 2021_2024


2025-08-19 18:07:00,176 : INFO : PROGRESS: at sentence #220000, processed 2298966 words, keeping 42605 word types
2025-08-19 18:07:00,183 : INFO : PROGRESS: at sentence #230000, processed 2390944 words, keeping 43318 word types
2025-08-19 18:07:00,194 : INFO : PROGRESS: at sentence #240000, processed 2494708 words, keeping 44145 word types
2025-08-19 18:07:00,204 : INFO : PROGRESS: at sentence #250000, processed 2578048 words, keeping 44783 word types
2025-08-19 18:07:00,211 : INFO : PROGRESS: at sentence #260000, processed 2674592 words, keeping 45390 word types
2025-08-19 18:07:00,221 : INFO : PROGRESS: at sentence #270000, processed 2760809 words, keeping 45970 word types
2025-08-19 18:07:00,229 : INFO : PROGRESS: at sentence #280000, processed 2870711 words, keeping 46673 word types
2025-08-19 18:07:00,239 : INFO : PROGRESS: at sentence #290000, processed 2988070 words, keeping 47398 word types
2025-08-19 18:07:00,247 : INFO : PROGRESS: at sentence #300000, processed 3081299 words,

Training model on 1000000 comments


2025-08-19 18:07:02,031 : INFO : EPOCH 0 - PROGRESS: at 32.88% examples, 3037528 words/s, in_qsize 31, out_qsize 0
2025-08-19 18:07:03,032 : INFO : EPOCH 0 - PROGRESS: at 71.25% examples, 3178805 words/s, in_qsize 31, out_qsize 0
2025-08-19 18:07:03,729 : INFO : EPOCH 0: training on 9455670 raw words (8606465 effective words) took 2.7s, 3185684 effective words/s
2025-08-19 18:07:04,745 : INFO : EPOCH 1 - PROGRESS: at 33.51% examples, 3102629 words/s, in_qsize 31, out_qsize 0
2025-08-19 18:07:05,747 : INFO : EPOCH 1 - PROGRESS: at 70.71% examples, 3154369 words/s, in_qsize 31, out_qsize 0
2025-08-19 18:07:06,471 : INFO : EPOCH 1: training on 9455670 raw words (8605625 effective words) took 2.7s, 3153817 effective words/s
2025-08-19 18:07:07,492 : INFO : EPOCH 2 - PROGRESS: at 32.79% examples, 3013852 words/s, in_qsize 31, out_qsize 0
2025-08-19 18:07:08,492 : INFO : EPOCH 2 - PROGRESS: at 71.03% examples, 3162589 words/s, in_qsize 32, out_qsize 0
2025-08-19 18:07:09,195 : INFO : EPOCH 2

Model saved to models/word2vec/interim/conservative_2021_2024_interim.model
Processing processed_comments/conservative\conservative_batch18.pkl
Loaded 1000000 comments from processed_comments/conservative\conservative_batch18.pkl


2025-08-19 18:07:21,139 : INFO : collecting all words and their counts
2025-08-19 18:07:21,139 : INFO : PROGRESS: at sentence #0, processed 0 words and 0 word types
2025-08-19 18:07:21,177 : INFO : PROGRESS: at sentence #10000, processed 76632 words and 63612 word types
2025-08-19 18:07:21,210 : INFO : PROGRESS: at sentence #20000, processed 152718 words and 114934 word types


Processing chunk of 1000000 comments for 2021_2024
Building bigram model for 2021_2024 with 1000000 comments...
Creating new bigram model for 2021_2024 (cannot update Phraser objects)


2025-08-19 18:07:21,244 : INFO : PROGRESS: at sentence #30000, processed 230717 words and 163995 word types
2025-08-19 18:07:21,284 : INFO : PROGRESS: at sentence #40000, processed 310952 words and 211625 word types
2025-08-19 18:07:21,324 : INFO : PROGRESS: at sentence #50000, processed 408708 words and 267608 word types
2025-08-19 18:07:21,355 : INFO : PROGRESS: at sentence #60000, processed 480565 words and 304368 word types
2025-08-19 18:07:21,410 : INFO : PROGRESS: at sentence #70000, processed 575021 words and 352288 word types
2025-08-19 18:07:21,453 : INFO : PROGRESS: at sentence #80000, processed 663139 words and 395748 word types
2025-08-19 18:07:21,484 : INFO : PROGRESS: at sentence #90000, processed 730863 words and 427068 word types
2025-08-19 18:07:21,519 : INFO : PROGRESS: at sentence #100000, processed 808290 words and 463166 word types
2025-08-19 18:07:21,552 : INFO : PROGRESS: at sentence #110000, processed 878997 words and 495456 word types
2025-08-19 18:07:21,576 : 

Updating existing Word2Vec model for 2021_2024


2025-08-19 18:07:31,843 : INFO : PROGRESS: at sentence #310000, processed 2341711 words, keeping 40693 word types
2025-08-19 18:07:31,850 : INFO : PROGRESS: at sentence #320000, processed 2405742 words, keeping 41150 word types
2025-08-19 18:07:31,858 : INFO : PROGRESS: at sentence #330000, processed 2485914 words, keeping 41695 word types
2025-08-19 18:07:31,866 : INFO : PROGRESS: at sentence #340000, processed 2570450 words, keeping 42362 word types
2025-08-19 18:07:31,874 : INFO : PROGRESS: at sentence #350000, processed 2660965 words, keeping 43027 word types
2025-08-19 18:07:31,881 : INFO : PROGRESS: at sentence #360000, processed 2736195 words, keeping 43564 word types
2025-08-19 18:07:31,887 : INFO : PROGRESS: at sentence #370000, processed 2802562 words, keeping 44028 word types
2025-08-19 18:07:31,893 : INFO : PROGRESS: at sentence #380000, processed 2874066 words, keeping 44512 word types
2025-08-19 18:07:31,899 : INFO : PROGRESS: at sentence #390000, processed 2951119 words,

Training model on 1000000 comments


2025-08-19 18:07:33,543 : INFO : EPOCH 0 - PROGRESS: at 45.43% examples, 3009816 words/s, in_qsize 32, out_qsize 0
2025-08-19 18:07:34,548 : INFO : EPOCH 0 - PROGRESS: at 88.13% examples, 3016688 words/s, in_qsize 32, out_qsize 0
2025-08-19 18:07:34,775 : INFO : EPOCH 0: training on 7673566 raw words (6796188 effective words) took 2.2s, 3040872 effective words/s
2025-08-19 18:07:35,798 : INFO : EPOCH 1 - PROGRESS: at 40.65% examples, 2697563 words/s, in_qsize 31, out_qsize 0
2025-08-19 18:07:36,801 : INFO : EPOCH 1 - PROGRESS: at 86.60% examples, 2938259 words/s, in_qsize 32, out_qsize 0
2025-08-19 18:07:37,074 : INFO : EPOCH 1: training on 7673566 raw words (6795280 effective words) took 2.3s, 2974587 effective words/s
2025-08-19 18:07:38,094 : INFO : EPOCH 2 - PROGRESS: at 45.71% examples, 3032060 words/s, in_qsize 32, out_qsize 0
2025-08-19 18:07:39,099 : INFO : EPOCH 2 - PROGRESS: at 90.54% examples, 3097120 words/s, in_qsize 31, out_qsize 0
2025-08-19 18:07:39,284 : INFO : EPOCH 2

Model saved to models/word2vec/interim/conservative_2021_2024_interim.model
Processing processed_comments/conservative\conservative_batch19.pkl
Loaded 788076 comments from processed_comments/conservative\conservative_batch19.pkl
Processing processed_comments/conservative\conservative_batch2.pkl
Loaded 1000000 comments from processed_comments/conservative\conservative_batch2.pkl
Processing processed_comments/conservative\conservative_batch3.pkl
Loaded 1000000 comments from processed_comments/conservative\conservative_batch3.pkl


2025-08-19 18:08:14,560 : INFO : collecting all words and their counts
2025-08-19 18:08:14,561 : INFO : PROGRESS: at sentence #0, processed 0 words and 0 word types
2025-08-19 18:08:14,647 : INFO : PROGRESS: at sentence #10000, processed 201505 words and 152955 word types


Processing chunk of 1000000 comments for 2017_2020
Building bigram model for 2017_2020 with 1000000 comments...
Creating new bigram model for 2017_2020 (cannot update Phraser objects)


2025-08-19 18:08:14,739 : INFO : PROGRESS: at sentence #20000, processed 401809 words and 274149 word types
2025-08-19 18:08:14,819 : INFO : PROGRESS: at sentence #30000, processed 575781 words and 369812 word types
2025-08-19 18:08:14,902 : INFO : PROGRESS: at sentence #40000, processed 767536 words and 468505 word types
2025-08-19 18:08:14,984 : INFO : PROGRESS: at sentence #50000, processed 954735 words and 555429 word types
2025-08-19 18:08:15,065 : INFO : PROGRESS: at sentence #60000, processed 1138895 words and 640567 word types
2025-08-19 18:08:15,163 : INFO : PROGRESS: at sentence #70000, processed 1319531 words and 721260 word types
2025-08-19 18:08:15,258 : INFO : PROGRESS: at sentence #80000, processed 1529186 words and 808736 word types
2025-08-19 18:08:15,347 : INFO : PROGRESS: at sentence #90000, processed 1721057 words and 887188 word types
2025-08-19 18:08:15,438 : INFO : PROGRESS: at sentence #100000, processed 1921934 words and 967420 word types
2025-08-19 18:08:15,53

Updating existing Word2Vec model for 2017_2020


2025-08-19 18:08:33,455 : INFO : PROGRESS: at sentence #160000, processed 3088806 words, keeping 43188 word types
2025-08-19 18:08:33,470 : INFO : PROGRESS: at sentence #170000, processed 3282986 words, keeping 44452 word types
2025-08-19 18:08:33,486 : INFO : PROGRESS: at sentence #180000, processed 3482757 words, keeping 45722 word types
2025-08-19 18:08:33,500 : INFO : PROGRESS: at sentence #190000, processed 3680433 words, keeping 47034 word types
2025-08-19 18:08:33,518 : INFO : PROGRESS: at sentence #200000, processed 3869088 words, keeping 48075 word types
2025-08-19 18:08:33,532 : INFO : PROGRESS: at sentence #210000, processed 4075295 words, keeping 49226 word types
2025-08-19 18:08:33,545 : INFO : PROGRESS: at sentence #220000, processed 4268131 words, keeping 50270 word types
2025-08-19 18:08:33,561 : INFO : PROGRESS: at sentence #230000, processed 4453521 words, keeping 51092 word types
2025-08-19 18:08:33,575 : INFO : PROGRESS: at sentence #240000, processed 4643776 words,

Training model on 1000000 comments


2025-08-19 18:08:35,884 : INFO : EPOCH 0 - PROGRESS: at 17.07% examples, 3081130 words/s, in_qsize 31, out_qsize 0
2025-08-19 18:08:36,884 : INFO : EPOCH 0 - PROGRESS: at 34.87% examples, 3127389 words/s, in_qsize 31, out_qsize 0
2025-08-19 18:08:37,889 : INFO : EPOCH 0 - PROGRESS: at 53.98% examples, 3151211 words/s, in_qsize 31, out_qsize 0
2025-08-19 18:08:38,889 : INFO : EPOCH 0 - PROGRESS: at 74.19% examples, 3168577 words/s, in_qsize 31, out_qsize 0
2025-08-19 18:08:39,897 : INFO : EPOCH 0 - PROGRESS: at 95.76% examples, 3192307 words/s, in_qsize 32, out_qsize 0
2025-08-19 18:08:40,083 : INFO : EPOCH 0: training on 17775789 raw words (16650727 effective words) took 5.2s, 3200733 effective words/s
2025-08-19 18:08:41,092 : INFO : EPOCH 1 - PROGRESS: at 16.83% examples, 3044601 words/s, in_qsize 31, out_qsize 0
2025-08-19 18:08:42,092 : INFO : EPOCH 1 - PROGRESS: at 34.31% examples, 3080736 words/s, in_qsize 31, out_qsize 0
2025-08-19 18:08:43,093 : INFO : EPOCH 1 - PROGRESS: at 52

Model saved to models/word2vec/interim/conservative_2017_2020_interim.model
Processing processed_comments/conservative\conservative_batch4.pkl
Loaded 1000000 comments from processed_comments/conservative\conservative_batch4.pkl


2025-08-19 18:09:08,851 : INFO : collecting all words and their counts
2025-08-19 18:09:08,852 : INFO : PROGRESS: at sentence #0, processed 0 words and 0 word types
2025-08-19 18:09:08,914 : INFO : PROGRESS: at sentence #10000, processed 156909 words and 122646 word types


Processing chunk of 1000000 comments for 2017_2020
Building bigram model for 2017_2020 with 1000000 comments...
Creating new bigram model for 2017_2020 (cannot update Phraser objects)


2025-08-19 18:09:08,990 : INFO : PROGRESS: at sentence #20000, processed 323460 words and 230000 word types
2025-08-19 18:09:09,066 : INFO : PROGRESS: at sentence #30000, processed 488103 words and 325272 word types
2025-08-19 18:09:09,148 : INFO : PROGRESS: at sentence #40000, processed 667088 words and 420799 word types
2025-08-19 18:09:09,215 : INFO : PROGRESS: at sentence #50000, processed 821513 words and 497902 word types
2025-08-19 18:09:09,285 : INFO : PROGRESS: at sentence #60000, processed 976867 words and 573735 word types
2025-08-19 18:09:09,355 : INFO : PROGRESS: at sentence #70000, processed 1128213 words and 645105 word types
2025-08-19 18:09:09,444 : INFO : PROGRESS: at sentence #80000, processed 1291118 words and 719808 word types
2025-08-19 18:09:09,510 : INFO : PROGRESS: at sentence #90000, processed 1432652 words and 781951 word types
2025-08-19 18:09:09,581 : INFO : PROGRESS: at sentence #100000, processed 1586680 words and 850327 word types
2025-08-19 18:09:09,655

Updating existing Word2Vec model for 2017_2020


2025-08-19 18:09:23,856 : INFO : PROGRESS: at sentence #180000, processed 2703121 words, keeping 44553 word types
2025-08-19 18:09:23,865 : INFO : PROGRESS: at sentence #190000, processed 2833144 words, keeping 45568 word types
2025-08-19 18:09:23,873 : INFO : PROGRESS: at sentence #200000, processed 2959302 words, keeping 46515 word types
2025-08-19 18:09:23,887 : INFO : PROGRESS: at sentence #210000, processed 3109318 words, keeping 47703 word types
2025-08-19 18:09:23,897 : INFO : PROGRESS: at sentence #220000, processed 3253522 words, keeping 48799 word types
2025-08-19 18:09:23,907 : INFO : PROGRESS: at sentence #230000, processed 3396502 words, keeping 49871 word types
2025-08-19 18:09:23,918 : INFO : PROGRESS: at sentence #240000, processed 3536078 words, keeping 50786 word types
2025-08-19 18:09:23,929 : INFO : PROGRESS: at sentence #250000, processed 3673524 words, keeping 51675 word types
2025-08-19 18:09:23,940 : INFO : PROGRESS: at sentence #260000, processed 3831959 words,

Training model on 1000000 comments


2025-08-19 18:09:26,040 : INFO : EPOCH 0 - PROGRESS: at 22.60% examples, 3116992 words/s, in_qsize 31, out_qsize 0
2025-08-19 18:09:27,041 : INFO : EPOCH 0 - PROGRESS: at 48.96% examples, 3198727 words/s, in_qsize 31, out_qsize 0
2025-08-19 18:09:28,048 : INFO : EPOCH 0 - PROGRESS: at 74.37% examples, 3228035 words/s, in_qsize 31, out_qsize 0
2025-08-19 18:09:29,025 : INFO : EPOCH 0: training on 13915731 raw words (12981682 effective words) took 4.0s, 3256194 effective words/s
2025-08-19 18:09:30,040 : INFO : EPOCH 1 - PROGRESS: at 22.54% examples, 3096590 words/s, in_qsize 31, out_qsize 0
2025-08-19 18:09:31,040 : INFO : EPOCH 1 - PROGRESS: at 49.14% examples, 3203107 words/s, in_qsize 31, out_qsize 0
2025-08-19 18:09:32,041 : INFO : EPOCH 1 - PROGRESS: at 73.94% examples, 3211723 words/s, in_qsize 30, out_qsize 1
2025-08-19 18:09:33,045 : INFO : EPOCH 1 - PROGRESS: at 99.20% examples, 3212161 words/s, in_qsize 11, out_qsize 1
2025-08-19 18:09:33,065 : INFO : EPOCH 1: training on 1391

Model saved to models/word2vec/interim/conservative_2017_2020_interim.model
Processing processed_comments/conservative\conservative_batch5.pkl
Loaded 1000000 comments from processed_comments/conservative\conservative_batch5.pkl
Processing chunk of 1000000 comments for 2017_2020
Building bigram model for 2017_2020 with 1000000 comments...


2025-08-19 18:09:52,747 : INFO : collecting all words and their counts
2025-08-19 18:09:52,748 : INFO : PROGRESS: at sentence #0, processed 0 words and 0 word types
2025-08-19 18:09:52,804 : INFO : PROGRESS: at sentence #10000, processed 128480 words and 103650 word types
2025-08-19 18:09:52,861 : INFO : PROGRESS: at sentence #20000, processed 258448 words and 187299 word types
2025-08-19 18:09:52,918 : INFO : PROGRESS: at sentence #30000, processed 399648 words and 272040 word types


Creating new bigram model for 2017_2020 (cannot update Phraser objects)


2025-08-19 18:09:52,973 : INFO : PROGRESS: at sentence #40000, processed 526177 words and 342843 word types
2025-08-19 18:09:53,033 : INFO : PROGRESS: at sentence #50000, processed 658854 words and 414263 word types
2025-08-19 18:09:53,093 : INFO : PROGRESS: at sentence #60000, processed 793503 words and 484128 word types
2025-08-19 18:09:53,153 : INFO : PROGRESS: at sentence #70000, processed 930753 words and 553308 word types
2025-08-19 18:09:53,216 : INFO : PROGRESS: at sentence #80000, processed 1073208 words and 623268 word types
2025-08-19 18:09:53,286 : INFO : PROGRESS: at sentence #90000, processed 1228654 words and 693172 word types
2025-08-19 18:09:53,363 : INFO : PROGRESS: at sentence #100000, processed 1370444 words and 758533 word types
2025-08-19 18:09:53,423 : INFO : PROGRESS: at sentence #110000, processed 1505608 words and 817965 word types
2025-08-19 18:09:53,481 : INFO : PROGRESS: at sentence #120000, processed 1630139 words and 870464 word types
2025-08-19 18:09:53,

Updating existing Word2Vec model for 2017_2020


2025-08-19 18:10:08,133 : INFO : PROGRESS: at sentence #170000, processed 2323598 words, keeping 42803 word types
2025-08-19 18:10:08,147 : INFO : PROGRESS: at sentence #180000, processed 2476599 words, keeping 43971 word types
2025-08-19 18:10:08,160 : INFO : PROGRESS: at sentence #190000, processed 2620394 words, keeping 45155 word types
2025-08-19 18:10:08,175 : INFO : PROGRESS: at sentence #200000, processed 2779784 words, keeping 46300 word types
2025-08-19 18:10:08,187 : INFO : PROGRESS: at sentence #210000, processed 2923977 words, keeping 47313 word types
2025-08-19 18:10:08,200 : INFO : PROGRESS: at sentence #220000, processed 3067561 words, keeping 48357 word types
2025-08-19 18:10:08,211 : INFO : PROGRESS: at sentence #230000, processed 3200535 words, keeping 49266 word types
2025-08-19 18:10:08,221 : INFO : PROGRESS: at sentence #240000, processed 3339171 words, keeping 50184 word types
2025-08-19 18:10:08,231 : INFO : PROGRESS: at sentence #250000, processed 3469104 words,

Training model on 1000000 comments


2025-08-19 18:10:10,420 : INFO : EPOCH 0 - PROGRESS: at 23.66% examples, 3056901 words/s, in_qsize 31, out_qsize 0
2025-08-19 18:10:11,422 : INFO : EPOCH 0 - PROGRESS: at 49.29% examples, 3189226 words/s, in_qsize 31, out_qsize 0
2025-08-19 18:10:12,424 : INFO : EPOCH 0 - PROGRESS: at 73.08% examples, 3231419 words/s, in_qsize 31, out_qsize 0
2025-08-19 18:10:13,424 : INFO : EPOCH 0 - PROGRESS: at 95.72% examples, 3250877 words/s, in_qsize 31, out_qsize 0
2025-08-19 18:10:13,591 : INFO : EPOCH 0: training on 14594798 raw words (13608994 effective words) took 4.2s, 3259789 effective words/s
2025-08-19 18:10:14,605 : INFO : EPOCH 1 - PROGRESS: at 24.37% examples, 3147609 words/s, in_qsize 31, out_qsize 0
2025-08-19 18:10:15,605 : INFO : EPOCH 1 - PROGRESS: at 49.43% examples, 3201348 words/s, in_qsize 30, out_qsize 1
2025-08-19 18:10:16,606 : INFO : EPOCH 1 - PROGRESS: at 73.27% examples, 3243155 words/s, in_qsize 31, out_qsize 0
2025-08-19 18:10:17,606 : INFO : EPOCH 1 - PROGRESS: at 95

Model saved to models/word2vec/interim/conservative_2017_2020_interim.model
Processing processed_comments/conservative\conservative_batch6.pkl
Loaded 1000000 comments from processed_comments/conservative\conservative_batch6.pkl
Processing chunk of 1000000 comments for 2017_2020
Building bigram model for 2017_2020 with 1000000 comments...


2025-08-19 18:10:38,238 : INFO : collecting all words and their counts
2025-08-19 18:10:38,239 : INFO : PROGRESS: at sentence #0, processed 0 words and 0 word types
2025-08-19 18:10:38,302 : INFO : PROGRESS: at sentence #10000, processed 150595 words and 119991 word types
2025-08-19 18:10:38,371 : INFO : PROGRESS: at sentence #20000, processed 309712 words and 220721 word types


Creating new bigram model for 2017_2020 (cannot update Phraser objects)


2025-08-19 18:10:38,439 : INFO : PROGRESS: at sentence #30000, processed 466494 words and 309619 word types
2025-08-19 18:10:38,519 : INFO : PROGRESS: at sentence #40000, processed 634600 words and 398801 word types
2025-08-19 18:10:38,584 : INFO : PROGRESS: at sentence #50000, processed 788436 words and 475509 word types
2025-08-19 18:10:38,654 : INFO : PROGRESS: at sentence #60000, processed 955552 words and 553824 word types
2025-08-19 18:10:38,723 : INFO : PROGRESS: at sentence #70000, processed 1109870 words and 623871 word types
2025-08-19 18:10:38,797 : INFO : PROGRESS: at sentence #80000, processed 1270473 words and 694741 word types
2025-08-19 18:10:38,882 : INFO : PROGRESS: at sentence #90000, processed 1429524 words and 761075 word types
2025-08-19 18:10:38,953 : INFO : PROGRESS: at sentence #100000, processed 1582842 words and 823683 word types
2025-08-19 18:10:39,036 : INFO : PROGRESS: at sentence #110000, processed 1758508 words and 892621 word types
2025-08-19 18:10:39,1

Updating existing Word2Vec model for 2017_2020


2025-08-19 18:10:53,750 : INFO : PROGRESS: at sentence #180000, processed 2883884 words, keeping 43690 word types
2025-08-19 18:10:53,760 : INFO : PROGRESS: at sentence #190000, processed 3044073 words, keeping 44580 word types
2025-08-19 18:10:53,770 : INFO : PROGRESS: at sentence #200000, processed 3193577 words, keeping 45329 word types
2025-08-19 18:10:53,782 : INFO : PROGRESS: at sentence #210000, processed 3355616 words, keeping 46214 word types
2025-08-19 18:10:53,792 : INFO : PROGRESS: at sentence #220000, processed 3492480 words, keeping 46958 word types
2025-08-19 18:10:53,803 : INFO : PROGRESS: at sentence #230000, processed 3627310 words, keeping 47863 word types
2025-08-19 18:10:53,815 : INFO : PROGRESS: at sentence #240000, processed 3776024 words, keeping 48788 word types
2025-08-19 18:10:53,827 : INFO : PROGRESS: at sentence #250000, processed 3923494 words, keeping 49541 word types
2025-08-19 18:10:53,837 : INFO : PROGRESS: at sentence #260000, processed 4075775 words,

Training model on 1000000 comments


2025-08-19 18:10:55,987 : INFO : EPOCH 0 - PROGRESS: at 20.43% examples, 3042491 words/s, in_qsize 31, out_qsize 0
2025-08-19 18:10:56,996 : INFO : EPOCH 0 - PROGRESS: at 44.32% examples, 3170350 words/s, in_qsize 31, out_qsize 0
2025-08-19 18:10:57,997 : INFO : EPOCH 0 - PROGRESS: at 69.27% examples, 3213063 words/s, in_qsize 31, out_qsize 0
2025-08-19 18:10:58,998 : INFO : EPOCH 0 - PROGRESS: at 92.75% examples, 3217441 words/s, in_qsize 31, out_qsize 0
2025-08-19 18:10:59,297 : INFO : EPOCH 0: training on 14865629 raw words (13838329 effective words) took 4.3s, 3209907 effective words/s
2025-08-19 18:11:00,313 : INFO : EPOCH 1 - PROGRESS: at 21.25% examples, 3160758 words/s, in_qsize 31, out_qsize 0
2025-08-19 18:11:01,314 : INFO : EPOCH 1 - PROGRESS: at 44.72% examples, 3209515 words/s, in_qsize 32, out_qsize 1
2025-08-19 18:11:02,317 : INFO : EPOCH 1 - PROGRESS: at 69.63% examples, 3236506 words/s, in_qsize 31, out_qsize 0
2025-08-19 18:11:03,318 : INFO : EPOCH 1 - PROGRESS: at 93

Model saved to models/word2vec/interim/conservative_2017_2020_interim.model
Processing processed_comments/conservative\conservative_batch7.pkl
Loaded 1000000 comments from processed_comments/conservative\conservative_batch7.pkl


2025-08-19 18:11:24,077 : INFO : collecting all words and their counts
2025-08-19 18:11:24,077 : INFO : PROGRESS: at sentence #0, processed 0 words and 0 word types
2025-08-19 18:11:24,136 : INFO : PROGRESS: at sentence #10000, processed 143231 words and 115598 word types


Processing chunk of 1000000 comments for 2017_2020
Building bigram model for 2017_2020 with 1000000 comments...
Creating new bigram model for 2017_2020 (cannot update Phraser objects)


2025-08-19 18:11:24,201 : INFO : PROGRESS: at sentence #20000, processed 295451 words and 213998 word types
2025-08-19 18:11:24,276 : INFO : PROGRESS: at sentence #30000, processed 464836 words and 311445 word types
2025-08-19 18:11:24,347 : INFO : PROGRESS: at sentence #40000, processed 623116 words and 392221 word types
2025-08-19 18:11:24,403 : INFO : PROGRESS: at sentence #50000, processed 752463 words and 455163 word types
2025-08-19 18:11:24,467 : INFO : PROGRESS: at sentence #60000, processed 897710 words and 524267 word types
2025-08-19 18:11:24,528 : INFO : PROGRESS: at sentence #70000, processed 1037425 words and 588631 word types
2025-08-19 18:11:24,594 : INFO : PROGRESS: at sentence #80000, processed 1178861 words and 650575 word types
2025-08-19 18:11:24,668 : INFO : PROGRESS: at sentence #90000, processed 1311586 words and 707511 word types
2025-08-19 18:11:24,731 : INFO : PROGRESS: at sentence #100000, processed 1449286 words and 765637 word types
2025-08-19 18:11:24,804

Updating existing Word2Vec model for 2017_2020


2025-08-19 18:11:41,395 : INFO : PROGRESS: at sentence #190000, processed 2692805 words, keeping 41232 word types
2025-08-19 18:11:41,406 : INFO : PROGRESS: at sentence #200000, processed 2837480 words, keeping 42395 word types
2025-08-19 18:11:41,417 : INFO : PROGRESS: at sentence #210000, processed 2964993 words, keeping 43254 word types
2025-08-19 18:11:41,428 : INFO : PROGRESS: at sentence #220000, processed 3103153 words, keeping 44235 word types
2025-08-19 18:11:41,437 : INFO : PROGRESS: at sentence #230000, processed 3244160 words, keeping 45153 word types
2025-08-19 18:11:41,450 : INFO : PROGRESS: at sentence #240000, processed 3381669 words, keeping 46038 word types
2025-08-19 18:11:41,461 : INFO : PROGRESS: at sentence #250000, processed 3536533 words, keeping 46944 word types
2025-08-19 18:11:41,471 : INFO : PROGRESS: at sentence #260000, processed 3675667 words, keeping 47893 word types
2025-08-19 18:11:41,481 : INFO : PROGRESS: at sentence #270000, processed 3803837 words,

Training model on 1000000 comments


2025-08-19 18:11:43,539 : INFO : EPOCH 0 - PROGRESS: at 24.12% examples, 3152621 words/s, in_qsize 31, out_qsize 0
2025-08-19 18:11:44,541 : INFO : EPOCH 0 - PROGRESS: at 50.97% examples, 3250190 words/s, in_qsize 32, out_qsize 0
2025-08-19 18:11:45,542 : INFO : EPOCH 0 - PROGRESS: at 78.20% examples, 3286353 words/s, in_qsize 31, out_qsize 0
2025-08-19 18:11:46,226 : INFO : EPOCH 0: training on 13082990 raw words (12131602 effective words) took 3.7s, 3286785 effective words/s
2025-08-19 18:11:47,236 : INFO : EPOCH 1 - PROGRESS: at 23.99% examples, 3145016 words/s, in_qsize 31, out_qsize 0
2025-08-19 18:11:48,238 : INFO : EPOCH 1 - PROGRESS: at 48.84% examples, 3140683 words/s, in_qsize 30, out_qsize 1
2025-08-19 18:11:49,242 : INFO : EPOCH 1 - PROGRESS: at 74.53% examples, 3134165 words/s, in_qsize 31, out_qsize 0
2025-08-19 18:11:50,065 : INFO : EPOCH 1: training on 13082990 raw words (12131904 effective words) took 3.8s, 3167747 effective words/s
2025-08-19 18:11:51,076 : INFO : EPO

Model saved to models/word2vec/interim/conservative_2017_2020_interim.model
Processing processed_comments/conservative\conservative_batch8.pkl
Loaded 1000000 comments from processed_comments/conservative\conservative_batch8.pkl
Processing chunk of 1000000 comments for 2017_2020


2025-08-19 18:12:05,401 : INFO : collecting all words and their counts
2025-08-19 18:12:05,401 : INFO : PROGRESS: at sentence #0, processed 0 words and 0 word types
2025-08-19 18:12:05,465 : INFO : PROGRESS: at sentence #10000, processed 130024 words and 103151 word types
2025-08-19 18:12:05,522 : INFO : PROGRESS: at sentence #20000, processed 256094 words and 183123 word types


Building bigram model for 2017_2020 with 1000000 comments...
Creating new bigram model for 2017_2020 (cannot update Phraser objects)


2025-08-19 18:12:05,579 : INFO : PROGRESS: at sentence #30000, processed 387954 words and 260271 word types
2025-08-19 18:12:05,644 : INFO : PROGRESS: at sentence #40000, processed 525844 words and 333831 word types
2025-08-19 18:12:05,708 : INFO : PROGRESS: at sentence #50000, processed 654896 words and 399546 word types
2025-08-19 18:12:05,772 : INFO : PROGRESS: at sentence #60000, processed 785913 words and 463303 word types
2025-08-19 18:12:05,834 : INFO : PROGRESS: at sentence #70000, processed 920811 words and 526375 word types
2025-08-19 18:12:05,893 : INFO : PROGRESS: at sentence #80000, processed 1051418 words and 582758 word types
2025-08-19 18:12:05,946 : INFO : PROGRESS: at sentence #90000, processed 1166235 words and 630701 word types
2025-08-19 18:12:05,999 : INFO : PROGRESS: at sentence #100000, processed 1275205 words and 676345 word types
2025-08-19 18:12:06,057 : INFO : PROGRESS: at sentence #110000, processed 1375879 words and 717962 word types
2025-08-19 18:12:06,11

Updating existing Word2Vec model for 2017_2020


2025-08-19 18:12:20,088 : INFO : PROGRESS: at sentence #230000, processed 2694126 words, keeping 42567 word types
2025-08-19 18:12:20,098 : INFO : PROGRESS: at sentence #240000, processed 2798135 words, keeping 43317 word types
2025-08-19 18:12:20,109 : INFO : PROGRESS: at sentence #250000, processed 2911168 words, keeping 44087 word types
2025-08-19 18:12:20,117 : INFO : PROGRESS: at sentence #260000, processed 3018730 words, keeping 44735 word types
2025-08-19 18:12:20,129 : INFO : PROGRESS: at sentence #270000, processed 3165106 words, keeping 45483 word types
2025-08-19 18:12:20,139 : INFO : PROGRESS: at sentence #280000, processed 3298527 words, keeping 46434 word types
2025-08-19 18:12:20,150 : INFO : PROGRESS: at sentence #290000, processed 3424356 words, keeping 47159 word types
2025-08-19 18:12:20,159 : INFO : PROGRESS: at sentence #300000, processed 3557943 words, keeping 47843 word types
2025-08-19 18:12:20,167 : INFO : PROGRESS: at sentence #310000, processed 3661801 words,

Training model on 1000000 comments


2025-08-19 18:12:22,027 : INFO : EPOCH 0 - PROGRESS: at 29.23% examples, 3153002 words/s, in_qsize 31, out_qsize 0
2025-08-19 18:12:23,028 : INFO : EPOCH 0 - PROGRESS: at 72.12% examples, 3191643 words/s, in_qsize 29, out_qsize 2
2025-08-19 18:12:23,952 : INFO : EPOCH 0: training on 10411478 raw words (9417178 effective words) took 2.9s, 3218268 effective words/s
2025-08-19 18:12:24,966 : INFO : EPOCH 1 - PROGRESS: at 29.15% examples, 3137097 words/s, in_qsize 31, out_qsize 0
2025-08-19 18:12:25,967 : INFO : EPOCH 1 - PROGRESS: at 72.38% examples, 3197276 words/s, in_qsize 30, out_qsize 1
2025-08-19 18:12:26,900 : INFO : EPOCH 1: training on 10411478 raw words (9417223 effective words) took 2.9s, 3206358 effective words/s
2025-08-19 18:12:27,916 : INFO : EPOCH 2 - PROGRESS: at 28.12% examples, 3016172 words/s, in_qsize 31, out_qsize 0
2025-08-19 18:12:28,917 : INFO : EPOCH 2 - PROGRESS: at 70.42% examples, 3125268 words/s, in_qsize 30, out_qsize 1
2025-08-19 18:12:29,890 : INFO : EPOCH

Model saved to models/word2vec/interim/conservative_2017_2020_interim.model
Processing processed_comments/conservative\conservative_batch9.pkl
Loaded 1000000 comments from processed_comments/conservative\conservative_batch9.pkl
Processing chunk of 1000000 comments for 2021_2024
Building bigram model for 2021_2024 with 1000000 comments...


2025-08-19 18:12:40,000 : INFO : collecting all words and their counts
2025-08-19 18:12:40,001 : INFO : PROGRESS: at sentence #0, processed 0 words and 0 word types
2025-08-19 18:12:40,035 : INFO : PROGRESS: at sentence #10000, processed 76878 words and 62699 word types
2025-08-19 18:12:40,066 : INFO : PROGRESS: at sentence #20000, processed 142433 words and 104917 word types
2025-08-19 18:12:40,090 : INFO : PROGRESS: at sentence #30000, processed 200711 words and 137975 word types
2025-08-19 18:12:40,114 : INFO : PROGRESS: at sentence #40000, processed 255451 words and 168716 word types
2025-08-19 18:12:40,138 : INFO : PROGRESS: at sentence #50000, processed 312203 words and 199924 word types
2025-08-19 18:12:40,164 : INFO : PROGRESS: at sentence #60000, processed 368642 words and 229330 word types


Creating new bigram model for 2021_2024 (cannot update Phraser objects)


2025-08-19 18:12:40,189 : INFO : PROGRESS: at sentence #70000, processed 426228 words and 259008 word types
2025-08-19 18:12:40,211 : INFO : PROGRESS: at sentence #80000, processed 481774 words and 287245 word types
2025-08-19 18:12:40,242 : INFO : PROGRESS: at sentence #90000, processed 540479 words and 315900 word types
2025-08-19 18:12:40,266 : INFO : PROGRESS: at sentence #100000, processed 596929 words and 342281 word types
2025-08-19 18:12:40,293 : INFO : PROGRESS: at sentence #110000, processed 651657 words and 366531 word types
2025-08-19 18:12:40,319 : INFO : PROGRESS: at sentence #120000, processed 710335 words and 393028 word types
2025-08-19 18:12:40,346 : INFO : PROGRESS: at sentence #130000, processed 772019 words and 420339 word types
2025-08-19 18:12:40,372 : INFO : PROGRESS: at sentence #140000, processed 832470 words and 447081 word types
2025-08-19 18:12:40,400 : INFO : PROGRESS: at sentence #150000, processed 891895 words and 473887 word types
2025-08-19 18:12:40,43

Updating existing Word2Vec model for 2021_2024


2025-08-19 18:12:50,432 : INFO : PROGRESS: at sentence #380000, processed 2244699 words, keeping 37371 word types
2025-08-19 18:12:50,436 : INFO : PROGRESS: at sentence #390000, processed 2298569 words, keeping 37792 word types
2025-08-19 18:12:50,441 : INFO : PROGRESS: at sentence #400000, processed 2354365 words, keeping 38172 word types
2025-08-19 18:12:50,447 : INFO : PROGRESS: at sentence #410000, processed 2413866 words, keeping 38531 word types
2025-08-19 18:12:50,455 : INFO : PROGRESS: at sentence #420000, processed 2472570 words, keeping 38901 word types
2025-08-19 18:12:50,461 : INFO : PROGRESS: at sentence #430000, processed 2530393 words, keeping 39260 word types
2025-08-19 18:12:50,466 : INFO : PROGRESS: at sentence #440000, processed 2580108 words, keeping 39575 word types
2025-08-19 18:12:50,472 : INFO : PROGRESS: at sentence #450000, processed 2627285 words, keeping 39931 word types
2025-08-19 18:12:50,477 : INFO : PROGRESS: at sentence #460000, processed 2685157 words,

Training model on 1000000 comments


2025-08-19 18:12:52,050 : INFO : EPOCH 0 - PROGRESS: at 57.88% examples, 2834658 words/s, in_qsize 31, out_qsize 0
2025-08-19 18:12:52,877 : INFO : EPOCH 0: training on 6137709 raw words (5250679 effective words) took 1.8s, 2871996 effective words/s
2025-08-19 18:12:53,897 : INFO : EPOCH 1 - PROGRESS: at 54.96% examples, 2708906 words/s, in_qsize 31, out_qsize 0
2025-08-19 18:12:54,748 : INFO : EPOCH 1: training on 6137709 raw words (5251348 effective words) took 1.9s, 2835290 effective words/s
2025-08-19 18:12:55,769 : INFO : EPOCH 2 - PROGRESS: at 52.82% examples, 2610755 words/s, in_qsize 31, out_qsize 0
2025-08-19 18:12:56,707 : INFO : EPOCH 2: training on 6137709 raw words (5251100 effective words) took 1.9s, 2707644 effective words/s
2025-08-19 18:12:57,733 : INFO : EPOCH 3 - PROGRESS: at 54.01% examples, 2645596 words/s, in_qsize 30, out_qsize 1
2025-08-19 18:12:58,621 : INFO : EPOCH 3: training on 6137709 raw words (5250919 effective words) took 1.9s, 2774061 effective words/s


Model saved to models/word2vec/interim/conservative_2021_2024_interim.model
Processing final 478038 comments for before_2016
Building bigram model for before_2016 with 478038 comments...
Creating new bigram model for before_2016 (cannot update Phraser objects)


2025-08-19 18:13:00,827 : INFO : PROGRESS: at sentence #30000, processed 528392 words and 322804 word types
2025-08-19 18:13:00,898 : INFO : PROGRESS: at sentence #40000, processed 672912 words and 389351 word types
2025-08-19 18:13:00,973 : INFO : PROGRESS: at sentence #50000, processed 842463 words and 464329 word types
2025-08-19 18:13:01,039 : INFO : PROGRESS: at sentence #60000, processed 996686 words and 529677 word types
2025-08-19 18:13:01,111 : INFO : PROGRESS: at sentence #70000, processed 1156654 words and 595287 word types
2025-08-19 18:13:01,202 : INFO : PROGRESS: at sentence #80000, processed 1325683 words and 663497 word types
2025-08-19 18:13:01,289 : INFO : PROGRESS: at sentence #90000, processed 1488569 words and 724066 word types
2025-08-19 18:13:01,369 : INFO : PROGRESS: at sentence #100000, processed 1672246 words and 793104 word types
2025-08-19 18:13:01,448 : INFO : PROGRESS: at sentence #110000, processed 1853201 words and 860649 word types
2025-08-19 18:13:01,5

Updating existing Word2Vec model for before_2016


2025-08-19 18:13:09,801 : INFO : PROGRESS: at sentence #170000, processed 2981326 words, keeping 40181 word types
2025-08-19 18:13:09,814 : INFO : PROGRESS: at sentence #180000, processed 3159648 words, keeping 41282 word types
2025-08-19 18:13:09,827 : INFO : PROGRESS: at sentence #190000, processed 3324223 words, keeping 42041 word types
2025-08-19 18:13:09,840 : INFO : PROGRESS: at sentence #200000, processed 3503401 words, keeping 43057 word types
2025-08-19 18:13:09,853 : INFO : PROGRESS: at sentence #210000, processed 3697615 words, keeping 44439 word types
2025-08-19 18:13:09,867 : INFO : PROGRESS: at sentence #220000, processed 3898208 words, keeping 45906 word types
2025-08-19 18:13:09,881 : INFO : PROGRESS: at sentence #230000, processed 4091801 words, keeping 47116 word types
2025-08-19 18:13:09,895 : INFO : PROGRESS: at sentence #240000, processed 4307434 words, keeping 48386 word types
2025-08-19 18:13:09,911 : INFO : PROGRESS: at sentence #250000, processed 4521973 words,

Training model on 478038 comments


2025-08-19 18:13:11,427 : INFO : EPOCH 0 - PROGRESS: at 40.16% examples, 3071032 words/s, in_qsize 31, out_qsize 0
2025-08-19 18:13:12,428 : INFO : EPOCH 0 - PROGRESS: at 79.01% examples, 3169955 words/s, in_qsize 31, out_qsize 0
2025-08-19 18:13:12,971 : INFO : EPOCH 0: training on 8845459 raw words (8185484 effective words) took 2.6s, 3208611 effective words/s
2025-08-19 18:13:13,990 : INFO : EPOCH 1 - PROGRESS: at 40.85% examples, 3134959 words/s, in_qsize 31, out_qsize 0
2025-08-19 18:13:14,990 : INFO : EPOCH 1 - PROGRESS: at 79.52% examples, 3197935 words/s, in_qsize 31, out_qsize 0
2025-08-19 18:13:15,542 : INFO : EPOCH 1: training on 8845459 raw words (8185189 effective words) took 2.6s, 3202847 effective words/s
2025-08-19 18:13:16,556 : INFO : EPOCH 2 - PROGRESS: at 40.95% examples, 3139453 words/s, in_qsize 31, out_qsize 0
2025-08-19 18:13:17,558 : INFO : EPOCH 2 - PROGRESS: at 79.30% examples, 3184880 words/s, in_qsize 31, out_qsize 0
2025-08-19 18:13:18,137 : INFO : EPOCH 2

Model saved to models/word2vec/conservative_before_2016.model
Processing final 675232 comments for 2017_2020
Building bigram model for 2017_2020 with 675232 comments...
Creating new bigram model for 2017_2020 (cannot update Phraser objects)


2025-08-19 18:13:23,703 : INFO : PROGRESS: at sentence #40000, processed 586200 words and 361718 word types
2025-08-19 18:13:23,757 : INFO : PROGRESS: at sentence #50000, processed 713265 words and 423350 word types
2025-08-19 18:13:23,816 : INFO : PROGRESS: at sentence #60000, processed 840740 words and 481075 word types
2025-08-19 18:13:23,878 : INFO : PROGRESS: at sentence #70000, processed 975448 words and 540805 word types
2025-08-19 18:13:23,916 : INFO : PROGRESS: at sentence #80000, processed 1055560 words and 574057 word types
2025-08-19 18:13:23,960 : INFO : PROGRESS: at sentence #90000, processed 1140119 words and 609862 word types
2025-08-19 18:13:24,023 : INFO : PROGRESS: at sentence #100000, processed 1259898 words and 659961 word types
2025-08-19 18:13:24,098 : INFO : PROGRESS: at sentence #110000, processed 1388059 words and 712952 word types
2025-08-19 18:13:24,155 : INFO : PROGRESS: at sentence #120000, processed 1514204 words and 761380 word types
2025-08-19 18:13:24,

Updating existing Word2Vec model for 2017_2020


2025-08-19 18:13:32,420 : INFO : PROGRESS: at sentence #210000, processed 2724088 words, keeping 40653 word types
2025-08-19 18:13:32,430 : INFO : PROGRESS: at sentence #220000, processed 2872738 words, keeping 41652 word types
2025-08-19 18:13:32,440 : INFO : PROGRESS: at sentence #230000, processed 2986008 words, keeping 42401 word types
2025-08-19 18:13:32,451 : INFO : PROGRESS: at sentence #240000, processed 3104222 words, keeping 43081 word types
2025-08-19 18:13:32,461 : INFO : PROGRESS: at sentence #250000, processed 3233907 words, keeping 43836 word types
2025-08-19 18:13:32,471 : INFO : PROGRESS: at sentence #260000, processed 3366978 words, keeping 44662 word types
2025-08-19 18:13:32,481 : INFO : PROGRESS: at sentence #270000, processed 3483770 words, keeping 45426 word types
2025-08-19 18:13:32,490 : INFO : PROGRESS: at sentence #280000, processed 3596875 words, keeping 46060 word types
2025-08-19 18:13:32,498 : INFO : PROGRESS: at sentence #290000, processed 3700702 words,

Training model on 675232 comments


2025-08-19 18:13:34,133 : INFO : EPOCH 0 - PROGRESS: at 38.14% examples, 3065881 words/s, in_qsize 31, out_qsize 0
2025-08-19 18:13:35,134 : INFO : EPOCH 0 - PROGRESS: at 81.54% examples, 3131325 words/s, in_qsize 31, out_qsize 0
2025-08-19 18:13:35,552 : INFO : EPOCH 0: training on 8286200 raw words (7620305 effective words) took 2.4s, 3147668 effective words/s
2025-08-19 18:13:36,563 : INFO : EPOCH 1 - PROGRESS: at 38.02% examples, 3056943 words/s, in_qsize 32, out_qsize 0
2025-08-19 18:13:37,563 : INFO : EPOCH 1 - PROGRESS: at 81.54% examples, 3132625 words/s, in_qsize 31, out_qsize 0
2025-08-19 18:13:37,964 : INFO : EPOCH 1: training on 8286200 raw words (7618843 effective words) took 2.4s, 3171191 effective words/s
2025-08-19 18:13:38,975 : INFO : EPOCH 2 - PROGRESS: at 38.26% examples, 3077981 words/s, in_qsize 31, out_qsize 0
2025-08-19 18:13:39,975 : INFO : EPOCH 2 - PROGRESS: at 82.39% examples, 3165199 words/s, in_qsize 31, out_qsize 0
2025-08-19 18:13:40,363 : INFO : EPOCH 2

Model saved to models/word2vec/conservative_2017_2020.model
Processing final 634806 comments for 2021_2024
Building bigram model for 2021_2024 with 634806 comments...
Creating new bigram model for 2021_2024 (cannot update Phraser objects)


2025-08-19 18:13:45,477 : INFO : PROGRESS: at sentence #40000, processed 400830 words and 257813 word types
2025-08-19 18:13:45,523 : INFO : PROGRESS: at sentence #50000, processed 497926 words and 309375 word types
2025-08-19 18:13:45,570 : INFO : PROGRESS: at sentence #60000, processed 586681 words and 354537 word types
2025-08-19 18:13:45,628 : INFO : PROGRESS: at sentence #70000, processed 716889 words and 420293 word types
2025-08-19 18:13:45,690 : INFO : PROGRESS: at sentence #80000, processed 855215 words and 486433 word types
2025-08-19 18:13:45,753 : INFO : PROGRESS: at sentence #90000, processed 989809 words and 546612 word types
2025-08-19 18:13:45,811 : INFO : PROGRESS: at sentence #100000, processed 1105961 words and 596777 word types
2025-08-19 18:13:45,874 : INFO : PROGRESS: at sentence #110000, processed 1221311 words and 643834 word types
2025-08-19 18:13:45,942 : INFO : PROGRESS: at sentence #120000, processed 1344606 words and 693545 word types
2025-08-19 18:13:46,02

Updating existing Word2Vec model for 2021_2024


2025-08-19 18:13:54,302 : INFO : PROGRESS: at sentence #220000, processed 2714310 words, keeping 40653 word types
2025-08-19 18:13:54,313 : INFO : PROGRESS: at sentence #230000, processed 2846601 words, keeping 41469 word types
2025-08-19 18:13:54,324 : INFO : PROGRESS: at sentence #240000, processed 2993331 words, keeping 42295 word types
2025-08-19 18:13:54,334 : INFO : PROGRESS: at sentence #250000, processed 3134951 words, keeping 43241 word types
2025-08-19 18:13:54,344 : INFO : PROGRESS: at sentence #260000, processed 3285462 words, keeping 44170 word types
2025-08-19 18:13:54,356 : INFO : PROGRESS: at sentence #270000, processed 3428515 words, keeping 45015 word types
2025-08-19 18:13:54,366 : INFO : PROGRESS: at sentence #280000, processed 3584388 words, keeping 45794 word types
2025-08-19 18:13:54,376 : INFO : PROGRESS: at sentence #290000, processed 3719920 words, keeping 46531 word types
2025-08-19 18:13:54,387 : INFO : PROGRESS: at sentence #300000, processed 3868718 words,

Training model on 634806 comments


2025-08-19 18:13:55,971 : INFO : EPOCH 0 - PROGRESS: at 42.57% examples, 3142924 words/s, in_qsize 31, out_qsize 0
2025-08-19 18:13:56,971 : INFO : EPOCH 0 - PROGRESS: at 84.10% examples, 3212198 words/s, in_qsize 31, out_qsize 0
2025-08-19 18:13:57,316 : INFO : EPOCH 0: training on 8195401 raw words (7564594 effective words) took 2.4s, 3216640 effective words/s
2025-08-19 18:13:58,330 : INFO : EPOCH 1 - PROGRESS: at 41.92% examples, 3107012 words/s, in_qsize 31, out_qsize 0
2025-08-19 18:13:59,330 : INFO : EPOCH 1 - PROGRESS: at 83.27% examples, 3191801 words/s, in_qsize 32, out_qsize 1
2025-08-19 18:13:59,680 : INFO : EPOCH 1: training on 8195401 raw words (7564591 effective words) took 2.4s, 3217364 effective words/s
2025-08-19 18:14:00,698 : INFO : EPOCH 2 - PROGRESS: at 42.46% examples, 3133862 words/s, in_qsize 31, out_qsize 0
2025-08-19 18:14:01,701 : INFO : EPOCH 2 - PROGRESS: at 83.27% examples, 3177854 words/s, in_qsize 31, out_qsize 0
2025-08-19 18:14:02,051 : INFO : EPOCH 2

Model saved to models/word2vec/conservative_2021_2024.model
Model saved to models/word2vec/conservative_before_2016.model
Model saved to models/word2vec/conservative_2017_2020.model


2025-08-19 18:14:07,036 : INFO : saved models/word2vec/conservative_2021_2024.model


Model saved to models/word2vec/conservative_2021_2024.model
Completed building models for conservative
Building models for liberal
Processing processed_comments/liberal\liberal_batch1.pkl
Loaded 490661 comments from processed_comments/liberal\liberal_batch1.pkl


2025-08-19 18:14:09,620 : INFO : collecting all words and their counts
2025-08-19 18:14:09,621 : INFO : PROGRESS: at sentence #0, processed 0 words and 0 word types
2025-08-19 18:14:09,727 : INFO : PROGRESS: at sentence #10000, processed 251711 words and 187567 word types


Processing final 131498 comments for before_2016
Building bigram model for before_2016 with 131498 comments...
Creating new bigram model for before_2016 (cannot update Phraser objects)


2025-08-19 18:14:09,827 : INFO : PROGRESS: at sentence #20000, processed 488462 words and 322106 word types
2025-08-19 18:14:09,950 : INFO : PROGRESS: at sentence #30000, processed 740641 words and 450867 word types
2025-08-19 18:14:10,074 : INFO : PROGRESS: at sentence #40000, processed 1016719 words and 581776 word types
2025-08-19 18:14:10,211 : INFO : PROGRESS: at sentence #50000, processed 1297447 words and 705665 word types
2025-08-19 18:14:10,324 : INFO : PROGRESS: at sentence #60000, processed 1559454 words and 816669 word types
2025-08-19 18:14:10,439 : INFO : PROGRESS: at sentence #70000, processed 1823770 words and 924397 word types
2025-08-19 18:14:10,559 : INFO : PROGRESS: at sentence #80000, processed 2086149 words and 1029031 word types
2025-08-19 18:14:10,679 : INFO : PROGRESS: at sentence #90000, processed 2351445 words and 1129720 word types
2025-08-19 18:14:10,785 : INFO : PROGRESS: at sentence #100000, processed 2584764 words and 1215138 word types
2025-08-19 18:14:

Updating existing Word2Vec model for before_2016


2025-08-19 18:14:14,932 : INFO : PROGRESS: at sentence #100000, processed 2554867 words, keeping 40219 word types
2025-08-19 18:14:14,952 : INFO : PROGRESS: at sentence #110000, processed 2800030 words, keeping 41822 word types
2025-08-19 18:14:14,970 : INFO : PROGRESS: at sentence #120000, processed 3030713 words, keeping 43233 word types
2025-08-19 18:14:14,986 : INFO : PROGRESS: at sentence #130000, processed 3258222 words, keeping 44625 word types
2025-08-19 18:14:14,989 : INFO : collected 44818 word types from a corpus of 3291254 raw words and 131498 sentences
2025-08-19 18:14:14,989 : INFO : Updating model with new vocabulary
2025-08-19 18:14:15,036 : INFO : Word2Vec lifecycle event {'msg': 'added 358 new unique words (0.80% of original 44818) and increased the count of 12045 pre-existing words (26.88% of original 44818)', 'datetime': '2025-08-19T18:14:15.036564', 'gensim': '4.3.3', 'python': '3.11.9 (tags/v3.11.9:de54cf5, Apr  2 2024, 10:12:12) [MSC v.1938 64 bit (AMD64)]', 'pla

Training model on 131498 comments


2025-08-19 18:14:16,181 : INFO : EPOCH 0 - PROGRESS: at 100.00% examples, 3069286 words/s, in_qsize 0, out_qsize 1
2025-08-19 18:14:16,181 : INFO : EPOCH 0: training on 3291254 raw words (3070438 effective words) took 1.0s, 3067428 effective words/s
2025-08-19 18:14:17,188 : INFO : EPOCH 1 - PROGRESS: at 98.30% examples, 3014901 words/s, in_qsize 6, out_qsize 1
2025-08-19 18:14:17,201 : INFO : EPOCH 1: training on 3291254 raw words (3070565 effective words) took 1.0s, 3034014 effective words/s
2025-08-19 18:14:18,216 : INFO : EPOCH 2 - PROGRESS: at 95.77% examples, 2925748 words/s, in_qsize 14, out_qsize 1
2025-08-19 18:14:18,242 : INFO : EPOCH 2: training on 3291254 raw words (3070240 effective words) took 1.0s, 2973354 effective words/s
2025-08-19 18:14:19,254 : INFO : EPOCH 3 - PROGRESS: at 94.21% examples, 2899104 words/s, in_qsize 18, out_qsize 0
2025-08-19 18:14:19,295 : INFO : EPOCH 3: training on 3291254 raw words (3070855 effective words) took 1.0s, 2940392 effective words/s
2

Model saved to models/word2vec/liberal_before_2016.model
Processing final 145815 comments for 2017_2020
Building bigram model for 2017_2020 with 145815 comments...
Creating new bigram model for 2017_2020 (cannot update Phraser objects)


2025-08-19 18:14:20,628 : INFO : PROGRESS: at sentence #30000, processed 553621 words and 353661 word types
2025-08-19 18:14:20,707 : INFO : PROGRESS: at sentence #40000, processed 725736 words and 438387 word types
2025-08-19 18:14:20,786 : INFO : PROGRESS: at sentence #50000, processed 902025 words and 521363 word types
2025-08-19 18:14:20,875 : INFO : PROGRESS: at sentence #60000, processed 1101521 words and 611199 word types
2025-08-19 18:14:20,954 : INFO : PROGRESS: at sentence #70000, processed 1270419 words and 683362 word types
2025-08-19 18:14:21,035 : INFO : PROGRESS: at sentence #80000, processed 1414919 words and 744521 word types
2025-08-19 18:14:21,110 : INFO : PROGRESS: at sentence #90000, processed 1578032 words and 810093 word types
2025-08-19 18:14:21,177 : INFO : PROGRESS: at sentence #100000, processed 1719189 words and 865749 word types
2025-08-19 18:14:21,238 : INFO : PROGRESS: at sentence #110000, processed 1852273 words and 916260 word types
2025-08-19 18:14:21,

Updating existing Word2Vec model for 2017_2020


2025-08-19 18:14:23,133 : INFO : Word2Vec lifecycle event {'msg': 'added 347 new unique words (0.88% of original 39553) and increased the count of 10187 pre-existing words (25.76% of original 39553)', 'datetime': '2025-08-19T18:14:23.133833', 'gensim': '4.3.3', 'python': '3.11.9 (tags/v3.11.9:de54cf5, Apr  2 2024, 10:12:12) [MSC v.1938 64 bit (AMD64)]', 'platform': 'Windows-10-10.0.26100-SP0', 'event': 'prepare_vocab'}
2025-08-19 18:14:23,156 : INFO : deleting the raw counts dictionary of 39553 items
2025-08-19 18:14:23,156 : INFO : sample=0.001 downsamples 40 most-common words
2025-08-19 18:14:23,157 : INFO : Word2Vec lifecycle event {'msg': 'downsampling leaves estimated 2119603.320767114 word corpus (93.0%% of prior 2279283)', 'datetime': '2025-08-19T18:14:23.157163', 'gensim': '4.3.3', 'python': '3.11.9 (tags/v3.11.9:de54cf5, Apr  2 2024, 10:12:12) [MSC v.1938 64 bit (AMD64)]', 'platform': 'Windows-10-10.0.26100-SP0', 'event': 'prepare_vocab'}
2025-08-19 18:14:23,267 : INFO : estim

Training model on 145815 comments


2025-08-19 18:14:24,014 : INFO : EPOCH 0: training on 2348291 raw words (2168498 effective words) took 0.7s, 3065962 effective words/s
2025-08-19 18:14:24,739 : INFO : EPOCH 1: training on 2348291 raw words (2168623 effective words) took 0.7s, 3028047 effective words/s
2025-08-19 18:14:25,447 : INFO : EPOCH 2: training on 2348291 raw words (2168295 effective words) took 0.7s, 3096373 effective words/s
2025-08-19 18:14:26,154 : INFO : EPOCH 3: training on 2348291 raw words (2167810 effective words) took 0.7s, 3102317 effective words/s
2025-08-19 18:14:26,867 : INFO : EPOCH 4: training on 2348291 raw words (2168291 effective words) took 0.7s, 3072039 effective words/s
2025-08-19 18:14:26,868 : INFO : Word2Vec lifecycle event {'msg': 'training on 11741455 raw words (10841517 effective words) took 3.6s, 3036436 effective words/s', 'datetime': '2025-08-19T18:14:26.868879', 'gensim': '4.3.3', 'python': '3.11.9 (tags/v3.11.9:de54cf5, Apr  2 2024, 10:12:12) [MSC v.1938 64 bit (AMD64)]', 'platf

Model saved to models/word2vec/liberal_2017_2020.model
Processing final 213348 comments for 2021_2024
Building bigram model for 2021_2024 with 213348 comments...
Creating new bigram model for 2021_2024 (cannot update Phraser objects)


2025-08-19 18:14:27,156 : INFO : PROGRESS: at sentence #40000, processed 490693 words and 308726 word types
2025-08-19 18:14:27,209 : INFO : PROGRESS: at sentence #50000, processed 606446 words and 366797 word types
2025-08-19 18:14:27,262 : INFO : PROGRESS: at sentence #60000, processed 737116 words and 430130 word types
2025-08-19 18:14:27,322 : INFO : PROGRESS: at sentence #70000, processed 873037 words and 494633 word types
2025-08-19 18:14:27,382 : INFO : PROGRESS: at sentence #80000, processed 995875 words and 552398 word types
2025-08-19 18:14:27,441 : INFO : PROGRESS: at sentence #90000, processed 1119419 words and 609806 word types
2025-08-19 18:14:27,515 : INFO : PROGRESS: at sentence #100000, processed 1243381 words and 665266 word types
2025-08-19 18:14:27,614 : INFO : PROGRESS: at sentence #110000, processed 1416876 words and 741595 word types
2025-08-19 18:14:27,692 : INFO : PROGRESS: at sentence #120000, processed 1591239 words and 816371 word types
2025-08-19 18:14:27,7

Updating existing Word2Vec model for 2021_2024


2025-08-19 18:14:30,238 : INFO : PROGRESS: at sentence #190000, processed 2567027 words, keeping 41500 word types
2025-08-19 18:14:30,250 : INFO : PROGRESS: at sentence #200000, processed 2707019 words, keeping 42295 word types
2025-08-19 18:14:30,259 : INFO : PROGRESS: at sentence #210000, processed 2834346 words, keeping 43097 word types
2025-08-19 18:14:30,263 : INFO : collected 43336 word types from a corpus of 2871462 raw words and 213348 sentences
2025-08-19 18:14:30,263 : INFO : Updating model with new vocabulary
2025-08-19 18:14:30,319 : INFO : Word2Vec lifecycle event {'msg': 'added 334 new unique words (0.77% of original 43336) and increased the count of 11314 pre-existing words (26.11% of original 43336)', 'datetime': '2025-08-19T18:14:30.319400', 'gensim': '4.3.3', 'python': '3.11.9 (tags/v3.11.9:de54cf5, Apr  2 2024, 10:12:12) [MSC v.1938 64 bit (AMD64)]', 'platform': 'Windows-10-10.0.26100-SP0', 'event': 'prepare_vocab'}
2025-08-19 18:14:30,341 : INFO : deleting the raw c

Training model on 213348 comments


2025-08-19 18:14:31,321 : INFO : EPOCH 0: training on 2871462 raw words (2627234 effective words) took 0.8s, 3154661 effective words/s
2025-08-19 18:14:32,173 : INFO : EPOCH 1: training on 2871462 raw words (2627778 effective words) took 0.8s, 3134195 effective words/s
2025-08-19 18:14:33,014 : INFO : EPOCH 2: training on 2871462 raw words (2627460 effective words) took 0.8s, 3164383 effective words/s
2025-08-19 18:14:33,866 : INFO : EPOCH 3: training on 2871462 raw words (2627159 effective words) took 0.8s, 3128696 effective words/s
2025-08-19 18:14:34,705 : INFO : EPOCH 4: training on 2871462 raw words (2627192 effective words) took 0.8s, 3175144 effective words/s
2025-08-19 18:14:34,705 : INFO : Word2Vec lifecycle event {'msg': 'training on 14357310 raw words (13136823 effective words) took 4.2s, 3105813 effective words/s', 'datetime': '2025-08-19T18:14:34.705445', 'gensim': '4.3.3', 'python': '3.11.9 (tags/v3.11.9:de54cf5, Apr  2 2024, 10:12:12) [MSC v.1938 64 bit (AMD64)]', 'platf

Model saved to models/word2vec/liberal_2021_2024.model
Model saved to models/word2vec/liberal_before_2016.model
Model saved to models/word2vec/liberal_2017_2020.model
Model saved to models/word2vec/liberal_2021_2024.model
Completed building models for liberal
Building models for backpacking
Processing processed_comments/backpacking\backpacking_batch1.pkl
Loaded 814804 comments from processed_comments/backpacking\backpacking_batch1.pkl


2025-08-19 18:14:40,011 : INFO : collecting all words and their counts
2025-08-19 18:14:40,012 : INFO : PROGRESS: at sentence #0, processed 0 words and 0 word types
2025-08-19 18:14:40,130 : INFO : PROGRESS: at sentence #10000, processed 254420 words and 179356 word types


Processing final 160438 comments for before_2016
Building bigram model for before_2016 with 160438 comments...
Creating new bigram model for before_2016 (cannot update Phraser objects)


2025-08-19 18:14:40,235 : INFO : PROGRESS: at sentence #20000, processed 505613 words and 314414 word types
2025-08-19 18:14:40,342 : INFO : PROGRESS: at sentence #30000, processed 744061 words and 426599 word types
2025-08-19 18:14:40,441 : INFO : PROGRESS: at sentence #40000, processed 978616 words and 529212 word types
2025-08-19 18:14:40,540 : INFO : PROGRESS: at sentence #50000, processed 1207351 words and 623497 word types
2025-08-19 18:14:40,658 : INFO : PROGRESS: at sentence #60000, processed 1438633 words and 710926 word types
2025-08-19 18:14:40,771 : INFO : PROGRESS: at sentence #70000, processed 1684797 words and 802910 word types
2025-08-19 18:14:40,880 : INFO : PROGRESS: at sentence #80000, processed 1923982 words and 890031 word types
2025-08-19 18:14:40,991 : INFO : PROGRESS: at sentence #90000, processed 2171228 words and 976489 word types
2025-08-19 18:14:41,096 : INFO : PROGRESS: at sentence #100000, processed 2402993 words and 1053833 word types
2025-08-19 18:14:41,

Updating existing Word2Vec model for before_2016


2025-08-19 18:14:44,028 : INFO : PROGRESS: at sentence #120000, processed 2844014 words, keeping 48238 word types
2025-08-19 18:14:44,045 : INFO : PROGRESS: at sentence #130000, processed 3082931 words, keeping 50586 word types
2025-08-19 18:14:44,063 : INFO : PROGRESS: at sentence #140000, processed 3323227 words, keeping 52714 word types
2025-08-19 18:14:44,075 : INFO : PROGRESS: at sentence #150000, processed 3546039 words, keeping 54413 word types
2025-08-19 18:14:44,093 : INFO : PROGRESS: at sentence #160000, processed 3778460 words, keeping 56254 word types
2025-08-19 18:14:44,094 : INFO : collected 56341 word types from a corpus of 3791556 raw words and 160438 sentences
2025-08-19 18:14:44,094 : INFO : Updating model with new vocabulary
2025-08-19 18:14:44,145 : INFO : Word2Vec lifecycle event {'msg': 'added 3211 new unique words (5.70% of original 56341) and increased the count of 9025 pre-existing words (16.02% of original 56341)', 'datetime': '2025-08-19T18:14:44.145278', 'ge

Training model on 160438 comments


2025-08-19 18:14:45,306 : INFO : EPOCH 0 - PROGRESS: at 91.27% examples, 3109988 words/s, in_qsize 31, out_qsize 0
2025-08-19 18:14:45,386 : INFO : EPOCH 0: training on 3791556 raw words (3410385 effective words) took 1.1s, 3148741 effective words/s
2025-08-19 18:14:46,394 : INFO : EPOCH 1 - PROGRESS: at 91.79% examples, 3132137 words/s, in_qsize 31, out_qsize 0
2025-08-19 18:14:46,478 : INFO : EPOCH 1: training on 3791556 raw words (3409785 effective words) took 1.1s, 3140407 effective words/s
2025-08-19 18:14:47,489 : INFO : EPOCH 2 - PROGRESS: at 91.84% examples, 3131355 words/s, in_qsize 30, out_qsize 1
2025-08-19 18:14:47,575 : INFO : EPOCH 2: training on 3791556 raw words (3410829 effective words) took 1.1s, 3135342 effective words/s
2025-08-19 18:14:48,585 : INFO : EPOCH 3 - PROGRESS: at 88.24% examples, 3014437 words/s, in_qsize 30, out_qsize 1
2025-08-19 18:14:48,718 : INFO : EPOCH 3: training on 3791556 raw words (3410354 effective words) took 1.1s, 3002878 effective words/s


Model saved to models/word2vec/backpacking_before_2016.model
Processing final 225801 comments for 2017_2020
Building bigram model for 2017_2020 with 225801 comments...
Creating new bigram model for 2017_2020 (cannot update Phraser objects)


2025-08-19 18:14:50,112 : INFO : PROGRESS: at sentence #20000, processed 482587 words and 305483 word types
2025-08-19 18:14:50,204 : INFO : PROGRESS: at sentence #30000, processed 683507 words and 401314 word types
2025-08-19 18:14:50,282 : INFO : PROGRESS: at sentence #40000, processed 870181 words and 481216 word types
2025-08-19 18:14:50,380 : INFO : PROGRESS: at sentence #50000, processed 1078297 words and 568421 word types
2025-08-19 18:14:50,476 : INFO : PROGRESS: at sentence #60000, processed 1265408 words and 638425 word types
2025-08-19 18:14:50,554 : INFO : PROGRESS: at sentence #70000, processed 1425978 words and 693426 word types
2025-08-19 18:14:50,632 : INFO : PROGRESS: at sentence #80000, processed 1574346 words and 741991 word types
2025-08-19 18:14:50,696 : INFO : PROGRESS: at sentence #90000, processed 1722335 words and 788571 word types
2025-08-19 18:14:50,761 : INFO : PROGRESS: at sentence #100000, processed 1868872 words and 834253 word types
2025-08-19 18:14:50,8

Updating existing Word2Vec model for 2017_2020


2025-08-19 18:14:54,132 : INFO : PROGRESS: at sentence #170000, processed 2885442 words, keeping 52629 word types
2025-08-19 18:14:54,143 : INFO : PROGRESS: at sentence #180000, processed 3045442 words, keeping 54339 word types
2025-08-19 18:14:54,159 : INFO : PROGRESS: at sentence #190000, processed 3212183 words, keeping 56061 word types
2025-08-19 18:14:54,170 : INFO : PROGRESS: at sentence #200000, processed 3374095 words, keeping 57399 word types
2025-08-19 18:14:54,183 : INFO : PROGRESS: at sentence #210000, processed 3543541 words, keeping 58687 word types
2025-08-19 18:14:54,196 : INFO : PROGRESS: at sentence #220000, processed 3696096 words, keeping 59970 word types
2025-08-19 18:14:54,203 : INFO : collected 60765 word types from a corpus of 3786224 raw words and 225801 sentences
2025-08-19 18:14:54,204 : INFO : Updating model with new vocabulary
2025-08-19 18:14:54,264 : INFO : Word2Vec lifecycle event {'msg': 'added 2969 new unique words (4.89% of original 60765) and increas

Training model on 225801 comments


2025-08-19 18:14:55,462 : INFO : EPOCH 0 - PROGRESS: at 87.92% examples, 3026194 words/s, in_qsize 31, out_qsize 0
2025-08-19 18:14:55,585 : INFO : EPOCH 0: training on 3786224 raw words (3426555 effective words) took 1.1s, 3045998 effective words/s
2025-08-19 18:14:56,596 : INFO : EPOCH 1 - PROGRESS: at 82.73% examples, 2853410 words/s, in_qsize 31, out_qsize 0
2025-08-19 18:14:56,767 : INFO : EPOCH 1: training on 3786224 raw words (3426461 effective words) took 1.2s, 2923528 effective words/s
2025-08-19 18:14:57,776 : INFO : EPOCH 2 - PROGRESS: at 89.46% examples, 3079708 words/s, in_qsize 30, out_qsize 1
2025-08-19 18:14:57,875 : INFO : EPOCH 2: training on 3786224 raw words (3426598 effective words) took 1.1s, 3110385 effective words/s
2025-08-19 18:14:58,886 : INFO : EPOCH 3 - PROGRESS: at 80.06% examples, 2762938 words/s, in_qsize 29, out_qsize 2
2025-08-19 18:14:59,101 : INFO : EPOCH 3: training on 3786224 raw words (3426692 effective words) took 1.2s, 2816144 effective words/s


Model saved to models/word2vec/backpacking_2017_2020.model
Processing final 428565 comments for 2021_2024
Building bigram model for 2021_2024 with 428565 comments...
Creating new bigram model for 2021_2024 (cannot update Phraser objects)


2025-08-19 18:15:00,526 : INFO : PROGRESS: at sentence #20000, processed 329580 words and 206629 word types
2025-08-19 18:15:00,599 : INFO : PROGRESS: at sentence #30000, processed 507202 words and 291600 word types
2025-08-19 18:15:00,674 : INFO : PROGRESS: at sentence #40000, processed 659002 words and 361615 word types
2025-08-19 18:15:00,738 : INFO : PROGRESS: at sentence #50000, processed 815126 words and 430962 word types
2025-08-19 18:15:00,804 : INFO : PROGRESS: at sentence #60000, processed 968284 words and 496592 word types
2025-08-19 18:15:00,878 : INFO : PROGRESS: at sentence #70000, processed 1123343 words and 558342 word types
2025-08-19 18:15:00,962 : INFO : PROGRESS: at sentence #80000, processed 1297982 words and 630481 word types
2025-08-19 18:15:01,049 : INFO : PROGRESS: at sentence #90000, processed 1473235 words and 698440 word types
2025-08-19 18:15:01,149 : INFO : PROGRESS: at sentence #100000, processed 1650346 words and 765118 word types
2025-08-19 18:15:01,235

Updating existing Word2Vec model for 2021_2024


2025-08-19 18:15:10,538 : INFO : PROGRESS: at sentence #170000, processed 2760553 words, keeping 51182 word types
2025-08-19 18:15:10,549 : INFO : PROGRESS: at sentence #180000, processed 2934728 words, keeping 53044 word types
2025-08-19 18:15:10,561 : INFO : PROGRESS: at sentence #190000, processed 3123478 words, keeping 54829 word types
2025-08-19 18:15:10,571 : INFO : PROGRESS: at sentence #200000, processed 3300524 words, keeping 56391 word types
2025-08-19 18:15:10,583 : INFO : PROGRESS: at sentence #210000, processed 3484873 words, keeping 58031 word types
2025-08-19 18:15:10,595 : INFO : PROGRESS: at sentence #220000, processed 3682419 words, keeping 59842 word types
2025-08-19 18:15:10,608 : INFO : PROGRESS: at sentence #230000, processed 3893854 words, keeping 61194 word types
2025-08-19 18:15:10,621 : INFO : PROGRESS: at sentence #240000, processed 4105895 words, keeping 62551 word types
2025-08-19 18:15:10,636 : INFO : PROGRESS: at sentence #250000, processed 4308622 words,

Training model on 428565 comments


2025-08-19 18:15:12,130 : INFO : EPOCH 0 - PROGRESS: at 46.91% examples, 3041229 words/s, in_qsize 31, out_qsize 0
2025-08-19 18:15:13,129 : INFO : EPOCH 0 - PROGRESS: at 87.61% examples, 3123096 words/s, in_qsize 31, out_qsize 0
2025-08-19 18:15:13,405 : INFO : EPOCH 0: training on 7820609 raw words (7163838 effective words) took 2.3s, 3147250 effective words/s
2025-08-19 18:15:14,417 : INFO : EPOCH 1 - PROGRESS: at 47.34% examples, 3065054 words/s, in_qsize 31, out_qsize 0
2025-08-19 18:15:15,422 : INFO : EPOCH 1 - PROGRESS: at 87.03% examples, 3089907 words/s, in_qsize 31, out_qsize 0
2025-08-19 18:15:15,716 : INFO : EPOCH 1: training on 7820609 raw words (7165548 effective words) took 2.3s, 3113472 effective words/s
2025-08-19 18:15:16,731 : INFO : EPOCH 2 - PROGRESS: at 47.48% examples, 3067211 words/s, in_qsize 31, out_qsize 0
2025-08-19 18:15:17,731 : INFO : EPOCH 2 - PROGRESS: at 87.16% examples, 3099499 words/s, in_qsize 31, out_qsize 1
2025-08-19 18:15:18,050 : INFO : EPOCH 2

Model saved to models/word2vec/backpacking_2021_2024.model
Model saved to models/word2vec/backpacking_before_2016.model
Model saved to models/word2vec/backpacking_2017_2020.model


2025-08-19 18:15:22,955 : INFO : saved models/word2vec/backpacking_2021_2024.model


Model saved to models/word2vec/backpacking_2021_2024.model
Completed building models for backpacking
Building models for vagabond
Processing processed_comments/vagabond\vagabond_batch1.pkl
Loaded 488480 comments from processed_comments/vagabond\vagabond_batch1.pkl


2025-08-19 18:15:25,220 : INFO : collecting all words and their counts
2025-08-19 18:15:25,221 : INFO : PROGRESS: at sentence #0, processed 0 words and 0 word types
2025-08-19 18:15:25,336 : INFO : PROGRESS: at sentence #10000, processed 250720 words and 185947 word types


Skipping final 39323 comments for before_2016 (less than minimum required)
Processing final 156983 comments for 2017_2020
Building bigram model for 2017_2020 with 156983 comments...
Creating new bigram model for 2017_2020 (cannot update Phraser objects)


2025-08-19 18:15:25,436 : INFO : PROGRESS: at sentence #20000, processed 481415 words and 322089 word types
2025-08-19 18:15:25,540 : INFO : PROGRESS: at sentence #30000, processed 692433 words and 435696 word types
2025-08-19 18:15:25,618 : INFO : PROGRESS: at sentence #40000, processed 873164 words and 524116 word types
2025-08-19 18:15:25,699 : INFO : PROGRESS: at sentence #50000, processed 1054456 words and 609004 word types
2025-08-19 18:15:25,781 : INFO : PROGRESS: at sentence #60000, processed 1228379 words and 687474 word types
2025-08-19 18:15:25,872 : INFO : PROGRESS: at sentence #70000, processed 1386605 words and 755956 word types
2025-08-19 18:15:25,958 : INFO : PROGRESS: at sentence #80000, processed 1561709 words and 828944 word types
2025-08-19 18:15:26,031 : INFO : PROGRESS: at sentence #90000, processed 1719013 words and 893349 word types
2025-08-19 18:15:26,119 : INFO : PROGRESS: at sentence #100000, processed 1903426 words and 969452 word types
2025-08-19 18:15:26,2

Updating existing Word2Vec model for 2017_2020


2025-08-19 18:15:28,623 : INFO : PROGRESS: at sentence #140000, processed 2623616 words, keeping 50921 word types
2025-08-19 18:15:28,638 : INFO : PROGRESS: at sentence #150000, processed 2794570 words, keeping 52918 word types
2025-08-19 18:15:28,650 : INFO : collected 54173 word types from a corpus of 2920360 raw words and 156983 sentences
2025-08-19 18:15:28,650 : INFO : Updating model with new vocabulary
2025-08-19 18:15:28,717 : INFO : Word2Vec lifecycle event {'msg': 'added 1016 new unique words (1.88% of original 54173) and increased the count of 10923 pre-existing words (20.16% of original 54173)', 'datetime': '2025-08-19T18:15:28.717886', 'gensim': '4.3.3', 'python': '3.11.9 (tags/v3.11.9:de54cf5, Apr  2 2024, 10:12:12) [MSC v.1938 64 bit (AMD64)]', 'platform': 'Windows-10-10.0.26100-SP0', 'event': 'prepare_vocab'}
2025-08-19 18:15:28,742 : INFO : deleting the raw counts dictionary of 54173 items
2025-08-19 18:15:28,742 : INFO : sample=0.001 downsamples 46 most-common words
20

Training model on 156983 comments


2025-08-19 18:15:29,825 : INFO : EPOCH 0: training on 2920360 raw words (2682686 effective words) took 0.9s, 2924729 effective words/s
2025-08-19 18:15:30,785 : INFO : EPOCH 1: training on 2920360 raw words (2683784 effective words) took 1.0s, 2822328 effective words/s
2025-08-19 18:15:31,673 : INFO : EPOCH 2: training on 2920360 raw words (2683920 effective words) took 0.9s, 3049610 effective words/s
2025-08-19 18:15:32,587 : INFO : EPOCH 3: training on 2920360 raw words (2683412 effective words) took 0.9s, 2961114 effective words/s
2025-08-19 18:15:33,502 : INFO : EPOCH 4: training on 2920360 raw words (2683540 effective words) took 0.9s, 2956743 effective words/s
2025-08-19 18:15:33,503 : INFO : Word2Vec lifecycle event {'msg': 'training on 14601800 raw words (13417342 effective words) took 4.6s, 2914287 effective words/s', 'datetime': '2025-08-19T18:15:33.503815', 'gensim': '4.3.3', 'python': '3.11.9 (tags/v3.11.9:de54cf5, Apr  2 2024, 10:12:12) [MSC v.1938 64 bit (AMD64)]', 'platf

Model saved to models/word2vec/vagabond_2017_2020.model
Processing final 292174 comments for 2021_2024
Building bigram model for 2021_2024 with 292174 comments...
Creating new bigram model for 2021_2024 (cannot update Phraser objects)


2025-08-19 18:15:33,832 : INFO : PROGRESS: at sentence #30000, processed 518249 words and 348410 word types
2025-08-19 18:15:33,901 : INFO : PROGRESS: at sentence #40000, processed 657225 words and 421407 word types
2025-08-19 18:15:33,971 : INFO : PROGRESS: at sentence #50000, processed 805191 words and 494969 word types
2025-08-19 18:15:34,060 : INFO : PROGRESS: at sentence #60000, processed 966078 words and 572252 word types
2025-08-19 18:15:34,144 : INFO : PROGRESS: at sentence #70000, processed 1116880 words and 640648 word types
2025-08-19 18:15:34,246 : INFO : PROGRESS: at sentence #80000, processed 1277480 words and 712812 word types
2025-08-19 18:15:34,316 : INFO : PROGRESS: at sentence #90000, processed 1431221 words and 781971 word types
2025-08-19 18:15:34,379 : INFO : PROGRESS: at sentence #100000, processed 1567646 words and 839096 word types
2025-08-19 18:15:34,452 : INFO : PROGRESS: at sentence #110000, processed 1723259 words and 901659 word types
2025-08-19 18:15:34,5

Updating existing Word2Vec model for 2021_2024


2025-08-19 18:15:38,479 : INFO : PROGRESS: at sentence #190000, processed 2896970 words, keeping 54754 word types
2025-08-19 18:15:38,489 : INFO : PROGRESS: at sentence #200000, processed 3058078 words, keeping 56284 word types
2025-08-19 18:15:38,499 : INFO : PROGRESS: at sentence #210000, processed 3220630 words, keeping 57632 word types
2025-08-19 18:15:38,509 : INFO : PROGRESS: at sentence #220000, processed 3374625 words, keeping 58980 word types
2025-08-19 18:15:38,519 : INFO : PROGRESS: at sentence #230000, processed 3534509 words, keeping 60345 word types
2025-08-19 18:15:38,528 : INFO : PROGRESS: at sentence #240000, processed 3678957 words, keeping 61582 word types
2025-08-19 18:15:38,537 : INFO : PROGRESS: at sentence #250000, processed 3831387 words, keeping 62957 word types
2025-08-19 18:15:38,546 : INFO : PROGRESS: at sentence #260000, processed 3969920 words, keeping 64332 word types
2025-08-19 18:15:38,555 : INFO : PROGRESS: at sentence #270000, processed 4102486 words,

Training model on 292174 comments


2025-08-19 18:15:39,847 : INFO : EPOCH 0 - PROGRESS: at 68.27% examples, 2800711 words/s, in_qsize 31, out_qsize 0
2025-08-19 18:15:40,219 : INFO : EPOCH 0: training on 4374540 raw words (4025870 effective words) took 1.4s, 2930474 effective words/s
2025-08-19 18:15:41,234 : INFO : EPOCH 1 - PROGRESS: at 70.97% examples, 2909227 words/s, in_qsize 32, out_qsize 0
2025-08-19 18:15:41,598 : INFO : EPOCH 1: training on 4374540 raw words (4026024 effective words) took 1.4s, 2937452 effective words/s
2025-08-19 18:15:42,608 : INFO : EPOCH 2 - PROGRESS: at 73.56% examples, 3034292 words/s, in_qsize 31, out_qsize 0
2025-08-19 18:15:42,899 : INFO : EPOCH 2: training on 4374540 raw words (4026590 effective words) took 1.3s, 3115963 effective words/s
2025-08-19 18:15:43,911 : INFO : EPOCH 3 - PROGRESS: at 72.07% examples, 2963225 words/s, in_qsize 31, out_qsize 0
2025-08-19 18:15:44,237 : INFO : EPOCH 3: training on 4374540 raw words (4025967 effective words) took 1.3s, 3028520 effective words/s


Model saved to models/word2vec/vagabond_2021_2024.model
Model saved to models/word2vec/vagabond_before_2016.model
Model saved to models/word2vec/vagabond_2017_2020.model


2025-08-19 18:15:45,793 : INFO : saved models/word2vec/vagabond_2021_2024.model


Model saved to models/word2vec/vagabond_2021_2024.model
Completed building models for vagabond


In [None]:
import os
import pickle
import datetime
import glob
import numpy as np
from collections import defaultdict
from tqdm import tqdm
import logging

# For NLP processing and modeling
import gensim
from gensim.models import Word2Vec
from gensim.models.phrases import Phrases, Phraser

# Configure logging
logging.basicConfig(format='%(asctime)s : %(levelname)s : %(message)s', level=logging.INFO)

class RedditWord2VecBuilder:
    def __init__(self, 
                 base_data_dir="processed_comments",
                 model_dir="models/yearly_models",  # Changed location
                 vector_size=300,
                 window=5,
                 min_count=5,
                 epochs=5,
                 workers=16,
                 sg=1,  # Skip-gram (1) vs CBOW (0)
                 min_comments_to_train=100000,
                 start_year=2024,
                 end_year=2024):
        """
        Initialize the model builder with parameters
        
        Args:
            base_data_dir: Directory containing processed comment pickle files
            model_dir: Directory to save trained models
            vector_size: Dimension of word vectors
            window: Maximum distance between current and predicted word
            min_count: Minimum word count for inclusion in vocabulary
            epochs: Number of training epochs
            workers: Number of worker threads
            sg: Training algorithm: 1 for skip-gram, 0 for CBOW
            min_comments_to_train: Minimum number of comments required for training
            start_year: First year to analyze
            end_year: Last year to analyze
        """
        self.base_data_dir = base_data_dir
        self.model_dir = model_dir
        self.vector_size = vector_size
        self.window = window
        self.min_count = min_count
        self.epochs = epochs
        self.workers = workers
        self.sg = sg
        self.min_comments_to_train = min_comments_to_train
        self.start_year = start_year
        self.end_year = end_year
        
        # Ensure model directories exist
        os.makedirs(f"{self.model_dir}/interim", exist_ok=True)
        
        # Generate years for analysis
        self.years = list(range(start_year, end_year + 1))
        
        # Initialize models for each year
        self.models = {str(year): None for year in self.years}
        self.bigram_models = {str(year): None for year in self.years}
        
    def _get_date_from_comment(self, comment):
        """Extract date from a comment dictionary"""
        try:
            return datetime.datetime.strptime(comment["date"], "%Y-%m-%d").date()
        except (KeyError, ValueError):
            # If date cannot be parsed, try using timestamp
            try:
                return datetime.datetime.fromtimestamp(int(comment["timestamp"])).date()
            except (KeyError, ValueError):
                return None
    
    def _get_year(self, date):
        """Extract year from date"""
        if date is None:
            return None
        
        year = date.year
        if self.start_year <= year <= self.end_year:
            return str(year)
        return None
    
    def _build_bigram_model(self, comments, year, min_count=5):
        """Build a bigram model for the given comments"""
        print(f"Building bigram model for {year} with {len(comments)} comments...")
        
        # Extract just the processed text from comments
        sentences = [comment["processed_text"] for comment in comments if "processed_text" in comment]
        
        # Create or update bigram model
        if self.bigram_models[year] is None:
            # First time creation
            phrases = Phrases(sentences, min_count=min_count, threshold=0.7, scoring='npmi')
            self.bigram_models[year] = Phraser(phrases)
        else:
            # For updates, we need to create a new Phrases model with all sentences
            print(f"Creating new bigram model for {year} (cannot update Phraser objects)")
            phrases = Phrases(sentences, min_count=min_count, threshold=0.7, scoring='npmi')
            self.bigram_models[year] = Phraser(phrases)
            
        return self.bigram_models[year]
    
    def _apply_bigrams(self, comments, bigram_model):
        """Apply bigram model to comments"""
        processed_comments = []
        
        for comment in comments:
            if "processed_text" in comment:
                # Apply bigram transformation to processed text
                bigrammed_text = bigram_model[comment["processed_text"]]
                processed_comments.append(bigrammed_text)
                
        return processed_comments
    
    def _create_or_update_model(self, comments, year):
        """Create a new model or update an existing one"""
        if self.models[year] is None:
            # Create new model
            print(f"Creating new Word2Vec model for {year}")
            model = Word2Vec(
                vector_size=self.vector_size,
                window=self.window,
                min_count=self.min_count,
                workers=self.workers,
                sg=self.sg
            )
            model.build_vocab(comments)
        else:
            # Update existing model
            print(f"Updating existing Word2Vec model for {year}")
            model = self.models[year]
            model.build_vocab(comments, update=True)
            
        # Train model
        print(f"Training model on {len(comments)} comments")
        model.train(
            comments,
            total_examples=len(comments),
            epochs=self.epochs
        )
        
        self.models[year] = model
        return model
    
    def _save_model(self, model, subreddit, year, is_interim=False):
        """Save model to disk"""
        if is_interim:
            path = f"{self.model_dir}/interim/{subreddit}_{year}_interim.model"
        else:
            path = f"{self.model_dir}/{subreddit}_{year}.model"
            
        model.save(path)
        print(f"Model saved to {path}")
    
    def find_pickle_files(self, subreddit):
        """Find all pickle files for a subreddit"""
        pattern = f"{self.base_data_dir}/{subreddit}/{subreddit}_batch*.pkl"
        files = sorted(glob.glob(pattern))
        if not files:
            print(f"No pickle files found for {subreddit} in {self.base_data_dir}/{subreddit}/")
        return files
    
    def process_pickle_file(self, file_path):
        """Load and process a pickle file"""
        try:
            with open(file_path, 'rb') as f:
                comments = pickle.load(f)
            print(f"Loaded {len(comments)} comments from {file_path}")
            return comments
        except Exception as e:
            print(f"Error loading {file_path}: {e}")
            return []
    
    def build_models(self, subreddit, chunk_size=1000000):
        """
        Build Word2Vec models for a subreddit by year
        
        Args:
            subreddit: Name of the subreddit to process
            chunk_size: Number of comments to process in each chunk
        """
        print(f"Building yearly models for {subreddit}")
        
        # Find all pickle files
        pickle_files = self.find_pickle_files(subreddit)
        if not pickle_files:
            return
        
        # Process each file
        comments_by_year = {str(year): [] for year in self.years}
        year_counts = {str(year): 0 for year in self.years}
        
        for file_path in pickle_files:
            print(f"Processing {file_path}")
            comments = self.process_pickle_file(file_path)
            
            # Sort comments by year
            for comment in comments:
                date = self._get_date_from_comment(comment)
                year = self._get_year(date)
                
                if year:
                    comments_by_year[year].append(comment)
                    year_counts[year] += 1
            
            # Process each year separately
            for year in [str(y) for y in self.years]:
                year_comments = comments_by_year[year]
                
                # If we have enough comments, train in chunks
                while len(year_comments) >= chunk_size:
                    print(f"Processing chunk of {chunk_size} comments for {year}")
                    
                    # Take a chunk of comments
                    chunk = year_comments[:chunk_size]
                    year_comments = year_comments[chunk_size:]
                    
                    # Build or update bigram model
                    bigram_model = self._build_bigram_model(chunk, year)
                    
                    # Apply bigrams
                    processed_chunk = self._apply_bigrams(chunk, bigram_model)
                    
                    if len(processed_chunk) > self.min_comments_to_train:
                        # Create or update model
                        model = self._create_or_update_model(processed_chunk, year)
                        
                        # Save interim model
                        self._save_model(model, subreddit, year, is_interim=True)
                
                # Store remaining comments for next file
                comments_by_year[year] = year_comments
        
        print("\n=== Comment Counts by Year ===")
        for year, count in year_counts.items():
            if count > 0:
                print(f"{year}: {count} comments")
        
        # Process any remaining comments that didn't make a full chunk
        for year in [str(y) for y in self.years]:
            remaining_comments = comments_by_year[year]
            if len(remaining_comments) > self.min_comments_to_train:
                print(f"Processing final {len(remaining_comments)} comments for {year}")
                
                # Build or update bigram model
                bigram_model = self._build_bigram_model(remaining_comments, year)
                
                # Apply bigrams
                processed_chunk = self._apply_bigrams(remaining_comments, bigram_model)
                
                # Create or update model
                model = self._create_or_update_model(processed_chunk, year)
                
                # Save final model
                if model is not None:
                    self._save_model(model, subreddit, year, is_interim=False)
            else:
                print(f"Skipping final {len(remaining_comments)} comments for {year} (less than minimum required)")
        
        # Save final models
        for year, model in self.models.items():
            if model is not None and year_counts[year] > 0:
                self._save_model(model, subreddit, year, is_interim=False)
        
        print(f"Completed building yearly models for {subreddit}")


def main():
    # Initialize the model builder
    builder = RedditWord2VecBuilder(
        base_data_dir="processed_comments",
        model_dir="models/yearly_models",  # Changed to yearly models directory
        vector_size=300,
        window=5,
        min_count=10,  # Minimum word frequency
        epochs=5,
        workers=16,
        sg=1,  # Skip-gram model
        min_comments_to_train=100000,
        start_year=2024,  # Adjust based on your data availability
        end_year=2024
    )
    
    # List of subreddits to process
    subreddits = ["democrats", "republican", "conservative", "liberal", "backpacking", "vagabond"]
    
    # Build models for each subreddit
    for subreddit in subreddits:
        builder.build_models(subreddit, chunk_size=1000000)


if __name__ == "__main__":
    main()

Building yearly models for democrats
Processing processed_comments/democrats\democrats_batch1.pkl
Loaded 1000000 comments from processed_comments/democrats\democrats_batch1.pkl
Processing processed_comments/democrats\democrats_batch2.pkl
Loaded 935437 comments from processed_comments/democrats\democrats_batch2.pkl


2025-08-19 19:28:41,916 : INFO : collecting all words and their counts
2025-08-19 19:28:41,916 : INFO : PROGRESS: at sentence #0, processed 0 words and 0 word types
2025-08-19 19:28:41,979 : INFO : PROGRESS: at sentence #10000, processed 160933 words and 123961 word types
2025-08-19 19:28:42,054 : INFO : PROGRESS: at sentence #20000, processed 322626 words and 223613 word types
2025-08-19 19:28:42,116 : INFO : PROGRESS: at sentence #30000, processed 491211 words and 315058 word types



=== Comment Counts by Year ===
2011: 3420 comments
2012: 8746 comments
2013: 11045 comments
2014: 11396 comments
2015: 21153 comments
2016: 71526 comments
2017: 56790 comments
2018: 110499 comments
2019: 108852 comments
2020: 197274 comments
2021: 194381 comments
2022: 213242 comments
2023: 179248 comments
Skipping final 3420 comments for 2011 (less than minimum required)
Skipping final 8746 comments for 2012 (less than minimum required)
Skipping final 11045 comments for 2013 (less than minimum required)
Skipping final 11396 comments for 2014 (less than minimum required)
Skipping final 21153 comments for 2015 (less than minimum required)
Skipping final 71526 comments for 2016 (less than minimum required)
Skipping final 56790 comments for 2017 (less than minimum required)
Processing final 110499 comments for 2018
Building bigram model for 2018 with 110499 comments...


2025-08-19 19:28:42,193 : INFO : PROGRESS: at sentence #40000, processed 656919 words and 400681 word types
2025-08-19 19:28:42,254 : INFO : PROGRESS: at sentence #50000, processed 814707 words and 477957 word types
2025-08-19 19:28:42,329 : INFO : PROGRESS: at sentence #60000, processed 991988 words and 555034 word types
2025-08-19 19:28:42,395 : INFO : PROGRESS: at sentence #70000, processed 1149841 words and 624027 word types
2025-08-19 19:28:42,454 : INFO : PROGRESS: at sentence #80000, processed 1301045 words and 690532 word types
2025-08-19 19:28:42,534 : INFO : PROGRESS: at sentence #90000, processed 1447450 words and 750681 word types
2025-08-19 19:28:42,593 : INFO : PROGRESS: at sentence #100000, processed 1586148 words and 806963 word types
2025-08-19 19:28:42,654 : INFO : PROGRESS: at sentence #110000, processed 1735770 words and 865716 word types
2025-08-19 19:28:42,661 : INFO : collected 869482 token types (unigram + bigrams) from a corpus of 1744903 words and 110499 sente

Creating new Word2Vec model for 2018
Training model on 110499 comments


2025-08-19 19:28:44,295 : INFO : EPOCH 0: training on 1724980 raw words (1547557 effective words) took 0.5s, 3275235 effective words/s
2025-08-19 19:28:44,779 : INFO : EPOCH 1: training on 1724980 raw words (1547699 effective words) took 0.5s, 3241660 effective words/s
2025-08-19 19:28:45,254 : INFO : EPOCH 2: training on 1724980 raw words (1548006 effective words) took 0.5s, 3298054 effective words/s
2025-08-19 19:28:45,737 : INFO : EPOCH 3: training on 1724980 raw words (1547615 effective words) took 0.5s, 3290417 effective words/s
2025-08-19 19:28:46,213 : INFO : EPOCH 4: training on 1724980 raw words (1547272 effective words) took 0.5s, 3304058 effective words/s
2025-08-19 19:28:46,214 : INFO : Word2Vec lifecycle event {'msg': 'training on 8624900 raw words (7738149 effective words) took 2.4s, 3223976 effective words/s', 'datetime': '2025-08-19T19:28:46.214386', 'gensim': '4.3.3', 'python': '3.11.9 (tags/v3.11.9:de54cf5, Apr  2 2024, 10:12:12) [MSC v.1938 64 bit (AMD64)]', 'platfor

Model saved to models/yearly_models/democrats_2018.model
Processing final 108852 comments for 2019
Building bigram model for 2019 with 108852 comments...


2025-08-19 19:28:46,454 : INFO : PROGRESS: at sentence #40000, processed 564474 words and 354547 word types
2025-08-19 19:28:46,510 : INFO : PROGRESS: at sentence #50000, processed 697607 words and 419595 word types
2025-08-19 19:28:46,554 : INFO : PROGRESS: at sentence #60000, processed 838762 words and 486616 word types
2025-08-19 19:28:46,626 : INFO : PROGRESS: at sentence #70000, processed 987671 words and 553545 word types
2025-08-19 19:28:46,681 : INFO : PROGRESS: at sentence #80000, processed 1128277 words and 614178 word types
2025-08-19 19:28:46,733 : INFO : PROGRESS: at sentence #90000, processed 1250921 words and 665900 word types
2025-08-19 19:28:46,794 : INFO : PROGRESS: at sentence #100000, processed 1373918 words and 716067 word types
2025-08-19 19:28:46,838 : INFO : collected 760617 token types (unigram + bigrams) from a corpus of 1485239 words and 108852 sentences
2025-08-19 19:28:46,838 : INFO : merged Phrases<760617 vocab, min_count=5, threshold=0.7, max_vocab_size=4

Creating new Word2Vec model for 2019
Training model on 108852 comments


2025-08-19 19:28:48,279 : INFO : EPOCH 0: training on 1469660 raw words (1303885 effective words) took 0.4s, 3057615 effective words/s
2025-08-19 19:28:48,696 : INFO : EPOCH 1: training on 1469660 raw words (1304082 effective words) took 0.4s, 3204455 effective words/s
2025-08-19 19:28:49,112 : INFO : EPOCH 2: training on 1469660 raw words (1303731 effective words) took 0.4s, 3210334 effective words/s
2025-08-19 19:28:49,521 : INFO : EPOCH 3: training on 1469660 raw words (1303842 effective words) took 0.4s, 3242673 effective words/s
2025-08-19 19:28:49,935 : INFO : EPOCH 4: training on 1469660 raw words (1303777 effective words) took 0.4s, 3227742 effective words/s
2025-08-19 19:28:49,935 : INFO : Word2Vec lifecycle event {'msg': 'training on 7348300 raw words (6519317 effective words) took 2.1s, 3113403 effective words/s', 'datetime': '2025-08-19T19:28:49.935133', 'gensim': '4.3.3', 'python': '3.11.9 (tags/v3.11.9:de54cf5, Apr  2 2024, 10:12:12) [MSC v.1938 64 bit (AMD64)]', 'platfor

Model saved to models/yearly_models/democrats_2019.model
Processing final 197274 comments for 2020
Building bigram model for 2020 with 197274 comments...


2025-08-19 19:28:50,196 : INFO : PROGRESS: at sentence #50000, processed 623691 words and 367427 word types
2025-08-19 19:28:50,252 : INFO : PROGRESS: at sentence #60000, processed 754580 words and 430396 word types
2025-08-19 19:28:50,296 : INFO : PROGRESS: at sentence #70000, processed 879742 words and 488565 word types
2025-08-19 19:28:50,349 : INFO : PROGRESS: at sentence #80000, processed 994465 words and 539317 word types
2025-08-19 19:28:50,393 : INFO : PROGRESS: at sentence #90000, processed 1117636 words and 592403 word types
2025-08-19 19:28:50,440 : INFO : PROGRESS: at sentence #100000, processed 1238584 words and 642935 word types
2025-08-19 19:28:50,496 : INFO : PROGRESS: at sentence #110000, processed 1348403 words and 686853 word types
2025-08-19 19:28:50,557 : INFO : PROGRESS: at sentence #120000, processed 1462523 words and 732678 word types
2025-08-19 19:28:50,607 : INFO : PROGRESS: at sentence #130000, processed 1581513 words and 777650 word types
2025-08-19 19:28:50

Creating new Word2Vec model for 2020


2025-08-19 19:28:52,402 : INFO : resetting layer weights
2025-08-19 19:28:52,414 : INFO : Word2Vec lifecycle event {'update': False, 'trim_rule': 'None', 'datetime': '2025-08-19T19:28:52.414076', 'gensim': '4.3.3', 'python': '3.11.9 (tags/v3.11.9:de54cf5, Apr  2 2024, 10:12:12) [MSC v.1938 64 bit (AMD64)]', 'platform': 'Windows-10-10.0.26100-SP0', 'event': 'build_vocab'}
2025-08-19 19:28:52,414 : INFO : Word2Vec lifecycle event {'msg': 'training model with 16 workers on 10223 vocabulary and 300 features, using sg=1 hs=0 sample=0.001 negative=5 window=5 shrink_windows=True', 'datetime': '2025-08-19T19:28:52.414076', 'gensim': '4.3.3', 'python': '3.11.9 (tags/v3.11.9:de54cf5, Apr  2 2024, 10:12:12) [MSC v.1938 64 bit (AMD64)]', 'platform': 'Windows-10-10.0.26100-SP0', 'event': 'train'}


Training model on 197274 comments


2025-08-19 19:28:53,035 : INFO : EPOCH 0: training on 2305092 raw words (2045280 effective words) took 0.6s, 3354037 effective words/s
2025-08-19 19:28:53,642 : INFO : EPOCH 1: training on 2305092 raw words (2045851 effective words) took 0.6s, 3396325 effective words/s
2025-08-19 19:28:54,270 : INFO : EPOCH 2: training on 2305092 raw words (2045400 effective words) took 0.6s, 3338110 effective words/s
2025-08-19 19:28:54,891 : INFO : EPOCH 3: training on 2305092 raw words (2045874 effective words) took 0.6s, 3382026 effective words/s
2025-08-19 19:28:55,493 : INFO : EPOCH 4: training on 2305092 raw words (2045770 effective words) took 0.6s, 3415561 effective words/s
2025-08-19 19:28:55,493 : INFO : Word2Vec lifecycle event {'msg': 'training on 11525460 raw words (10228175 effective words) took 3.1s, 3311327 effective words/s', 'datetime': '2025-08-19T19:28:55.493391', 'gensim': '4.3.3', 'python': '3.11.9 (tags/v3.11.9:de54cf5, Apr  2 2024, 10:12:12) [MSC v.1938 64 bit (AMD64)]', 'platf

Model saved to models/yearly_models/democrats_2020.model
Processing final 194381 comments for 2021
Building bigram model for 2021 with 194381 comments...


2025-08-19 19:28:55,730 : INFO : PROGRESS: at sentence #50000, processed 531060 words and 335115 word types
2025-08-19 19:28:55,779 : INFO : PROGRESS: at sentence #60000, processed 648297 words and 391050 word types
2025-08-19 19:28:55,834 : INFO : PROGRESS: at sentence #70000, processed 775037 words and 452188 word types
2025-08-19 19:28:55,879 : INFO : PROGRESS: at sentence #80000, processed 889776 words and 505802 word types
2025-08-19 19:28:55,932 : INFO : PROGRESS: at sentence #90000, processed 1010201 words and 559857 word types
2025-08-19 19:28:55,988 : INFO : PROGRESS: at sentence #100000, processed 1129841 words and 610802 word types
2025-08-19 19:28:56,035 : INFO : PROGRESS: at sentence #110000, processed 1249037 words and 662914 word types
2025-08-19 19:28:56,098 : INFO : PROGRESS: at sentence #120000, processed 1364191 words and 711825 word types
2025-08-19 19:28:56,139 : INFO : PROGRESS: at sentence #130000, processed 1474238 words and 758193 word types
2025-08-19 19:28:56

Creating new Word2Vec model for 2021


2025-08-19 19:29:00,270 : INFO : deleting the raw counts dictionary of 39739 items
2025-08-19 19:29:00,270 : INFO : sample=0.001 downsamples 39 most-common words
2025-08-19 19:29:00,271 : INFO : Word2Vec lifecycle event {'msg': 'downsampling leaves estimated 2011202.5779075536 word corpus (91.9%% of prior 2188092)', 'datetime': '2025-08-19T19:29:00.271026', 'gensim': '4.3.3', 'python': '3.11.9 (tags/v3.11.9:de54cf5, Apr  2 2024, 10:12:12) [MSC v.1938 64 bit (AMD64)]', 'platform': 'Windows-10-10.0.26100-SP0', 'event': 'prepare_vocab'}
2025-08-19 19:29:00,298 : INFO : estimated required memory for 10342 words and 300 dimensions: 29991800 bytes
2025-08-19 19:29:00,299 : INFO : resetting layer weights
2025-08-19 19:29:00,304 : INFO : Word2Vec lifecycle event {'update': False, 'trim_rule': 'None', 'datetime': '2025-08-19T19:29:00.304357', 'gensim': '4.3.3', 'python': '3.11.9 (tags/v3.11.9:de54cf5, Apr  2 2024, 10:12:12) [MSC v.1938 64 bit (AMD64)]', 'platform': 'Windows-10-10.0.26100-SP0', 

Training model on 194381 comments


2025-08-19 19:29:00,929 : INFO : EPOCH 0: training on 2258128 raw words (2010967 effective words) took 0.6s, 3272171 effective words/s
2025-08-19 19:29:01,552 : INFO : EPOCH 1: training on 2258128 raw words (2011210 effective words) took 0.6s, 3297646 effective words/s
2025-08-19 19:29:02,170 : INFO : EPOCH 2: training on 2258128 raw words (2011575 effective words) took 0.6s, 3299494 effective words/s
2025-08-19 19:29:02,787 : INFO : EPOCH 3: training on 2258128 raw words (2010607 effective words) took 0.6s, 3318491 effective words/s
2025-08-19 19:29:03,409 : INFO : EPOCH 4: training on 2258128 raw words (2011180 effective words) took 0.6s, 3290819 effective words/s
2025-08-19 19:29:03,410 : INFO : Word2Vec lifecycle event {'msg': 'training on 11290640 raw words (10055539 effective words) took 3.1s, 3239331 effective words/s', 'datetime': '2025-08-19T19:29:03.410816', 'gensim': '4.3.3', 'python': '3.11.9 (tags/v3.11.9:de54cf5, Apr  2 2024, 10:12:12) [MSC v.1938 64 bit (AMD64)]', 'platf

Model saved to models/yearly_models/democrats_2021.model
Processing final 213242 comments for 2022
Building bigram model for 2022 with 213242 comments...


2025-08-19 19:29:03,712 : INFO : PROGRESS: at sentence #60000, processed 696969 words and 415670 word types
2025-08-19 19:29:03,777 : INFO : PROGRESS: at sentence #70000, processed 831322 words and 482388 word types
2025-08-19 19:29:03,835 : INFO : PROGRESS: at sentence #80000, processed 966456 words and 546113 word types
2025-08-19 19:29:03,890 : INFO : PROGRESS: at sentence #90000, processed 1099532 words and 604092 word types
2025-08-19 19:29:03,947 : INFO : PROGRESS: at sentence #100000, processed 1236496 words and 663348 word types
2025-08-19 19:29:04,010 : INFO : PROGRESS: at sentence #110000, processed 1364821 words and 718170 word types
2025-08-19 19:29:04,065 : INFO : PROGRESS: at sentence #120000, processed 1486676 words and 772147 word types
2025-08-19 19:29:04,120 : INFO : PROGRESS: at sentence #130000, processed 1611923 words and 824763 word types
2025-08-19 19:29:04,170 : INFO : PROGRESS: at sentence #140000, processed 1730779 words and 872462 word types
2025-08-19 19:29:

Creating new Word2Vec model for 2022


2025-08-19 19:29:06,234 : INFO : Word2Vec lifecycle event {'msg': 'effective_min_count=10 retains 11391 unique words (26.66% of original 42727, drops 31336)', 'datetime': '2025-08-19T19:29:06.234052', 'gensim': '4.3.3', 'python': '3.11.9 (tags/v3.11.9:de54cf5, Apr  2 2024, 10:12:12) [MSC v.1938 64 bit (AMD64)]', 'platform': 'Windows-10-10.0.26100-SP0', 'event': 'prepare_vocab'}
2025-08-19 19:29:06,234 : INFO : Word2Vec lifecycle event {'msg': 'effective_min_count=10 leaves 2505732 word corpus (97.04% of original 2582066, drops 76334)', 'datetime': '2025-08-19T19:29:06.234356', 'gensim': '4.3.3', 'python': '3.11.9 (tags/v3.11.9:de54cf5, Apr  2 2024, 10:12:12) [MSC v.1938 64 bit (AMD64)]', 'platform': 'Windows-10-10.0.26100-SP0', 'event': 'prepare_vocab'}
2025-08-19 19:29:06,254 : INFO : deleting the raw counts dictionary of 42727 items
2025-08-19 19:29:06,255 : INFO : sample=0.001 downsamples 34 most-common words
2025-08-19 19:29:06,255 : INFO : Word2Vec lifecycle event {'msg': 'downsam

Training model on 213242 comments


2025-08-19 19:29:06,992 : INFO : EPOCH 0: training on 2582066 raw words (2317513 effective words) took 0.7s, 3333434 effective words/s
2025-08-19 19:29:07,715 : INFO : EPOCH 1: training on 2582066 raw words (2317499 effective words) took 0.7s, 3282114 effective words/s
2025-08-19 19:29:08,426 : INFO : EPOCH 2: training on 2582066 raw words (2317656 effective words) took 0.7s, 3298276 effective words/s
2025-08-19 19:29:09,134 : INFO : EPOCH 3: training on 2582066 raw words (2317906 effective words) took 0.7s, 3306195 effective words/s
2025-08-19 19:29:09,837 : INFO : EPOCH 4: training on 2582066 raw words (2318226 effective words) took 0.7s, 3341338 effective words/s
2025-08-19 19:29:09,837 : INFO : Word2Vec lifecycle event {'msg': 'training on 12910330 raw words (11588800 effective words) took 3.5s, 3267691 effective words/s', 'datetime': '2025-08-19T19:29:09.837343', 'gensim': '4.3.3', 'python': '3.11.9 (tags/v3.11.9:de54cf5, Apr  2 2024, 10:12:12) [MSC v.1938 64 bit (AMD64)]', 'platf

Model saved to models/yearly_models/democrats_2022.model
Processing final 179248 comments for 2023
Building bigram model for 2023 with 179248 comments...


2025-08-19 19:29:10,129 : INFO : PROGRESS: at sentence #60000, processed 684445 words and 426594 word types
2025-08-19 19:29:10,184 : INFO : PROGRESS: at sentence #70000, processed 809901 words and 488855 word types
2025-08-19 19:29:10,237 : INFO : PROGRESS: at sentence #80000, processed 940030 words and 551751 word types
2025-08-19 19:29:10,296 : INFO : PROGRESS: at sentence #90000, processed 1077557 words and 616335 word types
2025-08-19 19:29:10,353 : INFO : PROGRESS: at sentence #100000, processed 1205843 words and 672940 word types
2025-08-19 19:29:10,412 : INFO : PROGRESS: at sentence #110000, processed 1333228 words and 728808 word types
2025-08-19 19:29:10,465 : INFO : PROGRESS: at sentence #120000, processed 1460769 words and 781605 word types
2025-08-19 19:29:10,523 : INFO : PROGRESS: at sentence #130000, processed 1593568 words and 836514 word types
2025-08-19 19:29:10,577 : INFO : PROGRESS: at sentence #140000, processed 1724427 words and 888529 word types
2025-08-19 19:29:

Creating new Word2Vec model for 2023


2025-08-19 19:29:12,334 : INFO : estimated required memory for 10852 words and 300 dimensions: 31470800 bytes
2025-08-19 19:29:12,334 : INFO : resetting layer weights
2025-08-19 19:29:12,334 : INFO : Word2Vec lifecycle event {'update': False, 'trim_rule': 'None', 'datetime': '2025-08-19T19:29:12.334447', 'gensim': '4.3.3', 'python': '3.11.9 (tags/v3.11.9:de54cf5, Apr  2 2024, 10:12:12) [MSC v.1938 64 bit (AMD64)]', 'platform': 'Windows-10-10.0.26100-SP0', 'event': 'build_vocab'}
2025-08-19 19:29:12,334 : INFO : Word2Vec lifecycle event {'msg': 'training model with 16 workers on 10852 vocabulary and 300 features, using sg=1 hs=0 sample=0.001 negative=5 window=5 shrink_windows=True', 'datetime': '2025-08-19T19:29:12.334447', 'gensim': '4.3.3', 'python': '3.11.9 (tags/v3.11.9:de54cf5, Apr  2 2024, 10:12:12) [MSC v.1938 64 bit (AMD64)]', 'platform': 'Windows-10-10.0.26100-SP0', 'event': 'train'}


Training model on 179248 comments


2025-08-19 19:29:12,953 : INFO : EPOCH 0: training on 2251471 raw words (2022114 effective words) took 0.6s, 3282826 effective words/s
2025-08-19 19:29:13,592 : INFO : EPOCH 1: training on 2251471 raw words (2022382 effective words) took 0.6s, 3283293 effective words/s
2025-08-19 19:29:14,210 : INFO : EPOCH 2: training on 2251471 raw words (2022282 effective words) took 0.6s, 3338751 effective words/s
2025-08-19 19:29:14,823 : INFO : EPOCH 3: training on 2251471 raw words (2022183 effective words) took 0.6s, 3361831 effective words/s
2025-08-19 19:29:15,434 : INFO : EPOCH 4: training on 2251471 raw words (2021741 effective words) took 0.6s, 3333003 effective words/s
2025-08-19 19:29:15,434 : INFO : Word2Vec lifecycle event {'msg': 'training on 11257355 raw words (10110702 effective words) took 3.1s, 3267099 effective words/s', 'datetime': '2025-08-19T19:29:15.434344', 'gensim': '4.3.3', 'python': '3.11.9 (tags/v3.11.9:de54cf5, Apr  2 2024, 10:12:12) [MSC v.1938 64 bit (AMD64)]', 'platf

Model saved to models/yearly_models/democrats_2023.model
Model saved to models/yearly_models/democrats_2018.model
Model saved to models/yearly_models/democrats_2019.model
Model saved to models/yearly_models/democrats_2020.model
Model saved to models/yearly_models/democrats_2021.model
Model saved to models/yearly_models/democrats_2022.model
Model saved to models/yearly_models/democrats_2023.model
Completed building yearly models for democrats
Building yearly models for republican
Processing processed_comments/republican\republican_batch1.pkl
Loaded 1000000 comments from processed_comments/republican\republican_batch1.pkl
Processing processed_comments/republican\republican_batch2.pkl
Loaded 292460 comments from processed_comments/republican\republican_batch2.pkl


2025-08-19 19:29:22,492 : INFO : collecting all words and their counts
2025-08-19 19:29:22,492 : INFO : PROGRESS: at sentence #0, processed 0 words and 0 word types
2025-08-19 19:29:22,553 : INFO : PROGRESS: at sentence #10000, processed 138686 words and 111696 word types
2025-08-19 19:29:22,600 : INFO : PROGRESS: at sentence #20000, processed 266396 words and 192689 word types
2025-08-19 19:29:22,653 : INFO : PROGRESS: at sentence #30000, processed 399799 words and 268828 word types



=== Comment Counts by Year ===
2011: 6860 comments
2012: 79470 comments
2013: 36435 comments
2014: 20105 comments
2015: 37139 comments
2016: 82492 comments
2017: 69595 comments
2018: 47127 comments
2019: 69574 comments
2020: 228997 comments
2021: 222169 comments
2022: 147136 comments
2023: 68177 comments
Skipping final 6860 comments for 2011 (less than minimum required)
Skipping final 79470 comments for 2012 (less than minimum required)
Skipping final 36435 comments for 2013 (less than minimum required)
Skipping final 20105 comments for 2014 (less than minimum required)
Skipping final 37139 comments for 2015 (less than minimum required)
Skipping final 82492 comments for 2016 (less than minimum required)
Skipping final 69595 comments for 2017 (less than minimum required)
Skipping final 47127 comments for 2018 (less than minimum required)
Skipping final 69574 comments for 2019 (less than minimum required)
Processing final 228997 comments for 2020
Building bigram model for 2020 with 2289

2025-08-19 19:29:22,718 : INFO : PROGRESS: at sentence #40000, processed 544055 words and 347144 word types
2025-08-19 19:29:22,790 : INFO : PROGRESS: at sentence #50000, processed 709821 words and 430353 word types
2025-08-19 19:29:22,855 : INFO : PROGRESS: at sentence #60000, processed 862478 words and 501179 word types
2025-08-19 19:29:22,917 : INFO : PROGRESS: at sentence #70000, processed 1013230 words and 569824 word types
2025-08-19 19:29:22,978 : INFO : PROGRESS: at sentence #80000, processed 1168239 words and 637896 word types
2025-08-19 19:29:23,054 : INFO : PROGRESS: at sentence #90000, processed 1315999 words and 699157 word types
2025-08-19 19:29:23,114 : INFO : PROGRESS: at sentence #100000, processed 1449335 words and 751711 word types
2025-08-19 19:29:23,167 : INFO : PROGRESS: at sentence #110000, processed 1575752 words and 800103 word types
2025-08-19 19:29:23,228 : INFO : PROGRESS: at sentence #120000, processed 1718616 words and 855437 word types
2025-08-19 19:29:23

Updating existing Word2Vec model for 2020


2025-08-19 19:29:25,739 : INFO : PROGRESS: at sentence #220000, processed 2978008 words, keeping 43177 word types
2025-08-19 19:29:25,757 : INFO : collected 43944 word types from a corpus of 3096749 raw words and 228997 sentences
2025-08-19 19:29:25,757 : INFO : Updating model with new vocabulary
2025-08-19 19:29:25,781 : INFO : Word2Vec lifecycle event {'msg': 'added 2738 new unique words (6.23% of original 43944) and increased the count of 8618 pre-existing words (19.61% of original 43944)', 'datetime': '2025-08-19T19:29:25.781168', 'gensim': '4.3.3', 'python': '3.11.9 (tags/v3.11.9:de54cf5, Apr  2 2024, 10:12:12) [MSC v.1938 64 bit (AMD64)]', 'platform': 'Windows-10-10.0.26100-SP0', 'event': 'prepare_vocab'}
2025-08-19 19:29:25,802 : INFO : deleting the raw counts dictionary of 43944 items
2025-08-19 19:29:25,802 : INFO : sample=0.001 downsamples 45 most-common words
2025-08-19 19:29:25,803 : INFO : Word2Vec lifecycle event {'msg': 'downsampling leaves estimated 2811174.1457790723 w

Training model on 228997 comments


2025-08-19 19:29:26,724 : INFO : EPOCH 0: training on 3096749 raw words (2816084 effective words) took 0.9s, 3268779 effective words/s
2025-08-19 19:29:27,577 : INFO : EPOCH 1: training on 3096749 raw words (2816169 effective words) took 0.8s, 3338670 effective words/s
2025-08-19 19:29:28,448 : INFO : EPOCH 2: training on 3096749 raw words (2816517 effective words) took 0.9s, 3299184 effective words/s
2025-08-19 19:29:29,303 : INFO : EPOCH 3: training on 3096749 raw words (2816142 effective words) took 0.9s, 3287109 effective words/s
2025-08-19 19:29:30,178 : INFO : EPOCH 4: training on 3096749 raw words (2816865 effective words) took 0.9s, 3263729 effective words/s
2025-08-19 19:29:30,178 : INFO : Word2Vec lifecycle event {'msg': 'training on 15483745 raw words (14081777 effective words) took 4.3s, 3252057 effective words/s', 'datetime': '2025-08-19T19:29:30.178322', 'gensim': '4.3.3', 'python': '3.11.9 (tags/v3.11.9:de54cf5, Apr  2 2024, 10:12:12) [MSC v.1938 64 bit (AMD64)]', 'platf

Model saved to models/yearly_models/republican_2020.model
Processing final 222169 comments for 2021
Building bigram model for 2021 with 222169 comments...
Creating new bigram model for 2021 (cannot update Phraser objects)


2025-08-19 19:29:30,426 : INFO : PROGRESS: at sentence #40000, processed 555874 words and 348336 word types
2025-08-19 19:29:30,472 : INFO : PROGRESS: at sentence #50000, processed 683413 words and 411507 word types
2025-08-19 19:29:30,528 : INFO : PROGRESS: at sentence #60000, processed 827508 words and 479931 word types
2025-08-19 19:29:30,597 : INFO : PROGRESS: at sentence #70000, processed 964834 words and 542408 word types
2025-08-19 19:29:30,654 : INFO : PROGRESS: at sentence #80000, processed 1094976 words and 599850 word types
2025-08-19 19:29:30,697 : INFO : PROGRESS: at sentence #90000, processed 1227483 words and 658228 word types
2025-08-19 19:29:30,768 : INFO : PROGRESS: at sentence #100000, processed 1345468 words and 708811 word types
2025-08-19 19:29:30,816 : INFO : PROGRESS: at sentence #110000, processed 1480234 words and 764822 word types
2025-08-19 19:29:30,878 : INFO : PROGRESS: at sentence #120000, processed 1604937 words and 814070 word types
2025-08-19 19:29:30,

Updating existing Word2Vec model for 2021


2025-08-19 19:29:33,304 : INFO : collected 44169 word types from a corpus of 2882916 raw words and 222169 sentences
2025-08-19 19:29:33,305 : INFO : Updating model with new vocabulary
2025-08-19 19:29:33,327 : INFO : Word2Vec lifecycle event {'msg': 'added 2789 new unique words (6.31% of original 44169) and increased the count of 8740 pre-existing words (19.79% of original 44169)', 'datetime': '2025-08-19T19:29:33.327035', 'gensim': '4.3.3', 'python': '3.11.9 (tags/v3.11.9:de54cf5, Apr  2 2024, 10:12:12) [MSC v.1938 64 bit (AMD64)]', 'platform': 'Windows-10-10.0.26100-SP0', 'event': 'prepare_vocab'}
2025-08-19 19:29:33,333 : INFO : deleting the raw counts dictionary of 44169 items
2025-08-19 19:29:33,333 : INFO : sample=0.001 downsamples 41 most-common words
2025-08-19 19:29:33,333 : INFO : Word2Vec lifecycle event {'msg': 'downsampling leaves estimated 2629457.5811727755 word corpus (93.7%% of prior 2807401)', 'datetime': '2025-08-19T19:29:33.333921', 'gensim': '4.3.3', 'python': '3.1

Training model on 222169 comments


2025-08-19 19:29:34,200 : INFO : EPOCH 0: training on 2882916 raw words (2634691 effective words) took 0.8s, 3316922 effective words/s
2025-08-19 19:29:35,009 : INFO : EPOCH 1: training on 2882916 raw words (2635075 effective words) took 0.8s, 3319646 effective words/s
2025-08-19 19:29:35,805 : INFO : EPOCH 2: training on 2882916 raw words (2635140 effective words) took 0.8s, 3351786 effective words/s
2025-08-19 19:29:36,586 : INFO : EPOCH 3: training on 2882916 raw words (2635355 effective words) took 0.8s, 3407929 effective words/s
2025-08-19 19:29:37,372 : INFO : EPOCH 4: training on 2882916 raw words (2635046 effective words) took 0.8s, 3402302 effective words/s
2025-08-19 19:29:37,373 : INFO : Word2Vec lifecycle event {'msg': 'training on 14414580 raw words (13175307 effective words) took 4.0s, 3317178 effective words/s', 'datetime': '2025-08-19T19:29:37.373595', 'gensim': '4.3.3', 'python': '3.11.9 (tags/v3.11.9:de54cf5, Apr  2 2024, 10:12:12) [MSC v.1938 64 bit (AMD64)]', 'platf

Model saved to models/yearly_models/republican_2021.model
Processing final 147136 comments for 2022
Building bigram model for 2022 with 147136 comments...
Creating new bigram model for 2022 (cannot update Phraser objects)


2025-08-19 19:29:37,652 : INFO : PROGRESS: at sentence #50000, processed 642863 words and 397669 word types
2025-08-19 19:29:37,709 : INFO : PROGRESS: at sentence #60000, processed 771374 words and 462849 word types
2025-08-19 19:29:37,769 : INFO : PROGRESS: at sentence #70000, processed 913737 words and 531680 word types
2025-08-19 19:29:37,829 : INFO : PROGRESS: at sentence #80000, processed 1054825 words and 595120 word types
2025-08-19 19:29:37,892 : INFO : PROGRESS: at sentence #90000, processed 1198240 words and 655230 word types
2025-08-19 19:29:37,950 : INFO : PROGRESS: at sentence #100000, processed 1323985 words and 708454 word types
2025-08-19 19:29:38,017 : INFO : PROGRESS: at sentence #110000, processed 1447882 words and 761270 word types
2025-08-19 19:29:38,079 : INFO : PROGRESS: at sentence #120000, processed 1566722 words and 810122 word types
2025-08-19 19:29:38,130 : INFO : PROGRESS: at sentence #130000, processed 1671643 words and 853522 word types
2025-08-19 19:29:3

Updating existing Word2Vec model for 2022


2025-08-19 19:29:39,511 : INFO : estimated required memory for 9423 words and 300 dimensions: 27326700 bytes
2025-08-19 19:29:39,511 : INFO : updating layer weights
2025-08-19 19:29:39,529 : INFO : Word2Vec lifecycle event {'update': True, 'trim_rule': 'None', 'datetime': '2025-08-19T19:29:39.529120', 'gensim': '4.3.3', 'python': '3.11.9 (tags/v3.11.9:de54cf5, Apr  2 2024, 10:12:12) [MSC v.1938 64 bit (AMD64)]', 'platform': 'Windows-10-10.0.26100-SP0', 'event': 'build_vocab'}
2025-08-19 19:29:39,529 : INFO : Word2Vec lifecycle event {'msg': 'training model with 16 workers on 12715 vocabulary and 300 features, using sg=1 hs=0 sample=0.001 negative=5 window=5 shrink_windows=True', 'datetime': '2025-08-19T19:29:39.529469', 'gensim': '4.3.3', 'python': '3.11.9 (tags/v3.11.9:de54cf5, Apr  2 2024, 10:12:12) [MSC v.1938 64 bit (AMD64)]', 'platform': 'Windows-10-10.0.26100-SP0', 'event': 'train'}


Training model on 147136 comments


2025-08-19 19:29:40,050 : INFO : EPOCH 0: training on 1853302 raw words (1680706 effective words) took 0.5s, 3289274 effective words/s
2025-08-19 19:29:40,574 : INFO : EPOCH 1: training on 1853302 raw words (1680728 effective words) took 0.5s, 3317190 effective words/s
2025-08-19 19:29:41,088 : INFO : EPOCH 2: training on 1853302 raw words (1680824 effective words) took 0.5s, 3377715 effective words/s
2025-08-19 19:29:41,591 : INFO : EPOCH 3: training on 1853302 raw words (1680879 effective words) took 0.5s, 3331607 effective words/s
2025-08-19 19:29:42,125 : INFO : EPOCH 4: training on 1853302 raw words (1680941 effective words) took 0.5s, 3299149 effective words/s
2025-08-19 19:29:42,125 : INFO : Word2Vec lifecycle event {'msg': 'training on 9266510 raw words (8404078 effective words) took 2.6s, 3237498 effective words/s', 'datetime': '2025-08-19T19:29:42.125404', 'gensim': '4.3.3', 'python': '3.11.9 (tags/v3.11.9:de54cf5, Apr  2 2024, 10:12:12) [MSC v.1938 64 bit (AMD64)]', 'platfor

Model saved to models/yearly_models/republican_2022.model
Skipping final 68177 comments for 2023 (less than minimum required)
Model saved to models/yearly_models/republican_2018.model
Model saved to models/yearly_models/republican_2019.model
Model saved to models/yearly_models/republican_2020.model
Model saved to models/yearly_models/republican_2021.model
Model saved to models/yearly_models/republican_2022.model
Model saved to models/yearly_models/republican_2023.model
Completed building yearly models for republican
Building yearly models for conservative
Processing processed_comments/conservative\conservative_batch1.pkl
Loaded 1000000 comments from processed_comments/conservative\conservative_batch1.pkl
Processing processed_comments/conservative\conservative_batch10.pkl
Loaded 1000000 comments from processed_comments/conservative\conservative_batch10.pkl


2025-08-19 19:29:55,310 : INFO : collecting all words and their counts
2025-08-19 19:29:55,310 : INFO : PROGRESS: at sentence #0, processed 0 words and 0 word types
2025-08-19 19:29:55,369 : INFO : PROGRESS: at sentence #10000, processed 135999 words and 109702 word types
2025-08-19 19:29:55,410 : INFO : PROGRESS: at sentence #20000, processed 266146 words and 192461 word types


Processing chunk of 1000000 comments for 2021
Building bigram model for 2021 with 1000000 comments...
Creating new bigram model for 2021 (cannot update Phraser objects)


2025-08-19 19:29:55,469 : INFO : PROGRESS: at sentence #30000, processed 398874 words and 268227 word types
2025-08-19 19:29:55,527 : INFO : PROGRESS: at sentence #40000, processed 538979 words and 341661 word types
2025-08-19 19:29:55,577 : INFO : PROGRESS: at sentence #50000, processed 676027 words and 412003 word types
2025-08-19 19:29:55,638 : INFO : PROGRESS: at sentence #60000, processed 809013 words and 477238 word types
2025-08-19 19:29:55,699 : INFO : PROGRESS: at sentence #70000, processed 950017 words and 543883 word types
2025-08-19 19:29:55,749 : INFO : PROGRESS: at sentence #80000, processed 1089113 words and 606872 word types
2025-08-19 19:29:55,810 : INFO : PROGRESS: at sentence #90000, processed 1208887 words and 658730 word types
2025-08-19 19:29:55,877 : INFO : PROGRESS: at sentence #100000, processed 1323218 words and 706912 word types
2025-08-19 19:29:55,930 : INFO : PROGRESS: at sentence #110000, processed 1446537 words and 760024 word types
2025-08-19 19:29:55,99

Updating existing Word2Vec model for 2021


2025-08-19 19:30:10,596 : INFO : PROGRESS: at sentence #210000, processed 2738107 words, keeping 42193 word types
2025-08-19 19:30:10,605 : INFO : PROGRESS: at sentence #220000, processed 2872514 words, keeping 43086 word types
2025-08-19 19:30:10,612 : INFO : PROGRESS: at sentence #230000, processed 2993348 words, keeping 43990 word types
2025-08-19 19:30:10,621 : INFO : PROGRESS: at sentence #240000, processed 3127685 words, keeping 44928 word types
2025-08-19 19:30:10,630 : INFO : PROGRESS: at sentence #250000, processed 3271732 words, keeping 45823 word types
2025-08-19 19:30:10,638 : INFO : PROGRESS: at sentence #260000, processed 3398855 words, keeping 46653 word types
2025-08-19 19:30:10,646 : INFO : PROGRESS: at sentence #270000, processed 3526320 words, keeping 47413 word types
2025-08-19 19:30:10,654 : INFO : PROGRESS: at sentence #280000, processed 3666968 words, keeping 48139 word types
2025-08-19 19:30:10,664 : INFO : PROGRESS: at sentence #290000, processed 3813650 words,

Training model on 1000000 comments


2025-08-19 19:30:12,414 : INFO : EPOCH 0 - PROGRESS: at 26.29% examples, 3177385 words/s, in_qsize 31, out_qsize 0
2025-08-19 19:30:13,415 : INFO : EPOCH 0 - PROGRESS: at 55.67% examples, 3275720 words/s, in_qsize 31, out_qsize 0
2025-08-19 19:30:14,416 : INFO : EPOCH 0 - PROGRESS: at 85.07% examples, 3293674 words/s, in_qsize 32, out_qsize 0
2025-08-19 19:30:14,929 : INFO : EPOCH 0: training on 12610719 raw words (11645027 effective words) took 3.5s, 3301751 effective words/s
2025-08-19 19:30:15,955 : INFO : EPOCH 1 - PROGRESS: at 26.63% examples, 3203914 words/s, in_qsize 31, out_qsize 0
2025-08-19 19:30:16,955 : INFO : EPOCH 1 - PROGRESS: at 55.04% examples, 3236294 words/s, in_qsize 31, out_qsize 0
2025-08-19 19:30:17,955 : INFO : EPOCH 1 - PROGRESS: at 84.63% examples, 3274292 words/s, in_qsize 31, out_qsize 0
2025-08-19 19:30:18,509 : INFO : EPOCH 1: training on 12610719 raw words (11644653 effective words) took 3.6s, 3269430 effective words/s
2025-08-19 19:30:19,521 : INFO : EPO

Model saved to models/yearly_models/interim/conservative_2021_interim.model
Processing processed_comments/conservative\conservative_batch11.pkl
Loaded 1000000 comments from processed_comments/conservative\conservative_batch11.pkl


2025-08-19 19:30:36,344 : INFO : collecting all words and their counts
2025-08-19 19:30:36,345 : INFO : PROGRESS: at sentence #0, processed 0 words and 0 word types
2025-08-19 19:30:36,392 : INFO : PROGRESS: at sentence #10000, processed 134888 words and 108447 word types
2025-08-19 19:30:36,442 : INFO : PROGRESS: at sentence #20000, processed 250434 words and 182352 word types


Processing chunk of 1000000 comments for 2021
Building bigram model for 2021 with 1000000 comments...
Creating new bigram model for 2021 (cannot update Phraser objects)


2025-08-19 19:30:36,490 : INFO : PROGRESS: at sentence #30000, processed 375640 words and 255779 word types
2025-08-19 19:30:36,534 : INFO : PROGRESS: at sentence #40000, processed 489177 words and 316867 word types
2025-08-19 19:30:36,587 : INFO : PROGRESS: at sentence #50000, processed 589578 words and 368513 word types
2025-08-19 19:30:36,634 : INFO : PROGRESS: at sentence #60000, processed 702876 words and 425092 word types
2025-08-19 19:30:36,683 : INFO : PROGRESS: at sentence #70000, processed 821285 words and 481944 word types
2025-08-19 19:30:36,732 : INFO : PROGRESS: at sentence #80000, processed 939259 words and 538191 word types
2025-08-19 19:30:36,784 : INFO : PROGRESS: at sentence #90000, processed 1062510 words and 595789 word types
2025-08-19 19:30:36,828 : INFO : PROGRESS: at sentence #100000, processed 1195205 words and 653423 word types
2025-08-19 19:30:36,887 : INFO : PROGRESS: at sentence #110000, processed 1312335 words and 702790 word types
2025-08-19 19:30:36,949

Updating existing Word2Vec model for 2021


2025-08-19 19:30:48,122 : INFO : PROGRESS: at sentence #240000, processed 2934444 words, keeping 44657 word types
2025-08-19 19:30:48,132 : INFO : PROGRESS: at sentence #250000, processed 3052984 words, keeping 45517 word types
2025-08-19 19:30:48,143 : INFO : PROGRESS: at sentence #260000, processed 3185942 words, keeping 46332 word types
2025-08-19 19:30:48,151 : INFO : PROGRESS: at sentence #270000, processed 3312681 words, keeping 47168 word types
2025-08-19 19:30:48,153 : INFO : PROGRESS: at sentence #280000, processed 3430735 words, keeping 47988 word types
2025-08-19 19:30:48,166 : INFO : PROGRESS: at sentence #290000, processed 3548760 words, keeping 48725 word types
2025-08-19 19:30:48,174 : INFO : PROGRESS: at sentence #300000, processed 3672283 words, keeping 49606 word types
2025-08-19 19:30:48,183 : INFO : PROGRESS: at sentence #310000, processed 3791295 words, keeping 50373 word types
2025-08-19 19:30:48,193 : INFO : PROGRESS: at sentence #320000, processed 3930202 words,

Training model on 1000000 comments


2025-08-19 19:30:49,914 : INFO : EPOCH 0 - PROGRESS: at 27.60% examples, 3124946 words/s, in_qsize 30, out_qsize 0
2025-08-19 19:30:50,920 : INFO : EPOCH 0 - PROGRESS: at 57.82% examples, 3238259 words/s, in_qsize 30, out_qsize 1
2025-08-19 19:30:51,922 : INFO : EPOCH 0 - PROGRESS: at 90.42% examples, 3295097 words/s, in_qsize 31, out_qsize 0
2025-08-19 19:30:52,172 : INFO : EPOCH 0: training on 11718748 raw words (10817150 effective words) took 3.3s, 3320377 effective words/s
2025-08-19 19:30:53,183 : INFO : EPOCH 1 - PROGRESS: at 28.58% examples, 3231755 words/s, in_qsize 31, out_qsize 0
2025-08-19 19:30:54,186 : INFO : EPOCH 1 - PROGRESS: at 58.78% examples, 3288260 words/s, in_qsize 31, out_qsize 0
2025-08-19 19:30:55,190 : INFO : EPOCH 1 - PROGRESS: at 90.50% examples, 3297235 words/s, in_qsize 31, out_qsize 0
2025-08-19 19:30:55,428 : INFO : EPOCH 1: training on 11718748 raw words (10817259 effective words) took 3.3s, 3320588 effective words/s
2025-08-19 19:30:56,451 : INFO : EPO

Model saved to models/yearly_models/interim/conservative_2021_interim.model
Processing processed_comments/conservative\conservative_batch12.pkl
Loaded 1000000 comments from processed_comments/conservative\conservative_batch12.pkl


2025-08-19 19:31:12,307 : INFO : collecting all words and their counts
2025-08-19 19:31:12,307 : INFO : PROGRESS: at sentence #0, processed 0 words and 0 word types
2025-08-19 19:31:12,344 : INFO : PROGRESS: at sentence #10000, processed 96436 words and 79654 word types
2025-08-19 19:31:12,371 : INFO : PROGRESS: at sentence #20000, processed 189739 words and 141495 word types


Processing chunk of 1000000 comments for 2021
Building bigram model for 2021 with 1000000 comments...
Creating new bigram model for 2021 (cannot update Phraser objects)


2025-08-19 19:31:12,416 : INFO : PROGRESS: at sentence #30000, processed 281767 words and 196314 word types
2025-08-19 19:31:12,451 : INFO : PROGRESS: at sentence #40000, processed 374976 words and 249021 word types
2025-08-19 19:31:12,485 : INFO : PROGRESS: at sentence #50000, processed 470873 words and 301142 word types
2025-08-19 19:31:12,524 : INFO : PROGRESS: at sentence #60000, processed 578912 words and 358230 word types
2025-08-19 19:31:12,583 : INFO : PROGRESS: at sentence #70000, processed 697299 words and 416837 word types
2025-08-19 19:31:12,628 : INFO : PROGRESS: at sentence #80000, processed 811332 words and 473364 word types
2025-08-19 19:31:12,680 : INFO : PROGRESS: at sentence #90000, processed 941584 words and 535566 word types
2025-08-19 19:31:12,724 : INFO : PROGRESS: at sentence #100000, processed 1064745 words and 592967 word types
2025-08-19 19:31:12,780 : INFO : PROGRESS: at sentence #110000, processed 1181414 words and 645154 word types
2025-08-19 19:31:12,806 

Updating existing Word2Vec model for 2021


2025-08-19 19:31:23,491 : INFO : PROGRESS: at sentence #250000, processed 2732828 words, keeping 43020 word types
2025-08-19 19:31:23,500 : INFO : PROGRESS: at sentence #260000, processed 2840603 words, keeping 43877 word types
2025-08-19 19:31:23,507 : INFO : PROGRESS: at sentence #270000, processed 2948378 words, keeping 44745 word types
2025-08-19 19:31:23,513 : INFO : PROGRESS: at sentence #280000, processed 3065121 words, keeping 45460 word types
2025-08-19 19:31:23,522 : INFO : PROGRESS: at sentence #290000, processed 3181679 words, keeping 46243 word types
2025-08-19 19:31:23,530 : INFO : PROGRESS: at sentence #300000, processed 3298751 words, keeping 47036 word types
2025-08-19 19:31:23,538 : INFO : PROGRESS: at sentence #310000, processed 3415747 words, keeping 47933 word types
2025-08-19 19:31:23,546 : INFO : PROGRESS: at sentence #320000, processed 3528074 words, keeping 48706 word types
2025-08-19 19:31:23,554 : INFO : PROGRESS: at sentence #330000, processed 3639921 words,

Training model on 1000000 comments


2025-08-19 19:31:25,279 : INFO : EPOCH 0 - PROGRESS: at 31.75% examples, 3226783 words/s, in_qsize 31, out_qsize 0
2025-08-19 19:31:26,280 : INFO : EPOCH 0 - PROGRESS: at 65.13% examples, 3320073 words/s, in_qsize 31, out_qsize 0
2025-08-19 19:31:27,281 : INFO : EPOCH 0 - PROGRESS: at 98.92% examples, 3358472 words/s, in_qsize 11, out_qsize 1
2025-08-19 19:31:27,302 : INFO : EPOCH 0: training on 11013409 raw words (10178801 effective words) took 3.0s, 3367459 effective words/s
2025-08-19 19:31:28,323 : INFO : EPOCH 1 - PROGRESS: at 32.29% examples, 3275303 words/s, in_qsize 32, out_qsize 0
2025-08-19 19:31:29,325 : INFO : EPOCH 1 - PROGRESS: at 65.53% examples, 3334368 words/s, in_qsize 31, out_qsize 0
2025-08-19 19:31:30,326 : INFO : EPOCH 1 - PROGRESS: at 99.16% examples, 3360234 words/s, in_qsize 9, out_qsize 1
2025-08-19 19:31:30,329 : INFO : EPOCH 1: training on 11013409 raw words (10179215 effective words) took 3.0s, 3372579 effective words/s
2025-08-19 19:31:31,357 : INFO : EPOC

Model saved to models/yearly_models/interim/conservative_2021_interim.model
Processing processed_comments/conservative\conservative_batch13.pkl
Loaded 1000000 comments from processed_comments/conservative\conservative_batch13.pkl
Processing processed_comments/conservative\conservative_batch14.pkl
Loaded 1000000 comments from processed_comments/conservative\conservative_batch14.pkl


2025-08-19 19:31:50,406 : INFO : collecting all words and their counts
2025-08-19 19:31:50,406 : INFO : PROGRESS: at sentence #0, processed 0 words and 0 word types
2025-08-19 19:31:50,448 : INFO : PROGRESS: at sentence #10000, processed 100822 words and 84149 word types
2025-08-19 19:31:50,494 : INFO : PROGRESS: at sentence #20000, processed 208686 words and 156427 word types


Processing chunk of 1000000 comments for 2022
Building bigram model for 2022 with 1000000 comments...
Creating new bigram model for 2022 (cannot update Phraser objects)


2025-08-19 19:31:50,544 : INFO : PROGRESS: at sentence #30000, processed 328144 words and 229080 word types
2025-08-19 19:31:50,592 : INFO : PROGRESS: at sentence #40000, processed 442567 words and 293678 word types
2025-08-19 19:31:50,634 : INFO : PROGRESS: at sentence #50000, processed 546123 words and 348907 word types
2025-08-19 19:31:50,687 : INFO : PROGRESS: at sentence #60000, processed 663981 words and 409719 word types
2025-08-19 19:31:50,723 : INFO : PROGRESS: at sentence #70000, processed 775831 words and 464531 word types
2025-08-19 19:31:50,784 : INFO : PROGRESS: at sentence #80000, processed 898654 words and 522666 word types
2025-08-19 19:31:50,842 : INFO : PROGRESS: at sentence #90000, processed 1014183 words and 576513 word types
2025-08-19 19:31:50,903 : INFO : PROGRESS: at sentence #100000, processed 1142894 words and 633321 word types
2025-08-19 19:31:50,966 : INFO : PROGRESS: at sentence #110000, processed 1270902 words and 688948 word types
2025-08-19 19:31:51,025

Updating existing Word2Vec model for 2022


2025-08-19 19:32:05,475 : INFO : PROGRESS: at sentence #240000, processed 2770143 words, keeping 45121 word types
2025-08-19 19:32:05,483 : INFO : PROGRESS: at sentence #250000, processed 2881385 words, keeping 45949 word types
2025-08-19 19:32:05,491 : INFO : PROGRESS: at sentence #260000, processed 2991070 words, keeping 46829 word types
2025-08-19 19:32:05,498 : INFO : PROGRESS: at sentence #270000, processed 3102240 words, keeping 47845 word types
2025-08-19 19:32:05,506 : INFO : PROGRESS: at sentence #280000, processed 3215068 words, keeping 48685 word types
2025-08-19 19:32:05,513 : INFO : PROGRESS: at sentence #290000, processed 3320740 words, keeping 49492 word types
2025-08-19 19:32:05,521 : INFO : PROGRESS: at sentence #300000, processed 3427203 words, keeping 50162 word types
2025-08-19 19:32:05,528 : INFO : PROGRESS: at sentence #310000, processed 3531463 words, keeping 51023 word types
2025-08-19 19:32:05,535 : INFO : PROGRESS: at sentence #320000, processed 3628953 words,

Training model on 1000000 comments


2025-08-19 19:32:07,267 : INFO : EPOCH 0 - PROGRESS: at 29.02% examples, 3077942 words/s, in_qsize 31, out_qsize 0
2025-08-19 19:32:08,272 : INFO : EPOCH 0 - PROGRESS: at 60.27% examples, 3170988 words/s, in_qsize 32, out_qsize 0
2025-08-19 19:32:09,276 : INFO : EPOCH 0 - PROGRESS: at 89.90% examples, 3115799 words/s, in_qsize 31, out_qsize 0
2025-08-19 19:32:09,664 : INFO : EPOCH 0: training on 11203339 raw words (10369847 effective words) took 3.4s, 3052079 effective words/s
2025-08-19 19:32:10,685 : INFO : EPOCH 1 - PROGRESS: at 20.85% examples, 2194033 words/s, in_qsize 31, out_qsize 0
2025-08-19 19:32:11,688 : INFO : EPOCH 1 - PROGRESS: at 46.24% examples, 2405657 words/s, in_qsize 31, out_qsize 0
2025-08-19 19:32:12,692 : INFO : EPOCH 1 - PROGRESS: at 70.07% examples, 2453320 words/s, in_qsize 31, out_qsize 0
2025-08-19 19:32:13,698 : INFO : EPOCH 1 - PROGRESS: at 95.91% examples, 2475564 words/s, in_qsize 32, out_qsize 0
2025-08-19 19:32:13,842 : INFO : EPOCH 1: training on 1120

Model saved to models/yearly_models/interim/conservative_2022_interim.model
Processing processed_comments/conservative\conservative_batch15.pkl
Loaded 1000000 comments from processed_comments/conservative\conservative_batch15.pkl


2025-08-19 19:32:29,536 : INFO : collecting all words and their counts
2025-08-19 19:32:29,536 : INFO : PROGRESS: at sentence #0, processed 0 words and 0 word types
2025-08-19 19:32:29,591 : INFO : PROGRESS: at sentence #10000, processed 108250 words and 87225 word types
2025-08-19 19:32:29,646 : INFO : PROGRESS: at sentence #20000, processed 219865 words and 157825 word types


Processing chunk of 1000000 comments for 2022
Building bigram model for 2022 with 1000000 comments...
Creating new bigram model for 2022 (cannot update Phraser objects)


2025-08-19 19:32:29,708 : INFO : PROGRESS: at sentence #30000, processed 333493 words and 224586 word types
2025-08-19 19:32:29,767 : INFO : PROGRESS: at sentence #40000, processed 444841 words and 286159 word types
2025-08-19 19:32:29,825 : INFO : PROGRESS: at sentence #50000, processed 553921 words and 344982 word types
2025-08-19 19:32:29,893 : INFO : PROGRESS: at sentence #60000, processed 672975 words and 406878 word types
2025-08-19 19:32:29,954 : INFO : PROGRESS: at sentence #70000, processed 781998 words and 460849 word types
2025-08-19 19:32:30,007 : INFO : PROGRESS: at sentence #80000, processed 885133 words and 509981 word types
2025-08-19 19:32:30,051 : INFO : PROGRESS: at sentence #90000, processed 970193 words and 548879 word types
2025-08-19 19:32:30,108 : INFO : PROGRESS: at sentence #100000, processed 1074195 words and 596278 word types
2025-08-19 19:32:30,157 : INFO : PROGRESS: at sentence #110000, processed 1176817 words and 641751 word types
2025-08-19 19:32:30,228 

Updating existing Word2Vec model for 2022


2025-08-19 19:32:44,952 : INFO : PROGRESS: at sentence #200000, processed 2045352 words, keeping 38158 word types
2025-08-19 19:32:44,960 : INFO : PROGRESS: at sentence #210000, processed 2142971 words, keeping 38913 word types
2025-08-19 19:32:44,969 : INFO : PROGRESS: at sentence #220000, processed 2235171 words, keeping 39751 word types
2025-08-19 19:32:44,979 : INFO : PROGRESS: at sentence #230000, processed 2342141 words, keeping 40638 word types
2025-08-19 19:32:44,991 : INFO : PROGRESS: at sentence #240000, processed 2448869 words, keeping 41493 word types
2025-08-19 19:32:44,999 : INFO : PROGRESS: at sentence #250000, processed 2550033 words, keeping 42283 word types
2025-08-19 19:32:45,007 : INFO : PROGRESS: at sentence #260000, processed 2651222 words, keeping 43090 word types
2025-08-19 19:32:45,019 : INFO : PROGRESS: at sentence #270000, processed 2746973 words, keeping 43826 word types
2025-08-19 19:32:45,028 : INFO : PROGRESS: at sentence #280000, processed 2851030 words,

Training model on 1000000 comments


2025-08-19 19:32:46,886 : INFO : EPOCH 0 - PROGRESS: at 31.64% examples, 2947676 words/s, in_qsize 31, out_qsize 0
2025-08-19 19:32:47,892 : INFO : EPOCH 0 - PROGRESS: at 64.18% examples, 2981873 words/s, in_qsize 31, out_qsize 0
2025-08-19 19:32:48,861 : INFO : EPOCH 0: training on 9727793 raw words (8898118 effective words) took 3.0s, 2989063 effective words/s
2025-08-19 19:32:49,877 : INFO : EPOCH 1 - PROGRESS: at 30.04% examples, 2800042 words/s, in_qsize 32, out_qsize 2
2025-08-19 19:32:50,879 : INFO : EPOCH 1 - PROGRESS: at 62.44% examples, 2900180 words/s, in_qsize 31, out_qsize 0
2025-08-19 19:32:51,864 : INFO : EPOCH 1: training on 9727793 raw words (8897808 effective words) took 3.0s, 2976549 effective words/s
2025-08-19 19:32:52,880 : INFO : EPOCH 2 - PROGRESS: at 31.24% examples, 2907209 words/s, in_qsize 31, out_qsize 0
2025-08-19 19:32:53,884 : INFO : EPOCH 2 - PROGRESS: at 63.27% examples, 2937033 words/s, in_qsize 31, out_qsize 0
2025-08-19 19:32:54,868 : INFO : EPOCH 2

Model saved to models/yearly_models/interim/conservative_2022_interim.model
Processing processed_comments/conservative\conservative_batch16.pkl
Loaded 1000000 comments from processed_comments/conservative\conservative_batch16.pkl


2025-08-19 19:33:05,386 : INFO : collecting all words and their counts
2025-08-19 19:33:05,387 : INFO : PROGRESS: at sentence #0, processed 0 words and 0 word types
2025-08-19 19:33:05,436 : INFO : PROGRESS: at sentence #10000, processed 104297 words and 86172 word types
2025-08-19 19:33:05,487 : INFO : PROGRESS: at sentence #20000, processed 207212 words and 151790 word types


Processing chunk of 1000000 comments for 2023
Building bigram model for 2023 with 1000000 comments...
Creating new bigram model for 2023 (cannot update Phraser objects)


2025-08-19 19:33:05,538 : INFO : PROGRESS: at sentence #30000, processed 309901 words and 211134 word types
2025-08-19 19:33:05,591 : INFO : PROGRESS: at sentence #40000, processed 410762 words and 268550 word types
2025-08-19 19:33:05,647 : INFO : PROGRESS: at sentence #50000, processed 516631 words and 329205 word types
2025-08-19 19:33:05,702 : INFO : PROGRESS: at sentence #60000, processed 616199 words and 381179 word types
2025-08-19 19:33:05,754 : INFO : PROGRESS: at sentence #70000, processed 722951 words and 434581 word types
2025-08-19 19:33:05,811 : INFO : PROGRESS: at sentence #80000, processed 837949 words and 492432 word types
2025-08-19 19:33:05,866 : INFO : PROGRESS: at sentence #90000, processed 939384 words and 542457 word types
2025-08-19 19:33:05,914 : INFO : PROGRESS: at sentence #100000, processed 1034177 words and 587280 word types
2025-08-19 19:33:05,982 : INFO : PROGRESS: at sentence #110000, processed 1155710 words and 643535 word types
2025-08-19 19:33:06,040 

Updating existing Word2Vec model for 2023


2025-08-19 19:33:20,554 : INFO : PROGRESS: at sentence #190000, processed 1993429 words, keeping 38589 word types
2025-08-19 19:33:20,565 : INFO : PROGRESS: at sentence #200000, processed 2087910 words, keeping 39401 word types
2025-08-19 19:33:20,576 : INFO : PROGRESS: at sentence #210000, processed 2185133 words, keeping 40159 word types
2025-08-19 19:33:20,586 : INFO : PROGRESS: at sentence #220000, processed 2288918 words, keeping 40939 word types
2025-08-19 19:33:20,599 : INFO : PROGRESS: at sentence #230000, processed 2403559 words, keeping 41890 word types
2025-08-19 19:33:20,611 : INFO : PROGRESS: at sentence #240000, processed 2521093 words, keeping 42697 word types
2025-08-19 19:33:20,621 : INFO : PROGRESS: at sentence #250000, processed 2627194 words, keeping 43525 word types
2025-08-19 19:33:20,632 : INFO : PROGRESS: at sentence #260000, processed 2731816 words, keeping 44319 word types
2025-08-19 19:33:20,642 : INFO : PROGRESS: at sentence #270000, processed 2836618 words,

Training model on 1000000 comments


2025-08-19 19:33:22,533 : INFO : EPOCH 0 - PROGRESS: at 32.05% examples, 3095220 words/s, in_qsize 31, out_qsize 0
2025-08-19 19:33:23,532 : INFO : EPOCH 0 - PROGRESS: at 68.50% examples, 3150684 words/s, in_qsize 31, out_qsize 0
2025-08-19 19:33:24,412 : INFO : EPOCH 0: training on 10112059 raw words (9256843 effective words) took 2.9s, 3212818 effective words/s
2025-08-19 19:33:25,426 : INFO : EPOCH 1 - PROGRESS: at 32.44% examples, 3133997 words/s, in_qsize 31, out_qsize 1
2025-08-19 19:33:26,429 : INFO : EPOCH 1 - PROGRESS: at 69.91% examples, 3210703 words/s, in_qsize 31, out_qsize 0
2025-08-19 19:33:27,285 : INFO : EPOCH 1: training on 10112059 raw words (9255307 effective words) took 2.9s, 3236925 effective words/s
2025-08-19 19:33:28,299 : INFO : EPOCH 2 - PROGRESS: at 32.54% examples, 3142151 words/s, in_qsize 31, out_qsize 0
2025-08-19 19:33:29,298 : INFO : EPOCH 2 - PROGRESS: at 69.21% examples, 3183115 words/s, in_qsize 31, out_qsize 0
2025-08-19 19:33:30,162 : INFO : EPOCH

Model saved to models/yearly_models/interim/conservative_2023_interim.model
Processing processed_comments/conservative\conservative_batch17.pkl
Loaded 1000000 comments from processed_comments/conservative\conservative_batch17.pkl
Processing processed_comments/conservative\conservative_batch18.pkl
Loaded 1000000 comments from processed_comments/conservative\conservative_batch18.pkl
Processing processed_comments/conservative\conservative_batch19.pkl
Loaded 788076 comments from processed_comments/conservative\conservative_batch19.pkl
Processing processed_comments/conservative\conservative_batch2.pkl
Loaded 1000000 comments from processed_comments/conservative\conservative_batch2.pkl
Processing processed_comments/conservative\conservative_batch3.pkl
Loaded 1000000 comments from processed_comments/conservative\conservative_batch3.pkl
Processing processed_comments/conservative\conservative_batch4.pkl
Loaded 1000000 comments from processed_comments/conservative\conservative_batch4.pkl
Process

2025-08-19 19:34:14,410 : INFO : collecting all words and their counts
2025-08-19 19:34:14,410 : INFO : PROGRESS: at sentence #0, processed 0 words and 0 word types
2025-08-19 19:34:14,468 : INFO : PROGRESS: at sentence #10000, processed 133643 words and 108421 word types
2025-08-19 19:34:14,529 : INFO : PROGRESS: at sentence #20000, processed 265896 words and 193938 word types
2025-08-19 19:34:14,584 : INFO : PROGRESS: at sentence #30000, processed 394773 words and 267706 word types


Creating new bigram model for 2019 (cannot update Phraser objects)


2025-08-19 19:34:14,645 : INFO : PROGRESS: at sentence #40000, processed 529398 words and 343709 word types
2025-08-19 19:34:14,707 : INFO : PROGRESS: at sentence #50000, processed 654939 words and 408300 word types
2025-08-19 19:34:14,770 : INFO : PROGRESS: at sentence #60000, processed 792681 words and 478557 word types
2025-08-19 19:34:14,823 : INFO : PROGRESS: at sentence #70000, processed 909700 words and 533571 word types
2025-08-19 19:34:14,895 : INFO : PROGRESS: at sentence #80000, processed 1061992 words and 606078 word types
2025-08-19 19:34:14,959 : INFO : PROGRESS: at sentence #90000, processed 1192986 words and 665967 word types
2025-08-19 19:34:15,033 : INFO : PROGRESS: at sentence #100000, processed 1323433 words and 725289 word types
2025-08-19 19:34:15,096 : INFO : PROGRESS: at sentence #110000, processed 1461878 words and 786690 word types
2025-08-19 19:34:15,160 : INFO : PROGRESS: at sentence #120000, processed 1593707 words and 844117 word types
2025-08-19 19:34:15,

Updating existing Word2Vec model for 2019


2025-08-19 19:34:29,752 : INFO : PROGRESS: at sentence #170000, processed 2255436 words, keeping 41110 word types
2025-08-19 19:34:29,764 : INFO : PROGRESS: at sentence #180000, processed 2389282 words, keeping 42262 word types
2025-08-19 19:34:29,780 : INFO : PROGRESS: at sentence #190000, processed 2521169 words, keeping 43473 word types
2025-08-19 19:34:29,794 : INFO : PROGRESS: at sentence #200000, processed 2658382 words, keeping 44618 word types
2025-08-19 19:34:29,806 : INFO : PROGRESS: at sentence #210000, processed 2791288 words, keeping 45664 word types
2025-08-19 19:34:29,818 : INFO : PROGRESS: at sentence #220000, processed 2928716 words, keeping 46713 word types
2025-08-19 19:34:29,832 : INFO : PROGRESS: at sentence #230000, processed 3069000 words, keeping 47708 word types
2025-08-19 19:34:29,843 : INFO : PROGRESS: at sentence #240000, processed 3202609 words, keeping 48675 word types
2025-08-19 19:34:29,859 : INFO : PROGRESS: at sentence #250000, processed 3344510 words,

Training model on 1000000 comments


2025-08-19 19:34:32,104 : INFO : EPOCH 0 - PROGRESS: at 24.63% examples, 3063005 words/s, in_qsize 31, out_qsize 0
2025-08-19 19:34:33,107 : INFO : EPOCH 0 - PROGRESS: at 48.10% examples, 3084290 words/s, in_qsize 31, out_qsize 0
2025-08-19 19:34:34,106 : INFO : EPOCH 0 - PROGRESS: at 73.83% examples, 3131276 words/s, in_qsize 31, out_qsize 0
2025-08-19 19:34:35,109 : INFO : EPOCH 0 - PROGRESS: at 98.14% examples, 3149632 words/s, in_qsize 24, out_qsize 0
2025-08-19 19:34:35,174 : INFO : EPOCH 0: training on 13785848 raw words (12833496 effective words) took 4.1s, 3153257 effective words/s
2025-08-19 19:34:36,194 : INFO : EPOCH 1 - PROGRESS: at 24.34% examples, 3001659 words/s, in_qsize 31, out_qsize 0
2025-08-19 19:34:37,195 : INFO : EPOCH 1 - PROGRESS: at 48.39% examples, 3091941 words/s, in_qsize 32, out_qsize 0
2025-08-19 19:34:38,197 : INFO : EPOCH 1 - PROGRESS: at 74.25% examples, 3140906 words/s, in_qsize 31, out_qsize 0
2025-08-19 19:34:39,198 : INFO : EPOCH 1 - PROGRESS: at 98

Model saved to models/yearly_models/interim/conservative_2019_interim.model
Processing processed_comments/conservative\conservative_batch5.pkl
Loaded 1000000 comments from processed_comments/conservative\conservative_batch5.pkl


2025-08-19 19:35:01,595 : INFO : collecting all words and their counts
2025-08-19 19:35:01,596 : INFO : PROGRESS: at sentence #0, processed 0 words and 0 word types
2025-08-19 19:35:01,654 : INFO : PROGRESS: at sentence #10000, processed 137901 words and 110040 word types


Processing chunk of 1000000 comments for 2020
Building bigram model for 2020 with 1000000 comments...
Creating new bigram model for 2020 (cannot update Phraser objects)


2025-08-19 19:35:01,720 : INFO : PROGRESS: at sentence #20000, processed 282045 words and 200405 word types
2025-08-19 19:35:01,784 : INFO : PROGRESS: at sentence #30000, processed 424808 words and 283003 word types
2025-08-19 19:35:01,847 : INFO : PROGRESS: at sentence #40000, processed 561303 words and 358053 word types
2025-08-19 19:35:01,906 : INFO : PROGRESS: at sentence #50000, processed 695569 words and 430063 word types
2025-08-19 19:35:01,967 : INFO : PROGRESS: at sentence #60000, processed 834911 words and 500959 word types
2025-08-19 19:35:02,028 : INFO : PROGRESS: at sentence #70000, processed 974253 words and 569874 word types
2025-08-19 19:35:02,106 : INFO : PROGRESS: at sentence #80000, processed 1131840 words and 644067 word types
2025-08-19 19:35:02,193 : INFO : PROGRESS: at sentence #90000, processed 1294621 words and 716577 word types
2025-08-19 19:35:02,273 : INFO : PROGRESS: at sentence #100000, processed 1456948 words and 786650 word types
2025-08-19 19:35:02,340 

Updating existing Word2Vec model for 2020


2025-08-19 19:35:18,161 : INFO : PROGRESS: at sentence #170000, processed 2410050 words, keeping 42514 word types
2025-08-19 19:35:18,178 : INFO : PROGRESS: at sentence #180000, processed 2574302 words, keeping 43835 word types
2025-08-19 19:35:18,194 : INFO : PROGRESS: at sentence #190000, processed 2727433 words, keeping 44932 word types
2025-08-19 19:35:18,208 : INFO : PROGRESS: at sentence #200000, processed 2882569 words, keeping 45980 word types
2025-08-19 19:35:18,223 : INFO : PROGRESS: at sentence #210000, processed 3040839 words, keeping 47069 word types
2025-08-19 19:35:18,235 : INFO : PROGRESS: at sentence #220000, processed 3180065 words, keeping 48040 word types
2025-08-19 19:35:18,251 : INFO : PROGRESS: at sentence #230000, processed 3336462 words, keeping 49074 word types
2025-08-19 19:35:18,265 : INFO : PROGRESS: at sentence #240000, processed 3492263 words, keeping 50022 word types
2025-08-19 19:35:18,283 : INFO : PROGRESS: at sentence #250000, processed 3651047 words,

Training model on 1000000 comments


2025-08-19 19:35:20,561 : INFO : EPOCH 0 - PROGRESS: at 23.01% examples, 3093654 words/s, in_qsize 31, out_qsize 0
2025-08-19 19:35:21,563 : INFO : EPOCH 0 - PROGRESS: at 45.39% examples, 3155316 words/s, in_qsize 31, out_qsize 0
2025-08-19 19:35:22,564 : INFO : EPOCH 0 - PROGRESS: at 67.52% examples, 3178762 words/s, in_qsize 31, out_qsize 0
2025-08-19 19:35:23,563 : INFO : EPOCH 0 - PROGRESS: at 89.13% examples, 3183233 words/s, in_qsize 31, out_qsize 0
2025-08-19 19:35:23,998 : INFO : EPOCH 0: training on 15255324 raw words (14206787 effective words) took 4.4s, 3198465 effective words/s
2025-08-19 19:35:25,011 : INFO : EPOCH 1 - PROGRESS: at 23.01% examples, 3102847 words/s, in_qsize 31, out_qsize 0
2025-08-19 19:35:26,012 : INFO : EPOCH 1 - PROGRESS: at 45.13% examples, 3136083 words/s, in_qsize 32, out_qsize 0
2025-08-19 19:35:27,022 : INFO : EPOCH 1 - PROGRESS: at 67.16% examples, 3154260 words/s, in_qsize 31, out_qsize 0
2025-08-19 19:35:28,022 : INFO : EPOCH 1 - PROGRESS: at 88

Model saved to models/yearly_models/interim/conservative_2020_interim.model
Processing processed_comments/conservative\conservative_batch6.pkl
Loaded 1000000 comments from processed_comments/conservative\conservative_batch6.pkl


2025-08-19 19:35:46,394 : INFO : collecting all words and their counts
2025-08-19 19:35:46,395 : INFO : PROGRESS: at sentence #0, processed 0 words and 0 word types
2025-08-19 19:35:46,459 : INFO : PROGRESS: at sentence #10000, processed 144054 words and 113841 word types


Processing chunk of 1000000 comments for 2020
Building bigram model for 2020 with 1000000 comments...
Creating new bigram model for 2020 (cannot update Phraser objects)


2025-08-19 19:35:46,532 : INFO : PROGRESS: at sentence #20000, processed 297934 words and 208621 word types
2025-08-19 19:35:46,597 : INFO : PROGRESS: at sentence #30000, processed 438758 words and 287096 word types
2025-08-19 19:35:46,668 : INFO : PROGRESS: at sentence #40000, processed 582316 words and 361538 word types
2025-08-19 19:35:46,738 : INFO : PROGRESS: at sentence #50000, processed 730839 words and 436472 word types
2025-08-19 19:35:46,804 : INFO : PROGRESS: at sentence #60000, processed 868340 words and 501185 word types
2025-08-19 19:35:46,865 : INFO : PROGRESS: at sentence #70000, processed 999713 words and 560766 word types
2025-08-19 19:35:46,928 : INFO : PROGRESS: at sentence #80000, processed 1133332 words and 619319 word types
2025-08-19 19:35:47,002 : INFO : PROGRESS: at sentence #90000, processed 1293343 words and 688069 word types
2025-08-19 19:35:47,090 : INFO : PROGRESS: at sentence #100000, processed 1441989 words and 751515 word types
2025-08-19 19:35:47,165 

Updating existing Word2Vec model for 2020


2025-08-19 19:36:01,884 : INFO : PROGRESS: at sentence #160000, processed 2240440 words, keeping 38823 word types
2025-08-19 19:36:01,896 : INFO : PROGRESS: at sentence #170000, processed 2375823 words, keeping 39750 word types
2025-08-19 19:36:01,909 : INFO : PROGRESS: at sentence #180000, processed 2511935 words, keeping 40795 word types
2025-08-19 19:36:01,928 : INFO : PROGRESS: at sentence #190000, processed 2669512 words, keeping 41859 word types
2025-08-19 19:36:01,942 : INFO : PROGRESS: at sentence #200000, processed 2817014 words, keeping 43079 word types
2025-08-19 19:36:01,958 : INFO : PROGRESS: at sentence #210000, processed 2967375 words, keeping 44212 word types
2025-08-19 19:36:01,971 : INFO : PROGRESS: at sentence #220000, processed 3118241 words, keeping 45259 word types
2025-08-19 19:36:01,986 : INFO : PROGRESS: at sentence #230000, processed 3261191 words, keeping 46326 word types
2025-08-19 19:36:02,001 : INFO : PROGRESS: at sentence #240000, processed 3410958 words,

Training model on 1000000 comments


2025-08-19 19:36:04,213 : INFO : EPOCH 0 - PROGRESS: at 22.39% examples, 2950738 words/s, in_qsize 31, out_qsize 0
2025-08-19 19:36:05,213 : INFO : EPOCH 0 - PROGRESS: at 45.72% examples, 3074297 words/s, in_qsize 31, out_qsize 0
2025-08-19 19:36:06,214 : INFO : EPOCH 0 - PROGRESS: at 70.71% examples, 3135286 words/s, in_qsize 31, out_qsize 0
2025-08-19 19:36:07,217 : INFO : EPOCH 0 - PROGRESS: at 94.16% examples, 3102083 words/s, in_qsize 30, out_qsize 1
2025-08-19 19:36:07,489 : INFO : EPOCH 0: training on 14170649 raw words (13168841 effective words) took 4.3s, 3078175 effective words/s
2025-08-19 19:36:08,505 : INFO : EPOCH 1 - PROGRESS: at 21.82% examples, 2861432 words/s, in_qsize 31, out_qsize 0
2025-08-19 19:36:09,511 : INFO : EPOCH 1 - PROGRESS: at 44.93% examples, 3006909 words/s, in_qsize 31, out_qsize 0
2025-08-19 19:36:10,513 : INFO : EPOCH 1 - PROGRESS: at 68.67% examples, 3044707 words/s, in_qsize 31, out_qsize 0
2025-08-19 19:36:11,513 : INFO : EPOCH 1 - PROGRESS: at 92

Model saved to models/yearly_models/interim/conservative_2020_interim.model
Processing processed_comments/conservative\conservative_batch7.pkl
Loaded 1000000 comments from processed_comments/conservative\conservative_batch7.pkl


2025-08-19 19:36:34,984 : INFO : collecting all words and their counts
2025-08-19 19:36:34,985 : INFO : PROGRESS: at sentence #0, processed 0 words and 0 word types
2025-08-19 19:36:35,038 : INFO : PROGRESS: at sentence #10000, processed 132350 words and 106459 word types


Processing chunk of 1000000 comments for 2020
Building bigram model for 2020 with 1000000 comments...
Creating new bigram model for 2020 (cannot update Phraser objects)


2025-08-19 19:36:35,107 : INFO : PROGRESS: at sentence #20000, processed 274395 words and 196232 word types
2025-08-19 19:36:35,170 : INFO : PROGRESS: at sentence #30000, processed 412304 words and 275072 word types
2025-08-19 19:36:35,247 : INFO : PROGRESS: at sentence #40000, processed 556897 words and 352157 word types
2025-08-19 19:36:35,307 : INFO : PROGRESS: at sentence #50000, processed 695859 words and 420461 word types
2025-08-19 19:36:35,375 : INFO : PROGRESS: at sentence #60000, processed 836847 words and 488439 word types
2025-08-19 19:36:35,429 : INFO : PROGRESS: at sentence #70000, processed 957370 words and 543899 word types
2025-08-19 19:36:35,482 : INFO : PROGRESS: at sentence #80000, processed 1068686 words and 593486 word types
2025-08-19 19:36:35,543 : INFO : PROGRESS: at sentence #90000, processed 1200271 words and 652351 word types
2025-08-19 19:36:35,613 : INFO : PROGRESS: at sentence #100000, processed 1313128 words and 699516 word types
2025-08-19 19:36:35,671 

Updating existing Word2Vec model for 2020


2025-08-19 19:36:48,420 : INFO : PROGRESS: at sentence #170000, processed 2242710 words, keeping 39680 word types
2025-08-19 19:36:48,435 : INFO : PROGRESS: at sentence #180000, processed 2379272 words, keeping 40726 word types
2025-08-19 19:36:48,448 : INFO : PROGRESS: at sentence #190000, processed 2498620 words, keeping 41363 word types
2025-08-19 19:36:48,459 : INFO : PROGRESS: at sentence #200000, processed 2621420 words, keeping 42070 word types
2025-08-19 19:36:48,472 : INFO : PROGRESS: at sentence #210000, processed 2745740 words, keeping 42866 word types
2025-08-19 19:36:48,486 : INFO : PROGRESS: at sentence #220000, processed 2889948 words, keeping 43718 word types
2025-08-19 19:36:48,501 : INFO : PROGRESS: at sentence #230000, processed 3034520 words, keeping 44778 word types
2025-08-19 19:36:48,515 : INFO : PROGRESS: at sentence #240000, processed 3162596 words, keeping 45508 word types
2025-08-19 19:36:48,529 : INFO : PROGRESS: at sentence #250000, processed 3297200 words,

Training model on 1000000 comments


2025-08-19 19:36:50,612 : INFO : EPOCH 0 - PROGRESS: at 25.84% examples, 3146547 words/s, in_qsize 31, out_qsize 0
2025-08-19 19:36:51,614 : INFO : EPOCH 0 - PROGRESS: at 56.14% examples, 3217015 words/s, in_qsize 30, out_qsize 1
2025-08-19 19:36:52,616 : INFO : EPOCH 0 - PROGRESS: at 85.18% examples, 3194984 words/s, in_qsize 32, out_qsize 1
2025-08-19 19:36:53,111 : INFO : EPOCH 0: training on 12267315 raw words (11276845 effective words) took 3.5s, 3220528 effective words/s
2025-08-19 19:36:54,121 : INFO : EPOCH 1 - PROGRESS: at 25.76% examples, 3142281 words/s, in_qsize 31, out_qsize 0
2025-08-19 19:36:55,123 : INFO : EPOCH 1 - PROGRESS: at 56.44% examples, 3240118 words/s, in_qsize 31, out_qsize 0
2025-08-19 19:36:56,124 : INFO : EPOCH 1 - PROGRESS: at 86.70% examples, 3261028 words/s, in_qsize 32, out_qsize 0
2025-08-19 19:36:56,569 : INFO : EPOCH 1: training on 12267315 raw words (11275338 effective words) took 3.4s, 3270402 effective words/s
2025-08-19 19:36:57,580 : INFO : EPO

Model saved to models/yearly_models/interim/conservative_2020_interim.model
Processing processed_comments/conservative\conservative_batch8.pkl
Loaded 1000000 comments from processed_comments/conservative\conservative_batch8.pkl
Processing chunk of 1000000 comments for 2020
Building bigram model for 2020 with 1000000 comments...


2025-08-19 19:37:17,021 : INFO : collecting all words and their counts
2025-08-19 19:37:17,021 : INFO : PROGRESS: at sentence #0, processed 0 words and 0 word types
2025-08-19 19:37:17,071 : INFO : PROGRESS: at sentence #10000, processed 116457 words and 91204 word types
2025-08-19 19:37:17,112 : INFO : PROGRESS: at sentence #20000, processed 213097 words and 148542 word types
2025-08-19 19:37:17,143 : INFO : PROGRESS: at sentence #30000, processed 277182 words and 177530 word types
2025-08-19 19:37:17,171 : INFO : PROGRESS: at sentence #40000, processed 338279 words and 203257 word types
2025-08-19 19:37:17,197 : INFO : PROGRESS: at sentence #50000, processed 404392 words and 227966 word types


Creating new bigram model for 2020 (cannot update Phraser objects)


2025-08-19 19:37:17,224 : INFO : PROGRESS: at sentence #60000, processed 471180 words and 252975 word types
2025-08-19 19:37:17,262 : INFO : PROGRESS: at sentence #70000, processed 552526 words and 282810 word types
2025-08-19 19:37:17,286 : INFO : PROGRESS: at sentence #80000, processed 611956 words and 305176 word types
2025-08-19 19:37:17,304 : INFO : PROGRESS: at sentence #90000, processed 650646 words and 318357 word types
2025-08-19 19:37:17,329 : INFO : PROGRESS: at sentence #100000, processed 698561 words and 336115 word types
2025-08-19 19:37:17,354 : INFO : PROGRESS: at sentence #110000, processed 743677 words and 352292 word types
2025-08-19 19:37:17,372 : INFO : PROGRESS: at sentence #120000, processed 783802 words and 365618 word types
2025-08-19 19:37:17,401 : INFO : PROGRESS: at sentence #130000, processed 841598 words and 386328 word types
2025-08-19 19:37:17,430 : INFO : PROGRESS: at sentence #140000, processed 909795 words and 411275 word types
2025-08-19 19:37:17,457

Updating existing Word2Vec model for 2020


2025-08-19 19:37:28,487 : INFO : PROGRESS: at sentence #290000, processed 1928475 words, keeping 31464 word types
2025-08-19 19:37:28,493 : INFO : PROGRESS: at sentence #300000, processed 2008709 words, keeping 32097 word types
2025-08-19 19:37:28,506 : INFO : PROGRESS: at sentence #310000, processed 2112103 words, keeping 32882 word types
2025-08-19 19:37:28,516 : INFO : PROGRESS: at sentence #320000, processed 2201639 words, keeping 33590 word types
2025-08-19 19:37:28,526 : INFO : PROGRESS: at sentence #330000, processed 2292130 words, keeping 34265 word types
2025-08-19 19:37:28,536 : INFO : PROGRESS: at sentence #340000, processed 2392415 words, keeping 35003 word types
2025-08-19 19:37:28,548 : INFO : PROGRESS: at sentence #350000, processed 2487485 words, keeping 35890 word types
2025-08-19 19:37:28,555 : INFO : PROGRESS: at sentence #360000, processed 2567546 words, keeping 36478 word types
2025-08-19 19:37:28,562 : INFO : PROGRESS: at sentence #370000, processed 2636624 words,

Training model on 1000000 comments


2025-08-19 19:37:30,550 : INFO : EPOCH 0 - PROGRESS: at 42.42% examples, 2838154 words/s, in_qsize 32, out_qsize 0
2025-08-19 19:37:31,550 : INFO : EPOCH 0 - PROGRESS: at 69.33% examples, 3058648 words/s, in_qsize 32, out_qsize 0
2025-08-19 19:37:32,556 : INFO : EPOCH 0 - PROGRESS: at 97.71% examples, 3110674 words/s, in_qsize 27, out_qsize 0
2025-08-19 19:37:32,616 : INFO : EPOCH 0: training on 10596030 raw words (9591163 effective words) took 3.1s, 3126613 effective words/s
2025-08-19 19:37:33,631 : INFO : EPOCH 1 - PROGRESS: at 43.27% examples, 2941791 words/s, in_qsize 31, out_qsize 0
2025-08-19 19:37:34,632 : INFO : EPOCH 1 - PROGRESS: at 70.15% examples, 3100370 words/s, in_qsize 31, out_qsize 0
2025-08-19 19:37:35,633 : INFO : EPOCH 1 - PROGRESS: at 98.48% examples, 3143499 words/s, in_qsize 17, out_qsize 0
2025-08-19 19:37:35,669 : INFO : EPOCH 1: training on 10596030 raw words (9590550 effective words) took 3.0s, 3153652 effective words/s
2025-08-19 19:37:36,685 : INFO : EPOCH

Model saved to models/yearly_models/interim/conservative_2020_interim.model
Processing processed_comments/conservative\conservative_batch9.pkl
Loaded 1000000 comments from processed_comments/conservative\conservative_batch9.pkl


2025-08-19 19:37:48,744 : INFO : collecting all words and their counts
2025-08-19 19:37:48,748 : INFO : PROGRESS: at sentence #0, processed 0 words and 0 word types
2025-08-19 19:37:48,857 : INFO : PROGRESS: at sentence #10000, processed 251847 words and 182759 word types



=== Comment Counts by Year ===
2011: 45332 comments
2012: 255920 comments
2013: 188573 comments
2014: 187077 comments
2015: 254462 comments
2016: 539987 comments
2017: 644284 comments
2018: 761577 comments
2019: 1025755 comments
2020: 4243616 comments
2021: 3889668 comments
2022: 2930383 comments
2023: 1913895 comments
Skipping final 45332 comments for 2011 (less than minimum required)
Processing final 255920 comments for 2012
Building bigram model for 2012 with 255920 comments...


2025-08-19 19:37:48,961 : INFO : PROGRESS: at sentence #20000, processed 502148 words and 323870 word types
2025-08-19 19:37:49,076 : INFO : PROGRESS: at sentence #30000, processed 750538 words and 445350 word types
2025-08-19 19:37:49,181 : INFO : PROGRESS: at sentence #40000, processed 994854 words and 557325 word types
2025-08-19 19:37:49,283 : INFO : PROGRESS: at sentence #50000, processed 1219161 words and 656904 word types
2025-08-19 19:37:49,400 : INFO : PROGRESS: at sentence #60000, processed 1448079 words and 752568 word types
2025-08-19 19:37:49,514 : INFO : PROGRESS: at sentence #70000, processed 1709691 words and 851564 word types
2025-08-19 19:37:49,621 : INFO : PROGRESS: at sentence #80000, processed 1948646 words and 939432 word types
2025-08-19 19:37:49,736 : INFO : PROGRESS: at sentence #90000, processed 2192914 words and 1026252 word types
2025-08-19 19:37:49,851 : INFO : PROGRESS: at sentence #100000, processed 2439346 words and 1112435 word types
2025-08-19 19:37:49

Creating new Word2Vec model for 2012


2025-08-19 19:37:54,881 : INFO : PROGRESS: at sentence #120000, processed 2853654 words, keeping 39562 word types
2025-08-19 19:37:54,895 : INFO : PROGRESS: at sentence #130000, processed 3060698 words, keeping 40746 word types
2025-08-19 19:37:54,914 : INFO : PROGRESS: at sentence #140000, processed 3274321 words, keeping 42112 word types
2025-08-19 19:37:54,928 : INFO : PROGRESS: at sentence #150000, processed 3467264 words, keeping 43336 word types
2025-08-19 19:37:54,943 : INFO : PROGRESS: at sentence #160000, processed 3671105 words, keeping 44456 word types
2025-08-19 19:37:54,958 : INFO : PROGRESS: at sentence #170000, processed 3867217 words, keeping 45476 word types
2025-08-19 19:37:54,972 : INFO : PROGRESS: at sentence #180000, processed 4055342 words, keeping 46491 word types
2025-08-19 19:37:54,985 : INFO : PROGRESS: at sentence #190000, processed 4280982 words, keeping 47848 word types
2025-08-19 19:37:54,999 : INFO : PROGRESS: at sentence #200000, processed 4448657 words,

Training model on 255920 comments


2025-08-19 19:37:56,203 : INFO : EPOCH 0 - PROGRESS: at 57.85% examples, 3155243 words/s, in_qsize 31, out_qsize 0
2025-08-19 19:37:56,829 : INFO : EPOCH 0: training on 5703063 raw words (5251604 effective words) took 1.6s, 3228904 effective words/s
2025-08-19 19:37:57,839 : INFO : EPOCH 1 - PROGRESS: at 57.66% examples, 3139940 words/s, in_qsize 31, out_qsize 0
2025-08-19 19:37:58,473 : INFO : EPOCH 1: training on 5703063 raw words (5251996 effective words) took 1.6s, 3208451 effective words/s
2025-08-19 19:37:59,485 : INFO : EPOCH 2 - PROGRESS: at 57.26% examples, 3115494 words/s, in_qsize 31, out_qsize 0
2025-08-19 19:38:00,135 : INFO : EPOCH 2: training on 5703063 raw words (5251984 effective words) took 1.7s, 3173888 effective words/s
2025-08-19 19:38:01,145 : INFO : EPOCH 3 - PROGRESS: at 57.44% examples, 3132474 words/s, in_qsize 31, out_qsize 0
2025-08-19 19:38:01,779 : INFO : EPOCH 3: training on 5703063 raw words (5252026 effective words) took 1.6s, 3209507 effective words/s


Model saved to models/yearly_models/conservative_2012.model
Processing final 188573 comments for 2013
Building bigram model for 2013 with 188573 comments...


2025-08-19 19:38:03,740 : INFO : PROGRESS: at sentence #30000, processed 607710 words and 383696 word types
2025-08-19 19:38:03,841 : INFO : PROGRESS: at sentence #40000, processed 802690 words and 482912 word types
2025-08-19 19:38:03,926 : INFO : PROGRESS: at sentence #50000, processed 987911 words and 573937 word types
2025-08-19 19:38:04,006 : INFO : PROGRESS: at sentence #60000, processed 1160841 words and 654678 word types
2025-08-19 19:38:04,129 : INFO : PROGRESS: at sentence #70000, processed 1361387 words and 745411 word types
2025-08-19 19:38:04,221 : INFO : PROGRESS: at sentence #80000, processed 1549387 words and 828406 word types
2025-08-19 19:38:04,322 : INFO : PROGRESS: at sentence #90000, processed 1765646 words and 919839 word types
2025-08-19 19:38:04,415 : INFO : PROGRESS: at sentence #100000, processed 1965763 words and 998053 word types
2025-08-19 19:38:04,514 : INFO : PROGRESS: at sentence #110000, processed 2177615 words and 1080902 word types
2025-08-19 19:38:04

Creating new Word2Vec model for 2013


2025-08-19 19:38:07,654 : INFO : PROGRESS: at sentence #140000, processed 2745616 words, keeping 43005 word types
2025-08-19 19:38:07,670 : INFO : PROGRESS: at sentence #150000, processed 2933468 words, keeping 44105 word types
2025-08-19 19:38:07,685 : INFO : PROGRESS: at sentence #160000, processed 3131118 words, keeping 45310 word types
2025-08-19 19:38:07,699 : INFO : PROGRESS: at sentence #170000, processed 3311555 words, keeping 46608 word types
2025-08-19 19:38:07,711 : INFO : PROGRESS: at sentence #180000, processed 3488534 words, keeping 47721 word types
2025-08-19 19:38:07,724 : INFO : collected 48604 word types from a corpus of 3641079 raw words and 188573 sentences
2025-08-19 19:38:07,724 : INFO : Creating a fresh vocabulary
2025-08-19 19:38:07,747 : INFO : Word2Vec lifecycle event {'msg': 'effective_min_count=10 retains 13229 unique words (27.22% of original 48604, drops 35375)', 'datetime': '2025-08-19T19:38:07.747413', 'gensim': '4.3.3', 'python': '3.11.9 (tags/v3.11.9:d

Training model on 188573 comments


2025-08-19 19:38:08,840 : INFO : EPOCH 0 - PROGRESS: at 93.91% examples, 3159566 words/s, in_qsize 21, out_qsize 0
2025-08-19 19:38:08,895 : INFO : EPOCH 0: training on 3641079 raw words (3354584 effective words) took 1.1s, 3173624 effective words/s
2025-08-19 19:38:09,907 : INFO : EPOCH 1 - PROGRESS: at 93.55% examples, 3151706 words/s, in_qsize 22, out_qsize 0
2025-08-19 19:38:09,959 : INFO : EPOCH 1: training on 3641079 raw words (3354306 effective words) took 1.1s, 3183529 effective words/s
2025-08-19 19:38:10,968 : INFO : EPOCH 2 - PROGRESS: at 93.61% examples, 3157048 words/s, in_qsize 22, out_qsize 0
2025-08-19 19:38:11,028 : INFO : EPOCH 2: training on 3641079 raw words (3354882 effective words) took 1.1s, 3162916 effective words/s
2025-08-19 19:38:12,043 : INFO : EPOCH 3 - PROGRESS: at 93.25% examples, 3143640 words/s, in_qsize 23, out_qsize 0
2025-08-19 19:38:12,102 : INFO : EPOCH 3: training on 3641079 raw words (3354637 effective words) took 1.1s, 3164350 effective words/s


Model saved to models/yearly_models/conservative_2013.model
Processing final 187077 comments for 2014
Building bigram model for 2014 with 187077 comments...


2025-08-19 19:38:13,468 : INFO : PROGRESS: at sentence #30000, processed 559868 words and 360577 word types
2025-08-19 19:38:13,563 : INFO : PROGRESS: at sentence #40000, processed 766041 words and 467464 word types
2025-08-19 19:38:13,653 : INFO : PROGRESS: at sentence #50000, processed 963296 words and 564348 word types
2025-08-19 19:38:13,754 : INFO : PROGRESS: at sentence #60000, processed 1178645 words and 662983 word types
2025-08-19 19:38:13,868 : INFO : PROGRESS: at sentence #70000, processed 1376706 words and 750556 word types
2025-08-19 19:38:13,960 : INFO : PROGRESS: at sentence #80000, processed 1564084 words and 832152 word types
2025-08-19 19:38:14,044 : INFO : PROGRESS: at sentence #90000, processed 1741095 words and 906910 word types
2025-08-19 19:38:14,134 : INFO : PROGRESS: at sentence #100000, processed 1932740 words and 987039 word types
2025-08-19 19:38:14,232 : INFO : PROGRESS: at sentence #110000, processed 2120796 words and 1062044 word types
2025-08-19 19:38:14

Creating new Word2Vec model for 2014


2025-08-19 19:38:17,387 : INFO : PROGRESS: at sentence #130000, processed 2479042 words, keeping 41150 word types
2025-08-19 19:38:17,405 : INFO : PROGRESS: at sentence #140000, processed 2684512 words, keeping 42691 word types
2025-08-19 19:38:17,420 : INFO : PROGRESS: at sentence #150000, processed 2877286 words, keeping 44116 word types
2025-08-19 19:38:17,434 : INFO : PROGRESS: at sentence #160000, processed 3066997 words, keeping 45593 word types
2025-08-19 19:38:17,446 : INFO : PROGRESS: at sentence #170000, processed 3241174 words, keeping 47251 word types
2025-08-19 19:38:17,460 : INFO : PROGRESS: at sentence #180000, processed 3429683 words, keeping 48549 word types
2025-08-19 19:38:17,471 : INFO : collected 49455 word types from a corpus of 3565238 raw words and 187077 sentences
2025-08-19 19:38:17,472 : INFO : Creating a fresh vocabulary
2025-08-19 19:38:17,491 : INFO : Word2Vec lifecycle event {'msg': 'effective_min_count=10 retains 13588 unique words (27.48% of original 49

Training model on 187077 comments


2025-08-19 19:38:18,583 : INFO : EPOCH 0 - PROGRESS: at 94.46% examples, 3098887 words/s, in_qsize 21, out_qsize 0
2025-08-19 19:38:18,636 : INFO : EPOCH 0: training on 3565238 raw words (3293169 effective words) took 1.1s, 3125467 effective words/s
2025-08-19 19:38:19,649 : INFO : EPOCH 1 - PROGRESS: at 88.61% examples, 2919983 words/s, in_qsize 31, out_qsize 0
2025-08-19 19:38:19,750 : INFO : EPOCH 1: training on 3565238 raw words (3293293 effective words) took 1.1s, 2986804 effective words/s
2025-08-19 19:38:20,767 : INFO : EPOCH 2 - PROGRESS: at 94.46% examples, 3077603 words/s, in_qsize 21, out_qsize 0
2025-08-19 19:38:20,817 : INFO : EPOCH 2: training on 3565238 raw words (3292962 effective words) took 1.1s, 3113873 effective words/s
2025-08-19 19:38:21,830 : INFO : EPOCH 3 - PROGRESS: at 95.86% examples, 3151645 words/s, in_qsize 15, out_qsize 1
2025-08-19 19:38:21,865 : INFO : EPOCH 3: training on 3565238 raw words (3293243 effective words) took 1.0s, 3177055 effective words/s


Model saved to models/yearly_models/conservative_2014.model
Processing final 254462 comments for 2015
Building bigram model for 2015 with 254462 comments...


2025-08-19 19:38:23,226 : INFO : PROGRESS: at sentence #30000, processed 597955 words and 399065 word types
2025-08-19 19:38:23,327 : INFO : PROGRESS: at sentence #40000, processed 795707 words and 502521 word types
2025-08-19 19:38:23,428 : INFO : PROGRESS: at sentence #50000, processed 1007711 words and 602848 word types
2025-08-19 19:38:23,521 : INFO : PROGRESS: at sentence #60000, processed 1201400 words and 694236 word types
2025-08-19 19:38:23,643 : INFO : PROGRESS: at sentence #70000, processed 1390310 words and 779956 word types
2025-08-19 19:38:23,727 : INFO : PROGRESS: at sentence #80000, processed 1566480 words and 855892 word types
2025-08-19 19:38:23,815 : INFO : PROGRESS: at sentence #90000, processed 1754027 words and 929997 word types
2025-08-19 19:38:23,901 : INFO : PROGRESS: at sentence #100000, processed 1936565 words and 1001558 word types
2025-08-19 19:38:23,992 : INFO : PROGRESS: at sentence #110000, processed 2121021 words and 1071997 word types
2025-08-19 19:38:

Creating new Word2Vec model for 2015


2025-08-19 19:38:28,167 : INFO : PROGRESS: at sentence #150000, processed 2818534 words, keeping 43341 word types
2025-08-19 19:38:28,183 : INFO : PROGRESS: at sentence #160000, processed 2987995 words, keeping 44413 word types
2025-08-19 19:38:28,199 : INFO : PROGRESS: at sentence #170000, processed 3187945 words, keeping 45507 word types
2025-08-19 19:38:28,212 : INFO : PROGRESS: at sentence #180000, processed 3361357 words, keeping 46441 word types
2025-08-19 19:38:28,226 : INFO : PROGRESS: at sentence #190000, processed 3535848 words, keeping 47320 word types
2025-08-19 19:38:28,235 : INFO : PROGRESS: at sentence #200000, processed 3688261 words, keeping 48225 word types
2025-08-19 19:38:28,244 : INFO : PROGRESS: at sentence #210000, processed 3819041 words, keeping 49011 word types
2025-08-19 19:38:28,256 : INFO : PROGRESS: at sentence #220000, processed 3978742 words, keeping 49876 word types
2025-08-19 19:38:28,269 : INFO : PROGRESS: at sentence #230000, processed 4140289 words,

Training model on 254462 comments


2025-08-19 19:38:29,424 : INFO : EPOCH 0 - PROGRESS: at 72.34% examples, 3179343 words/s, in_qsize 31, out_qsize 0
2025-08-19 19:38:29,708 : INFO : EPOCH 0: training on 4509182 raw words (4186395 effective words) took 1.3s, 3250776 effective words/s
2025-08-19 19:38:30,717 : INFO : EPOCH 1 - PROGRESS: at 71.63% examples, 3161790 words/s, in_qsize 31, out_qsize 0
2025-08-19 19:38:31,013 : INFO : EPOCH 1: training on 4509182 raw words (4187436 effective words) took 1.3s, 3225967 effective words/s
2025-08-19 19:38:32,025 : INFO : EPOCH 2 - PROGRESS: at 72.12% examples, 3174829 words/s, in_qsize 31, out_qsize 0
2025-08-19 19:38:32,330 : INFO : EPOCH 2: training on 4509182 raw words (4186829 effective words) took 1.3s, 3200275 effective words/s
2025-08-19 19:38:33,338 : INFO : EPOCH 3 - PROGRESS: at 71.89% examples, 3173786 words/s, in_qsize 31, out_qsize 0
2025-08-19 19:38:33,638 : INFO : EPOCH 3: training on 4509182 raw words (4186268 effective words) took 1.3s, 3218495 effective words/s


Model saved to models/yearly_models/conservative_2015.model
Processing final 539987 comments for 2016
Building bigram model for 2016 with 539987 comments...


2025-08-19 19:38:35,228 : INFO : PROGRESS: at sentence #30000, processed 482444 words and 318022 word types
2025-08-19 19:38:35,304 : INFO : PROGRESS: at sentence #40000, processed 635672 words and 396507 word types
2025-08-19 19:38:35,378 : INFO : PROGRESS: at sentence #50000, processed 799876 words and 473919 word types
2025-08-19 19:38:35,456 : INFO : PROGRESS: at sentence #60000, processed 971734 words and 552012 word types
2025-08-19 19:38:35,540 : INFO : PROGRESS: at sentence #70000, processed 1144342 words and 625500 word types
2025-08-19 19:38:35,646 : INFO : PROGRESS: at sentence #80000, processed 1309128 words and 692550 word types
2025-08-19 19:38:35,757 : INFO : PROGRESS: at sentence #90000, processed 1493419 words and 761959 word types
2025-08-19 19:38:35,829 : INFO : PROGRESS: at sentence #100000, processed 1641333 words and 815797 word types
2025-08-19 19:38:35,911 : INFO : PROGRESS: at sentence #110000, processed 1806858 words and 874754 word types
2025-08-19 19:38:35,9

Creating new Word2Vec model for 2016


2025-08-19 19:38:51,710 : INFO : PROGRESS: at sentence #150000, processed 2439661 words, keeping 38374 word types
2025-08-19 19:38:51,724 : INFO : PROGRESS: at sentence #160000, processed 2610646 words, keeping 39342 word types
2025-08-19 19:38:51,745 : INFO : PROGRESS: at sentence #170000, processed 2790133 words, keeping 40448 word types
2025-08-19 19:38:51,763 : INFO : PROGRESS: at sentence #180000, processed 2978196 words, keeping 41567 word types
2025-08-19 19:38:51,784 : INFO : PROGRESS: at sentence #190000, processed 3168474 words, keeping 42631 word types
2025-08-19 19:38:51,803 : INFO : PROGRESS: at sentence #200000, processed 3383365 words, keeping 43685 word types
2025-08-19 19:38:51,824 : INFO : PROGRESS: at sentence #210000, processed 3573452 words, keeping 44772 word types
2025-08-19 19:38:51,840 : INFO : PROGRESS: at sentence #220000, processed 3760387 words, keeping 45746 word types
2025-08-19 19:38:51,856 : INFO : PROGRESS: at sentence #230000, processed 3934199 words,

Training model on 539987 comments


2025-08-19 19:38:53,531 : INFO : EPOCH 0 - PROGRESS: at 36.86% examples, 3086816 words/s, in_qsize 32, out_qsize 0
2025-08-19 19:38:54,536 : INFO : EPOCH 0 - PROGRESS: at 71.04% examples, 3152184 words/s, in_qsize 31, out_qsize 0
2025-08-19 19:38:55,383 : INFO : EPOCH 0: training on 9835923 raw words (9072585 effective words) took 2.9s, 3179992 effective words/s
2025-08-19 19:38:56,394 : INFO : EPOCH 1 - PROGRESS: at 37.13% examples, 3113562 words/s, in_qsize 31, out_qsize 0
2025-08-19 19:38:57,395 : INFO : EPOCH 1 - PROGRESS: at 71.23% examples, 3168542 words/s, in_qsize 32, out_qsize 0
2025-08-19 19:38:58,230 : INFO : EPOCH 1: training on 9835923 raw words (9074284 effective words) took 2.8s, 3198689 effective words/s
2025-08-19 19:38:59,243 : INFO : EPOCH 2 - PROGRESS: at 37.23% examples, 3124422 words/s, in_qsize 31, out_qsize 0
2025-08-19 19:39:00,244 : INFO : EPOCH 2 - PROGRESS: at 70.69% examples, 3139351 words/s, in_qsize 31, out_qsize 0
2025-08-19 19:39:01,103 : INFO : EPOCH 2

Model saved to models/yearly_models/conservative_2016.model
Processing final 644284 comments for 2017
Building bigram model for 2017 with 644284 comments...


2025-08-19 19:39:07,101 : INFO : PROGRESS: at sentence #20000, processed 401809 words and 274149 word types
2025-08-19 19:39:07,186 : INFO : PROGRESS: at sentence #30000, processed 575781 words and 369812 word types
2025-08-19 19:39:07,272 : INFO : PROGRESS: at sentence #40000, processed 767536 words and 468505 word types
2025-08-19 19:39:07,355 : INFO : PROGRESS: at sentence #50000, processed 954735 words and 555429 word types
2025-08-19 19:39:07,443 : INFO : PROGRESS: at sentence #60000, processed 1138895 words and 640567 word types
2025-08-19 19:39:07,557 : INFO : PROGRESS: at sentence #70000, processed 1319531 words and 721260 word types
2025-08-19 19:39:07,651 : INFO : PROGRESS: at sentence #80000, processed 1529186 words and 808736 word types
2025-08-19 19:39:07,741 : INFO : PROGRESS: at sentence #90000, processed 1721057 words and 887188 word types
2025-08-19 19:39:07,835 : INFO : PROGRESS: at sentence #100000, processed 1921934 words and 967420 word types
2025-08-19 19:39:07,93

Creating new Word2Vec model for 2017


2025-08-19 19:39:19,532 : INFO : PROGRESS: at sentence #150000, processed 2893505 words, keeping 41925 word types
2025-08-19 19:39:19,545 : INFO : PROGRESS: at sentence #160000, processed 3087479 words, keeping 43181 word types
2025-08-19 19:39:19,561 : INFO : PROGRESS: at sentence #170000, processed 3281639 words, keeping 44433 word types
2025-08-19 19:39:19,579 : INFO : PROGRESS: at sentence #180000, processed 3481399 words, keeping 45699 word types
2025-08-19 19:39:19,599 : INFO : PROGRESS: at sentence #190000, processed 3678694 words, keeping 47012 word types
2025-08-19 19:39:19,613 : INFO : PROGRESS: at sentence #200000, processed 3867159 words, keeping 48046 word types
2025-08-19 19:39:19,630 : INFO : PROGRESS: at sentence #210000, processed 4073034 words, keeping 49190 word types
2025-08-19 19:39:19,644 : INFO : PROGRESS: at sentence #220000, processed 4265699 words, keeping 50231 word types
2025-08-19 19:39:19,657 : INFO : PROGRESS: at sentence #230000, processed 4451006 words,

Training model on 644284 comments


2025-08-19 19:39:21,389 : INFO : EPOCH 0 - PROGRESS: at 26.67% examples, 3096626 words/s, in_qsize 31, out_qsize 0
2025-08-19 19:39:22,392 : INFO : EPOCH 0 - PROGRESS: at 54.06% examples, 3115232 words/s, in_qsize 32, out_qsize 0
2025-08-19 19:39:23,394 : INFO : EPOCH 0 - PROGRESS: at 83.94% examples, 3147498 words/s, in_qsize 30, out_qsize 1
2025-08-19 19:39:23,896 : INFO : EPOCH 0: training on 11898372 raw words (11119333 effective words) took 3.5s, 3169662 effective words/s
2025-08-19 19:39:24,905 : INFO : EPOCH 1 - PROGRESS: at 26.21% examples, 3043557 words/s, in_qsize 31, out_qsize 0
2025-08-19 19:39:25,907 : INFO : EPOCH 1 - PROGRESS: at 54.14% examples, 3121301 words/s, in_qsize 31, out_qsize 0
2025-08-19 19:39:26,908 : INFO : EPOCH 1 - PROGRESS: at 83.84% examples, 3146667 words/s, in_qsize 31, out_qsize 0
2025-08-19 19:39:27,419 : INFO : EPOCH 1: training on 11898372 raw words (11119491 effective words) took 3.5s, 3163903 effective words/s
2025-08-19 19:39:28,427 : INFO : EPO

Model saved to models/yearly_models/conservative_2017.model
Processing final 761577 comments for 2018
Building bigram model for 2018 with 761577 comments...
Creating new bigram model for 2018 (cannot update Phraser objects)


2025-08-19 19:39:38,353 : INFO : PROGRESS: at sentence #30000, processed 523677 words and 343595 word types
2025-08-19 19:39:38,431 : INFO : PROGRESS: at sentence #40000, processed 689932 words and 429024 word types
2025-08-19 19:39:38,497 : INFO : PROGRESS: at sentence #50000, processed 835767 words and 502583 word types
2025-08-19 19:39:38,577 : INFO : PROGRESS: at sentence #60000, processed 1015760 words and 583244 word types
2025-08-19 19:39:38,661 : INFO : PROGRESS: at sentence #70000, processed 1185283 words and 662124 word types
2025-08-19 19:39:38,763 : INFO : PROGRESS: at sentence #80000, processed 1357070 words and 739204 word types
2025-08-19 19:39:38,844 : INFO : PROGRESS: at sentence #90000, processed 1537104 words and 816179 word types
2025-08-19 19:39:38,922 : INFO : PROGRESS: at sentence #100000, processed 1704088 words and 885449 word types
2025-08-19 19:39:39,002 : INFO : PROGRESS: at sentence #110000, processed 1856270 words and 944806 word types
2025-08-19 19:39:39,

Updating existing Word2Vec model for 2018


2025-08-19 19:39:50,592 : INFO : PROGRESS: at sentence #160000, processed 2660509 words, keeping 42268 word types
2025-08-19 19:39:50,609 : INFO : PROGRESS: at sentence #170000, processed 2835795 words, keeping 43614 word types
2025-08-19 19:39:50,625 : INFO : PROGRESS: at sentence #180000, processed 3001759 words, keeping 44870 word types
2025-08-19 19:39:50,637 : INFO : PROGRESS: at sentence #190000, processed 3167716 words, keeping 46075 word types
2025-08-19 19:39:50,653 : INFO : PROGRESS: at sentence #200000, processed 3345462 words, keeping 47378 word types
2025-08-19 19:39:50,663 : INFO : PROGRESS: at sentence #210000, processed 3506100 words, keeping 48519 word types
2025-08-19 19:39:50,675 : INFO : PROGRESS: at sentence #220000, processed 3685267 words, keeping 49789 word types
2025-08-19 19:39:50,690 : INFO : PROGRESS: at sentence #230000, processed 3855466 words, keeping 50998 word types
2025-08-19 19:39:50,704 : INFO : PROGRESS: at sentence #240000, processed 4025162 words,

Training model on 761577 comments


2025-08-19 19:39:52,467 : INFO : EPOCH 0 - PROGRESS: at 26.49% examples, 3120553 words/s, in_qsize 31, out_qsize 0
2025-08-19 19:39:53,469 : INFO : EPOCH 0 - PROGRESS: at 54.97% examples, 3192544 words/s, in_qsize 31, out_qsize 0
2025-08-19 19:39:54,475 : INFO : EPOCH 0 - PROGRESS: at 88.28% examples, 3217350 words/s, in_qsize 31, out_qsize 0
2025-08-19 19:39:54,813 : INFO : EPOCH 0: training on 11636890 raw words (10839714 effective words) took 3.4s, 3231910 effective words/s
2025-08-19 19:39:55,823 : INFO : EPOCH 1 - PROGRESS: at 26.19% examples, 3106644 words/s, in_qsize 31, out_qsize 0
2025-08-19 19:39:56,825 : INFO : EPOCH 1 - PROGRESS: at 54.26% examples, 3166614 words/s, in_qsize 31, out_qsize 0
2025-08-19 19:39:57,829 : INFO : EPOCH 1 - PROGRESS: at 87.60% examples, 3205593 words/s, in_qsize 31, out_qsize 0
2025-08-19 19:39:58,187 : INFO : EPOCH 1: training on 11636890 raw words (10839038 effective words) took 3.4s, 3220670 effective words/s
2025-08-19 19:39:59,203 : INFO : EPO

Model saved to models/yearly_models/conservative_2018.model
Skipping final 25755 comments for 2019 (less than minimum required)
Processing final 243616 comments for 2020
Building bigram model for 2020 with 243616 comments...
Creating new bigram model for 2020 (cannot update Phraser objects)


2025-08-19 19:40:08,631 : INFO : PROGRESS: at sentence #40000, processed 470801 words and 305270 word types
2025-08-19 19:40:08,707 : INFO : PROGRESS: at sentence #50000, processed 615223 words and 380870 word types
2025-08-19 19:40:08,768 : INFO : PROGRESS: at sentence #60000, processed 754267 words and 450427 word types
2025-08-19 19:40:08,826 : INFO : PROGRESS: at sentence #70000, processed 876303 words and 509145 word types
2025-08-19 19:40:08,884 : INFO : PROGRESS: at sentence #80000, processed 997306 words and 564795 word types
2025-08-19 19:40:08,943 : INFO : PROGRESS: at sentence #90000, processed 1124738 words and 621351 word types
2025-08-19 19:40:09,018 : INFO : PROGRESS: at sentence #100000, processed 1252388 words and 674413 word types
2025-08-19 19:40:09,093 : INFO : PROGRESS: at sentence #110000, processed 1362508 words and 719370 word types
2025-08-19 19:40:09,156 : INFO : PROGRESS: at sentence #120000, processed 1491836 words and 768939 word types
2025-08-19 19:40:09,2

Updating existing Word2Vec model for 2020


2025-08-19 19:40:11,938 : INFO : PROGRESS: at sentence #160000, processed 1956829 words, keeping 35195 word types
2025-08-19 19:40:11,952 : INFO : PROGRESS: at sentence #170000, processed 2088498 words, keeping 36196 word types
2025-08-19 19:40:11,965 : INFO : PROGRESS: at sentence #180000, processed 2223578 words, keeping 37268 word types
2025-08-19 19:40:11,980 : INFO : PROGRESS: at sentence #190000, processed 2360565 words, keeping 38215 word types
2025-08-19 19:40:11,992 : INFO : PROGRESS: at sentence #200000, processed 2488789 words, keeping 39111 word types
2025-08-19 19:40:12,007 : INFO : PROGRESS: at sentence #210000, processed 2622298 words, keeping 40023 word types
2025-08-19 19:40:12,019 : INFO : PROGRESS: at sentence #220000, processed 2742069 words, keeping 40767 word types
2025-08-19 19:40:12,027 : INFO : PROGRESS: at sentence #230000, processed 2833844 words, keeping 41323 word types
2025-08-19 19:40:12,034 : INFO : PROGRESS: at sentence #240000, processed 2912802 words,

Training model on 243616 comments


2025-08-19 19:40:13,099 : INFO : EPOCH 0: training on 2940124 raw words (2690199 effective words) took 0.9s, 3157324 effective words/s
2025-08-19 19:40:13,980 : INFO : EPOCH 1: training on 2940124 raw words (2690534 effective words) took 0.9s, 3119278 effective words/s
2025-08-19 19:40:14,864 : INFO : EPOCH 2: training on 2940124 raw words (2689616 effective words) took 0.9s, 3084411 effective words/s
2025-08-19 19:40:15,740 : INFO : EPOCH 3: training on 2940124 raw words (2690443 effective words) took 0.9s, 3130826 effective words/s
2025-08-19 19:40:16,596 : INFO : EPOCH 4: training on 2940124 raw words (2690929 effective words) took 0.8s, 3184131 effective words/s
2025-08-19 19:40:16,597 : INFO : Word2Vec lifecycle event {'msg': 'training on 14700620 raw words (13451721 effective words) took 4.4s, 3081676 effective words/s', 'datetime': '2025-08-19T19:40:16.597858', 'gensim': '4.3.3', 'python': '3.11.9 (tags/v3.11.9:de54cf5, Apr  2 2024, 10:12:12) [MSC v.1938 64 bit (AMD64)]', 'platf

Model saved to models/yearly_models/conservative_2020.model
Processing final 889668 comments for 2021
Building bigram model for 2021 with 889668 comments...
Creating new bigram model for 2021 (cannot update Phraser objects)


2025-08-19 19:40:16,848 : INFO : PROGRESS: at sentence #20000, processed 223974 words and 165610 word types
2025-08-19 19:40:16,903 : INFO : PROGRESS: at sentence #30000, processed 339757 words and 236237 word types
2025-08-19 19:40:16,953 : INFO : PROGRESS: at sentence #40000, processed 456542 words and 303523 word types
2025-08-19 19:40:16,991 : INFO : PROGRESS: at sentence #50000, processed 534157 words and 344743 word types
2025-08-19 19:40:17,031 : INFO : PROGRESS: at sentence #60000, processed 608694 words and 383071 word types
2025-08-19 19:40:17,055 : INFO : PROGRESS: at sentence #70000, processed 662439 words and 407644 word types
2025-08-19 19:40:17,091 : INFO : PROGRESS: at sentence #80000, processed 734009 words and 442045 word types
2025-08-19 19:40:17,124 : INFO : PROGRESS: at sentence #90000, processed 803424 words and 471985 word types
2025-08-19 19:40:17,144 : INFO : PROGRESS: at sentence #100000, processed 848376 words and 488990 word types
2025-08-19 19:40:17,161 : I

Updating existing Word2Vec model for 2021


2025-08-19 19:40:28,448 : INFO : PROGRESS: at sentence #250000, processed 1921835 words, keeping 35283 word types
2025-08-19 19:40:28,456 : INFO : PROGRESS: at sentence #260000, processed 2016084 words, keeping 35942 word types
2025-08-19 19:40:28,471 : INFO : PROGRESS: at sentence #270000, processed 2134065 words, keeping 36743 word types
2025-08-19 19:40:28,482 : INFO : PROGRESS: at sentence #280000, processed 2225295 words, keeping 37434 word types
2025-08-19 19:40:28,491 : INFO : PROGRESS: at sentence #290000, processed 2308944 words, keeping 38149 word types
2025-08-19 19:40:28,504 : INFO : PROGRESS: at sentence #300000, processed 2411412 words, keeping 38806 word types
2025-08-19 19:40:28,512 : INFO : PROGRESS: at sentence #310000, processed 2503228 words, keeping 39506 word types
2025-08-19 19:40:28,523 : INFO : PROGRESS: at sentence #320000, processed 2610330 words, keeping 40295 word types
2025-08-19 19:40:28,538 : INFO : PROGRESS: at sentence #330000, processed 2744928 words,

Training model on 889668 comments


2025-08-19 19:40:30,529 : INFO : EPOCH 0 - PROGRESS: at 40.60% examples, 2807963 words/s, in_qsize 30, out_qsize 1
2025-08-19 19:40:31,532 : INFO : EPOCH 0 - PROGRESS: at 68.20% examples, 2959949 words/s, in_qsize 32, out_qsize 1
2025-08-19 19:40:32,535 : INFO : EPOCH 0 - PROGRESS: at 98.35% examples, 3036668 words/s, in_qsize 19, out_qsize 0
2025-08-19 19:40:32,576 : INFO : EPOCH 0: training on 10160714 raw words (9296545 effective words) took 3.0s, 3050957 effective words/s
2025-08-19 19:40:33,590 : INFO : EPOCH 1 - PROGRESS: at 42.94% examples, 3040010 words/s, in_qsize 32, out_qsize 1
2025-08-19 19:40:34,591 : INFO : EPOCH 1 - PROGRESS: at 70.91% examples, 3099016 words/s, in_qsize 31, out_qsize 0
2025-08-19 19:40:35,574 : INFO : EPOCH 1: training on 10160714 raw words (9296338 effective words) took 3.0s, 3110940 effective words/s
2025-08-19 19:40:36,586 : INFO : EPOCH 2 - PROGRESS: at 42.77% examples, 3028717 words/s, in_qsize 31, out_qsize 0
2025-08-19 19:40:37,587 : INFO : EPOCH

Model saved to models/yearly_models/conservative_2021.model
Processing final 930383 comments for 2022
Building bigram model for 2022 with 930383 comments...
Creating new bigram model for 2022 (cannot update Phraser objects)


2025-08-19 19:40:44,696 : INFO : PROGRESS: at sentence #40000, processed 349019 words and 236114 word types
2025-08-19 19:40:44,731 : INFO : PROGRESS: at sentence #50000, processed 430730 words and 282544 word types
2025-08-19 19:40:44,766 : INFO : PROGRESS: at sentence #60000, processed 509456 words and 323518 word types
2025-08-19 19:40:44,798 : INFO : PROGRESS: at sentence #70000, processed 569073 words and 352178 word types
2025-08-19 19:40:44,836 : INFO : PROGRESS: at sentence #80000, processed 656991 words and 397556 word types
2025-08-19 19:40:44,879 : INFO : PROGRESS: at sentence #90000, processed 759945 words and 450010 word types
2025-08-19 19:40:44,925 : INFO : PROGRESS: at sentence #100000, processed 861301 words and 499946 word types
2025-08-19 19:40:44,965 : INFO : PROGRESS: at sentence #110000, processed 950845 words and 543679 word types
2025-08-19 19:40:45,008 : INFO : PROGRESS: at sentence #120000, processed 1046901 words and 589684 word types
2025-08-19 19:40:45,055 

Updating existing Word2Vec model for 2022


2025-08-19 19:40:53,473 : INFO : PROGRESS: at sentence #260000, processed 2374445 words, keeping 43424 word types
2025-08-19 19:40:53,481 : INFO : PROGRESS: at sentence #270000, processed 2477205 words, keeping 44390 word types
2025-08-19 19:40:53,489 : INFO : PROGRESS: at sentence #280000, processed 2578717 words, keeping 45182 word types
2025-08-19 19:40:53,495 : INFO : PROGRESS: at sentence #290000, processed 2683283 words, keeping 45875 word types
2025-08-19 19:40:53,502 : INFO : PROGRESS: at sentence #300000, processed 2782725 words, keeping 46428 word types
2025-08-19 19:40:53,510 : INFO : PROGRESS: at sentence #310000, processed 2883675 words, keeping 47163 word types
2025-08-19 19:40:53,517 : INFO : PROGRESS: at sentence #320000, processed 2990628 words, keeping 48004 word types
2025-08-19 19:40:53,524 : INFO : PROGRESS: at sentence #330000, processed 3094432 words, keeping 48716 word types
2025-08-19 19:40:53,530 : INFO : PROGRESS: at sentence #340000, processed 3182636 words,

Training model on 930383 comments


2025-08-19 19:40:55,109 : INFO : EPOCH 0 - PROGRESS: at 39.54% examples, 3140535 words/s, in_qsize 31, out_qsize 0
2025-08-19 19:40:56,110 : INFO : EPOCH 0 - PROGRESS: at 82.77% examples, 3234024 words/s, in_qsize 31, out_qsize 0
2025-08-19 19:40:56,490 : INFO : EPOCH 0: training on 8532160 raw words (7748082 effective words) took 2.4s, 3248017 effective words/s
2025-08-19 19:40:57,507 : INFO : EPOCH 1 - PROGRESS: at 40.08% examples, 3195005 words/s, in_qsize 31, out_qsize 0
2025-08-19 19:40:58,511 : INFO : EPOCH 1 - PROGRESS: at 82.08% examples, 3205944 words/s, in_qsize 31, out_qsize 0
2025-08-19 19:40:58,932 : INFO : EPOCH 1: training on 8532160 raw words (7748152 effective words) took 2.4s, 3191906 effective words/s
2025-08-19 19:40:59,947 : INFO : EPOCH 2 - PROGRESS: at 40.08% examples, 3198659 words/s, in_qsize 31, out_qsize 0
2025-08-19 19:41:00,949 : INFO : EPOCH 2 - PROGRESS: at 82.78% examples, 3239023 words/s, in_qsize 31, out_qsize 0
2025-08-19 19:41:01,383 : INFO : EPOCH 2

Model saved to models/yearly_models/conservative_2022.model
Processing final 913895 comments for 2023
Building bigram model for 2023 with 913895 comments...
Creating new bigram model for 2023 (cannot update Phraser objects)


2025-08-19 19:41:06,737 : INFO : PROGRESS: at sentence #30000, processed 289958 words and 209280 word types
2025-08-19 19:41:06,784 : INFO : PROGRESS: at sentence #40000, processed 387358 words and 266684 word types
2025-08-19 19:41:06,840 : INFO : PROGRESS: at sentence #50000, processed 505833 words and 331396 word types
2025-08-19 19:41:06,891 : INFO : PROGRESS: at sentence #60000, processed 603150 words and 383136 word types
2025-08-19 19:41:06,940 : INFO : PROGRESS: at sentence #70000, processed 712995 words and 441922 word types
2025-08-19 19:41:06,986 : INFO : PROGRESS: at sentence #80000, processed 817351 words and 494853 word types
2025-08-19 19:41:07,047 : INFO : PROGRESS: at sentence #90000, processed 938412 words and 552377 word types
2025-08-19 19:41:07,115 : INFO : PROGRESS: at sentence #100000, processed 1047879 words and 605783 word types
2025-08-19 19:41:07,175 : INFO : PROGRESS: at sentence #110000, processed 1153397 words and 656928 word types
2025-08-19 19:41:07,251 

Updating existing Word2Vec model for 2023


2025-08-19 19:41:16,140 : INFO : PROGRESS: at sentence #190000, processed 2051416 words, keeping 40987 word types
2025-08-19 19:41:16,149 : INFO : PROGRESS: at sentence #200000, processed 2156467 words, keeping 41867 word types
2025-08-19 19:41:16,159 : INFO : PROGRESS: at sentence #210000, processed 2276970 words, keeping 42860 word types
2025-08-19 19:41:16,168 : INFO : PROGRESS: at sentence #220000, processed 2377083 words, keeping 43599 word types
2025-08-19 19:41:16,176 : INFO : PROGRESS: at sentence #230000, processed 2443876 words, keeping 44059 word types
2025-08-19 19:41:16,182 : INFO : PROGRESS: at sentence #240000, processed 2505795 words, keeping 44510 word types
2025-08-19 19:41:16,188 : INFO : PROGRESS: at sentence #250000, processed 2574196 words, keeping 44954 word types
2025-08-19 19:41:16,196 : INFO : PROGRESS: at sentence #260000, processed 2669824 words, keeping 45695 word types
2025-08-19 19:41:16,206 : INFO : PROGRESS: at sentence #270000, processed 2780511 words,

Training model on 913895 comments


2025-08-19 19:41:17,922 : INFO : EPOCH 0 - PROGRESS: at 36.18% examples, 3075718 words/s, in_qsize 28, out_qsize 4
2025-08-19 19:41:18,923 : INFO : EPOCH 0 - PROGRESS: at 77.36% examples, 3130519 words/s, in_qsize 31, out_qsize 0
2025-08-19 19:41:19,428 : INFO : EPOCH 0: training on 8730383 raw words (7932372 effective words) took 2.5s, 3159946 effective words/s
2025-08-19 19:41:20,447 : INFO : EPOCH 1 - PROGRESS: at 36.54% examples, 3110975 words/s, in_qsize 31, out_qsize 0
2025-08-19 19:41:21,449 : INFO : EPOCH 1 - PROGRESS: at 79.02% examples, 3201367 words/s, in_qsize 31, out_qsize 0
2025-08-19 19:41:21,889 : INFO : EPOCH 1: training on 8730383 raw words (7931631 effective words) took 2.4s, 3245795 effective words/s
2025-08-19 19:41:22,908 : INFO : EPOCH 2 - PROGRESS: at 38.14% examples, 3211072 words/s, in_qsize 32, out_qsize 0
2025-08-19 19:41:23,908 : INFO : EPOCH 2 - PROGRESS: at 79.96% examples, 3252018 words/s, in_qsize 32, out_qsize 0
2025-08-19 19:41:24,330 : INFO : EPOCH 2

Model saved to models/yearly_models/conservative_2023.model
Model saved to models/yearly_models/conservative_2012.model
Model saved to models/yearly_models/conservative_2013.model
Model saved to models/yearly_models/conservative_2014.model
Model saved to models/yearly_models/conservative_2015.model
Model saved to models/yearly_models/conservative_2016.model
Model saved to models/yearly_models/conservative_2017.model
Model saved to models/yearly_models/conservative_2018.model


2025-08-19 19:41:29,466 : INFO : Word2Vec lifecycle event {'fname_or_handle': 'models/yearly_models/conservative_2020.model', 'separately': 'None', 'sep_limit': 10485760, 'ignore': frozenset(), 'datetime': '2025-08-19T19:41:29.466788', 'gensim': '4.3.3', 'python': '3.11.9 (tags/v3.11.9:de54cf5, Apr  2 2024, 10:12:12) [MSC v.1938 64 bit (AMD64)]', 'platform': 'Windows-10-10.0.26100-SP0', 'event': 'saving'}
2025-08-19 19:41:29,466 : INFO : not storing attribute cum_table
2025-08-19 19:41:29,518 : INFO : saved models/yearly_models/conservative_2020.model
2025-08-19 19:41:29,519 : INFO : Word2Vec lifecycle event {'fname_or_handle': 'models/yearly_models/conservative_2021.model', 'separately': 'None', 'sep_limit': 10485760, 'ignore': frozenset(), 'datetime': '2025-08-19T19:41:29.519755', 'gensim': '4.3.3', 'python': '3.11.9 (tags/v3.11.9:de54cf5, Apr  2 2024, 10:12:12) [MSC v.1938 64 bit (AMD64)]', 'platform': 'Windows-10-10.0.26100-SP0', 'event': 'saving'}
2025-08-19 19:41:29,519 : INFO : 

Model saved to models/yearly_models/conservative_2019.model
Model saved to models/yearly_models/conservative_2020.model
Model saved to models/yearly_models/conservative_2021.model
Model saved to models/yearly_models/conservative_2022.model
Model saved to models/yearly_models/conservative_2023.model
Completed building yearly models for conservative
Building yearly models for liberal
Processing processed_comments/liberal\liberal_batch1.pkl
Loaded 490661 comments from processed_comments/liberal\liberal_batch1.pkl


2025-08-19 19:41:34,192 : INFO : Word2Vec lifecycle event {'fname_or_handle': 'models/yearly_models/liberal_2012.model', 'separately': 'None', 'sep_limit': 10485760, 'ignore': frozenset(), 'datetime': '2025-08-19T19:41:34.192592', 'gensim': '4.3.3', 'python': '3.11.9 (tags/v3.11.9:de54cf5, Apr  2 2024, 10:12:12) [MSC v.1938 64 bit (AMD64)]', 'platform': 'Windows-10-10.0.26100-SP0', 'event': 'saving'}
2025-08-19 19:41:34,193 : INFO : not storing attribute cum_table
2025-08-19 19:41:34,210 : INFO : saved models/yearly_models/liberal_2012.model
2025-08-19 19:41:34,210 : INFO : Word2Vec lifecycle event {'fname_or_handle': 'models/yearly_models/liberal_2013.model', 'separately': 'None', 'sep_limit': 10485760, 'ignore': frozenset(), 'datetime': '2025-08-19T19:41:34.210403', 'gensim': '4.3.3', 'python': '3.11.9 (tags/v3.11.9:de54cf5, Apr  2 2024, 10:12:12) [MSC v.1938 64 bit (AMD64)]', 'platform': 'Windows-10-10.0.26100-SP0', 'event': 'saving'}
2025-08-19 19:41:34,211 : INFO : not storing att


=== Comment Counts by Year ===
2011: 3059 comments
2012: 16587 comments
2013: 27742 comments
2014: 27654 comments
2015: 27351 comments
2016: 28976 comments
2017: 24871 comments
2018: 51074 comments
2019: 26999 comments
2020: 42871 comments
2021: 46570 comments
2022: 39692 comments
2023: 47289 comments
Skipping final 3059 comments for 2011 (less than minimum required)
Skipping final 16587 comments for 2012 (less than minimum required)
Skipping final 27742 comments for 2013 (less than minimum required)
Skipping final 27654 comments for 2014 (less than minimum required)
Skipping final 27351 comments for 2015 (less than minimum required)
Skipping final 28976 comments for 2016 (less than minimum required)
Skipping final 24871 comments for 2017 (less than minimum required)
Skipping final 51074 comments for 2018 (less than minimum required)
Skipping final 26999 comments for 2019 (less than minimum required)
Skipping final 42871 comments for 2020 (less than minimum required)
Skipping final 46

2025-08-19 19:41:34,388 : INFO : Word2Vec lifecycle event {'fname_or_handle': 'models/yearly_models/liberal_2020.model', 'separately': 'None', 'sep_limit': 10485760, 'ignore': frozenset(), 'datetime': '2025-08-19T19:41:34.388222', 'gensim': '4.3.3', 'python': '3.11.9 (tags/v3.11.9:de54cf5, Apr  2 2024, 10:12:12) [MSC v.1938 64 bit (AMD64)]', 'platform': 'Windows-10-10.0.26100-SP0', 'event': 'saving'}
2025-08-19 19:41:34,388 : INFO : not storing attribute cum_table
2025-08-19 19:41:34,438 : INFO : saved models/yearly_models/liberal_2020.model
2025-08-19 19:41:34,439 : INFO : Word2Vec lifecycle event {'fname_or_handle': 'models/yearly_models/liberal_2021.model', 'separately': 'None', 'sep_limit': 10485760, 'ignore': frozenset(), 'datetime': '2025-08-19T19:41:34.439499', 'gensim': '4.3.3', 'python': '3.11.9 (tags/v3.11.9:de54cf5, Apr  2 2024, 10:12:12) [MSC v.1938 64 bit (AMD64)]', 'platform': 'Windows-10-10.0.26100-SP0', 'event': 'saving'}
2025-08-19 19:41:34,439 : INFO : not storing att

Model saved to models/yearly_models/liberal_2019.model
Model saved to models/yearly_models/liberal_2020.model
Model saved to models/yearly_models/liberal_2021.model
Model saved to models/yearly_models/liberal_2022.model
Model saved to models/yearly_models/liberal_2023.model
Completed building yearly models for liberal
Building yearly models for backpacking
Processing processed_comments/backpacking\backpacking_batch1.pkl
Loaded 814804 comments from processed_comments/backpacking\backpacking_batch1.pkl


2025-08-19 19:41:40,985 : INFO : collecting all words and their counts
2025-08-19 19:41:40,986 : INFO : PROGRESS: at sentence #0, processed 0 words and 0 word types
2025-08-19 19:41:41,051 : INFO : PROGRESS: at sentence #10000, processed 174792 words and 130612 word types
2025-08-19 19:41:41,120 : INFO : PROGRESS: at sentence #20000, processed 348496 words and 229941 word types
2025-08-19 19:41:41,188 : INFO : PROGRESS: at sentence #30000, processed 521334 words and 317164 word types



=== Comment Counts by Year ===
2011: 2656 comments
2012: 13659 comments
2013: 26962 comments
2014: 37432 comments
2015: 42328 comments
2016: 37314 comments
2017: 43095 comments
2018: 51332 comments
2019: 64864 comments
2020: 66510 comments
2021: 76440 comments
2022: 102529 comments
2023: 105253 comments
Skipping final 2656 comments for 2011 (less than minimum required)
Skipping final 13659 comments for 2012 (less than minimum required)
Skipping final 26962 comments for 2013 (less than minimum required)
Skipping final 37432 comments for 2014 (less than minimum required)
Skipping final 42328 comments for 2015 (less than minimum required)
Skipping final 37314 comments for 2016 (less than minimum required)
Skipping final 43095 comments for 2017 (less than minimum required)
Skipping final 51332 comments for 2018 (less than minimum required)
Skipping final 64864 comments for 2019 (less than minimum required)
Skipping final 66510 comments for 2020 (less than minimum required)
Skipping final 

2025-08-19 19:41:41,260 : INFO : PROGRESS: at sentence #40000, processed 693754 words and 399277 word types
2025-08-19 19:41:41,330 : INFO : PROGRESS: at sentence #50000, processed 860304 words and 472340 word types
2025-08-19 19:41:41,408 : INFO : PROGRESS: at sentence #60000, processed 1047631 words and 549668 word types
2025-08-19 19:41:41,485 : INFO : PROGRESS: at sentence #70000, processed 1224956 words and 621079 word types
2025-08-19 19:41:41,559 : INFO : PROGRESS: at sentence #80000, processed 1391930 words and 686004 word types
2025-08-19 19:41:41,642 : INFO : PROGRESS: at sentence #90000, processed 1568647 words and 751080 word types
2025-08-19 19:41:41,721 : INFO : PROGRESS: at sentence #100000, processed 1748802 words and 818932 word types
2025-08-19 19:41:41,739 : INFO : collected 835022 token types (unigram + bigrams) from a corpus of 1792878 words and 102529 sentences
2025-08-19 19:41:41,740 : INFO : merged Phrases<835022 vocab, min_count=5, threshold=0.7, max_vocab_size

Updating existing Word2Vec model for 2022


2025-08-19 19:41:42,974 : INFO : estimated required memory for 8577 words and 300 dimensions: 24873300 bytes
2025-08-19 19:41:42,975 : INFO : updating layer weights
2025-08-19 19:41:43,002 : INFO : Word2Vec lifecycle event {'update': True, 'trim_rule': 'None', 'datetime': '2025-08-19T19:41:43.001359', 'gensim': '4.3.3', 'python': '3.11.9 (tags/v3.11.9:de54cf5, Apr  2 2024, 10:12:12) [MSC v.1938 64 bit (AMD64)]', 'platform': 'Windows-10-10.0.26100-SP0', 'event': 'build_vocab'}
2025-08-19 19:41:43,003 : INFO : Word2Vec lifecycle event {'msg': 'training model with 16 workers on 27531 vocabulary and 300 features, using sg=1 hs=0 sample=0.001 negative=5 window=5 shrink_windows=True', 'datetime': '2025-08-19T19:41:43.002357', 'gensim': '4.3.3', 'python': '3.11.9 (tags/v3.11.9:de54cf5, Apr  2 2024, 10:12:12) [MSC v.1938 64 bit (AMD64)]', 'platform': 'Windows-10-10.0.26100-SP0', 'event': 'train'}


Training model on 102529 comments


2025-08-19 19:41:43,515 : INFO : EPOCH 0: training on 1728793 raw words (1560045 effective words) took 0.5s, 3101691 effective words/s
2025-08-19 19:41:44,008 : INFO : EPOCH 1: training on 1728793 raw words (1559625 effective words) took 0.5s, 3225927 effective words/s
2025-08-19 19:41:44,505 : INFO : EPOCH 2: training on 1728793 raw words (1560435 effective words) took 0.5s, 3190526 effective words/s
2025-08-19 19:41:45,002 : INFO : EPOCH 3: training on 1728793 raw words (1559955 effective words) took 0.5s, 3199129 effective words/s
2025-08-19 19:41:45,497 : INFO : EPOCH 4: training on 1728793 raw words (1559976 effective words) took 0.5s, 3208934 effective words/s
2025-08-19 19:41:45,497 : INFO : Word2Vec lifecycle event {'msg': 'training on 8643965 raw words (7800036 effective words) took 2.5s, 3126552 effective words/s', 'datetime': '2025-08-19T19:41:45.497784', 'gensim': '4.3.3', 'python': '3.11.9 (tags/v3.11.9:de54cf5, Apr  2 2024, 10:12:12) [MSC v.1938 64 bit (AMD64)]', 'platfor

Model saved to models/yearly_models/backpacking_2022.model
Processing final 105253 comments for 2023
Building bigram model for 2023 with 105253 comments...
Creating new bigram model for 2023 (cannot update Phraser objects)


2025-08-19 19:41:45,782 : INFO : PROGRESS: at sentence #30000, processed 566314 words and 339139 word types
2025-08-19 19:41:45,874 : INFO : PROGRESS: at sentence #40000, processed 770999 words and 434591 word types
2025-08-19 19:41:45,963 : INFO : PROGRESS: at sentence #50000, processed 993187 words and 520846 word types
2025-08-19 19:41:46,053 : INFO : PROGRESS: at sentence #60000, processed 1225354 words and 602263 word types
2025-08-19 19:41:46,146 : INFO : PROGRESS: at sentence #70000, processed 1437206 words and 679739 word types
2025-08-19 19:41:46,245 : INFO : PROGRESS: at sentence #80000, processed 1657426 words and 760241 word types
2025-08-19 19:41:46,350 : INFO : PROGRESS: at sentence #90000, processed 1868370 words and 837641 word types
2025-08-19 19:41:46,443 : INFO : PROGRESS: at sentence #100000, processed 2084732 words and 909947 word types
2025-08-19 19:41:46,494 : INFO : collected 948770 token types (unigram + bigrams) from a corpus of 2202079 words and 105253 senten

Updating existing Word2Vec model for 2023


2025-08-19 19:41:47,860 : INFO : sample=0.001 downsamples 50 most-common words
2025-08-19 19:41:47,860 : INFO : Word2Vec lifecycle event {'msg': 'downsampling leaves estimated 1858109.7563966953 word corpus (92.1%% of prior 2016618)', 'datetime': '2025-08-19T19:41:47.860542', 'gensim': '4.3.3', 'python': '3.11.9 (tags/v3.11.9:de54cf5, Apr  2 2024, 10:12:12) [MSC v.1938 64 bit (AMD64)]', 'platform': 'Windows-10-10.0.26100-SP0', 'event': 'prepare_vocab'}
2025-08-19 19:41:47,932 : INFO : estimated required memory for 9324 words and 300 dimensions: 27039600 bytes
2025-08-19 19:41:47,933 : INFO : updating layer weights
2025-08-19 19:41:47,957 : INFO : Word2Vec lifecycle event {'update': True, 'trim_rule': 'None', 'datetime': '2025-08-19T19:41:47.957578', 'gensim': '4.3.3', 'python': '3.11.9 (tags/v3.11.9:de54cf5, Apr  2 2024, 10:12:12) [MSC v.1938 64 bit (AMD64)]', 'platform': 'Windows-10-10.0.26100-SP0', 'event': 'build_vocab'}
2025-08-19 19:41:47,958 : INFO : Word2Vec lifecycle event {'ms

Training model on 105253 comments


2025-08-19 19:41:48,602 : INFO : EPOCH 0: training on 2093074 raw words (1885710 effective words) took 0.6s, 2973825 effective words/s
2025-08-19 19:41:49,224 : INFO : EPOCH 1: training on 2093074 raw words (1886086 effective words) took 0.6s, 3073330 effective words/s
2025-08-19 19:41:49,833 : INFO : EPOCH 2: training on 2093074 raw words (1885856 effective words) took 0.6s, 3139475 effective words/s
2025-08-19 19:41:50,433 : INFO : EPOCH 3: training on 2093074 raw words (1886117 effective words) took 0.6s, 3196129 effective words/s
2025-08-19 19:41:51,028 : INFO : EPOCH 4: training on 2093074 raw words (1886112 effective words) took 0.6s, 3215800 effective words/s
2025-08-19 19:41:51,029 : INFO : Word2Vec lifecycle event {'msg': 'training on 10465370 raw words (9429881 effective words) took 3.1s, 3071057 effective words/s', 'datetime': '2025-08-19T19:41:51.029594', 'gensim': '4.3.3', 'python': '3.11.9 (tags/v3.11.9:de54cf5, Apr  2 2024, 10:12:12) [MSC v.1938 64 bit (AMD64)]', 'platfo

Model saved to models/yearly_models/backpacking_2023.model
Model saved to models/yearly_models/backpacking_2012.model
Model saved to models/yearly_models/backpacking_2013.model
Model saved to models/yearly_models/backpacking_2014.model
Model saved to models/yearly_models/backpacking_2015.model
Model saved to models/yearly_models/backpacking_2016.model
Model saved to models/yearly_models/backpacking_2017.model
Model saved to models/yearly_models/backpacking_2018.model


2025-08-19 19:41:51,269 : INFO : saved models/yearly_models/backpacking_2019.model
2025-08-19 19:41:51,270 : INFO : Word2Vec lifecycle event {'fname_or_handle': 'models/yearly_models/backpacking_2020.model', 'separately': 'None', 'sep_limit': 10485760, 'ignore': frozenset(), 'datetime': '2025-08-19T19:41:51.270646', 'gensim': '4.3.3', 'python': '3.11.9 (tags/v3.11.9:de54cf5, Apr  2 2024, 10:12:12) [MSC v.1938 64 bit (AMD64)]', 'platform': 'Windows-10-10.0.26100-SP0', 'event': 'saving'}
2025-08-19 19:41:51,270 : INFO : not storing attribute cum_table
2025-08-19 19:41:51,318 : INFO : saved models/yearly_models/backpacking_2020.model
2025-08-19 19:41:51,318 : INFO : Word2Vec lifecycle event {'fname_or_handle': 'models/yearly_models/backpacking_2021.model', 'separately': 'None', 'sep_limit': 10485760, 'ignore': frozenset(), 'datetime': '2025-08-19T19:41:51.318926', 'gensim': '4.3.3', 'python': '3.11.9 (tags/v3.11.9:de54cf5, Apr  2 2024, 10:12:12) [MSC v.1938 64 bit (AMD64)]', 'platform': '

Model saved to models/yearly_models/backpacking_2019.model
Model saved to models/yearly_models/backpacking_2020.model
Model saved to models/yearly_models/backpacking_2021.model
Model saved to models/yearly_models/backpacking_2022.model
Model saved to models/yearly_models/backpacking_2023.model
Completed building yearly models for backpacking
Building yearly models for vagabond
Processing processed_comments/vagabond\vagabond_batch1.pkl
Loaded 488480 comments from processed_comments/vagabond\vagabond_batch1.pkl


2025-08-19 19:41:53,559 : INFO : Word2Vec lifecycle event {'fname_or_handle': 'models/yearly_models/vagabond_2013.model', 'separately': 'None', 'sep_limit': 10485760, 'ignore': frozenset(), 'datetime': '2025-08-19T19:41:53.559631', 'gensim': '4.3.3', 'python': '3.11.9 (tags/v3.11.9:de54cf5, Apr  2 2024, 10:12:12) [MSC v.1938 64 bit (AMD64)]', 'platform': 'Windows-10-10.0.26100-SP0', 'event': 'saving'}
2025-08-19 19:41:53,560 : INFO : not storing attribute cum_table
2025-08-19 19:41:53,573 : INFO : saved models/yearly_models/vagabond_2013.model
2025-08-19 19:41:53,574 : INFO : Word2Vec lifecycle event {'fname_or_handle': 'models/yearly_models/vagabond_2014.model', 'separately': 'None', 'sep_limit': 10485760, 'ignore': frozenset(), 'datetime': '2025-08-19T19:41:53.574725', 'gensim': '4.3.3', 'python': '3.11.9 (tags/v3.11.9:de54cf5, Apr  2 2024, 10:12:12) [MSC v.1938 64 bit (AMD64)]', 'platform': 'Windows-10-10.0.26100-SP0', 'event': 'saving'}
2025-08-19 19:41:53,574 : INFO : not storing 


=== Comment Counts by Year ===
2013: 94 comments
2014: 614 comments
2015: 21598 comments
2016: 17017 comments
2017: 13738 comments
2018: 33522 comments
2019: 58153 comments
2020: 51570 comments
2021: 62415 comments
2022: 59095 comments
2023: 86296 comments
Skipping final 0 comments for 2011 (less than minimum required)
Skipping final 0 comments for 2012 (less than minimum required)
Skipping final 94 comments for 2013 (less than minimum required)
Skipping final 614 comments for 2014 (less than minimum required)
Skipping final 21598 comments for 2015 (less than minimum required)
Skipping final 17017 comments for 2016 (less than minimum required)
Skipping final 13738 comments for 2017 (less than minimum required)
Skipping final 33522 comments for 2018 (less than minimum required)
Skipping final 58153 comments for 2019 (less than minimum required)
Skipping final 51570 comments for 2020 (less than minimum required)
Skipping final 62415 comments for 2021 (less than minimum required)
Skippin

2025-08-19 19:41:53,784 : INFO : saved models/yearly_models/vagabond_2020.model
2025-08-19 19:41:53,785 : INFO : Word2Vec lifecycle event {'fname_or_handle': 'models/yearly_models/vagabond_2021.model', 'separately': 'None', 'sep_limit': 10485760, 'ignore': frozenset(), 'datetime': '2025-08-19T19:41:53.785387', 'gensim': '4.3.3', 'python': '3.11.9 (tags/v3.11.9:de54cf5, Apr  2 2024, 10:12:12) [MSC v.1938 64 bit (AMD64)]', 'platform': 'Windows-10-10.0.26100-SP0', 'event': 'saving'}
2025-08-19 19:41:53,785 : INFO : not storing attribute cum_table
2025-08-19 19:41:53,830 : INFO : saved models/yearly_models/vagabond_2021.model
2025-08-19 19:41:53,831 : INFO : Word2Vec lifecycle event {'fname_or_handle': 'models/yearly_models/vagabond_2022.model', 'separately': 'None', 'sep_limit': 10485760, 'ignore': frozenset(), 'datetime': '2025-08-19T19:41:53.831317', 'gensim': '4.3.3', 'python': '3.11.9 (tags/v3.11.9:de54cf5, Apr  2 2024, 10:12:12) [MSC v.1938 64 bit (AMD64)]', 'platform': 'Windows-10-1

Model saved to models/yearly_models/vagabond_2020.model
Model saved to models/yearly_models/vagabond_2021.model
Model saved to models/yearly_models/vagabond_2022.model
Model saved to models/yearly_models/vagabond_2023.model
Completed building yearly models for vagabond
