## **Epochs Testing**  

In [1]:

import pandas as pd
import os
import time
import matplotlib.pyplot as plt
from gensim.models import Word2Vec
from gensim.models.callbacks import CallbackAny2Vec
import logging
# Configure the root logger at INFO level with a timestamped format so that
# library progress messages are shown while training runs.
logging.basicConfig(format='%(asctime)s : %(levelname)s : %(message)s', level=logging.INFO)

# Word2Vec hyperparameters, presumably mirrored from the original training
# script that this notebook tunes.
# NOTE(review): none of these constants is referenced by the functions in the
# visible part of this file; they use their own keyword defaults instead.
# `min_count` and `w2v_min_count` hold the same value -- confirm whether both
# are needed.
vector_size_num = 100
window_num = 5
workers_num = 4
negative_sampling = 10
epoches_num = 10
min_count = 2
w2v_min_count = 2
w2v_topn = 10

class LossCallback(CallbackAny2Vec):
    """Track the loss attributed to each individual training epoch.

    At the end of every epoch the value of
    ``model.get_latest_training_loss()`` is read. That value is treated as a
    running total, so the per-epoch loss is obtained by subtracting the
    reading taken at the end of the previous epoch.

    Attributes are documented inline in ``__init__``.
    """

    def __init__(self):
        # Index of the epoch that will finish next (0-based).
        self.epoch = 0
        # Loss reading taken at the end of the previous epoch.
        self.previous_loss = 0
        # Per-epoch loss values, in training order.
        self.loss_history = []

    def on_epoch_end(self, model):
        """Record and print the loss for the epoch that just finished."""
        running_total = model.get_latest_training_loss()

        # The very first reading has nothing to be subtracted from it.
        epoch_loss = (
            running_total
            if self.epoch == 0
            else running_total - self.previous_loss
        )

        self.previous_loss = running_total
        self.loss_history.append(epoch_loss)
        print(f'Epoch {self.epoch}, Loss: {epoch_loss}')
        self.epoch += 1

def load_transaction_data(file_path):
    """Read an Excel sheet of purchases and group item names into baskets.

    Rows with a missing ``Itemname`` are discarded, then the remaining rows
    are grouped by ``BillNo`` so that each bill becomes one list of item
    names.

    Args:
        file_path: Path to the Excel file; it must contain the columns
            ``BillNo`` and ``Itemname``.

    Returns:
        A list of transactions, each being the list of item names on one bill.
    """
    print(f"Loading data from {file_path}")

    purchases = pd.read_excel(file_path).dropna(subset=['Itemname'])

    # One list of item names per bill number.
    baskets = purchases.groupby('BillNo')['Itemname'].apply(list)
    transactions = baskets.tolist()

    print(f"Loaded {len(transactions)} transactions")
    return transactions

def train_model_with_epochs(transactions, epochs, vector_size=100, window=5, min_count=2, workers=4, negative=10):
    """Train one Word2Vec model and report its timing and loss curve.

    The model is trained with ``sg=1`` and ``compute_loss=True``; a
    ``LossCallback`` collects the loss of every epoch. The trained model
    itself is not returned.

    Args:
        transactions: List of item-name lists used as training sentences.
        epochs: Number of training epochs.
        vector_size: Forwarded to ``Word2Vec`` as ``vector_size``.
        window: Forwarded to ``Word2Vec`` as ``window``.
        min_count: Forwarded to ``Word2Vec`` as ``min_count``.
        workers: Forwarded to ``Word2Vec`` as ``workers``.
        negative: Forwarded to ``Word2Vec`` as ``negative``.

    Returns:
        A dict with the keys ``'epochs'``, ``'training_time'`` (seconds),
        ``'loss_history'`` (per-epoch losses) and ``'final_loss'`` (last entry
        of the history, or ``None`` when the history is empty).
    """
    print(f"Training model with {epochs} epochs...")

    tracker = LossCallback()
    w2v_kwargs = {
        'sentences': transactions,
        'vector_size': vector_size,
        'window': window,
        'sg': 1,
        'negative': negative,
        'min_count': min_count,
        'workers': workers,
        'epochs': epochs,
        'compute_loss': True,
        'callbacks': [tracker],
    }

    # Constructing the model with `sentences` runs the training, so the
    # constructor call is what gets timed.
    started_at = time.time()
    Word2Vec(**w2v_kwargs)
    training_time = time.time() - started_at
    print(f"Training completed in {training_time:.2f} seconds")

    history = tracker.loss_history
    if history:
        last_loss = history[-1]
    else:
        last_loss = None

    return {
        'epochs': epochs,
        'training_time': training_time,
        'loss_history': history,
        'final_loss': last_loss
    }

def test_multiple_epochs(transactions, epoch_values):
    """Train one model per requested epoch count and collect the results.

    Args:
        transactions: List of item-name lists used as training sentences.
        epoch_values: Iterable of epoch counts to try, in the order given.

    Returns:
        A dict mapping each epoch count to the result dict returned by
        ``train_model_with_epochs`` for that count.
    """
    def run_once(n_epochs):
        # Announce the run, then train with all other settings at defaults.
        print(f"\n===== Testing {n_epochs} epochs =====")
        return train_model_with_epochs(transactions, n_epochs)

    return {n_epochs: run_once(n_epochs) for n_epochs in epoch_values}

def calculate_convergence(results, threshold=5.0):
    """Recommend an epoch count by looking for diminishing loss improvement.

    Runs are ordered by epoch count and each consecutive pair is compared
    using the percentage drop in ``final_loss`` from the smaller run to the
    larger one. The larger run of the first pair whose drop is below
    ``threshold`` is recommended; if no pair falls below the threshold, the
    largest epoch count is recommended. A comparison table and a summary are
    printed along the way.

    Args:
        results: Mapping of epoch count -> result dict containing at least
            ``'final_loss'`` (a number, or ``None`` when no loss was
            recorded) and ``'training_time'`` (seconds).
        threshold: Percentage improvement below which an additional step in
            epochs is considered not worthwhile. Defaults to 5.0.

    Returns:
        The recommended epoch count (one of the keys of ``results``).

    Raises:
        ValueError: If ``results`` is empty.
    """
    if not results:
        raise ValueError("results is empty; run at least one epoch configuration first")

    epochs = sorted(results)
    final_losses = [results[e]['final_loss'] for e in epochs]

    # (previous epoch count, current epoch count, percentage decrease)
    loss_decreases = []
    for prev_epoch, epoch, prev_loss, loss in zip(
        epochs, epochs[1:], final_losses, final_losses[1:]
    ):
        # A run that recorded no loss cannot take part in a comparison.
        if prev_loss is None or loss is None:
            continue
        # Non-positive losses are skipped: the percentage would be undefined
        # or meaningless.
        if prev_loss > 0 and loss > 0:
            percentage_decrease = (prev_loss - loss) / prev_loss * 100
            loss_decreases.append((prev_epoch, epoch, percentage_decrease))

    print("\nLoss decrease between epochs:")
    for prev_epoch, epoch, decrease in loss_decreases:
        print(f"From {prev_epoch} to {epoch} epochs: {decrease:.2f}% decrease")

    for _, epoch, decrease in loss_decreases:
        if decrease < threshold:
            print(f"\nDiminishing returns detected at {epoch} epochs (only {decrease:.2f}% improvement)")
            recommended_epochs = epoch
            break
    else:
        # No pair fell below the threshold: fall back to the largest run.
        recommended_epochs = epochs[-1]
        print("\nNo clear diminishing returns detected. Consider testing with more epochs.")

    print("\nEpoch testing summary:")
    summary_data = {
        'Epochs': epochs,
        'Final Loss': [
            round(loss, 4) if loss is not None else None
            for loss in final_losses
        ],
        'Training Time (s)': [round(results[e]['training_time'], 2) for e in epochs]
    }
    summary_df = pd.DataFrame(summary_data)
    print(summary_df.to_string(index=False))

    print(f"\nRecommended number of epochs based on convergence: {recommended_epochs}")
    return recommended_epochs

def main():
    """Run the epoch sweep on the merged-items dataset and print advice.

    Loads the transactions from the Excel file, trains one model for each of
    the epoch counts 5, 10, 15, 20, 25 and 30, derives a recommended epoch
    count from the final losses, and prints how to apply it.
    """
    file_path = os.path.join(
        r'C:\Users\moham\Apriori_VS_Word2Vec\Dataset',
        'df_merged_items_category.xlsx',
    )

    transactions = load_transaction_data(file_path)
    results = test_multiple_epochs(transactions, [5, 10, 15, 20, 25, 30])
    optimal_epochs = calculate_convergence(results)

    instructions = (
        "\nTo use this value in your original code, replace:",
        "epoches_num = 10  # Current value",
        "with:",
        f"epoches_num = {optimal_epochs}  # Optimized value based on convergence",
    )
    for line in instructions:
        print(line)

# Run the epoch sweep only when this file is executed directly (or as a
# notebook cell), not when it is imported as a module.
if __name__ == "__main__":
    main()
    


Loading data from C:\Users\moham\Apriori_VS_Word2Vec\Dataset\df_merged_items_category.xlsx


2025-04-15 21:49:30,914 : INFO : collecting all words and their counts
2025-04-15 21:49:30,915 : INFO : PROGRESS: at sentence #0, processed 0 words, keeping 0 word types
2025-04-15 21:49:30,927 : INFO : PROGRESS: at sentence #10000, processed 244376 words, keeping 3614 word types
2025-04-15 21:49:30,944 : INFO : PROGRESS: at sentence #20000, processed 513626 words, keeping 4179 word types
2025-04-15 21:49:30,944 : INFO : collected 4185 word types from a corpus of 520609 raw words and 20208 sentences
2025-04-15 21:49:30,945 : INFO : Creating a fresh vocabulary
2025-04-15 21:49:30,953 : INFO : Word2Vec lifecycle event {'msg': 'effective_min_count=2 retains 3877 unique words (92.64% of original 4185, drops 308)', 'datetime': '2025-04-15T21:49:30.953856', 'gensim': '4.3.3', 'python': '3.10.16 | packaged by Anaconda, Inc. | (main, Dec 11 2024, 16:19:12) [MSC v.1929 64 bit (AMD64)]', 'platform': 'Windows-10-10.0.26100-SP0', 'event': 'prepare_vocab'}
2025-04-15 21:49:30,953 : INFO : Word2Vec 

Loaded 20208 transactions

===== Testing 5 epochs =====
Training model with 5 epochs...


2025-04-15 21:49:31,748 : INFO : EPOCH 0: training on 520609 raw words (518229 effective words) took 0.8s, 686587 effective words/s


Epoch 0, Loss: 2030243.375


2025-04-15 21:49:32,514 : INFO : EPOCH 1: training on 520609 raw words (518243 effective words) took 0.8s, 679454 effective words/s


Epoch 1, Loss: 1589111.125


2025-04-15 21:49:33,352 : INFO : EPOCH 2: training on 520609 raw words (518222 effective words) took 0.8s, 621379 effective words/s


Epoch 2, Loss: 1475680.5


2025-04-15 21:49:34,155 : INFO : EPOCH 3: training on 520609 raw words (518296 effective words) took 0.8s, 647375 effective words/s


Epoch 3, Loss: 1400490.0


2025-04-15 21:49:34,982 : INFO : EPOCH 4: training on 520609 raw words (518259 effective words) took 0.8s, 630051 effective words/s
2025-04-15 21:49:34,983 : INFO : Word2Vec lifecycle event {'msg': 'training on 2603045 raw words (2591249 effective words) took 4.0s, 648909 effective words/s', 'datetime': '2025-04-15T21:49:34.983469', 'gensim': '4.3.3', 'python': '3.10.16 | packaged by Anaconda, Inc. | (main, Dec 11 2024, 16:19:12) [MSC v.1929 64 bit (AMD64)]', 'platform': 'Windows-10-10.0.26100-SP0', 'event': 'train'}
2025-04-15 21:49:34,983 : INFO : Word2Vec lifecycle event {'params': 'Word2Vec<vocab=3877, vector_size=100, alpha=0.025>', 'datetime': '2025-04-15T21:49:34.983469', 'gensim': '4.3.3', 'python': '3.10.16 | packaged by Anaconda, Inc. | (main, Dec 11 2024, 16:19:12) [MSC v.1929 64 bit (AMD64)]', 'platform': 'Windows-10-10.0.26100-SP0', 'event': 'created'}
2025-04-15 21:49:34,984 : INFO : collecting all words and their counts
2025-04-15 21:49:34,984 : INFO : PROGRESS: at sente

Epoch 4, Loss: 1439677.5
Training completed in 4.07 seconds

===== Testing 10 epochs =====
Training model with 10 epochs...


2025-04-15 21:49:35,891 : INFO : EPOCH 0: training on 520609 raw words (518228 effective words) took 0.8s, 630078 effective words/s


Epoch 0, Loss: 2021493.75


2025-04-15 21:49:36,731 : INFO : EPOCH 1: training on 520609 raw words (518235 effective words) took 0.8s, 619544 effective words/s


Epoch 1, Loss: 1580106.25


2025-04-15 21:49:37,533 : INFO : EPOCH 2: training on 520609 raw words (518291 effective words) took 0.8s, 648396 effective words/s


Epoch 2, Loss: 1467821.5


2025-04-15 21:49:38,348 : INFO : EPOCH 3: training on 520609 raw words (518310 effective words) took 0.8s, 638334 effective words/s


Epoch 3, Loss: 1409817.5


2025-04-15 21:49:39,218 : INFO : EPOCH 4: training on 520609 raw words (518262 effective words) took 0.9s, 597780 effective words/s


Epoch 4, Loss: 1403377.5


2025-04-15 21:49:40,062 : INFO : EPOCH 5: training on 520609 raw words (518298 effective words) took 0.8s, 617043 effective words/s


Epoch 5, Loss: 1330098.5


2025-04-15 21:49:40,894 : INFO : EPOCH 6: training on 520609 raw words (518286 effective words) took 0.8s, 626000 effective words/s


Epoch 6, Loss: 1267486.0


2025-04-15 21:49:41,708 : INFO : EPOCH 7: training on 520609 raw words (518244 effective words) took 0.8s, 639471 effective words/s


Epoch 7, Loss: 1291421.0


2025-04-15 21:49:42,521 : INFO : EPOCH 8: training on 520609 raw words (518202 effective words) took 0.8s, 639880 effective words/s


Epoch 8, Loss: 1299391.0


2025-04-15 21:49:43,313 : INFO : EPOCH 9: training on 520609 raw words (518274 effective words) took 0.8s, 656865 effective words/s
2025-04-15 21:49:43,313 : INFO : Word2Vec lifecycle event {'msg': 'training on 5206090 raw words (5182630 effective words) took 8.2s, 628250 effective words/s', 'datetime': '2025-04-15T21:49:43.313943', 'gensim': '4.3.3', 'python': '3.10.16 | packaged by Anaconda, Inc. | (main, Dec 11 2024, 16:19:12) [MSC v.1929 64 bit (AMD64)]', 'platform': 'Windows-10-10.0.26100-SP0', 'event': 'train'}
2025-04-15 21:49:43,314 : INFO : Word2Vec lifecycle event {'params': 'Word2Vec<vocab=3877, vector_size=100, alpha=0.025>', 'datetime': '2025-04-15T21:49:43.314916', 'gensim': '4.3.3', 'python': '3.10.16 | packaged by Anaconda, Inc. | (main, Dec 11 2024, 16:19:12) [MSC v.1929 64 bit (AMD64)]', 'platform': 'Windows-10-10.0.26100-SP0', 'event': 'created'}
2025-04-15 21:49:43,315 : INFO : collecting all words and their counts
2025-04-15 21:49:43,315 : INFO : PROGRESS: at sente

Epoch 9, Loss: 1322545.0
Training completed in 8.33 seconds

===== Testing 15 epochs =====
Training model with 15 epochs...


2025-04-15 21:49:44,238 : INFO : EPOCH 0: training on 520609 raw words (518228 effective words) took 0.8s, 618671 effective words/s


Epoch 0, Loss: 1965923.75


2025-04-15 21:49:45,090 : INFO : EPOCH 1: training on 520609 raw words (518246 effective words) took 0.8s, 611320 effective words/s


Epoch 1, Loss: 1584924.0


2025-04-15 21:49:45,935 : INFO : EPOCH 2: training on 520609 raw words (518215 effective words) took 0.8s, 615205 effective words/s


Epoch 2, Loss: 1519749.25


2025-04-15 21:49:46,742 : INFO : EPOCH 3: training on 520609 raw words (518300 effective words) took 0.8s, 644281 effective words/s


Epoch 3, Loss: 1410922.0


2025-04-15 21:49:47,563 : INFO : EPOCH 4: training on 520609 raw words (518282 effective words) took 0.8s, 634188 effective words/s


Epoch 4, Loss: 1447024.5


2025-04-15 21:49:48,409 : INFO : EPOCH 5: training on 520609 raw words (518330 effective words) took 0.8s, 614785 effective words/s


Epoch 5, Loss: 1324753.5


2025-04-15 21:49:49,271 : INFO : EPOCH 6: training on 520609 raw words (518332 effective words) took 0.9s, 604178 effective words/s


Epoch 6, Loss: 1232125.0


2025-04-15 21:49:50,106 : INFO : EPOCH 7: training on 520609 raw words (518317 effective words) took 0.8s, 622763 effective words/s


Epoch 7, Loss: 1281349.0


2025-04-15 21:49:50,982 : INFO : EPOCH 8: training on 520609 raw words (518286 effective words) took 0.9s, 594575 effective words/s


Epoch 8, Loss: 1224847.0


2025-04-15 21:49:51,832 : INFO : EPOCH 9: training on 520609 raw words (518226 effective words) took 0.8s, 612945 effective words/s


Epoch 9, Loss: 1282009.0


2025-04-15 21:49:52,687 : INFO : EPOCH 10: training on 520609 raw words (518247 effective words) took 0.9s, 609300 effective words/s


Epoch 10, Loss: 1282976.0


2025-04-15 21:49:53,531 : INFO : EPOCH 11: training on 520609 raw words (518279 effective words) took 0.8s, 616397 effective words/s


Epoch 11, Loss: 1263793.0


2025-04-15 21:49:54,394 : INFO : EPOCH 12: training on 520609 raw words (518213 effective words) took 0.9s, 602646 effective words/s


Epoch 12, Loss: 1218492.0


2025-04-15 21:49:55,219 : INFO : EPOCH 13: training on 520609 raw words (518234 effective words) took 0.8s, 631345 effective words/s


Epoch 13, Loss: 1181876.0


2025-04-15 21:49:56,070 : INFO : EPOCH 14: training on 520609 raw words (518257 effective words) took 0.8s, 612532 effective words/s
2025-04-15 21:49:56,070 : INFO : Word2Vec lifecycle event {'msg': 'training on 7809135 raw words (7773992 effective words) took 12.7s, 613387 effective words/s', 'datetime': '2025-04-15T21:49:56.070411', 'gensim': '4.3.3', 'python': '3.10.16 | packaged by Anaconda, Inc. | (main, Dec 11 2024, 16:19:12) [MSC v.1929 64 bit (AMD64)]', 'platform': 'Windows-10-10.0.26100-SP0', 'event': 'train'}
2025-04-15 21:49:56,071 : INFO : Word2Vec lifecycle event {'params': 'Word2Vec<vocab=3877, vector_size=100, alpha=0.025>', 'datetime': '2025-04-15T21:49:56.071404', 'gensim': '4.3.3', 'python': '3.10.16 | packaged by Anaconda, Inc. | (main, Dec 11 2024, 16:19:12) [MSC v.1929 64 bit (AMD64)]', 'platform': 'Windows-10-10.0.26100-SP0', 'event': 'created'}
2025-04-15 21:49:56,071 : INFO : collecting all words and their counts
2025-04-15 21:49:56,072 : INFO : PROGRESS: at sen

Epoch 14, Loss: 1239458.0
Training completed in 12.76 seconds

===== Testing 20 epochs =====
Training model with 20 epochs...


2025-04-15 21:49:57,005 : INFO : EPOCH 0: training on 520609 raw words (518229 effective words) took 0.9s, 599705 effective words/s


Epoch 0, Loss: 2022855.875


2025-04-15 21:49:57,869 : INFO : EPOCH 1: training on 520609 raw words (518246 effective words) took 0.9s, 602728 effective words/s


Epoch 1, Loss: 1643818.125


2025-04-15 21:49:58,704 : INFO : EPOCH 2: training on 520609 raw words (518218 effective words) took 0.8s, 622863 effective words/s


Epoch 2, Loss: 1460645.0


2025-04-15 21:49:59,619 : INFO : EPOCH 3: training on 520609 raw words (518217 effective words) took 0.9s, 568281 effective words/s


Epoch 3, Loss: 1364249.5


2025-04-15 21:50:00,474 : INFO : EPOCH 4: training on 520609 raw words (518120 effective words) took 0.9s, 608827 effective words/s


Epoch 4, Loss: 1402647.5


2025-04-15 21:50:01,357 : INFO : EPOCH 5: training on 520609 raw words (518263 effective words) took 0.9s, 588966 effective words/s


Epoch 5, Loss: 1363956.0


2025-04-15 21:50:02,222 : INFO : EPOCH 6: training on 520609 raw words (518306 effective words) took 0.9s, 601803 effective words/s


Epoch 6, Loss: 1286969.0


2025-04-15 21:50:03,093 : INFO : EPOCH 7: training on 520609 raw words (518240 effective words) took 0.9s, 597465 effective words/s


Epoch 7, Loss: 1323691.0


2025-04-15 21:50:03,955 : INFO : EPOCH 8: training on 520609 raw words (518285 effective words) took 0.9s, 604487 effective words/s


Epoch 8, Loss: 1280618.0


2025-04-15 21:50:04,807 : INFO : EPOCH 9: training on 520609 raw words (518149 effective words) took 0.8s, 610641 effective words/s


Epoch 9, Loss: 1321639.0


2025-04-15 21:50:05,663 : INFO : EPOCH 10: training on 520609 raw words (518274 effective words) took 0.9s, 607796 effective words/s


Epoch 10, Loss: 1281670.0


2025-04-15 21:50:06,527 : INFO : EPOCH 11: training on 520609 raw words (518237 effective words) took 0.9s, 602929 effective words/s


Epoch 11, Loss: 1302219.0


2025-04-15 21:50:07,363 : INFO : EPOCH 12: training on 520609 raw words (518222 effective words) took 0.8s, 622710 effective words/s


Epoch 12, Loss: 1166684.0


2025-04-15 21:50:08,223 : INFO : EPOCH 13: training on 520609 raw words (518260 effective words) took 0.9s, 604130 effective words/s


Epoch 13, Loss: 1199210.0


2025-04-15 21:50:09,054 : INFO : EPOCH 14: training on 520609 raw words (518312 effective words) took 0.8s, 627220 effective words/s


Epoch 14, Loss: 1168514.0


2025-04-15 21:50:09,976 : INFO : EPOCH 15: training on 520609 raw words (518232 effective words) took 0.9s, 564515 effective words/s


Epoch 15, Loss: 1190258.0


2025-04-15 21:50:10,823 : INFO : EPOCH 16: training on 520609 raw words (518335 effective words) took 0.8s, 615054 effective words/s


Epoch 16, Loss: 1171584.0


2025-04-15 21:50:11,673 : INFO : EPOCH 17: training on 520609 raw words (518254 effective words) took 0.8s, 612757 effective words/s


Epoch 17, Loss: 1129770.0


2025-04-15 21:50:12,500 : INFO : EPOCH 18: training on 520609 raw words (518321 effective words) took 0.8s, 629197 effective words/s


Epoch 18, Loss: 1185338.0


2025-04-15 21:50:13,364 : INFO : EPOCH 19: training on 520609 raw words (518243 effective words) took 0.9s, 603064 effective words/s
2025-04-15 21:50:13,365 : INFO : Word2Vec lifecycle event {'msg': 'training on 10412180 raw words (10364963 effective words) took 17.2s, 601708 effective words/s', 'datetime': '2025-04-15T21:50:13.365186', 'gensim': '4.3.3', 'python': '3.10.16 | packaged by Anaconda, Inc. | (main, Dec 11 2024, 16:19:12) [MSC v.1929 64 bit (AMD64)]', 'platform': 'Windows-10-10.0.26100-SP0', 'event': 'train'}
2025-04-15 21:50:13,365 : INFO : Word2Vec lifecycle event {'params': 'Word2Vec<vocab=3877, vector_size=100, alpha=0.025>', 'datetime': '2025-04-15T21:50:13.365186', 'gensim': '4.3.3', 'python': '3.10.16 | packaged by Anaconda, Inc. | (main, Dec 11 2024, 16:19:12) [MSC v.1929 64 bit (AMD64)]', 'platform': 'Windows-10-10.0.26100-SP0', 'event': 'created'}
2025-04-15 21:50:13,366 : INFO : collecting all words and their counts
2025-04-15 21:50:13,367 : INFO : PROGRESS: at s

Epoch 19, Loss: 1200246.0
Training completed in 17.29 seconds

===== Testing 25 epochs =====
Training model with 25 epochs...


2025-04-15 21:50:14,332 : INFO : EPOCH 0: training on 520609 raw words (518231 effective words) took 0.9s, 583035 effective words/s


Epoch 0, Loss: 1943760.125


2025-04-15 21:50:15,217 : INFO : EPOCH 1: training on 520609 raw words (518243 effective words) took 0.9s, 588174 effective words/s


Epoch 1, Loss: 1515165.375


2025-04-15 21:50:16,117 : INFO : EPOCH 2: training on 520609 raw words (518261 effective words) took 0.9s, 578208 effective words/s


Epoch 2, Loss: 1534205.5


2025-04-15 21:50:16,976 : INFO : EPOCH 3: training on 520609 raw words (518366 effective words) took 0.9s, 605213 effective words/s


Epoch 3, Loss: 1340084.5


2025-04-15 21:50:17,859 : INFO : EPOCH 4: training on 520609 raw words (518265 effective words) took 0.9s, 589650 effective words/s


Epoch 4, Loss: 1441882.0


2025-04-15 21:50:18,725 : INFO : EPOCH 5: training on 520609 raw words (518256 effective words) took 0.9s, 600306 effective words/s


Epoch 5, Loss: 1337536.5


2025-04-15 21:50:19,598 : INFO : EPOCH 6: training on 520609 raw words (518288 effective words) took 0.9s, 596732 effective words/s


Epoch 6, Loss: 1229599.0


2025-04-15 21:50:20,461 : INFO : EPOCH 7: training on 520609 raw words (518336 effective words) took 0.9s, 603177 effective words/s


Epoch 7, Loss: 1281916.0


2025-04-15 21:50:21,353 : INFO : EPOCH 8: training on 520609 raw words (518308 effective words) took 0.9s, 583789 effective words/s


Epoch 8, Loss: 1284701.0


2025-04-15 21:50:22,238 : INFO : EPOCH 9: training on 520609 raw words (518281 effective words) took 0.9s, 587958 effective words/s


Epoch 9, Loss: 1280181.0


2025-04-15 21:50:23,100 : INFO : EPOCH 10: training on 520609 raw words (518369 effective words) took 0.9s, 604081 effective words/s


Epoch 10, Loss: 1264452.0


2025-04-15 21:50:23,941 : INFO : EPOCH 11: training on 520609 raw words (518263 effective words) took 0.8s, 618540 effective words/s


Epoch 11, Loss: 1274666.0


2025-04-15 21:50:24,800 : INFO : EPOCH 12: training on 520609 raw words (518246 effective words) took 0.9s, 605762 effective words/s


Epoch 12, Loss: 1169807.0


2025-04-15 21:50:25,673 : INFO : EPOCH 13: training on 520609 raw words (518350 effective words) took 0.9s, 596742 effective words/s


Epoch 13, Loss: 1159444.0


2025-04-15 21:50:26,536 : INFO : EPOCH 14: training on 520609 raw words (518254 effective words) took 0.9s, 602338 effective words/s


Epoch 14, Loss: 1161270.0


2025-04-15 21:50:27,386 : INFO : EPOCH 15: training on 520609 raw words (518243 effective words) took 0.8s, 612651 effective words/s


Epoch 15, Loss: 1163458.0


2025-04-15 21:50:28,251 : INFO : EPOCH 16: training on 520609 raw words (518267 effective words) took 0.9s, 602463 effective words/s


Epoch 16, Loss: 1168868.0


2025-04-15 21:50:29,128 : INFO : EPOCH 17: training on 520609 raw words (518270 effective words) took 0.9s, 593918 effective words/s


Epoch 17, Loss: 1162158.0


2025-04-15 21:50:29,970 : INFO : EPOCH 18: training on 520609 raw words (518249 effective words) took 0.8s, 618235 effective words/s


Epoch 18, Loss: 1151544.0


2025-04-15 21:50:30,846 : INFO : EPOCH 19: training on 520609 raw words (518255 effective words) took 0.9s, 594404 effective words/s


Epoch 19, Loss: 1168114.0


2025-04-15 21:50:31,701 : INFO : EPOCH 20: training on 520609 raw words (518234 effective words) took 0.9s, 609350 effective words/s


Epoch 20, Loss: 1167124.0


2025-04-15 21:50:32,605 : INFO : EPOCH 21: training on 520609 raw words (518271 effective words) took 0.9s, 575591 effective words/s


Epoch 21, Loss: 1173428.0


2025-04-15 21:50:33,486 : INFO : EPOCH 22: training on 520609 raw words (518312 effective words) took 0.9s, 590805 effective words/s


Epoch 22, Loss: 1146470.0


2025-04-15 21:50:34,364 : INFO : EPOCH 23: training on 520609 raw words (518253 effective words) took 0.9s, 592858 effective words/s


Epoch 23, Loss: 1179364.0


2025-04-15 21:50:35,268 : INFO : EPOCH 24: training on 520609 raw words (518299 effective words) took 0.9s, 575976 effective words/s
2025-04-15 21:50:35,268 : INFO : Word2Vec lifecycle event {'msg': 'training on 13015225 raw words (12956970 effective words) took 21.8s, 593583 effective words/s', 'datetime': '2025-04-15T21:50:35.268561', 'gensim': '4.3.3', 'python': '3.10.16 | packaged by Anaconda, Inc. | (main, Dec 11 2024, 16:19:12) [MSC v.1929 64 bit (AMD64)]', 'platform': 'Windows-10-10.0.26100-SP0', 'event': 'train'}
2025-04-15 21:50:35,269 : INFO : Word2Vec lifecycle event {'params': 'Word2Vec<vocab=3877, vector_size=100, alpha=0.025>', 'datetime': '2025-04-15T21:50:35.269560', 'gensim': '4.3.3', 'python': '3.10.16 | packaged by Anaconda, Inc. | (main, Dec 11 2024, 16:19:12) [MSC v.1929 64 bit (AMD64)]', 'platform': 'Windows-10-10.0.26100-SP0', 'event': 'created'}
2025-04-15 21:50:35,269 : INFO : collecting all words and their counts
2025-04-15 21:50:35,270 : INFO : PROGRESS: at s

Epoch 24, Loss: 1197142.0
Training completed in 21.90 seconds

===== Testing 30 epochs =====
Training model with 30 epochs...


2025-04-15 21:50:36,270 : INFO : EPOCH 0: training on 520609 raw words (518229 effective words) took 0.9s, 566720 effective words/s


Epoch 0, Loss: 1934487.25


2025-04-15 21:50:37,124 : INFO : EPOCH 1: training on 520609 raw words (518242 effective words) took 0.9s, 609658 effective words/s


Epoch 1, Loss: 1580962.75


2025-04-15 21:50:38,023 : INFO : EPOCH 2: training on 520609 raw words (518291 effective words) took 0.9s, 578843 effective words/s


Epoch 2, Loss: 1471441.5


2025-04-15 21:50:38,900 : INFO : EPOCH 3: training on 520609 raw words (518222 effective words) took 0.9s, 593459 effective words/s


Epoch 3, Loss: 1411492.5


2025-04-15 21:50:39,780 : INFO : EPOCH 4: training on 520609 raw words (518290 effective words) took 0.9s, 591182 effective words/s


Epoch 4, Loss: 1403434.5


2025-04-15 21:50:40,680 : INFO : EPOCH 5: training on 520609 raw words (518318 effective words) took 0.9s, 578414 effective words/s


Epoch 5, Loss: 1335993.5


2025-04-15 21:50:41,607 : INFO : EPOCH 6: training on 520609 raw words (518330 effective words) took 0.9s, 562072 effective words/s


Epoch 6, Loss: 1326586.0


2025-04-15 21:50:42,516 : INFO : EPOCH 7: training on 520609 raw words (518226 effective words) took 0.9s, 572071 effective words/s


Epoch 7, Loss: 1325469.0


2025-04-15 21:50:43,390 : INFO : EPOCH 8: training on 520609 raw words (518202 effective words) took 0.9s, 596132 effective words/s


Epoch 8, Loss: 1270222.0


2025-04-15 21:50:44,298 : INFO : EPOCH 9: training on 520609 raw words (518354 effective words) took 0.9s, 573171 effective words/s


Epoch 9, Loss: 1335764.0


2025-04-15 21:50:45,202 : INFO : EPOCH 10: training on 520609 raw words (518287 effective words) took 0.9s, 576613 effective words/s


Epoch 10, Loss: 1223840.0


2025-04-15 21:50:46,101 : INFO : EPOCH 11: training on 520609 raw words (518302 effective words) took 0.9s, 578817 effective words/s


Epoch 11, Loss: 1300617.0


2025-04-15 21:50:47,005 : INFO : EPOCH 12: training on 520609 raw words (518268 effective words) took 0.9s, 575570 effective words/s


Epoch 12, Loss: 1110732.0


2025-04-15 21:50:47,934 : INFO : EPOCH 13: training on 520609 raw words (518250 effective words) took 0.9s, 558977 effective words/s


Epoch 13, Loss: 1132678.0


2025-04-15 21:50:48,775 : INFO : EPOCH 14: training on 520609 raw words (518200 effective words) took 0.8s, 619746 effective words/s


Epoch 14, Loss: 1147414.0


2025-04-15 21:50:49,682 : INFO : EPOCH 15: training on 520609 raw words (518245 effective words) took 0.9s, 573333 effective words/s


Epoch 15, Loss: 1178426.0


2025-04-15 21:50:50,558 : INFO : EPOCH 16: training on 520609 raw words (518275 effective words) took 0.9s, 593754 effective words/s


Epoch 16, Loss: 1125150.0


2025-04-15 21:50:51,472 : INFO : EPOCH 17: training on 520609 raw words (518264 effective words) took 0.9s, 569770 effective words/s


Epoch 17, Loss: 1187036.0


2025-04-15 21:50:52,330 : INFO : EPOCH 18: training on 520609 raw words (518323 effective words) took 0.9s, 606610 effective words/s


Epoch 18, Loss: 1163964.0


2025-04-15 21:50:53,214 : INFO : EPOCH 19: training on 520609 raw words (518220 effective words) took 0.9s, 589077 effective words/s


Epoch 19, Loss: 1130870.0


2025-04-15 21:50:54,080 : INFO : EPOCH 20: training on 520609 raw words (518324 effective words) took 0.9s, 600453 effective words/s


Epoch 20, Loss: 1164688.0


2025-04-15 21:50:55,040 : INFO : EPOCH 21: training on 520609 raw words (518240 effective words) took 1.0s, 542558 effective words/s


Epoch 21, Loss: 1098140.0


2025-04-15 21:50:55,912 : INFO : EPOCH 22: training on 520609 raw words (518185 effective words) took 0.9s, 596975 effective words/s


Epoch 22, Loss: 1166002.0


2025-04-15 21:50:56,780 : INFO : EPOCH 23: training on 520609 raw words (518281 effective words) took 0.9s, 599052 effective words/s


Epoch 23, Loss: 1190240.0


2025-04-15 21:50:57,630 : INFO : EPOCH 24: training on 520609 raw words (518324 effective words) took 0.8s, 613194 effective words/s


Epoch 24, Loss: 1168594.0


2025-04-15 21:50:58,503 : INFO : EPOCH 25: training on 520609 raw words (518322 effective words) took 0.9s, 595994 effective words/s


Epoch 25, Loss: 1141878.0


2025-04-15 21:50:59,373 : INFO : EPOCH 26: training on 520609 raw words (518285 effective words) took 0.9s, 597632 effective words/s


Epoch 26, Loss: 1125674.0


2025-04-15 21:51:00,239 : INFO : EPOCH 27: training on 520609 raw words (518193 effective words) took 0.9s, 601647 effective words/s


Epoch 27, Loss: 1040336.0


2025-04-15 21:51:01,087 : INFO : EPOCH 28: training on 520609 raw words (518217 effective words) took 0.8s, 612898 effective words/s


Epoch 28, Loss: 1110132.0


2025-04-15 21:51:01,943 : INFO : EPOCH 29: training on 520609 raw words (518195 effective words) took 0.9s, 608903 effective words/s
2025-04-15 21:51:01,943 : INFO : Word2Vec lifecycle event {'msg': 'training on 15618270 raw words (15547904 effective words) took 26.6s, 584708 effective words/s', 'datetime': '2025-04-15T21:51:01.943086', 'gensim': '4.3.3', 'python': '3.10.16 | packaged by Anaconda, Inc. | (main, Dec 11 2024, 16:19:12) [MSC v.1929 64 bit (AMD64)]', 'platform': 'Windows-10-10.0.26100-SP0', 'event': 'train'}
2025-04-15 21:51:01,944 : INFO : Word2Vec lifecycle event {'params': 'Word2Vec<vocab=3877, vector_size=100, alpha=0.025>', 'datetime': '2025-04-15T21:51:01.944176', 'gensim': '4.3.3', 'python': '3.10.16 | packaged by Anaconda, Inc. | (main, Dec 11 2024, 16:19:12) [MSC v.1929 64 bit (AMD64)]', 'platform': 'Windows-10-10.0.26100-SP0', 'event': 'created'}


Epoch 29, Loss: 1054848.0
Training completed in 26.68 seconds

Loss decrease between epochs:
From 5 to 10 epochs: 8.14% decrease
From 10 to 15 epochs: 6.28% decrease
From 15 to 20 epochs: 3.16% decrease
From 20 to 25 epochs: 0.26% decrease
From 25 to 30 epochs: 11.89% decrease

Diminishing returns detected at 20 epochs (only 3.16% improvement)

Epoch testing summary:
 Epochs  Final Loss  Training Time (s)
      5   1439677.5               4.07
     10   1322545.0               8.33
     15   1239458.0              12.76
     20   1200246.0              17.29
     25   1197142.0              21.90
     30   1054848.0              26.68

Recommended number of epochs based on convergence: 20

To use this value in your original code, replace:
epoches_num = 10  # Current value
with:
epoches_num = 20  # Optimized value based on convergence
