In [1]:
# Retrain the bi-encoder model using the user feedback collected in notebooks 12 and 13.

# 1. Check feedback statistics (how many actions, what types)
# 2. Summarize the feedback distribution (by action type and by role)
# 3. Configure retraining parameters
# 4. Run retraining with best practices
# 5. Evaluate new model vs. old model


# - Low learning rate (1e-5) to avoid catastrophic forgetting
# - Few epochs (1-2) for fine-tuning
# - Mix ratio: 20% feedback + 80% original training data
# - Hard negative mining from "Not Interested" actions
# - Model versioning with timestamps


# setup
import os
import sys
from pathlib import Path

# find project root
def find_project_root(start, marker=os.path.join("demo", "scripts")):
    """Return the project root (as a string) for a notebook started in `start`.

    Walks from `start` up through its parents and returns the first directory
    that contains `marker`; the default is the `demo/scripts` package this
    notebook imports from.  Because the search is anchored on a directory that
    must exist inside the root, re-running the cell after the `os.chdir` below
    yields the same answer instead of climbing further up.

    If no ancestor contains the marker, the previous heuristic is used
    unchanged: two levels up when the path contains 'notebooks', otherwise
    `start` itself.
    """
    start_dir = os.path.abspath(start)
    start_path = Path(start_dir)
    for candidate in (start_path, *start_path.parents):
        if (candidate / marker).is_dir():
            return str(candidate)

    # Legacy fallback, kept for layouts without a demo/scripts directory.
    if 'notebooks' in start_dir:
        return os.path.dirname(os.path.dirname(start_dir))
    return start_dir


PROJECT_ROOT = find_project_root(os.getcwd())

os.chdir(PROJECT_ROOT)
# Keep the project first on sys.path, but do not stack duplicates on re-run.
if sys.path[:1] != [PROJECT_ROOT]:
    sys.path.insert(0, PROJECT_ROOT)

print(f"Project root: {PROJECT_ROOT}")

Project root: /home/developer/project


In [2]:
# imports
import sqlite3
import pandas as pd
import numpy as np
from datetime import datetime

# our modules
from demo.scripts.feedback_storage import (
    get_action_count,
    get_action_summary,
    get_feedback_pairs,
    DB_PATH
)
from demo.scripts.model_retrainer import (
    check_retrain_needed,
    retrain_from_feedback,
    get_latest_model,
    collect_training_data
)

# Report where the feedback database lives and whether the file is present.
print(
    f"Feedback database: {DB_PATH}",
    f"Database exists: {Path(DB_PATH).exists()}",
    sep="\n",
)

Feedback database: /home/developer/project/demo/data/feedback/feedback.db
Database exists: True


In [3]:
# Feedback statistics: overall totals, then a breakdown per action type and per role.
summary = get_action_summary()
total = summary.get('total', 0)

print("FEEDBACK STATISTICS")
print(f"\nTotal actions: {total}")

# Count reported by get_action_count(); labelled below as actions with non-zero weight.
meaningful = get_action_count()
print(f"Meaningful actions (weight != 0): {meaningful}")

# Per-action table, most frequent action first.
by_action = summary.get('by_action', {})
if by_action:
    print("\nActions by type:")
    print("-" * 40)
    ranked_actions = sorted(
        by_action.items(),
        key=lambda entry: entry[1]['count'],
        reverse=True,
    )
    for action_name, stats in ranked_actions:
        n_actions = stats['count']
        weight_sum = stats['total_weight']
        # Guard the percentage against an empty summary.
        if total > 0:
            share = 100 * n_actions / total
        else:
            share = 0
        print(f"  {action_name:18s} {n_actions:4d} ({share:5.1f}%)  weight: {weight_sum:+.1f}")

# Per-role counts, in the order the summary provides them.
by_role = summary.get('by_role', {})
if by_role:
    print("\nActions by role:")
    for role_name, role_total in by_role.items():
        print(f"  {role_name}: {role_total}")

FEEDBACK STATISTICS

Total actions: 44
Meaningful actions (weight != 0): 38

Actions by type:
----------------------------------------
  view_full            20 ( 45.5%)  weight: +6.0
  apply                 9 ( 20.5%)  weight: +9.0
  skip                  6 ( 13.6%)  weight: +0.0
  save                  4 (  9.1%)  weight: +2.0
  contact               3 (  6.8%)  weight: +3.0
  not_interested        2 (  4.5%)  weight: -1.0

Actions by role:
  job_seeker: 27
  recruiter: 17


In [4]:
# check training pairs
SAMPLE_PREVIEW_CHARS = 100  # max characters shown per text in the samples below


def preview_text(text, limit=SAMPLE_PREVIEW_CHARS):
    """Return a one-line preview of `text`, at most `limit` characters long.

    Runs of whitespace (including the newlines embedded in CV texts) are
    collapsed to single spaces so the indented sample output is not broken
    across lines, and '...' is appended only when something was cut off.
    """
    flat = " ".join(str(text).split())
    if len(flat) <= limit:
        return flat
    return flat[:limit] + "..."


def show_sample_pair(title, pair):
    """Print `title` followed by previews of the two texts in a (cv, job) pair."""
    cv, job = pair
    print(f"\n{title}")
    print(f"  CV: {preview_text(cv)}")
    print(f"  Job: {preview_text(job)}")


positive, negatives = get_feedback_pairs(min_positive_weight=0.5)

print("TRAINING DATA AVAILABLE")
# NOTE(review): in the recorded run 16 positives = 9 apply + 3 contact + 4 save,
# so the 0.5 threshold presumably also admits 'save' actions; confirm that the
# "Apply/Contact/Hire" label below is still accurate.
print(f"\nPositive pairs (Apply/Contact/Hire): {len(positive)}")
print(f"Hard negatives (Not Interested): {len(negatives)}")

if len(positive) > 0:
    show_sample_pair("Sample positive pair:", positive[0])

if len(negatives) > 0:
    show_sample_pair("Sample hard negative:", negatives[0])

TRAINING DATA AVAILABLE

Positive pairs (Apply/Contact/Hire): 16
Hard negatives (Not Interested): 2

Sample positive pair:
  CV: Senior Python Developer with 8 years of experience in Django, PostgreSQL,
and AWS. Led teams of 5+ e...
  Job: Role of Senior Engineer / Python Developer with Django at Allnessjobs in Austin, TX. Required skills...

Sample hard negative:
  CV: Senior Python Developer with 8 years of experience in Django, PostgreSQL,
and AWS. Led teams of 5+ e...
  Job: Role of Senior Python Developer at Oxygen in Princeton, NJ. Required skills: Python, Django, Django ...


In [5]:
# Compare the action count reported by check_retrain_needed() with the
# recommended minimum and tell the reader whether retraining is advised.
THRESHOLD = 50  # recommended minimum

needed, count = check_retrain_needed(threshold=THRESHOLD)
readiness = 'YES' if needed else 'NO'

print("RETRAINING READINESS")
print(f"\nCurrent meaningful actions: {count}")
print(f"Threshold: {THRESHOLD}")
print(f"Ready for retraining: {readiness}")

if needed:
    print("\nRecommendation: Proceed with retraining!")
else:
    # How many more actions are required to reach the threshold.
    remaining = THRESHOLD - count
    print(f"\nNeed {remaining} more actions before recommended retraining.")
    print("You can still force retrain with a lower threshold.")

RETRAINING READINESS

Current meaningful actions: 38
Threshold: 50
Ready for retraining: NO

Need 12 more actions before recommended retraining.
You can still force retrain with a lower threshold.


In [6]:
# List the base model, any retrained checkpoints, and the model currently
# returned by get_latest_model().
print("AVAILABLE MODELS")

models_root = Path(PROJECT_ROOT) / "training" / "output" / "models"
base_model = models_root / "cv-job-matcher-e5"
retrained_dir = models_root / "retrained"

print(f"\nBase model: {base_model}")
print(f"  Exists: {base_model.exists()}")

# A missing directory and an empty one are reported the same way.
if retrained_dir.exists():
    retrained_models = sorted(retrained_dir.glob("cv-job-matcher-*"))
else:
    retrained_models = []

if retrained_models:
    print(f"\nRetrained models ({len(retrained_models)}):")
    # Only the last five entries (sorted by path) are listed.
    for model_dir in retrained_models[-5:]:
        print(f"  - {model_dir.name}")
else:
    print("\nNo retrained models yet.")

latest = get_latest_model()
print(f"\nLatest model in use: {latest}")

AVAILABLE MODELS

Base model: /home/developer/project/training/output/models/cv-job-matcher-e5
  Exists: True

No retrained models yet.

Latest model in use: /home/developer/project/training/output/models/cv-job-matcher-e5


In [7]:
# RETRAINING CONFIGURATION
# Every tunable for the retraining run lives in this one dict.

config = dict(
    threshold=10,        # set low to force retrain, 50 for production
    epochs=2,            # 1-2 recommended
    batch_size=32,       # 32 works for most GPUs
    learning_rate=1e-5,  # low rate for fine-tuning
    mix_ratio=0.2,       # 20% feedback, 80% original
)

print("RETRAINING CONFIGURATION")
print("=" * 50)
# One "  name: value" line per setting, in declaration order.
print("\n".join(f"  {name}: {setting}" for name, setting in config.items()))

# Preview the data collect_training_data() returns for the configured mix ratio.
print("\nDATA PREVIEW:")
pairs, negs = collect_training_data(mix_ratio=config['mix_ratio'])
for label, items in (
    ("Total training pairs", pairs),
    ("Hard negatives available", negs),
):
    print(f"  {label}: {len(items)}")

2026-01-28 18:11:11,577 - Collected 16 positive pairs from feedback
2026-01-28 18:11:11,577 - Collected 2 hard negatives from feedback
2026-01-28 18:11:11,618 - Loaded 5839 original training pairs
2026-01-28 18:11:11,619 - Using 64 original pairs (sampled)
2026-01-28 18:11:11,619 - Total training pairs: 80
2026-01-28 18:11:11,619 -   - From feedback: 16 (20.0%)
2026-01-28 18:11:11,619 -   - From original: 64 (80.0%)


RETRAINING CONFIGURATION
  threshold: 10
  epochs: 2
  batch_size: 32
  learning_rate: 1e-05
  mix_ratio: 0.2

DATA PREVIEW:
  Total training pairs: 80
  Hard negatives available: 2


In [8]:
# RUN RETRAINING
# Retraining is opt-in: nothing is trained unless the flag below is True.

RUN_RETRAIN = False  # <-- Change to True to run

if not RUN_RETRAIN:
    print("Retraining not enabled.")
    print("Set RUN_RETRAIN = True in the cell above to run.")
else:
    print("Starting retraining")

    # Forward exactly the five settings retrain_from_feedback is called with.
    retrain_kwargs = {
        name: config[name]
        for name in ('threshold', 'epochs', 'batch_size', 'learning_rate', 'mix_ratio')
    }
    result = retrain_from_feedback(**retrain_kwargs)

    if not result['success']:
        print(f"\nRetraining skipped: {result.get('reason', 'unknown')}")
    else:
        banner = "=" * 50
        print("\n" + banner)
        print("RETRAINING COMPLETE!")
        print(banner)
        print("\nNew model saved to:")
        print(f"  {result['model_path']}")
        print("\nTraining statistics:")
        print(f"  Actions used: {result['action_count']}")
        print(f"  Training pairs: {result['training_pairs']}")
        print(f"  Hard negatives: {result['hard_negatives']}")
        print(f"  Epochs: {result['epochs']}")
        print(f"  Training time: {result['training_time']:.1f}s")
        print("\nTo use the new model:")
        print("  1. Restart notebook 12 or 13")
        print("  2. Or update MODEL_PATH to point to the new model")

Retraining not enabled.
Set RUN_RETRAIN = True in the cell above to run.


In [9]:
# compare models (only if retrained model exists)
import torch
from sentence_transformers import SentenceTransformer

base_path = Path(PROJECT_ROOT) / "training" / "output" / "models" / "cv-job-matcher-e5"
latest_path = get_latest_model()

COMPARE_PREVIEW_CHARS = 43  # characters of each test text echoed in the report


def same_model_path(left, right):
    """Return True when `left` and `right` resolve to the same filesystem location.

    Comparing resolved paths instead of raw strings keeps a trailing slash, a
    relative path or a symlink from making the base model look "retrained".
    """
    return Path(left).resolve() == Path(right).resolve()


def pair_similarity(model, query, doc):
    """Return the similarity of `query` and `doc` under `model` as a float.

    Both texts are encoded with normalize_embeddings=True and the score is the
    dot product of the two resulting vectors.
    """
    query_vec = model.encode(query, normalize_embeddings=True)
    doc_vec = model.encode(doc, normalize_embeddings=True)
    return float(np.dot(query_vec, doc_vec))


def preview_without_prefix(text, prefix, limit=COMPARE_PREVIEW_CHARS):
    """Drop a leading `prefix` from `text` and shorten the rest to `limit` chars.

    '...' is appended only when the text was actually shortened.
    """
    body = text[len(prefix):] if text.startswith(prefix) else text
    if len(body) <= limit:
        return body
    return body[:limit] + "..."


# NOTE(review): get_latest_model() is assumed to return a path-like value;
# None is treated as "no retrained model" - confirm against its implementation.
has_retrained_model = latest_path is not None and not same_model_path(latest_path, base_path)

if has_retrained_model:
    print("MODEL COMPARISON")

    # load both models
    device = 'cuda' if torch.cuda.is_available() else 'cpu'
    print(f"\nLoading models on {device}")

    # Named *_encoder so the `base_model` path defined by the "AVAILABLE MODELS"
    # cell is not rebound to an object of a different kind.
    base_encoder = SentenceTransformer(str(base_path), device=device)
    new_encoder = SentenceTransformer(str(latest_path), device=device)

    # test queries and the document each one is expected to match
    test_queries = [
        "query: Python developer with Django and AWS experience",
        "query: Data scientist with machine learning and NLP skills",
        "query: Frontend developer with React and TypeScript"
    ]

    test_docs = [
        "passage: Senior Python Developer at TechCorp. Required: Django, AWS, PostgreSQL.",
        "passage: ML Engineer position. Skills: Python, TensorFlow, NLP, statistics.",
        "passage: React Developer needed. Must know TypeScript, Redux, CSS."
    ]

    print("\nComparing embeddings on test queries:")

    for test_no, (query, doc) in enumerate(zip(test_queries, test_docs), start=1):
        # score the same pair with both models
        base_sim = pair_similarity(base_encoder, query, doc)
        new_sim = pair_similarity(new_encoder, query, doc)

        print(f"\nTest {test_no}:")
        print(f"  Query: {preview_without_prefix(query, 'query: ')}")
        print(f"  Doc: {preview_without_prefix(doc, 'passage: ')}")
        print(f"  Base similarity: {base_sim:.4f}")
        print(f"  New similarity:  {new_sim:.4f}")
        print(f"  Difference: {new_sim - base_sim:+.4f}")
else:
    print("No retrained model available for comparison.")
    print("Run retraining first.")

No retrained model available for comparison.
Run retraining first.


In [10]:
# show retraining history from database
# Reads the ten most recent rows of the `retraining_log` table and prints one
# summary per run.  Problems reading the table are reported, not raised, so the
# notebook keeps running.
print("RETRAINING HISTORY")

if Path(DB_PATH).exists():
    conn = sqlite3.connect(str(DB_PATH))

    try:
        history = conn.execute('''
            SELECT model_version, previous_model, num_actions_used, 
                   num_positive_pairs, num_hard_negatives, training_time_sec, timestamp
            FROM retraining_log
            ORDER BY timestamp DESC
            LIMIT 10
        ''').fetchall()
    except sqlite3.OperationalError as exc:
        history = None  # distinguishes "could not read" from "no rows"
        # Only a missing table means "no retraining yet"; other operational
        # errors (locked database, schema mismatch, ...) are shown as they are.
        if "no such table" in str(exc).lower():
            print("\nRetraining log table not found (no retraining done yet).")
        else:
            print(f"\nCould not read retraining log: {exc}")
    finally:
        conn.close()

    if history:
        print(f"\nLast {len(history)} retraining runs:")
        for row in history:
            # previous_model is selected but not part of the printed summary
            version, _previous, actions, pairs, negs, time_sec, ts = row
            # A NULL training_time_sec arrives as None and cannot take ':.1f'.
            duration = f"{time_sec:.1f}s" if time_sec is not None else "n/a"
            print(f"\n  {ts}")
            print(f"    Model version: {version}")
            print(f"    Actions used: {actions}")
            print(f"    Training pairs: {pairs}")
            print(f"    Hard negatives: {negs}")
            print(f"    Training time: {duration}")
    elif history is not None:
        print("\nNo retraining runs recorded yet.")
else:
    print("\nFeedback database not found.")

RETRAINING HISTORY

No retraining runs recorded yet.
