In [1]:
# Install the necessary dependencies
!pip install transformers datasets tqdm tk

In [2]:
# Import necessary libraries
import queue
import threading
import time
import tkinter as tk
from tkinter import scrolledtext
from tkinter import ttk

import torch
from datasets import load_dataset
from transformers import Trainer, TrainingArguments, AutoModelForSequenceClassification, AutoTokenizer


In [3]:
# Define the TrainingProgressWindow UI
class TrainingProgressWindow:
    """Tkinter window showing a scrolling log and a progress bar.

    Tk widgets must only be touched from the thread that created the ``Tk``
    root. ``update_log``, ``update_progress`` and ``close`` can therefore be
    called from any thread: they put a request on a thread-safe queue, and the
    owning thread applies the requests, either from a periodic ``after``
    callback while the event loop runs, or immediately when the caller *is*
    the owning thread.

    The constructor does not start the event loop. Run
    ``self.window.mainloop()`` (or ``_start_gui()``) from the thread that
    created the instance.
    """

    # How often (milliseconds) the event loop checks for queued updates.
    _POLL_INTERVAL_MS = 50

    def __init__(self, title="Training Progress"):
        """Create the window and its widgets.

        Args:
            title: Text shown in the window's title bar.
        """
        # Requests posted by any thread; drained only on the owning thread.
        self._pending = queue.Queue()
        self._owner_thread = threading.get_ident()
        self._closed = False

        self.window = tk.Tk()
        self.window.title(title)

        # Create a ScrolledText widget to show logs
        self.log_box = scrolledtext.ScrolledText(self.window, width=80, height=20, wrap=tk.WORD)
        self.log_box.grid(row=0, column=0, padx=10, pady=10)

        # Create a Progress Bar widget (ttk must be imported explicitly;
        # `import tkinter as tk` alone does not provide `tk.ttk`).
        self.progress = tk.DoubleVar(master=self.window)
        self.progress_bar = ttk.Progressbar(self.window, variable=self.progress, maximum=100, length=300)
        self.progress_bar.grid(row=1, column=0, padx=10, pady=10)

        # Route the window-manager close button through close() so the
        # closed flag stays in sync with the real widget state.
        self.window.protocol("WM_DELETE_WINDOW", self.close)
        # The first poll only fires once the event loop is running.
        self.window.after(self._POLL_INTERVAL_MS, self._poll_pending)

    def _start_gui(self):
        """Run the Tk event loop; call from the thread that created the window."""
        self.window.mainloop()

    def _poll_pending(self):
        """Periodic event-loop callback: apply queued requests, then reschedule."""
        if self._closed:
            return
        self._drain_pending()
        if not self._closed:
            self.window.after(self._POLL_INTERVAL_MS, self._poll_pending)

    def _drain_pending(self):
        """Apply every queued request to the widgets (owning thread only)."""
        while not self._closed:
            try:
                kind, payload = self._pending.get_nowait()
            except queue.Empty:
                return
            if kind == "log":
                self.log_box.insert(tk.END, payload)
                self.log_box.yview(tk.END)  # keep the newest line visible
            elif kind == "progress":
                self.progress.set(payload)
            else:  # "close" requested from another thread
                self._shutdown()

    def _flush_if_owner(self):
        """Apply queued requests right away when called on the owning thread."""
        if self._closed or threading.get_ident() != self._owner_thread:
            return
        self._drain_pending()
        if not self._closed:
            # Redraw even if the caller is not currently running the event loop.
            self.window.update_idletasks()

    def update_log(self, message):
        """Append ``message`` plus a newline to the log box.

        Args:
            message: Text of the log line (a ``str``).
        """
        if self._closed:
            return
        # Build the line here so a non-str message fails in the caller's thread.
        self._pending.put(("log", message + "\n"))
        self._flush_if_owner()

    def update_progress(self, progress_value):
        """Set the progress bar position.

        Args:
            progress_value: New value; the bar's maximum is 100.
        """
        if self._closed:
            return
        self._pending.put(("progress", progress_value))
        self._flush_if_owner()

    def close(self):
        """Stop the event loop and destroy the window; safe to call repeatedly."""
        if threading.get_ident() != self._owner_thread:
            # Hand the request to the owning thread instead of touching Tk here.
            self._pending.put(("close", None))
            return
        self._shutdown()

    def _shutdown(self):
        """Quit and destroy the root window once (owning thread only)."""
        if self._closed:
            return
        self._closed = True
        self.window.quit()
        self.window.destroy()


In [4]:
# Fetch the tokenizer for the BERT checkpoint and the example corpus used by this demo
tokenizer = AutoTokenizer.from_pretrained("bert-base-uncased")
dataset = load_dataset("imdb")  # Example dataset

# Preprocess dataset
def tokenize_function(examples):
    """Run the module-level ``tokenizer`` over the ``'text'`` entry of ``examples``.

    The tokenizer is called with ``padding="max_length"`` and
    ``truncation=True``; whatever it returns is passed back unchanged.
    """
    texts = examples['text']
    encoded = tokenizer(texts, padding="max_length", truncation=True)
    return encoded
# Tokenize the train and test splits with the function defined above
train_data = dataset['train'].map(tokenize_function, batched=True)
val_data = dataset['test'].map(tokenize_function, batched=True)

# Define model
model = AutoModelForSequenceClassification.from_pretrained('bert-base-uncased', num_labels=2)

# Training Arguments
# Newer transformers releases name the evaluation-strategy argument
# `eval_strategy` instead of `evaluation_strategy`; use whichever field the
# installed TrainingArguments dataclass defines.
_eval_strategy_key = (
    'eval_strategy'
    if 'eval_strategy' in TrainingArguments.__dataclass_fields__
    else 'evaluation_strategy'
)
training_args = TrainingArguments(
    output_dir='./results',
    num_train_epochs=3,
    per_device_train_batch_size=8,
    per_device_eval_batch_size=8,
    # load_best_model_at_end requires the save strategy to match the
    # evaluation strategy; the default save strategy is step-based, which
    # makes TrainingArguments raise a ValueError.
    save_strategy='epoch',
    logging_dir='./logs',
    logging_steps=10,
    load_best_model_at_end=True,
    **{_eval_strategy_key: 'epoch'},
)


In [5]:
# Build the window that the simulated training run reports to
progress_window = TrainingProgressWindow(title="Training Progress")

# Simulate training loop with progress and logging updates
def simulated_training(total_steps=100, num_epochs=3, step_delay=0.1, window=None):
    """Simulate a training run, reporting logs and progress to a window.

    Args:
        total_steps: Number of simulated steps per epoch.
        num_epochs: Number of simulated epochs.
        step_delay: Seconds to sleep per step to imitate work.
        window: Object exposing ``update_log(str)`` and
            ``update_progress(float)``. Defaults to the module-level
            ``progress_window``.

    Progress is reported as a percentage of all steps across all epochs and
    reaches 100 after the final step.
    """
    if window is None:
        window = progress_window

    # Single source of truth for the denominator (was a duplicated literal 3).
    overall_steps = num_epochs * total_steps
    for epoch in range(num_epochs):
        window.update_log(f"Epoch {epoch+1} starting...")
        for step in range(total_steps):
            time.sleep(step_delay)  # Simulate a training step
            steps_done = epoch * total_steps + step + 1
            window.update_progress(steps_done / overall_steps * 100)
        window.update_log(f"Epoch {epoch+1} completed.")

# Launch the simulated training on a daemon thread
sim_thread = threading.Thread(target=simulated_training, daemon=True)
sim_thread.start()


In [6]:
# Run the Tk event loop for the progress window so it stays open and keeps
# displaying logs and progress. This call does not start training: the
# simulated training thread is started in the previous cell.
progress_window.window.mainloop()  # Keep the UI open to show logs and progress