In [8]:
import json
from dataset import InterviewDataset,HierarchicalInterviewDataset
from torch.utils.data import Dataset, DataLoader
import yaml
from transformers import AlbertTokenizer,Trainer,TrainingArguments
import torch
# Turn on autograd anomaly detection for the whole session.
# NOTE(review): the PyTorch docs describe this as a debugging aid that slows
# execution, so it will likely inflate the training-time and latency numbers
# measured later in this notebook — consider disabling it for benchmark runs.
torch.autograd.set_detect_anomaly(True)


<torch.autograd.anomaly_mode.set_detect_anomaly at 0x41e9fb370>

In [9]:
# Path to the YAML file that holds the data locations and training settings.
config_path = "config.yaml"

# Pick the best available accelerator: CUDA first, then Apple-silicon MPS,
# and fall back to the CPU when neither is present.
if torch.cuda.is_available():
    device = torch.device("cuda")
elif torch.backends.mps.is_available():
    device = torch.device("mps")
else:
    device = torch.device("cpu")


def load_json(path):
    """Parse the JSON file at ``path`` and return the decoded object.

    The file is always read as UTF-8 so the result does not depend on the
    platform's default locale encoding.
    """
    with open(path, "r", encoding="utf-8") as f:
        return json.load(f)


with open(config_path, "r", encoding="utf-8") as file:
    config = yaml.safe_load(file)

# All settings used in this cell live under the "train" section of the config.
train_cfg = config["train"]

train_data = load_json(train_cfg["train_data_path"])
val_data = load_json(train_cfg["val_data_path"])
test_data = load_json(train_cfg["test_data_path"])

tokenizer = AlbertTokenizer.from_pretrained("albert-base-v2")

train_dataset = InterviewDataset(train_data, tokenizer)
val_dataset = InterviewDataset(val_data, tokenizer)
test_dataset = InterviewDataset(test_data, tokenizer)

# Only the training loader shuffles; the test loader yields one sample per batch.
train_loader = DataLoader(train_dataset, batch_size=train_cfg["batch_size"], shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=train_cfg["batch_size"], shuffle=False)
test_loader = DataLoader(test_dataset, batch_size=1, shuffle=False)

In [10]:
import time
import torch
from transformers import Trainer, TrainingArguments
from model import HongzhenAlbertForRegression

# Instantiate the project's ALBERT-based regressor from the "albert-base-v2"
# checkpoint name with three outputs, then move it to the selected device.
# `model.to(device)` is the cell's last expression, so the notebook also
# displays the module structure as the cell output.
model = HongzhenAlbertForRegression("albert-base-v2", num_outputs=3)
model.to(device)

HongzhenAlbertForRegression(
  (albert): AlbertModel(
    (embeddings): AlbertEmbeddings(
      (word_embeddings): Embedding(30000, 128, padding_idx=0)
      (position_embeddings): Embedding(512, 128)
      (token_type_embeddings): Embedding(2, 128)
      (LayerNorm): LayerNorm((128,), eps=1e-12, elementwise_affine=True)
      (dropout): Dropout(p=0, inplace=False)
    )
    (encoder): AlbertTransformer(
      (embedding_hidden_mapping_in): Linear(in_features=128, out_features=768, bias=True)
      (albert_layer_groups): ModuleList(
        (0): AlbertLayerGroup(
          (albert_layers): ModuleList(
            (0): AlbertLayer(
              (full_layer_layer_norm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
              (attention): AlbertSdpaAttention(
                (query): Linear(in_features=768, out_features=768, bias=True)
                (key): Linear(in_features=768, out_features=768, bias=True)
                (value): Linear(in_features=768, out_features=768,

In [22]:
# Baseline check: mean validation loss of the model before any fine-tuning.
model.eval()  # switch layers such as dropout to inference behaviour

total_loss = 0.0
num_batches = 0

with torch.no_grad():  # gradients are not needed for evaluation
    for batch in val_loader:
        input_ids = batch['input_ids'].to(device)
        attention_mask = batch['attention_mask'].to(device)
        labels = batch['labels'].to(device)

        # With labels supplied the model returns a (loss, predictions) pair.
        # The predictions get a real name instead of `_`, which IPython
        # reserves for the previous cell output.
        loss, predictions = model(input_ids=input_ids, attention_mask=attention_mask, labels=labels)
        print(predictions)  # raw per-batch predictions, kept for inspection
        total_loss += loss.item()
        num_batches += 1

# Mean of the per-batch losses; an empty loader gives NaN instead of a
# ZeroDivisionError.
avg_loss = total_loss / num_batches if num_batches else float("nan")
print(f"Validation Loss Before Training: {avg_loss}")

tensor([[0.0000, 0.0772, 0.0000],
        [0.0000, 0.0901, 0.0000],
        [0.0000, 0.1791, 0.0000],
        [0.0000, 0.0975, 0.0000],
        [0.0000, 0.1554, 0.0000],
        [0.0000, 0.1279, 0.0000],
        [0.0000, 0.1032, 0.0000],
        [0.0000, 0.1490, 0.0000]], device='mps:0')
tensor([[0.0000, 0.1080, 0.0000],
        [0.0000, 0.0084, 0.0000],
        [0.0000, 0.1341, 0.0000],
        [0.0000, 0.0445, 0.0000],
        [0.0000, 0.2013, 0.0000],
        [0.0000, 0.0756, 0.0000],
        [0.0000, 0.1847, 0.0000],
        [0.0000, 0.1132, 0.0000]], device='mps:0')
tensor([[0.0000, 0.1489, 0.0000],
        [0.0000, 0.0837, 0.0000],
        [0.0000, 0.0797, 0.0000],
        [0.0000, 0.2201, 0.0000],
        [0.0000, 0.1147, 0.0000],
        [0.0000, 0.1197, 0.0000],
        [0.0000, 0.1543, 0.0000],
        [0.0000, 0.1446, 0.0000]], device='mps:0')
tensor([[0.0000, 0.2102, 0.0000],
        [0.0000, 0.1379, 0.0000],
        [0.0000, 0.1517, 0.0000],
        [0.0000, 0.1255, 0.0000

In [12]:
from transformers import TrainerCallback

# Put the model back into training mode (an earlier cell switched it to eval).
# NOTE(review): the Hugging Face Trainer is expected to set train mode itself
# inside its training loop, so this call is probably redundant — confirm
# before removing.
model.train()

class TimeCallback(TrainerCallback):
    """Trainer callback that records and prints how long each epoch takes.

    Attributes:
        epoch_start_time: ``time.perf_counter()`` reading taken when the
            current epoch began, or ``None`` while no epoch is being timed.
        epoch_times: Durations in seconds of the finished epochs, in order.
    """

    def __init__(self):
        self.epoch_start_time = None
        self.epoch_times = []

    def on_epoch_begin(self, args, state, control, **kwargs):
        """Start the timer for the epoch that is about to run."""
        # perf_counter is monotonic, so the measurement is not affected by
        # system clock adjustments during a long run.
        self.epoch_start_time = time.perf_counter()

    def on_epoch_end(self, args, state, control, **kwargs):
        """Store and print the elapsed time of the epoch that just finished."""
        if self.epoch_start_time is None:
            # No matching on_epoch_begin was seen, so there is nothing to
            # measure; skip instead of failing on `float - None`.
            return
        epoch_time = time.perf_counter() - self.epoch_start_time
        self.epoch_start_time = None
        self.epoch_times.append(epoch_time)
        print(f"Epoch {state.epoch} completed in {epoch_time:.2f} seconds.")
# Callback instance that collects per-epoch wall times during training.
time_callback = TimeCallback()

# Fine-tuning configuration: evaluate and checkpoint once per epoch.
# NOTE(review): the batch size of 4 is hard-coded here, while the DataLoaders
# built earlier use config["train"]["batch_size"] — confirm this is intended.
training_args = TrainingArguments(
    output_dir="./results",
    # `eval_strategy` is the current name of the deprecated
    # `evaluation_strategy` argument.
    eval_strategy="epoch",
    learning_rate=2e-5,
    per_device_train_batch_size=4,
    per_device_eval_batch_size=4,
    num_train_epochs=config["train"]["max_epochs"],
    weight_decay=0.01,
    logging_dir="./logs",
    logging_steps=10,
    save_strategy="epoch",
)

trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=train_dataset,
    eval_dataset=val_dataset,
    callbacks=[time_callback],
)

# Time the training run with a monotonic clock; saving the model is
# deliberately left outside the measured interval.
start_time = time.perf_counter()
trainer.train()
end_time = time.perf_counter()

trainer.save_model("./saved_model")

training_time = end_time - start_time
print(f"Training Time: {training_time:.2f} seconds")

# --- Inference benchmark: latency and throughput on one fixed batch ---


def _sync_device():
    """Wait for queued accelerator work so wall-clock timings are accurate.

    CUDA and MPS run kernels asynchronously; without a synchronisation point
    the timer could stop before the forward passes have actually finished.
    On the CPU this is a no-op.
    """
    if device.type == "cuda":
        torch.cuda.synchronize()
    elif device.type == "mps":
        torch.mps.synchronize()


# Benchmark in inference mode so layers such as dropout behave as they would
# when the model is served.
model.eval()

# The benchmark batch is the first four samples of the validation set.
test_batch = next(iter(torch.utils.data.DataLoader(val_dataset, batch_size=4)))

input_ids = test_batch["input_ids"].to(device)
attention_mask = test_batch["attention_mask"].to(device)
labels = test_batch["labels"].to(device)

num_inference_steps = 100

with torch.no_grad():
    # One untimed warm-up pass keeps one-off start-up costs out of the
    # measurement.
    outputs = model(input_ids=input_ids, attention_mask=attention_mask, labels=labels)
    _sync_device()

    start_inference = time.perf_counter()
    for _step in range(num_inference_steps):
        outputs = model(input_ids=input_ids, attention_mask=attention_mask, labels=labels)
    _sync_device()
    end_inference = time.perf_counter()

total_inference_time = end_inference - start_inference
latency = total_inference_time / num_inference_steps
print(f"Average Latency: {latency:.4f} seconds per batch")

# Samples processed per second across all timed forward passes.
throughput = (len(input_ids) * num_inference_steps) / total_inference_time
print(f"Throughput: {throughput:.2f} samples per second")




Epoch,Training Loss,Validation Loss
1,15.6856,11.050546
2,7.2083,5.535478
3,3.7705,3.504957


Epoch 1.0 completed in 37.05 seconds.
Epoch 2.0 completed in 37.12 seconds.
Epoch 3.0 completed in 37.17 seconds.


KeyboardInterrupt: 

In [6]:
# Mean validation loss of the fine-tuned model, for comparison with the
# pre-training baseline computed earlier in the notebook.
model.eval()  # switch layers such as dropout to inference behaviour

total_loss = 0.0
num_batches = 0

with torch.no_grad():  # gradients are not needed for evaluation
    for batch in val_loader:
        input_ids = batch['input_ids'].to(device)
        attention_mask = batch['attention_mask'].to(device)
        labels = batch['labels'].to(device)

        # With labels supplied the model returns a (loss, predictions) pair.
        # The predictions get a real name instead of `_`, which IPython
        # reserves for the previous cell output.
        loss, predictions = model(input_ids=input_ids, attention_mask=attention_mask, labels=labels)
        print(loss.item(), predictions)

        total_loss += loss.item()
        num_batches += 1

# Mean of the per-batch losses; an empty loader gives NaN instead of a
# ZeroDivisionError.
avg_loss = total_loss / num_batches if num_batches else float("nan")
# This cell runs after fine-tuning, so the summary is labelled accordingly.
print(f"Validation Loss After Training: {avg_loss}")

0.6810821294784546 tensor([[5.2185, 5.1319, 5.0372]], device='mps:0')
0.11642146110534668 tensor([[5.2728, 5.1545, 5.0070]], device='mps:0')
0.768265426158905 tensor([[4.9239, 4.7962, 4.5402]], device='mps:0')
0.5067713260650635 tensor([[5.0608, 4.9485, 4.7374]], device='mps:0')
0.8978704214096069 tensor([[4.7967, 4.6630, 4.4623]], device='mps:0')
0.6006026864051819 tensor([[4.8445, 4.6274, 4.3532]], device='mps:0')
1.4578046798706055 tensor([[4.9492, 4.7967, 4.4427]], device='mps:0')
0.5027912855148315 tensor([[5.2679, 5.1663, 5.0085]], device='mps:0')
0.009636633098125458 tensor([[5.2463, 5.1680, 5.0402]], device='mps:0')
0.40736326575279236 tensor([[5.1075, 5.0047, 4.7708]], device='mps:0')
0.0689166709780693 tensor([[4.9920, 4.8334, 4.4663]], device='mps:0')
0.15607339143753052 tensor([[4.8868, 4.7204, 4.3712]], device='mps:0')
0.1524137258529663 tensor([[5.1650, 5.0016, 4.7736]], device='mps:0')
0.9223455190658569 tensor([[5.0733, 4.9011, 4.5763]], device='mps:0')
0.30051541328430

In [1]:
# Mean loss on the held-out test set.
# Requires `model`, `device` and `test_loader` from the setup/training cells;
# running this cell on a fresh kernel fails with a NameError.
model.eval()  # switch layers such as dropout to inference behaviour

total_loss = 0.0
num_batches = 0

with torch.no_grad():  # gradients are not needed for evaluation
    for batch in test_loader:
        input_ids = batch['input_ids'].to(device)
        attention_mask = batch['attention_mask'].to(device)
        labels = batch['labels'].to(device)

        # With labels supplied the model returns a (loss, predictions) pair.
        # The predictions get a real name instead of `_`, which IPython
        # reserves for the previous cell output.
        loss, predictions = model(input_ids=input_ids, attention_mask=attention_mask, labels=labels)
        print(loss.item(), predictions)

        total_loss += loss.item()
        num_batches += 1

# Mean of the per-batch losses; an empty loader gives NaN instead of a
# ZeroDivisionError.
avg_loss = total_loss / num_batches if num_batches else float("nan")
# The loop iterates over test_loader, so report the figure as a test loss.
print(f"Test Loss: {avg_loss}")

NameError: name 'model' is not defined