# Fine-tune bert-base-uncased on a tiny IMDB slice under the PyTorch profiler
# and report the profiler table plus the time spent in trainer.train().
#
# Environment setup used for this script:
#   python -m venv venv_10_3
#   pip install torch==2.0 transformers pandas datasets scikit-learn evaluate
#
# Example run, capturing all output:
#   python test2.py &> pereval_tr_def.txt

import os
import time

# Set the OpenMP thread count before the torch-backed libraries below are
# imported, which is why this assignment sits in the middle of the imports.
os.environ['OMP_NUM_THREADS'] = '8'

from transformers import AutoTokenizer
from transformers import DataCollatorWithPadding
import evaluate
import numpy as np
from transformers import AutoModelForSequenceClassification, TrainingArguments, Trainer
from torch.profiler import profile, record_function, ProfilerActivity
from datasets import load_dataset

# Only the first five examples of each split are loaded; load_dataset returns
# a list in the same order as the requested splits: [train, test].
imdb = load_dataset("imdb", split=['train[:5]', 'test[:5]'])
tokenizer = AutoTokenizer.from_pretrained("bert-base-uncased")


def preprocess_function(examples):
    """Tokenize the "text" field of a batch, truncating over-long inputs."""
    return tokenizer(examples["text"], truncation=True)


tokenized_imdb_tr = imdb[0].map(preprocess_function, batched=True)
tokenized_imdb_ts = imdb[1].map(preprocess_function, batched=True)

# Padding is applied per batch by the collator rather than at tokenization time.
data_collator = DataCollatorWithPadding(tokenizer=tokenizer)
accuracy = evaluate.load("accuracy")


def compute_metrics(eval_pred):
    """Return accuracy for a (predictions, labels) pair from the Trainer.

    The predictions are reduced with argmax over axis 1 before being compared
    against the reference labels.
    """
    predictions, labels = eval_pred
    predictions = np.argmax(predictions, axis=1)
    return accuracy.compute(predictions=predictions, references=labels)


id2label = {0: "NEGATIVE", 1: "POSITIVE"}
# Derived from id2label so the two mappings cannot drift apart.
label2id = {label: idx for idx, label in id2label.items()}

model = AutoModelForSequenceClassification.from_pretrained(
    "bert-base-uncased",
    num_labels=2,
    id2label=id2label,
    label2id=label2id,
)

training_args = TrainingArguments(
    output_dir="my_awesome_model2",
    learning_rate=2e-5,
    per_device_train_batch_size=1,
    per_device_eval_batch_size=1,
    num_train_epochs=1,
    weight_decay=0.01,
    evaluation_strategy="epoch",
    save_strategy="epoch",
    load_best_model_at_end=True,
    # push_to_hub=True,
)

trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=tokenized_imdb_tr,
    eval_dataset=tokenized_imdb_ts,
    tokenizer=tokenizer,
    data_collator=data_collator,
    compute_metrics=compute_metrics,
)

with profile(with_stack=True, profile_memory=True, record_shapes=True) as prof:
    # perf_counter is monotonic, so the measured interval cannot be skewed by
    # system clock adjustments the way time.time() can. Both timestamps are
    # taken inside the profiler context, so the profiler's exit processing is
    # not included in the reported duration.
    start_time = time.perf_counter()
    trainer.train()
    end_time = time.perf_counter()

# Print the profiling results
print(prof.key_averages().table())

total_time = end_time - start_time
print("Training Time Taken:")
print(total_time)

# Undo the environment override made at the top of the script.
del os.environ['OMP_NUM_THREADS']