In [22]:
import os
from sklearn.model_selection import train_test_split

# Directory containing text files
data_dir = 'data'
all_text = ''

# Reading all text files from the directory
for file_name in os.listdir(data_dir):
    if file_name.endswith('.txt'):
        with open(os.path.join(data_dir, file_name), 'r', encoding='utf-8') as file:
            all_text += file.read() + '\n'

# Splitting data into training (80%) and evaluation (20%) sets
train_text, eval_text = train_test_split(all_text.split('\n'), test_size=0.2, random_state=42)

# Joining lines to form complete text for training and evaluation
train_text = '\n'.join(train_text)
eval_text = '\n'.join(eval_text)

# Saving the training and evaluation data to separate files
with open('train.txt', 'w', encoding='utf-8') as train_file:
    train_file.write(train_text)

with open('eval.txt', 'w', encoding='utf-8') as eval_file:
    eval_file.write(eval_text)

print("Data split and saved successfully.")


Data split and saved successfully.


In [6]:
from transformers import Trainer, TrainingArguments, AutoModelForCausalLM, AutoTokenizer
from datasets import load_dataset

# Load tokenizer and model (you can replace 'gpt2' with any other LLM)
model_name = 'gpt2'
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(model_name)

# Load the text files into datasets
train_dataset = load_dataset('text', data_files='train.txt')['train']
eval_dataset = load_dataset('text', data_files='eval.txt')['train']

from transformers import DataCollatorForLanguageModeling

# Set pad_token if not already set
if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token

# Tokenize the datasets and include labels
def tokenize_function(examples):
    outputs = tokenizer(examples['text'], padding='max_length', truncation=True, max_length=512)
    outputs["labels"] = outputs["input_ids"].copy()  # Set labels to be the same as input_ids for causal LM
    return outputs

train_dataset = train_dataset.map(tokenize_function, batched=True, remove_columns=['text'])
eval_dataset = eval_dataset.map(tokenize_function, batched=True, remove_columns=['text'])

# Define a data collator that will dynamically pad the inputs and labels
data_collator = DataCollatorForLanguageModeling(
    tokenizer=tokenizer,
    mlm=False  # Set to False for causal language modeling
)

# Initialize the Trainer
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=train_dataset,
    eval_dataset=eval_dataset,
    tokenizer=tokenizer,
    data_collator=data_collator,
)

# Start training
trainer.train()

# Save the fine-tuned model
model.save_pretrained('./fine-tuned-model')
tokenizer.save_pretrained('./fine-tuned-model')

print("Fine-tuning complete and model saved.")



Map: 100%|██████████| 56/56 [00:00<00:00, 1866.71 examples/s]
Map: 100%|██████████| 15/15 [00:00<00:00, 833.27 examples/s]
  trainer = Trainer(

[A
                                      
[A                                            

  0%|          | 0/42 [10:15<?, ?it/s]       
[A
[A

{'eval_loss': 4.82485294342041, 'eval_runtime': 0.3054, 'eval_samples_per_second': 49.118, 'eval_steps_per_second': 13.098, 'epoch': 1.0}



[A
                                      
[A                                            

  0%|          | 0/42 [10:19<?, ?it/s]       
[A
[A

{'eval_loss': 4.740758895874023, 'eval_runtime': 0.3216, 'eval_samples_per_second': 46.636, 'eval_steps_per_second': 12.436, 'epoch': 2.0}



[A
[A
                                      
[A                                            

  0%|          | 0/42 [10:25<?, ?it/s]       
[A
                                      
100%|██████████| 42/42 [00:14<00:00,  2.91it/s]


{'eval_loss': 4.708907604217529, 'eval_runtime': 0.402, 'eval_samples_per_second': 37.312, 'eval_steps_per_second': 9.95, 'epoch': 3.0}
{'train_runtime': 14.4416, 'train_samples_per_second': 11.633, 'train_steps_per_second': 2.908, 'train_loss': 4.7641652425130205, 'epoch': 3.0}
Fine-tuning complete and model saved.


In [7]:
from transformers import Trainer, TrainingArguments, AutoModelForMaskedLM, AutoTokenizer
from datasets import load_dataset

# Load tokenizer and model (using 'xlm-roberta-base' for multilingual capabilities)
model_name = 'xlm-roberta-base'
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForMaskedLM.from_pretrained(model_name)

# Load the text files into datasets
train_dataset = load_dataset('text', data_files='train.txt')['train']
eval_dataset = load_dataset('text', data_files='eval.txt')['train']

from transformers import DataCollatorForLanguageModeling

# Set pad_token if not already set
if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token

# Tokenize the datasets and include labels
def tokenize_function(examples):
    outputs = tokenizer(examples['text'], padding='max_length', truncation=True, max_length=512)
    outputs["labels"] = outputs["input_ids"].copy()  # Set labels to be the same as input_ids for masked LM
    return outputs

train_dataset = train_dataset.map(tokenize_function, batched=True, remove_columns=['text'])
eval_dataset = eval_dataset.map(tokenize_function, batched=True, remove_columns=['text'])

# Define a data collator that will dynamically pad the inputs and labels
data_collator = DataCollatorForLanguageModeling(
    tokenizer=tokenizer,
    mlm=True,  # Enable masked language modeling for training
    mlm_probability=0.15  # Set the probability of masking tokens
)

# Define training arguments with constraints for limited VRAM
training_args = TrainingArguments(
    output_dir='./results',
    overwrite_output_dir=True,
    num_train_epochs=3,
    per_device_train_batch_size=2,  # Adjust batch size as needed
    per_device_eval_batch_size=2,   # Adjust as needed
    gradient_accumulation_steps=4,  # Increase to simulate a larger batch size with less VRAM
    evaluation_strategy="epoch",
    save_strategy="epoch",
    logging_dir='./logs',
    logging_steps=500,
    fp16=True,  # Use mixed precision for better VRAM efficiency
)

# Initialize the Trainer
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=train_dataset,
    eval_dataset=eval_dataset,
    tokenizer=tokenizer,
    data_collator=data_collator,
)

# Start training
trainer.train()

# Save the fine-tuned model
model.save_pretrained('./fine-tuned-model')
tokenizer.save_pretrained('./fine-tuned-model')

print("Fine-tuning complete and model saved.")


To support symlinks on Windows, you either need to activate Developer Mode or to run Python as an administrator. In order to activate developer mode, see this article: https://docs.microsoft.com/en-us/windows/apps/get-started/enable-your-device-for-development
Some weights of the model checkpoint at xlm-roberta-base were not used when initializing XLMRobertaForMaskedLM: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
- This IS expected if you are initializing XLMRobertaForMaskedLM from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing XLMRobertaForMaskedLM from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Map: 100%|██████████| 56/56 [00:00<00:00, 800.00 examples/s]
Map: 100%|██████████| 15/15 

{'eval_loss': 2.205465078353882, 'eval_runtime': 1.0121, 'eval_samples_per_second': 14.82, 'eval_steps_per_second': 7.904, 'epoch': 1.0}



[A
[A
[A
[A
                                      
[A                                            

  0%|          | 0/42 [26:37<?, ?it/s]       
[A
[A

{'eval_loss': 2.551729917526245, 'eval_runtime': 1.1791, 'eval_samples_per_second': 12.722, 'eval_steps_per_second': 6.785, 'epoch': 2.0}



[A
[A
[A
[A
                                      
[A                                            

  0%|          | 0/42 [27:06<?, ?it/s]       
[A
[A

{'eval_loss': 2.179887056350708, 'eval_runtime': 1.046, 'eval_samples_per_second': 14.34, 'eval_steps_per_second': 7.648, 'epoch': 3.0}


                                      
100%|██████████| 21/21 [01:17<00:00,  3.68s/it]


{'train_runtime': 77.2222, 'train_samples_per_second': 2.176, 'train_steps_per_second': 0.272, 'train_loss': 2.478679656982422, 'epoch': 3.0}
Fine-tuning complete and model saved.


In [12]:
from transformers import AutoModelForMaskedLM, AutoTokenizer
import torch

# Step 1: Load the fine-tuned model and tokenizer
model_path = './fine-tuned-model'
model = AutoModelForMaskedLM.from_pretrained(model_path)
tokenizer = AutoTokenizer.from_pretrained(model_path)
model.eval()  # Set the model to evaluation mode

# Step 2: Function to run inference and predict masked tokens
def predict_masked_token(text):
    # Tokenize input with <mask>
    inputs = tokenizer(text, return_tensors="pt")
    
    # Generate predictions
    with torch.no_grad():
        outputs = model(**inputs)
        predictions = outputs.logits
    
    # Find the index of the masked token and get the predicted token
    masked_index = torch.where(inputs['input_ids'] == tokenizer.mask_token_id)[1]
    predicted_token_id = torch.argmax(predictions[0, masked_index, :])
    predicted_token = tokenizer.decode(predicted_token_id)
    
    return predicted_token

# Step 3: Use the function to answer a question or complete a sentence
example_text = "The capital of <mask> is Paris."
answer = predict_masked_token(example_text)
print(f"Input: {example_text}")
print(f"Predicted answer: {answer}")

# Example for asking a question
question = "New media as software = Recensão <mask>."
answer = predict_masked_token(question)
print(f"Input: {question}")
print(f"Predicted answer: {answer}")


Input: The capital of <mask> is Paris.
Predicted answer: France
Input: New media as software = Recensão <mask>.
Predicted answer: Software


In [23]:
# Import necessary libraries
import os
from transformers import (
    T5Config, T5Tokenizer, T5ForConditionalGeneration, TrainingArguments, Trainer
)
from datasets import Dataset

# 1. Create a custom T5 configuration
config = T5Config(
    vocab_size=32128,
    n_positions=512,
    d_model=512,
    d_kv=64,
    d_ff=2048,
    num_layers=6,
    num_decoder_layers=6,
    num_heads=8,
    relative_attention_num_buckets=32,
    dropout_rate=0.1,
    layer_norm_epsilon=1e-6,
    initializer_factor=1.0,
    is_encoder_decoder=True,
    pad_token_id=0,
    eos_token_id=1,
    decoder_start_token_id=0  # Set to the pad_token_id
)


# 2. Load the tokenizer
tokenizer = T5Tokenizer.from_pretrained('t5-base')

# 3. Initialize the T5 model with the custom configuration
model = T5ForConditionalGeneration(config)

# 4. Load and preprocess data with chunks and in-book train-test splits
train_chunks = []
test_chunks = []

for filename in os.listdir('data'):
    if filename.endswith('.txt'):
        with open(os.path.join('data', filename), 'r', encoding='utf-8') as file:
            content = file.read().strip()
            
            # Tokenize the content into tokens of length 512
            tokenized_content = tokenizer.encode(content, truncation=False, return_tensors="pt")[0]
            
            # Split the tokenized content into chunks of 512 tokens
            for i in range(0, len(tokenized_content), 512):
                chunk = tokenized_content[i:i+512]
                
                # Decode the chunk back to text and store it as a chunk
                chunk_text = tokenizer.decode(chunk, skip_special_tokens=True).strip()
                if chunk_text:  # Ensure the chunk is not empty
                    train_chunks.append(chunk_text)
            
            # Split into training and testing chunks (80/20 split)
            split_index = int(len(train_chunks) * 0.8)
            test_chunks.extend(train_chunks[split_index:])
            train_chunks = train_chunks[:split_index]

# Convert the train and test chunks into Dataset objects
train_dataset = Dataset.from_dict({"text": train_chunks})
test_dataset = Dataset.from_dict({"text": test_chunks})


# Tokenize the datasets
def preprocess_function(examples):
    model_inputs = tokenizer(
        examples["text"],
        padding="longest",
        truncation=True,
        max_length=512,
        return_tensors="pt"
    )
    # Set the input as the label for unsupervised learning
    model_inputs["labels"] = model_inputs["input_ids"].clone()
    return model_inputs

# Apply preprocessing to the training and test data
tokenized_train_data = train_dataset.map(preprocess_function, batched=True, remove_columns=["text"])
tokenized_test_data = test_dataset.map(preprocess_function, batched=True, remove_columns=["text"])

# 5. Set up training arguments
training_args = TrainingArguments(
    output_dir="./results",
    num_train_epochs=100,
    per_device_train_batch_size=1,  # Adjust to fit VRAM
    gradient_accumulation_steps=4,
    evaluation_strategy="steps",
    eval_steps=500,  # Evaluate every 500 steps
    save_steps=500,
    save_total_limit=2,
    logging_dir="./logs",
    logging_steps=100,
    fp16=True,  # Enable mixed precision
    remove_unused_columns=False  # Ensure all columns are retained
)

# 6. Initialize the Trainer
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=tokenized_train_data,
    eval_dataset=tokenized_test_data
)

# 7. Train the model
trainer.train()

# 8. Save the trained model and tokenizer
trainer.save_model("./fine_tuned_t5")
tokenizer.save_pretrained("./fine_tuned_t5")

Map: 100%|██████████| 485/485 [00:00<00:00, 768.56 examples/s]
Map: 100%|██████████| 17708/17708 [00:23<00:00, 755.21 examples/s]
  1%|          | 100/12100 [00:30<1:00:00,  3.33it/s]

{'loss': 6.6963, 'grad_norm': 8.378801345825195, 'learning_rate': 4.958677685950414e-05, 'epoch': 0.82}


  2%|▏         | 200/12100 [01:00<56:47,  3.49it/s]  

{'loss': 5.8958, 'grad_norm': 12.086433410644531, 'learning_rate': 4.917355371900827e-05, 'epoch': 1.65}


  2%|▏         | 300/12100 [01:30<57:43,  3.41it/s]  

{'loss': 5.5834, 'grad_norm': 9.473154067993164, 'learning_rate': 4.87603305785124e-05, 'epoch': 2.47}


  3%|▎         | 400/12100 [01:59<57:20,  3.40it/s]  

{'loss': 5.4235, 'grad_norm': 8.992918968200684, 'learning_rate': 4.834710743801653e-05, 'epoch': 3.3}


  4%|▍         | 500/12100 [02:29<1:02:44,  3.08it/s]

{'loss': 5.2344, 'grad_norm': 9.476482391357422, 'learning_rate': 4.793388429752066e-05, 'epoch': 4.12}



  4%|▍         | 500/12100 [06:39<1:02:44,  3.08it/s]

{'eval_loss': 7.236117839813232, 'eval_runtime': 250.3105, 'eval_samples_per_second': 70.744, 'eval_steps_per_second': 8.845, 'epoch': 4.12}


  5%|▍         | 600/12100 [07:12<1:00:24,  3.17it/s]  

{'loss': 5.1583, 'grad_norm': 8.530677795410156, 'learning_rate': 4.75206611570248e-05, 'epoch': 4.95}


  6%|▌         | 700/12100 [07:44<1:05:59,  2.88it/s]

{'loss': 5.0105, 'grad_norm': 8.562764167785645, 'learning_rate': 4.7107438016528926e-05, 'epoch': 5.77}


  7%|▋         | 800/12100 [08:14<58:46,  3.20it/s]  

{'loss': 4.9265, 'grad_norm': 9.934786796569824, 'learning_rate': 4.669421487603306e-05, 'epoch': 6.6}


  7%|▋         | 900/12100 [08:43<1:00:27,  3.09it/s]

{'loss': 4.7926, 'grad_norm': 8.429489135742188, 'learning_rate': 4.6280991735537196e-05, 'epoch': 7.42}


  8%|▊         | 1000/12100 [09:13<53:25,  3.46it/s] 

{'loss': 4.7525, 'grad_norm': 8.050535202026367, 'learning_rate': 4.586776859504133e-05, 'epoch': 8.25}



  8%|▊         | 1000/12100 [13:24<53:25,  3.46it/s]

{'eval_loss': 7.091294765472412, 'eval_runtime': 251.4555, 'eval_samples_per_second': 70.422, 'eval_steps_per_second': 8.805, 'epoch': 8.25}


  9%|▉         | 1100/12100 [14:00<1:01:47,  2.97it/s]  

{'loss': 4.6527, 'grad_norm': 8.725563049316406, 'learning_rate': 4.545454545454546e-05, 'epoch': 9.07}


 10%|▉         | 1200/12100 [14:34<58:11,  3.12it/s]  

{'loss': 4.5476, 'grad_norm': 7.808073043823242, 'learning_rate': 4.504132231404959e-05, 'epoch': 9.9}


 11%|█         | 1300/12100 [15:09<1:02:44,  2.87it/s]

{'loss': 4.4995, 'grad_norm': 7.877635478973389, 'learning_rate': 4.462809917355372e-05, 'epoch': 10.72}


 12%|█▏        | 1400/12100 [15:54<1:22:49,  2.15it/s]

{'loss': 4.4029, 'grad_norm': 8.554482460021973, 'learning_rate': 4.4214876033057856e-05, 'epoch': 11.55}


 12%|█▏        | 1500/12100 [16:37<2:02:35,  1.44it/s]

{'loss': 4.3504, 'grad_norm': 8.313836097717285, 'learning_rate': 4.3801652892561984e-05, 'epoch': 12.37}


                                                      
 12%|█▏        | 1500/12100 [21:10<2:02:35,  1.44it/s]

{'eval_loss': 6.9733734130859375, 'eval_runtime': 272.9511, 'eval_samples_per_second': 64.876, 'eval_steps_per_second': 8.111, 'epoch': 12.37}


 13%|█▎        | 1600/12100 [21:45<53:04,  3.30it/s]    

{'loss': 4.2681, 'grad_norm': 8.52759075164795, 'learning_rate': 4.338842975206612e-05, 'epoch': 13.2}


 14%|█▍        | 1700/12100 [22:18<52:54,  3.28it/s]  

{'loss': 4.2176, 'grad_norm': 8.901269912719727, 'learning_rate': 4.2975206611570254e-05, 'epoch': 14.02}


 15%|█▍        | 1800/12100 [22:52<56:18,  3.05it/s]  

{'loss': 4.1398, 'grad_norm': 8.279951095581055, 'learning_rate': 4.256198347107438e-05, 'epoch': 14.85}


 16%|█▌        | 1900/12100 [23:25<54:05,  3.14it/s]  

{'loss': 4.0836, 'grad_norm': 8.595198631286621, 'learning_rate': 4.214876033057851e-05, 'epoch': 15.67}


 17%|█▋        | 2000/12100 [24:00<53:18,  3.16it/s]  

{'loss': 4.0194, 'grad_norm': 10.841180801391602, 'learning_rate': 4.1735537190082645e-05, 'epoch': 16.49}


                                                    
 17%|█▋        | 2000/12100 [28:26<53:18,  3.16it/s]

{'eval_loss': 6.88667631149292, 'eval_runtime': 266.4374, 'eval_samples_per_second': 66.462, 'eval_steps_per_second': 8.31, 'epoch': 16.49}


 17%|█▋        | 2100/12100 [29:01<53:45,  3.10it/s]    

{'loss': 3.9962, 'grad_norm': 9.193202018737793, 'learning_rate': 4.132231404958678e-05, 'epoch': 17.32}


 18%|█▊        | 2200/12100 [29:30<46:58,  3.51it/s]

{'loss': 3.917, 'grad_norm': 9.272565841674805, 'learning_rate': 4.0909090909090915e-05, 'epoch': 18.14}


 19%|█▉        | 2300/12100 [29:59<47:01,  3.47it/s]

{'loss': 3.8658, 'grad_norm': 9.78098201751709, 'learning_rate': 4.049586776859504e-05, 'epoch': 18.97}


 20%|█▉        | 2400/12100 [30:28<45:38,  3.54it/s]

{'loss': 3.8488, 'grad_norm': 9.63419246673584, 'learning_rate': 4.008264462809918e-05, 'epoch': 19.79}


 21%|██        | 2500/12100 [30:59<52:18,  3.06it/s]  

{'loss': 3.7537, 'grad_norm': 10.415353775024414, 'learning_rate': 3.9669421487603306e-05, 'epoch': 20.62}


                                                    
 21%|██        | 2500/12100 [35:19<52:18,  3.06it/s]

{'eval_loss': 6.8289666175842285, 'eval_runtime': 259.6469, 'eval_samples_per_second': 68.2, 'eval_steps_per_second': 8.527, 'epoch': 20.62}


 21%|██▏       | 2600/12100 [35:55<51:15,  3.09it/s]    

{'loss': 3.7082, 'grad_norm': 9.23658275604248, 'learning_rate': 3.925619834710744e-05, 'epoch': 21.44}


 22%|██▏       | 2700/12100 [36:30<52:26,  2.99it/s]  

{'loss': 3.6993, 'grad_norm': 10.451447486877441, 'learning_rate': 3.884297520661157e-05, 'epoch': 22.27}


 23%|██▎       | 2800/12100 [37:02<51:32,  3.01it/s]  

{'loss': 3.6424, 'grad_norm': 12.360048294067383, 'learning_rate': 3.8429752066115703e-05, 'epoch': 23.09}


 24%|██▍       | 2900/12100 [37:32<52:03,  2.95it/s]

{'loss': 3.6004, 'grad_norm': 9.5986328125, 'learning_rate': 3.801652892561984e-05, 'epoch': 23.92}


 25%|██▍       | 3000/12100 [38:06<46:26,  3.27it/s]  

{'loss': 3.5543, 'grad_norm': 10.398751258850098, 'learning_rate': 3.760330578512397e-05, 'epoch': 24.74}


                                                    
 25%|██▍       | 3000/12100 [42:37<46:26,  3.27it/s]

{'eval_loss': 6.756008148193359, 'eval_runtime': 271.2696, 'eval_samples_per_second': 65.278, 'eval_steps_per_second': 8.162, 'epoch': 24.74}


 26%|██▌       | 3100/12100 [43:12<1:04:59,  2.31it/s]  

{'loss': 3.5119, 'grad_norm': 10.71173095703125, 'learning_rate': 3.71900826446281e-05, 'epoch': 25.57}


 26%|██▋       | 3200/12100 [43:43<47:10,  3.14it/s]  

{'loss': 3.5191, 'grad_norm': 11.558547019958496, 'learning_rate': 3.6776859504132236e-05, 'epoch': 26.39}


 27%|██▋       | 3300/12100 [44:14<41:50,  3.51it/s]

{'loss': 3.4484, 'grad_norm': 10.745762825012207, 'learning_rate': 3.6363636363636364e-05, 'epoch': 27.22}


 28%|██▊       | 3400/12100 [44:45<41:33,  3.49it/s]

{'loss': 3.4251, 'grad_norm': 10.158214569091797, 'learning_rate': 3.59504132231405e-05, 'epoch': 28.04}


 29%|██▉       | 3500/12100 [45:18<46:16,  3.10it/s]  

{'loss': 3.38, 'grad_norm': 11.752439498901367, 'learning_rate': 3.553719008264463e-05, 'epoch': 28.87}


                                                    
 29%|██▉       | 3500/12100 [49:37<46:16,  3.10it/s]

{'eval_loss': 6.795668125152588, 'eval_runtime': 259.1897, 'eval_samples_per_second': 68.321, 'eval_steps_per_second': 8.542, 'epoch': 28.87}


 30%|██▉       | 3600/12100 [50:09<39:10,  3.62it/s]    

{'loss': 3.3667, 'grad_norm': 11.629626274108887, 'learning_rate': 3.512396694214876e-05, 'epoch': 29.69}


 31%|███       | 3700/12100 [50:38<43:34,  3.21it/s]

{'loss': 3.2962, 'grad_norm': 10.828832626342773, 'learning_rate': 3.47107438016529e-05, 'epoch': 30.52}


 31%|███▏      | 3800/12100 [51:09<38:04,  3.63it/s]

{'loss': 3.3004, 'grad_norm': 12.258402824401855, 'learning_rate': 3.429752066115703e-05, 'epoch': 31.34}


 32%|███▏      | 3900/12100 [51:38<41:30,  3.29it/s]

{'loss': 3.2631, 'grad_norm': 13.483973503112793, 'learning_rate': 3.388429752066116e-05, 'epoch': 32.16}


 33%|███▎      | 4000/12100 [52:08<41:21,  3.26it/s]

{'loss': 3.2327, 'grad_norm': 10.779280662536621, 'learning_rate': 3.347107438016529e-05, 'epoch': 32.99}


                                                    
 33%|███▎      | 4000/12100 [56:20<41:21,  3.26it/s]

{'eval_loss': 6.743625640869141, 'eval_runtime': 251.6857, 'eval_samples_per_second': 70.358, 'eval_steps_per_second': 8.797, 'epoch': 32.99}


 34%|███▍      | 4100/12100 [56:57<44:24,  3.00it/s]    

{'loss': 3.1938, 'grad_norm': 12.065994262695312, 'learning_rate': 3.305785123966942e-05, 'epoch': 33.81}


 35%|███▍      | 4200/12100 [57:26<35:42,  3.69it/s]

{'loss': 3.1864, 'grad_norm': 12.930488586425781, 'learning_rate': 3.264462809917356e-05, 'epoch': 34.64}


 36%|███▌      | 4300/12100 [57:56<37:18,  3.48it/s]

{'loss': 3.1479, 'grad_norm': 12.388154029846191, 'learning_rate': 3.2231404958677685e-05, 'epoch': 35.46}


 36%|███▋      | 4400/12100 [58:25<37:40,  3.41it/s]

{'loss': 3.1338, 'grad_norm': 12.389945983886719, 'learning_rate': 3.181818181818182e-05, 'epoch': 36.29}


 37%|███▋      | 4500/12100 [58:58<40:19,  3.14it/s]

{'loss': 3.101, 'grad_norm': 13.095452308654785, 'learning_rate': 3.1404958677685955e-05, 'epoch': 37.11}


                                                    
 37%|███▋      | 4500/12100 [1:03:17<40:19,  3.14it/s]

{'eval_loss': 6.744268894195557, 'eval_runtime': 259.1294, 'eval_samples_per_second': 68.337, 'eval_steps_per_second': 8.544, 'epoch': 37.11}


 38%|███▊      | 4600/12100 [1:03:48<44:33,  2.80it/s]    

{'loss': 3.0666, 'grad_norm': 12.968731880187988, 'learning_rate': 3.099173553719008e-05, 'epoch': 37.94}


 39%|███▉      | 4700/12100 [1:04:19<34:25,  3.58it/s]

{'loss': 3.0527, 'grad_norm': 11.472243309020996, 'learning_rate': 3.057851239669421e-05, 'epoch': 38.76}


 40%|███▉      | 4800/12100 [1:04:49<49:28,  2.46it/s]

{'loss': 3.0039, 'grad_norm': 11.85537052154541, 'learning_rate': 3.016528925619835e-05, 'epoch': 39.59}


 40%|████      | 4900/12100 [1:05:19<34:35,  3.47it/s]

{'loss': 3.0411, 'grad_norm': 12.782691955566406, 'learning_rate': 2.975206611570248e-05, 'epoch': 40.41}


 41%|████▏     | 5000/12100 [1:05:50<34:28,  3.43it/s]

{'loss': 2.9893, 'grad_norm': 12.806731224060059, 'learning_rate': 2.9338842975206616e-05, 'epoch': 41.24}


                                                      
 41%|████▏     | 5000/12100 [1:10:06<34:28,  3.43it/s]

{'eval_loss': 6.726179122924805, 'eval_runtime': 255.3026, 'eval_samples_per_second': 69.361, 'eval_steps_per_second': 8.672, 'epoch': 41.24}


 42%|████▏     | 5100/12100 [1:10:38<45:22,  2.57it/s]    

{'loss': 2.9475, 'grad_norm': 13.565256118774414, 'learning_rate': 2.8925619834710744e-05, 'epoch': 42.06}


 43%|████▎     | 5200/12100 [1:11:12<33:39,  3.42it/s]  

{'loss': 2.9462, 'grad_norm': 12.958394050598145, 'learning_rate': 2.8512396694214875e-05, 'epoch': 42.89}


 44%|████▍     | 5300/12100 [1:11:41<30:52,  3.67it/s]

{'loss': 2.9153, 'grad_norm': 13.223419189453125, 'learning_rate': 2.809917355371901e-05, 'epoch': 43.71}


 45%|████▍     | 5400/12100 [1:12:12<36:26,  3.06it/s]

{'loss': 2.903, 'grad_norm': 13.815834045410156, 'learning_rate': 2.7685950413223145e-05, 'epoch': 44.54}


 45%|████▌     | 5500/12100 [1:12:46<41:47,  2.63it/s]

{'loss': 2.8906, 'grad_norm': 13.465398788452148, 'learning_rate': 2.7272727272727273e-05, 'epoch': 45.36}


                                                      
 45%|████▌     | 5500/12100 [1:16:56<41:47,  2.63it/s]

{'eval_loss': 6.762022495269775, 'eval_runtime': 249.7169, 'eval_samples_per_second': 70.912, 'eval_steps_per_second': 8.866, 'epoch': 45.36}


 46%|████▋     | 5600/12100 [1:17:26<29:25,  3.68it/s]    

{'loss': 2.857, 'grad_norm': 14.012314796447754, 'learning_rate': 2.6859504132231405e-05, 'epoch': 46.19}


 47%|████▋     | 5700/12100 [1:17:55<31:26,  3.39it/s]

{'loss': 2.8471, 'grad_norm': 13.57468032836914, 'learning_rate': 2.644628099173554e-05, 'epoch': 47.01}


 48%|████▊     | 5800/12100 [1:18:23<30:40,  3.42it/s]

{'loss': 2.8203, 'grad_norm': 14.73724365234375, 'learning_rate': 2.6033057851239674e-05, 'epoch': 47.84}


 49%|████▉     | 5900/12100 [1:18:51<28:54,  3.57it/s]

{'loss': 2.8063, 'grad_norm': 14.033075332641602, 'learning_rate': 2.5619834710743802e-05, 'epoch': 48.66}


 50%|████▉     | 6000/12100 [1:19:19<27:09,  3.74it/s]

{'loss': 2.7909, 'grad_norm': 12.858050346374512, 'learning_rate': 2.5206611570247934e-05, 'epoch': 49.48}


                                                      
 50%|████▉     | 6000/12100 [1:23:24<27:09,  3.74it/s]

{'eval_loss': 6.764978408813477, 'eval_runtime': 245.4937, 'eval_samples_per_second': 72.132, 'eval_steps_per_second': 9.019, 'epoch': 49.48}


 50%|█████     | 6100/12100 [1:23:56<29:15,  3.42it/s]    

{'loss': 2.766, 'grad_norm': 13.190642356872559, 'learning_rate': 2.4797520661157024e-05, 'epoch': 50.31}


 51%|█████     | 6200/12100 [1:24:26<29:13,  3.37it/s]

{'loss': 2.7615, 'grad_norm': 13.797409057617188, 'learning_rate': 2.438429752066116e-05, 'epoch': 51.13}


 52%|█████▏    | 6300/12100 [1:24:56<27:15,  3.55it/s]

{'loss': 2.7482, 'grad_norm': 14.228917121887207, 'learning_rate': 2.397107438016529e-05, 'epoch': 51.96}


 53%|█████▎    | 6400/12100 [1:25:26<28:08,  3.38it/s]

{'loss': 2.7192, 'grad_norm': 14.130762100219727, 'learning_rate': 2.3557851239669422e-05, 'epoch': 52.78}


 54%|█████▎    | 6500/12100 [1:25:55<25:08,  3.71it/s]

{'loss': 2.7023, 'grad_norm': 14.36883544921875, 'learning_rate': 2.3144628099173554e-05, 'epoch': 53.61}


                                                      
 54%|█████▎    | 6500/12100 [1:30:00<25:08,  3.71it/s]

{'eval_loss': 6.756079196929932, 'eval_runtime': 244.8559, 'eval_samples_per_second': 72.32, 'eval_steps_per_second': 9.042, 'epoch': 53.61}


 55%|█████▍    | 6600/12100 [1:30:29<25:05,  3.65it/s]    

{'loss': 2.7045, 'grad_norm': 14.90459156036377, 'learning_rate': 2.273140495867769e-05, 'epoch': 54.43}


 55%|█████▌    | 6700/12100 [1:31:04<30:15,  2.98it/s]

{'loss': 2.6768, 'grad_norm': 13.32442855834961, 'learning_rate': 2.231818181818182e-05, 'epoch': 55.26}


 56%|█████▌    | 6800/12100 [1:31:34<28:51,  3.06it/s]

{'loss': 2.659, 'grad_norm': 15.18667221069336, 'learning_rate': 2.190495867768595e-05, 'epoch': 56.08}


 57%|█████▋    | 6900/12100 [1:32:06<27:23,  3.16it/s]

{'loss': 2.639, 'grad_norm': 14.812850952148438, 'learning_rate': 2.1491735537190083e-05, 'epoch': 56.91}


 58%|█████▊    | 7000/12100 [1:32:41<25:36,  3.32it/s]

{'loss': 2.6332, 'grad_norm': 15.713417053222656, 'learning_rate': 2.1078512396694218e-05, 'epoch': 57.73}


                                                      
 58%|█████▊    | 7000/12100 [1:36:55<25:36,  3.32it/s]

{'eval_loss': 6.772141456604004, 'eval_runtime': 254.0101, 'eval_samples_per_second': 69.714, 'eval_steps_per_second': 8.716, 'epoch': 57.73}


 59%|█████▊    | 7100/12100 [1:37:29<28:36,  2.91it/s]    

{'loss': 2.6229, 'grad_norm': 15.490211486816406, 'learning_rate': 2.066528925619835e-05, 'epoch': 58.56}


 60%|█████▉    | 7200/12100 [1:38:02<24:57,  3.27it/s]

{'loss': 2.5912, 'grad_norm': 14.377694129943848, 'learning_rate': 2.025206611570248e-05, 'epoch': 59.38}


 60%|██████    | 7300/12100 [1:38:34<29:55,  2.67it/s]

{'loss': 2.6226, 'grad_norm': 15.346203804016113, 'learning_rate': 1.984297520661157e-05, 'epoch': 60.21}


 61%|██████    | 7400/12100 [1:39:08<22:43,  3.45it/s]

{'loss': 2.5829, 'grad_norm': 15.62594223022461, 'learning_rate': 1.9429752066115702e-05, 'epoch': 61.03}


 62%|██████▏   | 7500/12100 [1:39:41<25:43,  2.98it/s]

{'loss': 2.5681, 'grad_norm': 15.886835098266602, 'learning_rate': 1.9016528925619837e-05, 'epoch': 61.86}


                                                      
 62%|██████▏   | 7500/12100 [1:44:05<25:43,  2.98it/s]

{'eval_loss': 6.789973258972168, 'eval_runtime': 263.6608, 'eval_samples_per_second': 67.162, 'eval_steps_per_second': 8.397, 'epoch': 61.86}


 63%|██████▎   | 7600/12100 [1:44:36<21:25,  3.50it/s]    

{'loss': 2.555, 'grad_norm': 15.317134857177734, 'learning_rate': 1.860330578512397e-05, 'epoch': 62.68}


 64%|██████▎   | 7700/12100 [1:45:09<24:14,  3.03it/s]

{'loss': 2.5558, 'grad_norm': 14.709211349487305, 'learning_rate': 1.81900826446281e-05, 'epoch': 63.51}


 64%|██████▍   | 7800/12100 [1:45:40<23:06,  3.10it/s]

{'loss': 2.5143, 'grad_norm': 14.96395492553711, 'learning_rate': 1.777685950413223e-05, 'epoch': 64.33}


 65%|██████▌   | 7900/12100 [1:46:12<20:46,  3.37it/s]

{'loss': 2.51, 'grad_norm': 15.254074096679688, 'learning_rate': 1.7363636363636366e-05, 'epoch': 65.15}


 66%|██████▌   | 8000/12100 [1:46:44<23:09,  2.95it/s]

{'loss': 2.5304, 'grad_norm': 14.901637077331543, 'learning_rate': 1.6950413223140495e-05, 'epoch': 65.98}


                                                      
 66%|██████▌   | 8000/12100 [1:51:02<23:09,  2.95it/s]

{'eval_loss': 6.7808837890625, 'eval_runtime': 258.8076, 'eval_samples_per_second': 68.421, 'eval_steps_per_second': 8.555, 'epoch': 65.98}


 67%|██████▋   | 8100/12100 [1:51:36<20:00,  3.33it/s]   

{'loss': 2.4925, 'grad_norm': 15.152372360229492, 'learning_rate': 1.653719008264463e-05, 'epoch': 66.8}


 68%|██████▊   | 8200/12100 [1:52:05<20:03,  3.24it/s]

{'loss': 2.5067, 'grad_norm': 16.16629409790039, 'learning_rate': 1.612396694214876e-05, 'epoch': 67.63}


 69%|██████▊   | 8300/12100 [1:52:36<17:59,  3.52it/s]

{'loss': 2.4814, 'grad_norm': 14.660419464111328, 'learning_rate': 1.5710743801652896e-05, 'epoch': 68.45}


 69%|██████▉   | 8400/12100 [1:53:06<18:42,  3.30it/s]

{'loss': 2.4508, 'grad_norm': 15.333077430725098, 'learning_rate': 1.5297520661157024e-05, 'epoch': 69.28}


 70%|███████   | 8500/12100 [1:53:35<16:40,  3.60it/s]

{'loss': 2.4726, 'grad_norm': 14.836492538452148, 'learning_rate': 1.4884297520661159e-05, 'epoch': 70.1}


                                                      
 70%|███████   | 8500/12100 [1:57:55<16:40,  3.60it/s]

{'eval_loss': 6.8048529624938965, 'eval_runtime': 260.3177, 'eval_samples_per_second': 68.025, 'eval_steps_per_second': 8.505, 'epoch': 70.1}


 71%|███████   | 8600/12100 [1:58:25<20:12,  2.89it/s]   

{'loss': 2.4523, 'grad_norm': 15.271116256713867, 'learning_rate': 1.447107438016529e-05, 'epoch': 70.93}


 72%|███████▏  | 8700/12100 [1:58:55<16:18,  3.47it/s]

{'loss': 2.4367, 'grad_norm': 15.634391784667969, 'learning_rate': 1.4057851239669423e-05, 'epoch': 71.75}


 73%|███████▎  | 8800/12100 [1:59:24<16:18,  3.37it/s]

{'loss': 2.4278, 'grad_norm': 15.47030258178711, 'learning_rate': 1.3644628099173553e-05, 'epoch': 72.58}


 74%|███████▎  | 8900/12100 [1:59:54<15:27,  3.45it/s]

{'loss': 2.4372, 'grad_norm': 15.563417434692383, 'learning_rate': 1.3231404958677688e-05, 'epoch': 73.4}


 74%|███████▍  | 9000/12100 [2:00:22<14:39,  3.53it/s]

{'loss': 2.4062, 'grad_norm': 16.24469757080078, 'learning_rate': 1.2818181818181818e-05, 'epoch': 74.23}


                                                      
 74%|███████▍  | 9000/12100 [2:04:43<14:39,  3.53it/s]

{'eval_loss': 6.8253936767578125, 'eval_runtime': 260.0948, 'eval_samples_per_second': 68.083, 'eval_steps_per_second': 8.512, 'epoch': 74.23}


 75%|███████▌  | 9100/12100 [2:05:13<14:07,  3.54it/s]   

{'loss': 2.4083, 'grad_norm': 13.833292961120605, 'learning_rate': 1.2404958677685952e-05, 'epoch': 75.05}


 76%|███████▌  | 9200/12100 [2:05:42<13:40,  3.54it/s]

{'loss': 2.3966, 'grad_norm': 14.789444923400879, 'learning_rate': 1.1991735537190084e-05, 'epoch': 75.88}


 77%|███████▋  | 9300/12100 [2:06:12<13:45,  3.39it/s]

{'loss': 2.3922, 'grad_norm': 15.984077453613281, 'learning_rate': 1.1578512396694215e-05, 'epoch': 76.7}


 78%|███████▊  | 9400/12100 [2:06:41<13:06,  3.43it/s]

{'loss': 2.3848, 'grad_norm': 15.620939254760742, 'learning_rate': 1.1165289256198348e-05, 'epoch': 77.53}


 79%|███████▊  | 9500/12100 [2:07:10<12:19,  3.51it/s]

{'loss': 2.3858, 'grad_norm': 15.602383613586426, 'learning_rate': 1.075206611570248e-05, 'epoch': 78.35}


                                                      
 79%|███████▊  | 9500/12100 [2:11:30<12:19,  3.51it/s]

{'eval_loss': 6.81731653213501, 'eval_runtime': 260.1879, 'eval_samples_per_second': 68.059, 'eval_steps_per_second': 8.509, 'epoch': 78.35}


 79%|███████▉  | 9600/12100 [2:12:00<11:54,  3.50it/s]   

{'loss': 2.3808, 'grad_norm': 15.339333534240723, 'learning_rate': 1.0338842975206613e-05, 'epoch': 79.18}


 80%|████████  | 9700/12100 [2:12:29<11:25,  3.50it/s]

{'loss': 2.3416, 'grad_norm': 16.559043884277344, 'learning_rate': 9.925619834710745e-06, 'epoch': 80.0}


 81%|████████  | 9800/12100 [2:12:59<11:20,  3.38it/s]

{'loss': 2.357, 'grad_norm': 15.714468002319336, 'learning_rate': 9.512396694214878e-06, 'epoch': 80.82}


 82%|████████▏ | 9900/12100 [2:13:28<11:42,  3.13it/s]

{'loss': 2.3362, 'grad_norm': 16.044240951538086, 'learning_rate': 9.09917355371901e-06, 'epoch': 81.65}


 83%|████████▎ | 10000/12100 [2:13:58<10:02,  3.49it/s]

{'loss': 2.3498, 'grad_norm': 16.537641525268555, 'learning_rate': 8.685950413223142e-06, 'epoch': 82.47}


                                                       
 83%|████████▎ | 10000/12100 [2:18:18<10:02,  3.49it/s]

{'eval_loss': 6.829772472381592, 'eval_runtime': 260.2676, 'eval_samples_per_second': 68.038, 'eval_steps_per_second': 8.507, 'epoch': 82.47}


 83%|████████▎ | 10100/12100 [2:18:48<09:40,  3.44it/s]   

{'loss': 2.345, 'grad_norm': 15.380019187927246, 'learning_rate': 8.272727272727274e-06, 'epoch': 83.3}


 84%|████████▍ | 10200/12100 [2:19:17<09:17,  3.41it/s]

{'loss': 2.3241, 'grad_norm': 16.44738006591797, 'learning_rate': 7.859504132231405e-06, 'epoch': 84.12}


 85%|████████▌ | 10300/12100 [2:19:47<08:58,  3.34it/s]

{'loss': 2.3278, 'grad_norm': 15.550043106079102, 'learning_rate': 7.446280991735538e-06, 'epoch': 84.95}


 86%|████████▌ | 10400/12100 [2:20:16<08:05,  3.50it/s]

{'loss': 2.3163, 'grad_norm': 16.36534309387207, 'learning_rate': 7.033057851239671e-06, 'epoch': 85.77}


 87%|████████▋ | 10500/12100 [2:20:45<07:47,  3.42it/s]

{'loss': 2.313, 'grad_norm': 16.257549285888672, 'learning_rate': 6.619834710743802e-06, 'epoch': 86.6}


                                                       
 87%|████████▋ | 10500/12100 [2:25:05<07:47,  3.42it/s]

{'eval_loss': 6.851263999938965, 'eval_runtime': 260.3414, 'eval_samples_per_second': 68.018, 'eval_steps_per_second': 8.504, 'epoch': 86.6}


 88%|████████▊ | 10600/12100 [2:25:36<06:59,  3.58it/s]   

{'loss': 2.307, 'grad_norm': 17.11347007751465, 'learning_rate': 6.206611570247934e-06, 'epoch': 87.42}


 88%|████████▊ | 10700/12100 [2:26:05<06:46,  3.45it/s]

{'loss': 2.3075, 'grad_norm': 15.412469863891602, 'learning_rate': 5.793388429752066e-06, 'epoch': 88.25}


 89%|████████▉ | 10800/12100 [2:26:35<06:10,  3.51it/s]

{'loss': 2.3038, 'grad_norm': 15.852795600891113, 'learning_rate': 5.380165289256198e-06, 'epoch': 89.07}


 90%|█████████ | 10900/12100 [2:27:09<07:09,  2.79it/s]

{'loss': 2.3048, 'grad_norm': 15.737333297729492, 'learning_rate': 4.9669421487603305e-06, 'epoch': 89.9}


 91%|█████████ | 11000/12100 [2:27:42<05:27,  3.36it/s]

{'loss': 2.2888, 'grad_norm': 16.05455780029297, 'learning_rate': 4.553719008264463e-06, 'epoch': 90.72}


                                                       
 91%|█████████ | 11000/12100 [2:32:14<05:27,  3.36it/s]

{'eval_loss': 6.84592866897583, 'eval_runtime': 271.8607, 'eval_samples_per_second': 65.136, 'eval_steps_per_second': 8.144, 'epoch': 90.72}


 92%|█████████▏| 11100/12100 [2:32:46<04:47,  3.48it/s]   

{'loss': 2.2843, 'grad_norm': 16.33783531188965, 'learning_rate': 4.140495867768595e-06, 'epoch': 91.55}


 93%|█████████▎| 11200/12100 [2:33:15<04:20,  3.46it/s]

{'loss': 2.2904, 'grad_norm': 16.69676399230957, 'learning_rate': 3.727272727272727e-06, 'epoch': 92.37}


 93%|█████████▎| 11300/12100 [2:33:45<04:26,  3.00it/s]

{'loss': 2.2689, 'grad_norm': 16.69298553466797, 'learning_rate': 3.3181818181818183e-06, 'epoch': 93.2}


 94%|█████████▍| 11400/12100 [2:34:14<03:11,  3.65it/s]

{'loss': 2.2924, 'grad_norm': 16.267929077148438, 'learning_rate': 2.90495867768595e-06, 'epoch': 94.02}


 95%|█████████▌| 11500/12100 [2:34:43<02:43,  3.67it/s]

{'loss': 2.2822, 'grad_norm': 16.490894317626953, 'learning_rate': 2.4917355371900825e-06, 'epoch': 94.85}


                                                       
 95%|█████████▌| 11500/12100 [2:39:00<02:43,  3.67it/s]

{'eval_loss': 6.851628303527832, 'eval_runtime': 257.0795, 'eval_samples_per_second': 68.881, 'eval_steps_per_second': 8.612, 'epoch': 94.85}


 96%|█████████▌| 11600/12100 [2:39:32<02:29,  3.35it/s]   

{'loss': 2.2666, 'grad_norm': 16.766637802124023, 'learning_rate': 2.0785123966942152e-06, 'epoch': 95.67}


 97%|█████████▋| 11700/12100 [2:40:03<01:54,  3.49it/s]

{'loss': 2.2754, 'grad_norm': 15.42139720916748, 'learning_rate': 1.6652892561983473e-06, 'epoch': 96.49}


 98%|█████████▊| 11800/12100 [2:40:32<01:23,  3.58it/s]

{'loss': 2.2563, 'grad_norm': 16.441789627075195, 'learning_rate': 1.2520661157024794e-06, 'epoch': 97.32}


 98%|█████████▊| 11900/12100 [2:41:00<00:57,  3.48it/s]

{'loss': 2.2836, 'grad_norm': 16.650550842285156, 'learning_rate': 8.388429752066116e-07, 'epoch': 98.14}


 99%|█████████▉| 12000/12100 [2:41:29<00:29,  3.44it/s]

{'loss': 2.2776, 'grad_norm': 15.606143951416016, 'learning_rate': 4.2561983471074387e-07, 'epoch': 98.97}


                                                       
 99%|█████████▉| 12000/12100 [2:45:46<00:29,  3.44it/s]

{'eval_loss': 6.856013298034668, 'eval_runtime': 257.7867, 'eval_samples_per_second': 68.692, 'eval_steps_per_second': 8.588, 'epoch': 98.97}


100%|██████████| 12100/12100 [2:46:15<00:00,  3.61it/s]  

{'loss': 2.2557, 'grad_norm': 14.04565715789795, 'learning_rate': 1.2396694214876035e-08, 'epoch': 99.79}


100%|██████████| 12100/12100 [2:46:16<00:00,  1.21it/s]


{'train_runtime': 9976.7676, 'train_samples_per_second': 4.861, 'train_steps_per_second': 1.213, 'train_loss': 3.1137027835057784, 'epoch': 99.79}


('./fine_tuned_t5\\tokenizer_config.json',
 './fine_tuned_t5\\special_tokens_map.json',
 './fine_tuned_t5\\spiece.model',
 './fine_tuned_t5\\added_tokens.json')

In [18]:
train_chunks

['180 Manuel Portela Software Takes Command accurately revisi ts and summarizes the history of human -computer interaction, while analyzing these interactions as they are embodied in our current software. The work develops its own critical language and provides a model for analyzing computer applications, theorizing t hem as formal components that are determinant in the creation of media and in the transcoding of cultural practices that incorporate media software. The intelligence and breadth of Manovich’s approach makes this book relevant for all those who, in the fiel ds of computer science, art, design, history and theory of new media, and related disciplines, wish to understand the multiple forms and structures of interaction and manipulation encoded in the software we use in many of our creative and communicative pra ctices. Manovich beautifully synthesizes a significant part of his work as programmer, designer and digital animator, media artist, researcher and professor, helping 

In [25]:
# Import necessary libraries
import torch
from transformers import T5Tokenizer, T5ForConditionalGeneration

# 1. Load the trained model and tokenizer
model_path = "./fine_tuned_t5"
tokenizer = T5Tokenizer.from_pretrained(model_path)
model = T5ForConditionalGeneration.from_pretrained(model_path)

# 2. Move the model to the appropriate device (GPU if available)
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)

# 3. Prepare the input text for inference
input_text = "Jaka strategia ta kierowała Warholem już w wywiadzie z 1963 roku?"
input_ids = tokenizer(input_text, return_tensors="pt").input_ids.to(device)

# 4. Generate predictions with appropriate generation parameters
try:
    outputs = model.generate(
        input_ids,
        max_length=512,       # Maximum length of the generated text
        num_return_sequences=1,
        temperature=0.9,      # Adjusts randomness (increase for more diverse output)
        top_k=50,             # Top-k sampling to limit possible tokens
        top_p=0.95,           # Nucleus sampling for balanced generation
        num_beams=5,          # Use beam search for coherent results
        early_stopping=True,
        no_repeat_ngram_size=2  # Prevents repeating n-grams for more coherent output
    )

    # 5. Decode and print the generated output
    generated_text = tokenizer.decode(outputs[0], skip_special_tokens=True)
    print("Generated Output:", generated_text)

except Exception as e:
    print("Error during generation:", str(e))


Generated Output: a w roku z gldaszt ia. W k o naakiej si cym zasy, którzym dody czytelazu Muzeumaem. Krzto tywyka wana dobdzie najwaanianie ej rozbrze – uzowa byaowanizuca sazkadzia latwartuki. Jegosza ju kiednystawórzuje si
