In [1]:
import pandas as pd
import torch
from transformers import BertTokenizer, BertForSequenceClassification, Trainer, TrainingArguments

# Read the spreadsheet and pull the two columns out as plain Python lists
# (one sentiment string and one response string per row).
df = pd.read_excel('result.xlsx')
sentiments, responses = (df[column].tolist() for column in ('sentiment', 'response'))

# Pretrained BERT tokenizer plus a sequence-classification model whose head has
# one output class per dataset row (num_labels equals the number of responses).
pretrained_name = 'bert-base-uncased'
tokenizer = BertTokenizer.from_pretrained(pretrained_name)
model = BertForSequenceClassification.from_pretrained(
    pretrained_name,
    num_labels=len(responses),
)

  from .autonotebook import tqdm as notebook_tqdm
Downloading (…)solve/main/vocab.txt: 100%|██████████| 232k/232k [00:00<00:00, 488kB/s]
To support symlinks on Windows, you either need to activate Developer Mode or to run Python as an administrator. In order to see activate developer mode, see this article: https://docs.microsoft.com/en-us/windows/apps/get-started/enable-your-device-for-development
Downloading (…)okenizer_config.json: 100%|██████████| 28.0/28.0 [00:00<00:00, 1.28kB/s]
Downloading (…)lve/main/config.json: 100%|██████████| 570/570 [00:00<00:00, 27.3kB/s]
Downloading pytorch_model.bin: 100%|██████████| 440M/440M [04:16<00:00, 1.71MB/s] 
Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.bias', 'cls.predictions.transform.dense

In [None]:


# Preprocess the data using the tokenizer
inputs = tokenizer(sentiments, padding=True, truncation=True, max_length=512, return_tensors='pt')

# Label every example with the index of the FIRST occurrence of its response in
# `responses`, so duplicate responses share one class id and `responses[label]`
# recovers the text. The dict lookup yields the same ids as `responses.index(r)`
# without rescanning the list for every element.
first_index = {}
for position, text in enumerate(responses):
    first_index.setdefault(text, position)
labels = torch.tensor([first_index[text] for text in responses])


class SentimentResponseDataset(torch.utils.data.Dataset):
    """Map-style dataset pairing tokenized sentiments with response class ids.

    Trainer fetches examples one index at a time and expects each item to be a
    dict of model inputs; a bare ``(inputs, labels)`` tuple does not satisfy that.

    Args:
        encodings: Tokenizer output holding one tensor per input field, each with
            the examples along the first dimension.
        labels: 1-D tensor with one class id per example.
    """

    def __init__(self, encodings, labels):
        self.encodings = encodings
        self.labels = labels

    def __len__(self):
        return len(self.labels)

    def __getitem__(self, idx):
        # Slice every tokenizer field at `idx` and attach the label under the
        # 'labels' key, which the model's forward() uses to compute the loss.
        item = {key: values[idx] for key, values in self.encodings.items()}
        item['labels'] = self.labels[idx]
        return item


# Per-epoch evaluation and best-model selection on eval_loss both need held-out
# data, so reserve ~10% of the examples (at least one) with a fixed seed.
# NOTE(review): responses that appear only in the held-out split are never seen
# during training; revisit the split if most responses are unique.
full_dataset = SentimentResponseDataset(inputs, labels)
if len(full_dataset) < 2:
    raise ValueError('Need at least two examples to build train and evaluation splits.')
eval_size = max(1, len(full_dataset) // 10)
train_dataset, eval_dataset = torch.utils.data.random_split(
    full_dataset,
    [len(full_dataset) - eval_size, eval_size],
    generator=torch.Generator().manual_seed(42),
)

# Define the training arguments
# (save_strategy must match evaluation_strategy for load_best_model_at_end;
# newer transformers releases rename `evaluation_strategy` to `eval_strategy`.)
training_args = TrainingArguments(
    output_dir='./results',
    num_train_epochs=3,
    per_device_train_batch_size=8,
    per_device_eval_batch_size=8,
    gradient_accumulation_steps=32,
    learning_rate=5e-5,
    evaluation_strategy='epoch',
    save_strategy='epoch',
    save_total_limit=5,
    load_best_model_at_end=True,
    metric_for_best_model='eval_loss',
    greater_is_better=False
)

# Define the trainer object
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=train_dataset,
    eval_dataset=eval_dataset
)

# Fine-tune the language model
trainer.train()

# Save the fine-tuned model
trainer.save_model('./fine-tuned-bert')

# Generate email responses using the fine-tuned model
model = BertForSequenceClassification.from_pretrained('./fine-tuned-bert')
model.eval()

sentiment = "Positive"
input_ids = tokenizer.encode(sentiment, return_tensors='pt')
# Inference only: skip gradient tracking to save memory and time.
with torch.no_grad():
    output = model(input_ids=input_ids)[0]
# `output` holds the logits for the single input; pick the highest-scoring class
# along the class dimension and map it back to the response text.
response_id = torch.argmax(output, dim=-1).item()
response = responses[response_id]

