In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

from transformers import AutoModelForCausalLM, AutoTokenizer
from transformers import TrainingArguments, Trainer
from transformers import DistilBertTokenizer, DistilBertForSequenceClassification
import torch

In [2]:
tokenizer = AutoTokenizer.from_pretrained("microsoft/DialoGPT-small")
model = AutoModelForCausalLM.from_pretrained("microsoft/DialoGPT-small")

In [3]:
# encode the new user input, add the eos_token and return a tensor in Pytorch
new_user_input_ids = tokenizer.encode(input(">> User:") + tokenizer.eos_token, return_tensors='pt')

# append the new user input tokens to the chat history
# bot_input_ids = torch.cat([chat_history_ids, new_user_input_ids], dim=-1) if step > 0 else new_user_input_ids
bot_input_ids = new_user_input_ids

# generated a response while limiting the total chat history to 1000 tokens, 
chat_history_ids = model.generate(bot_input_ids, max_length=1000, pad_token_id=tokenizer.eos_token_id)

# pretty print last ouput tokens from bot
print("DialoGPT: {}".format(tokenizer.decode(chat_history_ids[:, bot_input_ids.shape[-1]:][0], skip_special_tokens=True)))

>> User:hello, how are you ?
DialoGPT: Hello, how are you?


In [3]:
df_train = pd.read_csv("Data/empatheticdialogues/train.csv", on_bad_lines='skip')

In [4]:
print("Shape", df_train.shape)
print("Columns", df_train.columns)

Shape (76668, 8)
Columns Index(['conv_id', 'utterance_idx', 'context', 'prompt', 'speaker_idx',
       'utterance', 'selfeval', 'tags'],
      dtype='object')


In [5]:
df_train["prompt"].iloc[0]

'I remember going to the fireworks with my best friend. There was a lot of people_comma_ but it only felt like us in the world.'

In [6]:
df_train["utterance"].iloc[0]

'I remember going to see the fireworks with my best friend. It was the first time we ever spent time alone together. Although there was a lot of people_comma_ we felt like the only people in the world.'

In [7]:
n_context = 3

columns_name = ["response"] + ["context"+str(i) for i in range(n_context)]

save_dict_list = []
for conv_id, df_conv in df_train.groupby("conv_id"):
    
    total_iter = df_conv.shape[0] - n_context
    
    if total_iter > 0:
    
        all_conversation_list = df_conv["utterance"].tolist()

        for i in range(total_iter):
            conversation_list = all_conversation_list[(i):(i+n_context)]
            response_list = [all_conversation_list[i+n_context]]

            my_dict = dict(zip(columns_name, response_list + conversation_list))
            save_dict_list.append(my_dict)
            
df_conv_tune = pd.DataFrame(save_dict_list)

In [8]:
print("Shape", df_conv_tune.shape)
print("Columns", df_conv_tune.columns)

Shape (23281, 4)
Columns Index(['response', 'context0', 'context1', 'context2'], dtype='object')


In [26]:
tokenizer_tc = DistilBertTokenizer.from_pretrained("distilbert-base-uncased")
model_tc = DistilBertForSequenceClassification.from_pretrained("distilbert-base-uncased")

Some weights of the model checkpoint at distilbert-base-uncased were not used when initializing DistilBertForSequenceClassification: ['vocab_projector.bias', 'vocab_layer_norm.weight', 'vocab_transform.bias', 'vocab_layer_norm.bias', 'vocab_projector.weight', 'vocab_transform.weight']
- This IS expected if you are initializing DistilBertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing DistilBertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of DistilBertForSequenceClassification were not initialized from the model checkpoint at distilbert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight', 'pre_classifier

In [46]:
inputs = tokenizer_tc("What are you saying ? I do not Understand", return_tensors="pt")
with torch.no_grad():
    logits = model_tc(**inputs).logits

predicted_class_id = logits.argmax().item()
model_tc.config.id2label[predicted_class_id]

'LABEL_1'

In [47]:
logits

tensor([[0.0308, 0.0556]])

In [42]:
output = model_tc(**inputs)

In [43]:
output

SequenceClassifierOutput(loss=None, logits=tensor([[0.1044, 0.0261]], grad_fn=<AddmmBackward0>), hidden_states=None, attentions=None)