In [1]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [2]:
!wget https://storage.googleapis.com/ai2-mosaic-public/projects/mosaic-kgs/comet-atomic_2020_BART.zip

--2024-11-19 15:12:42--  https://storage.googleapis.com/ai2-mosaic-public/projects/mosaic-kgs/comet-atomic_2020_BART.zip
Resolving storage.googleapis.com (storage.googleapis.com)... 142.250.4.207, 172.253.118.207, 74.125.200.207, ...
Connecting to storage.googleapis.com (storage.googleapis.com)|142.250.4.207|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 1507095346 (1.4G) [application/zip]
Saving to: ‘comet-atomic_2020_BART.zip’


2024-11-19 15:13:54 (20.2 MB/s) - ‘comet-atomic_2020_BART.zip’ saved [1507095346/1507095346]



In [3]:
!unzip comet-atomic_2020_BART.zip -d comet-atomic_2020_BART

Archive:  comet-atomic_2020_BART.zip
   creating: comet-atomic_2020_BART/comet-atomic_2020_BART/
  inflating: comet-atomic_2020_BART/comet-atomic_2020_BART/added_tokens.json  
  inflating: comet-atomic_2020_BART/comet-atomic_2020_BART/.DS_Store  
  inflating: comet-atomic_2020_BART/__MACOSX/comet-atomic_2020_BART/._.DS_Store  
  inflating: comet-atomic_2020_BART/comet-atomic_2020_BART/tokenizer_config.json  
  inflating: comet-atomic_2020_BART/comet-atomic_2020_BART/special_tokens_map.json  
  inflating: comet-atomic_2020_BART/comet-atomic_2020_BART/config.json  
  inflating: comet-atomic_2020_BART/comet-atomic_2020_BART/.added_tokens.json.swp  
  inflating: comet-atomic_2020_BART/comet-atomic_2020_BART/merges.txt  
  inflating: comet-atomic_2020_BART/comet-atomic_2020_BART/pytorch_model.bin  
  inflating: comet-atomic_2020_BART/comet-atomic_2020_BART/vocab.json  


In [4]:
!git lfs install

Git LFS initialized.


In [5]:
!git clone https://huggingface.co/facebook/blenderbot_small-90M

Cloning into 'blenderbot_small-90M'...
remote: Enumerating objects: 35, done.[K
remote: Counting objects: 100% (14/14), done.[K
remote: Compressing objects: 100% (10/10), done.[K
remote: Total 35 (delta 8), reused 4 (delta 4), pack-reused 21 (from 1)[K
Unpacking objects: 100% (35/35), 761.70 KiB | 1.58 MiB/s, done.
Filtering content: 100% (3/3), 1002.36 MiB | 45.45 MiB/s, done.


In [7]:

import json
with open('/content/ESConv.json', 'r',encoding='utf-8') as f:
    esconv_data = json.load(f)
esconv_data[0]

{'experience_type': 'Previous Experience',
 'emotion_type': 'anxiety',
 'problem_type': 'job crisis',
 'situation': 'I hate my job but I am scared to quit and seek a new career.',
 'survey_score': {'seeker': {'initial_emotion_intensity': '5',
   'empathy': '5',
   'relevance': '5',
   'final_emotion_intensity': '1'},
  'supporter': {'relevance': '5'}},
 'dialog': [{'speaker': 'seeker', 'annotation': {}, 'content': 'Hello\n'},
  {'speaker': 'supporter',
   'annotation': {'strategy': 'Question'},
   'content': 'Hello, what would you like to talk about?'},
  {'speaker': 'seeker',
   'annotation': {},
   'content': 'I am having a lot of anxiety about quitting my current job. It is too stressful but pays well\n'},
  {'speaker': 'supporter',
   'annotation': {'strategy': 'Question'},
   'content': 'What makes your job stressful for you?'},
  {'speaker': 'seeker',
   'annotation': {'feedback': '5'},
   'content': 'I have to deal with many people in hard financial situations and it is upsettin

In [8]:
import json
import torch
from transformers import AutoModelForSeq2SeqLM, AutoTokenizer
from tqdm import tqdm

def use_task_specific_params(model, task):
    task_specific_params = {
        "summarization": {
            "max_length": 512,
            "min_length": 50,
            "num_beams": 5,
            "length_penalty": 2.0,
            "early_stopping": True,
        },
    }
    model.config.update(task_specific_params[task])

def chunks(lst, n):
    """Yield successive n-sized chunks from lst."""
    for i in range(0, len(lst), n):
        yield lst[i: i + n]

class Comet:
    def __init__(self, model_path):
        self.device = "cuda" if torch.cuda.is_available() else "cpu"
        self.model = AutoModelForSeq2SeqLM.from_pretrained(model_path).to(self.device)
        self.tokenizer = AutoTokenizer.from_pretrained(model_path)
        task = "summarization"
        use_task_specific_params(self.model, task)
        self.batch_size = 1
        self.decoder_start_token_id = None

    def generate(self, queries, decode_method="beam", num_generate=5):
        with torch.no_grad():
            examples = queries
            decs = []
            for batch in list(chunks(examples, self.batch_size)):
                batch = self.tokenizer(batch, return_tensors="pt", truncation=True, padding="max_length").to(self.device)
                input_ids = batch['input_ids']
                attention_mask = batch['attention_mask']

                # Generate outputs from the model
                summaries = self.model.generate(
                    input_ids=input_ids,
                    attention_mask=attention_mask,
                    decoder_start_token_id=self.decoder_start_token_id,
                    num_beams=num_generate,
                    num_return_sequences=num_generate,
                )

                # Decode the output
                dec = self.tokenizer.batch_decode(summaries, skip_special_tokens=True, clean_up_tokenization_spaces=False)
                decs.append(dec)
            return decs

    def get_encoder_hidden_states(self, queries):
        """Extract hidden states from the encoder's last layer"""
        with torch.no_grad():
            batch = self.tokenizer(queries, return_tensors="pt", truncation=True, padding="max_length").to(self.device)
            input_ids = batch['input_ids']
            attention_mask = batch['attention_mask']

            # Extract the hidden states from the encoder
            # Access the encoder through self.model.model.encoder for BartForConditionalGeneration
            outputs = self.model.model.encoder(input_ids=input_ids, attention_mask=attention_mask, return_dict=True)
            hidden_states = outputs.last_hidden_state
            return hidden_states

# Define relations
intra_relations = ["xReact", "xEffect", "xWant"]
inter_relations = ["oReact", "oEffect", "oWant"]

def run():
    # Load dataset (e.g., esconv_data)
    with open('/content/ESConv.json', 'r',encoding='utf-8') as f:
        esconv_data = json.load(f)

    print("model loading ...")
    comet = Comet("/content/comet-atomic_2020_BART/comet-atomic_2020_BART")
    comet.model.zero_grad()
    print("model loaded")
    K_EC_intra = []
    # Iterate over the data and generate COMET inputs
    for conversation in tqdm(esconv_data[:5]):
        situation = conversation['situation']
        seeker_utterances = []
        supporter_utterances = []
        for utt in conversation['dialog']:
            if utt['speaker']=='seeker':
                seeker_utterances.append(utt['content'])
            else:
                supporter_utterances.append(utt['content'])
        all_hidden = []
        # Loop through seeker's utterances
        for utterance in seeker_utterances:
            for rel in intra_relations:
                # Create the query: Concatenate utterance with relation
                query = f"{utterance} [MASK] {rel}"
                print(f"Query: {query}")

                # Generate descriptions using COMET for the current relation
                results = comet.generate([query], decode_method="beam", num_generate=1)
                print(f"Results for {rel}: {results}")

                # Get hidden states for causal representations
                hidden_states = comet.get_encoder_hidden_states([query])
                print(f"Hidden States for {rel}: {hidden_states.shape}")
                all_hidden.append(hidden_states)
        K_EC_intra.append(hidden_states)
    #print(K_EC_intra)
    return K_EC_intra
if __name__ == "__main__":
    K_EC_intra = run()
    print(K_EC_intra)

model loading ...
model loaded


  0%|          | 0/5 [00:00<?, ?it/s]

Query: Hello
 [MASK] xReact




Results for xReact: [[' happy to see them.                                            ']]
Hidden States for xReact: torch.Size([1, 1024, 1024])
Query: Hello
 [MASK] xEffect
Results for xEffect: [[' none at all.                                             ']]
Hidden States for xEffect: torch.Size([1, 1024, 1024])
Query: Hello
 [MASK] xWant
Results for xWant: [[' to greet the person who called them.                                         ']]
Hidden States for xWant: torch.Size([1, 1024, 1024])
Query: I am having a lot of anxiety about quitting my current job. It is too stressful but pays well
 [MASK] xReact
Results for xReact: [[' nervous about the future.                                            ']]
Hidden States for xReact: torch.Size([1, 1024, 1024])
Query: I am having a lot of anxiety about quitting my current job. It is too stressful but pays well
 [MASK] xEffect
Results for xEffect: [[' get a new job. It is too stressful but pays well for the company.                            

 20%|██        | 1/5 [00:32<02:09, 32.49s/it]

Results for xWant: [[' to go home and sleep.Bye bye.                                       ']]
Hidden States for xWant: torch.Size([1, 1024, 1024])
Query: hello im looking for someone to talk to  [MASK] xReact
Results for xReact: [[' happy to have found someone to talk to.                                        ']]
Hidden States for xReact: torch.Size([1, 1024, 1024])
Query: hello im looking for someone to talk to  [MASK] xEffect
Results for xEffect: [[' none at all.                                             ']]
Hidden States for xEffect: torch.Size([1, 1024, 1024])
Query: hello im looking for someone to talk to  [MASK] xWant
Results for xWant: [[' to talk to someone else.                                           ']]
Hidden States for xWant: torch.Size([1, 1024, 1024])
Query: im fine how are you
 [MASK] xReact
Results for xReact: [[' happy to see you.  happy to see you.  happy to see you.  happy to see you.  happy to see you.  happy to see you.  happy to see you.  happy to see you. 

 40%|████      | 2/5 [02:03<03:20, 66.97s/it]

Results for xWant: [[' to go home and sleep.                                           ']]
Hidden States for xWant: torch.Size([1, 1024, 1024])
Query: Hello [MASK] xReact
Results for xReact: [[' happy to see them.                                            ']]
Hidden States for xReact: torch.Size([1, 1024, 1024])
Query: Hello [MASK] xEffect
Results for xEffect: [[' none at all.                                             ']]
Hidden States for xEffect: torch.Size([1, 1024, 1024])
Query: Hello [MASK] xWant
Results for xWant: [[' to talk to them.                                            ']]
Hidden States for xWant: torch.Size([1, 1024, 1024])
Query: I'm concerned about my job. I have been out of work for the past 5 weeks because I tested positive for COVID-19. [MASK] xReact
Results for xReact: [[' worried about their job security.                                           ']]
Hidden States for xReact: torch.Size([1, 1024, 1024])
Query: I'm concerned about my job. I have been out of work

 60%|██████    | 3/5 [02:51<01:56, 58.27s/it]

Results for xWant: [[' to go home and relax.  Have a great holiday.                                     ']]
Hidden States for xWant: torch.Size([1, 1024, 1024])
Query: I am dong good. You?
 [MASK] xReact
Results for xReact: [[' good about themselves.                                             ']]
Hidden States for xReact: torch.Size([1, 1024, 1024])
Query: I am dong good. You?
 [MASK] xEffect
Results for xEffect: [[' none at all.                                             ']]
Hidden States for xEffect: torch.Size([1, 1024, 1024])
Query: I am dong good. You?
 [MASK] xWant
Results for xWant: [[' to be a good person.                                           ']]
Hidden States for xWant: torch.Size([1, 1024, 1024])
Query: I have been staying home since last March due to COVID; getting very depressed. [MASK] xReact
Results for xReact: [[' depressed.                                               ']]
Hidden States for xReact: torch.Size([1, 1024, 1024])
Query: I have been staying home since

 80%|████████  | 4/5 [03:21<00:47, 47.11s/it]

Results for xWant: [[' to go home and sleep.Bye bye.                                       ']]
Hidden States for xWant: torch.Size([1, 1024, 1024])
Query: Infinitely complicated.
 [MASK] xReact
Results for xReact: [[' confused.                                               ']]
Hidden States for xReact: torch.Size([1, 1024, 1024])
Query: Infinitely complicated.
 [MASK] xEffect
Results for xEffect: [[' none at all.                                             ']]
Hidden States for xEffect: torch.Size([1, 1024, 1024])
Query: Infinitely complicated.
 [MASK] xWant
Results for xWant: [[' to be a good person.                                           ']]
Hidden States for xWant: torch.Size([1, 1024, 1024])
Query: Too many decisions. I don't know what to do.
 [MASK] xReact
Results for xReact: [[" confused.I don't know what to do.I don't know what to do.I don't know what to do.I don't know what to do.I don't know.I don't know.I don't know."]]
Hidden States for xReact: torch.Size([1, 1024, 1024])

100%|██████████| 5/5 [05:28<00:00, 65.75s/it]

Results for xWant: [[' to celebrate with friends and family.                                          ']]
Hidden States for xWant: torch.Size([1, 1024, 1024])





[tensor([[[-5.0703e-03,  1.3541e-02,  3.2393e-03,  ...,  8.2240e-03,
           1.1626e-03, -1.6766e-04],
         [-1.3525e-01, -2.4622e-01, -2.3928e-01,  ..., -6.0200e-02,
           5.3351e-02, -2.6545e-02],
         [-1.4300e-01, -1.1901e-01, -1.7524e-01,  ..., -6.4175e-02,
          -4.5786e-02,  7.1092e-02],
         ...,
         [-1.9616e-01, -3.0659e-01, -3.7300e-01,  ..., -3.2957e-02,
           1.2619e-01,  4.2278e-02],
         [-1.4466e-01, -3.2804e-01, -2.5233e-01,  ..., -4.2753e-02,
           8.8551e-02, -2.8745e-02],
         [-1.4850e-01, -4.1776e-01, -3.1348e-01,  ..., -5.6096e-03,
           8.2875e-02,  7.1741e-02]]], device='cuda:0'), tensor([[[-4.8395e-03,  1.4276e-02,  3.2395e-03,  ...,  8.2632e-03,
           2.5778e-03, -1.2364e-04],
         [ 1.3121e-01, -2.8105e-01,  7.7290e-02,  ..., -1.6483e-01,
           8.8132e-02,  4.3381e-02],
         [-4.6438e-02, -9.5528e-02, -2.7117e-02,  ..., -1.0376e-01,
          -8.0014e-02, -2.0861e-02],
         ...,
      

In [9]:
import json
from tqdm import tqdm

# Import Comet from the previous cell or file where it is defined
# from your_file import Comet  # Assuming your_file contains the Comet class

def run_st_inter():
    # Load dataset (e.g., esconv_data)
    with open('/content/ESConv.json', 'r', encoding='utf-8') as f:
        esconv_data = json.load(f)

    print("Model loading...")
    # Change the model path to the correct location
    comet = Comet("/content/comet-atomic_2020_BART")  # This was the path cloned in input 18
    comet.model.zero_grad()
    print("Model loaded")

    K_EC_inter = []
    K_ES = []


    # Iterate over the data and generate COMET inputs
    for conversation in tqdm(esconv_data[:5]):
        situation = conversation['situation']
        all_situation_hidden = []

        # Generate COMET queries for the situation using each relation
        for rel in inter_relations:
            # Create the query: Concatenate situation with relation
            situation_query = f"{situation} [MASK] {rel}"
            print(f"Situation Query: {situation_query}")

            # Generate descriptions using COMET for the current relation
            situation_results = comet.generate([situation_query], decode_method="beam", num_generate=1)
            print(f"Situation Results for {rel}: {situation_results}")

            # Get hidden states for the situation
            situation_hidden_states = comet.get_encoder_hidden_states([situation_query])
            print(f"Situation Hidden States for {rel}: {situation_hidden_states.shape}")

            all_situation_hidden.append(situation_hidden_states)

        # Process dialog
        seeker_utterances = []
        supporter_utterances = []
        for utt in conversation['dialog']:
            if utt['speaker'] == 'seeker':
                seeker_utterances.append(utt['content'])
            else:
                supporter_utterances.append(utt['content'])

        all_hidden = []

        # Loop through supporter's utterances
        for utterance in supporter_utterances:
            utterance_hidden_states = []

            # Generate COMET queries for each relation
            for rel in inter_relations:
                # Create the query: Concatenate utterance with relation
                query = f"{utterance} [MASK] {rel}"
                print(f"Query: {query}")

                # Generate descriptions using COMET for the current relation
                results = comet.generate([query], decode_method="beam", num_generate=1)
                print(f"Results for {rel}: {results}")

                # Get hidden states for causal representations
                hidden_states = comet.get_encoder_hidden_states([query])
                print(f"Hidden States for {rel}: {hidden_states.shape}")

                utterance_hidden_states.append(hidden_states)

            # Append the hidden states for all relations for the current utterance
            all_hidden.append(utterance_hidden_states)

        # Append the hidden states for each conversation
        K_EC_inter.append(all_hidden)
        K_ES.append(all_situation_hidden)

    print("K_EC_inter:", K_EC_inter)
    print("K_ES:", K_ES)
    return K_EC_inter, K_ES

if __name__ == "__main__":
    K_EC_inter = run()
    print(K_EC_intra)
    K_ES = run()
    print(K_ES)

model loading ...
model loaded


  0%|          | 0/5 [00:00<?, ?it/s]

Query: Hello
 [MASK] xReact
Results for xReact: [[' happy to see them.                                            ']]
Hidden States for xReact: torch.Size([1, 1024, 1024])
Query: Hello
 [MASK] xEffect
Results for xEffect: [[' none at all.                                             ']]
Hidden States for xEffect: torch.Size([1, 1024, 1024])
Query: Hello
 [MASK] xWant
Results for xWant: [[' to greet the person who called them.                                         ']]
Hidden States for xWant: torch.Size([1, 1024, 1024])
Query: I am having a lot of anxiety about quitting my current job. It is too stressful but pays well
 [MASK] xReact
Results for xReact: [[' nervous about the future.                                            ']]
Hidden States for xReact: torch.Size([1, 1024, 1024])
Query: I am having a lot of anxiety about quitting my current job. It is too stressful but pays well
 [MASK] xEffect
Results for xEffect: [[' get a new job. It is too stressful but pays well for the company.

 20%|██        | 1/5 [00:32<02:08, 32.05s/it]

Results for xWant: [[' to go home and sleep.Bye bye.                                       ']]
Hidden States for xWant: torch.Size([1, 1024, 1024])
Query: hello im looking for someone to talk to  [MASK] xReact
Results for xReact: [[' happy to have found someone to talk to.                                        ']]
Hidden States for xReact: torch.Size([1, 1024, 1024])
Query: hello im looking for someone to talk to  [MASK] xEffect
Results for xEffect: [[' none at all.                                             ']]
Hidden States for xEffect: torch.Size([1, 1024, 1024])
Query: hello im looking for someone to talk to  [MASK] xWant
Results for xWant: [[' to talk to someone else.                                           ']]
Hidden States for xWant: torch.Size([1, 1024, 1024])
Query: im fine how are you
 [MASK] xReact
Results for xReact: [[' happy to see you.  happy to see you.  happy to see you.  happy to see you.  happy to see you.  happy to see you.  happy to see you.  happy to see you. 

 40%|████      | 2/5 [02:03<03:20, 66.86s/it]

Results for xWant: [[' to go home and sleep.                                           ']]
Hidden States for xWant: torch.Size([1, 1024, 1024])
Query: Hello [MASK] xReact
Results for xReact: [[' happy to see them.                                            ']]
Hidden States for xReact: torch.Size([1, 1024, 1024])
Query: Hello [MASK] xEffect
Results for xEffect: [[' none at all.                                             ']]
Hidden States for xEffect: torch.Size([1, 1024, 1024])
Query: Hello [MASK] xWant
Results for xWant: [[' to talk to them.                                            ']]
Hidden States for xWant: torch.Size([1, 1024, 1024])
Query: I'm concerned about my job. I have been out of work for the past 5 weeks because I tested positive for COVID-19. [MASK] xReact
Results for xReact: [[' worried about their job security.                                           ']]
Hidden States for xReact: torch.Size([1, 1024, 1024])
Query: I'm concerned about my job. I have been out of work

 60%|██████    | 3/5 [02:50<01:56, 58.11s/it]

Results for xWant: [[' to go home and relax.  Have a great holiday.                                     ']]
Hidden States for xWant: torch.Size([1, 1024, 1024])
Query: I am dong good. You?
 [MASK] xReact
Results for xReact: [[' good about themselves.                                             ']]
Hidden States for xReact: torch.Size([1, 1024, 1024])
Query: I am dong good. You?
 [MASK] xEffect
Results for xEffect: [[' none at all.                                             ']]
Hidden States for xEffect: torch.Size([1, 1024, 1024])
Query: I am dong good. You?
 [MASK] xWant
Results for xWant: [[' to be a good person.                                           ']]
Hidden States for xWant: torch.Size([1, 1024, 1024])
Query: I have been staying home since last March due to COVID; getting very depressed. [MASK] xReact
Results for xReact: [[' depressed.                                               ']]
Hidden States for xReact: torch.Size([1, 1024, 1024])
Query: I have been staying home since

 80%|████████  | 4/5 [03:20<00:46, 46.83s/it]

Results for xWant: [[' to go home and sleep.Bye bye.                                       ']]
Hidden States for xWant: torch.Size([1, 1024, 1024])
Query: Infinitely complicated.
 [MASK] xReact
Results for xReact: [[' confused.                                               ']]
Hidden States for xReact: torch.Size([1, 1024, 1024])
Query: Infinitely complicated.
 [MASK] xEffect
Results for xEffect: [[' none at all.                                             ']]
Hidden States for xEffect: torch.Size([1, 1024, 1024])
Query: Infinitely complicated.
 [MASK] xWant
Results for xWant: [[' to be a good person.                                           ']]
Hidden States for xWant: torch.Size([1, 1024, 1024])
Query: Too many decisions. I don't know what to do.
 [MASK] xReact
Results for xReact: [[" confused.I don't know what to do.I don't know what to do.I don't know what to do.I don't know what to do.I don't know.I don't know.I don't know."]]
Hidden States for xReact: torch.Size([1, 1024, 1024])

100%|██████████| 5/5 [05:29<00:00, 65.86s/it]

Results for xWant: [[' to celebrate with friends and family.                                          ']]
Hidden States for xWant: torch.Size([1, 1024, 1024])
[tensor([[[-5.0703e-03,  1.3541e-02,  3.2393e-03,  ...,  8.2240e-03,
           1.1626e-03, -1.6766e-04],
         [-1.3525e-01, -2.4622e-01, -2.3928e-01,  ..., -6.0200e-02,
           5.3351e-02, -2.6545e-02],
         [-1.4300e-01, -1.1901e-01, -1.7524e-01,  ..., -6.4175e-02,
          -4.5786e-02,  7.1092e-02],
         ...,
         [-1.9616e-01, -3.0659e-01, -3.7300e-01,  ..., -3.2957e-02,
           1.2619e-01,  4.2278e-02],
         [-1.4466e-01, -3.2804e-01, -2.5233e-01,  ..., -4.2753e-02,
           8.8551e-02, -2.8745e-02],
         [-1.4850e-01, -4.1776e-01, -3.1348e-01,  ..., -5.6096e-03,
           8.2875e-02,  7.1741e-02]]], device='cuda:0'), tensor([[[-4.8395e-03,  1.4276e-02,  3.2395e-03,  ...,  8.2632e-03,
           2.5778e-03, -1.2364e-04],
         [ 1.3121e-01, -2.8105e-01,  7.7290e-02,  ..., -1.6483e-01,
   




model loaded


  0%|          | 0/5 [00:00<?, ?it/s]

Query: Hello
 [MASK] xReact
Results for xReact: [[' happy to see them.                                            ']]
Hidden States for xReact: torch.Size([1, 1024, 1024])
Query: Hello
 [MASK] xEffect
Results for xEffect: [[' none at all.                                             ']]
Hidden States for xEffect: torch.Size([1, 1024, 1024])
Query: Hello
 [MASK] xWant
Results for xWant: [[' to greet the person who called them.                                         ']]
Hidden States for xWant: torch.Size([1, 1024, 1024])
Query: I am having a lot of anxiety about quitting my current job. It is too stressful but pays well
 [MASK] xReact
Results for xReact: [[' nervous about the future.                                            ']]
Hidden States for xReact: torch.Size([1, 1024, 1024])
Query: I am having a lot of anxiety about quitting my current job. It is too stressful but pays well
 [MASK] xEffect
Results for xEffect: [[' get a new job. It is too stressful but pays well for the company.

 20%|██        | 1/5 [00:31<02:07, 31.80s/it]

Results for xWant: [[' to go home and sleep.Bye bye.                                       ']]
Hidden States for xWant: torch.Size([1, 1024, 1024])
Query: hello im looking for someone to talk to  [MASK] xReact
Results for xReact: [[' happy to have found someone to talk to.                                        ']]
Hidden States for xReact: torch.Size([1, 1024, 1024])
Query: hello im looking for someone to talk to  [MASK] xEffect
Results for xEffect: [[' none at all.                                             ']]
Hidden States for xEffect: torch.Size([1, 1024, 1024])
Query: hello im looking for someone to talk to  [MASK] xWant
Results for xWant: [[' to talk to someone else.                                           ']]
Hidden States for xWant: torch.Size([1, 1024, 1024])
Query: im fine how are you
 [MASK] xReact
Results for xReact: [[' happy to see you.  happy to see you.  happy to see you.  happy to see you.  happy to see you.  happy to see you.  happy to see you.  happy to see you. 

 40%|████      | 2/5 [02:01<03:16, 65.60s/it]

Results for xWant: [[' to go home and sleep.                                           ']]
Hidden States for xWant: torch.Size([1, 1024, 1024])
Query: Hello [MASK] xReact
Results for xReact: [[' happy to see them.                                            ']]
Hidden States for xReact: torch.Size([1, 1024, 1024])
Query: Hello [MASK] xEffect
Results for xEffect: [[' none at all.                                             ']]
Hidden States for xEffect: torch.Size([1, 1024, 1024])
Query: Hello [MASK] xWant
Results for xWant: [[' to talk to them.                                            ']]
Hidden States for xWant: torch.Size([1, 1024, 1024])
Query: I'm concerned about my job. I have been out of work for the past 5 weeks because I tested positive for COVID-19. [MASK] xReact
Results for xReact: [[' worried about their job security.                                           ']]
Hidden States for xReact: torch.Size([1, 1024, 1024])
Query: I'm concerned about my job. I have been out of work

 60%|██████    | 3/5 [02:48<01:54, 57.25s/it]

Results for xWant: [[' to go home and relax.  Have a great holiday.                                     ']]
Hidden States for xWant: torch.Size([1, 1024, 1024])
Query: I am dong good. You?
 [MASK] xReact
Results for xReact: [[' good about themselves.                                             ']]
Hidden States for xReact: torch.Size([1, 1024, 1024])
Query: I am dong good. You?
 [MASK] xEffect
Results for xEffect: [[' none at all.                                             ']]
Hidden States for xEffect: torch.Size([1, 1024, 1024])
Query: I am dong good. You?
 [MASK] xWant
Results for xWant: [[' to be a good person.                                           ']]
Hidden States for xWant: torch.Size([1, 1024, 1024])
Query: I have been staying home since last March due to COVID; getting very depressed. [MASK] xReact
Results for xReact: [[' depressed.                                               ']]
Hidden States for xReact: torch.Size([1, 1024, 1024])
Query: I have been staying home since

 80%|████████  | 4/5 [03:18<00:46, 46.43s/it]

Results for xWant: [[' to go home and sleep.Bye bye.                                       ']]
Hidden States for xWant: torch.Size([1, 1024, 1024])
Query: Infinitely complicated.
 [MASK] xReact
Results for xReact: [[' confused.                                               ']]
Hidden States for xReact: torch.Size([1, 1024, 1024])
Query: Infinitely complicated.
 [MASK] xEffect
Results for xEffect: [[' none at all.                                             ']]
Hidden States for xEffect: torch.Size([1, 1024, 1024])
Query: Infinitely complicated.
 [MASK] xWant
Results for xWant: [[' to be a good person.                                           ']]
Hidden States for xWant: torch.Size([1, 1024, 1024])
Query: Too many decisions. I don't know what to do.
 [MASK] xReact
Results for xReact: [[" confused.I don't know what to do.I don't know what to do.I don't know what to do.I don't know what to do.I don't know.I don't know.I don't know."]]
Hidden States for xReact: torch.Size([1, 1024, 1024])

100%|██████████| 5/5 [05:26<00:00, 65.37s/it]

Results for xWant: [[' to celebrate with friends and family.                                          ']]
Hidden States for xWant: torch.Size([1, 1024, 1024])
[tensor([[[-5.0703e-03,  1.3541e-02,  3.2393e-03,  ...,  8.2240e-03,
           1.1626e-03, -1.6766e-04],
         [-1.3525e-01, -2.4622e-01, -2.3928e-01,  ..., -6.0200e-02,
           5.3351e-02, -2.6545e-02],
         [-1.4300e-01, -1.1901e-01, -1.7524e-01,  ..., -6.4175e-02,
          -4.5786e-02,  7.1092e-02],
         ...,
         [-1.9616e-01, -3.0659e-01, -3.7300e-01,  ..., -3.2957e-02,
           1.2619e-01,  4.2278e-02],
         [-1.4466e-01, -3.2804e-01, -2.5233e-01,  ..., -4.2753e-02,
           8.8551e-02, -2.8745e-02],
         [-1.4850e-01, -4.1776e-01, -3.1348e-01,  ..., -5.6096e-03,
           8.2875e-02,  7.1741e-02]]], device='cuda:0'), tensor([[[-4.8395e-03,  1.4276e-02,  3.2395e-03,  ...,  8.2632e-03,
           2.5778e-03, -1.2364e-04],
         [ 1.3121e-01, -2.8105e-01,  7.7290e-02,  ..., -1.6483e-01,
   




In [10]:
import torch

In [11]:
K_EC_intra = torch.stack(K_EC_intra)
K_EC_inter = torch.stack(K_EC_inter)
K_ES = torch.stack(K_ES)

In [12]:
!pip install transformers
!pip install torch torchvision



In [31]:
import json
import torch
from transformers import BlenderbotTokenizer, BlenderbotModel
from typing import List, Tuple, Dict
import logging
from tqdm import tqdm

# Configure logging
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

class EmotionalSupportAnalyzer:
    def __init__(
        self,
        model_name: str = 'facebook/blenderbot_small-90M',
        max_length: int = 128,
        batch_size: int = 8
    ):
        """
        Initialize the analyzer with memory-efficient settings.

        Args:
            model_name: Name of the BlenderBot model
            max_length: Maximum sequence length
            batch_size: Size of batches for processing
        """
        self.device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
        self.batch_size = batch_size
        self.max_length = max_length

        logger.info(f"Using device: {self.device}")
        logger.info(f"Batch size: {batch_size}")

        try:
            self.tokenizer = BlenderbotTokenizer.from_pretrained(model_name)
            self.model = BlenderbotModel.from_pretrained(model_name).to(self.device)
        except Exception as e:
            logger.error(f"Error loading BlenderBot model: {str(e)}")
            raise

    def load_dataset(self, file_path: str) -> Dict:
        """Load the dataset from JSON file."""
        try:
            with open(file_path, 'r', encoding='utf-8') as f:
                data = json.load(f)
            logger.info(f"Loaded dataset with {len(data)} entries")
            return data
        except Exception as e:
            logger.error(f"Error loading dataset: {str(e)}")
            raise

    def preprocess_data(self, data: List[Dict]) -> Tuple[List[str], List[str]]:
        """Extract situations and contexts from the dataset."""
        situations = []
        contexts = []

        for entry in data:
            situation = entry.get('situation', '')
            dialog = entry.get('dialog', [])

            context = " ".join([
                d.get('content', '') for d in dialog
                if isinstance(d, dict) and 'content' in d
            ])

            situations.append(situation)
            contexts.append(context)

        return situations, contexts

    def encode_batch(self, texts: List[str]) -> torch.Tensor:
        """Encode a single batch of texts."""
        encoded_inputs = self.tokenizer(
            texts,
            padding=True,
            truncation=True,
            max_length=self.max_length,
            return_tensors='pt'
        ).to(self.device)
        encoded_inputs['decoder_input_ids'] = encoded_inputs['input_ids']
        self.model.eval()
        with torch.no_grad():
            outputs = self.model(**encoded_inputs)
            # Get the last hidden state for each sequence (mean pooling)
            embeddings = outputs.last_hidden_state.mean(dim=1).cpu()

        # Clear CUDA cache
        torch.cuda.empty_cache()
        return embeddings

    def encode_texts_batched(self, texts: List[str]) -> torch.Tensor:
        """Encode texts in batches to manage memory."""
        all_embeddings = []

        for i in tqdm(range(0, len(texts), self.batch_size), desc="Processing batches"):
            batch_texts = texts[i:i + self.batch_size]
            batch_embeddings = self.encode_batch(batch_texts)
            all_embeddings.append(batch_embeddings)

        return torch.cat(all_embeddings, dim=0)

    def process_conversations(self, file_path: str) -> Tuple[torch.Tensor, torch.Tensor]:
        """Process all conversations with batch processing."""
        # Load and preprocess data
        data = self.load_dataset(file_path)
        situations, contexts = self.preprocess_data(data)

        logger.info("Generating situation embeddings...")
        situation_embeddings = self.encode_texts_batched(situations)

        logger.info("Generating context embeddings...")
        context_embeddings = self.encode_texts_batched(contexts)

        logger.info(f"Final shapes - Situations: {situation_embeddings.shape}, Contexts: {context_embeddings.shape}")
        return situation_embeddings, context_embeddings

def save_embeddings(embeddings: torch.Tensor, file_path: str):
    """Save embeddings to disk."""
    torch.save(embeddings, file_path)
    logger.info(f"Saved embeddings to {file_path}")

def load_embeddings(file_path: str) -> torch.Tensor:
    """Load embeddings from disk."""
    embeddings = torch.load(file_path)
    logger.info(f"Loaded embeddings of shape {embeddings.shape}")
    return embeddings

# Example usage
if __name__ == "__main__":
    # Initialize with smaller batch size
    analyzer = EmotionalSupportAnalyzer(batch_size=8)
    file_path = '/content/ESConv.json'

    try:
        # Process conversations
        situation_embeddings, context_embeddings = analyzer.process_conversations(file_path)

        # Save embeddings
        save_embeddings(situation_embeddings, 'situation_embeddings.pt')
        save_embeddings(context_embeddings, 'context_embeddings.pt')

        logger.info("Processing completed successfully")

    except Exception as e:
        logger.error(f"Processing failed: {str(e)}")


The tokenizer class you load from this checkpoint is not the same type as the class this function is called from. It may result in unexpected tokenization. 
The tokenizer class you load from this checkpoint is 'BlenderbotSmallTokenizer'. 
The class this function is called from is 'BlenderbotTokenizer'.
You are using a model of type blenderbot-small to instantiate a model of type blenderbot. This is not supported for all configurations of models and can yield errors.
Some weights of BlenderbotModel were not initialized from the model checkpoint at facebook/blenderbot_small-90M and are newly initialized: ['model.decoder.layer_norm.bias', 'model.decoder.layer_norm.weight', 'model.encoder.layer_norm.bias', 'model.encoder.layer_norm.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Processing batches: 100%|██████████| 163/163 [00:07<00:00, 20.58it/s]
Processing batches: 100%|██████████| 163/163 [00:12<00:00, 12.66it/s]


In [33]:
import torch.nn as nn  # Import nn module to define layers
class SelfAttentionEncoder(nn.Module):
    def __init__(self, input_dim):
        super(SelfAttentionEncoder, self).__init__()
        self.hidden_dim = input_dim

        self.query_layer = nn.Linear(input_dim, input_dim)
        self.key_layer = nn.Linear(input_dim, input_dim)
        self.value_layer = nn.Linear(input_dim, input_dim)

    def forward(self, x):
        Q = self.query_layer(x)
        K = self.key_layer(x)
        V = self.value_layer(x)

        attention_scores = torch.bmm(Q, K.transpose(1, 2))
        attention_weights = nn.functional.softmax(attention_scores / (self.hidden_dim ** 0.5), dim=-1)

        context = torch.bmm(attention_weights, V)

        H_q = Q.mean(dim=1)  # Query representation
        H_c = context.mean(dim=1)  # Context representation

        return H_q, H_c

# Initialize the model
input_dim = situation_embeddings.shape[1]  # Hidden dimension from BlenderBot
model = SelfAttentionEncoder(input_dim=input_dim)

# Generate H_q and H_c for all situations and contexts
H_q_list = []
H_c_list = []

for situation_embeds, context_embeds in zip(situation_embeddings, context_embeddings):
    # Stack embeddings to form a single input tensor
    inputs = torch.stack((situation_embeds, context_embeds))

    # Pass through the SelfAttentionEncoder model
    H_q, H_c = model(inputs.unsqueeze(0))  # Add batch dimension

    H_q_list.append(H_q)
    H_c_list.append(H_c)

# Convert lists to tensors
H_q_tensor = torch.cat(H_q_list, dim=0)  # Shape: (num_samples, hidden_dim)
H_c_tensor = torch.cat(H_c_list, dim=0)  # Shape: (num_samples, hidden_dim)

# Print shapes
print("H_q shape:", H_q_tensor.shape)  # Should be (num_samples, hidden_dim)
print("H_c shape:", H_c_tensor.shape)  # Should be (num_samples, hidden_dim)

H_q shape: torch.Size([1300, 512])
H_c shape: torch.Size([1300, 512])


In [34]:
print(H_q_tensor)

tensor([[-1.2556,  0.1665,  0.6485,  ...,  0.3132, -0.6339, -0.1034],
        [-1.2526,  0.1731,  0.6414,  ...,  0.3085, -0.6383, -0.0960],
        [-1.2545,  0.1590,  0.6389,  ...,  0.2962, -0.6295, -0.0847],
        ...,
        [-1.2479,  0.1656,  0.6376,  ...,  0.3039, -0.6455, -0.0728],
        [-1.2428,  0.1747,  0.6367,  ...,  0.3066, -0.6447, -0.0731],
        [-1.2309,  0.1977,  0.6354,  ...,  0.3179, -0.6564, -0.0760]],
       grad_fn=<CatBackward0>)


In [35]:
print(H_c_tensor)

tensor([[ 0.4234,  0.3328, -1.2963,  ...,  0.4730, -0.1505,  1.0797],
        [ 0.4142,  0.3379, -1.2986,  ...,  0.4799, -0.1538,  1.0786],
        [ 0.4155,  0.3170, -1.3054,  ...,  0.4618, -0.1793,  1.0886],
        ...,
        [ 0.4086,  0.3229, -1.3142,  ...,  0.4616, -0.1720,  1.0802],
        [ 0.4083,  0.3314, -1.3172,  ...,  0.4740, -0.1652,  1.0740],
        [ 0.3961,  0.3522, -1.3192,  ...,  0.4947, -0.1424,  1.0629]],
       grad_fn=<CatBackward0>)


In [40]:
import json
import torch
from transformers import BlenderbotSmallTokenizer, BlenderbotSmallForConditionalGeneration
from typing import List, Dict
import logging
from tqdm import tqdm

logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

class StrategyEncoder:
    def __init__(
        self,
        model_name: str = 'facebook/blenderbot_small-90M',
        max_length: int = 128,
        batch_size: int = 8
    ):
        """Initialize the Strategy Encoder"""
        self.device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
        self.batch_size = batch_size
        self.max_length = max_length

        logger.info(f"Using device: {self.device}")

        self.tokenizer = BlenderbotSmallTokenizer.from_pretrained(model_name)
        self.model = BlenderbotSmallForConditionalGeneration.from_pretrained(model_name).to(self.device)

    def extract_strategies(self, dialog: List[Dict]) -> str:
        """Extract strategy sequence from dialog"""
        strategies = []
        for turn in dialog:
            if 'strategy' in turn and turn['strategy']:
                strategy = turn['strategy'].strip()
                if strategy:  # Only add non-empty strategies
                    strategies.append(strategy)
        return " ".join(strategies)

    def get_strategy_sequences(self, data: List[Dict]) -> List[str]:
        """Get all strategy sequences from dataset"""
        strategy_sequences = []

        for entry in data:
            dialog = entry.get('dialog', [])
            if dialog:  # Only process if dialog exists
                strategy_sequence = self.extract_strategies(dialog)
                if strategy_sequence:  # Only add non-empty sequences
                    strategy_sequences.append(strategy_sequence)
                else:
                    # If no strategy found, use a placeholder to maintain alignment
                    strategy_sequences.append("[no_strategy]")

        return strategy_sequences

    def encode_batch(self, texts: List[str]) -> torch.Tensor:
        """Encode a batch of texts"""
        encoded_inputs = self.tokenizer(
            texts,
            padding=True,
            truncation=True,
            max_length=self.max_length,
            return_tensors='pt'
        ).to(self.device)

        self.model.eval()
        with torch.no_grad():
            # Use only the encoder's output
            encoder_outputs = self.model.model.encoder(**encoded_inputs)
            embeddings = encoder_outputs.last_hidden_state.mean(dim=1).cpu()  # Mean pooling over sequence length

        torch.cuda.empty_cache()
        return embeddings

    def encode_strategies_batched(self, texts: List[str]) -> torch.Tensor:
        """Encode all sequences in batches"""
        all_embeddings = []

        for i in tqdm(range(0, len(texts), self.batch_size), desc="Processing strategies"):
            batch_texts = texts[i:i + self.batch_size]
            batch_embeddings = self.encode_batch(batch_texts)
            all_embeddings.append(batch_embeddings)

        return torch.cat(all_embeddings, dim=0)

    def generate_HS(self, file_path: str) -> torch.Tensor:
        """Generate strategy history embeddings (HS)"""
        try:
            # Load dataset
            with open(file_path, 'r', encoding='utf-8') as f:
                data = json.load(f)
            logger.info(f"Loaded dataset with {len(data)} entries")

            # Get strategy sequences
            strategy_sequences = self.get_strategy_sequences(data)
            logger.info(f"Extracted {len(strategy_sequences)} strategy sequences")

            # Generate embeddings
            HS = self.encode_strategies_batched(strategy_sequences)
            logger.info(f"Generated strategy embeddings of shape {HS.shape}")

            return HS

        except Exception as e:
            logger.error(f"Error in generate_HS: {str(e)}")
            raise

def save_embeddings(embeddings: torch.Tensor, file_path: str):
    """Save embeddings to disk"""
    torch.save(embeddings, file_path)
    logger.info(f"Saved embeddings to {file_path}")

# Example usage
if __name__ == "__main__":
    try:
        # Initialize encoder
        encoder = StrategyEncoder(batch_size=8)

        # Generate HS embeddings
        file_path = '/content/ESConv.json'
        HS = encoder.generate_HS(file_path)

        # Save embeddings
        save_embeddings(HS, 'strategy_history_embeddings.pt')

        logger.info("Processing completed successfully")
        print(f"Final HS tensor shape: {HS.shape}")

    except Exception as e:
        logger.error(f"Processing failed: {str(e)}")


Processing strategies: 100%|██████████| 163/163 [00:01<00:00, 119.64it/s]

Final HS tensor shape: torch.Size([1300, 512])





In [41]:
print(HS)

tensor([[ 0.1575, -0.2189,  0.3365,  ..., -0.0077,  0.2383, -0.1733],
        [ 0.1575, -0.2189,  0.3365,  ..., -0.0077,  0.2383, -0.1733],
        [ 0.1575, -0.2189,  0.3365,  ..., -0.0077,  0.2383, -0.1733],
        ...,
        [ 0.1575, -0.2189,  0.3365,  ..., -0.0077,  0.2383, -0.1733],
        [ 0.1575, -0.2189,  0.3365,  ..., -0.0077,  0.2383, -0.1733],
        [ 0.1575, -0.2189,  0.3365,  ..., -0.0077,  0.2383, -0.1733]])


In [43]:
import torch
import torch.nn as nn

class CrossAttentionModule(nn.Module):
    def __init__(self, hidden_dim):
        super(CrossAttentionModule, self).__init__()
        self.cross_attention = nn.MultiheadAttention(embed_dim=hidden_dim, num_heads=8, dropout=0.1)
        self.layer_norm = nn.LayerNorm(hidden_dim)
        self.residual_connection = nn.Identity()

    def forward(self, key, query, context):
        # Cross-attention between key, query, and context
        attn_output, _ = self.cross_attention(query, key, context)

        # Adding residual connection
        attn_output = self.residual_connection(attn_output + query)

        # Layer normalization
        attn_output = self.layer_norm(attn_output)

        return attn_output
class ComprehensiveEffectRepresentation(nn.Module):
    def __init__(self, hidden_dim):
        super(ComprehensiveEffectRepresentation, self).__init__()
        self.hidden_dim = hidden_dim
        self.cross_att_kec = CrossAttentionModule(hidden_dim)
        self.cross_att_kes = CrossAttentionModule(hidden_dim)

        # Linear layer to project concatenated tensor back to hidden_dim
        self.projection = nn.Linear(2 * hidden_dim, hidden_dim)

    def forward(self, KEC_intra, KEC_inter, Hc, Hq):
        # Concatenate intra and inter emotion effects to form KEC
        KEC = torch.cat((KEC_intra, KEC_inter), dim=-1)

        # Project KEC back to hidden_dim
        KEC = self.projection(KEC)

        # Apply cross-attention with cause-aware context
        KEC_hat = self.cross_att_kec(KEC, Hc, Hc)  # Cross-attention for KEC
        KES_hat = self.cross_att_kes(KEC, Hq, Hq)  # Cross-attention for KES

        return KEC_hat, KES_hat


# Example usage
hidden_dim = 512  # Adjust based on your model's requirements
cross_attention_module = ComprehensiveEffectRepresentation(hidden_dim)

# Dummy tensors for demonstration
KEC_intra = torch.randn(10, 32, hidden_dim)  # [sequence length, batch size, hidden dim]
KEC_inter = torch.randn(10, 32, hidden_dim)
Hc = torch.randn(10, 32, hidden_dim)
Hq = torch.randn(10, 32, hidden_dim)
# Forward pass
KEC_hat, KES_hat = cross_attention_module(KEC_intra, KEC_inter, Hc, Hq)


print(KEC_hat.shape, KES_hat.shape)

torch.Size([10, 32, 512]) torch.Size([10, 32, 512])


In [44]:
import torch

def compute_query_vector(Hs, Hc):
    """
    Compute the query vector h by mean-pooling the strategy history and context representation,
    then concatenating the results.

    Args:
        Hs (torch.Tensor): Strategy history tensor of shape [sequence_length, batch_size, hidden_dim].
        Hc (torch.Tensor): Context representation tensor of shape [sequence_length, batch_size, hidden_dim].

    Returns:
        h (torch.Tensor): Query vector of shape [batch_size, 2 * hidden_dim].
    """
    # Mean-pooling over the sequence length dimension
    s = torch.mean(Hs, dim=0)  # Shape: [batch_size, hidden_dim]
    c = torch.mean(Hc, dim=0)  # Shape: [batch_size, hidden_dim]

    # Concatenate the mean-pooled representations
    h = torch.cat((s, c), dim=-1)  # Shape: [batch_size, 2 * hidden_dim]

    return h

# Example usage
Hs = torch.randn(10, 32, 512)  # [sequence_length, batch_size, hidden_dim]
Hc = torch.randn(10, 32, 512)  # [sequence_length, batch_size, hidden_dim]

# Compute the query vector
h = compute_query_vector(Hs, Hc)
print(h.shape)  # Should output: [32, 1024]


torch.Size([32, 1024])


In [45]:
import torch
import torch.nn as nn
import torch.nn.functional as F

class SelfAttention(nn.Module):
    def __init__(self, embed_dim, num_heads):
        super(SelfAttention, self).__init__()
        self.self_attn = nn.MultiheadAttention(embed_dim, num_heads)
        self.layer_norm = nn.LayerNorm(embed_dim)
        self.dropout = nn.Dropout(0.1)  # Use dropout to prevent overfitting

    def forward(self, x):
        # Apply self-attention
        attn_output, _ = self.self_attn(x, x, x)  # Self-attention over the input
        x = self.layer_norm(x + self.dropout(attn_output))  # Residual connection and layer normalization
        return x

class CAE(nn.Module):
    def __init__(self, embed_dim, num_heads):
        super(CAE, self).__init__()
        self.self_attention = SelfAttention(embed_dim, num_heads)

    def forward(self, S):
        H_S = self.self_attention(S)
        return H_S

# Example usage
embed_dim = 128  # Dimension of the embeddings
num_heads = 8    # Number of attention heads
cae = CAE(embed_dim, num_heads)

# Sample input for strategy representation (batch_size, seq_len, embed_dim)
S = torch.rand(10, 20, embed_dim)  # Adjust dimensions as needed
H_S = cae(S)
print("Output Shape of H_S:", H_S.shape)


Output Shape of H_S: torch.Size([10, 20, 128])


In [46]:
import torch
import torch.nn as nn
import torch.nn.functional as F

class SelfAttentionModule(nn.Module):
    def __init__(self, embed_dim, num_heads, dropout=0.1):
        super(SelfAttentionModule, self).__init__()
        # Multi-head self-attention layer
        self.self_attention = nn.MultiheadAttention(embed_dim=embed_dim, num_heads=num_heads, dropout=dropout)
        # Layer normalization and dropout
        self.layer_norm = nn.LayerNorm(embed_dim)
        self.dropout = nn.Dropout(dropout)

    def forward(self, x):
        # Compute self-attention
        attn_output, _ = self.self_attention(x, x, x)
        # Residual connection followed by layer normalization
        x = self.layer_norm(x + self.dropout(attn_output))
        return x


class StrategyEncoder(nn.Module):
    def __init__(self, embed_dim, num_heads, dropout=0.1):
        super(StrategyEncoder, self).__init__()
        # Initialize self-attention for strategy encoding
        self.self_attention_module = SelfAttentionModule(embed_dim, num_heads, dropout)

    def forward(self, strategy_embedding):
        # Pass strategy embeddings through self-attention
        strategy_encoded = self.self_attention_module(strategy_embedding)
        return strategy_encoded

# Example usage
embed_dim = 128  # Define embedding dimension
num_heads = 4    # Define number of attention heads
dropout = 0.1    # Dropout rate

# Initialize Strategy Encoder
strategy_encoder = StrategyEncoder(embed_dim, num_heads, dropout)

# Example input: Strategy descriptions as embeddings (batch_size, seq_len, embed_dim)
S = torch.rand(10, 20, embed_dim)  # e.g., batch of 10 strategy descriptions with 20 tokens each

# Get the encoded representation of strategy descriptions
H_D = strategy_encoder(S)
print("Encoded Strategy Representation Shape:", H_D.shape)

Encoded Strategy Representation Shape: torch.Size([10, 20, 128])


In [47]:
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
import torch

# Load the model and tokenizer
model_name = "/content/blenderbot_small-90M"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForSeq2SeqLM.from_pretrained(model_name)

# Strategy descriptions
strategy_descriptions = {
    "Question": "Asking for information related to the problem to help the help-seeker articulate the issues that they face. Open-ended questions are best, and closed questions can be used to get specific information.",
    "Restatement or Paraphrasing": "A simple, more concise rephrasing of the help-seeker’s statements that could help them see their situation more clearly.",
    "Reflection of Feelings": "Articulate and describe the help-seeker’s feelings.",
    "Self-disclosure": "Divulge similar experiences that you have had or emotions that you share with the help-seeker to express your empathy.",
    "Affirmation and Reassurance": "Affirm the help-seeker’s strengths, motivation, and capabilities and provide reassurance and encouragement.",
    "Providing Suggestions": "Provide suggestions about how to change the situation, but be careful to not overstep and tell them what to do.",
    "Information": "Provide useful information to the help-seeker, for example with data, facts, opinions, resources, or by answering questions.",
    "Others": "Exchange pleasantries and use other support strategies that do not fall into the above categories.",
}

# Encode descriptions into embeddings
def encode_descriptions(descriptions, tokenizer, model):
    embeddings = {}
    model.eval()
    with torch.no_grad():
        for strategy, text in descriptions.items():
            # Tokenize and encode the text
            inputs = tokenizer(text, return_tensors="pt", truncation=True, padding="max_length", max_length=128)

            # Get encoder outputs using model.model.encoder instead of model.encoder
            outputs = model.model.encoder(**inputs)

            embeddings[strategy] = outputs.last_hidden_state.mean(dim=1)  # Mean pooling of token embeddings
    return embeddings

# Get embeddings for all strategies
strategy_embeddings = encode_descriptions(strategy_descriptions, tokenizer, model)

# Print embeddings
for strategy, embedding in strategy_embeddings.items():
    print(f"Strategy: {strategy}, Embedding Shape: {embedding.shape}")


Strategy: Question, Embedding Shape: torch.Size([1, 512])
Strategy: Restatement or Paraphrasing, Embedding Shape: torch.Size([1, 512])
Strategy: Reflection of Feelings, Embedding Shape: torch.Size([1, 512])
Strategy: Self-disclosure, Embedding Shape: torch.Size([1, 512])
Strategy: Affirmation and Reassurance, Embedding Shape: torch.Size([1, 512])
Strategy: Providing Suggestions, Embedding Shape: torch.Size([1, 512])
Strategy: Information, Embedding Shape: torch.Size([1, 512])
Strategy: Others, Embedding Shape: torch.Size([1, 512])


In [51]:
import torch

# Assuming these are the correct tensors from your global variables:
# Hq, KEC_hat, KES_hat

# Using Hc instead of H_c as it has the required dimensions
H_c = Hc  # Assign Hc to H_c

# or if Hc need a new shape [10, 32, 128] then use following with extra caution and ensure Hc.shape[0]*Hc.shape[1]*Hc.shape[2] == 10*32*128 = 40960, for the following to run
# H_c = Hc.view(10, 32, 128) # change to 3 dimension and matching elements

# Concatenate tensors using the correct variable names
X = torch.cat((Hq, H_c, KEC_hat, KES_hat), dim=1)

print(f"Shape of X: {X.shape}")

Shape of X: torch.Size([10, 128, 512])


In [52]:
print(model)

BlenderbotSmallForConditionalGeneration(
  (model): BlenderbotSmallModel(
    (shared): Embedding(54944, 512, padding_idx=0)
    (encoder): BlenderbotSmallEncoder(
      (embed_tokens): Embedding(54944, 512, padding_idx=0)
      (embed_positions): BlenderbotSmallLearnedPositionalEmbedding(512, 512)
      (layers): ModuleList(
        (0-7): 8 x BlenderbotSmallEncoderLayer(
          (self_attn): BlenderbotSmallAttention(
            (k_proj): Linear(in_features=512, out_features=512, bias=True)
            (v_proj): Linear(in_features=512, out_features=512, bias=True)
            (q_proj): Linear(in_features=512, out_features=512, bias=True)
            (out_proj): Linear(in_features=512, out_features=512, bias=True)
          )
          (self_attn_layer_norm): LayerNorm((512,), eps=1e-05, elementwise_affine=True)
          (activation_fn): GELUActivation()
          (fc1): Linear(in_features=512, out_features=2048, bias=True)
          (fc2): Linear(in_features=2048, out_features=512

In [55]:
import torch
import torch.nn as nn
import torch.nn.functional as F

class IndependentIntegratedExecutors(nn.Module):
    def __init__(self, hidden_dim, num_strategies):
        super(IndependentIntegratedExecutors, self).__init__()
        self.hidden_dim = hidden_dim
        self.num_strategies = num_strategies

        # Independent executors: one cross-attention module for each strategy
        self.strategy_executors = nn.ModuleList([
            nn.MultiheadAttention(embed_dim=hidden_dim, num_heads=8, dropout=0.1)
            for _ in range(num_strategies)
        ])

        # Fusion layer for integrating strategies
        self.strategy_fusion = nn.Linear(hidden_dim * num_strategies, hidden_dim)
        self.layer_norm = nn.LayerNorm(hidden_dim)

        # Decoder for final output
        self.decoder = nn.TransformerDecoderLayer(d_model=hidden_dim, nhead=8)

    def forward(self, O, X, H_D, strategy_distribution):
        """
        Args:
            O: Decoder hidden state (T_dec, B, H)
            X: Comprehensive causal information (T_enc, B, H)
            H_D: Strategy descriptions (num_strategies, B, H) # Updated shape for H_D
            strategy_distribution: Strategy weights (B, num_strategies)
        Returns:
            O_prime: Updated decoder hidden state (T_dec, B, H)
        """
        executor_outputs = []

        # Independent strategy execution
        for i, executor in enumerate(self.strategy_executors):
            # Reshape H_D to match X's dimensions before concatenation
            h_d_i = H_D[i, :, :].unsqueeze(0).repeat(X.shape[0], 1, 1) # repeat to match X's shape
            strategy_specific_info = torch.cat((X, h_d_i), dim=0)  # Concatenate X and reshaped h_d_i

            O_E_i, _ = executor(O, strategy_specific_info, strategy_specific_info)
            executor_outputs.append(O_E_i)

        # Combine outputs from all executors
        executor_outputs = torch.stack(executor_outputs, dim=1)  # Shape: (T_dec, num_strategies, B, H)
        weighted_executor_output = torch.einsum(
            'bs,tsbh->tbh', strategy_distribution, executor_outputs
        )  # Weighted sum of executors based on strategy distribution

        # Layer normalization and residual connection
        O_E = self.layer_norm(O + weighted_executor_output)

        # Final decoding with integrated strategies
        O_prime = self.decoder(O_E, X)

        return O_prime

# Example Usage
if __name__ == "__main__":
    hidden_dim = 256
    num_strategies = 8
    batch_size = 16
    seq_len_dec = 20
    seq_len_enc = 50

    model = IndependentIntegratedExecutors(hidden_dim, num_strategies)

    # Dummy Inputs
    O = torch.randn(seq_len_dec, batch_size, hidden_dim)
    X = torch.randn(seq_len_enc, batch_size, hidden_dim)
    H_D = torch.randn(num_strategies, batch_size, hidden_dim)
    strategy_distribution = torch.softmax(torch.randn(batch_size, num_strategies), dim=-1)

    # Forward Pass
    output = model(O, X, H_D, strategy_distribution)
    print(output.shape)  # Expected: (seq_len_dec, batch_size, hidden_dim)


torch.Size([20, 16, 256])
