<a href="https://colab.research.google.com/github/GIM494/Learning-Design-Rules/blob/master/CARP_L_Demo_Copy.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
!wget https://the-eye.eu/public/AI/CARP_L.pt

--2021-10-12 20:44:20--  https://the-eye.eu/public/AI/CARP_L.pt
Resolving the-eye.eu (the-eye.eu)... 162.213.130.242
Connecting to the-eye.eu (the-eye.eu)|162.213.130.242|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 2859997019 (2.7G) [application/octet-stream]
Saving to: ‘CARP_L.pt’


2021-10-12 20:45:19 (46.8 MB/s) - ‘CARP_L.pt’ saved [2859997019/2859997019]



In [None]:
!pip install torch 
!pip install transformers==4.6.0
!pip install sentencepiece

Collecting transformers==4.6.0
  Downloading transformers-4.6.0-py3-none-any.whl (2.3 MB)
[K     |████████████████████████████████| 2.3 MB 6.9 MB/s 
Collecting sacremoses
  Downloading sacremoses-0.0.46-py3-none-any.whl (895 kB)
[K     |████████████████████████████████| 895 kB 55.7 MB/s 
[?25hCollecting huggingface-hub==0.0.8
  Downloading huggingface_hub-0.0.8-py3-none-any.whl (34 kB)
Collecting tokenizers<0.11,>=0.10.1
  Downloading tokenizers-0.10.3-cp37-cp37m-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl (3.3 MB)
[K     |████████████████████████████████| 3.3 MB 46.1 MB/s 
Installing collected packages: tokenizers, sacremoses, huggingface-hub, transformers
Successfully installed huggingface-hub-0.0.8 sacremoses-0.0.46 tokenizers-0.10.3 transformers-4.6.0
Collecting sentencepiece
  Downloading sentencepiece-0.1.96-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (1.2 MB)
[K     |████████████████████████████████| 1.2 MB 6.8 MB/s 


In [None]:
import torch
from torch import nn
import torch.nn.functional as F
import numpy as np
import math
import transformers

# Model Setup

In [None]:
from transformers import AutoModel, AutoTokenizer

# --- Run-time switches and model hyper-parameters -------------------------
LATENT_DIM = 2048              # output width of ContrastiveModel.projA / projB
USE_CUDA = True                # when true, the loaded model is moved with model.cuda()
USE_HALF = True                # when true, the loaded model is cast with model.half()
MODEL_PATH = "roberta-large"   # checkpoint name used by TextEncoder and as key into d_models
# NOTE(review): `config` is not referenced anywhere else in the visible notebook.
config = transformers.RobertaConfig()


def _last_hidden_state(out):
    """Pick the final entry of the 'hidden_states' field of a model output."""
    return out['hidden_states'][-1]


def _first_output(out):
    """Pick the first element of a model output."""
    return out[0]


# Per-checkpoint function that extracts the token embeddings from a model output.
extract_fns = dict([
    ('EleutherAI/gpt-neo-1.3B', _last_hidden_state),
    ('EleutherAI/gpt-neo-2.7B', _last_hidden_state),
    ('roberta-large', _first_output),
    ('roberta-base', _first_output),
    ('microsoft/deberta-v2-xlarge', _first_output),
])

# Per-checkpoint hidden size; TextEncoder reads d_models[MODEL_PATH].
d_models = dict([
    ('EleutherAI/gpt-neo-1.3B', 2048),
    ('EleutherAI/gpt-neo-2.7B', 2560),
    ('roberta-large', 1024),
    ('roberta-base', 768),
    ('microsoft/deberta-v2-xlarge', 1536),
])

class TextEncoder(nn.Module):
    """Wrap a pretrained transformer and pool its token embeddings into one vector per text.

    The encoder loads the checkpoint named by ``MODEL_PATH`` together with its
    tokenizer, registers an extra ``'[quote]'`` token, and exposes:

    * ``tok`` - tokenize a batch of strings into padded PyTorch tensors placed
      on the same device as the wrapped model;
    * ``forward`` - run the model, zero out padded positions, sum over the
      sequence axis and L2-normalise the result.
    """

    def __init__(self):
        super().__init__()

        self.model = AutoModel.from_pretrained(MODEL_PATH)

        self.tokenizer = AutoTokenizer.from_pretrained(MODEL_PATH)
        self.d_model = d_models[MODEL_PATH]

        # Register the extra '[quote]' token and grow the embedding table to match.
        self.tokenizer.add_tokens(['[quote]'])
        self.model.resize_token_embeddings(len(self.tokenizer))

    def _device(self):
        """Return the device that currently holds the wrapped model's parameters."""
        return next(self.model.parameters()).device

    def tok(self, string_batch):
        """Tokenize ``string_batch`` with padding and return tensors on the model's device.

        Following the model (instead of a fixed ``'cuda'``) keeps tokenization
        working both before the model is moved to the GPU and on CPU-only hosts.
        """
        return self.tokenizer(string_batch,
                return_tensors = 'pt',
                padding = True).to(self._device())

    def forward(self, x, mask = None, tokenize = False, mask_sum = True):
        """Embed a batch and return one unit-norm vector per sequence.

        Args:
            x: token ids, or raw strings when ``tokenize`` is true.
            mask: attention mask matching ``x``; replaced by the tokenizer's
                mask when ``tokenize`` is true.
            tokenize: tokenize ``x`` with :meth:`tok` before running the model.
            mask_sum: zero the embeddings of masked (padding) positions before
                summing. Skipped when no mask is available.

        Returns:
            Tensor with one row per sequence: the sum of the token embeddings
            over the sequence axis, L2-normalised by ``F.normalize``.
        """
        if tokenize:
            x = self.tok(x)
            mask = x['attention_mask']
            x = x['input_ids']

        out = self.model(x, mask, output_hidden_states = True, return_dict = True)

        # First element of the model output, used as the per-token embeddings.
        # It is multiplied by the mask and summed over axis 1 below, so it is
        # treated as [batch, seq, d_model].
        hidden = out[0]

        # Zero the embeddings of padded positions. The mask is broadcast over the
        # feature axis instead of being materialised d_model times.
        if mask_sum and mask is not None:
            hidden = hidden * mask.unsqueeze(2).to(hidden.dtype)

        y = hidden.sum(1)  # sum along the sequence axis
        y = F.normalize(y)

        return y

class ContrastiveModel(nn.Module):
    """Two-tower contrastive model with a learned logit scale.

    ``encA`` / ``encB`` embed the two input streams; ``projA`` / ``projB`` are
    bias-free linear maps from each encoder's ``d_model`` to ``LATENT_DIM``.
    Similarity logits are the dot products of the L2-normalised projections
    multiplied by ``exp(logit_scale)``.
    """

    def __init__(self, encA, encB):
        super().__init__()

        self.encA = encA
        self.encB = encB

        self.projA = nn.Linear(self.encA.d_model, LATENT_DIM, bias = False)
        self.projB = nn.Linear(self.encB.d_model, LATENT_DIM, bias = False)

        # Stored in log space; initialised to log(1 / 0.07).
        self.logit_scale = nn.Parameter(torch.ones([]) * np.log(1 / 0.07))
        self.clamp_min = math.log(1/100)
        self.clamp_max = math.log(100)

    def clamp(self):
        """Clamp ``logit_scale`` in place to ``[clamp_min, clamp_max]``."""
        with torch.no_grad():
            # clamp_ (in place) is required here: the out-of-place clamp returns
            # a new tensor and would leave the parameter untouched.
            self.logit_scale.clamp_(self.clamp_min, self.clamp_max)

    def encodeX(self, x, masks = None):
        """Encode ``x`` with ``encA`` and project it into the latent space."""
        x = self.encA(x, masks)
        return self.projA(x)

    def encodeY(self, y, masks = None):
        """Encode ``y`` with ``encB`` and project it into the latent space."""
        y = self.encB(y, masks)
        return self.projB(y)

    def cLoss(self, x, y):
        """Symmetric contrastive loss between two batches of embeddings.

        Args:
            x, y: already-encoded embeddings with the same number of rows;
                row ``i`` of ``x`` is treated as the match of row ``i`` of ``y``.

        Returns:
            ``(loss, accuracy)``: the mean of the two cross-entropy directions,
            and the fraction of rows/columns whose arg-max is the matching index.
        """
        n = x.shape[0]
        # normalize
        x = F.normalize(x)
        y = F.normalize(y)

        logits = x @ y.T * self.logit_scale.exp()
        # Build the targets on the logits' device so this also runs off-GPU.
        labels = torch.arange(n, device = logits.device)

        loss_i = F.cross_entropy(logits, labels)
        loss_t = F.cross_entropy(logits.T, labels)
        acc_i = (torch.argmax(logits, dim = 1) == labels).sum()
        acc_t = (torch.argmax(logits, dim = 0) == labels).sum()

        return (loss_i + loss_t) / 2, (acc_i + acc_t) / n / 2

    def getLogits(self, x, y):
        """Encode both inputs and return their scaled similarity matrix.

        Args:
            x: argument sequence unpacked into :meth:`encodeX` (e.g. ``[ids, masks]``).
            y: argument sequence unpacked into :meth:`encodeY`.

        Returns:
            Matrix whose entry ``[i, j]`` scores item ``i`` of ``x`` against
            item ``j`` of ``y``.
        """
        x = self.encodeX(*x)
        y = self.encodeY(*y)

        x = F.normalize(x)
        y = F.normalize(y)

        logits = x @ y.T * self.logit_scale.exp()
        return logits

    def forward(self, x, y):
        """Alias for :meth:`getLogits`."""
        return self.getLogits(x, y)

# Two independently initialised encoders: the first becomes model.encA, the second model.encB.
first_encoder = TextEncoder()
second_encoder = TextEncoder()
model = ContrastiveModel(first_encoder, second_encoder)

Downloading:   0%|          | 0.00/482 [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/1.43G [00:00<?, ?B/s]

Some weights of the model checkpoint at roberta-large were not used when initializing RobertaModel: ['lm_head.layer_norm.weight', 'lm_head.bias', 'lm_head.dense.weight', 'lm_head.dense.bias', 'lm_head.decoder.weight', 'lm_head.layer_norm.bias']
- This IS expected if you are initializing RobertaModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Downloading:   0%|          | 0.00/899k [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/456k [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/1.36M [00:00<?, ?B/s]

Some weights of the model checkpoint at roberta-large were not used when initializing RobertaModel: ['lm_head.layer_norm.weight', 'lm_head.bias', 'lm_head.dense.weight', 'lm_head.dense.bias', 'lm_head.decoder.weight', 'lm_head.layer_norm.bias']
- This IS expected if you are initializing RobertaModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


In [None]:
# NOTE(review): torch.load unpickles the file, which can execute arbitrary code;
# only load checkpoints that come from a trusted source.
# The path is relative because the wget cell saves CARP_L.pt into the current
# working directory. map_location="cpu" loads the tensors into host memory, so
# nothing lands on the GPU until the model has been cast/moved below, and the
# load also works when the saving device is not available.
state_dict = torch.load("CARP_L.pt", map_location="cpu")
model.load_state_dict(state_dict)
if USE_HALF: model.half()
if USE_CUDA: model.cuda()

In [None]:
N_CTX = 512
def tok(string_batch):
    """Clip each text to its last ``N_CTX`` characters and tokenize it with ``model.encA``.

    The limit is applied to characters, not tokens, and keeps the *end* of each
    text. The caller's container is never modified: clipped copies are built
    instead of assigning back into ``string_batch``.

    Args:
        string_batch: a sequence of strings, or a single string.

    Returns:
        Whatever ``model.encA.tok`` returns for the clipped batch.
    """
    if isinstance(string_batch, str):
        # A bare string is one text, not a batch of characters.
        clipped = string_batch[-N_CTX:]
    else:
        # Slicing a short string with [-N_CTX:] returns it unchanged.
        clipped = [text[-N_CTX:] for text in string_batch]

    return model.encA.tok(clipped)

def get_batch_tokens(dataset, inds):
    """Tokenize the (passage, review) pairs found at ``inds`` in ``dataset``.

    Element 0 of every selected pair goes into the passage batch and element 1
    into the review batch; each batch is tokenized with ``tok``.

    Returns:
        ``(passage_ids, passage_masks, review_ids, review_masks)``.
    """
    passages = []
    reviews = []
    for ind in inds:
        pair = dataset[ind]
        passages.append(pair[0])
        reviews.append(pair[1])

    encoded_passages = tok(passages)
    encoded_reviews = tok(reviews)

    return (
        encoded_passages['input_ids'],
        encoded_passages['attention_mask'],
        encoded_reviews['input_ids'],
        encoded_reviews['attention_mask'],
    )

In [None]:
import torch
from transformers import PegasusForConditionalGeneration, PegasusTokenizer
# Load the 'tuner007/pegasus_paraphrase' tokenizer and model; the model is cast
# to float16 and moved to `torch_device`.
model_name = 'tuner007/pegasus_paraphrase'
torch_device = 'cuda'
tokenizer_pegasus = PegasusTokenizer.from_pretrained(model_name)
model_pegasus = PegasusForConditionalGeneration.from_pretrained(model_name)
model_pegasus = model_pegasus.half().to(torch_device)
def get_response(input_text,num_return_sequences,num_beams):
    """Paraphrase ``input_text`` with the Pegasus model (used for softening).

    Args:
        input_text: the sentence to paraphrase.
        num_return_sequences: number of generated sequences requested.
        num_beams: beam count passed to ``generate``.

    Returns:
        List of decoded paraphrases with special tokens stripped.
    """
    encoded = tokenizer_pegasus(
        [input_text],
        truncation=True,
        padding='longest',
        max_length=60,
        return_tensors="pt",
    ).to(torch_device)
    generated = model_pegasus.generate(
        **encoded,
        max_length=60,
        num_beams=num_beams,
        num_return_sequences=num_return_sequences,
        temperature=1.5,
    )
    return tokenizer_pegasus.batch_decode(generated, skip_special_tokens=True)

Downloading:   0%|          | 0.00/1.91M [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/65.0 [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/86.0 [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/1.14k [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/2.28G [00:00<?, ?B/s]

In [None]:
# Quick check of the paraphraser on a sentence containing the '[quote]' placeholder.
demo_sentence = "Doesn't [quote] contradict [quote]?"
get_response(demo_sentence, num_return_sequences=5, num_beams=5)

To keep the current behavior, use torch.div(a, b, rounding_mode='trunc'), or for actual floor division, use torch.div(a, b, rounding_mode='floor'). (Triggered internally at  /pytorch/aten/src/ATen/native/BinaryOps.cpp:467.)
  return torch.floor_divide(self, other)


["Doesn't it conflict with [quote]?",
 "Doesn't it conflict with thequote?",
 "Doesn't it conflict with the quote?",
 "Doesn't it conflict with what he said?",
 "Doesn't it conflict with [quote])?"]

In [None]:
def get_passrev_logits(passages, reviews):
    """Compute the logits of the passages against the reviews.

    Both batches are tokenized with ``tok`` and passed to ``model.getLogits``
    as ``[input_ids, attention_mask]`` without tracking gradients.

    Returns:
        The logits cast to float32; entry ``[i, j]`` scores passage ``i``
        against review ``j``.
    """
    passage_enc = tok(passages)
    review_enc = tok(reviews)

    passage_inputs = [passage_enc['input_ids'], passage_enc['attention_mask']]
    review_inputs = [review_enc['input_ids'], review_enc['attention_mask']]

    with torch.no_grad():
        raw_logits = model.getLogits(passage_inputs, review_inputs)
        logits = raw_logits.type(dtype=torch.float32)
    return logits

def report_logits(logits, scale=2.7441):
    """Print scaled logits and the row-wise softmax confidence of every entry.

    Args:
        logits: 2-D tensor of logits. NOTE: it is divided by ``scale`` *in
            place*, so the caller's tensor holds the scaled values afterwards
            (``compute_logit`` returns the tensor after this call).
        scale: divisor applied before the softmax. Defaults to the constant
            previously hard-coded here.

    Prints the first row of the scaled logits, then one ``"i-j: p"`` line per
    entry where ``p`` is the softmax over row ``i`` rounded to two decimals.
    """
    logits /= scale
    print(logits[0].cpu().tolist())
    conf = logits.softmax(1)

    for i, row in enumerate(conf):
        for j, col in enumerate(row):
            print(f"{i}-{j}: {round(col.item(), 2)}")

def compute_softened_logits(passages, reviews1, reviews2, pairs=True):
    """Average the passage/review logits over each group of review variants.

    For a group, the logits from ``get_passrev_logits`` are summed over the
    review axis, given a leading axis of size 1, and divided by the number of
    reviews in the group.

    Args:
        passages: batch of passages.
        reviews1: first group of review variants.
        reviews2: second group; only used when ``pairs`` is true.
        pairs: when true, return both group averages concatenated along the
            last axis; otherwise return only the first group's average.
    """
    def _group_average(review_group):
        per_review = get_passrev_logits(passages, review_group)
        return torch.sum(per_review, dim=-1).unsqueeze(0) / float(len(review_group))

    first_average = _group_average(reviews1)
    if not pairs:
        return first_average

    second_average = _group_average(reviews2)
    return torch.cat([first_average, second_average], dim=-1)
#Lots of options to play with here that dictate how the paraphrases are generated.
#Future work is needed
def _expand_review(review):
    """Return ``(paraphrases, scored_variants)`` for one review.

    ``paraphrases`` are three generated paraphrases plus the review itself,
    de-duplicated. ``scored_variants`` is every paraphrase prefixed with
    ``"[quote] "`` followed by the plain paraphrases.
    """
    generated = get_response(review, num_return_sequences=3, num_beams=3)
    # dict.fromkeys removes duplicates while keeping first-seen order, so the
    # printed lists and the batch order are the same on every run (a set is not).
    paraphrases = list(dict.fromkeys(generated + [review]))
    contextual = ["[quote] " + text for text in paraphrases]
    return paraphrases, contextual + paraphrases


def compute_logit(passages, reviews, soften=True,
                        top_k=False, k = 3, 
                        ret = False, pairs=True):
    """Score passages against one review, or a pair of reviews, softened by paraphrasing.

    Args:
        passages: batch of passages.
        reviews: a two-element sequence of review strings when ``pairs`` is
            true, otherwise a single review string.
        soften: when false the function does nothing and returns ``None``.
            NOTE(review): no un-softened path is implemented.
        top_k, k: accepted but currently unused.
        ret: return the logits instead of only printing them.
        pairs: selects the pair / single-review behaviour described above.

    Returns:
        The softened logits when ``ret`` is true, otherwise ``None``. In pair
        mode the tensor has already been scaled in place by ``report_logits``.
    """
    if not soften:
        return None

    if pairs:
        review1_paraphrases, review1_variants = _expand_review(reviews[0])
        review2_paraphrases, review2_variants = _expand_review(reviews[1])
        print(review1_paraphrases)
        print(review2_paraphrases)

        softened_logits = compute_softened_logits(passages, review1_variants, review2_variants)
        report_logits(softened_logits)
    else:
        _, review_variants = _expand_review(reviews)
        softened_logits = compute_softened_logits(passages, review_variants, None, pairs=False)
        print(softened_logits.squeeze().cpu().tolist())

    if ret:
        return softened_logits
    return None




# Directly get logits for list of stories and critiques


In [None]:
# Demo passages to critique. Each entry is a one-element list, so a single entry
# can be passed to compute_logit as a batch that contains one passage.
stories = [
           ["The tiny lizard writhes in your grasp and claws at your fingers, its pink mouth gasping for breath. You squeeze harder and harder until your fist trembles with the effort. The lizard stops squirming."],
           ["All at once, the chest is lifted from you. Looking up, you see a man at the top of the cliff, pulling intently at the rope. “That is uncommonly good of you, I do say!” He chuckles unpleasantly."],
           ["You try to get close enough to the bulter, but he backs off. “That wouldn't be seemly, Miss.”"],
           ["“No!” screams the Princess. The machine emits a dreadful grinding noise and goes through a series of complicated gyrations."],
           ["The man went to the store. He stole some cheese. His starving family was saved."],
           ["The man went to the store. He bought some cheese. He dropped it on the ground."]
]


In [None]:

# Show the learned log-scale that getLogits exponentiates and multiplies into the logits.
print(model.logit_scale)

# Candidate critiques; each one is scored on its own against every story.
reviews = [
  "This is scary.",
  "The behavior doesnt make sense.",
  "The other characters wouldn't like this.",
  "This doesn't make sense for the character to do.",
  "This seems too nice.",
  "This is too cheery.",
  "This character doesn't fit."
]


# For every story, print it and then score it against each critique in turn.
for story in stories:
  print(story)

  # `pair` is a single review string here (pairs=False), not a tuple:
  # compute_logit paraphrases it and prints the softened logit for this story.
  for pair in reviews:
    print(pair)
    compute_logit(story, pair, pairs=False)

Parameter containing:
tensor(2.7695, device='cuda:0', dtype=torch.float16, requires_grad=True)
['The tiny lizard writhes in your grasp and claws at your fingers, its pink mouth gasping for breath. You squeeze harder and harder until your fist trembles with the effort. The lizard stops squirming.']
This is scary.
4.01806640625
The behavior doesnt make sense.
2.2373046875
The other characters wouldn't like this.
2.6298828125
This doesn't make sense for the character to do.
3.2080078125
This seems too nice.
1.3391265869140625
This is too cheery.
2.725341796875
This character doesn't fit.
0.7671382427215576
['All at once, the chest is lifted from you. Looking up, you see a man at the top of the cliff, pulling intently at the rope. “That is uncommonly good of you, I do say!” He chuckles unpleasantly.']
This is scary.
2.179443359375
The behavior doesnt make sense.
1.8544921875
The other characters wouldn't like this.
3.53955078125
This doesn't make sense for the character to do.
3.4572753906