In [1]:
import sys
import os
import copy
import torch
import torch.nn as nn
import torch.nn.functional as F

import transformers
from transformers import AutoConfig, AutoModelForSequenceClassification

# Put the parent directory (relative to the working directory) at the front of
# the import path so the `models` import below can resolve from there.
sys.path.insert(0, "..")

import models
# NOTE(review): the star import is presumably what brings AutoSeqClsModel and
# AutoTFLModel into scope for the cells below — confirm, and prefer importing
# those names explicitly.
from models import *

import logging
# NOTE(review): 'my_module_name' looks like a placeholder, and `logger` is not
# used in any of the cells visible here.
logger = logging.getLogger('my_module_name')


  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Shared hyper-parameters forwarded to every AutoSeqClsModel.from_kwargs call
# in the next cell, so all backbones are built with the same settings.
kwargs = dict(
    embed_dim=512,
    num_layers=8,
    num_heads=4,
    num_labels=5,
    ffwd_dim=777,
    problem_type="multi_label_classification",
)

In [3]:
# Build one sequence-classification model per backbone name, all from the same
# `kwargs`. Construction order matches the order of the names on the left;
# note that the "code_llama" entry is bound to `llama`.
mytf, gpt2, bert, roberta, llama = (
    AutoSeqClsModel.from_kwargs(backbone, **kwargs)
    for backbone in ("mytf", "gpt2", "bert", "roberta", "code_llama")
)

In [4]:
# mytf_pt = AutoSeqClsModel.from_pretrained("mytf", **kwargs)
# gpt2_pt = AutoSeqClsModel.from_pretrained("gpt2", **kwargs)
# roberta_pt = AutoSeqClsModel.from_pretrained("roberta", **kwargs)
# # bert_pt = AutoSeqClsModel.from_pretrained("bert", **kwargs)

In [5]:
def _build_tfl_models(model_name):
    """Build the three AutoTFLModel task variants for one backbone.

    Calls AutoTFLModel.from_kwargs for the "one_shot", "next_state" and
    "autoreg_ksteps" tasks, in that order, each with num_vars=5 (plus
    num_steps=3 for the autoregressive task).

    Returns:
        A 3-tuple (one_shot_model, next_state_model, autoreg_model).
    """
    shared = {"model_name": model_name, "num_vars": 5}
    return (
        AutoTFLModel.from_kwargs("one_shot", **shared),
        AutoTFLModel.from_kwargs("next_state", **shared),
        AutoTFLModel.from_kwargs("autoreg_ksteps", **shared, num_steps=3),
    )


# One (qed, succ, ars) triple per backbone.
mytf_qed, mytf_succ, mytf_ars = _build_tfl_models("mytf")
gpt2_qed, gpt2_succ, gpt2_ars = _build_tfl_models("gpt2")
roberta_qed, roberta_succ, roberta_ars = _build_tfl_models("roberta")

In [6]:
def _load_pretrained_tfl_models(model_name):
    """Load the three AutoTFLModel task variants for one backbone via from_pretrained.

    Calls AutoTFLModel.from_pretrained for the "one_shot", "next_state" and
    "autoreg_ksteps" tasks, in that order, each with num_vars=5 (plus
    num_steps=3 for the autoregressive task).

    Returns:
        A 3-tuple (one_shot_model, next_state_model, autoreg_model).
    """
    shared = {"model_name": model_name, "num_vars": 5}
    return (
        AutoTFLModel.from_pretrained("one_shot", **shared),
        AutoTFLModel.from_pretrained("next_state", **shared),
        AutoTFLModel.from_pretrained("autoreg_ksteps", **shared, num_steps=3),
    )


# One (qed, succ, ars) triple per backbone. The recorded output of this cell
# shows config/weight downloads, so expect network access on a cold cache.
mytf_qed_pt, mytf_succ_pt, mytf_ars_pt = _load_pretrained_tfl_models("mytf")
gpt2_qed_pt, gpt2_succ_pt, gpt2_ars_pt = _load_pretrained_tfl_models("gpt2")
roberta_qed_pt, roberta_succ_pt, roberta_ars_pt = _load_pretrained_tfl_models("roberta")

Downloading config.json: 100%|██████████| 665/665 [00:00<00:00, 2.20MB/s]
Downloading model.safetensors: 100%|██████████| 548M/548M [00:07<00:00, 76.0MB/s] 
Downloading config.json: 100%|██████████| 481/481 [00:00<00:00, 5.34MB/s]
Downloading model.safetensors: 100%|██████████| 499M/499M [00:10<00:00, 48.6MB/s] 


In [7]:
## String tests
# Build the "one_shot_str" task for the mytf and gpt2 backbones, first through
# from_kwargs and then through from_pretrained (same order as the names).
mytf_qed_str, gpt2_qed_str = (
    AutoTFLModel.from_kwargs("one_shot_str", model_name=backbone, num_vars=5)
    for backbone in ("mytf", "gpt2")
)

mytf_qed_pt_str, gpt2_qed_pt_str = (
    AutoTFLModel.from_pretrained("one_shot_str", model_name=backbone, num_vars=5)
    for backbone in ("mytf", "gpt2")
)

In [8]:
# Display the config object attached to the `bert` model built above.
bert.config

<models.hf_models.HFSeqClsConfig at 0x7f0fc777d060>

In [9]:
# Sanity check: these attributes should echo the embed_dim, problem_type and
# num_labels values that were passed in through `kwargs`.
mytf.embed_dim, mytf.problem_type, mytf.num_labels

(512, 'multi_label_classification', 5)

In [10]:
# Random float input fed directly to the models below. The last dimension (512)
# matches embed_dim in `kwargs`; presumably the layout is (batch, seq_len, embed_dim).
x = torch.randn((7, 12, 512))

In [11]:
# Forward pass through `mytf`, requesting per-layer hidden states and attention
# maps, then display the output shapes / list lengths as a quick smoke test.
mytf_out = mytf(x, output_hidden_states=True, output_attentions=True)
(
    mytf_out.logits.shape,
    mytf_out.last_hidden_state.shape,
    len(mytf_out.hidden_states),
    len(mytf_out.attentions),
)

(torch.Size([7, 5]), torch.Size([7, 12, 512]), 9, 8)

In [12]:
# Same smoke test for `gpt2`: run the forward pass with hidden states and
# attentions enabled, then display the logits shape and the two list lengths.
gpt2_out = gpt2(x, output_hidden_states=True, output_attentions=True)
(
    gpt2_out.logits.shape,
    len(gpt2_out.hidden_states),
    len(gpt2_out.attentions),
)

(torch.Size([7, 5]), 9, 8)

In [13]:
# Same smoke test for `bert`: run the forward pass with hidden states and
# attentions enabled, then display the logits shape and the two list lengths.
bert_out = bert(x, output_hidden_states=True, output_attentions=True)
(
    bert_out.logits.shape,
    len(bert_out.hidden_states),
    len(bert_out.attentions),
)

(torch.Size([7, 5]), 9, 8)

In [14]:
# Display the module tree of the `llama` model (built from the "code_llama" name)
# to inspect how the kwargs were mapped onto its layers.
llama

HFSeqClsModel(
  (model): LlamaForSequenceClassification(
    (model): LlamaModel(
      (embed_tokens): Embedding(32000, 512)
      (layers): ModuleList(
        (0-7): 8 x LlamaDecoderLayer(
          (self_attn): LlamaAttention(
            (q_proj): Linear(in_features=512, out_features=512, bias=False)
            (k_proj): Linear(in_features=512, out_features=512, bias=False)
            (v_proj): Linear(in_features=512, out_features=512, bias=False)
            (o_proj): Linear(in_features=512, out_features=512, bias=False)
            (rotary_emb): LlamaRotaryEmbedding()
          )
          (mlp): LlamaMLP(
            (gate_proj): Linear(in_features=512, out_features=777, bias=False)
            (up_proj): Linear(in_features=512, out_features=777, bias=False)
            (down_proj): Linear(in_features=777, out_features=512, bias=False)
            (act_fn): SiLUActivation()
          )
          (input_layernorm): LlamaRMSNorm()
          (post_attention_layernorm): LlamaRMS

In [15]:
# NOTE(review): this rebinds `gpt2`, which until this cell referred to the model
# built with AutoSeqClsModel.from_kwargs. Re-running the earlier `gpt2(x, ...)`
# cell after this one would call this model instead. Also note num_labels is 4
# here, whereas `kwargs` uses 5.
gpt2 = AutoModelForSequenceClassification.from_pretrained(
    "gpt2",
    num_labels=4,
    problem_type="multi_label_classification",
)

In [None]:
# Display whatever `gpt2` currently refers to. This cell has no execution count
# in the saved notebook, so no output was recorded for it.
gpt2

In [None]:
# NOTE(review): "code_llama" does not look like a namespaced Hub repo id;
# presumably it must resolve to a local directory or a project alias — confirm.
# This cell has no execution count, so it may never have run successfully.
model = AutoModelForSequenceClassification.from_pretrained("code_llama")

In [None]:
# NOTE(review): `cfg` is not assigned in any cell visible here; on a fresh kernel
# this raises NameError unless the name comes from `from models import *` — confirm
# or remove.
cfg

In [None]:
# NOTE(review): looks like an unfinished attribute lookup (possibly a
# tab-completion leftover); likely raises AttributeError unless `transformers`
# exposes a name spelled exactly `Llama`. TODO: complete or delete.
transformers.Llama