In [10]:
#@title Installs and Imports
# pip installs
!pip -qq install git+https://github.com/huggingface/transformers.git
!pip install -qq py3nvml

from transformers import ReformerConfig, PyTorchBenchmark, PyTorchBenchmarkArguments, ReformerModel

  Installing build dependencies ... [?25l[?25hdone
  Getting requirements to build wheel ... [?25l[?25hdone
    Preparing wheel metadata ... [?25l[?25hdone
  Building wheel for transformers (PEP 517) ... [?25l[?25hdone


In [12]:
# Experiment: axial position embeddings with per-axis dims (32, 224) on a (512, 1024) grid,
# compared against the same checkpoint config with axial position embeddings switched off.

# Baseline config: axial position embeddings off.
config_no_pos_axial_embeds = ReformerConfig.from_pretrained(
    "google/reformer-crime-and-punishment",
    axial_pos_embds=False,
)
# Variant config: axial position embeddings on.
config_pos_axial_embeds = ReformerConfig.from_pretrained(
    "google/reformer-crime-and-punishment",
    axial_pos_embds=True,
    axial_pos_embds_dim=(32, 224),
    axial_pos_shape=(512, 1024),
)

# Build each model in turn and report its position-embedding module and parameter count.
for section_title, section_config in (
    ("Default Positional Encodings", config_no_pos_axial_embeds),
    ("Axial Positional Encodings", config_pos_axial_embeds),
):
    print(section_title)
    print('-' * 50)
    model = ReformerModel(section_config)
    print(f"Positional embeddings shape: {model.embeddings.position_embeddings}")
    print(f"Num parameters of model: {model.num_parameters()}")
    print('-' * 50 + '\n\n')

# Benchmark both configs (speed enabled, environment printout disabled) at batch size 8.
benchmark_args = PyTorchBenchmarkArguments(
    sequence_lengths=[512, 1024, 2048, 4096, 8192, 16384, 32768],
    batch_sizes=[8],
    models=["Reformer-No-Axial-Pos-Embeddings", "Reformer-Axial-Pos-Embeddings"],
    speed=True,
    env_print=False,
)
benchmark = PyTorchBenchmark(
    configs=[config_no_pos_axial_embeds, config_pos_axial_embeds],
    args=benchmark_args,
)
result = benchmark.run()

Default Positional Encodings
--------------------------------------------------
Positional embeddings shape: PositionEmbeddings(
  (embedding): Embedding(524288, 256)
)
Num parameters of model: 136572416
--------------------------------------------------


Axial Positional Encodings
--------------------------------------------------
Positional embeddings shape: AxialPositionEmbeddings(
  (weights): ParameterList(
      (0): Parameter containing: [torch.FloatTensor of size 512x1x32]
      (1): Parameter containing: [torch.FloatTensor of size 1x1024x224]
  )
)
Num parameters of model: 2600448
--------------------------------------------------


1 / 2
2 / 2

--------------------------------------------------------------------------------
          Model Name             Batch Size     Seq Length     Time in s   
--------------------------------------------------------------------------------
Reformer-No-Axial-Pos-Embeddin       8              512             0.01     
Reformer-No-Axial-Po

In [13]:
# Experiment: axial position embeddings with per-axis dims (86, 170) on a (512, 1024) grid,
# compared against the same checkpoint config with axial position embeddings switched off.

# Baseline config: axial position embeddings off.
config_no_pos_axial_embeds = ReformerConfig.from_pretrained(
    "google/reformer-crime-and-punishment",
    axial_pos_embds=False,
)
# Variant config: axial position embeddings on.
config_pos_axial_embeds = ReformerConfig.from_pretrained(
    "google/reformer-crime-and-punishment",
    axial_pos_embds=True,
    axial_pos_embds_dim=(86, 170),
    axial_pos_shape=(512, 1024),
)

# Build each model in turn and report its position-embedding module and parameter count.
for section_title, section_config in (
    ("Default Positional Encodings", config_no_pos_axial_embeds),
    ("Axial Positional Encodings", config_pos_axial_embeds),
):
    print(section_title)
    print('-' * 50)
    model = ReformerModel(section_config)
    print(f"Positional embeddings shape: {model.embeddings.position_embeddings}")
    print(f"Num parameters of model: {model.num_parameters()}")
    print('-' * 50 + '\n\n')

# Benchmark both configs (speed enabled, environment printout disabled) at batch size 8.
benchmark_args = PyTorchBenchmarkArguments(
    sequence_lengths=[512, 1024, 2048, 4096, 8192, 16384, 32768],
    batch_sizes=[8],
    models=["Reformer-No-Axial-Pos-Embeddings", "Reformer-Axial-Pos-Embeddings"],
    speed=True,
    env_print=False,
)
benchmark = PyTorchBenchmark(
    configs=[config_no_pos_axial_embeds, config_pos_axial_embeds],
    args=benchmark_args,
)
result = benchmark.run()

Default Positional Encodings
--------------------------------------------------
Positional embeddings shape: PositionEmbeddings(
  (embedding): Embedding(524288, 256)
)
Num parameters of model: 136572416
--------------------------------------------------


Axial Positional Encodings
--------------------------------------------------
Positional embeddings shape: AxialPositionEmbeddings(
  (weights): ParameterList(
      (0): Parameter containing: [torch.FloatTensor of size 512x1x86]
      (1): Parameter containing: [torch.FloatTensor of size 1x1024x170]
  )
)
Num parameters of model: 2572800
--------------------------------------------------


1 / 2
2 / 2

--------------------------------------------------------------------------------
          Model Name             Batch Size     Seq Length     Time in s   
--------------------------------------------------------------------------------
Reformer-No-Axial-Pos-Embeddin       8              512            0.009     
Reformer-No-Axial-Po

In [14]:
# Experiment: axial position embeddings with per-axis dims (128, 128) on a (512, 1024) grid,
# compared against the same checkpoint config with axial position embeddings switched off.

# Baseline config: axial position embeddings off.
config_no_pos_axial_embeds = ReformerConfig.from_pretrained(
    "google/reformer-crime-and-punishment",
    axial_pos_embds=False,
)
# Variant config: axial position embeddings on.
config_pos_axial_embeds = ReformerConfig.from_pretrained(
    "google/reformer-crime-and-punishment",
    axial_pos_embds=True,
    axial_pos_embds_dim=(128, 128),
    axial_pos_shape=(512, 1024),
)

# Build each model in turn and report its position-embedding module and parameter count.
for section_title, section_config in (
    ("Default Positional Encodings", config_no_pos_axial_embeds),
    ("Axial Positional Encodings", config_pos_axial_embeds),
):
    print(section_title)
    print('-' * 50)
    model = ReformerModel(section_config)
    print(f"Positional embeddings shape: {model.embeddings.position_embeddings}")
    print(f"Num parameters of model: {model.num_parameters()}")
    print('-' * 50 + '\n\n')

# Benchmark both configs (speed enabled, environment printout disabled) at batch size 8.
benchmark_args = PyTorchBenchmarkArguments(
    sequence_lengths=[512, 1024, 2048, 4096, 8192, 16384, 32768],
    batch_sizes=[8],
    models=["Reformer-No-Axial-Pos-Embeddings", "Reformer-Axial-Pos-Embeddings"],
    speed=True,
    env_print=False,
)
benchmark = PyTorchBenchmark(
    configs=[config_no_pos_axial_embeds, config_pos_axial_embeds],
    args=benchmark_args,
)
result = benchmark.run()

Default Positional Encodings
--------------------------------------------------
Positional embeddings shape: PositionEmbeddings(
  (embedding): Embedding(524288, 256)
)
Num parameters of model: 136572416
--------------------------------------------------


Axial Positional Encodings
--------------------------------------------------
Positional embeddings shape: AxialPositionEmbeddings(
  (weights): ParameterList(
      (0): Parameter containing: [torch.FloatTensor of size 512x1x128]
      (1): Parameter containing: [torch.FloatTensor of size 1x1024x128]
  )
)
Num parameters of model: 2551296
--------------------------------------------------


1 / 2
2 / 2

--------------------------------------------------------------------------------
          Model Name             Batch Size     Seq Length     Time in s   
--------------------------------------------------------------------------------
Reformer-No-Axial-Pos-Embeddin       8              512            0.009     
Reformer-No-Axial-P

In [16]:
# Experiment: axial position embeddings with per-axis dims (64, 192) on a (512, 1024) grid,
# compared against the same checkpoint config with axial position embeddings switched off.

# Baseline config: axial position embeddings off.
config_no_pos_axial_embeds = ReformerConfig.from_pretrained(
    "google/reformer-crime-and-punishment",
    axial_pos_embds=False,
)
# Variant config: axial position embeddings on.
config_pos_axial_embeds = ReformerConfig.from_pretrained(
    "google/reformer-crime-and-punishment",
    axial_pos_embds=True,
    axial_pos_embds_dim=(64, 192),
    axial_pos_shape=(512, 1024),
)

# Build each model in turn and report its position-embedding module and parameter count.
for section_title, section_config in (
    ("Default Positional Encodings", config_no_pos_axial_embeds),
    ("Axial Positional Encodings", config_pos_axial_embeds),
):
    print(section_title)
    print('-' * 50)
    model = ReformerModel(section_config)
    print(f"Positional embeddings shape: {model.embeddings.position_embeddings}")
    print(f"Num parameters of model: {model.num_parameters()}")
    print('-' * 50 + '\n\n')

# Benchmark both configs (speed enabled, environment printout disabled) at batch size 8.
benchmark_args = PyTorchBenchmarkArguments(
    sequence_lengths=[512, 1024, 2048, 4096, 8192, 16384, 32768],
    batch_sizes=[8],
    models=["Reformer-No-Axial-Pos-Embeddings", "Reformer-Axial-Pos-Embeddings"],
    speed=True,
    env_print=False,
)
benchmark = PyTorchBenchmark(
    configs=[config_no_pos_axial_embeds, config_pos_axial_embeds],
    args=benchmark_args,
)
result = benchmark.run()

Default Positional Encodings
--------------------------------------------------
Positional embeddings shape: PositionEmbeddings(
  (embedding): Embedding(524288, 256)
)
Num parameters of model: 136572416
--------------------------------------------------


Axial Positional Encodings
--------------------------------------------------
Positional embeddings shape: AxialPositionEmbeddings(
  (weights): ParameterList(
      (0): Parameter containing: [torch.FloatTensor of size 512x1x64]
      (1): Parameter containing: [torch.FloatTensor of size 1x1024x192]
  )
)
Num parameters of model: 2584064
--------------------------------------------------


1 / 2
2 / 2

--------------------------------------------------------------------------------
          Model Name             Batch Size     Seq Length     Time in s   
--------------------------------------------------------------------------------
Reformer-No-Axial-Pos-Embeddin       8              512             0.01     
Reformer-No-Axial-Po

In [17]:
# Experiment: axial position embeddings with per-axis dims (192, 64) on a (512, 1024) grid,
# compared against the same checkpoint config with axial position embeddings switched off.

# Baseline config: axial position embeddings off.
config_no_pos_axial_embeds = ReformerConfig.from_pretrained(
    "google/reformer-crime-and-punishment",
    axial_pos_embds=False,
)
# Variant config: axial position embeddings on.
config_pos_axial_embeds = ReformerConfig.from_pretrained(
    "google/reformer-crime-and-punishment",
    axial_pos_embds=True,
    axial_pos_embds_dim=(192, 64),
    axial_pos_shape=(512, 1024),
)

# Build each model in turn and report its position-embedding module and parameter count.
for section_title, section_config in (
    ("Default Positional Encodings", config_no_pos_axial_embeds),
    ("Axial Positional Encodings", config_pos_axial_embeds),
):
    print(section_title)
    print('-' * 50)
    model = ReformerModel(section_config)
    print(f"Positional embeddings shape: {model.embeddings.position_embeddings}")
    print(f"Num parameters of model: {model.num_parameters()}")
    print('-' * 50 + '\n\n')

# Benchmark both configs (speed enabled, environment printout disabled) at batch size 8.
benchmark_args = PyTorchBenchmarkArguments(
    sequence_lengths=[512, 1024, 2048, 4096, 8192, 16384, 32768],
    batch_sizes=[8],
    models=["Reformer-No-Axial-Pos-Embeddings", "Reformer-Axial-Pos-Embeddings"],
    speed=True,
    env_print=False,
)
benchmark = PyTorchBenchmark(
    configs=[config_no_pos_axial_embeds, config_pos_axial_embeds],
    args=benchmark_args,
)
result = benchmark.run()

Default Positional Encodings
--------------------------------------------------
Positional embeddings shape: PositionEmbeddings(
  (embedding): Embedding(524288, 256)
)
Num parameters of model: 136572416
--------------------------------------------------


Axial Positional Encodings
--------------------------------------------------
Positional embeddings shape: AxialPositionEmbeddings(
  (weights): ParameterList(
      (0): Parameter containing: [torch.FloatTensor of size 512x1x192]
      (1): Parameter containing: [torch.FloatTensor of size 1x1024x64]
  )
)
Num parameters of model: 2518528
--------------------------------------------------


1 / 2
2 / 2

--------------------------------------------------------------------------------
          Model Name             Batch Size     Seq Length     Time in s   
--------------------------------------------------------------------------------
Reformer-No-Axial-Pos-Embeddin       8              512            0.009     
Reformer-No-Axial-Po

In [22]:
# Experiment: axial position embeddings with per-axis dims (255, 1) on a (512, 1024) grid,
# compared against the same checkpoint config with axial position embeddings switched off.
# Only the longest sequence length (32768) is benchmarked in this cell.

# Baseline config: axial position embeddings off.
config_no_pos_axial_embeds = ReformerConfig.from_pretrained(
    "google/reformer-crime-and-punishment",
    axial_pos_embds=False,
)
# Variant config: axial position embeddings on.
config_pos_axial_embeds = ReformerConfig.from_pretrained(
    "google/reformer-crime-and-punishment",
    axial_pos_embds=True,
    axial_pos_embds_dim=(255, 1),
    axial_pos_shape=(512, 1024),
)

# Build each model in turn and report its position-embedding module and parameter count.
for section_title, section_config in (
    ("Default Positional Encodings", config_no_pos_axial_embeds),
    ("Axial Positional Encodings", config_pos_axial_embeds),
):
    print(section_title)
    print('-' * 50)
    model = ReformerModel(section_config)
    print(f"Positional embeddings shape: {model.embeddings.position_embeddings}")
    print(f"Num parameters of model: {model.num_parameters()}")
    print('-' * 50 + '\n\n')

# Benchmark both configs (speed enabled, environment printout disabled) at batch size 8.
benchmark_args = PyTorchBenchmarkArguments(
    sequence_lengths=[32768],
    batch_sizes=[8],
    models=["Reformer-No-Axial-Pos-Embeddings", "Reformer-Axial-Pos-Embeddings"],
    speed=True,
    env_print=False,
)
benchmark = PyTorchBenchmark(
    configs=[config_no_pos_axial_embeds, config_pos_axial_embeds],
    args=benchmark_args,
)
result = benchmark.run()

Default Positional Encodings
--------------------------------------------------
Positional embeddings shape: PositionEmbeddings(
  (embedding): Embedding(524288, 256)
)
Num parameters of model: 136572416
--------------------------------------------------


Axial Positional Encodings
--------------------------------------------------
Positional embeddings shape: AxialPositionEmbeddings(
  (weights): ParameterList(
      (0): Parameter containing: [torch.FloatTensor of size 512x1x255]
      (1): Parameter containing: [torch.FloatTensor of size 1x1024x1]
  )
)
Num parameters of model: 2486272
--------------------------------------------------


1 / 2
2 / 2

--------------------------------------------------------------------------------
          Model Name             Batch Size     Seq Length     Time in s   
--------------------------------------------------------------------------------
Reformer-No-Axial-Pos-Embeddin       8             32768           0.239     
Reformer-Axial-Pos-Em

In [21]:
# Experiment (reformer-crime-and-punishment): both configs override max_position_embeddings
# to 32768; the axial variant uses per-axis dims (128, 128) on a (32, 1024) grid.
# Only sequence length 32768 is benchmarked in this cell.

# Baseline config: axial position embeddings off.
config_no_pos_axial_embeds = ReformerConfig.from_pretrained(
    "google/reformer-crime-and-punishment",
    axial_pos_embds=False,
    max_position_embeddings=32768,
)
# Variant config: axial position embeddings on.
config_pos_axial_embeds = ReformerConfig.from_pretrained(
    "google/reformer-crime-and-punishment",
    axial_pos_embds=True,
    axial_pos_embds_dim=(128, 128),
    axial_pos_shape=(32, 1024),
    max_position_embeddings=32768,
)

# Build each model in turn and report its position-embedding module and parameter count.
for section_title, section_config in (
    ("Default Positional Encodings", config_no_pos_axial_embeds),
    ("Axial Positional Encodings", config_pos_axial_embeds),
):
    print(section_title)
    print('-' * 50)
    model = ReformerModel(section_config)
    print(f"Positional embeddings shape: {model.embeddings.position_embeddings}")
    print(f"Num parameters of model: {model.num_parameters()}")
    print('-' * 50 + '\n\n')

# Benchmark both configs (speed enabled, environment printout disabled) at batch size 8.
benchmark_args = PyTorchBenchmarkArguments(
    sequence_lengths=[32768],
    batch_sizes=[8],
    models=["Reformer-No-Axial-Pos-Embeddings", "Reformer-Axial-Pos-Embeddings"],
    speed=True,
    env_print=False,
)
benchmark = PyTorchBenchmark(
    configs=[config_no_pos_axial_embeds, config_pos_axial_embeds],
    args=benchmark_args,
)
result = benchmark.run()

Default Positional Encodings
--------------------------------------------------
Positional embeddings shape: PositionEmbeddings(
  (embedding): Embedding(32768, 256)
)
Num parameters of model: 10743296
--------------------------------------------------


Axial Positional Encodings
--------------------------------------------------
Positional embeddings shape: AxialPositionEmbeddings(
  (weights): ParameterList(
      (0): Parameter containing: [torch.FloatTensor of size 32x1x128]
      (1): Parameter containing: [torch.FloatTensor of size 1x1024x128]
  )
)
Num parameters of model: 2489856
--------------------------------------------------


1 / 2
2 / 2

--------------------------------------------------------------------------------
          Model Name             Batch Size     Seq Length     Time in s   
--------------------------------------------------------------------------------
Reformer-No-Axial-Pos-Embeddin       8             32768            0.24     
Reformer-Axial-Pos-Emb

In [12]:
# Experiment (reformer-crime-and-punishment): axial position embeddings with per-axis dims
# (128, 128) on a (512, 1024) grid versus axial position embeddings switched off,
# benchmarked on the shorter sequence lengths only.

# Baseline config: axial position embeddings off.
config_no_pos_axial_embeds = ReformerConfig.from_pretrained(
    "google/reformer-crime-and-punishment",
    axial_pos_embds=False,
)
# Variant config: axial position embeddings on.
config_pos_axial_embeds = ReformerConfig.from_pretrained(
    "google/reformer-crime-and-punishment",
    axial_pos_embds=True,
    axial_pos_embds_dim=(128, 128),
    axial_pos_shape=(512, 1024),
)

# Build each model in turn and report its position-embedding module and parameter count.
for section_title, section_config in (
    ("Default Positional Encodings", config_no_pos_axial_embeds),
    ("Axial Positional Encodings", config_pos_axial_embeds),
):
    print(section_title)
    print('-' * 50)
    model = ReformerModel(section_config)
    print(f"Positional embeddings shape: {model.embeddings.position_embeddings}")
    print(f"Num parameters of model: {model.num_parameters()}")
    print('-' * 50 + '\n\n')

# Benchmark both configs (speed enabled, environment printout disabled) at batch size 8.
benchmark_args = PyTorchBenchmarkArguments(
    sequence_lengths=[512, 1024, 2048],
    batch_sizes=[8],
    models=["Reformer-No-Axial-Pos-Embeddings", "Reformer-Axial-Pos-Embeddings"],
    speed=True,
    env_print=False,
)
benchmark = PyTorchBenchmark(
    configs=[config_no_pos_axial_embeds, config_pos_axial_embeds],
    args=benchmark_args,
)
result = benchmark.run()

Default Positional Encodings
--------------------------------------------------
Positional embeddings shape: PositionEmbeddings(
  (embedding): Embedding(524288, 256)
)
Num parameters of model: 136572416
--------------------------------------------------


Axial Positional Encodings
--------------------------------------------------
Positional embeddings shape: AxialPositionEmbeddings(
  (weights): ParameterList(
      (0): Parameter containing: [torch.FloatTensor of size 512x1x128]
      (1): Parameter containing: [torch.FloatTensor of size 1x1024x128]
  )
)
Num parameters of model: 2551296
--------------------------------------------------


1 / 2
2 / 2

--------------------------------------------------------------------------------
          Model Name             Batch Size     Seq Length     Time in s   
--------------------------------------------------------------------------------
Reformer-No-Axial-Pos-Embeddin       8              512            0.011     
Reformer-No-Axial-P

In [13]:

# Experiment (reformer-enwik8): axial position embeddings with per-axis dims (256, 768)
# on a (512, 128) grid versus axial position embeddings switched off.

# Baseline config: axial position embeddings off.
config_no_pos_axial_embeds = ReformerConfig.from_pretrained(
    "google/reformer-enwik8",
    axial_pos_embds=False,
)
# Variant config: axial position embeddings on.
config_pos_axial_embeds = ReformerConfig.from_pretrained(
    "google/reformer-enwik8",
    axial_pos_embds=True,
    axial_pos_embds_dim=(256, 768),
    axial_pos_shape=(512, 128),
)

# Build each model in turn and report its position-embedding module and parameter count.
for section_title, section_config in (
    ("Default Positional Encodings", config_no_pos_axial_embeds),
    ("Axial Positional Encodings", config_pos_axial_embeds),
):
    print(section_title)
    print('-' * 50)
    model = ReformerModel(section_config)
    print(f"Positional embeddings shape: {model.embeddings.position_embeddings}")
    print(f"Num parameters of model: {model.num_parameters()}")
    print('-' * 50 + '\n\n')

# Benchmark both configs (speed enabled, environment printout disabled) at batch size 8.
benchmark_args = PyTorchBenchmarkArguments(
    sequence_lengths=[512, 1024, 2048],
    batch_sizes=[8],
    models=["Reformer-No-Axial-Pos-Embeddings", "Reformer-Axial-Pos-Embeddings"],
    speed=True,
    env_print=False,
)
benchmark = PyTorchBenchmark(
    configs=[config_no_pos_axial_embeds, config_pos_axial_embeds],
    args=benchmark_args,
)
result = benchmark.run()


Default Positional Encodings
--------------------------------------------------
Positional embeddings shape: PositionEmbeddings(
  (embedding): Embedding(65536, 1024)
)
Num parameters of model: 215336960
--------------------------------------------------


Axial Positional Encodings
--------------------------------------------------
Positional embeddings shape: AxialPositionEmbeddings(
  (weights): ParameterList(
      (0): Parameter containing: [torch.FloatTensor of size 512x1x256]
      (1): Parameter containing: [torch.FloatTensor of size 1x128x768]
  )
)
Num parameters of model: 148457472
--------------------------------------------------


1 / 2
2 / 2

--------------------------------------------------------------------------------
          Model Name             Batch Size     Seq Length     Time in s   
--------------------------------------------------------------------------------
Reformer-No-Axial-Pos-Embeddin       8              512            0.299     
Reformer-No-Axial-

In [14]:

# Experiment (reformer-enwik8): axial position embeddings with per-axis dims (512, 512)
# on a (512, 128) grid versus axial position embeddings switched off.

# Baseline config: axial position embeddings off.
config_no_pos_axial_embeds = ReformerConfig.from_pretrained(
    "google/reformer-enwik8",
    axial_pos_embds=False,
)
# Variant config: axial position embeddings on.
config_pos_axial_embeds = ReformerConfig.from_pretrained(
    "google/reformer-enwik8",
    axial_pos_embds=True,
    axial_pos_embds_dim=(512, 512),
    axial_pos_shape=(512, 128),
)

# Build each model in turn and report its position-embedding module and parameter count.
for section_title, section_config in (
    ("Default Positional Encodings", config_no_pos_axial_embeds),
    ("Axial Positional Encodings", config_pos_axial_embeds),
):
    print(section_title)
    print('-' * 50)
    model = ReformerModel(section_config)
    print(f"Positional embeddings shape: {model.embeddings.position_embeddings}")
    print(f"Num parameters of model: {model.num_parameters()}")
    print('-' * 50 + '\n\n')

# Benchmark both configs (speed enabled, environment printout disabled) at batch size 8.
benchmark_args = PyTorchBenchmarkArguments(
    sequence_lengths=[512, 1024, 2048],
    batch_sizes=[8],
    models=["Reformer-No-Axial-Pos-Embeddings", "Reformer-Axial-Pos-Embeddings"],
    speed=True,
    env_print=False,
)
benchmark = PyTorchBenchmark(
    configs=[config_no_pos_axial_embeds, config_pos_axial_embeds],
    args=benchmark_args,
)
result = benchmark.run()


Default Positional Encodings
--------------------------------------------------
Positional embeddings shape: PositionEmbeddings(
  (embedding): Embedding(65536, 1024)
)
Num parameters of model: 215336960
--------------------------------------------------


Axial Positional Encodings
--------------------------------------------------
Positional embeddings shape: AxialPositionEmbeddings(
  (weights): ParameterList(
      (0): Parameter containing: [torch.FloatTensor of size 512x1x512]
      (1): Parameter containing: [torch.FloatTensor of size 1x128x512]
  )
)
Num parameters of model: 148555776
--------------------------------------------------


1 / 2
2 / 2

--------------------------------------------------------------------------------
          Model Name             Batch Size     Seq Length     Time in s   
--------------------------------------------------------------------------------
Reformer-No-Axial-Pos-Embeddin       8              512            0.299     
Reformer-No-Axial-

In [19]:
# Show the full crime-and-punishment configuration with axial position embeddings disabled.
# The config is rebuilt here instead of being read from `config_no_pos_axial_embeds`: in
# notebook order that name was last bound to the reformer-enwik8 config, whereas the output
# recorded for this cell (hidden_size 256, max_position_embeddings 524288) belongs to the
# crime-and-punishment checkpoint. Rebuilding keeps the cell reproducible on a fresh
# top-to-bottom run and leaves the shared variable untouched.
print(ReformerConfig.from_pretrained("google/reformer-crime-and-punishment", axial_pos_embds=False))

ReformerConfig {
  "architectures": [
    "ReformerModelWithLMHead"
  ],
  "attention_head_size": 64,
  "attention_probs_dropout_prob": 0.1,
  "attn_layers": [
    "local",
    "lsh",
    "local",
    "lsh",
    "local",
    "lsh"
  ],
  "axial_norm_std": 1.0,
  "axial_pos_embds": false,
  "axial_pos_embds_dim": [
    64,
    192
  ],
  "axial_pos_shape": [
    512,
    1024
  ],
  "chunk_size_lm_head": 0,
  "eos_token_id": 2,
  "feed_forward_size": 512,
  "hash_seed": null,
  "hidden_act": "relu",
  "hidden_dropout_prob": 0.05,
  "hidden_size": 256,
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "is_decoder": true,
  "layer_norm_eps": 1e-12,
  "local_attention_probs_dropout_prob": 0.05,
  "local_attn_chunk_length": 64,
  "local_num_chunks_after": 0,
  "local_num_chunks_before": 1,
  "lsh_attention_probs_dropout_prob": 0.0,
  "lsh_attn_chunk_length": 64,
  "lsh_num_chunks_after": 0,
  "lsh_num_chunks_before": 1,
  "max_position_embeddings": 524288,
  "model_type": "reforme