In [19]:
control_codes = {
    'reset': 0,
    'bold': 1,
    'dim': 2,
    'italic': 3,
    'underline': 4,
    'reverse': 7,
    'black': 30,
    'red': 31,
    'green': 32,
    'yellow': 33,
    'blue': 34,
    'magenta': 35,
    'cyan': 36,
    'white': 37,
    'bg_black': 40,
    'bg_red': 41,
    'bg_green': 42,
    'bg_yellow': 43,
    'bg_blue': 44,
    'bg_magenta': 45,
    'bg_cyan': 46,
    'bg_white': 47,
}

def fmt_str(message: str, *formats: str) -> str:
    start = '\x1b[' + ';'.join([str(control_codes[i]) for i in formats]) + 'm'
    return start + message + '\x1b[0m'

def fail_str(message: str) -> str:
    return fmt_str(message, 'bold', 'red')

def pass_str(message: str) -> str:
    return fmt_str(message, 'bold', 'green')

def warn_str(message: str) -> str:
    return fmt_str(message, 'bold', 'yellow')

def info_str(message: str) -> str:
    return fmt_str(message, 'bold', 'blue')

def hint_str(message: str) -> str:
    return fmt_str(message, 'dim')


In [20]:
import torch
import transformers
import pandas as pd
import sys

df = pd.DataFrame(columns=[
    'Model',
    'LayerNorm',
    'Linear--LayerNorm',
    'Conv1D--LayerNorm',
    'Linear--LayerNorm (Ignore Dropout)',
    'Conv1D--LayerNorm (Ignore Dropout)'
])


def analyse(model_name: str):
    config = getattr(transformers, model_name + 'Config')()
    model = getattr(transformers, model_name + 'Model')(config)
    ram = sys.getsizeof(model)
    if ram > 12_884_901_888:
        print(warn_str(f'{model_name} is too big for this machine'))
        del model
        del config
        torch.cuda.empty_cache()
        torch.cuda.ipc_collect()
        raise MemoryError

    layer_info = []
    hooks = []

    def hook_pre_fn(module, input):
        try:
            input_shape = tuple(tuple(x.size()) for x in input)
        except AttributeError:
            input_shape = None

        layer_info.append(('pre', module.__class__.__name__, input_shape))

    def hook_fn(module, input, output):
        try:
            output_shape = tuple(tuple(x.size()) for x in output)
        except AttributeError:
            output_shape = None

        layer_info.append(('post', module.__class__.__name__, output_shape))

    for hook in hooks:
        hook.remove()

    for layer in model.named_modules():
        hooks.append(layer[1].register_forward_pre_hook(hook_pre_fn))
        hooks.append(layer[1].register_forward_hook(hook_fn))

    input_ids = torch.randint(0, 1000, (1, 128))
    model(input_ids)

    for hook in hooks:
        hook.remove()

    ln_cnt = 0
    linear_ln_cnt = 0
    conv1d_ln_cnt = 0
    linear_dropout_ln_cnt = 0
    conv1d_dropout_ln_cnt = 0

    indent = 0

    for i, layer in enumerate(layer_info):
        if layer[0] == 'pre':
            indent += 1
            # print('  ' * indent, '< ', layer[1], '<- ', layer[2], '>')
        elif layer[0] == 'post':
            # print('  ' * indent, '</', layer[1], ' ->', layer[2], '>')
            indent -= 1

    pure_layers = []
    for i in range(len(layer_info) - 1):
        if layer_info[i][0] == 'pre' and layer_info[i + 1][0] == 'post' and layer_info[i][1] == layer_info[i + 1][1]:
            pure_layers.append(layer_info[i][1])

    for i, layer in enumerate(pure_layers):
        if layer == 'LayerNorm':
            ln_cnt += 1
            if i - 1 > 0:
                if pure_layers[i - 1] == 'Linear':
                    linear_ln_cnt += 1
                elif i - 1 > 0 and pure_layers[i - 1] == 'Conv1D':
                    conv1d_ln_cnt += 1

    no_dropout_layers = [pure_layers[i] for i in range(len(pure_layers)) if pure_layers[i] != 'Dropout']

    for i, layer in enumerate(no_dropout_layers):
        if layer == 'LayerNorm':
            if i - 1 > 0:
                if no_dropout_layers[i - 1] == 'Linear':
                    linear_dropout_ln_cnt += 1
                elif i - 1 > 0 and no_dropout_layers[i - 1] == 'Conv1D':
                    conv1d_dropout_ln_cnt += 1

    # clean up RAM
    del model
    del config
    torch.cuda.empty_cache()
    torch.cuda.ipc_collect()

    return ln_cnt, linear_ln_cnt, linear_dropout_ln_cnt, conv1d_ln_cnt, conv1d_dropout_ln_cnt



In [21]:
with open('models.txt', 'r') as f:
    models = f.read().splitlines()


In [22]:
for model_name in models:
    print(hint_str(f'Analysing {model_name}'))
    try:
        ln_cnt, linear_ln_cnt, linear_dropout_ln_cnt, conv1d_ln_cnt, conv1d_dropout_ln_cnt = analyse(model_name)
    except Exception as e:
        print(fail_str(f'Error in {model_name}: {e}'))
        continue
    new_row = pd.DataFrame([{
        'Model': model_name,
        'LayerNorm': ln_cnt,
        'Linear--LayerNorm': linear_ln_cnt,
        'Conv1D--LayerNorm': conv1d_ln_cnt,
        'Linear--LayerNorm (Ignore Dropout)': linear_dropout_ln_cnt,
        'Conv1D--LayerNorm (Ignore Dropout)': conv1d_dropout_ln_cnt
    }])
    df = pd.concat([df, new_row], ignore_index=True)


[2mAnalysing Albert[0m
[2mAnalysing Bert[0m
[2mAnalysing DistilBert[0m
[2mAnalysing Electra[0m
[2mAnalysing GPT2[0m
[2mAnalysing GPTBigCode[0m
[2mAnalysing GPTNeo[0m
[2mAnalysing HuBert[0m
[1;31mError in HuBert: module transformers has no attribute HuBertConfig[0m
[2mAnalysing IBert[0m
[2mAnalysing ImageGPT[0m
[1;31mError in ImageGPT: index out of range in self[0m
[2mAnalysing Longformer[0m
[2mAnalysing LongT5[0m
[1;31mError in LongT5: You have to specify either decoder_input_ids or decoder_inputs_embeds[0m
[2mAnalysing MobileBert[0m
[2mAnalysing OpenAIGPT[0m
[2mAnalysing SqueezeBert[0m


In [23]:
df

Unnamed: 0,Model,LayerNorm,Linear--LayerNorm,Conv1D--LayerNorm,Linear--LayerNorm (Ignore Dropout),Conv1D--LayerNorm (Ignore Dropout)
0,Albert,25,12,0,24,0
1,Bert,25,0,0,24,0
2,DistilBert,13,6,0,12,0
3,Electra,25,0,0,24,0
4,GPT2,25,0,0,0,24
5,GPTBigCode,25,0,0,24,0
6,GPTNeo,49,0,0,48,0
7,IBert,0,0,0,0,0
8,Longformer,25,0,0,24,0
9,MobileBert,0,0,0,0,0


: 