## Model Loading
### Memory & Time

In [15]:
%%time
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM
def mem_info(func):
    """Decorator that prints how much free GPU memory a call to ``func`` used.

    Free memory (``torch.cuda.mem_get_info()[0]``) is sampled immediately
    before and after the call, and the difference is printed in GB
    (10**9 bytes).  A negative figure means free memory grew during the call.

    Args:
        func: Callable to wrap.

    Returns:
        A wrapper that forwards all arguments to ``func``, prints
        ``Memory used: <delta>`` and returns whatever ``func`` returned.
    """
    import functools  # local import so this cell does not depend on another import cell

    @functools.wraps(func)  # keep the wrapped function's name and docstring
    def wrapper(*args, **kw):
        pre = torch.cuda.mem_get_info()[0] / (10**9)
        result = func(*args, **kw)
        post = torch.cuda.mem_get_info()[0] / (10**9)
        print("Memory used:", pre - post)
        # Hand the result back; previously it was silently discarded.
        return result
    return wrapper

# Short lowercase alias -> Hugging Face Hub checkpoint id, one entry per OPT size used below.
# The alias is the lowercased size, while the Hub id keeps the original casing (e.g. "1.3B").
models = {
    f"opt-{size.lower()}": f"facebook/opt-{size}"
    for size in ("125m", "350m", "1.3B", "2.7B", "6.7B")
}

@mem_info
def load_model(model_dict, model):
    %time model_dict[model] = AutoModelForCausalLM.from_pretrained(models[model], ).cuda()

CPU times: user 1.12 s, sys: 321 ms, total: 1.45 s
Wall time: 33.5 s


In [16]:
# Registry of models loaded so far, keyed by the short names in `models`; load_model() fills it in.
loaded = {}
# load_model(loaded, "opt-125m")

In [None]:
# Load OPT-350M. The inference cells further down read loaded["opt-350m"], so this must run first.
load_model(loaded, "opt-350m")

In [4]:
# Load OPT-1.3B; load_model prints the load time and the drop in free GPU memory.
load_model(loaded, "opt-1.3b")

CPU times: user 7.67 s, sys: 2.26 s, total: 9.92 s
Wall time: 8.66 s
Memory used: 5.268045823999998


In [18]:
# Load OPT-2.7B; load_model prints the load time and the drop in free GPU memory.
load_model(loaded, "opt-2.7b")

CPU times: user 21.1 s, sys: 7.68 s, total: 28.8 s
Wall time: 25.4 s
Memory used: 10.745806848


In [None]:
# Load OPT-6.7B, the largest entry in `models`. No output was saved for this cell.
load_model(loaded, "opt-6.7b")

In [33]:
# Ask PyTorch's caching allocator to release cached GPU blocks that are not in use.
torch.cuda.empty_cache()

In [34]:
# Free GPU memory right now, in GB (10**9 bytes).
torch.cuda.mem_get_info()[0] / (10**9)

6.653673472

## Individual vs Batch Encoding

In [16]:
%%time
# Print free GPU memory (GB) before and after building the tokenizer to see whether it touches the GPU.
print(torch.cuda.mem_get_info()[0] / (10**9))
# Slow (use_fast=False) tokenizer with left padding: shorter prompts get <pad> tokens in front,
# so every prompt in a padded batch ends at the last position.
tokenizer = AutoTokenizer.from_pretrained("facebook/opt-125m", use_fast=False, padding_side='left',)
print(torch.cuda.mem_get_info()[0] / (10**9))

24.79194112
24.79194112
CPU times: user 77.4 ms, sys: 13 ms, total: 90.4 ms
Wall time: 2.25 s


In [6]:
# Model used for the encoding/inference comparison; requires the "opt-350m" load cell to have run.
model = loaded["opt-350m"]
# Two prompts of different length, so batching them needs padding.
sentence1 = "I like cheese"
sentence2 = "I really like cheese. I also really like sandwiches."
# Alternate the two prompts 100 times -> 200 sentences in total.
sentences = [sentence1, sentence2 ] * 100

In [7]:
%%time
# Free GPU memory (GB) before and after encoding everything in a single tokenizer call.
print(torch.cuda.mem_get_info()[0] / (10**9))
# One call for all sentences: pad to a common length, return PyTorch tensors, move them to GPU 0.
batch_tokens = tokenizer(sentences, padding=True, return_tensors="pt").to('cuda:0')
print(torch.cuda.mem_get_info()[0] / (10**9))

20.485308416
20.485308416
CPU times: user 14.2 ms, sys: 1.15 ms, total: 15.4 ms
Wall time: 14.5 ms


In [8]:
%%time
# Free GPU memory (GB) before and after generating for the whole padded batch in one call.
print(torch.cuda.mem_get_info()[0] / (10**9))
# Generate at most 5 new tokens per prompt and keep the per-step scores in the returned object.
# NOTE(review): only input_ids are passed; batch_tokens['attention_mask'] is not. Confirm that
# generate() treats the left padding as intended without it.
# NOTE(review): the decoded outputs further down contain '\n' followed by more text, so generation
# did not stop at the newline. Token id 198 may not be the newline token for this tokenizer -- verify
# with tokenizer.convert_ids_to_tokens(198).
outputs = model.generate(
    batch_tokens['input_ids'],
    max_new_tokens=5,
    temperature=0,
    return_dict_in_generate=True,
    output_scores=True,
    eos_token_id=198,  # special character 'ċ' (bytecode for new line?) NOTE use this for generation
)
print(torch.cuda.mem_get_info()[0] / (10**9))

20.485308416
9.116647424
CPU times: user 1.86 s, sys: 1.4 s, total: 3.26 s
Wall time: 3.27 s


In [16]:
%%time
# Free GPU memory (GB) before and after encoding the sentences one at a time.
print(torch.cuda.mem_get_info()[0] / (10**9))
# One tokenizer call per sentence (so no padding is needed); each encoding is moved to GPU 0 separately.
tokens = [tokenizer(x, return_tensors="pt").to('cuda:0') for x in sentences]
print(torch.cuda.mem_get_info()[0] / (10**9))

8.802074624
8.802074624
CPU times: user 32.1 ms, sys: 0 ns, total: 32.1 ms
Wall time: 31.6 ms


## Individual vs Batch Inference

In [10]:
%%time
# Free GPU memory (GB) before and after generating for each sentence separately (batch size 1).
print(torch.cuda.mem_get_info()[0] / (10**9))
# Same generation settings as the batched call, but one generate() call per encoded sentence.
# NOTE(review): the saved output of this cell lists input-id tensors that this code does not print,
# so it presumably comes from an earlier version of the cell -- re-run to refresh it.
looped = []
for x in tokens:
    looped.append(model.generate(
        x['input_ids'],
        max_new_tokens=5,
        temperature=0,
        return_dict_in_generate=True,
        output_scores=True,
        eos_token_id=198,  # special character 'ċ' (bytecode for new line?) NOTE use this for generation
    ))
print(torch.cuda.mem_get_info()[0] / (10**9))

9.116647424
tensor([[   2,  100,  101, 7134]], device='cuda:0')
tensor([[    2,   100,   269,   101,  7134,     4,    38,    67,   269,   101,
         19072,     4]], device='cuda:0')
tensor([[   2,  100,  101, 7134]], device='cuda:0')
tensor([[    2,   100,   269,   101,  7134,     4,    38,    67,   269,   101,
         19072,     4]], device='cuda:0')
tensor([[   2,  100,  101, 7134]], device='cuda:0')
tensor([[    2,   100,   269,   101,  7134,     4,    38,    67,   269,   101,
         19072,     4]], device='cuda:0')
tensor([[   2,  100,  101, 7134]], device='cuda:0')
tensor([[    2,   100,   269,   101,  7134,     4,    38,    67,   269,   101,
         19072,     4]], device='cuda:0')
tensor([[   2,  100,  101, 7134]], device='cuda:0')
tensor([[    2,   100,   269,   101,  7134,     4,    38,    67,   269,   101,
         19072,     4]], device='cuda:0')
tensor([[   2,  100,  101, 7134]], device='cuda:0')
tensor([[    2,   100,   269,   101,  7134,     4,    38,    67,   269,

tensor([[    2,   100,   269,   101,  7134,     4,    38,    67,   269,   101,
         19072,     4]], device='cuda:0')
tensor([[   2,  100,  101, 7134]], device='cuda:0')
tensor([[    2,   100,   269,   101,  7134,     4,    38,    67,   269,   101,
         19072,     4]], device='cuda:0')
tensor([[   2,  100,  101, 7134]], device='cuda:0')
tensor([[    2,   100,   269,   101,  7134,     4,    38,    67,   269,   101,
         19072,     4]], device='cuda:0')
tensor([[   2,  100,  101, 7134]], device='cuda:0')
tensor([[    2,   100,   269,   101,  7134,     4,    38,    67,   269,   101,
         19072,     4]], device='cuda:0')
tensor([[   2,  100,  101, 7134]], device='cuda:0')
tensor([[    2,   100,   269,   101,  7134,     4,    38,    67,   269,   101,
         19072,     4]], device='cuda:0')
tensor([[   2,  100,  101, 7134]], device='cuda:0')
tensor([[    2,   100,   269,   101,  7134,     4,    38,    67,   269,   101,
         19072,     4]], device='cuda:0')
tensor([[   2,

tensor([[   2,  100,  101, 7134]], device='cuda:0')
tensor([[    2,   100,   269,   101,  7134,     4,    38,    67,   269,   101,
         19072,     4]], device='cuda:0')
tensor([[   2,  100,  101, 7134]], device='cuda:0')
tensor([[    2,   100,   269,   101,  7134,     4,    38,    67,   269,   101,
         19072,     4]], device='cuda:0')
tensor([[   2,  100,  101, 7134]], device='cuda:0')
tensor([[    2,   100,   269,   101,  7134,     4,    38,    67,   269,   101,
         19072,     4]], device='cuda:0')
tensor([[   2,  100,  101, 7134]], device='cuda:0')
tensor([[    2,   100,   269,   101,  7134,     4,    38,    67,   269,   101,
         19072,     4]], device='cuda:0')
tensor([[   2,  100,  101, 7134]], device='cuda:0')
tensor([[    2,   100,   269,   101,  7134,     4,    38,    67,   269,   101,
         19072,     4]], device='cuda:0')
8.802074624
CPU times: user 33.9 s, sys: 15.7 s, total: 49.5 s
Wall time: 49.4 s


In [11]:
# Token ids returned by the batched generate() call: the (left-padded) prompt followed by the new tokens.
outputs.sequences[:]

tensor([[   1,    1,    1,  ...,  100,  101, 7134],
        [   2,  100,  269,  ...,  101, 7134,  350],
        [   1,    1,    1,  ...,  100,  101, 7134],
        ...,
        [   2,  100,  269,  ...,  101, 7134,  350],
        [   1,    1,    1,  ...,  100,  101, 7134],
        [   2,  100,  269,  ...,  101, 7134,  350]], device='cuda:0')

In [12]:
%%time
# Free GPU memory (GB) before and after decoding the batched result back to text.
print(torch.cuda.mem_get_info()[0] / (10**9))
# Decode every sequence in one call; special tokens are kept, so <pad> and </s> show up in the strings.
z = tokenizer.batch_decode(outputs.sequences[:])
print(torch.cuda.mem_get_info()[0] / (10**9))
print(z)

8.802074624
8.802074624
['<pad><pad><pad><pad><pad><pad><pad><pad></s>I like cheese.\nI like cheese', '</s>I really like cheese. I also really like sandwiches.\nI like cheese too', '<pad><pad><pad><pad><pad><pad><pad><pad></s>I like cheese.\nI like cheese', '</s>I really like cheese. I also really like sandwiches.\nI like cheese too', '<pad><pad><pad><pad><pad><pad><pad><pad></s>I like cheese.\nI like cheese', '</s>I really like cheese. I also really like sandwiches.\nI like cheese too', '<pad><pad><pad><pad><pad><pad><pad><pad></s>I like cheese.\nI like cheese', '</s>I really like cheese. I also really like sandwiches.\nI like cheese too', '<pad><pad><pad><pad><pad><pad><pad><pad></s>I like cheese.\nI like cheese', '</s>I really like cheese. I also really like sandwiches.\nI like cheese too', '<pad><pad><pad><pad><pad><pad><pad><pad></s>I like cheese.\nI like cheese', '</s>I really like cheese. I also really like sandwiches.\nI like cheese too', '<pad><pad><pad><pad><pad><pad><pad><pa

In [13]:
%%time
# Free GPU memory (GB) before and after decoding the one-at-a-time results.
print(torch.cuda.mem_get_info()[0] / (10**9))
# Each entry of `looped` holds a single sequence (batch size 1), hence sequences[0].
y = []
for x in looped:
    y.append(tokenizer.decode(x.sequences[0]))
print(torch.cuda.mem_get_info()[0] / (10**9))
print(y)

8.802074624
8.802074624
['</s>I like cheese.\nI like cheese', '</s>I really like cheese. I also really like sandwiches.\nI like cheese too', '</s>I like cheese.\nI like cheese', '</s>I really like cheese. I also really like sandwiches.\nI like cheese too', '</s>I like cheese.\nI like cheese', '</s>I really like cheese. I also really like sandwiches.\nI like cheese too', '</s>I like cheese.\nI like cheese', '</s>I really like cheese. I also really like sandwiches.\nI like cheese too', '</s>I like cheese.\nI like cheese', '</s>I really like cheese. I also really like sandwiches.\nI like cheese too', '</s>I like cheese.\nI like cheese', '</s>I really like cheese. I also really like sandwiches.\nI like cheese too', '</s>I like cheese.\nI like cheese', '</s>I really like cheese. I also really like sandwiches.\nI like cheese too', '</s>I like cheese.\nI like cheese', '</s>I really like cheese. I also really like sandwiches.\nI like cheese too', '</s>I like cheese.\nI like cheese', '</s>I rea

In [85]:
# Both values reported by mem_get_info() -- (free, total) per the PyTorch docs -- converted to GB.
torch.tensor(torch.cuda.mem_get_info()) / 10**9

tensor([46.1775, 47.8508])

In [29]:
# Padded input ids of the batch; the rows starting with 1s are the shorter prompt, padded on the left.
batch_tokens['input_ids']

tensor([[    1,     1,     1,  ...,   100,   101,  7134],
        [    2,   100,   269,  ...,   101, 19072,     4],
        [    1,     1,     1,  ...,   100,   101,  7134],
        ...,
        [    2,   100,   269,  ...,   101, 19072,     4],
        [    1,     1,     1,  ...,   100,   101,  7134],
        [    2,   100,   269,  ...,   101, 19072,     4]], device='cuda:0')

In [34]:
# Decode only the last 5 positions of each sequence, i.e. the tokens added by max_new_tokens=5.
# Assumes every sequence actually received 5 new tokens -- TODO confirm if early stopping is possible.
tokenizer.batch_decode(outputs.sequences[:, -5:])

['.\nI like cheese',
 '\nI like cheese too',
 '.\nI like cheese',
 '\nI like cheese too',
 '.\nI like cheese',
 '\nI like cheese too',
 '.\nI like cheese',
 '\nI like cheese too',
 '.\nI like cheese',
 '\nI like cheese too',
 '.\nI like cheese',
 '\nI like cheese too',
 '.\nI like cheese',
 '\nI like cheese too',
 '.\nI like cheese',
 '\nI like cheese too',
 '.\nI like cheese',
 '\nI like cheese too',
 '.\nI like cheese',
 '\nI like cheese too',
 '.\nI like cheese',
 '\nI like cheese too',
 '.\nI like cheese',
 '\nI like cheese too',
 '.\nI like cheese',
 '\nI like cheese too',
 '.\nI like cheese',
 '\nI like cheese too',
 '.\nI like cheese',
 '\nI like cheese too',
 '.\nI like cheese',
 '\nI like cheese too',
 '.\nI like cheese',
 '\nI like cheese too',
 '.\nI like cheese',
 '\nI like cheese too',
 '.\nI like cheese',
 '\nI like cheese too',
 '.\nI like cheese',
 '\nI like cheese too',
 '.\nI like cheese',
 '\nI like cheese too',
 '.\nI like cheese',
 '\nI like cheese too',
 '.\nI lik

## Testing

In [1]:
import os
import json
import torch
import sys
import gc
import time

def read_json(filepath: str) -> dict:
    """Parse the JSON document stored at ``filepath`` and return the result."""
    with open(filepath, mode="r") as handle:
        parsed = json.load(handle)
    return parsed
    
def read_jsonl(filepath: str) -> list:
    """Read a JSON Lines file and return its records in file order.

    Every non-blank line is parsed as one JSON document.  Blank or
    whitespace-only lines (for example an empty line at the end of the file)
    are skipped instead of raising ``json.JSONDecodeError``.

    Args:
        filepath: Path of the ``.jsonl`` file to read.

    Returns:
        A list with one parsed object per non-blank line; empty for an
        empty file.

    Raises:
        json.JSONDecodeError: If a non-blank line is not valid JSON.
    """
    data = []
    with open(filepath, "r") as f:
        # Iterate the handle directly so the file is streamed line by line
        # rather than loaded whole with readlines().
        for line in f:
            if not line.strip():
                continue
            data.append(json.loads(line))
    return data

def format_example(example : dict, dataset: str, includeLabel=False) -> str:
    """Render one SuperGLUE example as a prompt string.

    Args:
        example: Record holding the fields used by the chosen template
            (and ``'label'`` when ``includeLabel`` is true).
        dataset: One of 'BoolQ', 'COPA', 'RTE', 'WiC', 'WSC'.
        includeLabel: When true, append the label and a newline so the
            result can serve as a solved demonstration.

    Returns:
        The prompt text, ending right after the answer cue (no label) or
        after ``"<label>\\n"`` (with label).
    """
    assert dataset in ('BoolQ', 'COPA', 'RTE', 'WiC', 'WSC')

    if dataset == 'BoolQ':
        prompt = f"Passage: {example['passage']}\nQuestion: {example['question']}\nAnswer:"
    elif dataset == 'COPA':
        prompt = f"Premise: {example['premise']}\nQuestion: What's the {example['question']} of this?\nAlternative 1: {example['choice1']}\nAlternative 2: {example['choice2']}\nCorrect Alternative:"
    elif dataset == 'WSC':
        prompt = f"Text: {example['text']}\nQuestion: Does {example['target']['span2_text']} refer to {example['target']['span1_text']}?\nAnswer:"
    elif dataset == 'WiC':
        prompt = f"Context 1: {example['sentence1']}\nContext 2: {example['sentence2']}\nWord: {example['word']}\nSense Match:"
    elif dataset == 'RTE':
        prompt = f"Text: {example['premise']}\nHypothesis: {example['hypothesis']}\nEntailment:"
    else:
        # Only reachable when assertions are disabled; mirrors a failed table lookup.
        raise KeyError(dataset)

    if not includeLabel:
        return prompt

    # COPA labels are shifted by one so they match "Alternative 1" / "Alternative 2".
    shown_label = example['label'] + 1 if dataset == 'COPA' else example['label']
    return prompt + f"{shown_label}\n"

def create_model_input_text(
    test_example, train_examples, dataset, tokenizer
):
    """Build a few-shot prompt: labelled demonstrations followed by the unlabelled query.

    Each item of ``train_examples`` is formatted with its label, in the given
    order, and the results are concatenated; ``test_example`` is then appended
    without a label.  ``tokenizer`` is accepted but not used yet (see the TODO).
    """
    #TODO: worry about input len at some point
    shots = [format_example(shot, dataset, includeLabel=True) for shot in train_examples]
    query = format_example(test_example, dataset)
    return ''.join(shots) + query

def mem_info():
    """Print the current free GPU memory in GB (10**9 bytes).

    Note: this reuses the name of the ``mem_info`` decorator defined in the
    Model Loading section and rebinds it once this cell runs.
    """
    free_bytes = torch.cuda.mem_get_info()[0]
    print("Free(GB):", free_bytes / (10**9))

In [2]:
# Report free GPU memory before and after loading, to see what the model costs.
mem_info()
from transformers import AutoTokenizer, AutoModelForCausalLM
# NOTE(review): the tokenizer comes from opt-125m while the model is opt-1.3b -- presumably the
# OPT checkpoints share one vocabulary; confirm.
tokenizer = AutoTokenizer.from_pretrained("facebook/opt-125m", use_fast=False, padding_side='left',)
model = AutoModelForCausalLM.from_pretrained("facebook/opt-1.3b").cuda()
mem_info()

Free(GB): 47.12333312
Free(GB): 36.377526272


In [3]:
# Directory of one saved experiment, given relative to the notebook's working directory.
exp = os.path.join("..", "old_experiments", "BoolQ", "id_1_train_16_test_64")
# Index the train and test records by their 'idx' field for direct lookup.
train = read_jsonl(os.path.join(exp, "train.jsonl"))
train = {x['idx']: x for x in train}
test = read_jsonl(os.path.join(exp, "test.jsonl"))
test = {x['idx']: x for x in test}
# prompt_map: test id (string key) -> list of train-id groups; each group becomes one prompt.
prompt_map = read_json(os.path.join(exp, "generations", "similar_2_64_all-roberta-large-v1","prompt_map.json"))

In [4]:
# PyTorch allocator statistics before any generation has been run in this section.
print(torch.cuda.memory_summary())

|                  PyTorch CUDA memory summary, device ID 0                 |
|---------------------------------------------------------------------------|
|            CUDA OOMs: 0            |        cudaMalloc retries: 0         |
|        Metric         | Cur Usage  | Peak Usage | Tot Alloc  | Tot Freed  |
|---------------------------------------------------------------------------|
| Allocated memory      |   10243 MB |   10243 MB |   10243 MB |       0 B  |
|       from large pool |   10238 MB |   10238 MB |   10238 MB |       0 B  |
|       from small pool |       4 MB |       4 MB |       4 MB |       0 B  |
|---------------------------------------------------------------------------|
| Active memory         |   10243 MB |   10243 MB |   10243 MB |       0 B  |
|       from large pool |   10238 MB |   10238 MB |   10238 MB |       0 B  |
|       from small pool |       4 MB |       4 MB |       4 MB |       0 B  |
|---------------------------------------------------------------

In [7]:
%%time
# Generate for 64 prompts one at a time and print how much free GPU memory each call consumed.
# NOTE(review): outputs['22'] and batch_tokens['22'] are not created by any cell shown in this
# notebook (elsewhere `outputs` is a generate() result and `batch_tokens` a single encoding), so
# this cell depends on state from cells that were removed or changed -- confirm before re-running.
for k in range(64):
    print("Example", k)
    pre = torch.cuda.mem_get_info()[0] / (10**9)
    outputs['22'].append(model.generate(
        # Row k of the input ids, with a leading axis of length 1 added back by [None, :].
        batch_tokens['22']['input_ids'][k][None, :],
        max_new_tokens=5,
        temperature=0,
        return_dict_in_generate=True,
        output_scores=True,
        eos_token_id=198,  # special character 'ċ' (bytecode for new line?) NOTE use this for generation
    ))
    post = torch.cuda.mem_get_info()[0] / (10**9)
    print("Used: ", pre - post, "\n")

Example 0
Used:  1.0003415039999979 

Example 1
Used:  0.11324620799999963 

Example 2
Used:  0.11114905600000213 

Example 3
Used:  0.11114905600000213 

Example 4
Used:  0.11324620799999963 

Example 5
Used:  0.11114905600000213 

Example 6
Used:  0.11324620799999963 

Example 7
Used:  0.11114905599999503 

Example 8
Used:  0.11114905600000213 

Example 9
Used:  0.11324620799999963 

Example 10
Used:  0.11114905600000213 

Example 11
Used:  0.11324620799999963 

Example 12
Used:  0.11114905599999503 

Example 13
Used:  0.11114905600000213 

Example 14
Used:  0.11324620799999963 

Example 15
Used:  0.11114905600000213 

Example 16
Used:  0.11324620799999963 

Example 17
Used:  0.11114905600000213 

Example 18
Used:  0.11114905599999503 

Example 19
Used:  0.11324620799999963 

Example 20
Used:  0.11114905600000213 

Example 21
Used:  0.11324620799999963 

Example 22
Used:  0.11114905600000213 

Example 23
Used:  0.11114905600000213 

Example 24
Used:  0.11324620799999963 

Example 25


In [7]:
# Peek at the first prompt_map entry: print its key (a test id) and its train-id groups as tuples.
for k, v in list(prompt_map.items())[:1]:
    print(k)
    t = [tuple(x) for x in v]
    print(t)
    

22
[(2342, 2342), (2342, 3554), (2342, 6094), (2342, 7165), (2342, 9199), (2342, 453), (2342, 6114), (2342, 7378), (3554, 2342), (3554, 3554), (3554, 6094), (3554, 7165), (3554, 9199), (3554, 453), (3554, 6114), (3554, 7378), (6094, 2342), (6094, 3554), (6094, 6094), (6094, 7165), (6094, 9199), (6094, 453), (6094, 6114), (6094, 7378), (7165, 2342), (7165, 3554), (7165, 6094), (7165, 7165), (7165, 9199), (7165, 453), (7165, 6114), (7165, 7378), (9199, 2342), (9199, 3554), (9199, 6094), (9199, 7165), (9199, 9199), (9199, 453), (9199, 6114), (9199, 7378), (453, 2342), (453, 3554), (453, 6094), (453, 7165), (453, 9199), (453, 453), (453, 6114), (453, 7378), (6114, 2342), (6114, 3554), (6114, 6094), (6114, 7165), (6114, 9199), (6114, 453), (6114, 6114), (6114, 7378), (7378, 2342), (7378, 3554), (7378, 6094), (7378, 7165), (7378, 9199), (7378, 453), (7378, 6114), (7378, 7378)]


In [None]:
%%time
# For every test example, build one few-shot prompt per train-id group in prompt_map,
# generate a short continuation for each prompt in mini-batches, and store the decoded
# continuations in results[test_id] keyed by the tuple of train ids used for the prompt.
max_generated_len = 3
batch_size = 16  # maximum number of prompts per generate() call
results = {}
mem_info()
for test_id, prompt_ids in prompt_map.items():
    print("Example", test_id)

    # Tuples are hashable, so each group of train ids can be used as a dict key.
    train_ids = [tuple(ids) for ids in prompt_ids]
    preds = []

    if not prompt_ids:
        # Nothing to generate for this example; record an empty result rather than failing.
        results[test_id] = {}
        print(results[test_id])
        continue

    test_text = test[int(test_id)]
    input_text = [create_model_input_text(test_text, [train[int(z)] for z in a], 'BoolQ', tokenizer) for a in prompt_ids]
    batch_tokens = tokenizer(input_text, padding=True, return_tensors="pt").to('cuda:0')

    # All prompts are padded to this length, so generated tokens start at this column.
    prompt_len = batch_tokens['input_ids'].shape[1]

    # split(batch_size) yields chunks of at most batch_size rows (the last one may be smaller).
    # The previous chunk(int(n / batch_size)) asked for 0 chunks when n < batch_size and
    # produced chunks larger than batch_size when n was not a multiple of it.
    id_batches = batch_tokens['input_ids'].split(batch_size)
    mask_batches = batch_tokens['attention_mask'].split(batch_size)
    for batch, mask in zip(id_batches, mask_batches):
        outputs = model.generate(
            batch,
            attention_mask=mask,  # mark the left-padding explicitly instead of relying on inference
            max_new_tokens=max_generated_len,
            temperature=0,
            return_dict_in_generate=True,
            output_scores=True,
            # NOTE(review): earlier decoded outputs keep going after '\n', so 198 may not be the
            # newline id for this tokenizer -- verify with tokenizer.convert_ids_to_tokens(198).
            eos_token_id=198,  # special character 'ċ' (bytecode for new line?) NOTE use this for generation
        )

        # Keep everything after the prompt. Slicing the last max_generated_len columns would
        # pull in prompt tokens whenever a whole batch stops early.
        preds.extend(tokenizer.batch_decode(outputs.sequences[:, prompt_len:]))

        mem_info()

    results[test_id] = dict(zip(train_ids, preds))
    print(results[test_id])

Free(GB): 22.368550912
Example 22
Free(GB): 12.98169856
Free(GB): 11.207507968
Free(GB): 11.207507968
Free(GB): 11.207507968
{(2342, 2342): 'False\nPass', (2342, 3554): 'False\nPass', (2342, 6094): 'False\nPass', (2342, 7165): 'False\nPass', (2342, 9199): 'False\nPass', (2342, 453): 'False\nPass', (2342, 6114): 'False\nPass', (2342, 7378): 'False\nPass', (3554, 2342): 'True\nPass', (3554, 3554): 'True\nPass', (3554, 6094): 'False\nPass', (3554, 7165): 'True\nPass', (3554, 9199): 'True\nPass', (3554, 453): 'True\nPass', (3554, 6114): 'True\nPass', (3554, 7378): 'False\nPass', (6094, 2342): 'False\nPass', (6094, 3554): 'False\nPass', (6094, 6094): 'False\nPass', (6094, 7165): 'False\nPass', (6094, 9199): 'False\nPass', (6094, 453): 'False\nPass', (6094, 6114): 'False\nPass', (6094, 7378): 'False\nPass', (7165, 2342): 'False\nPass', (7165, 3554): 'True\nPass', (7165, 6094): 'False\nPass', (7165, 7165): ' True\nPass', (7165, 9199): 'False\nPass', (7165, 453): 'False\nPass', (7165, 6114): '

Free(GB): 1.497694208
Free(GB): 1.497694208
Free(GB): 1.497694208
Free(GB): 1.497694208
{(453, 453): 'False\nPass', (453, 69): 'False\nPass', (453, 9199): 'False\nPass', (453, 2342): 'False\nPass', (453, 3554): 'False\nPass', (453, 6114): 'False\nPass', (453, 6817): 'False\nPass', (453, 5710): 'True\nPass', (69, 453): 'True\nPass', (69, 69): 'True\nPass', (69, 9199): 'True\nPass', (69, 2342): 'True\nPass', (69, 3554): 'True\nPass', (69, 6114): 'True\nPass', (69, 6817): 'True\nPass', (69, 5710): 'True\nPass', (9199, 453): 'False\nPass', (9199, 69): 'True\nPass', (9199, 9199): 'False\nPass', (9199, 2342): 'False\nPass', (9199, 3554): 'False\nPass', (9199, 6114): 'False\nPass', (9199, 6817): 'True\nPass', (9199, 5710): 'True\nPass', (2342, 453): 'False\nPass', (2342, 69): 'True\nPass', (2342, 9199): 'False\nPass', (2342, 2342): 'False\nPass', (2342, 3554): 'True\nPass', (2342, 6114): 'False\nPass', (2342, 6817): 'True\nPass', (2342, 5710): 'True\nPass', (3554, 453): 'True\nPass', (3554, 6

In [16]:
# PyTorch allocator statistics after the evaluation loop; the saved output records one CUDA OOM.
print(torch.cuda.memory_summary())

|                  PyTorch CUDA memory summary, device ID 0                 |
|---------------------------------------------------------------------------|
|            CUDA OOMs: 1            |        cudaMalloc retries: 2         |
|        Metric         | Cur Usage  | Peak Usage | Tot Alloc  | Tot Freed  |
|---------------------------------------------------------------------------|
| Allocated memory      |   36388 MB |   43081 MB |    2989 GB |    2954 GB |
|       from large pool |   36380 MB |   43073 MB |    2968 GB |    2933 GB |
|       from small pool |       8 MB |      10 MB |      21 GB |      21 GB |
|---------------------------------------------------------------------------|
| Active memory         |   36388 MB |   43081 MB |    2989 GB |    2954 GB |
|       from large pool |   36380 MB |   43073 MB |    2968 GB |    2933 GB |
|       from small pool |       8 MB |      10 MB |      21 GB |      21 GB |
|---------------------------------------------------------------

In [20]:
# Inspect the last generate() result. The saved output is a NameError: `outputs` did not exist
# in the kernel when this cell was run.
outputs

NameError: name 'outputs' is not defined

In [18]:
# Check whether Python garbage collection or emptying the CUDA cache gives any GPU memory back:
# print free memory (GB) at the start, after gc.collect(), and after empty_cache().
print(torch.cuda.mem_get_info()[0] / (10**9))
gc.collect()
print(torch.cuda.mem_get_info()[0] / (10**9))
torch.cuda.empty_cache()
print(torch.cuda.mem_get_info()[0] / (10**9))

7.432634368
7.432634368
7.432634368


In [19]:
# Census of live tensors: walk every object tracked by the garbage collector, print each tensor
# (or object whose .data is a tensor) with its size, device index and storage size in GB, and
# total the storage bytes separately for nn.Parameter objects and for everything else.
i = 0           # number of tensor-like objects found
others = 0      # storage bytes held by non-Parameter tensors
parameters = 0  # storage bytes held by nn.Parameter objects
for obj in gc.get_objects():
    try:
        if torch.is_tensor(obj) or (hasattr(obj, 'data') and torch.is_tensor(obj.data)):
            print(type(obj), obj.size(), obj.get_device(), obj.storage().nbytes() / 10 **9)
            if isinstance(obj, torch.nn.parameter.Parameter):
                parameters+= obj.storage().nbytes()
            else:
                others+= obj.storage().nbytes()
            i+=1
    except Exception:
        # Best-effort scan: arbitrary objects may raise while being probed, so skip them.
        # Catch Exception rather than using a bare except so that KeyboardInterrupt and
        # SystemExit still stop the cell.
        pass
print("Tensors", i)
print("Parameters(GB)", parameters / 10 **9)
print("Others(GB)", others / 10 **9)

<class 'torch.nn.parameter.Parameter'> torch.Size([50272, 4096]) 0 0.823656448
<class 'torch.nn.parameter.Parameter'> torch.Size([2050, 4096]) 0 0.0335872
<class 'torch.nn.parameter.Parameter'> torch.Size([4096]) 0 1.6384e-05
<class 'torch.nn.parameter.Parameter'> torch.Size([4096]) 0 1.6384e-05
<class 'torch.nn.parameter.Parameter'> torch.Size([4096]) 0 1.6384e-05
<class 'torch.nn.parameter.Parameter'> torch.Size([4096]) 0 1.6384e-05
<class 'torch.nn.parameter.Parameter'> torch.Size([16384, 4096]) 0 0.268435456
<class 'torch.nn.parameter.Parameter'> torch.Size([16384]) 0 6.5536e-05
<class 'torch.nn.parameter.Parameter'> torch.Size([4096, 16384]) 0 0.268435456
<class 'torch.nn.parameter.Parameter'> torch.Size([4096]) 0 1.6384e-05
<class 'torch.nn.parameter.Parameter'> torch.Size([4096]) 0 1.6384e-05
<class 'torch.nn.parameter.Parameter'> torch.Size([4096]) 0 1.6384e-05
<class 'torch.nn.parameter.Parameter'> torch.Size([4096]) 0 1.6384e-05
<class 'torch.nn.parameter.Parameter'> torch.Si

