# Gradient Checkpointing with Blurr
> Train larger-than-memory HuggingFace models using the new GradientCheckpointing callback.

- toc: true
- branch: master
- badges: true
- comments: true
- categories: [HuggingFace, Fastai, Blurr]
- permalink: /gradient_checkpointing/

In [1]:
import os
# Set the HuggingFace tokenizers parallelism flag to "false" for this process.
# NOTE(review): presumably here to silence the tokenizers fork/parallelism
# warning when dataloader workers are spawned — confirm; it is set before the
# libraries below are imported.
os.environ["TOKENIZERS_PARALLELISM"] = "false"

In [2]:
# export
import importlib, sys, torch
from typing import Any, Callable, Dict, List, Optional, Union, Type

from fastcore.all import *
from fastai.callback.all import *
from fastai.imports import *
from fastai.learner import *
from fastai.torch_core import *
from transformers import PreTrainedModel

In [3]:
# hide_input
import gc, pdb

import GPUtil as GPU
from IPython.display import display
from fastai.text.all import *
from fastcore.test import *
from nbdev.showdoc import show_doc

from blurr.text.modeling.all import *

ModuleNotFoundError: No module named 'jsonschema'

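We'll begin by defining two small helpers: one to clear GPU memory between runs, and one to report how much GPU memory is currently in use.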

In [None]:
def clear_memory():
    """Free unreachable Python objects, then release PyTorch's cached CUDA memory.

    The garbage collector runs first so that tensors kept alive only by
    reference cycles are destroyed *before* the CUDA cache is emptied.
    ``torch.cuda.empty_cache()`` can only hand back blocks that no live tensor
    occupies, so emptying the cache before collecting would leave that memory
    reserved.

    Returns:
        None
    """
    gc.collect()
    torch.cuda.empty_cache()

In [None]:
def gpu_memory(device_idx=nvidia_smi_idx):
    """Return the `memoryUsed` reading GPUtil reports for one GPU.

    Args:
        device_idx: Position of the GPU in the list returned by
            `GPU.getGPUs()`. Defaults to `nvidia_smi_idx`.

    Returns:
        The `memoryUsed` attribute of the selected GPU entry (printed as MBs
        by the cells that call this helper).
    """
    gpus = GPU.getGPUs()
    selected = gpus[device_idx]
    return selected.memoryUsed

In [None]:
# Directory name used for model artifacts.
model_path = Path("models")

# Fetch the IMDB sample dataset through fastai and load its `texts.csv`
# into a DataFrame.
path = untar_data(URLs.IMDB_SAMPLE)
imdb_df = pd.read_csv(path.joinpath("texts.csv"))

In [None]:
# Build a sequence-classification learner for "roberta-large" straight from the
# IMDB DataFrame; `dl_kwargs` forwards `bs=4` to the dataloaders
# (presumably the batch size — kept small because the model is large).
learner = BlearnerForSequenceClassification.from_data(imdb_df, "roberta-large", dl_kwargs={"bs": 4})


In [None]:
# Baseline run: train for one epoch WITHOUT gradient checkpointing.
# The learner built earlier is bound to `learner` (not `learn`), so that is the
# name that must be used here.
learner.fit_one_cycle(1, lr_max=1e-3)

# Record how much GPU memory is in use after the baseline run; this is the
# reference value the checkpointed run is compared against.
base_mem = gpu_memory()
print(f"{base_mem} MBs used.")

# Clear gpu memory so the next measurement is not inflated by this run.
clear_memory()

In [None]:
# Second run: same learner and hyperparameters, this time with the
# GradientCheckpointing callback attached for the duration of the fit.
learner.fit_one_cycle(1, lr_max=1e-3, cbs=[GradientCheckpointing()])

# Measure GPU memory in use after the checkpointed run.
check_mem = gpu_memory()
print(f"{check_mem} MBs used.")

# The checkpointed run must report strictly less GPU memory than the baseline
# (`base_mem`, recorded after the earlier run without the callback).
test_eq(base_mem > check_mem, True)