4 changes: 2 additions & 2 deletions src/lightning/fabric/plugins/precision/bitsandbytes.py
@@ -125,10 +125,10 @@ def init_context(self) -> ContextManager:
                 " `ignore_modules` or remove the `init_module` context manager."
             )
         dtype_ctx = _DtypeContextManager(self.dtype)
-        stack = ExitStack()
-        stack.enter_context(dtype_ctx)
         # TODO: this could also support replacing `Embedding` and `Conv1D`
         context_manager = _ClassReplacementContextManager({"torch.nn.Linear": self._linear_cls})
+        stack = ExitStack()
+        stack.enter_context(dtype_ctx)
         stack.enter_context(context_manager)
         return stack

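The reordering matters because `ExitStack.enter_context` enters a context immediately: in the old code the dtype context was already active when `_ClassReplacementContextManager` was constructed, so an exception in that constructor would leave the already-entered context in place forever, since the stack is never returned to a caller that could close it. A minimal sketch of the failure mode, using only `contextlib` (the names below are illustrative, not Lightning code):

```python
from contextlib import ExitStack, contextmanager


@contextmanager
def dtype_context():
    print("default dtype changed")
    try:
        yield
    finally:
        print("default dtype restored")


def old_order():
    stack = ExitStack()
    stack.enter_context(dtype_context())  # side effect happens here, immediately
    raise RuntimeError("constructing the next manager failed")
    # the stack is never returned or closed, so "restored" is never printed


def new_order():
    dtype_ctx = dtype_context()  # construct everything first; nothing entered yet
    stack = ExitStack()
    stack.enter_context(dtype_ctx)  # only entered once construction can no longer fail
    return stack
```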
13 changes: 7 additions & 6 deletions src/lightning/fabric/plugins/precision/transformer_engine.py
@@ -94,9 +94,8 @@ def convert_module(self, module: torch.nn.Module) -> torch.nn.Module:
         return module

     def init_context(self) -> ContextManager:
+        dtype_ctx = _DtypeContextManager(self.dtype)
         stack = ExitStack()
-        stack.enter_context(_DtypeContextManager(self.dtype))
-
         if self.replace_layers:
             import transformer_engine.pytorch as te

@@ -107,15 +106,17 @@ def init_context(self) -> ContextManager:
                 }
             )
             stack.enter_context(context_manager)
+        stack.enter_context(dtype_ctx)
         return stack

     def forward_context(self) -> ContextManager:
-        stack = ExitStack()
-        stack.enter_context(_DtypeContextManager(self.dtype))
-
+        dtype_ctx = _DtypeContextManager(self.dtype)
         import transformer_engine.pytorch as te

-        stack.enter_context(te.fp8_autocast(enabled=True, fp8_recipe=self.recipe))
+        autocast_ctx = te.fp8_autocast(enabled=True, fp8_recipe=self.recipe)
+        stack = ExitStack()
+        stack.enter_context(dtype_ctx)
+        stack.enter_context(autocast_ctx)
         return stack

     def convert_input(self, data: Any) -> Any:
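For both methods, the behavioral contract comes from the `enter_context` calls, not from where the managers are constructed: `ExitStack` enters contexts in the order they are passed to `enter_context` and exits them in reverse. A small standalone sketch of those semantics (plain `contextlib`, no Lightning or transformer_engine imports):

```python
from contextlib import ExitStack, contextmanager


@contextmanager
def ctx(name):
    print(f"enter {name}")
    try:
        yield
    finally:
        print(f"exit {name}")


dtype_ctx = ctx("dtype")            # constructed up front, like in the patch
autocast_ctx = ctx("fp8_autocast")

with ExitStack() as stack:
    stack.enter_context(dtype_ctx)      # entered first
    stack.enter_context(autocast_ctx)   # entered second
    print("forward pass runs here")
# output: enter dtype, enter fp8_autocast, forward pass runs here,
#         exit fp8_autocast, exit dtype  (LIFO unwinding)
```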
3 changes: 2 additions & 1 deletion src/lightning/fabric/strategies/deepspeed.py
@@ -350,10 +350,11 @@ def module_init_context(self, empty_init: Optional[bool] = None) -> ContextManager:
             raise NotImplementedError(
                 f"`{empty_init=}` is not a valid choice with `DeepSpeedStrategy` when ZeRO stage 3 is enabled."
             )
+        module_sharded_ctx = self.module_sharded_context()
         stack = ExitStack()
         if not self.zero_stage_3:
             stack.enter_context(super().module_init_context(empty_init=empty_init))
Contributor: for completeness, it would probably also be better to apply it to this line, right?

Contributor (author): I don't think this one matters, because in the super() call all the context managers are instantiated before any is entered.
-        stack.enter_context(self.module_sharded_context())
+        stack.enter_context(module_sharded_ctx)
         return stack

     def module_sharded_context(self) -> ContextManager:
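The author's point about the `super()` call can be illustrated with plain `contextlib`: `stack.enter_context(super().module_init_context(...))` receives an already-populated `ExitStack`, and entering it on the outer stack simply ties the inner stack's cleanup to the outer one. A sketch under that assumption (the helper names are made up for illustration):

```python
from contextlib import ExitStack, contextmanager


@contextmanager
def ctx(name):
    print("enter", name)
    try:
        yield
    finally:
        print("exit", name)


def base_module_init_context():
    # stand-in for the base-class method: everything is constructed before anything is entered
    inner = ExitStack()
    inner.enter_context(ctx("empty-init"))  # contexts are live once this returns
    return inner


outer = ExitStack()
outer.enter_context(base_module_init_context())  # registers the inner stack's cleanup
outer.enter_context(ctx("module-sharded"))
outer.close()
# output: enter empty-init, enter module-sharded, exit module-sharded, exit empty-init
```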
6 changes: 4 additions & 2 deletions src/lightning/fabric/strategies/fsdp.py
@@ -333,6 +333,8 @@ def module_to_device(self, module: Module) -> None:
         pass

     def module_init_context(self, empty_init: Optional[bool] = None) -> ContextManager:
+        precision_init_ctx = self.precision.init_context()
+        module_sharded_ctx = self.module_sharded_context()
         stack = ExitStack()
         if _TORCH_GREATER_EQUAL_2_1 and empty_init:
             # Materialization happens in `setup`. When modules get wrapped by FSDP, the sequence of operations is:
@@ -341,8 +343,8 @@ def module_init_context(self, empty_init: Optional[bool] = None) -> ContextManager:
             stack.enter_context(torch.device("meta"))
         elif _TORCH_GREATER_EQUAL_1_13:
             stack.enter_context(_EmptyInit(enabled=bool(empty_init)))
Contributor: why isn't it applied in the other places here?

Contributor (author): torch.device won't raise an exception, but I missed this _EmptyInit.

Contributor (author): Can you include it in #18734?

-        stack.enter_context(self.precision.init_context())
-        stack.enter_context(self.module_sharded_context())
+        stack.enter_context(precision_init_ctx)
+        stack.enter_context(module_sharded_ctx)
         return stack

     def module_sharded_context(self) -> ContextManager:
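On the review question above: constructing a `torch.device` just creates a plain object, so leaving `torch.device("meta")` inline cannot realistically fail or leak anything; only entering it as a context manager (supported since PyTorch 2.0) redirects tensor creation. A quick sketch:

```python
import torch

meta = torch.device("meta")  # construction alone has no side effects
with meta:                   # entering it (PyTorch >= 2.0) changes where tensors are created
    w = torch.empty(4, 4)
print(w.device)              # meta
```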
6 changes: 4 additions & 2 deletions src/lightning/fabric/strategies/strategy.py
@@ -120,10 +120,11 @@ def process_dataloader(self, dataloader: DataLoader) -> DataLoader:

     def tensor_init_context(self) -> ContextManager:
         """Controls how tensors get created (device, dtype)."""
+        precision_init_ctx = self.precision.init_context()
         stack = ExitStack()
         if _TORCH_GREATER_EQUAL_2_0:
             stack.enter_context(self.root_device)
-        stack.enter_context(self.precision.init_context())
+        stack.enter_context(precision_init_ctx)
         return stack

     def module_init_context(self, empty_init: Optional[bool] = None) -> ContextManager:
@@ -137,10 +138,11 @@ def module_init_context(self, empty_init: Optional[bool] = None) -> ContextManager:
                 If ``None``, the strategy will decide. Some strategies may not support all options.

         """
+        tensor_init_ctx = self.tensor_init_context()
         stack = ExitStack()
         if _TORCH_GREATER_EQUAL_1_13:
             stack.enter_context(_EmptyInit(enabled=bool(empty_init)))
-        stack.enter_context(self.tensor_init_context())
+        stack.enter_context(tensor_init_ctx)
         return stack

     def setup_module_and_optimizers(
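What `tensor_init_context` stacks can be approximated with plain PyTorch: a device context plus a default-dtype context, built before either is entered. A self-contained sketch (the `default_dtype` helper is a stand-in for Lightning's private `_DtypeContextManager`, not its real implementation):

```python
import torch
from contextlib import ExitStack, contextmanager


@contextmanager
def default_dtype(dtype):
    # swap the global default dtype and restore it on exit
    previous = torch.get_default_dtype()
    torch.set_default_dtype(dtype)
    try:
        yield
    finally:
        torch.set_default_dtype(previous)


dtype_ctx = default_dtype(torch.float64)   # instantiate first, mirroring the patched pattern
stack = ExitStack()
stack.enter_context(torch.device("cpu"))   # device as a context manager needs PyTorch >= 2.0
stack.enter_context(dtype_ctx)
with stack:
    t = torch.empty(2, 2)
print(t.dtype, t.device)                   # torch.float64 cpu
```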
6 changes: 4 additions & 2 deletions src/lightning/fabric/strategies/xla_fsdp.py
@@ -194,11 +194,13 @@ def module_to_device(self, module: Module) -> None:
         pass

     def module_init_context(self, empty_init: Optional[bool] = None) -> ContextManager:
+        precision_init_ctx = self.precision.init_context()
+        module_sharded_ctx = self.module_sharded_context()
         stack = ExitStack()
         if _TORCH_GREATER_EQUAL_1_13:
             stack.enter_context(_EmptyInit(enabled=bool(empty_init)))
-        stack.enter_context(self.precision.init_context())
-        stack.enter_context(self.module_sharded_context())
+        stack.enter_context(precision_init_ctx)
+        stack.enter_context(module_sharded_ctx)
         return stack

     def module_sharded_context(self) -> ContextManager: