diff --git a/questions/180_gradient_clipping/description.md b/questions/180_gradient_clipping/description.md
new file mode 100644
index 00000000..1f6aa67f
--- /dev/null
+++ b/questions/180_gradient_clipping/description.md
@@ -0,0 +1,3 @@
+## Problem
+
+Write a Python function `clip_gradients` that takes a numpy array of gradients and a float `max_norm`, and returns a new numpy array where the gradients are clipped so that their L2 norm does not exceed `max_norm`. If the L2 norm of the input gradients is less than or equal to `max_norm`, return the gradients unchanged. If it exceeds `max_norm`, scale all gradients so that their L2 norm equals `max_norm`. Only use standard Python and numpy. The returned array should be of type float and have the same shape as the input.
diff --git a/questions/180_gradient_clipping/example.json b/questions/180_gradient_clipping/example.json
new file mode 100644
index 00000000..88cdd989
--- /dev/null
+++ b/questions/180_gradient_clipping/example.json
@@ -0,0 +1,5 @@
+{
+    "input": "import numpy as np\ngradients = np.array([3.0, 4.0])\nmax_norm = 5.0\nclipped = clip_gradients(gradients, max_norm)\nprint(clipped)",
+    "output": "[3. 4.]",
+    "reasoning": "The L2 norm of [3.0, 4.0] is 5.0, which is equal to max_norm, so the gradients are returned unchanged."
+}
diff --git a/questions/180_gradient_clipping/learn.md b/questions/180_gradient_clipping/learn.md
new file mode 100644
index 00000000..2afb4fe7
--- /dev/null
+++ b/questions/180_gradient_clipping/learn.md
@@ -0,0 +1,41 @@
+# **Gradient Clipping**
+
+## **1. Definition**
+Gradient clipping is a technique used in machine learning to prevent the gradients from becoming too large during training, which can destabilize the learning process. It is especially important in training deep neural networks, where gradients can sometimes explode to very large values (the "exploding gradients" problem).
+
+**Gradient clipping** works by scaling the gradients if their norm exceeds a specified threshold (max_norm). The most common form is L2-norm clipping, where the entire gradient vector is rescaled so that its L2 norm is at most `max_norm`.
+
+## **2. Why Use Gradient Clipping?**
+* **Stabilizes Training:** Prevents the optimizer from making excessively large updates, which can cause the loss to diverge or become NaN.
+* **Enables Deeper Networks:** Makes it feasible to train deeper or recurrent neural networks, where exploding gradients are more likely.
+* **Improves Convergence:** Helps the model converge more reliably by keeping updates within a reasonable range.
+
+## **3. Gradient Clipping Mechanism**
+Given a gradient vector $g$ and a maximum norm $M$ (max_norm), the clipped gradient $g'$ is computed as:
+
+$$
+g' = \begin{cases}
+g & \text{if } \|g\|_2 \leq M \\
+g \times \frac{M}{\|g\|_2} & \text{otherwise}
+\end{cases}
+$$
+
+Where:
+* $g$: The original gradient vector (numpy array)
+* $M$: The maximum allowed L2 norm (max_norm)
+* $\|g\|_2$: The L2 norm of $g$
+* $g'$: The clipped gradient vector
+
+**Example:**
+If $g = [6, 8]$ and $M = 5$:
+* $\|g\|_2 = \sqrt{6^2 + 8^2} = 10$
+* Since $10 > 5$, we scale $g$ by $5/10 = 0.5$, so $g' = [3, 4]$
+
+## **4. Applications of Gradient Clipping**
+Gradient clipping is widely used in training:
+* **Recurrent Neural Networks (RNNs):** To prevent exploding gradients in long sequences.
+* **Deep Neural Networks:** For stable training of very deep architectures.
+* **Reinforcement Learning:** Where gradients can be highly variable.
+* **Any optimization problem** where gradient explosion is a risk.
+
+Gradient clipping is a simple yet powerful tool to ensure stable and effective training in modern machine learning workflows.
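To make the mechanism in `learn.md` concrete, here is a minimal numpy sketch that replays the lesson's worked example ($g = [6, 8]$, $M = 5$); the variable names are illustrative only.

```python
import numpy as np

# Worked example from learn.md: g = [6, 8], M = 5.
g = np.array([6.0, 8.0])
M = 5.0

norm = np.linalg.norm(g)  # sqrt(6^2 + 8^2) = 10.0
if norm > M:
    g = g * (M / norm)    # scale factor 5/10 = 0.5

print(g)  # [3. 4.]  (L2 norm is now exactly 5.0)
```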
diff --git a/questions/180_gradient_clipping/meta.json b/questions/180_gradient_clipping/meta.json
new file mode 100644
index 00000000..f4ca3b27
--- /dev/null
+++ b/questions/180_gradient_clipping/meta.json
@@ -0,0 +1,15 @@
+{
+    "id": "180",
+    "title": "Gradient Clipping (L2 Norm)",
+    "difficulty": "easy",
+    "category": "Machine Learning",
+    "video": "",
+    "likes": "0",
+    "dislikes": "0",
+    "contributor": [
+        {
+            "profile_link": "https://github.com/komaksym",
+            "name": "komaksym"
+        }
+    ]
+}
diff --git a/questions/180_gradient_clipping/pytorch/solution.py b/questions/180_gradient_clipping/pytorch/solution.py
new file mode 100644
index 00000000..9b74bcbd
--- /dev/null
+++ b/questions/180_gradient_clipping/pytorch/solution.py
@@ -0,0 +1,2 @@
+def your_function(...):
+    ...
diff --git a/questions/180_gradient_clipping/pytorch/starter_code.py b/questions/180_gradient_clipping/pytorch/starter_code.py
new file mode 100644
index 00000000..d3e5beb5
--- /dev/null
+++ b/questions/180_gradient_clipping/pytorch/starter_code.py
@@ -0,0 +1,2 @@
+def your_function(...):
+    pass
diff --git a/questions/180_gradient_clipping/pytorch/tests.json b/questions/180_gradient_clipping/pytorch/tests.json
new file mode 100644
index 00000000..e4e4b180
--- /dev/null
+++ b/questions/180_gradient_clipping/pytorch/tests.json
@@ -0,0 +1,6 @@
+[
+    {
+        "test": "print(your_function(...))",
+        "expected_output": "..."
+    }
+]
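The `pytorch/` files above are unfilled template placeholders (`def your_function(...)` is not valid Python). A possible PyTorch counterpart of the numpy reference solution is sketched below; the `clip_gradients` signature is assumed to mirror the numpy version, and for real training loops PyTorch's built-in `torch.nn.utils.clip_grad_norm_` serves the same purpose.

```python
import torch

def clip_gradients(gradients: torch.Tensor, max_norm: float) -> torch.Tensor:
    """Clip a gradient tensor so that its L2 norm does not exceed max_norm."""
    grads = gradients.float()
    # With no dim argument, torch.linalg.norm flattens the tensor and
    # returns the 2-norm over all elements, like np.linalg.norm.
    norm = torch.linalg.norm(grads)
    # Leave gradients unchanged when within budget; the norm == 0 check
    # avoids dividing by zero for an all-zero gradient.
    if norm <= max_norm or norm == 0:
        return grads
    # Rescale so the resulting L2 norm equals max_norm.
    return grads * (max_norm / norm)
```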
+ """ + pass diff --git a/questions/180_gradient_clipping/tests.json b/questions/180_gradient_clipping/tests.json new file mode 100644 index 00000000..8efd3dc8 --- /dev/null +++ b/questions/180_gradient_clipping/tests.json @@ -0,0 +1,30 @@ +[ + { + "test": "import numpy as np\ngradients = np.array([3.0, 4.0])\nmax_norm = 5.0\nprint(clip_gradients(gradients, max_norm))", + "expected_output": "[3. 4.]" + }, + { + "test": "import numpy as np\ngradients = np.array([6.0, 8.0])\nmax_norm = 5.0\nprint(clip_gradients(gradients, max_norm))", + "expected_output": "[3. 4.]" + }, + { + "test": "import numpy as np\ngradients = np.array([0.0, 0.0])\nmax_norm = 1.0\nprint(clip_gradients(gradients, max_norm))", + "expected_output": "[0. 0.]" + }, + { + "test": "import numpy as np\ngradients = np.array([1.0, 2.0, 2.0])\nmax_norm = 3.0\nprint(clip_gradients(gradients, max_norm))", + "expected_output": "[1. 2. 2.]" + }, + { + "test": "import numpy as np\ngradients = np.array([10.0, 0.0])\nmax_norm = 5.0\nprint(clip_gradients(gradients, max_norm))", + "expected_output": "[5. 0.]" + }, + { + "test": "import numpy as np\ngradients = np.array([-3.0, -4.0])\nmax_norm = 5.0\nprint(clip_gradients(gradients, max_norm))", + "expected_output": "[-3. -4.]" + }, + { + "test": "import numpy as np\ngradients = np.array([-6.0, -8.0])\nmax_norm = 5.0\nprint(clip_gradients(gradients, max_norm))", + "expected_output": "[-3. -4.]" + } +] diff --git a/questions/180_gradient_clipping/tinygrad/solution.py b/questions/180_gradient_clipping/tinygrad/solution.py new file mode 100644 index 00000000..9b74bcbd --- /dev/null +++ b/questions/180_gradient_clipping/tinygrad/solution.py @@ -0,0 +1,2 @@ +def your_function(...): + ... diff --git a/questions/180_gradient_clipping/tinygrad/starter_code.py b/questions/180_gradient_clipping/tinygrad/starter_code.py new file mode 100644 index 00000000..d3e5beb5 --- /dev/null +++ b/questions/180_gradient_clipping/tinygrad/starter_code.py @@ -0,0 +1,2 @@ +def your_function(...): + pass diff --git a/questions/180_gradient_clipping/tinygrad/tests.json b/questions/180_gradient_clipping/tinygrad/tests.json new file mode 100644 index 00000000..e4e4b180 --- /dev/null +++ b/questions/180_gradient_clipping/tinygrad/tests.json @@ -0,0 +1,6 @@ +[ + { + "test": "print(your_function(...))", + "expected_output": "..." + } +]