Fix some typos (#1407)

* Fix some typos

Signed-off-by: Alexander Seiler <seileralex@gmail.com>

* Update test/features.jl

Co-authored-by: Brian Chen <ToucheSir@users.noreply.github.com>

---------

Signed-off-by: Alexander Seiler <seileralex@gmail.com>
Co-authored-by: Brian Chen <ToucheSir@users.noreply.github.com>
FluxML · Mar 30, 2023 · 413728b · 413728b
1 parent c2a736a
commit 413728b
Show file tree

Hide file tree

Showing 8 changed files with 11 additions and 11 deletions.
diff --git a/docs/src/adjoints.md b/docs/src/adjoints.md
@@ -7,7 +7,7 @@
     To define custom sensitivities using ChainRulesCore, define a `ChainRulesCore.rrule(f, args...; kwargs...)`. Head to [ChainRules project's documentation](https://www.juliadiff.org/ChainRulesCore.jl/stable/) for more information.
     **If you are defining your custom adjoints using ChainRulesCore then you do not need to read this page**, and can consider it as documenting a legacy feature.
 
-    This page exists to descibe how Zygote works, and how adjoints can be directly defined for Zygote.
+    This page exists to describe how Zygote works, and how adjoints can be directly defined for Zygote.
     Defining adjoints this way does not make them accessible to other AD systems, but does let you do things that directly depend on how Zygote works.
     It allows for specific definitions of adjoints that are only defined for Zygote (which might work differently to more generic definitions defined for all AD).
 

diff --git a/docs/src/index.md b/docs/src/index.md
@@ -131,7 +131,7 @@ julia> gradient(colordiff, RGB(1, 0, 0), RGB(0, 1, 0))
 
 ## Explicit and Implicit Parameters
 
-It's easy to work with even very large and complex models, and there are few ways to do this. Autograd-style models pass around a collection of weights. Depending on how you write your model, there are multiple ways to *explicity* take gradients with respect to parameters. For example, the function `linear` accepts the parameters as an argument to the model. So, we directly pass in the parameters, `θ`, as an argument to the function being differentiated.
+It's easy to work with even very large and complex models, and there are few ways to do this. Autograd-style models pass around a collection of weights. Depending on how you write your model, there are multiple ways to *explicitly* take gradients with respect to parameters. For example, the function `linear` accepts the parameters as an argument to the model. So, we directly pass in the parameters, `θ`, as an argument to the function being differentiated.
 
 ```@docs
 gradient(f, args...)

diff --git a/src/compiler/chainrules.jl b/src/compiler/chainrules.jl
@@ -56,15 +56,15 @@ function has_chain_rrule(T)
   # It can be seen that checking if it matches is the correct way to decide if we should use the rrule or not.
 
 
-  if !is_ambig && matching_cr_sig(no_rrule_m, rrule_m)  # Not ambigious, and opted-out.
+  if !is_ambig && matching_cr_sig(no_rrule_m, rrule_m)  # Not ambiguous, and opted-out.
     # Return instance for configured_rrule_m as that will be invalidated 
     # directly if configured rule added, or indirectly if unconfigured rule added
     # Do not need an edge for `no_rrule` as no addition of methods to that can cause this
     # decision to need to be revisited (only changes to `rrule`), since we are already not
     # using the rrule, so not using more rules wouldn't change anything.
     return false, configured_rrule_m.instance
   else
-    # Either is ambigious, and we should try to use it, and then error
+    # Either is ambiguous, and we should try to use it, and then error
     # or we are using an rrule, no need to add any edges for `rrule`, as it will generate
     # code with natural edges if a new method is defined there.
     # We also do not need an edge to `no_rrule`, as any time a method is added to `no_rrule`
@@ -78,7 +78,7 @@ matching_cr_sig(t, s) = matching_cr_sig(t.method.sig, s.method.sig)
 matching_cr_sig(::DataType, ::UnionAll) = false
 matching_cr_sig(::UnionAll, ::DataType) = false
 matching_cr_sig(t::Type, s::Type) = type_tuple_tail(t) == type_tuple_tail(s)
-matching_cr_sig(::Any, ::Nothing) = false  # ambigious https://github.com/FluxML/Zygote.jl/issues/1234
+matching_cr_sig(::Any, ::Nothing) = false  # ambiguous https://github.com/FluxML/Zygote.jl/issues/1234
 
 type_tuple_tail(d::DataType) = Tuple{d.parameters[2:end]...}
 function type_tuple_tail(d::UnionAll)

diff --git a/src/compiler/interface.jl b/src/compiler/interface.jl
@@ -168,7 +168,7 @@ gradient
     Params([A, B])
 
 Container for implicit parameters, used when differentiating
-a zero-argument funtion `() -> loss(A, B)` with respect to `A, B`.
+a zero-argument function `() -> loss(A, B)` with respect to `A, B`.
 """
 struct Params{B <: Buffer}
   order::B

diff --git a/src/lib/array.jl b/src/lib/array.jl
@@ -608,7 +608,7 @@ end
 # ChainRules has this also but does not use FillArrays, so we have our own definition
 # for improved performance. See https://github.com/JuliaDiff/ChainRules.jl/issues/46
 Zygote.@adjoint function LinearAlgebra.tr(x::AbstractMatrix)
-  # x is a squre matrix checked by tr,
+  # x is a square matrix checked by tr,
   # so we could just use Eye(size(x, 1))
   # to create a Diagonal
   tr(x), function (Δ::Number)

diff --git a/src/lib/broadcast.jl b/src/lib/broadcast.jl
@@ -369,7 +369,7 @@ using GPUArraysCore  # replaces @require CUDA block, weird indenting to preserve
     sum(xs, dims = dims), Δ -> (placeholder .= Δ,)
   end
 
-  # Make sure sum(f, ::CuArray) uses broadcase through forward-mode defined above
+  # Make sure sum(f, ::CuArray) uses broadcast through forward-mode defined above
   # Not the ChainRules.rrule which will use the Zygote.Context and thus not be GPU compatible
   function _pullback(cx::AContext, ::typeof(sum), f, xs::AbstractGPUArray)
     res, back = _pullback(cx, (f, xs) -> sum(f.(xs)), f, xs)

diff --git a/src/lib/grad.jl b/src/lib/grad.jl
@@ -11,7 +11,7 @@ Use gradient checkpointing on the call `f(xs...)`. This means that
 `checkpointed(f, xs...) === f(xs...)`, but when computing the derivative
 intermediate results from the forward pass of `f` will not be stored. Instead the forward
 pass will be repeated, when computing the derivative.
-This saves memory at the cost of increasing exectution time.
+This saves memory at the cost of increasing execution time.
 
 !!! warning
     If `f` is not a pure function, `checkpointed` will likely give wrong results.

diff --git a/test/features.jl b/test/features.jl
@@ -322,7 +322,7 @@ end[1] == 5
 
 @test gradient(x -> one(eltype(x)), rand(10))[1] === nothing
 
-# Thre-way control flow merge
+# Three-way control flow merge
 @test gradient(1) do x
   if x > 0
     x *= 2
@@ -486,7 +486,7 @@ end
   @test gradient(x -> (getindex.(x).^2)[1], Ref.(1:3))[1][1] == (x=2.0,)  # rest are (x = 0.0,), but nothing would be OK too
   @test gradient(x -> (prod.(getindex.(x)))[1], Ref.(eachcol([1 2; 3 4])))[1][1] == (x = [3.0, 1.0],)
 
-  # Broadcasting over Ref is handled specially. Tested elsehwere too.
+  # Broadcasting over Ref is handled specially. Tested elsewhere too.
   @test gradient(x -> sum(sum, x .* [1,2,3]), Ref([4,5])) == ((x = [6.0, 6.0],),)
   @test gradient(x -> sum(sum, Ref(x) .* [1,2,3]), [4,5]) == ([6.0, 6.0],)
 end