# Algorithm to Generate Decimal Representation
A function that takes a _base_, a vector of _digits_ and an _exponent_,
    then returns a decimal representation of the number.



In [18]:
"""
    getDecimal(base, d, e)

Return the floating-point value of the number whose digits in base `base` are `d`
and whose exponent is `e`, that is

    (d[1] + d[2]/base + d[3]/base^2 + ...) * base^e

# Arguments
- `base::Integer`: radix of the representation.
- `d::AbstractVector{<:Integer}`: digits, most significant first. The first digit
  has weight `base^0` before the final scaling by `base^e`.
- `e::Integer`: exponent applied to `base`.

# Returns
The value as a float (`Float64` for machine-integer arguments). With an empty `d`
the digit sum is zero, so the result is `0 * base^e`.
"""
function getDecimal(base::Integer, d::AbstractVector{<:Integer}, e::Integer)
    b = float(base)
    # Start from a float zero so the accumulator keeps one type through the loop.
    num = zero(b)
    # `enumerate` counts from 1 regardless of how `d` is indexed.
    for (k, digit) in enumerate(d)
        # The k-th digit carries weight base^(-(k - 1)).
        num += digit * b^(1 - k)
    end
    return num * b^e
end

getDecimal (generic function with 1 method)

# Algorithm to Generate a Base Representation


In [19]:
"""
    getDigits(decimalNum, base, digits)

Expand the non-negative number `decimalNum` in base `base`, keeping `digits` digits.

# Arguments
- `decimalNum`: the number to expand; must not be negative.
- `base::Integer`: radix of the expansion; must be at least 2.
- `digits::Integer`: how many leading digits to produce.

# Returns
A tuple `(d, e)`: `d` is a `Vector{Int64}` of length `digits` holding the leading
digits (most significant first, truncated rather than rounded) and `e` is the
`Int64` exponent, so that

    decimalNum ≈ (d[1] + d[2]/base + d[3]/base^2 + ...) * base^e

For positive input the expansion is normalized so that `1 <= d[1] < base`.
Zero is returned as all-zero digits with exponent `0`.

# Throws
- `ArgumentError` if `base < 2` (or if `digits` is negative, raised by `zeros`).
- `DomainError` if `decimalNum` is negative.
"""
function getDigits(decimalNum, base::Integer, digits::Integer)
    base >= 2 || throw(ArgumentError("base must be at least 2, got $base"))
    decimalNum < 0 &&
        throw(DomainError(decimalNum, "only non-negative numbers can be expanded"))

    d = zeros(Int64, digits)
    # Zero has no leading non-zero digit; log would give -Inf and floor would throw.
    iszero(decimalNum) && return d, zero(Int64)

    b = float(base)
    e = floor(Int64, log(b, decimalNum))
    num = decimalNum / b^e

    # The floating-point logarithm can land just on the wrong side of an integer
    # (e.g. log(100, 1000000) evaluates to 2.9999999999999996), which would leave
    # the leading "digit" equal to `base` or equal to 0. Re-normalize so that
    # 1 <= num < base before extracting digits.
    while num < 1
        e -= 1
        num = decimalNum / b^e
    end
    while num >= b
        e += 1
        num = decimalNum / b^e
    end

    for j in eachindex(d)
        # Peel off the integer part, then shift the remainder up one digit.
        d[j] = floor(Int64, num)
        num = (num - d[j]) * b
    end

    return d, e
end

# Sample value expanded in several bases by the example cells that follow.
decimal = 16.625

16.625

In [20]:
# Example 1: expand `decimal` in base 2 keeping 4 digits, then convert back.
base = 2
digits = 4
(d, e) = getDigits(decimal, base, digits)
approx = getDecimal(base, d, e)  # value recovered from the truncated expansion

printstyled("EXAMPLE 1:\n"; color=:red)
println("$decimal has a expansion in base $base with precision $digits \nof $d with exponent $e\n")
println("The decimal representation of $base-ary representation is $approx.\n")


[31mEXAMPLE 1:[39m
16.625 has a expansion in base 2 with precision 4 
of [1, 0, 0, 0] with exponent 4

The decimal representation of 2-ary representation is 16.0.



In [8]:
# Example 2: expand `decimal` in base 2 keeping 8 digits, then convert back.
base = 2
digits = 8
(d, e) = getDigits(decimal, base, digits)
approx = getDecimal(base, d, e)  # value recovered from the truncated expansion

printstyled("EXAMPLE 2:\n"; color=:red)
println("$decimal has a expansion in base $base with precision $digits \nof $d with exponent $e\n")
println("The decimal representation of $base-ary representation is $approx.\n")


[31mEXAMPLE 2:[39m
16.625 has a expansion in base 2 with precision 8 
of [1, 0, 0, 0, 0, 1, 0, 1] with exponent 4

The decimal representation of 2-ary representation is 16.625.



In [9]:
# Example 3: expand `decimal` in base 3 keeping 8 digits, then convert back.
base = 3
digits = 8
(d, e) = getDigits(decimal, base, digits)
approx = getDecimal(base, d, e)  # value recovered from the truncated expansion

printstyled("EXAMPLE 3:\n"; color=:red)
println("$decimal has a expansion in base $base with precision $digits \nof $d with exponent $e\n")
println("The decimal representation of $base-ary representation is $approx.\n")


[31mEXAMPLE 3:[39m
16.625 has a expansion in base 3 with precision 8 
of [1, 2, 1, 1, 2, 1, 2, 1] with exponent 2

The decimal representation of 3-ary representation is 16.62139917695473.



In [11]:
# Half-precision (Float16) landmark values

# The first Float16 above zero, compared against 2^-24.
printstyled("Nonnormalized Smallest Positive 16 Bit Float\n"; color=:red)
num1 = nextfloat(zero(Float16))
num2 = 2.0^(-24)
println("$num1 == 2.0^(-24) => $(num1 == num2)\n")

# The last finite Float16, compared against eleven binary ones scaled by 2^15.
printstyled("Largest Positive 16 Bit Float\n"; color=:red)
num1 = prevfloat(Inf16)
num2 = getDecimal(2, ones(Int64, 11), 15)
println("$num1 == getDecimal(2, ones(Int64,11),15) => $(num1 == num2)\n")

# Raw bit pattern of a sample Float16.
printstyled("Example representation\n"; color=:red)
println("Float16(1.391) has representation\n$(bitstring(Float16(1.391)))\n")

# The same sample value expanded to 11 binary digits with getDigits.
printstyled("Note: Binary Representation\n"; color=:red)
(d, e) = getDigits(1.391, 2, 11)
println("Digits: $d")
println("Exponent: $e")

[31mNonnormalized Smallest Positive 16 Bit Float[39m
6.0e-8 == 2.0^(-24) => true

[31mLargest Positive 16 Bit Float[39m
6.55e4 == getDecimal(2, ones(Int64,11),15) => true

[31mExample representation[39m
Float16(1.391) has representation
0011110110010000

[31mNote: Binary Representation[39m
Digits: [1, 0, 1, 1, 0, 0, 1, 0, 0, 0, 0]
Exponent: 0


In [12]:
# Single-precision (Float32) landmark values

# The first Float32 above zero, compared against 2^-149.
printstyled("Nonnormalized Smallest Positive 32 Bit Float\n"; color=:red)
num1 = nextfloat(zero(Float32))
num2 = 2.0^(-149)
println("$num1 == 2.0^(-149) => $(num1 == num2)\n")

# The last finite Float32, compared against 24 binary ones scaled by 2^127.
printstyled("Largest Positive 32 Bit Float\n"; color=:red)
num1 = prevfloat(Inf32)
num2 = getDecimal(2, ones(Int64, 24), 127)
println("$num1 == getDecimal(2, ones(Int64,24),127) => $(num1 == num2)\n")

[31mNonnormalized Smallest Positive 32 Bit Float[39m
1.0e-45 == 2.0^(-149) => true

[31mLargest Positive 32 Bit Float[39m
3.4028235e38 == getDecimal(2, ones(Int64,24),127) => true



In [13]:
# Floating-point approximation errors: when does adding a small
# perturbation to a Float32 leave it unchanged?

# Example 1: perturb 1e8 by 4.
printstyled("EXAMPLE 1:\n"; color=:red)
val1 = 1.0f8
pert1 = 4.0f0
println("32 Bit: $val1 == $val1 + $pert1?
    $(val1+pert1 == val1)\n")

# Example 2: perturb 1000 by 4 and by 1e-5.
printstyled("EXAMPLE 2:\n"; color=:red)
val1, pert1, pert2 = 1.0f3, 4.0f0, 1.0f-5
println("32 Bit: $val1 == $val1 + $pert1? $(val1+pert1 == val1)\n")
println("32 Bit: $val1 == $val1 + $pert2? $(val1+pert2 == val1)\n")


[31mEXAMPLE 1:[39m
32 Bit: 1.0e8 == 1.0e8 + 4.0?
    true

[31mEXAMPLE 2:[39m
32 Bit: 1000.0 == 1000.0 + 4.0? false

32 Bit: 1000.0 == 1000.0 + 1.0e-5? true



In [14]:
# Size of one unit in the last place (ULP) at a given Float64 value

# Example 3: `eps(x)` gives the spacing of floats at `x`.
printstyled("EXAMPLE 3:\n"; color=:red)
val1 = 1438.0
eps1 = eps(val1)
println("The value corresponding to 1 ULP for $val1 is $eps1 \n")


[31mEXAMPLE 3:[39m
The value corresponding to 1 ULP for 1438.0 is 2.2737367544323206e-13 



In [16]:
# Machine epsilon: the float spacing at 1.0 for each precision

# Example 4
printstyled("EXAMPLE 4:\n"; color=:red)

# 64-bit
val1 = one(Float64)
eps1 = eps(val1)
println("The value corresponding to 64 bit machine epsilon is $eps1 \n")

# 32-bit
val1 = one(Float32)
eps1 = eps(val1)
println("The value corresponding to 32 bit machine epsilon is $eps1 \n")

# 16-bit
val1 = one(Float16)
eps1 = eps(val1)
println("The value corresponding to 16 bit machine epsilon is $eps1 \n")


[31mEXAMPLE 4:[39m
The value corresponding to 64 bit machine epsilon is 2.220446049250313e-16 

The value corresponding to 32 bit machine epsilon is 1.1920929e-7 

The value corresponding to 16 bit machine epsilon is 0.000977 



In [17]:
# Relative error of a single floating-point addition

# Printed under the heading "EXAMPLE 4": compare 1.1 + 0.1 with the literal 1.2.
printstyled("EXAMPLE 4:\n"; color=:red)
val1, val2, val3 = 1.1, 0.1, 1.2
println("$val1 + $val2 = $val3, but in floating point arithmetic, $val1 + $val2 = $(val1 + val2), which is a relative error of $(((val1 + val2)- val3)/val3).\n")
# Twice machine epsilon, printed for comparison with the relative error above.
println("Note, twice 64 bit machine epsilon is $(2*eps(1.0))")

[31mEXAMPLE 4:[39m
1.1 + 0.1 = 1.2, but in floating point arithmetic, 1.1 + 0.1 = 1.2000000000000002, which is a relative error of 1.8503717077085943e-16.

Note, twice 64 bit machine epsilon is 4.440892098500626e-16
