In [None]:
# script source --> https://colab.research.google.com/github/ageron/julia_notebooks/blob/master/Julia_Colab_Notebook_Template.ipynb

%%shell
set -e

#---------------------------------------------------#
JULIA_VERSION="1.8.3" # any version ≥ 0.7.0
JULIA_PACKAGES="IJulia"
JULIA_PACKAGES_IF_GPU="CUDA" # or CuArrays for older Julia versions
JULIA_NUM_THREADS=2
#---------------------------------------------------#

if [ -z `which julia` ]; then
  # Install Julia
  JULIA_VER=`cut -d '.' -f -2 <<< "$JULIA_VERSION"`
  echo "Installing Julia $JULIA_VERSION on the current Colab Runtime..."
  BASE_URL="https://julialang-s3.julialang.org/bin/linux/x64"
  URL="$BASE_URL/$JULIA_VER/julia-$JULIA_VERSION-linux-x86_64.tar.gz"
  wget -nv $URL -O /tmp/julia.tar.gz # -nv means "not verbose"
  tar -x -f /tmp/julia.tar.gz -C /usr/local --strip-components 1
  rm /tmp/julia.tar.gz

  # Install Packages
  nvidia-smi -L &> /dev/null && export GPU=1 || export GPU=0
  if [ $GPU -eq 1 ]; then
    JULIA_PACKAGES="$JULIA_PACKAGES $JULIA_PACKAGES_IF_GPU"
  fi
  for PKG in `echo $JULIA_PACKAGES`; do
    echo "Installing Julia package $PKG..."
    julia -e 'using Pkg; pkg"add '$PKG'; precompile;"' &> /dev/null
  done

  # Install kernel and rename it to "julia"
  echo "Installing IJulia kernel..."
  julia -e 'using IJulia; IJulia.installkernel("julia", env=Dict(
      "JULIA_NUM_THREADS"=>"'"$JULIA_NUM_THREADS"'"))'
  KERNEL_DIR=`julia -e "using IJulia; print(IJulia.kerneldir())"`
  KERNEL_NAME=`ls -d "$KERNEL_DIR"/julia*`
  mv -f $KERNEL_NAME "$KERNEL_DIR"/julia

  echo ''
  echo "Successfully installed `julia -v`!"
  echo "Please reload this page (press Ctrl+R, ⌘+R, or the F5 key) then"
  echo "jump to the 'Checking the Installation' section."
fi

Installing Julia 1.8.3 on the current Colab Runtime...
2023-08-23 22:25:41 URL:https://storage.googleapis.com/julialang2/bin/linux/x64/1.8/julia-1.8.3-linux-x86_64.tar.gz [130030846/130030846] -> "/tmp/julia.tar.gz" [1]
Installing Julia package IJulia...
Installing Julia package CUDA...
Installing IJulia kernel...
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39mInstalling julia kernelspec in /root/.local/share/jupyter/kernels/julia-1.8

Successfully installed julia version 1.8.3!
Please reload this page (press Ctrl+R, ⌘+R, or the F5 key) then
jump to the 'Checking the Installation' section.




In [None]:
versioninfo()

Julia Version 1.8.3
Commit 0434deb161e (2022-11-14 20:14 UTC)
Platform Info:
  OS: Linux (x86_64-linux-gnu)
  CPU: 2 × Intel(R) Xeon(R) CPU @ 2.20GHz
  WORD_SIZE: 64
  LIBM: libopenlibm
  LLVM: libLLVM-13.0.1 (ORCJIT, broadwell)
  Threads: 2 on 2 virtual cores
Environment:
  LD_LIBRARY_PATH = /usr/local/nvidia/lib:/usr/local/nvidia/lib64
  JULIA_NUM_THREADS = 2


In [1]:
using Pkg
Pkg.add(["Flux", "MLDatasets", "JSON", "CUDA", "Statistics"])

[32m[1m    Updating[22m[39m registry at `~/.julia/registries/General.toml`
[32m[1m   Resolving[22m[39m package versions...
[32m[1m   Installed[22m[39m OffsetArrays ──────────────── v1.12.10
[32m[1m   Installed[22m[39m cuDNN ─────────────────────── v1.1.0
[32m[1m   Installed[22m[39m ContextVariablesX ─────────── v0.1.3
[32m[1m   Installed[22m[39m ShowCases ─────────────────── v0.1.0
[32m[1m   Installed[22m[39m LoggingExtras ─────────────── v1.0.1
[32m[1m   Installed[22m[39m NNlibCUDA ─────────────────── v0.2.7
[32m[1m   Installed[22m[39m Unitful ───────────────────── v1.16.3
[32m[1m   Installed[22m[39m ZipFile ───────────────────── v0.10.1
[32m[1m   Installed[22m[39m ConcurrentUtilities ───────── v2.2.1
[32m[1m   Installed[22m[39m InlineStrings ─────────────── v1.4.0
[32m[1m   Installed[22m[39m Optimisers ────────────────── v0.2.20
[32m[1m   Installed[22m[39m NNlib ─────────────────────── v0.8.21
[32m[1m   Installed[22m[39m Ini

In [1]:
using Flux
using Flux.Data: DataLoader
using MLDatasets

# Load MNIST dataset
train_X, train_y = MNIST(split=:train)[:]
test_X, test_y = MNIST(split=:test)[:]

# Reshape dataset
train_X = reshape(train_X, (28, 28, 1, :))
test_X = reshape(test_X, (28, 28, 1, :))

# One-hot encode target values
train_y = Flux.onehotbatch(train_y, 0:9)
test_y = Flux.onehotbatch(test_y, 0:9)

# Normalize data
train_X /= 255.0
test_X /= 255.0

println("Shape of train_X: $(size(train_X))")
println("Shape of test_X: $(size(test_X))")


Shape of train_X: (28, 28, 1, 60000)
Shape of test_X: (28, 28, 1, 10000)


# GPU

In [2]:
using Flux

calc_device = gpu

model = Chain(
    Conv((3, 3), 1=>32, pad=(1, 1), selu),
    MaxPool((2, 2)),
    Conv((3, 3), 32=>64, pad=(1, 1), selu),
    MaxPool((2, 2)),
    x -> reshape(x, :, size(x, 4)),  # Flatten layer
    Dense(3136, 64),
    selu,
    Dense(64, 10),  # No activation here
    softmax
)  |> calc_device


Chain(
  Conv((3, 3), 1 => 32, selu, pad=1),   [90m# 320 parameters[39m
  MaxPool((2, 2)),
  Conv((3, 3), 32 => 64, selu, pad=1),  [90m# 18_496 parameters[39m
  MaxPool((2, 2)),
  var"#1#2"(),
  Dense(3136 => 64),                    [90m# 200_768 parameters[39m
  NNlib.selu,
  Dense(64 => 10),                      [90m# 650 parameters[39m
  NNlib.softmax,
) [90m                  # Total: 8 arrays, [39m220_234 parameters, 1.719 KiB.

In [3]:
# Define loss function and optimizer
using Statistics

loss(x, y) = Flux.crossentropy(model(x), y)
accuracy(X, y) = Statistics.mean(Flux.onecold(model(X)) .== Flux.onecold(y))
optimizer = ADAM(0.001)

Adam(0.001, (0.9, 0.999), 1.0e-8, IdDict{Any, Any}())

In [23]:


function get_gpu_memory_usage()
    # Get the current memory usage of the GPU
    test = CUDA.get_current_device().memory_allocated()

    return test
end

get_gpu_memory_usage (generic function with 2 methods)

In [22]:
CUDA.memory_status()

LoadError: ignored

In [5]:
using Flux
using CUDA

# Training configuration
batch_size = 64
num_epochs = 10

# Initialize variables for monitoring
total_training_time = 0.0
total_memory_usage = 0
throughputs = []

monitoring_data = Dict(
    "epoch" => [],
    "epoch_memory_usage" => [],
    "epoch_time" => [],
    "batch_processing_time" => [],
    "throughput" => [],
    "accuracy" => [],
    "loss" => [],
)

# Start training loop
for epoch in 1:num_epochs
    println("Epoch $epoch/$num_epochs")

    # Initialize variables for epoch-level monitoring
    total_batch_processing_time = 0.0
    epoch_memory_use = 0
    epoch_start_time = time()

    loss_value = 0.0
    accuracy_value = 0.0

    coef = 0


    # mem_info = CUDA.memoryinfo()

    for batch_start in 1:batch_size:size(train_X, 4)


        batch_start_time = time()

        batch_end = min(batch_start + batch_size - 1, size(train_X, 4))

        batch_x = train_X[:, :, :, batch_start:batch_end]
        batch_y = train_y[:, batch_start:batch_end]


        # Perform training step
        #loss_value = Flux.train!(loss, Flux.params(model), DataLoader((batch_x, batch_y)), optimizer)

        x, y = calc_device(batch_x), calc_device(batch_y)
        gradients = Flux.gradient(() -> loss(x, y), Flux.params(model))
        Flux.Optimise.update!(optimizer, Flux.params(model), gradients)

        # collect the loss and accuracy from each batch
        loss_value += loss(x, y)
        accuracy_value += accuracy(x, y)
        # accuracy_value += (accuracy(x, y) / 938)


        batch_processing_time = time() - batch_start_time
        total_batch_processing_time += batch_processing_time

        # Calculate memory usage
        # epoch_memory_use += CUDA.memory_status().free

        coef +=1
    end

    CUDA.memory_status()

    #epoch_memory_use = mem_info.total - mem_info.free

    accuracy_value /= coef
    loss_value /= coef

    epoch_end_time = time()
    epoch_time = epoch_end_time - epoch_start_time
    total_training_time += epoch_time
    total_memory_usage += epoch_memory_use

    num_samples = size(train_X, 4)
    throughput = num_samples / epoch_time
    push!(throughputs, throughput)

    println(" - Memory Usage: $epoch_memory_use bytes")
    println(" - Epoch Time: $epoch_time seconds")
    println(" - Batch Processing Time: $total_batch_processing_time seconds")
    println(" - Throughput: $throughput samples/second")

    # Evaluate accuracy and convergence
    println(" - Accuracy: $accuracy_value")
    println(" - Loss: $loss_value")

    push!(monitoring_data["epoch"], epoch)
    push!(monitoring_data["epoch_memory_usage"], epoch_memory_use)
    push!(monitoring_data["epoch_time"], epoch_time)
    push!(monitoring_data["batch_processing_time"], total_batch_processing_time)
    push!(monitoring_data["throughput"], throughput)
    push!(monitoring_data["accuracy"], accuracy_value)
    push!(monitoring_data["loss"], loss_value)
end


println("Total Training Time: $total_training_time seconds")
println("Average Memory Usage: $(total_memory_usage / num_epochs) bytes")


Epoch 1/10
Effective GPU memory usage: 99.81% (14.720 GiB/14.748 GiB)
Memory pool usage: 4.054 GiB (14.094 GiB reserved)
 - Memory Usage: 0 bytes
 - Epoch Time: 7.473342180252075 seconds
 - Batch Processing Time: 7.009091377258301 seconds
 - Throughput: 8028.536436956805 samples/second
 - Accuracy: 0.956972947761194
 - Loss: 0.14257603423319448
Epoch 2/10
Effective GPU memory usage: 99.81% (14.720 GiB/14.748 GiB)
Memory pool usage: 6.130 GiB (14.094 GiB reserved)
 - Memory Usage: 0 bytes
 - Epoch Time: 5.853379964828491 seconds
 - Batch Processing Time: 5.842292070388794 seconds
 - Throughput: 10250.48781396819 samples/second
 - Accuracy: 0.9734141791044776
 - Loss: 0.08795969742104542
Epoch 3/10
Effective GPU memory usage: 99.81% (14.720 GiB/14.748 GiB)
Memory pool usage: 8.191 GiB (14.094 GiB reserved)
 - Memory Usage: 0 bytes
 - Epoch Time: 5.81629204750061 seconds
 - Batch Processing Time: 5.804774522781372 seconds
 - Throughput: 10315.850633013404 samples/second
 - Accuracy: 0.980

LoadError: ignored

In [6]:
using JSON
json_filename = "GPU_JULIA_epoch_data.json"

stringdata = JSON.json(monitoring_data)

# write the file with the stringdata variable information
open(json_filename, "w") do f
        write(f, stringdata)
     end

1085

# CPU

In [None]:
using Flux

calc_device = cpu

model = Chain(
    Conv((3, 3), 1=>32, pad=(1, 1), selu),
    MaxPool((2, 2)),
    Conv((3, 3), 32=>64, pad=(1, 1), selu),
    MaxPool((2, 2)),
    x -> reshape(x, :, size(x, 4)),  # Flatten layer
    Dense(3136, 64),
    selu,
    Dense(64, 10),  # No activation here
    softmax
)  |> calc_device


Chain(
  Conv((3, 3), 1 => 32, selu, pad=1),   [90m# 320 parameters[39m
  MaxPool((2, 2)),
  Conv((3, 3), 32 => 64, selu, pad=1),  [90m# 18_496 parameters[39m
  MaxPool((2, 2)),
  var"#1#2"(),
  Dense(3136 => 64),                    [90m# 200_768 parameters[39m
  NNlib.selu,
  Dense(64 => 10),                      [90m# 650 parameters[39m
  NNlib.softmax,
) [90m                  # Total: 8 arrays, [39m220_234 parameters, 861.398 KiB.

In [None]:
# Define loss function and optimizer
using Statistics

loss(x, y) = Flux.crossentropy(model(x), y)
accuracy(X, y) = Statistics.mean(Flux.onecold(model(X)) .== Flux.onecold(y))
optimizer = ADAM(0.001)

Adam(0.001, (0.9, 0.999), 1.0e-8, IdDict{Any, Any}())

In [None]:
using Flux
using Flux.Data: DataLoader
using MLDatasets
using JSON
using CUDA


# Training configuration
batch_size = 64
num_epochs = 10

# Initialize variables for monitoring
total_training_time = 0.0
total_memory_usage = 0
throughputs = []

monitoring_data = Dict(
    "epoch" => [],
    "epoch_memory_usage" => [],
    "epoch_time" => [],
    "batch_processing_time" => [],
    "throughput" => [],
    "accuracy" => [],
    "loss" => [],
)

# Start training loop
for epoch in 1:num_epochs
    println("Epoch $epoch/$num_epochs")

    # Initialize variables for epoch-level monitoring
    total_batch_processing_time = 0.0
    epoch_memory_use = 0
    epoch_start_time = time()

    loss_value = 0.0
    accuracy_value = 0.0



    for batch_start in 1:batch_size:size(train_X, 4)


        batch_start_time = time()

        batch_end = min(batch_start + batch_size - 1, size(train_X, 4))

        batch_x = train_X[:, :, :, batch_start:batch_end]
        batch_y = train_y[:, batch_start:batch_end]


        # Perform training step
        #loss_value = Flux.train!(loss, Flux.params(model), DataLoader((batch_x, batch_y)), optimizer)

        x, y = calc_device(batch_x), calc_device(batch_y)
        gradients = Flux.gradient(() -> loss(x, y), Flux.params(model))
        Flux.Optimise.update!(optimizer, Flux.params(model), gradients)

        # collect the loss and accuracy from each batch
        loss_value += (loss(x, y) / 938)
        accuracy_value += (accuracy(x, y) / 938)


        batch_processing_time = time() - batch_start_time
        total_batch_processing_time += batch_processing_time

        # Calculate memory usage
        # epoch_memory_use += CUDA.memory_status().free
    end

    epoch_end_time = time()
    epoch_time = epoch_end_time - epoch_start_time
    total_training_time += epoch_time
    total_memory_usage += epoch_memory_use

    num_samples = size(train_X, 4)
    throughput = num_samples / epoch_time
    push!(throughputs, throughput)

    println(" - Memory Usage: $epoch_memory_use bytes")
    println(" - Epoch Time: $epoch_time seconds")
    println(" - Batch Processing Time: $total_batch_processing_time seconds")
    println(" - Throughput: $throughput samples/second")

    # Evaluate accuracy and convergence
    println(" - Accuracy: $accuracy_value")
    println(" - Loss: $loss_value")

    push!(monitoring_data["epoch"], epoch)
    push!(monitoring_data["epoch_memory_usage"], epoch_memory_use)
    push!(monitoring_data["epoch_time"], epoch_time)
    push!(monitoring_data["batch_processing_time"], total_batch_processing_time)
    push!(monitoring_data["throughput"], throughput)
    push!(monitoring_data["accuracy"], accuracy_value)
    push!(monitoring_data["loss"], loss_value)
end


println("Total Training Time: $total_training_time seconds")
println("Average Memory Usage: $(total_memory_usage / num_epochs) bytes")
println("Average Throughput: $(sum(throughputs) / num_epochs) samples/second"))


Epoch 1/10
 - Memory Usage: 0 bytes
 - Epoch Time: 6.511749029159546 seconds
 - Batch Processing Time: 6.502673149108887 seconds
 - Throughput: 9214.114323405372 samples/second
 - Accuracy: 0.869986007462691
 - Loss: 0.43604071559457225
Epoch 2/10
 - Memory Usage: 0 bytes
 - Epoch Time: 4.527939081192017 seconds
 - Batch Processing Time: 4.519378185272217 seconds
 - Throughput: 13251.06166936427 samples/second
 - Accuracy: 0.9611040778251647
 - Loss: 0.13175277564869248
Epoch 3/10
 - Memory Usage: 0 bytes
 - Epoch Time: 4.935431003570557 seconds
 - Batch Processing Time: 4.925978183746338 seconds
 - Throughput: 12156.992967097052 samples/second
 - Accuracy: 0.9773121002132252
 - Loss: 0.076912162796134
Epoch 4/10


In [None]:
using JSON
json_filename = "CPU_JULIA_epoch_data.json"

stringdata = JSON.json(monitoring_data)

# write the file with the stringdata variable information
open(json_filename, "w") do f
        write(f, stringdata)
     end

1107

In [None]:
loss(train_X, test_y)
accuracy(train_X, test_y)

LoadError: ignored

LoadError: ignored

1-element Vector{StepRange{Int64, Int64}}:
 1:64:59969