In [1]:
import Pkg; Pkg.activate(".julia/environments/v1.10/Project.toml")
Pkg.add(["SCS", "Convex","PlotThemes","Images","MLDatasets"]) 
using Plots, Random, NBInclude, LinearAlgebra, Statistics, Distributions, Convex, SCS, MLDatasets, DataFrames
using Plots.PlotMeasures
using StatsBase: sample
theme(:dao)

[32m[1m  Activating[22m[39m project at `~/.julia/environments/v1.10`
[32m[1m    Updating[22m[39m registry at `~/.julia/registries/General.toml`
[32m[1m   Resolving[22m[39m package versions...
[32m[1m  No Changes[22m[39m to `~/.julia/environments/v1.10/Project.toml`
[32m[1m  No Changes[22m[39m to `~/.julia/environments/v1.10/Manifest.toml`


In [2]:
# Defining some activation functions
ReLU(z) = @. ifelse.(z > 0, z, 0)
sigmoid(z) =  @. 1 / (1 + ℯ^(-z))


function softmax(z::Matrix)
    # Compute the maximum for each column, subtract from each element in the column
    max_z = maximum(z, dims=1)
    e_z = exp.(z .- max_z)
    
    # Compute the sum for each row
    sum_e_z = sum(e_z, dims=1)
    # Normalize by dividing each element by the sum of its row
    return e_z ./ sum_e_z
end

function softmax(z::Vector)
    """Compute softmax values for each set of scores in the vector z.
    Use this function when you use the activation function on one vector at a time"""
    e_z = exp.(z .- maximum(z))
        
    return e_z ./ sum(e_z)
end
function SplitData(xData, yData, train_size::Float64)
    N = length(xData[:,1])
    NumTrain = Int(round(N*train_size))

    train_indices = sample(1:N, NumTrain; replace=false)
    all_indices = collect(1:N)
    test_indices = [i for i in all_indices if i ∉ train_indices]

    xTrain = xData[train_indices,:]
    yTrain = yData[train_indices,:]
    xTest = xData[test_indices,:]
    yTest = yData[test_indices,:]
    return xTrain, xTest, yTrain, yTest
end

SplitData (generic function with 1 method)

In [3]:
Random.seed!(2024)
#ϵ=Normal(0, 0.02) # Noise

x = randn(2)  # network input. This is a single input with two features
W1 = randn(4, 2)  # first layer weights
b1 = randn(4)  #Bias 

4-element Vector{Float64}:
 -0.22852008785201214
 -0.34769203595359444
  0.3180495800939038
 -0.7653991690703775

In [4]:
z1 = W1*x+b1
a1=ReLU.(z1)
W2 = randn(8, 4)  # first layer weights
b2 = randn(8)  #Bias 
z2 = W2*a1+b2
a2=ReLU.(z2)

8-element Vector{Real}:
 1.855287304698327
 1.801756780214216
 0.025477130911291224
 1.4214739945434274
 2.289976906436366
 0
 0
 0

In [5]:
function create_layers(network_input_size, layer_output_sizes)
    input = network_input_size
    l = []
    for (i,output) in enumerate(layer_output_sizes)
        W2 = randn(output, input)  # first layer weights
        b2 = randn(output)  #Bias 
        input=output
        push!(l, (W2,b2))
        
    end
    return l
end

create_layers (generic function with 1 method)

In [6]:
function feed_forward(layers, input, activation_functions)
    a = input
    for ((W,b), func) in zip(layers, activation_functions)
        z = W*a+b
        a = func.(z)
    end
    return a
end

feed_forward (generic function with 1 method)

In [7]:
input_size=8
output_size=2
x = rand(8)
y=rand(2)
funcs=[ReLU, ReLU,ReLU,ReLU, softmax]
l = create_layers(input_size, [10, 16, 6, output_size])
predict = feed_forward(l, x, funcs)

2-element Vector{Float64}:
 26.195322082227882
  7.895130947384336

In [8]:
function create_layers_batch(network_input_size, layer_output_sizes)
    input = network_input_size
    l = []
    for (i,output) in enumerate(layer_output_sizes)
        W2 = randn(output, input)  # first layer weights
        b2 = randn(output)  #Bias 
        input=output
        push!(l, (W2 , b2))
        
    end
    return l
end
l = create_layers_batch(2, [3, 4])
l[1][1]

3×2 Matrix{Float64}:
 -0.041321  -0.540299
 -0.132765   1.14861
  0.166539  -0.172541

In [9]:
function feed_forward_batched(layers, input, activation_functions)
    a = input #(num_features, batchsize)
    for ((W,b), func) in zip(layers, activation_functions)
        z = a*W' .+ b'
        a = func(z)
    end
    return a
end

feed_forward_batched (generic function with 1 method)

In [11]:
dataset = Iris()

dataset Iris:
  metadata   =>    Dict{String, Any} with 4 entries
  features   =>    150×4 DataFrame
  targets    =>    150×1 DataFrame
  dataframe  =>    150×5 DataFrame

In [12]:
XData, yData = Iris(as_df=false)[:]
shuffle = sample(1:length(yData), length(yData), replace=true)
X_data = XData[:,shuffle]
y_data = yData[shuffle]

150-element Vector{InlineStrings.String15}:
 "Iris-virginica"
 "Iris-versicolor"
 "Iris-setosa"
 "Iris-setosa"
 "Iris-virginica"
 "Iris-virginica"
 "Iris-setosa"
 "Iris-versicolor"
 "Iris-versicolor"
 "Iris-versicolor"
 "Iris-virginica"
 "Iris-versicolor"
 "Iris-virginica"
 ⋮
 "Iris-versicolor"
 "Iris-versicolor"
 "Iris-versicolor"
 "Iris-setosa"
 "Iris-virginica"
 "Iris-virginica"
 "Iris-setosa"
 "Iris-setosa"
 "Iris-virginica"
 "Iris-virginica"
 "Iris-virginica"
 "Iris-setosa"

In [30]:
function one_hot(y::Array)
    # Find unique classes (species) in y
    unique_classes = unique(y)
    
    # Create a dictionary to map each class to an index
    class_to_index = Dict(class => i for (i, class) in enumerate(unique_classes))
    
    # Initialize a matrix to store one-hot encoded vectors
    one_hot_matrix = zeros( length(unique_classes), length(y))
    
    # Populate the matrix
    for (i, class) in enumerate(y)
        class_index = class_to_index[class]
        one_hot_matrix[class_index, i] = 1.0
    end
    
    return one_hot_matrix, unique_classes  # Return the matrix and the unique class labels
end
function one_hot_argmax(A::AbstractMatrix)
    # Find the index of the maximum element in each row
    indices = argmax(A, dims=2)  # Returns a 150×1 array of indices (1, 2, or 3)
    
    # Initialize a matrix of zeros with the same size as A
    one_hot = zeros(size(A))
    
    # Get the row indices (1 to 150)
    rows = axes(A, 1)
    
    # Flatten the indices array to a vector
    cols = vec(indices)
    # Create Cartesian indices for each position to set to 1
    inds = CartesianIndex.(cols)
    # Set the corresponding positions in one_hot to 1
    one_hot[inds] .= 1
    
    return one_hot
end

one_hot_argmax (generic function with 1 method)

In [31]:
y_data_OH, labels = one_hot(y_data)

([1.0 0.0 … 0.0 1.0; 0.0 1.0 … 0.0 0.0; 0.0 0.0 … 1.0 0.0], InlineStrings.String15["Iris-virginica", "Iris-setosa", "Iris-versicolor"])

Network must have input of 4 and output of 3

In [32]:
function cross_entropy(y_true, y_pred)
    # Ensure numerical stability by adding a small epsilon to avoid log(0)
    eps = 1e-15
    y_pred_clipped = clamp.(y_pred, eps, 1.0 - eps)
    #println(size(y_true, 2))
    #display(y)
    # Calculate the cross-entropy loss for each sample
    ce_loss = -sum(y_true .* log.(y_pred_clipped)) / size(y_true, 1)  # size(y_true, 2) is the number of samples
    
    return ce_loss
end

cross_entropy (generic function with 1 method)

In [33]:
function accuracy(y_true, y_pred)
    #y = (batch, feature)
    batch = size(y_true)[2]
    acc=0
    for i in 1:batch
        if (y_true[i,:]==y_pred[i,:])
            acc+=1
        end
    end
    return acc/batch
end

accuracy (generic function with 1 method)

In [38]:
dataset = Iris()
XData, yData = Iris(as_df=false)[:]
shuffle = sample(1:length(yData), length(yData), replace=true)
X_data = XData[:,shuffle]
y_data = yData[shuffle]
y_data_OH, labels = one_hot(y_data)
y_data_OH=y_data_OH'
X_data=X_data'

# Data
batch_size = 20
input_dimension = 4
output_dimension = 3
input = X_data[1:batch_size, :]#randn(batch_size, input_dimension)
y = y_data_OH[1:batch_size, :]  # Should be one-hot encoded if using softmax with cross-entropy

# Define activation functions
activation_functions = [sigmoid, sigmoid, sigmoid, softmax]

# Create layers
layers = create_layers_batch(input_dimension, [20, 200, 120, output_dimension])
out = feed_forward_batched(layers, input, activation_functions)
out=one_hot_argmax(out)
cross_entropy(y, out)
accuracy(y, out)

0.3333333333333333