In [1]:
using Random
using CSV
using DataFrames
using Impute               
using Statistics
include("../src/activationFunctions.jl")
include("../src/networkFunctions.jl")

rnn (generic function with 1 method)

In [2]:
# Define a simple StandardScaler
struct StandardScaler
    mean::Vector{Float64}
    std::Vector{Float64}
end

function fit_scaler(X::AbstractMatrix)
    m = vec(mean(X, dims=1))
    s = vec(std(X, dims=1))
    return StandardScaler(m, s)
end

function transform(s::StandardScaler, X::AbstractMatrix)
    # Replace zero std values with 1.0 to avoid division by zero.
    std_adj = map(x -> x == 0 ? 1.0 : x, s.std)
    return (X .- s.mean') ./ std_adj'
end

function fit_transform(X::AbstractMatrix)
    s = fit_scaler(X)
    return transform(s, X), s
end

# Read CSV file into a DataFrame.
data = CSV.read("clean_weather.csv", DataFrame)

# Forward-fill missing values.
data = Impute.locf(data)

# Define column names.
PREDICTORS = ["tmax", "tmin", "rain"]
TARGET = "tmax_tomorrow"

# Ensure that the predictor columns have no missing values.
for col in PREDICTORS
    data[!, col] = coalesce.(data[!, col], 0.0)
end

# Also coalesce the target column.
data[!, TARGET] = coalesce.(data[!, TARGET], 0.0)

# Extract the predictor columns as a Matrix using hcat.
predictor_matrix = hcat([data[!, col] for col in PREDICTORS]...)

# Apply standard scaling using the custom scaler.
scaled_predictors, scaler = fit_transform(predictor_matrix)

# (Optional) Reassign the scaled predictors back into the DataFrame.
for (i, col) in enumerate(PREDICTORS)
    data[!, col] = scaled_predictors[:, i]
end

# Set random seed for reproducibility.
Random.seed!(0)

# Split the data.
n = nrow(data)
idx1 = floor(Int, 0.7 * n)
idx2 = floor(Int, 0.85 * n)

train_data = data[1:idx1, :]
valid_data = data[idx1+1:idx2, :]
test_data  = data[idx2+1:end, :]

# Extract predictors and target values as matrices.
train_x = hcat([train_data[!, col] for col in PREDICTORS]...)
train_y = reshape(train_data[:, TARGET], :, 1)

valid_x = hcat([valid_data[!, col] for col in PREDICTORS]...)
valid_y = reshape(valid_data[:, TARGET], :, 1)

test_x = hcat([test_data[!, col] for col in PREDICTORS]...)
test_y = reshape(test_data[:, TARGET], :, 1)


2027×1 Matrix{Float64}:
 68.0
 67.0
 67.0
 65.0
 66.0
 65.0
 69.0
 76.0
 83.0
 85.0
  ⋮
 62.0
 61.0
 60.0
 62.0
 67.0
 66.0
 70.0
 62.0
 64.0

In [3]:
function square_elementwise(x::ReverseNode)
    out_val = x.value .^ 2
    out = ReverseNode(out_val)
    push!(out.children, (x, δ -> δ .* (2 .* x.value)))
    return out
end

function sum_r(x::ReverseNode)
    s = sum(x.value)          
    out = ReverseNode(s)
    push!(out.children, (x, δ -> fill(δ, size(x.value))))
    return out
end

function abs_r(x::ReverseNode)
    out_val = abs.(x.value)
    out = ReverseNode(out_val)
    push!(out.children, (x, δ -> δ .* sign.(x.value)))  
    return out
end

function mae_loss(y_true::ReverseNode, y_pred::ReverseNode)
    diff = y_pred - y_true
    abs_diff = abs_r(diff)
    total_loss = sum_r(abs_diff)
    n = length(y_true.value)
    return total_loss / lift(n)
end

# Layer configuration (using dictionaries)
layer_conf = [
    Dict("type" => "input", "units" => 3),
    Dict("type" => "rnn", "hidden" => 4, "units" => 1)
]

epochs = 250
lr = 1e-5
sequence_length = 7
activation_function = "tanh"

rnn_print(train_x, train_y, valid_x, valid_y, layer_conf, epochs, lr, sequence_length, activation_function, mae_loss)

Epoch: 10 train loss 64.72492676869594 valid loss 66.557450476798
Epoch: 20 train loss 64.54096239402082 valid loss 66.37777159691505
Epoch: 30 train loss 64.3539899583781 valid loss 66.19545646345601
Epoch: 40 train loss 64.15842234807124 valid loss 66.00524076337211
Epoch: 50 train loss 63.94855186236193 valid loss 65.80157451775185
Epoch: 60 train loss 63.719574833904 valid loss 65.57948110602553
Epoch: 70 train loss 63.469401457412374 valid loss 65.33641520669042
Epoch: 80 train loss 63.1982773508188 valid loss 65.07240634707019
Epoch: 90 train loss 62.90564793157505 valid loss 64.78701363540827
Epoch: 100 train loss 62.588827554740185 valid loss 64.47742621283338
Epoch: 110 train loss 62.24345020420786 valid loss 64.1387334086018
Epoch: 120 train loss 61.86403240530246 valid loss 63.764517500405226
Epoch: 130 train loss 61.44501922886782 valid loss 63.348006375977505
Epoch: 140 train loss 60.98298589520002 valid loss 62.88471538652683
Epoch: 150 train loss 60.479601377139446 valid