In [1]:
using Pkg
Pkg.instantiate()
include("../MyReverseDiff.jl")
include("../MyMlp.jl")

using .MyReverseDiff
using .MyMlp
using JLD2
using Printf
using BenchmarkTools
using LinearAlgebra
using Distributions
using Random
using MLDatasets
using Plots
using Statistics
using DataFrames
using MLDataUtils


[32m[1m   Installed[22m[39m x265_jll ───────────────────── v3.5.0+0
[32m[1m   Installed[22m[39m GR_jll ─────────────────────── v0.73.14+0
[32m[1m   Installed[22m[39m Libmount_jll ───────────────── v2.41.0+0
[32m[1m   Installed[22m[39m libdecor_jll ───────────────── v0.2.2+0
[32m[1m   Installed[22m[39m libfdk_aac_jll ─────────────── v2.0.3+0
[32m[1m   Installed[22m[39m JpegTurbo_jll ──────────────── v3.1.1+0
[32m[1m   Installed[22m[39m OffsetArrays ───────────────── v1.17.0
[32m[1m   Installed[22m[39m HypergeometricFunctions ────── v0.3.28
[32m[1m   Installed[22m[39m Opus_jll ───────────────────── v1.3.3+0
[32m[1m   Installed[22m[39m LERC_jll ───────────────────── v4.0.1+0
[32m[1m   Installed[22m[39m LoggingExtras ──────────────── v1.1.0
[32m[1m   Installed[22m[39m ShowCases ──────────────────── v0.1.0
[32m[1m   Installed[22m[39m StatsFuns ──────────────────── v1.5.0
[32m[1m   Installed[22m[39m Accessors ──────────────────── v0.1.4

## Przygotowanie danych IMDB

In [2]:
# Load the prepared IMDB splits. The JLD2 file is opened once and all four
# arrays are read through that single handle (previously the file was opened
# and parsed separately for every array). Each dataset is converted with
# `Matrix(...)`, exactly as before.
X_train, y_train, X_test, y_test =
    jldopen("../../dataset/imdb_dataset_prepared.jld2", "r") do file
        map(key -> Matrix(file[key]), ("X_train", "y_train", "X_test", "y_test"))
    end;

##  Trening modelu

In [3]:
# Model dimensions and mini-batch width.
input_size = size(X_train, 1)  # number of features (rows of X_train)
hidden_size = 8
output_size = 1
batch_size = 64

# The model (a Chain) is constructed once: a ReLU hidden layer followed by a
# single sigmoid output unit.
model = Chain(
    Dense(input_size, hidden_size, relu; weight_init=xavier_uniform, name="layer1"),
    Dense(hidden_size, output_size, σ; weight_init=xavier_uniform, name="layer2"),
)

# Constant nodes for the input data and the labels. Their buffers have a fixed
# width of `batch_size` columns and are overwritten in place for each batch.
x_input_node = Constant(fill(0.0f0, input_size, batch_size))
y_label_node = Constant(fill(0.0f0, output_size, batch_size))

# Build the training graph on top of the two constant nodes.
loss_node, model_output_node, order = build_graph!(
    model,
    binarycrossentropy,
    x_input_node,
    y_label_node;
    loss_name="loss",
)

optimizer_state = setup_optimizer(Adam(), model)


epochs = 5

# Loop-invariant sizes: one column of X_train per training sample. The last
# mini-batch of an epoch is shorter whenever the sample count is not a multiple
# of batch_size.
num_train_samples = size(X_train, 2)
num_batches = cld(num_train_samples, batch_size)

for epoch in 1:epochs
    # Draw a fresh random sample order for every epoch. Batches are gathered
    # straight from X_train / y_train through this permutation, so no shuffled
    # copy of the whole data set is materialised.
    permutation = randperm(num_train_samples)

    loss_value = 0.0

    t = @elapsed begin
        for i in 1:num_batches
            start_idx = (i - 1) * batch_size + 1
            end_idx = min(i * batch_size, num_train_samples)
            batch_cols = view(permutation, start_idx:end_idx)
            current_batch_size = length(batch_cols)

            # Copy the batch into the leading columns of the fixed-width buffers.
            # NOTE(review): for a short last batch the remaining columns keep the
            # samples of the previous batch; forward!/backward! presumably process
            # the whole buffer, so those samples would be used a second time.
            # Confirm this is acceptable, or pad/drop the last batch.
            active = 1:current_batch_size
            view(x_input_node.output, :, active) .= view(X_train, :, batch_cols)
            view(y_label_node.output, :, active) .= view(y_train, :, batch_cols)

            forward!(order)
            backward!(order)
            step!(optimizer_state)

            loss_value += loss_node.output
        end
    end

    # Unweighted mean of the per-batch losses recorded during this epoch.
    avg_loss_epoch = loss_value / num_batches

    println(@sprintf("Epoch: %d (%.2fs) \tTrain: (l: %.2f)", epoch, t, avg_loss_epoch))
end

Epoch: 1 (7.77s) 	Train: (l: 0.67)
Epoch: 2 (1.14s) 	Train: (l: 0.58)
Epoch: 3 (0.99s) 	Train: (l: 0.48)
Epoch: 4 (1.01s) 	Train: (l: 0.38)
Epoch: 5 (1.07s) 	Train: (l: 0.31)


##  Test modelu

In [4]:
# --- Test evaluation ---

"""
    bce_sum(predictions, labels)

Return the sum over all elements of the binary cross-entropy term
`-(y * log(p) + (1 - y) * log(1 - p))`, accumulated in `Float64`.

Each prediction is clamped to `[eps(Float32), 1 - eps(Float32)]` before taking
the logarithm so that a saturated output cannot produce `-Inf` or `NaN`.
`predictions` and `labels` must have the same number of elements.
"""
function bce_sum(predictions, labels)
    lo = Float64(eps(Float32))
    return sum(zip(predictions, labels)) do (p, y)
        q = clamp(Float64(p), lo, 1 - lo)
        -(y * log(q) + (1 - y) * log(1 - q))
    end
end

# The graph buffers have a fixed number of columns; take the batch size from
# the buffer itself so this cell cannot drift from the training setup.
batch_size = size(x_input_node.output, 2)
num_test_samples = size(X_test, 2)
num_batches = cld(num_test_samples, batch_size)
total_test_loss_sum = 0.0
total_correct_predictions = 0  # exact integer count of correct predictions

t_test = @elapsed begin
    for i in 1:num_batches
        start_idx = (i - 1) * batch_size + 1
        end_idx = min(i * batch_size, num_test_samples)

        # Number of samples in this batch (may be smaller for the last batch).
        current_test_batch_size = end_idx - start_idx + 1

        # Views avoid copying the slice before it is written into the buffers.
        x_batch_test = view(X_test, :, start_idx:end_idx)
        y_batch_test = view(y_test, :, start_idx:end_idx)

        view(x_input_node.output, :, 1:current_test_batch_size) .= x_batch_test
        view(y_label_node.output, :, 1:current_test_batch_size) .= y_batch_test

        forward!(order)

        # Only the leading columns belong to this batch. For a short last batch
        # the remaining buffer columns still hold the previous batch.
        predictions = view(model_output_node.output, :, 1:current_test_batch_size)

        # The loss is computed from the valid columns only. `loss_node.output` is
        # not used here because it is presumably evaluated over the whole buffer,
        # stale columns included, which would distort the last batch.
        # NOTE(review): assumes `binarycrossentropy` is the mean of the standard
        # per-sample binary cross-entropy — TODO confirm against MyMlp.
        total_test_loss_sum += bce_sum(predictions, y_batch_test)

        # Binary classification with a 0.5 decision threshold.
        total_correct_predictions += count((predictions .> 0.5f0) .== y_batch_test)
    end
end

# --- Mean loss and accuracy over the whole test set ---
avg_test_loss = total_test_loss_sum / num_test_samples
avg_test_accuracy = total_correct_predictions / num_test_samples * 100.0

println(@sprintf("Test Loss (czas: %.2fs): %.4f", t_test, avg_test_loss))
println("Test Accuracy: $avg_test_accuracy %")


Test Loss (czas: 0.45s): 0.3911
Test Accuracy: 86.65 %
