In [22]:
using Distributions

# Generic sample-size formula for relative error
# accuracy = 0.05 means ±5% (relative to the mean)
# confidence = 0.95 means z = 1.96 (two-sided)
"""
    required_samples(mu, sigma; accuracy=0.01, confidence=0.95)

Return the smallest integer `n` with `n >= (z * sigma / (accuracy * mu))^2`, where `z`
is the standard-normal quantile at `(1 + confidence) / 2`.

# Arguments
- `mu`: mean of the quantity; the half-width of the interval is `accuracy * mu`.
- `sigma`: standard deviation of the quantity.

# Keywords
- `accuracy`: relative half-width (e.g. `0.05` for ±5%); must be positive.
- `confidence`: two-sided confidence level; must lie strictly between 0 and 1.

# Throws
- `ArgumentError` if `mu` is zero, `accuracy` is not positive, or `confidence` is not
  in the open interval (0, 1).
"""
function required_samples(mu, sigma; accuracy=0.01, confidence=0.95)
    # A relative half-width is undefined for a zero mean: delta would be 0 and the
    # sample size infinite, which previously surfaced as an InexactError from ceil.
    iszero(mu) && throw(ArgumentError("mu must be non-zero for a relative accuracy target"))
    accuracy > 0 || throw(ArgumentError("accuracy must be positive, got $accuracy"))
    0 < confidence < 1 ||
        throw(ArgumentError("confidence must lie strictly between 0 and 1, got $confidence"))

    z = quantile(Normal(), (1 + confidence) / 2)
    delta = accuracy * mu                 # relative half-width
    n = (z * sigma / delta)^2
    return ceil(Int, n)
end

# --- 1. Uniform(a, b) ---
"""
    samples_uniform(a, b; accuracy=0.01, confidence=0.95)

Call `required_samples` with the mean `(a + b) / 2` and standard deviation
`(b - a) / sqrt(12)` of a Uniform(a, b) variable, forwarding `accuracy` and `confidence`.
"""
function samples_uniform(a, b; accuracy=0.01, confidence=0.95)
    midpoint = (a + b) / 2
    spread = (b - a) / sqrt(12)
    return required_samples(midpoint, spread; accuracy=accuracy, confidence=confidence)
end

# --- 2. Normal(mu, sigma) ---
"""
    samples_normal(mu, sigma; accuracy=0.01, confidence=0.95)

Forward the Normal(mu, sigma) parameters unchanged to `required_samples`.
"""
samples_normal(mu, sigma; accuracy=0.01, confidence=0.95) =
    required_samples(mu, sigma; accuracy=accuracy, confidence=confidence)

# --- 3. Lognormal underlying Normal(muY, sigmaY) ---
"""
    samples_lognormal(muY, sigmaY; accuracy=0.01, confidence=0.95)

Call `required_samples` for a lognormal variable whose logarithm is
Normal(muY, sigmaY), using the lognormal mean `exp(muY + sigmaY^2 / 2)` and variance
`(exp(sigmaY^2) - 1) * exp(2*muY + sigmaY^2)`.
"""
function samples_lognormal(muY, sigmaY; accuracy=0.01, confidence=0.95)
    s2 = sigmaY^2
    # Moments of the lognormal variable itself (not of its logarithm).
    mean_x = exp(muY + s2 / 2)
    std_x = sqrt((exp(s2) - 1) * exp(2*muY + s2))
    return required_samples(mean_x, std_x; accuracy=accuracy, confidence=confidence)
end

# -------------------------------------------------------------------
# Example calculations (same examples as earlier)
# Each label states the arguments actually passed to the call next to it.
println("Uniform(0, 1):            ", samples_uniform(0, 1))
println("Normal(mu=1, sigma=1):    ", samples_normal(1, 1))
println("Lognormal N(1,1):         ", samples_lognormal(1, 1))


Uniform(0, 1):            12805
Normal(mu=1, sigma=0.2):  38415
Lognormal N(0,1):         66008


In [None]:
# Experiment grid: sample sizes, distribution names, cluster counts and port counts.
N_values = [1000, 15000, 40000, 70000]

distribution = ["normal", "lognormal", "uniform"]
n_cluster = [10, 20, 30, 40, 50]
ports = [6, 7, 8, 9, 10]

# Project root shared by every input and output path below.
project_dir = "/Users/konggullerod/Documents/Dokumenter/Skole - DTU/7. Semester/Bachelor projekt"

# For every (distribution, ports, N) combination: read the objective values, store
# EV / RP / EEV / VSS in `results`, and save one comparison plot over the cluster counts.
# NOTE(review): `results`, `CSV`, `DataFrame`, `mean`, `plot`, `plot!`, `hline!` and
# `savefig` are defined outside this cell. `results` is assumed to map String keys to
# vectors of numbers — confirm against the cell that creates it.
for d in distribution
    for p in ports
        for nn in N_values
            det_file = "$(project_dir)/data/deterministic/objective_value_deterministic_comparison_L_$(p)_$(nn)_$(d).csv"

            # Test for the file explicitly so that only a genuinely missing file is
            # reported as "not found"; other failures get their own message below.
            if !isfile(det_file)
                println("Deterministic file $det_file not found, skipping combination d=$d, p=$p, nn=$nn")
                continue
            end

            # EV: deterministic solution
            EV = try
                mean(CSV.read(det_file, DataFrame)[:, 1])
            catch e
                println("Failed to read deterministic file $det_file ($(sprint(showerror, e))), skipping combination d=$d, p=$p, nn=$nn")
                continue
            end
            # get! creates the series on first use instead of raising a KeyError.
            push!(get!(() -> Float64[], results, "EV_$(p)_$(nn)_$(d)"), EV)

            # Values from this run only, kept together with the cluster count they
            # belong to so the x-axis stays aligned when some cluster counts are missing.
            plotted_n = eltype(n_cluster)[]
            RP_values = Float64[]
            EEV_values = Float64[]
            VSS_values = Float64[]

            for n in n_cluster
                exp_stoch_file = "$(project_dir)/data/expected_stochastic/objective_value_expected_stochastic_comparison_L_$(p)_$(nn)_$(n)_$(d).csv"
                stoch_file = "$(project_dir)/data/stochastic/objective_value_stochastic_comparison_L_$(p)_$(nn)_$(n)_$(d).csv"

                if !(isfile(exp_stoch_file) && isfile(stoch_file))
                    println("Stochastic files for n=$n not found, skipping n=$n for d=$d, p=$p, nn=$nn")
                    continue
                end

                # Read both files before storing anything, so a failure on the second
                # file cannot leave an RP entry without its EEV/VSS counterparts.
                rp, eev = try
                    (
                        mean(CSV.read(stoch_file, DataFrame)[:, 1]),
                        mean(CSV.read(exp_stoch_file, DataFrame)[:, 1]),
                    )
                catch e
                    println("Failed to read stochastic files for n=$n ($(sprint(showerror, e))), skipping n=$n for d=$d, p=$p, nn=$nn")
                    continue
                end
                # VSS: Value of Stochastic Solution
                vss = eev - rp

                # RP: stochastic solution
                push!(get!(() -> Float64[], results, "RP_$(p)_$(nn)_$(n)_$(d)"), rp)
                # EEV: deterministic solution applied to all scenarios
                push!(get!(() -> Float64[], results, "EEV_$(p)_$(nn)_$(n)_$(d)"), eev)
                push!(get!(() -> Float64[], results, "VSS_$(p)_$(nn)_$(n)_$(d)"), vss)

                push!(plotted_n, n)
                push!(RP_values, rp)
                push!(EEV_values, eev)
                push!(VSS_values, vss)
            end

            # Plot for this combination if we have data
            if !isempty(plotted_n)
                plot(plotted_n, RP_values, label="RP", xlabel="Number of Clusters", ylabel="Objective Value", title="Distribution: $(d), ports=$(p), N=$(nn)", linewidth=2)
                plot!(plotted_n, EEV_values, label="EEV", linewidth=2)
                hline!([EV], label="EV", linewidth=2, color=:blue)
                plot!(plotted_n, VSS_values, label="VSS", linewidth=2, color=:red)

                # Save the plot
                savefig("$(project_dir)/plots/comparison/plot_$(d)_$(p)_$(nn).png")
            end
        end
    end
end

Deterministic file /Users/konggullerod/Documents/Dokumenter/Skole - DTU/7. Semester/Bachelor projekt/data/deterministic/objective_value_deterministic_comparison_L_6_15000_normal.csv not found, skipping combination d=normal, p=6, nn=15000
Deterministic file /Users/konggullerod/Documents/Dokumenter/Skole - DTU/7. Semester/Bachelor projekt/data/deterministic/objective_value_deterministic_comparison_L_6_40000_normal.csv not found, skipping combination d=normal, p=6, nn=40000
Deterministic file /Users/konggullerod/Documents/Dokumenter/Skole - DTU/7. Semester/Bachelor projekt/data/deterministic/objective_value_deterministic_comparison_L_6_70000_normal.csv not found, skipping combination d=normal, p=6, nn=70000
Deterministic file /Users/konggullerod/Documents/Dokumenter/Skole - DTU/7. Semester/Bachelor projekt/data/deterministic/objective_value_deterministic_comparison_L_7_1000_normal.csv not found, skipping combination d=normal, p=7, nn=1000
Deterministic file /Users/konggullerod/Documents/D