
# COE241 / COS868 — Projeto (Parte 1) — NDT (Internet)
Este notebook automatiza **EDA, MLE e Inferência Bayesiana** para o dataset de testes NDT.

> **Arquivos esperados**  
> - CSV: `/mnt/data/ndt_tests_tratado.csv` (colunas: `timestamp`, `download_throughput_bps`, `rtt_download_sec`, `upload_throughput_bps`, `rtt_upload_sec`, `packet_loss_percent`, `client`, `server`).
> - Saídas: serão salvas em `/mnt/data/ndt_outputs/`.


In [46]:

import os, sys, math, json, warnings
from datetime import datetime
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

warnings.filterwarnings("ignore")

DATA_PATH = "ndt_tests_corrigido.csv"
OUT_DIR = os.path.expanduser("~/Desktop/ndt_outputs")
os.makedirs(OUT_DIR, exist_ok=True)




if not os.path.exists(DATA_PATH):
    print("❌ CSV não encontrado:", DATA_PATH)
    print("Por favor, suba o arquivo 'ndt_tests_tratado.csv' em /mnt/data e rode novamente.")
    # Criar um CSV de exemplo vazio (schema) para referência
    schema_cols = ["timestamp","download_throughput_bps","rtt_download_sec","upload_throughput_bps","rtt_upload_sec","packet_loss_percent","client","server"]
    pd.DataFrame(columns=schema_cols).to_csv(os.path.join(OUT_DIR,"schema_example.csv"), index=False)
    raise SystemExit(0)
    
df = pd.read_csv(DATA_PATH)
# Normaliza nomes de colunas se necessário
df.columns = [c.strip().lower() for c in df.columns]
# Tenta mapear colunas alternativas
rename_map = {
    'throughput_download_bps':'download_throughput_bps',
    'throughput_upload_bps':'upload_throughput_bps',
    'rtt_download':'rtt_download_sec',
    'rtt_upload':'rtt_upload_sec',
    'loss_percent':'packet_loss_percent'
}
for k,v in rename_map.items():
    if k in df.columns and v not in df.columns:
        df.rename(columns={k:v}, inplace=True)

# --- Filtragem (como a professora pediu) ---
cols_to_check = [
    'download_throughput_bps',
    'upload_throughput_bps',
    'rtt_download_sec',
    'rtt_upload_sec',
    'packet_loss_percent'
]

# Remove linhas com qualquer valor negativo
df = df[(df[cols_to_check] >= 0).all(axis=1)]

# Remove linhas com valores NaN
df = df.dropna(subset=cols_to_check)


# Corrigir valores absurdos (acima de 10^10) dividindo por 1e6 (supõe erro de escala)
df.loc[df['download_throughput_bps'] > 1e10, 'download_throughput_bps'] /= 1e6
df.loc[df['upload_throughput_bps'] > 1e10, 'upload_throughput_bps'] /= 1e6

# Converter para Mbps para graficar
df['download_throughput_mbps'] = df['download_throughput_bps'] / 1e6
df['upload_throughput_mbps'] = df['upload_throughput_bps'] / 1e6

# Converte timestamp
if 'timestamp' in df.columns:
    df['timestamp'] = pd.to_datetime(df['timestamp'], errors='coerce')
else:
    raise SystemExit("❌ Coluna 'timestamp' ausente no CSV.")

# Checagens básicas
expected_cols = {'download_throughput_bps','upload_throughput_bps','rtt_download_sec','rtt_upload_sec','packet_loss_percent','client','server'}
missing = expected_cols - set(df.columns)
if missing:
    raise SystemExit(f"❌ Colunas ausentes no CSV: {missing}")

# Limpezas simples
df = df.dropna(subset=['download_throughput_bps','upload_throughput_bps','rtt_download_sec','rtt_upload_sec','packet_loss_percent','client','server'])
df = df.sort_values('timestamp').reset_index(drop=True)

df.head(3)


Unnamed: 0,timestamp,download_throughput_bps,rtt_download_sec,upload_throughput_bps,rtt_upload_sec,packet_loss_percent,client,server,download_throughput_mbps,upload_throughput_mbps
0,2025-08-01 00:02:31+00:00,14791410.0,0.398051,2468495.0,0.342112,9.224381,client10,server06,14.791412,2.468495
1,2025-08-01 00:57:47+00:00,25061060.0,0.339698,157726000.0,0.011713,2.720267,client12,server07,25.06106,157.725964
2,2025-08-01 00:58:15+00:00,731632300.0,0.01,371222100.0,0.014137,0.896117,client11,server05,731.632346,371.222115


## 1) EDA — estatísticas por cliente e por servidor

In [47]:
metrics = ['download_throughput_bps','upload_throughput_bps',
           'rtt_download_sec','rtt_upload_sec','packet_loss_percent']

def describe_group(g):
    q = g.quantile([0.5,0.9,0.99]).rename(index={0.5:'q50',0.9:'q90',0.99:'q99'})
    desc = pd.DataFrame({
        'count': g.count(),
        'mean': g.mean(),
        'median': g.median(),
        'var': g.var(ddof=1),   # variância amostral (padrão)
        'std': g.std(ddof=1),   # desvio padrão amostral
        'min': g.min(),
        'max': g.max()
    })
    return desc.join(q)

by_client = df.groupby('client')[metrics].apply(describe_group)
by_server = df.groupby('server')[metrics].apply(describe_group)

by_client.to_csv(os.path.join(OUT_DIR, 'eda_by_client.csv'))
by_server.to_csv(os.path.join(OUT_DIR, 'eda_by_server.csv'))

print("✅ Gravado: eda_by_client.csv, eda_by_server.csv em", OUT_DIR)

✅ Gravado: eda_by_client.csv, eda_by_server.csv em /Users/pv/Desktop/ndt_outputs


### Seleção automática de 2 entidades contrastantes (cliente/servidor)

In [48]:

# Critérios simples: escolhe dois clientes com menor e maior mediana de rtt_download_sec
client_stats = df.groupby('client')['rtt_download_sec'].median().sort_values()
sel_clients = [client_stats.index[0], client_stats.index[-1]] if len(client_stats)>=2 else list(client_stats.index)

# Para servidores, mesma lógica
server_stats = df.groupby('server')['rtt_download_sec'].median().sort_values()
sel_servers = [server_stats.index[0], server_stats.index[-1]] if len(server_stats)>=2 else list(server_stats.index)

print("Clientes selecionados:", sel_clients)
print("Servidores selecionados:", sel_servers)

# Dados filtrados
df_c1 = df[df.client==sel_clients[0]]
df_c2 = df[df.client==sel_clients[-1]]
df_s1 = df[df.server==sel_servers[0]]
df_s2 = df[df.server==sel_servers[-1]]

len(df_c1), len(df_c2), len(df_s1), len(df_s2)


Clientes selecionados: ['client06', 'client10']
Servidores selecionados: ['server07', 'server05']


(533, 664, 3744, 545)

### Plots (hist, boxplot, scatter) — salvos em `/mnt/data/ndt_outputs/plots_*.png`

In [49]:

def save_hist(series, title, fname):
    plt.figure()
    plt.hist(series.dropna(), bins=40, density=True)
    plt.title(title)
    plt.xlabel(series.name)
    plt.ylabel('density')
    plt.tight_layout()
    plt.savefig(os.path.join(OUT_DIR, fname))
    plt.close()

def save_box(series_a, series_b, labels, title, fname):
    plt.figure()
    plt.boxplot([series_a.dropna(), series_b.dropna()], labels=labels, vert=True)
    plt.title(title)
    plt.tight_layout()
    plt.savefig(os.path.join(OUT_DIR, fname))
    plt.close()

def save_scatter(x, y, title, fname):
    plt.figure()
    plt.scatter(x, y, s=10, alpha=0.6)
    plt.title(title)
    plt.xlabel(x.name)
    plt.ylabel(y.name)
    plt.tight_layout()
    plt.savefig(os.path.join(OUT_DIR, fname))
    plt.close()


# Corrigir valores absurdos de throughput (erro de notação científica)
df.loc[df['download_throughput_bps'] > 1e10, 'download_throughput_bps'] /= 1e6
df.loc[df['upload_throughput_bps'] > 1e10, 'upload_throughput_bps'] /= 1e6

# Criar colunas convertidas para Mbps (valores reais para gráficos)
df['download_throughput_mbps'] = df['download_throughput_bps'] / 1e6
df['upload_throughput_mbps'] = df['upload_throughput_bps'] / 1e6

# Substituir as métricas antigas pelas novas (caso queira usar as versões corrigidas)
metrics = ['download_throughput_mbps', 'upload_throughput_mbps', 'rtt_download_sec', 'rtt_upload_sec', 'packet_loss_percent']

print("✅ Dados corrigidos e normalizados (valores em Mbps).")


# Exemplos de gráficos para clientes selecionados
for m in metrics:
    save_hist(df_c1[m], f"Hist {m} — {sel_clients[0]}", f"hist_{m}_{sel_clients[0]}.png")
    save_hist(df_c2[m], f"Hist {m} — {sel_clients[-1]}", f"hist_{m}_{sel_clients[-1]}.png")
    save_box(df_c1[m], df_c2[m], sel_clients, f"Boxplot {m} — clientes contrastantes", f"box_{m}_clients.png")

# Scatter: escolher par relevante (throughput vs RTT)
save_scatter(df['download_throughput_bps'], df['rtt_download_sec'], "Scatter: download_throughput_bps vs rtt_download_sec", "scatter_down_vs_rttd.png")
save_scatter(df['upload_throughput_bps'], df['rtt_upload_sec'], "Scatter: upload_throughput_bps vs rtt_upload_sec", "scatter_up_vs_rttu.png")

print("✅ Gráficos salvos em", OUT_DIR)


✅ Dados corrigidos e normalizados (valores em Mbps).
✅ Gráficos salvos em /Users/pv/Desktop/ndt_outputs


## 2) MLE — parametrizações e estimativas

In [50]:

# Utilitários de MLE sem SciPy

def mle_normal_params(x):
    mu = np.mean(x)
    sigma2 = np.mean((x - mu)**2)  # MLE
    return mu, sigma2

# Digamma e trigamma aproximadas (Abramowitz & Stegun style)
def digamma(x):
    x = float(x)
    result = 0.0
    while x < 6.0:
        result -= 1.0/x
        x += 1.0
    f = 1.0/(x*x)
    result += math.log(x) - 0.5/x - f*(1.0/12.0 - f*(1.0/120.0 - f*(1.0/252.0)))
    return result

def trigamma(x):
    x = float(x)
    result = 0.0
    while x < 6.0:
        result += 1.0/(x*x)
        x += 1.0
    f = 1.0/(x*x)
    result += 1.0/x + f*(1.0/2.0 + f*(1.0/6.0 - f*(1.0/30.0)))
    return result

def mle_gamma_k_beta(y, max_iter=50, tol=1e-8):
    y = np.asarray(y, dtype=float)
    y = y[y>0]
    n = y.size
    if n==0:
        return np.nan, np.nan
    mean_y = y.mean()
    mean_logy = np.mean(np.log(y))
    # Inicializa k via método dos momentos
    var_y = y.var(ddof=0)
    k = (mean_y**2)/var_y if var_y>0 else 1.0
    k = max(k, 1e-6)
    # Newton-Raphson para resolver: log(k) - digamma(k) = log(mean) - mean(log y)
    target = math.log(mean_y) - mean_logy
    for _ in range(max_iter):
        f = math.log(k) - digamma(k) - target
        g = 1.0/k - trigamma(k)
        step = f/g
        k_new = k - step
        if k_new <= 0:
            k_new = k/2
        if abs(k_new - k) < tol:
            k = k_new
            break
        k = k_new
    beta = k / mean_y
    return float(k), float(beta)

# Estimações
mu_rtt_d, s2_rtt_d = mle_normal_params(df['rtt_download_sec'].values)
mu_rtt_u, s2_rtt_u = mle_normal_params(df['rtt_upload_sec'].values)

k_down, beta_down = mle_gamma_k_beta(df['download_throughput_bps'].values)
k_up,   beta_up   = mle_gamma_k_beta(df['upload_throughput_bps'].values)

# Para perda: converter percent para contagem com nt=1000
NT = 1000
x_losses = np.rint((df['packet_loss_percent'].clip(lower=0)/100.0) * NT).astype(int)
p_mle = x_losses.sum() / (NT*len(x_losses))  # MLE da Binomial

mle_summary = pd.DataFrame({
    'model': ['Normal RTT download','Normal RTT upload','Gamma throughput download','Gamma throughput upload','Binomial perda (p)'],
    'theta1': [mu_rtt_d, mu_rtt_u, k_down, k_up, p_mle],
    'theta2': [s2_rtt_d, s2_rtt_u, beta_down, beta_up, np.nan]
})
mle_summary.to_csv(os.path.join(OUT_DIR,'mle_summary.csv'), index=False)
mle_summary


Unnamed: 0,model,theta1,theta2
0,Normal RTT download,0.048477,0.004606155
1,Normal RTT upload,0.037491,0.003735033
2,Gamma throughput download,1.49595,2.952534e-09
3,Gamma throughput upload,1.194078,3.157995e-09
4,Binomial perda (p),0.019002,


### Diagnósticos de ajuste (hist + densidade via simulação, QQ via simulação)

In [51]:

rng = np.random.default_rng(7)

def qq_plot_sim(data, sim_sampler, title, fname, n_sim=2000):
    data = np.asarray(data)
    data = data[~np.isnan(data)]
    if len(data)==0:
        return
    xq = np.quantile(data, np.linspace(0.01,0.99,99))
    sim = sim_sampler(n_sim)
    yq = np.quantile(sim, np.linspace(0.01,0.99,99))
    plt.figure()
    plt.scatter(xq, yq, s=10)
    mn, mx = min(xq.min(), yq.min()), max(xq.max(), yq.max())
    plt.plot([mn,mx],[mn,mx])
    plt.title(title)
    plt.xlabel("Dados (quantis)")
    plt.ylabel("Modelo (quantis)")
    plt.tight_layout()
    plt.savefig(os.path.join(OUT_DIR, fname))
    plt.close()

def overlay_hist_with_sim(data, sim_sampler, title, fname, bins=40):
    data = np.asarray(data)
    data = data[~np.isnan(data)]
    if len(data)==0:
        return
    plt.figure()
    plt.hist(data, bins=bins, density=True, alpha=0.6, label="dados")
    sim = sim_sampler(50000)
    plt.hist(sim, bins=bins, density=True, histtype='step', label="modelo")
    plt.legend()
    plt.title(title)
    plt.tight_layout()
    plt.savefig(os.path.join(OUT_DIR, fname))
    plt.close()

# RTT download ~ Normal(mu, s2)
overlay_hist_with_sim(df['rtt_download_sec'].values, lambda n: rng.normal(mu_rtt_d, math.sqrt(s2_rtt_d), size=n),
                      "RTT download — hist vs sim Normal", "diag_hist_rttd.png")
qq_plot_sim(df['rtt_download_sec'].values, lambda n: rng.normal(mu_rtt_d, math.sqrt(s2_rtt_d), size=n),
            "RTT download — QQ Normal (sim)", "diag_qq_rttd.png")

# RTT upload ~ Normal
overlay_hist_with_sim(df['rtt_upload_sec'].values, lambda n: rng.normal(mu_rtt_u, math.sqrt(s2_rtt_u), size=n),
                      "RTT upload — hist vs sim Normal", "diag_hist_rttu.png")
qq_plot_sim(df['rtt_upload_sec'].values, lambda n: rng.normal(mu_rtt_u, math.sqrt(s2_rtt_u), size=n),
            "RTT upload — QQ Normal (sim)", "diag_qq_rttu.png")

# Throughput ~ Gamma(k, beta) (rate)
overlay_hist_with_sim(df['download_throughput_bps'].values, lambda n: rng.gamma(shape=k_down, scale=1.0/beta_down, size=n),
                      "Throughput download — hist vs sim Gamma", "diag_hist_tdown.png")
qq_plot_sim(df['download_throughput_bps'].values, lambda n: rng.gamma(shape=k_down, scale=1.0/beta_down, size=n),
            "Throughput download — QQ Gamma (sim)", "diag_qq_tdown.png")

overlay_hist_with_sim(df['upload_throughput_bps'].values, lambda n: rng.gamma(shape=k_up, scale=1.0/beta_up, size=n),
                      "Throughput upload — hist vs sim Gamma", "diag_hist_tup.png")
qq_plot_sim(df['upload_throughput_bps'].values, lambda n: rng.gamma(shape=k_up, scale=1.0/beta_up, size=n),
            "Throughput upload — QQ Gamma (sim)", "diag_qq_tup.png")

print("✅ Diagnósticos salvos em", OUT_DIR)


✅ Diagnósticos salvos em /Users/pv/Desktop/ndt_outputs


## 3) Inferência Bayesiana (priors conjugadas) e preditiva

In [52]:

# Split 70/30 temporal
cut_idx = int(len(df)*0.7)
train = df.iloc[:cut_idx].copy()
test  = df.iloc[cut_idx:].copy()

# --- Normal-Normal (RTT) ---
# Prior fraca: mu0 = média de treino; tau0^2 = 10 * s2_mle (variância conhecida = s2_mle)
def normal_normal_posterior(train_series):
    x = train_series.values
    mu0 = float(np.mean(x))
    s2  = float(np.mean((x-mu0)**2))  # σ^2 "conhecida" ≈ MLE do treino
    tau0_2 = 10.0 * s2
    n = len(x)
    xbar = float(np.mean(x))
    tau_n2 = 1.0 / (1.0/tau0_2 + n/s2)
    mu_n   = tau_n2 * (mu0/tau0_2 + n*xbar/s2)
    return dict(mu0=mu0, s2=s2, tau0_2=tau0_2, mu_n=mu_n, tau_n2=tau_n2)

post_rtt_d = normal_normal_posterior(train['rtt_download_sec'])
post_rtt_u = normal_normal_posterior(train['rtt_upload_sec'])

# preditiva: N(mu_n, s2 + tau_n2)
def normal_predictive_stats(post):
    mean = post['mu_n']
    var  = post['s2'] + post['tau_n2']
    return mean, var

pred_mu_rtt_d, pred_var_rtt_d = normal_predictive_stats(post_rtt_d)
pred_mu_rtt_u, pred_var_rtt_u = normal_predictive_stats(post_rtt_u)

# --- Beta-Binomial (perda) ---
NT = 1000
x_train = np.rint((train['packet_loss_percent'].clip(lower=0)/100.0)*NT).astype(int)
a0, b0 = 1.0, 1.0  # uniforme
a_n = a0 + x_train.sum()
b_n = b0 + NT*len(x_train) - x_train.sum()

# Estatísticas preditivas (fração)
pred_mean_loss_frac = a_n/(a_n + b_n)
pred_var_loss_count = (NT * (a_n*b_n*(a_n + b_n + NT))) / (((a_n + b_n)**2) * (a_n + b_n + 1))
pred_var_loss_frac  = pred_var_loss_count / (NT**2)

# --- Gamma-Gamma (throughput) ---
def gamma_mle_k_beta(x):
    return mle_gamma_k_beta(x)

def gamma_gamma_posterior(train_series):
    y = train_series.values
    y = y[y>0]
    k_hat, beta_hat = gamma_mle_k_beta(y)
    # prior fraca para beta: Gamma(a0,b0)
    a0, b0 = 1.0, 1e-6
    a_n = a0 + len(y)*k_hat
    b_n = b0 + float(y.sum())
    return dict(k_hat=k_hat, a_n=a_n, b_n=b_n)

post_tdown = gamma_gamma_posterior(train['download_throughput_bps'])
post_tup   = gamma_gamma_posterior(train['upload_throughput_bps'])

def beta_prime_predictive_stats(k_hat, a_n, b_n):
    # média existe se a_n>1; var se a_n>2
    mean = (k_hat * b_n) / (a_n - 1) if a_n>1 else np.nan
    var  = (k_hat * (k_hat + a_n - 1) * (b_n**2)) / ((a_n - 1)**2 * (a_n - 2)) if a_n>2 else np.nan
    return mean, var

pred_mean_tdown, pred_var_tdown = beta_prime_predictive_stats(post_tdown['k_hat'], post_tdown['a_n'], post_tdown['b_n'])
pred_mean_tup,   pred_var_tup   = beta_prime_predictive_stats(post_tup['k_hat'],   post_tup['a_n'],   post_tup['b_n'])

# Avaliação vs teste
def eval_pred_vs_test(pred_mean, pred_var, test_series):
    m = float(np.mean(test_series))
    v = float(np.var(test_series, ddof=0))
    return dict(pred_mean=pred_mean, pred_var=pred_var, test_mean=m, test_var=v)

eval_rtt_d = eval_pred_vs_test(pred_mu_rtt_d, pred_var_rtt_d, test['rtt_download_sec'])
eval_rtt_u = eval_pred_vs_test(pred_mu_rtt_u, pred_var_rtt_u, test['rtt_upload_sec'])

test_loss_frac = (np.rint((test['packet_loss_percent'].clip(lower=0)/100.0)*NT)/NT).values
eval_loss = eval_pred_vs_test(pred_mean_loss_frac, pred_var_loss_frac, test_loss_frac)

eval_tdown = eval_pred_vs_test(pred_mean_tdown, pred_var_tdown, test['download_throughput_bps'])
eval_tup   = eval_pred_vs_test(pred_mean_tup,   pred_var_tup,   test['upload_throughput_bps'])

bayes_eval = pd.DataFrame([
    {'var':'RTT download', **eval_rtt_d},
    {'var':'RTT upload', **eval_rtt_u},
    {'var':'Loss fraction', **eval_loss},
    {'var':'Throughput download', **eval_tdown},
    {'var':'Throughput upload', **eval_tup},
])
bayes_eval.to_csv(os.path.join(OUT_DIR,'bayes_predictive_eval.csv'), index=False)
bayes_eval


Unnamed: 0,var,pred_mean,pred_var,test_mean,test_var
0,RTT download,0.04982119,0.004924732,0.04534205,0.003851082
1,RTT upload,0.03898611,0.004046071,0.03400305,0.002993802
2,Loss fraction,0.022637,2.212902e-05,0.0105193,0.0004801272
3,Throughput download,492360800.0,1.696509e+17,540046300.0,1.112477e+17
4,Throughput upload,367120500.0,1.089246e+17,403760900.0,9.809829e+16


## 4) Comparação MLE vs Bayes (pontuais)

In [53]:

comp = {
    'RTT download (mu)': {'MLE': float(np.mean(df['rtt_download_sec'])), 'Bayes E[mu|r]': float(post_rtt_d['mu_n'])},
    'RTT upload (mu)':   {'MLE': float(np.mean(df['rtt_upload_sec'])), 'Bayes E[mu|r]': float(post_rtt_u['mu_n'])},
    'Loss p':            {'MLE': float((np.rint((df['packet_loss_percent'].clip(lower=0)/100.0)*NT).sum())/(NT*len(df))), 'Bayes E[p|r]': float(a_n/(a_n+b_n))},
    'Tput down (beta)':  {'MLE': float(mle_gamma_k_beta(df['download_throughput_bps'].values)[1]), 'Bayes E[beta|r]': float(post_tdown['a_n']/post_tdown['b_n'])},
    'Tput up (beta)':    {'MLE': float(mle_gamma_k_beta(df['upload_throughput_bps'].values)[1]),   'Bayes E[beta|r]': float(post_tup['a_n']/post_tup['b_n'])},
}
comp_df = pd.DataFrame(comp).T
comp_df.to_csv(os.path.join(OUT_DIR,'mle_vs_bayes_point_estimates.csv'))
comp_df


Unnamed: 0,MLE,Bayes E[mu|r],Bayes E[p|r],Bayes E[beta|r]
RTT download (mu),0.04847744,0.049821,,
RTT upload (mu),0.03749119,0.038986,,
Loss p,0.01900155,,0.022637,
Tput down (beta),2.952534e-09,,,2.903605e-09
Tput up (beta),3.157995e-09,,,3.372188e-09


## 5) Exporta um resumo rápido em CSVs

In [54]:

summary_paths = {
    "EDA por cliente": os.path.join(OUT_DIR,'eda_by_client.csv'),
    "EDA por servidor": os.path.join(OUT_DIR,'eda_by_server.csv'),
    "MLE resumo": os.path.join(OUT_DIR,'mle_summary.csv'),
    "Bayes preditiva vs teste": os.path.join(OUT_DIR,'bayes_predictive_eval.csv'),
    "Comparação MLE vs Bayes": os.path.join(OUT_DIR,'mle_vs_bayes_point_estimates.csv'),
}
summary_paths


{'EDA por cliente': '/Users/pv/Desktop/ndt_outputs/eda_by_client.csv',
 'EDA por servidor': '/Users/pv/Desktop/ndt_outputs/eda_by_server.csv',
 'MLE resumo': '/Users/pv/Desktop/ndt_outputs/mle_summary.csv',
 'Bayes preditiva vs teste': '/Users/pv/Desktop/ndt_outputs/bayes_predictive_eval.csv',
 'Comparação MLE vs Bayes': '/Users/pv/Desktop/ndt_outputs/mle_vs_bayes_point_estimates.csv'}