In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from pathlib import Path

: 

In [14]:
# Load the benchmark results: one CSV holds the serial runs, the other the
# parallel runs. Both are read from the current working directory.
serial, parallel = (
    pd.read_csv(csv_name)
    for csv_name in ("gwo_serial.csv", "gwo_parallel.csv")
)

In [15]:
# Columns that together identify one benchmark configuration.
CONFIG_KEYS = ["problem", "N", "POP_SIZE"]

# Attach the serial baseline time to every parallel measurement of the same
# configuration. `avg_ms` exists in both frames, so it comes out as
# `avg_ms_par` / `avg_ms_ser`.
# - The default inner join drops parallel rows that have no serial baseline.
# - validate="many_to_one" raises MergeError if the serial frame holds more
#   than one row per configuration, instead of silently duplicating rows.
merged = parallel.merge(
    serial[CONFIG_KEYS + ["avg_ms"]],
    on=CONFIG_KEYS,
    suffixes=("_par", "_ser"),
    validate="many_to_one",
)

# speedup    = serial time / parallel time
# efficiency = speedup / number of threads
merged["speedup"] = merged["avg_ms_ser"] / merged["avg_ms_par"]
merged["efficiency"] = merged["speedup"] / merged["threads"]

# Persist the combined table.
merged.to_csv("gwo_summary.csv", index=False)

print("== Một vài dòng kết quả tính speedup/efficiency ==")
print(merged[["problem", "N", "POP_SIZE", "threads",
              "avg_ms_ser", "avg_ms_par",
              "speedup", "efficiency"]].head())


def save_metric_plot(sub, metric, label, title, path):
    """Save a line plot of one metric against the thread count.

    Parameters
    ----------
    sub : pandas.DataFrame
        Rows of a single configuration; must contain the columns
        ``threads`` and ``metric``.
    metric : str
        Name of the column plotted on the Y axis.
    label : str
        Y-axis label.
    title : str
        Figure title.
    path : path-like
        Destination of the PNG file.
    """
    # Thread counts are cast to int so they can be used verbatim as X ticks.
    threads = sub["threads"].astype(int).to_list()

    fig, ax = plt.subplots()
    ax.plot(threads, sub[metric], marker="o")
    ax.set_xlabel("Threads")
    ax.set_ylabel(label)
    ax.set_title(title)
    ax.set_xticks(threads)  # show exactly the measured thread counts
    ax.grid(True)
    fig.tight_layout()
    fig.savefig(path, dpi=150)
    plt.close(fig)  # release the figure; many are created in the loop below


# One speedup plot and one efficiency plot per (problem, N, POP_SIZE).
out_dir = Path("plots")
out_dir.mkdir(exist_ok=True)

# A single groupby pass replaces the per-row boolean masks; sort=False keeps
# the configurations in their order of first appearance.
for (prob, N, pop), sub in merged.groupby(CONFIG_KEYS, sort=False):
    sub = sub.sort_values("threads")
    for metric, label in (("speedup", "Speedup"), ("efficiency", "Efficiency")):
        save_metric_plot(
            sub,
            metric,
            label,
            title=f"{prob} - N={N}, POP={pop} - {label}",
            path=out_dir / f"{metric}_{prob}_N{N}_POP{pop}.png",
        )

print("Đã lưu:")
print("  - gwo_summary.csv")
print("  - các biểu đồ speedup_*.png, efficiency_*.png trong thư mục 'plots'")


== Một vài dòng kết quả tính speedup/efficiency ==
     problem   N  POP_SIZE  threads  avg_ms_ser  avg_ms_par   speedup  \
0  Rastrigin  10        50        1        26.4        37.8  0.698413   
1  Rastrigin  10        50        2        26.4        93.3  0.282958   
2  Rastrigin  10        50        4        26.4       114.9  0.229765   
3  Rastrigin  10        50        8        26.4       162.8  0.162162   
4  Rastrigin  10        50       16        26.4       290.1  0.091003   

   efficiency  
0    0.698413  
1    0.141479  
2    0.057441  
3    0.020270  
4    0.005688  
Đã lưu:
  - gwo_summary.csv
  - các biểu đồ speedup_*.png, efficiency_*.png trong thư mục 'plots'


In [17]:
# Join the parallel measurements with the full serial table on the
# configuration keys. Any non-key column present in both frames (at least
# `avg_ms`) receives the `_par` / `_ser` suffix.
# validate="many_to_one" raises MergeError if the serial frame has more than
# one row per configuration, instead of silently duplicating rows.
merged = parallel.merge(
    serial,
    on=["problem", "N", "POP_SIZE"],
    suffixes=("_par", "_ser"),
    validate="many_to_one",
)

# speedup    = serial time / parallel time
# efficiency = speedup / number of threads
merged["speedup"] = merged["avg_ms_ser"] / merged["avg_ms_par"]
merged["efficiency"] = merged["speedup"] / merged["threads"]

out = Path("plots_analysis")
out.mkdir(exist_ok=True)

# Every plot below looks only at the largest measured thread count, so the
# filter is computed once instead of inside each loop iteration.
max_threads = merged["threads"].max()
at_max_threads = merged[merged["threads"] == max_threads]


def save_efficiency_lines(frame, x_col, xlabel, title, path):
    """Save an efficiency-vs-``x_col`` plot with one line per problem.

    Rows of different problems are drawn as separate, labelled lines and each
    line is sorted by ``x_col``; a single line through all rows would connect
    unrelated problems in arbitrary order.

    Parameters
    ----------
    frame : pandas.DataFrame
        Rows to plot; must contain ``problem``, ``efficiency`` and ``x_col``.
    x_col : str
        Column used for the X axis.
    xlabel : str
        X-axis label.
    title : str
        Figure title.
    path : path-like
        Destination of the PNG file.
    """
    fig, ax = plt.subplots(figsize=(6, 4))
    for prob, grp in frame.groupby("problem", sort=False):
        grp = grp.sort_values(x_col)
        ax.plot(grp[x_col], grp["efficiency"], marker="o", label=prob)
    ax.set_title(title)
    ax.set_xlabel(xlabel)
    ax.set_ylabel("Efficiency")
    ax.grid(True)
    if not frame.empty:
        # Only add a legend when at least one line was drawn.
        ax.legend()
    fig.tight_layout()
    fig.savefig(path, dpi=150)
    plt.close(fig)


# ---------------------------------------------------------
# 1) Efficiency vs POP (fix N)
# ---------------------------------------------------------
for N in sorted(merged["N"].unique()):
    save_efficiency_lines(
        at_max_threads[at_max_threads["N"] == N],
        x_col="POP_SIZE",
        xlabel="Population Size (POP)",
        title=f"Efficiency vs POP (fixed N={N})",
        path=out / f"eff_vs_pop_N{N}.png",
    )

# ---------------------------------------------------------
# 2) Efficiency vs N (fix POP)
# ---------------------------------------------------------
for pop in sorted(merged["POP_SIZE"].unique()):
    save_efficiency_lines(
        at_max_threads[at_max_threads["POP_SIZE"] == pop],
        x_col="N",
        xlabel="N (dimensions)",
        title=f"Efficiency vs N (fixed POP={pop})",
        path=out / f"eff_vs_N_POP{pop}.png",
    )

# ---------------------------------------------------------
# 3) Heatmap N x POP -> Efficiency (threads = max)
# ---------------------------------------------------------
heat = at_max_threads
# pivot_table aggregates with the mean by default, so each cell is the
# efficiency averaged over all problems sharing that (N, POP_SIZE).
pivot = heat.pivot_table(
    index="N",
    columns="POP_SIZE",
    values="efficiency",
)

fig, ax = plt.subplots(figsize=(8, 6))
sns.heatmap(pivot, annot=True, fmt=".2f", cmap="viridis", ax=ax)
ax.set_title("Efficiency Heatmap (higher = better parallel speedup)")
fig.savefig(out / "heatmap_efficiency.png", dpi=150)
plt.close(fig)
