# Task 1.4 & 1.5 — Nelder–Mead and DIRECT (Minimal)
*Libraries*: only `numpy`, `math`, `time`, `matplotlib`.

**What this notebook does**
- Implements Nelder–Mead (simplex local search) and DIRECT (global partitioning) following the handout.
- Runs both on Ackley, Branin, Rosenbrock (coef 5), Rastrigin (2D).
- Prints a console results table with columns: `function, start, method, x found, f(x found), iterations, f evals, time s`.
- Saves trajectory plots:
  - Nelder–Mead: contour + simplex vertices + centroid path → `plot <func> nelder trajectory.png`
  - DIRECT: sampled centers (scatter) + best point → `plot <func> direct samples.png`


In [1]:
# Cell 1 — Imports & helpers
import numpy as np
import math, time, os
import matplotlib.pyplot as plt

plt.rcParams.update({"figure.dpi": 120})

def ensure_dir(path):
    d = os.path.dirname(path)
    if d and not os.path.exists(d):
        os.makedirs(d)

def savefig(path):
    ensure_dir(path)
    plt.savefig(path, bbox_inches="tight")
    plt.close()

def fmt_vec(v):
    return f"({v[0]:.6f}, {v[1]:.6f})"

def fmt_s(x):
    return f"{x:.6f}"

def print_table(rows, header):
    print(header)
    for r in rows:
        # r = (function, start_tuple, method, x_found_vec2, fval, iterations, fevals, time_sec)
        print(f"{r[0]:10s} | {r[1]} | {r[2]:14s} | {fmt_vec(r[3])} | {fmt_s(r[4])} | {r[5]:10d} | {r[6]:7d} | {fmt_s(r[7])}")

In [2]:
# Cell 2 — Test functions, starts (NM), bounds (DIRECT)
def ackley(xy):
    x, y = xy
    term1 = -20.0 * math.exp(-0.2 * math.sqrt(0.5*(x*x + y*y)))
    term2 = -math.exp(0.5*(math.cos(2*math.pi*x)+math.cos(2*math.pi*y)))
    return term1 + term2 + math.e + 20.0

def branin(xy):
    x, y = xy
    a = 1.0; b = 5.1/(4*math.pi**2); c = 5.0/math.pi; r = 6.0; s = 10.0; t = 1.0/(8*math.pi)
    return a*(y - b*x*x + c*x - r)**2 + s*(1 - t)*math.cos(x) + s

def rosenbrock(xy):
    x, y = xy
    return (1.0 - x)**2 + 5.0*(y - x*x)**2

def rastrigin(xy):
    x, y = xy
    return 20.0 + (x*x - 10.0*math.cos(2*math.pi*x)) + (y*y - 10.0*math.cos(2*math.pi*y))

KNOWN_MINIMA = {
    "ackley": [(0.0, 0.0)],
    "branin": [(math.pi, 2.275)],
    "rosenbrock": [(1.0, 1.0)],
    "rastrigin": [(0.0, 0.0)]
}

STARTS_NM = {
    "ackley":     np.array([0.0, 1.0]),
    "branin":     np.array([2.0, 2.0]),
    "rosenbrock": np.array([-1.5, 2.0]),
    "rastrigin":  np.array([2.5, 2.5])
}

BOUNDS_DIR = {
    "ackley":     (np.array([-2.0, -3.0]), np.array([4.0, 3.0])),
    "branin":     (np.array([0.0, 0.0]),   np.array([4.0, 4.0])),
    "rosenbrock": (np.array([-5.0, 0.0]),  np.array([2.0, 4.0])),
    "rastrigin":  (np.array([-1.0, -1.0]), np.array([6.0, 6.0]))
}

FUNS = {"ackley": ackley, "branin": branin, "rosenbrock": rosenbrock, "rastrigin": rastrigin}

In [3]:
# Cell 3 — Evaluation wrapper with counting
class FWrap:
    def __init__(self, f, name):
        self.f = f; self.name = name; self.evals = 0
    def __call__(self, x):
        self.evals += 1
        return float(self.f(np.array(x, float)))

In [4]:
# Cell 4 — Nelder–Mead (centroid path + simplex vertices)
def nelder_mead(fw, x0, eps=1e-6, kmax=100, alpha=1.0, beta=2.0, gamma=0.5, delta=0.05):
    x0 = np.array(x0, float)
    n = len(x0)
    S = [x0.copy()]
    for i in range(n):
        e = np.zeros(n); e[i] = 1.0
        S.append(x0 + delta * e)
    S = np.array(S)
    y = np.array([fw(S[i]) for i in range(n+1)])
    it = 0
    centroids = []
    simplices = []
    while it < kmax and np.std(y) > eps:
        p = np.argsort(y); S = S[p]; y = y[p]
        xl, yl = S[0], y[0]
        xh, yh = S[-1], y[-1]
        xs, ys = S[-2], y[-2]
        xm = np.mean(S[:-1], axis=0)
        centroids.append(xm.copy()); simplices.append(S.copy())
        xr = xm + alpha*(xm - xh); yr = fw(xr)
        if yr < yl:
            xe = xm + beta*(xr - xm); ye = fw(xe)
            if ye < yr: S[-1], y[-1] = xe, ye
            else:       S[-1], y[-1] = xr, yr
        elif yr >= ys:
            if yr < yh:
                xh, yh = xr, yr
                S[-1], y[-1] = xr, yr
            xc = xm + gamma*(xh - xm); yc = fw(xc)
            if yc > yh:
                for i in range(1, n+1):
                    S[i] = (S[i] + xl)/2.0
                    y[i] = fw(S[i])
            else:
                S[-1], y[-1] = xc, yc
        else:
            S[-1], y[-1] = xr, yr
        it += 1
    p = np.argsort(y); S = S[p]; y = y[p]
    xm = np.mean(S[:-1], axis=0)
    centroids.append(xm.copy()); simplices.append(S.copy())
    return {'x': S[0], 'f': y[0], 'iters': it, 'fevals': fw.evals,
            'centroids': np.array(centroids), 'simplices': simplices}

In [5]:
# Cell 5 — Plotting Nelder–Mead trajectory

def _ab_from_unit(xu, a, b):  # unit -> original
    return a + xu * (b - a)

def _half_diag(depths):
    d = np.array(depths, dtype=float)
    return 0.5 * np.linalg.norm(3.0 ** (-d))

def _divide_min_depths(fu, c, y, depths):
    depths = list(depths)
    dmin = min(depths)
    I = [i for i, di in enumerate(depths) if di == dmin]
    out = []

    # 计算每个被切分维度的位移步长 δ_i = 3^{-(depth_i+1)}
    steps = {}
    for i in I:
        steps[i] = 3.0 ** (-(depths[i] + 1))

    # 子中心：c ± δ e_i
    for i in I:
        e = np.zeros_like(c); e[i] = 1.0
        ci1 = c + steps[i] * e
        ci2 = c - steps[i] * e
        yi1 = fu(ci1); yi2 = fu(ci2)
        d_new = depths.copy(); d_new[i] += 1
        out.append([ci1, yi1, d_new])
        out.append([ci2, yi2, d_new])

    # 保留中心 c，并对所有被切分维度把深度 +1
    d_keep = depths.copy()
    for i in I:
        d_keep[i] += 1
    out.append([c, y, d_keep])
    return out

def _select_potentially_optimal(intervals, eps, fbest):
    pts = []
    for idx, (c, y, depths) in enumerate(intervals):
        di = _half_diag(depths)
        pts.append((di, y, idx))
    pts.sort(key=lambda t: (t[0], t[1]))  # d 升序, f 升序

    # 构造 lower convex hull（单调栈），加上 d 相等时的去重与除零保护
    hull = []
    for (d, f, idx) in pts:
        while len(hull) >= 2:
            d1, f1, i1 = hull[-1]
            d2, f2, i2 = hull[-2]
            # 避免除零：若 d==d1==d2，保留较小 f 的那个
            if abs((d1 - d2)) < 1e-14:
                if f1 >= f: hull.pop()
                else: break
                continue
            if abs((d - d1)) < 1e-14:  # 与上一个 d 相同
                if f1 <= f:  # 新点更差，丢掉新点
                    break
                else:        # 新点更好，丢掉旧点
                    hull.pop()
                    continue
            # 斜率比较，保证下凸（lower）结构
            slope1 = (f1 - f2) / (d1 - d2)
            slope2 = (f - f1) / (d - d1)
            if slope2 >= slope1 - 1e-18:
                hull.pop()
            else:
                break
        hull.append((d, f, idx))

    # 容差过滤：只保留足够好的点
    thr = fbest + eps * abs(fbest)
    sel = [intervals[idx] for (_, f, idx) in hull if f <= thr]
    # 至少要返回一个
    if not sel:
        # 返回 f 最小的一个
        best_idx = min(range(len(intervals)), key=lambda j: intervals[j][1])
        sel = [intervals[best_idx]]
    return sel

def direct(fw, a, b, eps=1e-6, kmax=150):
    a = np.array(a, float); b = np.array(b, float); n = len(a)
    assert n == 2, "This minimal implementation expects 2D."

    def fu(xu):
        x = _ab_from_unit(xu, a, b)
        return fw(x)

    # 初始化
    c0 = np.full(n, 0.5)
    intervals = [[c0, fu(c0), [0]*n]]
    samples = [c0.copy()]
    fbest = intervals[0][1]
    cbest = c0.copy()

    it = 0
    while it < kmax:
        # 选择“潜在最优”区间
        S = _select_potentially_optimal(intervals, eps, fbest)

        # 从列表中移除 S（避免 numpy 比较歧义，用 id 判等）
        S_ids = set(id(iv) for iv in S)
        keep = [iv for iv in intervals if id(iv) not in S_ids]

        # 对每个被选区间做三等分
        new_list = []
        for (c, y, depths) in S:
            divs = _divide_min_depths(fu, c, y, depths)
            for (ci, yi, di) in divs:
                new_list.append([ci, yi, di])
                samples.append(ci.copy())
                if yi < fbest:
                    fbest, cbest = yi, ci.copy()

        # 更新集合
        intervals = keep + new_list
        it += 1

    x_best = _ab_from_unit(cbest, a, b)
    return {'x': x_best, 'f': float(fbest), 'iters': it, 'fevals': fw.evals, 'samples': np.array(samples)}


In [6]:
# Cell 6 — Plotting (NM trajectory and DIRECT samples)
def contour_with_fun(fun, xmin, xmax, ymin, ymax, N=250):
    X = np.linspace(xmin, xmax, N); Y = np.linspace(ymin, ymax, N)
    XX, YY = np.meshgrid(X, Y); ZZ = np.zeros_like(XX)
    for i in range(N):
        for j in range(N):
            ZZ[i,j] = fun(np.array([XX[i,j], YY[i,j]]))
    plt.contour(XX, YY, ZZ, levels=30)
    return X, Y

def plot_nm(fun, res, fname, minima, N=250):
    # 收集顶点与终点
    xs, ys = [], []
    for S in res['simplices']:
        xs += [p[0] for p in S]; ys += [p[1] for p in S]
    xs += [res['x'][0]]; ys += [res['x'][1]]
    for (mx, my) in minima:
        xs.append(mx); ys.append(my)

    # 自适应窗口 + 正方形 + 安全边距
    x_min, x_max = min(xs), max(xs)
    y_min, y_max = min(ys), max(ys)
    span_x = max(x_max - x_min, 1e-6)
    span_y = max(y_max - y_min, 1e-6)
    span   = max(span_x, span_y)         # 用较大跨度做正方形
    cx, cy = 0.5*(x_min+x_max), 0.5*(y_min+y_max)
    pad    = 0.10 * span                 # 10% 边距
    xmin, xmax = cx - 0.5*span - pad, cx + 0.5*span + pad
    ymin, ymax = cy - 0.5*span - pad, cy + 0.5*span + pad

    # 等高线
    X = np.linspace(xmin, xmax, N); Y = np.linspace(ymin, ymax, N)
    XX, YY = np.meshgrid(X, Y); ZZ = np.zeros_like(XX)
    for i in range(N):
        for j in range(N):
            ZZ[i, j] = fun(np.array([XX[i, j], YY[i, j]]))
    plt.figure()
    plt.contour(XX, YY, ZZ, levels=30)

    # 画 simplex 边 & 重心轨迹
    for S in res['simplices']:
        P = np.vstack([S, S[0]])
        plt.plot(P[:, 0], P[:, 1], marker='o', linewidth=1, alpha=0.8)
    C = res['centroids']
    plt.plot(C[:, 0], C[:, 1], marker='x', linewidth=1.5, label='centroid path')

    # 关键点
    plt.scatter([res['simplices'][0][0, 0]], [res['simplices'][0][0, 1]], marker='s', s=40, label='start (vertex)')
    plt.scatter([res['x'][0]], [res['x'][1]], marker='*', s=80, label='final')
    for (mx, my) in minima:
        plt.scatter([mx], [my], marker='+', s=60, label='global min')

    # 关键：等比例坐标 + 固定窗口
    ax = plt.gca()
    ax.set_aspect('equal', adjustable='box')
    ax.set_xlim(xmin, xmax); ax.set_ylim(ymin, ymax)

    plt.legend(); plt.title('Nelder–Mead'); plt.xlabel('x'); plt.ylabel('y')
    savefig(fname)

def plot_direct(fun, res, fname, a, b, minima):
    # 将采样中心从 unit 立方映射回原区间
    su = res['samples']
    so = a + su * (b - a)

    # 动态计算显示窗口（保证方形比例）
    x_min, x_max = np.min(so[:,0]), np.max(so[:,0])
    y_min, y_max = np.min(so[:,1]), np.max(so[:,1])
    for (mx, my) in minima:
        x_min, x_max = min(x_min, mx), max(x_max, mx)
        y_min, y_max = min(y_min, my), max(y_max, my)
    span = max(x_max - x_min, y_max - y_min)
    cx, cy = 0.5*(x_min + x_max), 0.5*(y_min + y_max)
    pad = 0.1 * span
    xmin, xmax = cx - 0.5*span - pad, cx + 0.5*span + pad
    ymin, ymax = cy - 0.5*span - pad, cy + 0.5*span + pad

    # 绘制等高线和采样点
    plt.figure()
    contour_with_fun(fun, xmin, xmax, ymin, ymax, N=200)
    plt.scatter(so[:,0], so[:,1], s=10, alpha=0.6, label='sampled centers')
    plt.scatter([res['x'][0]], [res['x'][1]], marker='*', s=80, label='best found')
    for (mx, my) in minima:
        plt.scatter([mx], [my], marker='+', s=60, label='global min')

    ax = plt.gca()
    ax.set_aspect('equal', adjustable='box')
    ax.set_xlim(xmin, xmax)
    ax.set_ylim(ymin, ymax)
    plt.legend()
    plt.title('DIRECT samples')
    plt.xlabel('x'); plt.ylabel('y')
    plt.savefig(fname, bbox_inches='tight')
    plt.close()




In [7]:
# Cell 7 — Run experiments (save plots + print results)
rows = []
eps = 1e-6; kmax_nm = 100; kmax_dir = 150

for name in ['ackley', 'branin', 'rosenbrock', 'rastrigin']:
    f = FUNS[name]

    # ---------- Nelder–Mead ----------
    x0 = STARTS_NM[name]
    fw_nm = FWrap(f, name + '_nm')
    t0 = time.time()
    res_nm = nelder_mead(fw_nm, x0, eps=eps, kmax=kmax_nm, delta=0.05)
    t1 = time.time()
    plot_nm(f, res_nm, f"plot_{name}_Nelder_trajectory.png", KNOWN_MINIMA[name])
    rows.append((
        name,
        (float(x0[0]), float(x0[1])),
        'Nelder–Mead',
        res_nm['x'],
        res_nm['f'],
        res_nm['iters'],
        fw_nm.evals,
        t1 - t0
    ))

    # ---------- DIRECT ----------
    a, b = BOUNDS_DIR[name]
    fw_dir = FWrap(f, name + '_dir')
    t0 = time.time()
    res_dir = direct(fw_dir, a, b, eps=eps, kmax=kmax_dir)
    t1 = time.time()
    #plot_direct(f, res_dir, f"plot_{name}_direct_samples.png", a, b, KNOWN_MINIMA[name])
    rows.append((
        name,
        (tuple(a.tolist()), tuple(b.tolist())),  # 记录 DIRECT 的搜索边界
        'DIRECT',
        res_dir['x'],
        res_dir['f'],
        res_dir['iters'],
        fw_dir.evals,
        t1 - t0
    ))

print_table(
    rows,
    "function   | start           | method         | x found                 | f(x found) | iterations | f evals | time s"
)


KeyboardInterrupt: 

In [None]:
# Cell 8 — Comparative check (table only)
comp_rows = []
by_func = {}
for r in rows:
    by_func.setdefault(r[0], []).append(r)

print("function   | f(NM)       | evals(NM) | time(NM) | f(DIRECT)  | evals(DIR) | time(DIR)")
for name in ['ackley','branin','rosenbrock','rastrigin']:
    nm = next(rr for rr in by_func[name] if rr[2].startswith('Nelder'))
    dr = next(rr for rr in by_func[name] if rr[2].startswith('DIRECT'))
    print(f"{name:10s} | {nm[4]:.6e} | {nm[6]:9d} | {nm[7]:.4f} | {dr[4]:.6e} | {dr[6]:9d} | {dr[7]:.4f}")

function   | f(NM)       | evals(NM) | time(NM) | f(DIRECT)  | evals(DIR) | time(DIR)
ackley     | 2.579929e+00 |        37 | 0.0044 | 5.913550e-07 |       847 | 0.0207
branin     | 3.978889e-01 |        54 | 0.0022 | 4.010529e-01 |     28259 | 7.9408
rosenbrock | 1.161162e-06 |        86 | 0.0000 | 6.085802e+00 |     29197 | 7.9557
rastrigin  | 1.790920e+01 |        67 | 0.0000 | 2.000491e+00 |     27271 | 7.1457
