## <font color='blue'>Neural net experiments</font>

In [3]:
# Neural Net Experiments for Homework 10, Question 5
# --------------------------------------------------
# This script automates the experiments described in Question 5.
# It uses the provided helper functions and model structure, and
# extends them for the two-hidden-layer case. Results are printed
# for LaTeX table inclusion.

import numpy as np
import torch
import matplotlib.pyplot as plt
import os

# --------------------------
# Helper Functions (from test.tex)
# --------------------------

def load_data(datafile):
    """Load a two-feature, binary-labelled data set from a text file.

    The file is parsed with ``np.loadtxt``. Columns 0 and 1 are used as the
    features and column 2 as the label. Labels are mapped through
    ``(label + 1) / 2``, which sends -1 to 0 and +1 to 1.

    Args:
        datafile: Path (or any other source ``np.loadtxt`` accepts).

    Returns:
        A tuple ``(x, y)`` where ``x`` is a float tensor of shape ``[n, 2]``
        and ``y`` is a float tensor of shape ``[n, 1]``.
    """
    # ndmin=2 keeps a single-row file as shape (1, p); without it loadtxt
    # returns a 1-D array and the row/column indexing below fails.
    data = np.loadtxt(datafile, ndmin=2)
    n = data.shape[0]
    features = data[:, 0:2]
    labels01 = (data[:, 2] + 1.0) / 2.0
    x = torch.tensor(features, dtype=torch.float)
    y = torch.reshape(torch.tensor(labels01, dtype=torch.float), [n, 1])
    return x, y

def plot_data(x, y, show=True):
    """Scatter-plot the data set, one marker style per class.

    Points whose label equals 1 are drawn as red circles and points whose
    label equals 0 as black triangles. The axes extend one unit beyond the
    data range in every direction.

    Args:
        x: Tensor of 2-D points, indexed as ``x[:, 0]`` and ``x[:, 1]``.
        y: Tensor of 0/1 labels; it is squeezed before comparison.
        show: When true, call ``plt.show()`` after drawing.
    """
    # Tensor reductions + .item() yield plain Python floats; the builtin
    # min()/max() would iterate the tensor element by element in Python and
    # hand 0-dim tensors to matplotlib.
    x_min = x[:, 0].min().item() - 1
    x_max = x[:, 0].max().item() + 1
    y_min = x[:, 1].min().item() - 1
    y_max = x[:, 1].max().item() + 1

    labels = torch.squeeze(y)
    pos = (labels == 1)
    neg = (labels == 0)
    plt.plot(x[pos, 0], x[pos, 1], 'ro')
    plt.plot(x[neg, 0], x[neg, 1], 'k^')
    plt.xlim(x_min, x_max)
    plt.ylim(y_min, y_max)
    if show:
        plt.show()

def plot_boundary(x, y, model, title=None, show=True):
    """Draw the model's decision regions with the training points on top.

    A regular grid (spacing 0.05) covering the data range plus a one-unit
    margin is classified by thresholding the model output at 0.5, and the
    result is rendered with ``pcolormesh``. The data points are then drawn
    as in ``plot_data`` (label 1: red circles, label 0: black triangles).

    Args:
        x: Tensor of 2-D points.
        y: Tensor of 0/1 labels; it is squeezed before comparison.
        model: Callable mapping a float tensor of shape ``[m, 2]`` to one
            score per row (the same batched call the training loop uses).
        title: Optional plot title.
        show: When true, call ``plt.show()`` after drawing.
    """
    # Plain Python floats for the limits: passing 0-dim tensors (what the
    # builtin min()/max() return) into np.arange triggers NumPy warnings.
    x_min = x[:, 0].min().item() - 1
    x_max = x[:, 0].max().item() + 1
    y_min = x[:, 1].min().item() - 1
    y_max = x[:, 1].max().item() + 1

    delta = 0.05
    xx, yy = np.meshgrid(np.arange(x_min, x_max, delta), np.arange(y_min, y_max, delta))
    grid = np.c_[xx.ravel(), yy.ravel()]

    # Classify the whole grid in one batched forward pass; no gradients are
    # needed for plotting, so skip building the autograd graph.
    with torch.no_grad():
        scores = model(torch.tensor(grid, dtype=torch.float))
    Z = (scores > 0.5).reshape(xx.shape).numpy().astype(float)

    # vmin/vmax are wider than the 0/1 values in Z, so both regions are
    # drawn with colors from the middle of the PRGn colormap.
    plt.pcolormesh(xx, yy, Z, cmap=plt.cm.PRGn, vmin=-3, vmax=3)

    labels = torch.squeeze(y)
    pos = (labels == 1)
    neg = (labels == 0)
    plt.plot(x[pos, 0], x[pos, 1], 'ro')
    plt.plot(x[neg, 0], x[neg, 1], 'k^')
    plt.xlim(x_min, x_max)
    plt.ylim(y_min, y_max)
    if title is not None:
        plt.title(title)
    if show:
        plt.show()

def error_rate(y1, y2):
    """Count the entries where ``y1`` and ``y2`` fall on different sides of 0.5.

    Despite the name, the result is an error *count*, not a fraction. An
    entry is counted as an error when ``(y1 - 0.5) * (y2 - 0.5) <= 0``, so a
    value of exactly 0.5 on either side always counts as an error.

    Args:
        y1: Tensor of predicted scores.
        y2: Tensor of labels with the same number of elements as ``y1``.

    Returns:
        The number of mismatching entries, as a Python ``int``.
    """
    # Flatten both inputs so an [n, 1] tensor and an [n] tensor are compared
    # element by element instead of being broadcast against each other.
    with torch.no_grad():
        margins = (y1.reshape(-1) - 0.5) * (y2.reshape(-1) - 0.5)
        return int((margins <= 0.0).sum().item())

# --------------------------
# Model Definitions
# --------------------------

def make_one_hidden_layer_model(input_dim, hidden_dim):
    """Build a feedforward net with a single ReLU hidden layer.

    Architecture: Linear(input_dim -> hidden_dim), ReLU,
    Linear(hidden_dim -> 1), Sigmoid.

    Args:
        input_dim: Number of input features.
        hidden_dim: Number of hidden units.

    Returns:
        A ``torch.nn.Sequential`` holding the four layers in that order.
    """
    hidden = torch.nn.Linear(input_dim, hidden_dim)
    readout = torch.nn.Linear(hidden_dim, 1)
    return torch.nn.Sequential(hidden, torch.nn.ReLU(), readout, torch.nn.Sigmoid())

def make_two_hidden_layer_model(input_dim, hidden_dim):
    """Build a feedforward net with two equally sized ReLU hidden layers.

    Architecture: Linear(input_dim -> hidden_dim), ReLU,
    Linear(hidden_dim -> hidden_dim), ReLU, Linear(hidden_dim -> 1), Sigmoid.

    Args:
        input_dim: Number of input features.
        hidden_dim: Number of units in each hidden layer.

    Returns:
        A ``torch.nn.Sequential`` holding the six layers in that order.
    """
    widths = [input_dim, hidden_dim, hidden_dim]
    stack = []
    # One Linear + ReLU pair per hidden layer, created front to back.
    for fan_in, fan_out in zip(widths, widths[1:]):
        stack.append(torch.nn.Linear(fan_in, fan_out))
        stack.append(torch.nn.ReLU())
    # Single-unit output squashed to (0, 1).
    stack.append(torch.nn.Linear(hidden_dim, 1))
    stack.append(torch.nn.Sigmoid())
    return torch.nn.Sequential(*stack)

# --------------------------
# Training Function
# --------------------------

def train_model(model, x, y, max_iters=20000, tol=1e-4, learning_rate=0.5, print_every=1000):
    """Train ``model`` with full-batch gradient descent on binary cross entropy.

    The step size at iteration ``t`` is ``learning_rate / sqrt(t)``. The
    iteration counter starts at 1 and is incremented before it is used, so
    the first update is reported as iteration 2 and at most
    ``max_iters - 1`` updates are performed.

    Convergence is only checked on iterations that are a multiple of
    ``print_every``: training stops once the loss changed by less than
    ``tol`` since the previous check. The update for that iteration is still
    applied before the loop exits.

    The parameters that gave the lowest training error count are snapshotted
    during training and loaded back into ``model`` before returning.

    Args:
        model: ``torch.nn.Module`` whose output is compatible with ``BCELoss``.
        x: Input tensor passed to ``model``.
        y: Target tensor with the same shape as the model output.
        max_iters: Upper bound on the iteration counter.
        tol: Loss-change threshold for the convergence check.
        learning_rate: Base step size before the ``1 / sqrt(t)`` decay.
        print_every: Interval (in iterations) between progress lines and
            convergence checks.

    Returns:
        ``(model, best_iter, best_error)``: the model (modified in place), the
        iteration counter value at which the lowest error count was first
        seen (0 if the error count never dropped below ``y.size()[0]``), and
        that error count.
    """
    loss_fn = torch.nn.BCELoss()
    # Start from +inf rather than a magic 1.0 so the first convergence check
    # can never succeed just because the loss happens to be close to 1.0.
    prev_loss = float("inf")
    done = False
    t = 1
    best_model_state = None
    best_error = y.size()[0]
    best_iter = 0
    while not done and t < max_iters:
        y_pred = model(x)
        t += 1
        loss = loss_fn(y_pred, y)
        # Counting errors needs no gradients; detach so the bookkeeping does
        # not add nodes to the autograd graph.
        err = error_rate(y_pred.detach(), y)
        if err < best_error:
            best_error = err
            # Clone the tensors: state_dict() returns references that the
            # in-place updates below would otherwise overwrite.
            best_model_state = {k: v.clone() for k, v in model.state_dict().items()}
            best_iter = t
        if t % print_every == 0:
            loss_value = loss.item()
            print(f"Iteration {t}: loss {loss_value:.5f} errors {err}")
            if abs(prev_loss - loss_value) < tol:
                done = True
            prev_loss = loss_value
        model.zero_grad()
        loss.backward()
        with torch.no_grad():
            step = learning_rate * (1.0 / np.sqrt(t))
            for param in model.parameters():
                param -= step * param.grad
    # Restore best model
    if best_model_state is not None:
        model.load_state_dict(best_model_state)
    return model, best_iter, best_error

# --------------------------
# Experiment 1: Five Datasets, One Hidden Layer
# --------------------------

def experiment_one_hidden_layer(data_files, H_values, runs_per_setting=5, plot_dir="plots"):
    """Train one-hidden-layer nets on each data file and record the best run.

    For every (data file, hidden size) pair, ``runs_per_setting`` models are
    trained with seeds ``0 .. runs_per_setting - 1``. The run with the lowest
    training error count is kept, its decision boundary is saved as a PNG in
    ``plot_dir``, and a summary record is appended to the result list.

    Args:
        data_files: Iterable of data file paths readable by ``load_data``.
        H_values: Iterable of hidden-layer sizes to try.
        runs_per_setting: Number of seeded training runs per pair (>= 1).
        plot_dir: Directory for the saved plots; created if missing.

    Returns:
        A list of dicts with keys ``data``, ``H``, ``iterations``,
        ``error_rate`` (an error count) and ``plot_path``.

    Raises:
        ValueError: If ``runs_per_setting`` is less than 1.
    """
    if runs_per_setting < 1:
        raise ValueError("runs_per_setting must be at least 1")
    results = []
    os.makedirs(plot_dir, exist_ok=True)
    for datafile in data_files:
        x, y = load_data(datafile)
        # Use only the file's base name so a data file given with a directory
        # (or absolute) path still produces a plot inside plot_dir.
        stem = os.path.splitext(os.path.basename(datafile))[0]
        for H in H_values:
            best_error = None
            best_model = None
            best_iter = 0
            for run in range(runs_per_setting):
                torch.manual_seed(run)  # For reproducibility
                model = make_one_hidden_layer_model(2, H)
                model, iters, err = train_model(model, x, y)
                # Always accept the first run so best_model is never None,
                # even when no run misclassifies fewer than all points.
                if best_model is None or err < best_error:
                    best_error = err
                    best_model = model
                    best_iter = iters
            # Plot and save the best model for this setting
            plot_path = os.path.join(plot_dir, f"{stem}_H{H}.png")
            plot_boundary(x, y, best_model, title=f"{datafile}, H={H}", show=False)
            plt.savefig(plot_path)
            plt.close()
            results.append({
                "data": datafile,
                "H": H,
                "iterations": best_iter,
                "error_rate": best_error,
                "plot_path": plot_path
            })
            print(f"Best for {datafile} H={H}: error={best_error}, iterations={best_iter}")
    return results

# --------------------------
# Experiment 2: Noisy Data, Two Hidden Layers
# --------------------------

def generate_noisy_data(n=800, seed=0):
    """Generate the synthetic 2-D data set with noisy labels.

    Points are drawn with ``np.random.rand`` after seeding NumPy's global
    generator with ``seed``. A point is labelled positive when it lies below
    the curve ``(exp(-s^2) * 2 * s + 1) / 2`` with ``s = 6 * (x1 - 0.5)``.
    ``int(n * 0.03)`` indices are then drawn with ``np.random.choice`` and
    the labels at those indices are inverted. The returned features are the
    sampled points multiplied by 10.

    Args:
        n: Number of points.
        seed: Seed passed to ``np.random.seed``.

    Returns:
        ``(x, y)``: float tensors of shape ``[n, 2]`` and ``[n, 1]``.
    """
    np.random.seed(seed)
    points = np.random.rand(n, 2)
    first, second = points[:, 0], points[:, 1]

    # Clean labels: is the point below the curve?
    s = (first - 0.5) * 6
    curve = (np.exp(-(s ** 2)) * 2 * s + 1) / 2
    labels = (curve - second) > 0

    # Label noise: invert the labels at the sampled indices.
    flipped = np.random.choice(range(n), size=(int(n * 0.03),))
    labels[flipped] = np.logical_not(labels[flipped])

    features = torch.tensor(points, dtype=torch.float) * 10
    targets = torch.tensor(labels, dtype=torch.float)
    return features, torch.reshape(targets, [n, 1])

def experiment_two_hidden_layers(H, runs=10, error_threshold=100, plot_dir="plots"):
    """Train two-hidden-layer nets on the noisy data set and keep the best run.

    Up to ``runs`` models are trained with seeds ``0 .. runs - 1``. Training
    stops early as soon as the best error count seen so far is below
    ``error_threshold``. The best model's decision boundary is saved to
    ``<plot_dir>/noisy_H<H>.png``.

    Args:
        H: Number of units in each hidden layer.
        runs: Maximum number of seeded training runs (>= 1).
        error_threshold: Error count below which no further runs are tried.
        plot_dir: Directory for the saved plot; created if missing.

    Returns:
        A dict with keys ``data``, ``H``, ``iterations``, ``error_rate``
        (an error count) and ``plot_path``.

    Raises:
        ValueError: If ``runs`` is less than 1.
    """
    if runs < 1:
        raise ValueError("runs must be at least 1")
    # Create the output directory here as well, so this experiment can run on
    # its own without relying on experiment_one_hidden_layer having made it.
    os.makedirs(plot_dir, exist_ok=True)
    x, y = generate_noisy_data()
    best_error = None
    best_model = None
    best_iter = 0
    for run in range(runs):
        torch.manual_seed(run)
        model = make_two_hidden_layer_model(2, H)
        model, iters, err = train_model(model, x, y)
        # Always accept the first run so best_model is never None.
        if best_model is None or err < best_error:
            best_error = err
            best_model = model
            best_iter = iters
        if best_error < error_threshold:
            break
    plot_path = os.path.join(plot_dir, f"noisy_H{H}.png")
    plot_boundary(x, y, best_model, title=f"Noisy Data, H={H}", show=False)
    plt.savefig(plot_path)
    plt.close()
    print(f"Best for noisy data H={H}: error={best_error}, iterations={best_iter}")
    return {
        "data": "noisy",
        "H": H,
        "iterations": best_iter,
        "error_rate": best_error,
        "plot_path": plot_path
    }

# --------------------------
# Main Execution
# --------------------------

# Script entry point: runs both experiments and prints a combined summary.
if __name__ == "__main__":
    # Experiment 1: Five datasets, two H values each
    # Files data1.txt .. data5.txt are looked up relative to the working directory.
    data_files = [f"data{i}.txt" for i in range(1, 6)]
    H_values = [2, 4]  # Example values; adjust as needed
    results_one = experiment_one_hidden_layer(data_files, H_values, runs_per_setting=5)

    # Experiment 2: Noisy data, two hidden layers
    # One result record per hidden size; each call trains up to 10 seeded runs.
    results_two = []
    for H in [2, 4]:
        res = experiment_two_hidden_layers(H, runs=10, error_threshold=100)
        results_two.append(res)

    # Print summary for LaTeX table
    # Rows are tab-separated (not LaTeX markup); the "error rate" column holds
    # the error count returned by the experiments.
    print("\nSummary Table (for LaTeX):")
    print("data\tH value\titerations\terror rate")
    for r in results_one + results_two:
        print(f"{r['data']}\t{r['H']}\t{r['iterations']}\t{r['error_rate']}")

    # The message names the default plot_dir used by both experiment calls above.
    print("\nPlots saved in 'plots/' directory.")

Iteration 1000: loss 0.61841 errors 16
Iteration 2000: loss 0.61774 errors 16
Iteration 3000: loss 0.61751 errors 16
Iteration 4000: loss 0.61740 errors 16
Iteration 5000: loss 0.61734 errors 16
Iteration 1000: loss 0.39306 errors 12
Iteration 2000: loss 0.38810 errors 12
Iteration 3000: loss 0.38611 errors 12
Iteration 4000: loss 0.38494 errors 12
Iteration 5000: loss 0.38414 errors 11
Iteration 6000: loss 0.38357 errors 12
Iteration 7000: loss 0.38313 errors 12
Iteration 8000: loss 0.38283 errors 12
Iteration 9000: loss 0.38261 errors 12
Iteration 10000: loss 0.38244 errors 12
Iteration 11000: loss 0.38232 errors 12
Iteration 12000: loss 0.38223 errors 12
Iteration 1000: loss 0.52880 errors 16
Iteration 2000: loss 0.52534 errors 16
Iteration 3000: loss 0.52391 errors 16
Iteration 4000: loss 0.52305 errors 16
Iteration 5000: loss 0.52245 errors 16
Iteration 6000: loss 0.52201 errors 16
Iteration 7000: loss 0.52168 errors 16
Iteration 8000: loss 0.52142 errors 16
Iteration 9000: loss 0

  xx, yy = np.meshgrid(np.arange(x_min, x_max, delta), np.arange(y_min, y_max, delta))


Best for data1.txt H=2: error=10, iterations=185
Iteration 1000: loss 0.53264 errors 16
Iteration 2000: loss 0.52905 errors 16
Iteration 3000: loss 0.52738 errors 16
Iteration 4000: loss 0.52639 errors 16
Iteration 5000: loss 0.52573 errors 16
Iteration 6000: loss 0.52527 errors 16
Iteration 7000: loss 0.52494 errors 16
Iteration 8000: loss 0.52468 errors 16
Iteration 9000: loss 0.52448 errors 16
Iteration 10000: loss 0.52433 errors 16
Iteration 11000: loss 0.52420 errors 16
Iteration 12000: loss 0.52410 errors 16
Iteration 13000: loss 0.52401 errors 16
Iteration 1000: loss 0.39353 errors 12
Iteration 2000: loss 0.38142 errors 13
Iteration 3000: loss 0.36019 errors 8
Iteration 4000: loss 0.34463 errors 8
Iteration 5000: loss 0.32585 errors 6
Iteration 6000: loss 0.30225 errors 5
Iteration 7000: loss 0.27058 errors 4
Iteration 8000: loss 0.24378 errors 4
Iteration 9000: loss 0.22141 errors 4
Iteration 10000: loss 0.20394 errors 4
Iteration 11000: loss 0.18539 errors 2
Iteration 12000: l

[{'data': 'data1.txt', 'H': 2, 'iterations': 185, 'error_rate': 10, 'plot_path': 'plots/data1_H2.png'}, {'data': 'data1.txt', 'H': 4, 'iterations': 15955, 'error_rate': 0, 'plot_path': 'plots/data1_H4.png'}, {'data': 'data2.txt', 'H': 2, 'iterations': 386, 'error_rate': 0, 'plot_path': 'plots/data2_H2.png'}, {'data': 'data2.txt', 'H': 4, 'iterations': 506, 'error_rate': 0, 'plot_path': 'plots/data2_H4.png'}, {'data': 'data3.txt', 'H': 2, 'iterations': 15257, 'error_rate': 5, 'plot_path': 'plots/data3_H2.png'}, {'data': 'data3.txt', 'H': 4, 'iterations': 4062, 'error_rate': 7, 'plot_path': 'plots/data3_H4.png'}, {'data': 'data4.txt', 'H': 2, 'iterations': 2686, 'error_rate': 0, 'plot_path': 'plots/data4_H2.png'}, {'data': 'data4.txt', 'H': 4, 'iterations': 46, 'error_rate': 0, 'plot_path': 'plots/data4_H4.png'}, {'data': 'data5.txt', 'H': 2, 'iterations': 2044, 'error_rate': 0, 'plot_path': 'plots/data5_H2.png'}, {'data': 'data5.txt', 'H': 4, 'iterations': 3334, 'error_rate': 0, 'plot_p

In [None]:
%matplotlib inline
import numpy as np
import torch
import matplotlib.pyplot as plt

### <font color='blue'>1. Various helper functions</font>

This function loads in a data set

In [None]:
def load_data(datafile):
    """Load a two-feature, binary-labelled data set from a text file.

    Columns 0 and 1 of the file are the features and column 2 is the label.
    Labels are mapped through ``(label + 1) / 2`` (so -1 becomes 0 and +1
    becomes 1).

    Returns a pair ``(x, y)``: a float tensor of shape ``[n, 2]`` and a float
    tensor of shape ``[n, 1]``.
    """
    # ndmin=2 keeps a one-row file two-dimensional; otherwise loadtxt returns
    # a 1-D array and the column slicing below fails.
    data = np.loadtxt(datafile, ndmin=2)
    n = data.shape[0]
    rawx = data[:,0:2]
    rawy = data[:,2]
    x = torch.tensor(rawx, dtype=torch.float)
    y = torch.reshape(torch.tensor((rawy+1.0)/2.0, dtype=torch.float), [n,1])
    return x,y

This function plots the data set

In [None]:
def plot_data(x,y):
    """Scatter-plot the data set and show the figure.

    Points with label 1 are drawn as red circles, points with label 0 as
    black triangles; the axes extend one unit beyond the data range.
    """
    # Use tensor reductions and .item() to get plain floats; the builtin
    # min()/max() iterate the tensor in Python and return 0-dim tensors.
    x_min = x[:,0].min().item() - 1
    x_max = x[:,0].max().item() + 1
    y_min = x[:,1].min().item() - 1
    y_max = x[:,1].max().item() + 1
    labels = torch.squeeze(y)
    pos = (labels == 1)
    neg = (labels == 0)
    plt.plot(x[pos,0], x[pos,1], 'ro')
    plt.plot(x[neg,0], x[neg,1], 'k^')
    plt.xlim(x_min,x_max)
    plt.ylim(y_min,y_max)
    plt.show()

This function plots a decision boundary as well as the data points

In [None]:
def plot_boundary(x,y,model):
    """Plot the model's decision regions together with the data points.

    A grid with spacing 0.05 over the data range (plus a one-unit margin) is
    classified by thresholding the model output at 0.5. ``model`` must accept
    a float tensor of shape ``[m, 2]`` and return one score per row, as it
    does when called on the training inputs.
    """
    # Plain floats for the limits: 0-dim tensors passed to np.arange trigger
    # NumPy warnings.
    x_min = x[:,0].min().item() - 1
    x_max = x[:,0].max().item() + 1
    y_min = x[:,1].min().item() - 1
    y_max = x[:,1].max().item() + 1

    delta = 0.05
    xx, yy = np.meshgrid(np.arange(x_min, x_max, delta), np.arange(y_min, y_max, delta))
    grid = np.c_[xx.ravel(), yy.ravel()]

    # Classify every grid point in a single batched forward pass, without
    # tracking gradients (they are not needed for plotting).
    with torch.no_grad():
        scores = model(torch.tensor(grid, dtype=torch.float))

    # Put the result into a color plot
    Z = (scores > 0.5).reshape(xx.shape).numpy().astype(float)
    plt.pcolormesh(xx, yy, Z, cmap=plt.cm.PRGn, vmin=-3, vmax=3)

    # Plot also the training points
    labels = torch.squeeze(y)
    pos = (labels == 1)
    neg = (labels == 0)
    plt.plot(x[pos,0], x[pos,1], 'ro')
    plt.plot(x[neg,0], x[neg,1], 'k^')

    plt.xlim(x_min,x_max)
    plt.ylim(y_min,y_max)
    plt.show()

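This function counts the misclassified points: it returns the number of predictions `y1` (thresholded at 0.5) that disagree with the true labels `y2`. Note that the result is an error count, not a fraction.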


In [None]:
def error_rate(y1, y2):
    """Return the number of entries where ``y1`` and ``y2`` disagree.

    An entry counts as an error when ``(y1 - 0.5) * (y2 - 0.5) <= 0``, i.e.
    the two values are on different sides of 0.5 or either equals 0.5. The
    result is a count (Python ``int``), not a fraction. Both tensors must
    hold the same number of elements.
    """
    # Flatten so [n, 1] and [n] inputs line up element by element rather
    # than broadcasting; no gradients are needed for counting.
    with torch.no_grad():
        margins = (y1.reshape(-1) - 0.5) * (y2.reshape(-1) - 0.5)
        return int((margins <= 0.0).sum().item())

### <font color='blue'>2. Experiments with toy data</font>

Let's load in one of the data sets and plot it.

In [None]:
# Load the first toy data set; x and y stay in scope for the cells below.
x,y = load_data('data1.txt')
# Show the raw points before any model is trained.
plot_data(x,y)

Next, we train a feedforward net on it. This takes many iterations of gradient descent (backpropagation). We'll print the status every 1000 iterations.

In [None]:
# Now train a neural net
#
# d is input dimension
# H is hidden dimension
d = 2
H = 4

# Use the nn package to define our model as a sequence of layers. nn.Sequential
# is a Module which contains other Modules, and applies them in sequence to
# produce its output. Each Linear Module computes output from input using a
# linear function, and holds internal Tensors for its weight and bias.
model = torch.nn.Sequential(
    torch.nn.Linear(d, H),
    torch.nn.ReLU(),
    torch.nn.Linear(H, 1),
    torch.nn.Sigmoid()
)

# The nn package also contains definitions of popular loss functions; in this
# case we will use binary cross entropy (BCE) as our loss function.
loss_fn = torch.nn.BCELoss()

# Start from +inf so the first check (at iteration 1000) can never stop
# training; with a fixed starting value of 1.0, any loss of 1.0 - tol or more
# at that point would end training immediately.
prev_loss = float("inf")
learning_rate = 0.25
done = False
t = 1
tol = 1e-4
while not done:
    # Forward pass: compute predicted y by passing x to the model. Module objects
    # override the __call__ operator so you can call them like functions. When
    # doing so you pass a Tensor of input data to the Module and it produces
    # a Tensor of output data.
    y_pred = model(x)
    t = t+1
    # Compute and print loss. We pass Tensors containing the predicted and true
    # values of y, and the loss function returns a Tensor containing the
    # loss.
    loss = loss_fn(y_pred, y)
    if t % 1000 == 0:
        print('Iteration %d: loss %0.5f errors %d' % 
              (t, loss.item(), error_rate(y_pred, y)))
        # Stop once the loss improved by less than tol since the last check
        # (this also stops when the loss went up). The update below is still
        # applied for this iteration.
        if (prev_loss - loss.item() < tol):
            done = True
        prev_loss = loss.item()

    # Zero the gradients before running the backward pass.
    model.zero_grad()

    # Backward pass: compute gradient of the loss with respect to all the learnable
    # parameters of the model. Internally, the parameters of each Module are stored
    # in Tensors with requires_grad=True, so this call will compute gradients for
    # all learnable parameters in the model.
    loss.backward()

    # Update the weights using gradient descent. Each parameter is a Tensor, so
    # we can access its gradients like we did before. The step size decays as
    # learning_rate / sqrt(t).
    with torch.no_grad():
        for param in model.parameters():
            param -= learning_rate * (1.0/np.sqrt(t)) * param.grad
print("Number of training errors:", error_rate(model(x), y))

Now let's see what kind of a boundary we got!

In [None]:
# Draw the decision regions of the model trained above over the data points.
plot_boundary(x,y,model)

### <font color='blue'>3. A different data set</font>

The code in the next cell generates a data set of 800 points in which the labels are noisy.

In [None]:
# Number of points; the seed fixes NumPy's global generator so the data set
# is the same on every run.
n = 800
np.random.seed(0)
# n random 2-D points from np.random.rand.
X_train = np.random.rand(n,2)
x1 = X_train[:,0]
x2 = X_train[:,1]
# Clean labels: True when the point lies below the curve
# (exp(-s^2) * 2 * s + 1) / 2, with s = 6 * (x1 - 0.5).
y_train = ((np.exp(-((x1-0.5)*6)**2)*2*((x1-0.5)*6)+1)/2-x2)>0 

# Label noise: draw int(n*0.03) indices and invert the labels at those indices.
idx = np.random.choice(range(n),size=(int(n*0.03),))
y_train[idx] = ~y_train[idx]
# Features are scaled by 10; labels become a float column of shape [n, 1].
x = torch.tensor(X_train, dtype=torch.float) * 10
y = torch.reshape(torch.tensor(y_train, dtype=torch.float), [n,1])
plot_data(x,y)

<font color='magenta'>Define a neural net with <b>two</b> hidden layers, each containing the same number of nodes. <em>Hint:</em> Start with the code above and just make a small tweak to it.</font>

<font color='magenta'>Train the net a few times, and print the decision boundary for the best (lowest-error) model that you find.</font>