# Download the datasets

In [None]:
from IPython.display import clear_output

In [None]:
# Dataset 1a
# Each line shells out to the gdown CLI with one file id (three files here).
# NOTE(review): presumably these are the Train/Val/Test "-20" CSVs read in the
# "Dataset 1a" section further down — confirm against the shared folder.
! gdown 1u_VR07Kee92JrhAGq3VeXFR28uoZgxCX
! gdown 1IvZk4IvzHVnEWGFqKZoea_OoqEEagLPs
! gdown 1UItAFItujkbAo_RMouBnzBOPZ2306J7K

# Wipe the cell output (download progress) once the commands have run.
clear_output()

In [None]:
# Dataset 1b
# Each line shells out to the gdown CLI with one file id (three files here).
# NOTE(review): presumably these are the Train/Val/Test "-10" CSVs read in the
# "Dataset 1b" section further down — confirm against the shared folder.
! gdown 1iAPQ4tZIN1b7p3InunX5KbFd_8xMczVP
! gdown 1BJekqgyr8tf_q_c3RQyPSpZNUwh5Ojhs
! gdown 1BGG5CgFE3WClWVQPj4NJe_4jcSJ5PatO

# Wipe the cell output (download progress) once the commands have run.
clear_output()

In [None]:
# Dataset 2
# Each line shells out to the gdown CLI with one file id (six files here).
# NOTE(review): presumably the data/label CSV pairs for the train, val and
# test splits read in the "Dataset 2" sections — confirm against the shared folder.
! gdown 18nytesvrVSgyEApS9HqDBmop6vp5Rx-s
! gdown 1aHIU8LzMreWJyn6roXFFwUA9IIs4Rrmy
! gdown 1W0pGwuBlXZ8dnoZhvf8rOJD1zaG_8Htb
! gdown 1G5yg9ZF9Wtx5JiIVANISlwdgUBC_d5iP
! gdown 1ppBq_NSdtbMO6OGCi0I9mXJd5kGH6n_F
! gdown 1QmHYtmKFPLL-3TxMWKa5DI4bHcAq6e5A

# Wipe the cell output (download progress) once the commands have run.
clear_output()

# Imports

In [None]:
import pandas as pd
import numpy as np

from sklearn.svm import SVC
from sklearn.inspection import DecisionBoundaryDisplay
from sklearn.decomposition import PCA
from sklearn.metrics import accuracy_score, confusion_matrix

import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader

import matplotlib.pyplot as plt

# Perceptron Model

In [None]:
class Perceptron(nn.Module):
    """Single linear unit paired with a hard-threshold prediction.

    ``forward`` returns a tuple ``(activation, prediction)``: the raw output
    of the linear layer and a float32 tensor of the same shape holding 1.0
    where the activation is strictly positive and 0.0 elsewhere.
    """

    def __init__(self, input_size):
        super().__init__()
        # One output unit fed by ``input_size`` features.
        self.linear = nn.Linear(input_size, 1)

    def forward(self, x):
        activation = self.linear(x)
        # The boolean mask carries no gradient; casting it yields the 0/1 labels.
        hard_label = (activation > 0).float()
        return activation, hard_label

# Multi-Layer Feed Forward Neural Network Model

In [None]:
class MLFFNN(nn.Module):
    """Fully connected feed-forward classifier with ReLU hidden layers.

    Args:
        input_size: number of input features.
        num_hidden_layers: number of hidden layers to build.
        num_hidden_nodes: sequence with the width of each hidden layer; the
            first ``num_hidden_layers`` entries are used for the hidden stack
            and the last entry feeds the output layer.
        num_output_nodes: number of output units (classes).

    ``forward`` returns ``(logits, pred, outputs)`` where ``logits`` is the
    output of the last linear layer, ``pred`` the arg-max class index along
    dimension 1 and ``outputs`` the list of outputs of every ``nn.Linear``
    layer (before the ReLU), the last entry being the logits themselves.
    """

    def __init__(self, input_size, num_hidden_layers, num_hidden_nodes, num_output_nodes):
        super(MLFFNN, self).__init__()

        self.num_classes = num_output_nodes

        # Plain Python list kept so ``forward`` can record per-layer outputs;
        # the same modules are registered through the nn.Sequential below.
        self.layers = []

        self.layers += [nn.Linear(input_size, num_hidden_nodes[0]), nn.ReLU()]

        for i in range(num_hidden_layers - 1):
            self.layers += [nn.Linear(num_hidden_nodes[i], num_hidden_nodes[i+1]), nn.ReLU()]

        self.layers += [nn.Linear(num_hidden_nodes[-1], num_output_nodes)]

        self.fully_connected_layers = nn.Sequential(*self.layers)
        # Explicit class dimension: without ``dim`` PyTorch emits an
        # "implicit dimension choice" deprecation warning on every call.
        self.softmax = nn.Softmax(dim=1)

    def forward(self, input):
        outputs = []
        for index, layer in enumerate(self.layers):
            input = layer(input)
            # Even positions hold the Linear layers (ReLUs sit at odd ones).
            if(index % 2 == 0):
                outputs.append(input)

        softmax_out = self.softmax(input)
        pred = torch.argmax(softmax_out, 1)
        return input, pred, outputs

# Support Vector Machine Model

In [None]:
def plot_training_data_with_decision_boundary(kernel, X, y, model_name, degree=None, gamma=None):
    """Fit an SVC on 2-D data and save a plot of its decision boundary.

    Args:
        kernel: one of ``"linear"``, ``"polynomial"`` or ``"gaussian"``.
        X: feature array; columns 0 and 1 are used for the plot axes.
        y: class labels for ``X``.
        model_name: output path stem; the figure is saved as
            ``model_name + ".jpeg"``.
        degree: polynomial degree (``"polynomial"`` only); when omitted the
            SVC default is used.
        gamma: RBF coefficient (``"gaussian"`` only); when omitted the SVC
            default is used.

    Returns:
        The fitted ``SVC`` instance.

    Raises:
        ValueError: if ``kernel`` is not one of the supported names.
    """
    # Build the estimator first so an unsupported kernel fails immediately
    # instead of surfacing later as an opaque error on a ``None`` estimator.
    if kernel == "linear":
        svc = SVC(kernel=kernel)
    elif kernel == "polynomial":
        svc = SVC(kernel="poly") if degree is None else SVC(kernel="poly", degree=degree)
    elif kernel == "gaussian":
        svc = SVC(kernel="rbf") if gamma is None else SVC(kernel="rbf", gamma=gamma)
    else:
        raise ValueError(
            f"Unsupported kernel {kernel!r}; expected 'linear', 'polynomial' or 'gaussian'"
        )

    # Train the SVC
    clf = svc.fit(X, y)

    # Settings for plotting: pad the data's bounding box by 1 on every side.
    _, ax = plt.subplots(figsize=(10, 8))
    x_min, x_max = X[:, 0].min() - 1, X[:, 0].max() + 1
    y_min, y_max = X[:, 1].min() - 1, X[:, 1].max() + 1
    ax.set(xlim=(x_min, x_max), ylim=(y_min, y_max))

    # Plot decision boundary and margins
    common_params = {"estimator": clf, "X": X, "ax": ax}
    DecisionBoundaryDisplay.from_estimator(
        **common_params,
        response_method="predict",
        plot_method="pcolormesh",
        alpha=0.3,
    )
    # Contours of the decision function at -1 / 0 / +1 (margins and boundary).
    DecisionBoundaryDisplay.from_estimator(
        **common_params,
        response_method="decision_function",
        plot_method="contour",
        levels=[-1, 0, 1],
        colors=["k", "k", "k"],
        linestyles=["--", "-", "--"],
    )

    # Plot bigger circles around samples that serve as support vectors
    ax.scatter(
        clf.support_vectors_[:, 0],
        clf.support_vectors_[:, 1],
        s=250,
        facecolors="none",
        edgecolors="k",
    )
    # Plot samples by color and add legend
    scatter = ax.scatter(X[:, 0], X[:, 1], c=y, s=150, edgecolors="k")
    ax.legend(*scatter.legend_elements(), loc="upper right", title="Classes")
    ax.set_title(f" Decision boundaries of SVM with {kernel} kernel")

    plt.savefig(model_name+".jpeg", dpi=300, format="jpeg")
    plt.close()

    return clf

# DataLoader and Trainer

In [None]:
class loader(Dataset):
    """Map-style dataset over a feature tensor ``X`` and a label tensor ``y``.

    When ``model`` is ``"perceptron"`` items are ``(X[i], y[i])``; for any
    other value the label is returned one-hot encoded, using the number of
    distinct values in ``y`` as the class count.
    """

    def __init__(self, X, y, model=None):
        self.X, self.y = X, y
        self.model = model
        # Class count is inferred from the labels actually present.
        self.num_classes = len(torch.unique(y))

    def __len__(self):
        return self.X.shape[0]

    def __getitem__(self, index):
        features, target = self.X[index], self.y[index]
        if self.model != "perceptron":
            target = nn.functional.one_hot(target, num_classes=self.num_classes)
        return features, target


def train(num_epochs, model_name, model, criterion, optimizer, train_loader, train_data, val_data, val_label, surface_plot_epochs=None, verbose=False):
    """Train a model whose forward pass returns ``(output, pred, extras)``.

    The model is moved to CUDA when available, otherwise it stays on the CPU.
    Batch labels are expected in a form ``torch.argmax(labels, dim=1)`` can
    decode (e.g. one-hot rows) and are cast to float32 for the criterion.

    Every 50th epoch the sum of the batch losses divided by the number of
    batches is recorded, and the history is written to ``{model_name}.csv``
    when training ends. With ``verbose`` set, every 100th epoch also prints
    the loss together with the training and validation hit counts. When
    ``surface_plot_epochs`` is given, ``plot_node_surface`` is invoked at the
    listed (1-based) epochs and once more after the final epoch.

    Returns:
        None.
    """
    batches_per_epoch = len(train_loader)
    history = {"epoch" : [], "loss" : []}

    device = "cuda" if torch.cuda.is_available() else "cpu"
    model = model.to(device)

    for epoch_no in range(1, num_epochs + 1):
        running_loss:float = 0.0
        train_hits = 0

        for batch_inputs, batch_targets in train_loader:
            # Re-enable training mode: the plotting/validation code below
            # switches the model to eval mode.
            model.train()

            batch_inputs = batch_inputs.to(device)
            batch_targets = batch_targets.to(device)

            logits, predicted, _ = model(batch_inputs)
            train_hits += (predicted == torch.argmax(batch_targets, dim=1)).sum().item()

            batch_loss = criterion(logits, batch_targets.to(torch.float32))
            running_loss += batch_loss.item()

            optimizer.zero_grad()
            batch_loss.backward()
            optimizer.step()

        if surface_plot_epochs is not None and epoch_no in surface_plot_epochs:
            plot_node_surface(model, f"{model_name}, epoch={epoch_no}", train_data)

        if verbose and epoch_no % 100 == 0:
            val_data = val_data.to(device)
            val_label = val_label.to(device)
            model.eval()
            with torch.no_grad():
                _, val_pred, _ = model(val_data)
                val_correct = (val_pred == val_label).sum().item()
            print(f"Epoch [{epoch_no}/{num_epochs}], Loss: {running_loss / batches_per_epoch}, Train Correct: {train_hits}, Val Correct: {val_correct}")

        if epoch_no % 50 == 0:
            history["epoch"].append(epoch_no)
            history["loss"].append(running_loss / batches_per_epoch)

    if surface_plot_epochs is not None:
        plot_node_surface(model, f"{model_name}, epoch={num_epochs}", train_data)

    pd.DataFrame(history).to_csv(f"{model_name}.csv", index=False)

def train_perceptron(num_epochs, model_name, model, criterion, optimizer, train_loader, val_data, val_label, verbose=False):
    """Train a model whose forward pass returns ``(output, pred)``.

    No device transfer is performed here; the model and the batches are used
    as they are. Every 50th epoch the sum of the batch losses divided by the
    number of batches is recorded, and the history is written to
    ``{model_name}.csv`` when training ends. With ``verbose`` set, every
    100th epoch also prints the loss together with the training and
    validation hit counts.

    Returns:
        None.
    """
    batches_per_epoch = len(train_loader)
    history = {"epoch" : [], "loss" : []}

    for epoch_no in range(1, num_epochs + 1):
        running_loss:float = 0.0
        train_hits = 0

        for batch_inputs, batch_targets in train_loader:
            # Validation below flips the model to eval mode, so reset it here.
            model.train()

            raw_output, predicted = model(batch_inputs)
            train_hits += (predicted == batch_targets).sum().item()

            batch_loss = criterion(raw_output, batch_targets)
            running_loss += batch_loss.item()

            optimizer.zero_grad()
            batch_loss.backward()
            optimizer.step()

        if verbose and epoch_no % 100 == 0:
            model.eval()
            with torch.no_grad():
                _, val_pred = model(val_data)
                val_correct = (val_pred == val_label).sum().item()
            print(f"Epoch [{epoch_no}/{num_epochs}], Loss: {running_loss / batches_per_epoch}, Train Correct: {train_hits}, Val Correct: {val_correct}")

        if epoch_no % 50 == 0:
            history["epoch"].append(epoch_no)
            history["loss"].append(running_loss / batches_per_epoch)

    pd.DataFrame(history).to_csv(f"{model_name}.csv", index=False)


def get_accuracy(model, X, y):
    """Return the fraction of rows of ``X`` whose prediction equals ``y``.

    The model, ``X`` and ``y`` are moved to CUDA when available, otherwise
    they stay on the CPU. The model's forward pass must return a sequence
    whose second element is the prediction tensor; any further elements are
    ignored.

    Args:
        model: module to evaluate.
        X: input tensor.
        y: target tensor, compared element-wise with the predictions.
            NOTE(review): shapes are assumed to match exactly; mismatched
            shapes would broadcast rather than fail — confirm at call sites.

    Returns:
        ``correct / y.shape[0]`` as a Python float.
    """
    device = "cuda" if torch.cuda.is_available() else "cpu"
    model = model.to(device)
    X = X.to(device)
    y = y.to(device)

    # Pure inference: skip autograd bookkeeping for the forward pass.
    with torch.no_grad():
        _, pred, *_ = model(X)
    correct = (pred == y).sum().item()
    return (correct / y.shape[0])

def build_confusion_matrix(model_name, model, X, y):
    """Write the confusion matrix of ``model`` on ``(X, y)`` to a text file.

    The model and ``X`` are moved to CUDA when available, otherwise they stay
    on the CPU. The model's forward pass must return a sequence whose second
    element is the prediction tensor. The integer matrix is saved to
    ``{model_name}_confusion_matrix.txt``.

    Args:
        model_name: output path stem.
        model: module to evaluate.
        X: input tensor.
        y: tensor of true labels (any device); flattened before comparison.
    """
    device = "cuda" if torch.cuda.is_available() else "cpu"
    model = model.to(device)
    X = X.to(device)

    # Pure inference: skip autograd bookkeeping for the forward pass.
    with torch.no_grad():
        _, pred, *_ = model(X)
    pred = pred.cpu().numpy().flatten()
    # Bring the labels to host memory first; ``Tensor.numpy`` only works on
    # CPU tensors that do not require grad.
    label = y.detach().cpu().numpy().flatten()
    conf_matrix = confusion_matrix(label, pred)
    np.savetxt(f"{model_name}_confusion_matrix.txt", conf_matrix, fmt='%d')

def build_confusion_matrix_svm(model_name, model, X, y):
    """Write the confusion matrix of a fitted estimator on ``(X, y)`` to disk.

    ``model.predict(X)`` is compared against ``y`` and the integer matrix is
    saved to ``{model_name}_confusion_matrix.txt``.
    """
    out_path = f"{model_name}_confusion_matrix.txt"
    counts = confusion_matrix(y, model.predict(X))
    np.savetxt(out_path, counts, fmt='%d')

# Plotters

In [None]:
def plot_decision_boundary_perceptron(model, model_name, train_data, train_label):
    """Save a filled-contour plot of the perceptron's 0/1 prediction.

    The prediction is evaluated on a grid with step 0.01 that covers the
    bounding box of the first two feature columns padded by 1, and the
    training points are drawn on top, one scatter series per class. The
    figure is written to ``model_name + ".jpeg"``.

    Args:
        model: module whose forward pass returns ``(output, prediction)``.
        model_name: output path stem.
        train_data: feature tensor; columns 0 and 1 are plotted.
        train_label: label tensor; squeezed before use.
    """
    X_train_numpy = train_data.detach().cpu().numpy()
    y_train_numpy = train_label.squeeze().detach().cpu().numpy()
    classes = np.unique(y_train_numpy)

    x_min, x_max = X_train_numpy[:, 0].min() - 1, X_train_numpy[:, 0].max() + 1
    y_min, y_max = X_train_numpy[:, 1].min() - 1, X_train_numpy[:, 1].max() + 1
    xx, yy = np.meshgrid(np.arange(x_min, x_max, 0.01), np.arange(y_min, y_max, 0.01))
    grid_input = np.c_[xx.ravel(), yy.ravel()]

    grid_input_tensor = torch.from_numpy(grid_input).float()

    # Evaluate on whatever device the model currently lives on (other helpers
    # in this file move models to CUDA); the model itself is not moved.
    first_param = next(model.parameters(), None)
    if first_param is not None:
        grid_input_tensor = grid_input_tensor.to(first_param.device)

    model.eval()
    with torch.no_grad():
        _, predictions = model(grid_input_tensor)

    Z = predictions.cpu().numpy()
    Z = Z.reshape(xx.shape)

    plt.figure(figsize=(8, 6))
    plt.contourf(xx, yy, Z, cmap=plt.cm.RdBu, alpha=0.8)

    for class_label in classes:
        indices = np.where(y_train_numpy == class_label)
        plt.scatter(X_train_numpy[indices, 0], X_train_numpy[indices, 1], s=20, edgecolor="k", label=f'Class {int(class_label)}')

    plt.title('Decision Surface of Perceptron Classifier')
    plt.xlabel('x1')
    plt.ylabel('x2')
    plt.legend()
    plt.savefig(model_name+".jpeg", dpi=300, format="jpeg")
    plt.close()

def plot_decision_boundary_mlffnn(model, model_name, train_data, train_label):
    """Save a filled-contour plot of the MLFFNN's predicted class.

    The model is moved to CUDA when available and evaluated on a grid with
    step 0.01 covering the bounding box of the first two feature columns
    padded by 1. Training points are overlaid, one scatter series per class,
    and the figure is written to ``model_name + ".jpeg"``.
    """
    features = train_data.numpy()
    targets = train_label.numpy()

    # Evaluation grid over the padded bounding box of the training data.
    first_col, second_col = features[:, 0], features[:, 1]
    xx, yy = np.meshgrid(
        np.arange(first_col.min() - 1, first_col.max() + 1, 0.01),
        np.arange(second_col.min() - 1, second_col.max() + 1, 0.01),
    )
    grid = torch.from_numpy(np.column_stack((xx.ravel(), yy.ravel()))).float()

    device = "cuda" if torch.cuda.is_available() else "cpu"
    model = model.to(device)
    grid = grid.to(device)

    model.eval()
    with torch.no_grad():
        _, grid_pred, _ = model(grid)
    Z = grid_pred.cpu().numpy().reshape(xx.shape)

    plt.figure(figsize=(8, 6))
    plt.contourf(xx, yy, Z, cmap=plt.cm.RdBu, alpha=0.8)

    # One scatter call per class so each class gets its own legend entry.
    for class_label in np.unique(targets):
        indices = np.where(targets == class_label)
        plt.scatter(features[indices, 0], features[indices, 1], s=20, edgecolor="k", label=f'Class {class_label}')

    plt.title('Decision Surface of MLFFNN Classifier')
    plt.xlabel('x1')
    plt.ylabel('x2')
    plt.legend()
    plt.savefig(model_name+".jpeg", dpi=300, format="jpeg")
    plt.close()

def plot_decision_boundary_svm(svm, model_name, X, y):
    """Save a filled-contour plot of a fitted estimator's predicted class.

    ``svm.predict`` is evaluated on a grid with step 0.01 covering the
    bounding box of columns 0 and 1 of ``X`` padded by 1. The samples are
    overlaid, one scatter series per class, and the figure is written to
    ``model_name + ".jpeg"``.
    """
    first_col, second_col = X[:, 0], X[:, 1]
    xx, yy = np.meshgrid(
        np.arange(first_col.min() - 1, first_col.max() + 1, 0.01),
        np.arange(second_col.min() - 1, second_col.max() + 1, 0.01),
    )
    grid = np.column_stack((xx.ravel(), yy.ravel()))

    Z = svm.predict(grid).reshape(xx.shape)

    plt.figure(figsize=(8, 6))
    plt.contourf(xx, yy, Z, cmap=plt.cm.RdBu, alpha=0.8)

    # One scatter call per class so each class gets its own legend entry.
    for class_label in np.unique(y):
        indices = np.where(y == class_label)
        plt.scatter(X[indices, 0], X[indices, 1], s=20, edgecolor="k", label=f'Class {class_label}')

    plt.title('Decision Boundary of SVM Classifier')
    plt.xlabel('Feature 1')
    plt.ylabel('Feature 2')
    plt.legend()
    plt.savefig(model_name+".jpeg", dpi=300, format="jpeg")
    plt.close()

def plot_node_surface(model, model_name, train_data):
    """Save one 3-D surface plot per node of every recorded layer output.

    The model is moved to CUDA when available and evaluated on a grid with
    step 0.01 covering the bounding box of the first two feature columns
    padded by 1. The third element returned by the model is treated as a
    list of per-layer outputs; each column of each entry becomes a surface
    saved as ``"{model_name}, node={nl}, hl={hl}.jpeg"``. The last entry of
    the list is titled as the output layer, the others as hidden layers.
    """
    features = train_data.numpy()

    first_col, second_col = features[:, 0], features[:, 1]
    xx, yy = np.meshgrid(
        np.arange(first_col.min() - 1, first_col.max() + 1, 0.01),
        np.arange(second_col.min() - 1, second_col.max() + 1, 0.01),
    )
    grid = torch.from_numpy(np.column_stack((xx.ravel(), yy.ravel()))).float()

    device = "cuda" if torch.cuda.is_available() else "cpu"
    model = model.to(device)
    grid = grid.to(device)

    model.eval()
    with torch.no_grad():
        _, _, outputs = model(grid)

    last_layer = len(outputs) - 1

    for hl, layer_output in enumerate(outputs):
        z = layer_output.cpu().detach().numpy()
        for nl in range(z.shape[1]):
            fig = plt.figure(figsize=(8, 6))
            ax = fig.add_subplot(111, projection='3d')
            # Column ``nl`` holds this node's value at every grid point.
            ax.plot_surface(xx, yy, z[:, nl].reshape(xx.shape), cmap="viridis")

            if hl == last_layer:
                plt.title(f"Surface of node {nl} of output layer")
            else:
                plt.title(f"Surface of node {nl} of hidden layer {hl}")
            plt.xlabel("Feature 1")
            plt.ylabel("Feature 2")
            plt.savefig(f"{model_name}, node={nl}, hl={hl}.jpeg", dpi=300, format="jpeg")
            plt.close()

# Dataset 1a

In [None]:
# Read the three splits of dataset 1a from the working directory.
train_df, val_df, test_df = map(
    pd.read_csv,
    ("./Train-20.csv", "./Val-20.csv", "./Test-20.csv"),
)

## Perceptron Model

In [None]:
# Features: float32 matrices built from the two input columns.
train_data, val_data, test_data = (
    torch.tensor(frame[["input1", "input2"]].values, dtype=torch.float32)
    for frame in (train_df, val_df, test_df)
)

# Labels: float32 with a trailing unit dimension added by unsqueeze(1).
train_label, val_label, test_label = (
    torch.tensor(frame["output"].values, dtype=torch.float32).unsqueeze(1)
    for frame in (train_df, val_df, test_df)
)

In [None]:
# train the model
# Learning-rate sweep: one freshly initialised Perceptron per rate.
num_epochs = 500
lr = [1, 1e-1, 1e-2, 2e-3, 3e-4, 4e-5]

# One row per learning rate; written to perceptron_accuracy.csv after the loop.
accuracy = {"lr" : [], "Train Accuracy" : [], "Val Accuracy" : [], "Test Accuracy" : []}

# The "perceptron" mode makes the dataset return raw labels (no one-hot encoding).
train_loader = DataLoader(loader(train_data, train_label, "perceptron"), batch_size = 32, shuffle=True)
input_size = train_data.shape[1]

# Index (into `lr`) of the run with the highest validation accuracy so far.
best_val_accuracy = 0
best_model = -1

for index, lr_ in enumerate(lr):
    model = Perceptron(input_size)
    criterion = nn.BCEWithLogitsLoss()
    optimizer = optim.SGD(model.parameters(), lr = lr_)

    model_name = f"perceptron, lr={lr_}"
    train_perceptron(num_epochs, model_name, model, criterion, optimizer, train_loader, val_data, val_label)

    # Checkpoint every run so the best one can be reloaded by index later.
    torch.save(model.state_dict(), f"perceptron_model_{index}.pth")
    plot_decision_boundary_perceptron(model, model_name, train_data, train_label)

    accuracy["lr"].append(lr_)
    accuracy["Train Accuracy"].append(get_accuracy(model, train_data, train_label))
    val_accuracy = get_accuracy(model, val_data, val_label)
    accuracy["Val Accuracy"].append(val_accuracy)
    accuracy["Test Accuracy"].append(get_accuracy(model, test_data, test_label))

    # `<=` means that on a tie the later run (smaller learning rate) wins.
    if(best_val_accuracy <= val_accuracy):
        best_model = index
        best_val_accuracy = val_accuracy

pd.DataFrame(accuracy).to_csv("perceptron_accuracy.csv", index=False)

In [None]:
# load best model
# Rebuild the architecture, then restore the checkpoint written by the sweep
# for the run index stored in `best_model`.
final_model = Perceptron(input_size)
final_model.load_state_dict(torch.load(f"perceptron_model_{best_model}.pth"))

<All keys matched successfully>

In [None]:
# generate confusion matrix
# One text file per split, each named "<prefix>_confusion_matrix.txt".
build_confusion_matrix("perceptron_train", final_model, train_data, train_label)
build_confusion_matrix("perceptron_val", final_model, val_data, val_label)
build_confusion_matrix("perceptron_test", final_model, test_data, test_label)

## MLFFNN Model

In [None]:
# Features: float32 matrices built from the two input columns.
train_data, val_data, test_data = (
    torch.tensor(frame[["input1", "input2"]].values, dtype=torch.float32)
    for frame in (train_df, val_df, test_df)
)

# Labels: int64 class indices (the dataset one-hot encodes them per item).
train_label, val_label, test_label = (
    torch.tensor(frame["output"].values, dtype=torch.int64)
    for frame in (train_df, val_df, test_df)
)

In [None]:
# Grid search over hidden-layer width and learning rate for a one-hidden-layer
# MLFFNN; every run is checkpointed as mlffnn_model_<index>.pth.
num_epochs = 1000
num_hidden_nodes = [1, 2, 4, 6]
lr = [1e-2, 1e-3, 3e-4]

train_loader = DataLoader(loader(train_data, train_label), batch_size = 32, shuffle=True)

input_size = train_data.shape[1]

# Checkpoint index of the run with the highest validation accuracy so far.
best_val_accuracy = 0
best_model = -1

accuracy = {"lr" : [], "num hidden nodes" : [], "Train Accuracy" : [], "Val Accuracy" : [], "Test Accuracy" : []}

# Defined for reference only: it is not passed to train() in this sweep.
surface_plot_epochs = [1, 5, 20, 100]

index = 0

for hn in num_hidden_nodes:
    for lr_ in lr:
        model = MLFFNN(input_size, 1, [hn,], 2)
        model_name = f"mlffnn, hidden_nodes={hn}, lr={lr_}"

        criterion = nn.CrossEntropyLoss()
        optimizer = optim.SGD(model.parameters(), lr=lr_)

        train(num_epochs, model_name, model, criterion, optimizer, train_loader, train_data, val_data, val_label)

        torch.save(model.state_dict(), f"mlffnn_model_{index}.pth")

        plot_decision_boundary_mlffnn(model, model_name, train_data, train_label)

        accuracy["lr"].append(lr_)
        accuracy["num hidden nodes"].append(hn)
        accuracy["Train Accuracy"].append(get_accuracy(model, train_data, train_label))
        val_accuracy = get_accuracy(model, val_data, val_label)
        accuracy["Val Accuracy"].append(val_accuracy)
        accuracy["Test Accuracy"].append(get_accuracy(model, test_data, test_label))

        if(best_val_accuracy < val_accuracy):
            best_model = index
            best_val_accuracy = val_accuracy

        # Advance only after the bookkeeping above, so `best_model` names the
        # checkpoint that was just saved rather than the next one.
        index += 1

pd.DataFrame(accuracy).to_csv("mlffnn_accuracy.csv", index=False)

  return self._call_impl(*args, **kwargs)
  return self._call_impl(*args, **kwargs)
  return self._call_impl(*args, **kwargs)
  return self._call_impl(*args, **kwargs)
  return self._call_impl(*args, **kwargs)
  return self._call_impl(*args, **kwargs)
  return self._call_impl(*args, **kwargs)
  return self._call_impl(*args, **kwargs)
  return self._call_impl(*args, **kwargs)
  return self._call_impl(*args, **kwargs)
  return self._call_impl(*args, **kwargs)
  return self._call_impl(*args, **kwargs)
  return self._call_impl(*args, **kwargs)
  return self._call_impl(*args, **kwargs)
  return self._call_impl(*args, **kwargs)
  return self._call_impl(*args, **kwargs)
  return self._call_impl(*args, **kwargs)
  return self._call_impl(*args, **kwargs)
  return self._call_impl(*args, **kwargs)
  return self._call_impl(*args, **kwargs)
  return self._call_impl(*args, **kwargs)
  return self._call_impl(*args, **kwargs)
  return self._call_impl(*args, **kwargs)
  return self._call_impl(*args, **

## SVM Model

In [None]:
# Features: NumPy arrays reshaped to two columns.
train_data, val_data, test_data = (
    np.reshape(frame[["input1", "input2"]].to_numpy(), (-1, 2))
    for frame in (train_df, val_df, test_df)
)

# Labels: integer NumPy vectors.
train_label, val_label, test_label = (
    (frame["output"].astype(int)).to_numpy()
    for frame in (train_df, val_df, test_df)
)

In [None]:
# Fit a linear-kernel SVC on the training split; the helper also saves the
# decision-boundary figure as "svm.jpeg" and returns the fitted estimator.
model = plot_training_data_with_decision_boundary("linear", train_data, train_label, "svm")

In [None]:
# Predict every split with the fitted linear SVM ...
train_pred = model.predict(train_data)
val_pred = model.predict(val_data)
test_pred = model.predict(test_data)

# ... score each split ...
train_accuracy = accuracy_score(train_pred, train_label)
val_accuracy = accuracy_score(val_pred, val_label)
test_accuracy = accuracy_score(test_pred, test_label)

# ... and store the results as a one-row table.
accuracy = {
    "Train Accuracy" : [train_accuracy],
    "Val Accuracy" : [val_accuracy],
    "Test Accuracy" : [test_accuracy],
}

df = pd.DataFrame(accuracy)
df.to_csv("svm_linear.csv", index=False)

# Dataset 1b

In [None]:
# Read the three splits of dataset 1b from the working directory.
train_df, val_df, test_df = map(
    pd.read_csv,
    ("./Train-10.csv", "./Val-10.csv", "./Test-10.csv"),
)

## MLFFNN Model

In [None]:
# Features: float32 matrices built from the two coordinate columns.
train_data, val_data, test_data = (
    torch.tensor(frame[["x1", "x2"]].values, dtype=torch.float32)
    for frame in (train_df, val_df, test_df)
)

# Labels: int64 class indices (the dataset one-hot encodes them per item).
train_label, val_label, test_label = (
    torch.tensor(frame["label"].values, dtype=torch.int64)
    for frame in (train_df, val_df, test_df)
)

In [None]:
# Grid search over the widths of both hidden layers of a two-hidden-layer
# MLFFNN at a fixed learning rate.
num_epochs = 1000
num_hidden_nodes = [1, 2, 4, 6]
lr = 1e-3

train_loader = DataLoader(loader(train_data, train_label), batch_size = 64, shuffle=True)

input_size = train_data.shape[1]

# Checkpoint index of the run with the highest validation accuracy so far.
best_val_accuracy = 0
best_model = -1

accuracy = {"num hidden nodes 1" : [], "num hidden nodes 2" : [], "Train Accuracy" : [], "Val Accuracy" : [], "Test Accuracy" : []}

index = 0

for hn1 in num_hidden_nodes:
    for hn2 in num_hidden_nodes:
        print(f"Training: {hn1}, {hn2}")
        model = MLFFNN(input_size, 2, [hn1, hn2], 2)
        model_name = f"mlffnn, hidden_nodes1={hn1}, hidden_nodes2={hn2}"

        criterion = nn.CrossEntropyLoss()
        optimizer = optim.SGD(model.parameters(), lr=lr)

        train(num_epochs, model_name, model, criterion, optimizer, train_loader, train_data, val_data, val_label)

        # NOTE(review): same checkpoint naming as the dataset 1a sweep, so
        # those files are overwritten when both sweeps share a directory.
        torch.save(model.state_dict(), f"mlffnn_model_{index}.pth")

        plot_decision_boundary_mlffnn(model, model_name, train_data, train_label)

        accuracy["num hidden nodes 1"].append(hn1)
        accuracy["num hidden nodes 2"].append(hn2)
        accuracy["Train Accuracy"].append(get_accuracy(model, train_data, train_label))
        val_accuracy = get_accuracy(model, val_data, val_label)
        accuracy["Val Accuracy"].append(val_accuracy)
        accuracy["Test Accuracy"].append(get_accuracy(model, test_data, test_label))

        # Strict `<`: on a tie the earlier run is kept.
        if(best_val_accuracy < val_accuracy):
            best_model = index
            best_val_accuracy = val_accuracy

        index += 1

# NOTE(review): same file name as the dataset 1a sweep's summary.
pd.DataFrame(accuracy).to_csv("mlffnn_accuracy.csv", index=False)

Training: 1, 1


  return self._call_impl(*args, **kwargs)
  return self._call_impl(*args, **kwargs)
  return self._call_impl(*args, **kwargs)


Training: 1, 2


  return self._call_impl(*args, **kwargs)
  return self._call_impl(*args, **kwargs)


Training: 1, 4


  return self._call_impl(*args, **kwargs)
  return self._call_impl(*args, **kwargs)


Training: 1, 6


  return self._call_impl(*args, **kwargs)
  return self._call_impl(*args, **kwargs)


Training: 2, 1


  return self._call_impl(*args, **kwargs)
  return self._call_impl(*args, **kwargs)


Training: 2, 2


  return self._call_impl(*args, **kwargs)
  return self._call_impl(*args, **kwargs)


Training: 2, 4


  return self._call_impl(*args, **kwargs)
  return self._call_impl(*args, **kwargs)


Training: 2, 6


  return self._call_impl(*args, **kwargs)
  return self._call_impl(*args, **kwargs)


Training: 4, 1


  return self._call_impl(*args, **kwargs)
  return self._call_impl(*args, **kwargs)


Training: 4, 2


  return self._call_impl(*args, **kwargs)
  return self._call_impl(*args, **kwargs)


Training: 4, 4


  return self._call_impl(*args, **kwargs)
  return self._call_impl(*args, **kwargs)


Training: 4, 6


  return self._call_impl(*args, **kwargs)
  return self._call_impl(*args, **kwargs)


Training: 6, 1


  return self._call_impl(*args, **kwargs)
  return self._call_impl(*args, **kwargs)


Training: 6, 2


  return self._call_impl(*args, **kwargs)
  return self._call_impl(*args, **kwargs)


Training: 6, 4


  return self._call_impl(*args, **kwargs)
  return self._call_impl(*args, **kwargs)


Training: 6, 6


  return self._call_impl(*args, **kwargs)
  return self._call_impl(*args, **kwargs)


In [None]:
# in this run, 6, 4 was the best model
# Re-create that configuration from scratch so it can be retrained with
# node-surface plots enabled.
model = MLFFNN(input_size, 2, [6, 4], 2)
# NOTE(review): the trailing comma ends up in every derived file name
# (e.g. "...hidden_nodes2=4,_train_confusion_matrix.txt") — confirm it is intended.
model_name = "mlffnn, hidden_nodes1=6, hidden_nodes2=4,"
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(model.parameters(), lr=1e-3)

train_loader = DataLoader(loader(train_data, train_label), batch_size = 64, shuffle=True)

# Epochs (1-based) at which train() calls plot_node_surface; it also plots
# once more after the final epoch.
surface_plot_epochs = [1, 5, 20, 100]
num_epochs = 1000

In [None]:
# Retrain the selected configuration; passing surface_plot_epochs makes
# train() save the per-node surface plots at those epochs and after the last one.
train(num_epochs, model_name, model, criterion, optimizer, train_loader, train_data, val_data, val_label, surface_plot_epochs)

  return self._call_impl(*args, **kwargs)
  return self._call_impl(*args, **kwargs)
  return self._call_impl(*args, **kwargs)
  return self._call_impl(*args, **kwargs)
  return self._call_impl(*args, **kwargs)


In [None]:
# Confusion matrices of the retrained model, one text file per split
# (file names are model_name + suffix + "_confusion_matrix.txt").
build_confusion_matrix(model_name+"_train", model, train_data, train_label)
build_confusion_matrix(model_name+"_val", model, val_data, val_label)
build_confusion_matrix(model_name+"_test", model, test_data, test_label)

  return self._call_impl(*args, **kwargs)
  return self._call_impl(*args, **kwargs)
  return self._call_impl(*args, **kwargs)


## SVM Model with Polynomial Kernel

In [None]:
# Features: NumPy arrays reshaped to two columns.
train_data, val_data, test_data = (
    np.reshape(frame[["x1", "x2"]].to_numpy(), (-1, 2))
    for frame in (train_df, val_df, test_df)
)

# Labels: integer NumPy vectors.
train_label, val_label, test_label = (
    (frame["label"].astype(int)).to_numpy()
    for frame in (train_df, val_df, test_df)
)

In [None]:
# Sweep the polynomial degree; each fit also saves its decision-boundary
# figure and per-split confusion matrices.
degree = [1, 2, 3, 4, 6]
accuracy = {"degree" : [], "Train Accuracy" : [], "Val Accuracy" : [], "Test Accuracy" : []}

for deg_ in degree:
    model_name = f"svm, poly, deg={deg_}"
    model = plot_training_data_with_decision_boundary("polynomial", train_data, train_label, model_name, degree=deg_)

    # Predict all splits first, then score them.
    train_pred = model.predict(train_data)
    val_pred = model.predict(val_data)
    test_pred = model.predict(test_data)

    train_accuracy = accuracy_score(train_pred, train_label)
    val_accuracy = accuracy_score(val_pred, val_label)
    test_accuracy = accuracy_score(test_pred, test_label)

    accuracy["degree"].append(deg_)
    accuracy["Train Accuracy"].append(train_accuracy)
    accuracy["Val Accuracy"].append(val_accuracy)
    accuracy["Test Accuracy"].append(test_accuracy)

    build_confusion_matrix_svm(model_name + "_train", model, train_data, train_label)
    build_confusion_matrix_svm(model_name + "_val", model, val_data, val_label)
    build_confusion_matrix_svm(model_name + "_test", model, test_data, test_label)

df = pd.DataFrame(accuracy)
df.to_csv("svm_poly.csv", index=False)

## SVM Model with Gaussian Kernel

In [None]:
# Features: NumPy arrays reshaped to two columns.
train_data, val_data, test_data = (
    np.reshape(frame[["x1", "x2"]].to_numpy(), (-1, 2))
    for frame in (train_df, val_df, test_df)
)

# Labels: integer NumPy vectors.
train_label, val_label, test_label = (
    (frame["label"].astype(int)).to_numpy()
    for frame in (train_df, val_df, test_df)
)

In [None]:
# Sweep the RBF gamma; each fit also saves its decision-boundary figure and
# per-split confusion matrices.
gamma = [100, 10, 1, 1e-1, 1e-2]
accuracy = {"gamma" : [], "Train Accuracy" : [], "Val Accuracy" : [], "Test Accuracy" : []}

for gamma_ in gamma:
    model_name = f"svm, gaussian, gamma={gamma_}"
    model = plot_training_data_with_decision_boundary("gaussian", train_data, train_label, model_name, gamma=gamma_)

    # Record the gamma of this run (not a variable left over from another cell).
    accuracy["gamma"].append(gamma_)
    train_pred = model.predict(train_data)
    train_accuracy = accuracy_score(train_pred, train_label)
    accuracy["Train Accuracy"].append(train_accuracy)

    val_pred = model.predict(val_data)
    val_accuracy = accuracy_score(val_pred, val_label)
    accuracy["Val Accuracy"].append(val_accuracy)

    test_pred = model.predict(test_data)
    test_accuracy = accuracy_score(test_pred, test_label)
    accuracy["Test Accuracy"].append(test_accuracy)

    build_confusion_matrix_svm(model_name + "_train", model, train_data, train_label)
    build_confusion_matrix_svm(model_name + "_val", model, val_data, val_label)
    build_confusion_matrix_svm(model_name + "_test", model, test_data, test_label)

df = pd.DataFrame(accuracy)
df.to_csv("svm_gaussian.csv", index=False)

# Dataset 2 without Principal Component Analysis

In [None]:
# Read the header-less feature and label files of every split of dataset 2.
train_data, train_label, val_data, val_label, test_data, test_label = (
    pd.read_csv(csv_path, header=None)
    for csv_path in (
        "./train_data.csv",
        "./train_label.csv",
        "./val_data.csv",
        "./val_label.csv",
        "./test_data.csv",
        "./test_label.csv",
    )
)

## MLFFNN Model

In [None]:
# Replace the DataFrames with tensors: float32 features ...
train_data, val_data, test_data = (
    torch.tensor(frame.values, dtype=torch.float32)
    for frame in (train_data, val_data, test_data)
)

# ... and int64 labels with singleton dimensions squeezed away.
train_label, val_label, test_label = (
    torch.tensor(frame.values, dtype=torch.int64).squeeze()
    for frame in (train_label, val_label, test_label)
)

In [None]:
# NOTE: the hyper-parameter sweep in this cell is disabled (every line is
# commented out). The cells that follow still read `input_size` and
# `best_model`, which this sweep would have assigned.
# num_epochs = 1000
# num_hidden_nodes = [40, 60, 80, 100]
# lr = 1e-2

# train_loader = DataLoader(loader(train_data, train_label), batch_size = 64, shuffle=True)

# input_size = train_data.shape[1]

# best_val_accuracy = 0
# best_model = -1

# accuracy = {"num hidden nodes 1" : [], "num hidden nodes 2" : [], "Train Accuracy" : [], "Val Accuracy" : [], "Test Accuracy" : []}

# index = 0

# for hn1 in num_hidden_nodes:
#     for hn2 in num_hidden_nodes:
#         print(f"Training: {hn1}, {hn2}")
#         model = MLFFNN(input_size, 2, [hn1, hn2], 5)
#         model_name = f"mlffnn, hidden_nodes1={hn1}, hidden_nodes2={hn2}"

#         criterion = nn.CrossEntropyLoss()
#         optimizer = optim.SGD(model.parameters(), lr=lr)

#         train(num_epochs, model_name, model, criterion, optimizer, train_loader, train_data, val_data, val_label, verbose=True)

#         torch.save(model.state_dict(), f"mlffnn_model_{index}.pth")

#         accuracy["num hidden nodes 1"].append(hn1)
#         accuracy["num hidden nodes 2"].append(hn2)
#         accuracy["Train Accuracy"].append(get_accuracy(model, train_data, train_label))
#         val_accuracy = get_accuracy(model, val_data, val_label)
#         accuracy["Val Accuracy"].append(val_accuracy)
#         accuracy["Test Accuracy"].append(get_accuracy(model, test_data, test_label))

#         if(best_val_accuracy < val_accuracy):
#             best_model = index
#             best_val_accuracy = val_accuracy

#         index += 1

# pd.DataFrame(accuracy).to_csv("mlffnn_accuracy.csv", index=False)

In [None]:
# Rebuild the architecture before loading the weights. The feature count is
# taken from the data prepared above: the sweep cell that would set
# `input_size` for this dataset is commented out, so the value left over from
# an earlier section would not match these inputs.
input_size = train_data.shape[1]
final_model = MLFFNN(input_size, 2, [60, 60], 5)
# NOTE(review): `best_model` is only assigned by the disabled sweep; make sure
# it refers to the 60/60 checkpoint of this dataset before running — TODO confirm.
final_model.load_state_dict(torch.load(f"mlffnn_model_{best_model}.pth"))

In [None]:
# Confusion matrices of the reloaded network, one text file per split.
# Use `final_model` (the MLFFNN restored from its checkpoint); `model` still
# refers to whatever estimator an earlier cell left behind.
model_name = "mlffnn, hn1=60, hn2=60"
build_confusion_matrix(model_name+",train", final_model, train_data, train_label)
build_confusion_matrix(model_name+",val", final_model, val_data, val_label)
build_confusion_matrix(model_name+",test", final_model, test_data, test_label)

## SVM Model with Gaussian Kernel

In [None]:
# Convert the splits to NumPy for scikit-learn. `np.asarray` accepts both the
# DataFrames read from disk and the CPU tensors created for the MLFFNN above,
# whereas `.to_numpy()` only exists on the former.
train_data = np.reshape(np.asarray(train_data), (-1, 81))
train_label = np.reshape(np.asarray(train_label, dtype="int64"), (-1, ))

val_data = np.reshape(np.asarray(val_data), (-1, 81))
val_label = np.reshape(np.asarray(val_label, dtype="int64"), (-1, ))

test_data = np.reshape(np.asarray(test_data), (-1, 81))
test_label = np.reshape(np.asarray(test_label, dtype="int64"), (-1, ))

In [None]:
# Sweep the RBF gamma; no boundary plot is drawn here, only accuracies and
# per-split confusion matrices are produced.
gamma = [100, 10, 1, 1e-1, 1e-2]
accuracy = {"gamma" : [], "Train Accuracy" : [], "Val Accuracy" : [], "Test Accuracy" : []}

for gamma_ in gamma:
    model_name = f"svm, gaussian, gamma={gamma_}"

    # fit() returns the estimator itself, so construction and fitting chain.
    model = SVC(kernel="rbf", gamma=gamma_).fit(train_data, train_label)

    # Predict all splits first, then score them.
    train_pred = model.predict(train_data)
    val_pred = model.predict(val_data)
    test_pred = model.predict(test_data)

    train_accuracy = accuracy_score(train_pred, train_label)
    val_accuracy = accuracy_score(val_pred, val_label)
    test_accuracy = accuracy_score(test_pred, test_label)

    accuracy["gamma"].append(gamma_)
    accuracy["Train Accuracy"].append(train_accuracy)
    accuracy["Val Accuracy"].append(val_accuracy)
    accuracy["Test Accuracy"].append(test_accuracy)

    build_confusion_matrix_svm(model_name + "_train", model, train_data, train_label)
    build_confusion_matrix_svm(model_name + "_val", model, val_data, val_label)
    build_confusion_matrix_svm(model_name + "_test", model, test_data, test_label)

df = pd.DataFrame(accuracy)
df.to_csv("svm_gaussian.csv", index=False)

# Dataset 2 with Principal Component Analysis

In [None]:
# Load the six header-less CSV files (features and labels for each split)
# from the working directory as pandas DataFrames.
train_data, train_label, val_data, val_label, test_data, test_label = (
    pd.read_csv(csv_path, header=None)
    for csv_path in (
        "./train_data.csv",
        "./train_label.csv",
        "./val_data.csv",
        "./val_label.csv",
        "./test_data.csv",
        "./test_label.csv",
    )
)

## MLFFNN Model

In [None]:
# Features stay in NumPy (PCA is applied to them later), reshaped to 81 columns.
train_data = train_data.to_numpy().reshape(-1, 81)
val_data = val_data.to_numpy().reshape(-1, 81)
test_data = test_data.to_numpy().reshape(-1, 81)

# Labels become int64 tensors with singleton dimensions squeezed away.
train_label = torch.tensor(train_label.to_numpy(), dtype=torch.int64).squeeze()
val_label = torch.tensor(val_label.to_numpy(), dtype=torch.int64).squeeze()
test_label = torch.tensor(test_label.to_numpy(), dtype=torch.int64).squeeze()

In [None]:
# Hyper-parameter search over the PCA output dimension and the widths of the
# two hidden layers. Every trained network is checkpointed as
# mlffnn_model_<index>.pth and its accuracies are collected into one table
# that is written to mlffnn_accuracy.csv at the end.
reduced_dim = [40, 60]

index = 0  # running checkpoint number shared by all (dim, hn1, hn2) combinations

accuracy = {"dim" : [], "num hidden nodes 1" : [], "num hidden nodes 2" : [], "Train Accuracy" : [], "Val Accuracy" : [], "Test Accuracy" : []}

# Training settings do not depend on the PCA dimension, so define them once.
num_epochs = 1000
num_hidden_nodes = [40, 60]
lr = 1e-2

for dim_ in reduced_dim:
    pca = PCA(n_components=dim_)

    # Learn the projection from the training split only, then apply that same
    # projection to val/test. Re-fitting on each split would give every split
    # its own principal axes, i.e. features the network was never trained on.
    train_data_ = pca.fit_transform(train_data)
    val_data_ = pca.transform(val_data)
    test_data_ = pca.transform(test_data)

    train_data_ = torch.tensor(train_data_, dtype=torch.float32)
    val_data_ = torch.tensor(val_data_, dtype=torch.float32)
    test_data_ = torch.tensor(test_data_, dtype=torch.float32)

    train_loader = DataLoader(loader(train_data_, train_label), batch_size=64, shuffle=True)

    # Width of the raw (un-reduced) features; the networks below take dim_ inputs.
    input_size = train_data.shape[1]

    # NOTE: the best-model tracking is reset for every dimension, so after the
    # outer loop `best_model` is the best checkpoint among the networks trained
    # on the LAST entry of reduced_dim only.
    best_val_accuracy = 0
    best_model = -1

    for hn1 in num_hidden_nodes:
        for hn2 in num_hidden_nodes:
            print(f"Training: {hn1}, {hn2}")
            model = MLFFNN(dim_, 2, [hn1, hn2], 5)
            model_name = f"mlffnn, input_dim={dim_} hidden_nodes1={hn1}, hidden_nodes2={hn2}"

            criterion = nn.CrossEntropyLoss()
            optimizer = optim.SGD(model.parameters(), lr=lr)

            # Pass the PCA-reduced training tensor: the network has dim_ inputs,
            # so the raw un-reduced array does not match its first layer.
            train(num_epochs, model_name, model, criterion, optimizer, train_loader, train_data_, val_data_, val_label, verbose=True)

            torch.save(model.state_dict(), f"mlffnn_model_{index}.pth")

            accuracy["dim"].append(dim_)
            accuracy["num hidden nodes 1"].append(hn1)
            accuracy["num hidden nodes 2"].append(hn2)
            accuracy["Train Accuracy"].append(get_accuracy(model, train_data_, train_label))
            val_accuracy = get_accuracy(model, val_data_, val_label)
            accuracy["Val Accuracy"].append(val_accuracy)
            accuracy["Test Accuracy"].append(get_accuracy(model, test_data_, test_label))

            # Strict comparison: on a tie the earlier checkpoint is kept.
            if best_val_accuracy < val_accuracy:
                best_model = index
                best_val_accuracy = val_accuracy

            index += 1

pd.DataFrame(accuracy).to_csv("mlffnn_accuracy.csv", index=False)

In [None]:
# Recreate the 60-input network with hidden layers of 40 and 60 nodes and
# 5 outputs, then restore the weights from the checkpoint file numbered by
# `best_model`.
final_model = MLFFNN(60, 2, [40,60], 5)
saved_weights = torch.load(f"mlffnn_model_{best_model}.pth")
final_model.load_state_dict(saved_weights)

In [None]:
# Build confusion matrices for the reloaded 60-dimensional checkpoint.
model_name = "mlffnn, dim=60, hn1=40, hn2=60"

pca = PCA(n_components=60)

# Fit the projection on the training split only and reuse it for val/test so
# that all three splits are expressed in the same 60-dimensional basis.
train_data_ = pca.fit_transform(train_data)
val_data_ = pca.transform(val_data)
test_data_ = pca.transform(test_data)

train_data_ = torch.tensor(train_data_, dtype=torch.float32)
val_data_ = torch.tensor(val_data_, dtype=torch.float32)
test_data_ = torch.tensor(test_data_, dtype=torch.float32)

# Evaluate `final_model` (the restored checkpoint described by model_name),
# not `model`, which is just the last network the search loop happened to train.
build_confusion_matrix(model_name+",train", final_model, train_data_, train_label)
build_confusion_matrix(model_name+",val", final_model, val_data_, val_label)
build_confusion_matrix(model_name+",test", final_model, test_data_, test_label)

## SVM Model with Gaussian Kernel

In [None]:
# Convert every split to plain NumPy for scikit-learn: features become
# (n_samples, 81) arrays and labels become flat int64 vectors.
# np.asarray is used instead of .to_numpy() because, when the notebook is run
# top to bottom, the MLFFNN section has already turned the features into
# ndarrays and the labels into torch tensors, neither of which has the pandas
# .to_numpy(dtype=...) method. np.asarray also still accepts freshly loaded
# DataFrames.
train_data = np.reshape(np.asarray(train_data), (-1, 81))
train_label = np.reshape(np.asarray(train_label, dtype="int64"), (-1, ))

val_data = np.reshape(np.asarray(val_data), (-1, 81))
val_label = np.reshape(np.asarray(val_label, dtype="int64"), (-1, ))

test_data = np.reshape(np.asarray(test_data), (-1, 81))
test_label = np.reshape(np.asarray(test_label, dtype="int64"), (-1, ))

In [None]:
# Sweep PCA output dimension x RBF gamma: fit one SVC per combination, record
# its accuracy on every split and build a confusion matrix for each split.
reduced_dim = [40, 60]
accuracy = {"reduced_dim" : [], "gamma" : [], "Train Accuracy" : [], "Val Accuracy" : [], "Test Accuracy" : []}

# The gamma grid is the same for every dimension, so define it once.
gamma = [100, 10, 1, 1e-1, 1e-2]

for dim_ in reduced_dim:
    pca = PCA(n_components=dim_)

    # Fit the projection on the training split only, then apply that same
    # projection to val/test. Re-fitting per split would place each split in
    # a different basis from the one the classifier was trained in.
    train_data_ = pca.fit_transform(train_data)
    val_data_ = pca.transform(val_data)
    test_data_ = pca.transform(test_data)

    for gamma_ in gamma:
        model_name = f"svm, gaussian, dim={dim_}, gamma={gamma_}"

        model = SVC(kernel="rbf", gamma=gamma_)
        model.fit(train_data_, train_label)

        accuracy["reduced_dim"].append(dim_)
        accuracy["gamma"].append(gamma_)
        train_pred = model.predict(train_data_)
        train_accuracy = accuracy_score(train_pred, train_label)
        accuracy["Train Accuracy"].append(train_accuracy)

        val_pred = model.predict(val_data_)
        val_accuracy = accuracy_score(val_pred, val_label)
        accuracy["Val Accuracy"].append(val_accuracy)

        test_pred = model.predict(test_data_)
        test_accuracy = accuracy_score(test_pred, test_label)
        accuracy["Test Accuracy"].append(test_accuracy)

        build_confusion_matrix_svm(model_name + "_train", model, train_data_, train_label)
        build_confusion_matrix_svm(model_name + "_val", model, val_data_, val_label)
        build_confusion_matrix_svm(model_name + "_test", model, test_data_, test_label)

    # The CSV is rewritten after every dimension with all rows gathered so far.
    df = pd.DataFrame(accuracy)
    df.to_csv("svm_gaussian.csv", index=False)

In [None]:
# Recursively delete svm_nopca from the working directory (-f: no error if it is missing).
! rm -rf svm_nopca

In [None]:
# Move every entry in the working directory whose name starts with "svm" into /content/svm_pca.
# NOTE(review): with several matches, mv needs /content/svm_pca to already exist as a directory — confirm it is created beforehand.
! mv svm* /content/svm_pca

In [None]:
# Archive the svm_pca directory recursively into svm_pca.zip.
# NOTE(review): the path is relative, so this presumably runs with /content as the working directory — confirm.
! zip -r svm_pca.zip svm_pca