# Imports

In [118]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torchvision
import torchvision.transforms as transforms
import matplotlib.pyplot as plt
import numpy as np
from torch.utils.data import Dataset, DataLoader
from sklearn.datasets import load_digits

### Generating data for sanity test

In [21]:
def generate_sanity_check_data():

    # generate data
    X = np.float64(np.random.randn(120, 2))
    Y = np.tanh(X[:, 0] + X[:, 1])
    Y = 1. / (1. + np.exp(-(Y + Y)))
    Y = Y > 0.5
    Y = np.array([[1] if y==True else [0] for y in Y], dtype=np.float64)

    # split it train test
    X_train, X_test = X[0:100], X[100:120]
    Y_train, Y_test =  Y[0:100],  Y[100:120]

    # save it for loading in julia
    np.save(f"./data/sanity_input.npy",X_train)
    np.save(f"./data/sanity_labels.npy",Y_train)
    np.save(f"./data/sanity_test.npy",X_test)
    np.save(f"./data/sanity_labels_test.npy",Y_test)


In [22]:
generate_sanity_check_data()

### Generating data for binary and multiclass clasifiation using make_blobs.

In [30]:
from sklearn.datasets import make_blobs

def generate_train_data_using_blobs(multiclass:bool = False):
    if multiclass:
        no_class = 4
    else:
        no_class = 2

    X, Y = make_blobs(n_samples=500, centers=no_class, n_features=10, random_state=0)
    data = np.array(X)
    labels = np.array([[y] for y in Y],dtype=np.float64)

    X_train, X_test = data[0:400], data[400:500]
    Y_train, Y_test =  labels[0:400],  labels[400:500]

    # save it for loading in julia
    np.save(f"./data/binary_input{no_class}.npy",X_train)
    np.save(f"./data/binary_labels{no_class}.npy",Y_train)
    np.save(f"./data/binary_test{no_class}.npy",X_test)
    np.save(f"./data/binary_labels_test{no_class}.npy",Y_test)

In [31]:
# data for binary classification
generate_train_data_using_blobs()

In [32]:
# data for multiclass classificaton
generate_train_data_using_blobs(True)

### Create dataset for torch training.

In [119]:
class CheckDataset(Dataset):

    def __init__(self,data_path,labels_path, multiclass:bool = False) -> None:
        x = np.float32(np.load(data_path))
        y = np.float32(np.load(labels_path))

        if multiclass:
            y = y.flatten()

        self.x = torch.from_numpy(x)
        self.y = torch.from_numpy(y)

        self.n_samples = y.shape[0]

    def __getitem__(self, index):
        return self.x[index], self.y[index]

    def __len__(self):
        return self.n_samples

    def get_data(self):
        return self.x,self.y

### Create model for binary or multiclass clasification.

In [120]:
class ClassificationModel(nn.Module):
    def __init__(self,input_dim,interm_dim,output_dim, multiclass: bool = False):
        super(ClassificationModel, self).__init__()

        self.layer1 = nn.Linear(input_dim, interm_dim)
        self.layer2 = nn.Linear(interm_dim,output_dim)
        if multiclass:
            self.ac = nn.Softmax()
        else:
            self.ac = nn.Sigmoid()

    def forward(self, x):
        x = self.layer1(x)
        x = torch.tanh(x)
        x = self.layer2(x)
        x = self.ac(x)
        return x


In [121]:
def get_acc(model, dataset, multiclass: bool = False):
    x , y = dataset.get_data()
    with torch.no_grad(): 
        outputs = model(x)
        if multiclass:
            predicted = [torch.argmax(lst) for lst in outputs]
            result = [1 if y[i]==predicted[i] else 0 for i in range(len(predicted))]
            acc = sum(result) / len(result)
        else: 
            predicted =  [1 if x > 0.5 else 0 for x in outputs]
            result = [1 if y[i]==predicted[i] else 0 for i in range(len(predicted))]
            acc = sum(result) / len(result)
    
    return acc

# Normal training sanity check

In [122]:
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

sanity_check_model = ClassificationModel(2,2,1).to(device)
sanity_check_train_dataset = CheckDataset(data_path = "./data/sanity_input.npy",
                                                labels_path= "./data/sanity_labels.npy")
saniry_check_test_dataset = CheckDataset(data_path = "./data/sanity_test.npy",
                                               labels_path= "./data/sanity_labels_test.npy")

sanity_check_train_loader = DataLoader(dataset=sanity_check_train_dataset, batch_size=5, shuffle=True)
sanity_check_test_loader = DataLoader(dataset=saniry_check_test_dataset, batch_size=5, shuffle=False)
optimizer = torch.optim.SGD(sanity_check_model.parameters(), lr=0.01)
loss_fn = nn.BCELoss()

n_total_steps = len(sanity_check_train_loader)
num_epochs = 10

for epoch in range(100):
    for i, (inputs, labels) in enumerate(sanity_check_train_loader):

        # forward pass and loss
        outputs = sanity_check_model(inputs)
        loss = loss_fn(outputs, labels)

        # backward
        loss.backward()

        # compute gradients
        optimizer.step()

        # clear gradients
        optimizer.zero_grad()

        if (i+1) % 10 == 0:
            print(f'Epoch [{epoch+1}/{num_epochs}], Step [{i+1}/{n_total_steps}], Loss: {loss.item():.4f}')

print("Finish training")

print("Accuracy on training dataset: {}".format(get_acc(sanity_check_model, saniry_check_test_dataset)))
print("Accuracy on testing dataset: {}".format(get_acc(sanity_check_model, sanity_check_train_dataset)))

Epoch [1/10], Step [10/20], Loss: 0.6634
Epoch [1/10], Step [20/20], Loss: 0.8711
Epoch [2/10], Step [10/20], Loss: 0.6774
Epoch [2/10], Step [20/20], Loss: 0.6225
Epoch [3/10], Step [10/20], Loss: 0.8782
Epoch [3/10], Step [20/20], Loss: 0.7668
Epoch [4/10], Step [10/20], Loss: 0.9296
Epoch [4/10], Step [20/20], Loss: 0.6900
Epoch [5/10], Step [10/20], Loss: 0.5867
Epoch [5/10], Step [20/20], Loss: 0.5952
Epoch [6/10], Step [10/20], Loss: 0.7423
Epoch [6/10], Step [20/20], Loss: 0.6994
Epoch [7/10], Step [10/20], Loss: 0.6176
Epoch [7/10], Step [20/20], Loss: 0.7064
Epoch [8/10], Step [10/20], Loss: 0.5604
Epoch [8/10], Step [20/20], Loss: 0.5913
Epoch [9/10], Step [10/20], Loss: 0.5919
Epoch [9/10], Step [20/20], Loss: 0.6104
Epoch [10/10], Step [10/20], Loss: 0.5286
Epoch [10/10], Step [20/20], Loss: 0.5792
Epoch [11/10], Step [10/20], Loss: 0.6516
Epoch [11/10], Step [20/20], Loss: 0.4480
Epoch [12/10], Step [10/20], Loss: 0.5679
Epoch [12/10], Step [20/20], Loss: 0.5877
Epoch [13/

# Normal training binary classification model

In [123]:
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

binary_classification_model = ClassificationModel(10,3,1).to(device)
binary_train_dataset = CheckDataset(data_path = "./data/binary_input2.npy",
                                                labels_path= "./data/binary_labels2.npy")
binary_test_dataset = CheckDataset(data_path = "./data/binary_test2.npy",
                                               labels_path= "./data/binary_labels_test2.npy")

binary_classification_train_loader = DataLoader(dataset=binary_train_dataset, batch_size=10, shuffle=True)
binary_classification_test_loader = DataLoader(dataset=binary_test_dataset, batch_size=10, shuffle=False)
optimizer = torch.optim.SGD(binary_classification_model.parameters(), lr=0.01)
loss_fn = nn.BCELoss()

n_total_steps = len(binary_classification_train_loader)
num_epochs = 10

for epoch in range(100):
    for i, (inputs, labels) in enumerate(binary_classification_train_loader):

        # forward pass and loss
        outputs = binary_classification_model(inputs)
        loss = loss_fn(outputs, labels)

        # backward
        loss.backward()

        # compute gradients
        optimizer.step()

        # clear gradients
        optimizer.zero_grad()

        if (i+1) % 10 == 0:
            print(f'Epoch [{epoch+1}/{num_epochs}], Step [{i+1}/{n_total_steps}], Loss: {loss.item():.4f}')

print("Finish training")

print("Accuracy on training dataset: {}".format(get_acc(binary_classification_model, binary_train_dataset)))
print("Accuracy on testing dataset: {}".format(get_acc(binary_classification_model, binary_test_dataset)))

Epoch [1/10], Step [10/40], Loss: 0.6815
Epoch [1/10], Step [20/40], Loss: 0.5005
Epoch [1/10], Step [30/40], Loss: 0.4707
Epoch [1/10], Step [40/40], Loss: 0.4676
Epoch [2/10], Step [10/40], Loss: 0.3013
Epoch [2/10], Step [20/40], Loss: 0.4104
Epoch [2/10], Step [30/40], Loss: 0.3389
Epoch [2/10], Step [40/40], Loss: 0.3684
Epoch [3/10], Step [10/40], Loss: 0.2889
Epoch [3/10], Step [20/40], Loss: 0.2762
Epoch [3/10], Step [30/40], Loss: 0.2814
Epoch [3/10], Step [40/40], Loss: 0.2562
Epoch [4/10], Step [10/40], Loss: 0.2880
Epoch [4/10], Step [20/40], Loss: 0.2247
Epoch [4/10], Step [30/40], Loss: 0.2145
Epoch [4/10], Step [40/40], Loss: 0.2635
Epoch [5/10], Step [10/40], Loss: 0.2131
Epoch [5/10], Step [20/40], Loss: 0.1925
Epoch [5/10], Step [30/40], Loss: 0.2147
Epoch [5/10], Step [40/40], Loss: 0.2258
Epoch [6/10], Step [10/40], Loss: 0.2081
Epoch [6/10], Step [20/40], Loss: 0.2095
Epoch [6/10], Step [30/40], Loss: 0.1645
Epoch [6/10], Step [40/40], Loss: 0.1757
Epoch [7/10], St

# Normal training multiclass classification model

In [124]:
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

classification_model = ClassificationModel(10,3,4,multiclass=True).to(device)
multiclass_train_dataset = CheckDataset(data_path = "./data/binary_input4.npy",
                                          labels_path= "./data/binary_labels4.npy",
                                          multiclass= True)
multiclass_test_dataset = CheckDataset(data_path = "./data/binary_test4.npy",
                                         labels_path= "./data/binary_labels_test4.npy",
                                         multiclass= True)

train_loader = DataLoader(dataset=multiclass_train_dataset, batch_size=10, shuffle=True)
test_loader = DataLoader(dataset=multiclass_test_dataset, batch_size=10, shuffle=False)
optimizer = torch.optim.SGD(classification_model.parameters(), lr=0.01)
loss_fn = nn.CrossEntropyLoss()

n_total_steps = len(train_loader)
num_epochs = 10

for epoch in range(100):
    for i, (inputs, labels) in enumerate(train_loader):

        # forward pass and loss
        outputs = classification_model(inputs)

        labels = labels.type(torch.LongTensor)
        loss = loss_fn(outputs, labels)

        # backward
        loss.backward()

        # compute gradients
        optimizer.step()

        # clear gradients
        optimizer.zero_grad()

        if (i+1) % 10 == 0:
            print(f'Epoch [{epoch+1}/{num_epochs}], Step [{i+1}/{n_total_steps}], Loss: {loss.item():.4f}')

print("Finish training")

print("Accuracy on training dataset: {}".format(get_acc(classification_model, binary_train_dataset, multiclass=True)))
print("Accuracy on testing dataset: {}".format(get_acc(classification_model, multiclass_test_dataset, multiclass=True)))

  x = self.ac(x)


Epoch [1/10], Step [10/40], Loss: 1.3979
Epoch [1/10], Step [20/40], Loss: 1.3784
Epoch [1/10], Step [30/40], Loss: 1.3936
Epoch [1/10], Step [40/40], Loss: 1.3890
Epoch [2/10], Step [10/40], Loss: 1.3735
Epoch [2/10], Step [20/40], Loss: 1.3930
Epoch [2/10], Step [30/40], Loss: 1.3972
Epoch [2/10], Step [40/40], Loss: 1.4001
Epoch [3/10], Step [10/40], Loss: 1.3732
Epoch [3/10], Step [20/40], Loss: 1.3842
Epoch [3/10], Step [30/40], Loss: 1.3967
Epoch [3/10], Step [40/40], Loss: 1.3881
Epoch [4/10], Step [10/40], Loss: 1.3952
Epoch [4/10], Step [20/40], Loss: 1.3784
Epoch [4/10], Step [30/40], Loss: 1.3848
Epoch [4/10], Step [40/40], Loss: 1.3756
Epoch [5/10], Step [10/40], Loss: 1.4074
Epoch [5/10], Step [20/40], Loss: 1.3764
Epoch [5/10], Step [30/40], Loss: 1.3613
Epoch [5/10], Step [40/40], Loss: 1.3777
Epoch [6/10], Step [10/40], Loss: 1.3554
Epoch [6/10], Step [20/40], Loss: 1.3781
Epoch [6/10], Step [30/40], Loss: 1.3878
Epoch [6/10], Step [40/40], Loss: 1.3776
Epoch [7/10], St