# Import all libraries

In [None]:
from Mnist_Classifier_SNN import MNIST_SNN
from Mnist_Classifier_CNN import MNIST_CNN
from Mnist_Classifier_NN import MNIST_NN

import random
from tqdm import tqdm
import torch
import torch.nn as nn
from torchsummary import summary
import torchvision
import matplotlib.pyplot as plt
from sklearn.metrics import mean_squared_error as mse

### Check if GPU is available

In [None]:
# import ray
# import os
# import random
# import sys
#
# def init_cluster():
#     ray.init(address='auto')
#     print(f"This cluster consists of {len(ray.nodes())} nodes and "
#           f"{ray.cluster_resources()['CPU']} CPUs in total.")
#     return int(ray.cluster_resources()['CPU'])
#
#
# @ray.remote
# def throw_points(n):
#     print(f"{os.getpid()} on {os.uname().nodename} is started")
#     i = 0
#     for _ in range(n):
#         x = random.uniform(-1, 1)
#         y = random.uniform(-1, 1)
#         if x**2 + y**2 <= 1:
#             i += 1
#     print(f"{os.getpid()} on {os.uname().nodename} is finished")
#     return i
#
#
# def main():
#     sys.stderr = open(os.devnull, "w")
#     number_of_cpu = init_cluster()
#     n = 10000000
#     inner = ray.get([throw_points.remote(n) for _ in range(number_of_cpu)])
#     pi = 4 * sum(inner) / (number_of_cpu*n)
#     print(f"Estimated Pi value is {pi:.8f}")
#
#
# if __name__ == "__main__":
#     main()

In [None]:
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
if device.type == 'cuda':
    !nvidia-smi
    print(torch.cuda.get_device_name(0))

else:
    print("No GPU :(")


# Load MNIST dataset

In [None]:
# MNIST training split stored under ./datasets (downloaded when missing);
# every image is passed through ToTensor() when it is accessed.
dataset = torchvision.datasets.MNIST(
    root='datasets',
    train=True,
    download=True,
    transform=torchvision.transforms.ToTensor(),
)

#### Get and visualize random individual

In [None]:
# Pick a random sample index. `random.randrange(n)` excludes `n`, so the index
# is always valid; `random.randint(0, 60_000)` includes 60_000, which is one
# past the last element of the dataset and raises IndexError.
rand_individ = random.randrange(len(dataset))

# Fetch the sample once: each indexing re-runs the dataset transform.
img, ans = dataset[rand_individ]
img = img.view((28, 28))

plt.imshow(img, cmap='Greys')
plt.xlabel(f"NUMBER: {ans}")
pass  # keeps the notebook from echoing the return value of plt.xlabel

# Prepare and split data into train/validation

In [17]:
# Fraction of the dataset used for training; the remainder is held out for
# validation.
split_ratio = 0.90
batch_size = 50
train_size = round(len(dataset) * split_ratio)
valid_size = len(dataset) - train_size

print(dataset)

train, valid = torch.utils.data.random_split(dataset, [train_size, valid_size])


def _collate_to_device(samples):
    """Collate a list of samples into a batch and move every tensor to `device`."""
    batch = torch.utils.data.dataloader.default_collate(samples)
    return tuple(t.to(device) for t in batch)


# `DataLoader2` was an experimental API that is no longer part of
# `torch.utils.data`; the stable `DataLoader` accepts the same arguments here.
# The training loader reshuffles every epoch; validation order does not matter.
train_batch_loader = torch.utils.data.DataLoader(
    train, batch_size=batch_size, shuffle=True, collate_fn=_collate_to_device
)
valid_batch_loader = torch.utils.data.DataLoader(
    valid, batch_size=batch_size, collate_fn=_collate_to_device
)

Dataset MNIST
    Number of datapoints: 60000
    Root location: datasets
    Split: Train
    StandardTransform
Transform: ToTensor()


# Configure NN Model and hyper-parameters

In [None]:
# Hyper-parameters and loss log for the fully-connected model.
epochs = 5
history = list()

# Constructor arguments are 28*28 and 10 (presumably input size and number of
# classes - see Mnist_Classifier_NN).
mnist_nn = MNIST_NN(28 * 28, 10)
mnist_nn = mnist_nn.to(device)

loss_func = nn.CrossEntropyLoss()
optimizer = torch.optim.ASGD(params=mnist_nn.parameters(), lr=0.8)

# Print a layer-by-layer summary for a flattened 28*28 input.
summary(mnist_nn, (28 * 28,), batch_size=batch_size)

# Train loop

In [None]:
def train_mnist_NN():
    """Train the notebook-level ``mnist_nn`` for ``epochs`` passes.

    Uses the notebook globals ``train_batch_loader``, ``loss_func`` and
    ``optimizer``. The loss of every third batch is appended to the global
    ``history`` list as a detached tensor, so later cells can still call
    ``.cpu()`` on each entry.
    """
    mnist_nn.train()
    for _epoch in range(epochs):
        for i, (features, ans) in enumerate(tqdm(train_batch_loader)):
            # Clear gradients first so a previously interrupted run cannot
            # leak stale gradients into the first step.
            optimizer.zero_grad()

            # Forward pass: call the module itself (not `.forward`) so that
            # registered hooks run.
            Y_pred = mnist_nn(features.view(-1, 28*28))
            loss = loss_func(Y_pred, ans)

            # Backward pass
            loss.backward()
            optimizer.step()

            if i % 3 == 0:
                # `detach()` is the supported replacement for `.data`.
                history.append(loss.detach())


train_mnist_NN()

# Training via RAY Cluster
* First, install the same Python version on every cluster node (`python 3.7.13`).
* Install Ray with `python3.7 -m pip install ray[default]` rather than plain `ray`; otherwise the dashboard will not work. If Ray is already installed, update it with `pip install -U ray[default]`.
* Start the cluster head node from the Ray CLI: `ray start --head`.
* The dashboard will be available at `http://localhost:8265/`.
* Add the other nodes from the CLI as well: `ray start --address='<head-node-ip>:6379'` (use the address printed by `ray start --head`).

After that, you will be able to execute this code:

In [None]:
import torch
import torch.nn as nn

import ray
from ray import train
from ray.air import session, Checkpoint
from ray.train.torch import TorchTrainer
from ray.air.config import ScalingConfig


def train_loop_per_worker():
    """Per-worker training function passed to ``TorchTrainer``.

    Fetches the "train" dataset shard from the Ray AIR session, wraps the
    notebook-level ``mnist_nn`` with ``prepare_model`` and iterates over the
    shard for ``epochs`` passes, reporting a checkpoint after every epoch.

    Relies on notebook globals defined in other cells: ``mnist_nn``,
    ``epochs``, ``batch_size``, ``loss_func`` and ``optimizer``.
    """
    dataset_shard = session.get_dataset_shard("train")

    # NOTE(review): `optimizer` is the notebook-level optimizer created in a
    # configuration cell, not one built from `model` here - confirm it still
    # refers to the parameters being trained.
    model = ray.train.torch.prepare_model(mnist_nn)

    for epoch in range(0, epochs):
        # NOTE(review): `dtypes=torch.float` presumably applies to every
        # column, labels included - confirm the target dtype is what
        # `loss_func` expects.
        for batches in dataset_shard.iter_torch_batches(batch_size=batch_size, dtypes=torch.float):
            # NOTE(review): assumes each batch is a mapping with "x" and "y"
            # entries; no cell in this notebook builds a dataset with those
            # keys - verify. The local training loop flattens inputs to 28*28
            # before calling the model, whereas this adds a dimension at
            # index 1 - confirm the shape the model expects.
            inputs, labels = torch.unsqueeze(batches["x"], 1), batches["y"]
            output = model(inputs)
            loss = loss_func(output, labels)
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            print(f"epoch: {epoch}, loss: {loss.item()}")

        # Once per epoch: report an empty metrics dict together with a
        # checkpoint holding the epoch index and the model state dict.
        session.report(
            {},
            checkpoint=Checkpoint.from_dict(
                dict(epoch=epoch, model=model.state_dict())
            ),
        )


# `train` here is the `ray.train` module imported at the top of this cell; that
# import rebinds the name `train`, which the data-split cell had assigned to
# the training subset.
# NOTE(review): Ray's documentation describes `prepare_data_loader` as a call
# made inside the per-worker training function - confirm it is valid at this
# level. `valid_dataset` is not used anywhere else in this cell.
train_dataset = train.torch.prepare_data_loader(train_batch_loader)
valid_dataset = train.torch.prepare_data_loader(valid_batch_loader)

scaling_config = ScalingConfig(num_workers=3)
# If using GPUs, use the below scaling config instead.
# scaling_config = ScalingConfig(num_workers=3, use_gpu=True)
# NOTE(review): `datasets=` presumably expects Ray Dataset objects rather than
# a torch DataLoader - verify against the installed Ray version.
trainer = TorchTrainer(
    train_loop_per_worker=train_loop_per_worker,
    scaling_config=scaling_config,
    datasets={"train": train_dataset},
)
result = trainer.fit()

In [None]:
# Plot every recorded training loss (moved to the CPU for matplotlib).
plt.plot([loss.cpu() for loss in history])

# Average over the values actually present: dividing by a fixed 100 understates
# the mean whenever fewer than 100 losses were recorded.
recent_losses = history[-100:]
if recent_losses:
    print(f"Mean CrossEntropyLoss (last 100): {sum(recent_losses) / len(recent_losses)}")
else:
    print("Mean CrossEntropyLoss (last 100): n/a (history is empty)")

# Validation

In [None]:
# Evaluate `mnist_nn` on the validation loader and print the MSE between the
# L2-normalised model outputs and the one-hot encoded labels.
# Inference mode for layers such as dropout/batch-norm; the model stays in
# eval mode after this cell.
mnist_nn.eval()
with torch.no_grad():
    # Collect per-batch results and concatenate once. Seeding the accumulators
    # with a `torch.zeros(10)` row (as before) added a fake all-zero sample to
    # the metric, and `vstack` inside the loop re-copied all previous rows.
    prediction_batches = []
    answer_batches = []

    for features, ans in tqdm(valid_batch_loader):
        outputs = mnist_nn(features.view(-1, 28*28))
        prediction_batches.append(nn.functional.normalize(outputs))

        # Convert to one-hot encoded
        answer_batches.append(nn.functional.one_hot(ans, num_classes=10))

    predictions = torch.cat(prediction_batches)
    answers = torch.cat(answer_batches).float()

    print(f"MSE: {mse(answers.T.cpu(), predictions.T.cpu())}")


# Visualisation test

In [None]:
# Pick a random sample index. `random.randrange(n)` excludes `n`, so the index
# is always valid; `random.randint(0, 60_000)` includes 60_000, which is one
# past the last element of the dataset and raises IndexError.
rand_individ = random.randrange(len(dataset))

# Fetch the sample once: each indexing re-runs the dataset transform.
img, ans = dataset[rand_individ]
img = img.view((28, 28))

# Predict the class of the flattened image without tracking gradients.
with torch.no_grad():
    pred = mnist_nn(img.to(device).view(-1)).argmax()

plt.imshow(img, cmap='Greys')
plt.xlabel(f"PREDICTION: {pred}, ANSWER {ans}")
pass  # keeps the notebook from echoing the return value of plt.xlabel

# Configure CNN Model and hyper-parameters

In [None]:
# Hyper-parameters and a fresh loss log for the convolutional model; these
# rebind the names the fully-connected model's cells used.
epochs = 2
history = list()

mnist_cnn = MNIST_CNN()
mnist_cnn = mnist_cnn.to(device)

loss_func = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(params=mnist_cnn.parameters(), lr=0.01)

# Print a layer-by-layer summary for a (1, 28, 28) input.
summary(mnist_cnn, (1, 28, 28), batch_size=batch_size)

# Train loop

In [None]:
# Train `mnist_cnn` for `epochs` passes over `train_batch_loader`, logging the
# loss of every third batch to `history` as a detached tensor (later cells
# call `.cpu()` on each entry).
mnist_cnn.train()
for epoch in range(epochs):
    for i, (features, ans) in enumerate(tqdm(train_batch_loader)):
        # Clear gradients first so a previously interrupted run cannot leak
        # stale gradients into the first step.
        optimizer.zero_grad()

        # Forward pass: call the module itself (not `.forward`) so that
        # registered hooks run.
        Y_pred = mnist_cnn(features.view(-1, 1, 28, 28))
        loss = loss_func(Y_pred, ans)

        # Backward pass
        loss.backward()
        optimizer.step()

        if i % 3 == 0:
            # `detach()` is the supported replacement for `.data`.
            history.append(loss.detach())

In [None]:
# Plot every recorded training loss (moved to the CPU for matplotlib).
plt.plot([loss.cpu() for loss in history])

# Average over the values actually present: dividing by a fixed 100 understates
# the mean whenever fewer than 100 losses were recorded.
recent_losses = history[-100:]
if recent_losses:
    print(f"Mean CrossEntropyLoss (last 100): {sum(recent_losses) / len(recent_losses)}")
else:
    print("Mean CrossEntropyLoss (last 100): n/a (history is empty)")

# Validation

In [None]:
# Evaluate `mnist_cnn` on the validation loader and print the MSE between the
# L2-normalised model outputs and the one-hot encoded labels.
# Inference mode for layers such as dropout/batch-norm; the model stays in
# eval mode after this cell.
mnist_cnn.eval()
with torch.no_grad():
    # Collect per-batch results and concatenate once. Seeding the accumulators
    # with a `torch.zeros(10)` row (as before) added a fake all-zero sample to
    # the metric, and `vstack` inside the loop re-copied all previous rows.
    prediction_batches = []
    answer_batches = []

    for features, ans in tqdm(valid_batch_loader):
        outputs = mnist_cnn(features.view(-1, 1, 28, 28))
        prediction_batches.append(nn.functional.normalize(outputs))

        # Convert to one-hot encoded
        answer_batches.append(nn.functional.one_hot(ans, num_classes=10))

    predictions = torch.cat(prediction_batches)
    answers = torch.cat(answer_batches).float()

    print(f"MSE: {mse(answers.T.cpu(), predictions.T.cpu())}")