In [None]:
# https://www.geeksforgeeks.org/implementing-an-autoencoder-in-pytorch/
import torch
from torchvision import datasets
from torchvision import transforms
import matplotlib.pyplot as plt


In [None]:
# Convert each image into a PyTorch tensor
tensor_transform = transforms.ToTensor()

# MNIST training split, stored under ./aidata (downloaded when requested)
dataset = datasets.MNIST(
    root="./aidata",
    train=True,
    download=True,
    transform=tensor_transform,
)

# Shuffled mini-batches of 32 samples used for training
loader = torch.utils.data.DataLoader(dataset=dataset, batch_size=32, shuffle=True)

In [None]:
import loader as load
import processor as pr

# Exploratory run: load every modality, keep the last table returned by
# loadAll and attach the tumor status used as the prediction target.
target = "tumor"
c = "STAD"
data, files_names = load.loadAll(includeStage=(target == "stage"), sameSamples=True, skipGenes=True)
ge_genus, ge_genus_name = data[-1], files_names[-1]
ge_genus = load.attachTumorStatus(ge_genus)

x, y = pr.splitData(ge_genus, target=target, project=c)
# Drop the columns at positions 5..5215 by name. Passing the DataFrame slice
# itself to drop() only worked because iterating a DataFrame yields its
# column labels; selecting the labels explicitly states the intent.
x = x.drop(columns=x.columns[5:5216])
# Preview a few rows instead of dumping the whole frame
x.head()

In [44]:
# https://towardsdatascience.com/how-to-use-datasets-and-dataloader-in-pytorch-for-custom-text-data-270eed7f7c00
from torch.utils.data import Dataset, DataLoader
class OverlapDataset(Dataset):
    """Genus + GE dataset.

    Each item is a dict with two keys: ``'features'`` (the values of one row
    of the feature table as a NumPy array) and ``'target'`` (the matching
    entry of the target column).
    """

    def __init__(self, target, cancer, drop_start=5, drop_stop=5216):
        """
        Args:
            target (string): Label to predict. ``"tumor"`` attaches the tumor
                status; any other value attaches the stage status.
            cancer (string): Project identifier forwarded to
                ``pr.splitData`` (for example ``"STAD"``).
            drop_start (int, optional): Position of the first feature column
                to discard. Defaults to 5.
            drop_stop (int, optional): Position one past the last feature
                column to discard. Defaults to 5216.
        """
        data, _ = load.loadAll(includeStage=(target == "stage"), sameSamples=True, skipGenes=True)
        # Only the last table returned by loadAll is used here
        ge_genus = data[-1]

        if target == "tumor":
            ge_genus = load.attachTumorStatus(ge_genus)
        else:
            ge_genus = load.attachStageStatus(ge_genus)

        x, y = pr.splitData(ge_genus, target=target, project=cancer)

        self.modality_features = self._drop_column_span(x, drop_start, drop_stop)
        self.targets = y

    @staticmethod
    def _drop_column_span(frame, start, stop):
        """Return a copy of ``frame`` without the columns at positions ``start:stop``."""
        # Select the labels explicitly rather than passing a DataFrame slice
        # to drop(), which only works through iteration over column names.
        return frame.drop(columns=frame.columns[start:stop])

    def __len__(self):
        """Number of samples (rows of the feature table)."""
        return len(self.modality_features)

    def __getitem__(self, idx):
        """Return ``{'features': ndarray, 'target': value}`` for position ``idx``."""
        # Samplers may hand over tensor indices; pandas needs plain ints/lists
        if torch.is_tensor(idx):
            idx = idx.tolist()

        sample_features = self.modality_features.iloc[idx].values
        sample_target = self.targets.iloc[idx]
        return {'features': sample_features, 'target': sample_target}

In [45]:
from itertools import islice

overlapped = OverlapDataset("tumor", "STAD")
# Show the first sample (feature vector and target).
print('\nFirst iteration of data set: ', next(iter(overlapped)), '\n')
# Print how many items are in the data set
print('Length of data set: ', len(overlapped), '\n')
# Show only the first two collated batches. islice stops the DataLoader after
# two batches instead of collating the whole data set just to slice the list.
print('Entire data set: ', list(islice(DataLoader(overlapped), 2)), '\n')


First iteration of data set:  {'features': array([0.        , 0.        , 0.        , 0.        , 0.        ,
       0.65618724, 0.77328287, 0.14825781, 0.18096101, 0.53191298]), 'target': 1} 

Length of data set:  122 

Entire data set:  [{'features': tensor([[0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.6562, 0.7733, 0.1483, 0.1810,
         0.5319]], dtype=torch.float64), 'target': tensor([1])}, {'features': tensor([[0.0000, 0.0000, 0.0000, 0.1066, 0.0000, 0.5604, 0.7994, 0.0535, 0.1874,
         0.5761]], dtype=torch.float64), 'target': tensor([1])}] 



In [None]:
# Fully-connected autoencoder
# input_dim ==> latent_dim ==> input_dim (10 ==> 9 ==> 10 by default)
class AE(torch.nn.Module):
    """Symmetric fully-connected autoencoder.

    Args:
        input_dim (int, optional): Number of features per sample.
            Defaults to 10.
        latent_dim (int, optional): Size of the encoded representation.
            Defaults to 9.

    Raises:
        ValueError: If ``input_dim`` or ``latent_dim`` is smaller than 1.
    """

    def __init__(self, input_dim=10, latent_dim=9):
        super().__init__()

        if input_dim < 1 or latent_dim < 1:
            raise ValueError("input_dim and latent_dim must both be at least 1")

        # Encoder: Linear layers separated by ReLU activations
        # input_dim ==> 128 ==> 64 ==> 36 ==> 18 ==> latent_dim
        self.encoder = torch.nn.Sequential(
            torch.nn.Linear(input_dim, 128),
            torch.nn.ReLU(),
            torch.nn.Linear(128, 64),
            torch.nn.ReLU(),
            torch.nn.Linear(64, 36),
            torch.nn.ReLU(),
            torch.nn.Linear(36, 18),
            torch.nn.ReLU(),
            torch.nn.Linear(18, latent_dim)
        )

        # Decoder: mirror of the encoder. The final Sigmoid keeps every
        # reconstructed value between 0 and 1.
        # latent_dim ==> 18 ==> 36 ==> 64 ==> 128 ==> input_dim
        self.decoder = torch.nn.Sequential(
            torch.nn.Linear(latent_dim, 18),
            torch.nn.ReLU(),
            torch.nn.Linear(18, 36),
            torch.nn.ReLU(),
            torch.nn.Linear(36, 64),
            torch.nn.ReLU(),
            torch.nn.Linear(64, 128),
            torch.nn.ReLU(),
            torch.nn.Linear(128, input_dim),
            torch.nn.Sigmoid()
        )

    def forward(self, x):
        """Return ``(encoded, decoded)`` for a batch ``x`` whose last dimension is ``input_dim``."""
        encoded = self.encoder(x)
        decoded = self.decoder(encoded)
        return encoded, decoded


In [None]:
# Instantiate the autoencoder
model = AE()

# Mean-squared error, used as the reconstruction loss during training
loss_function = torch.nn.MSELoss()

# Adam optimizer over all model parameters: lr = 0.1, weight decay = 1e-8
optimizer = torch.optim.Adam(params=model.parameters(), lr=0.1, weight_decay=1e-8)

In [None]:
# Show the dataset object wrapped by the MNIST DataLoader `loader`
print(loader.dataset)

In [None]:
epochs = 1
outputs = []
losses = []

# AE() is built for 10 input features, which is the width of an
# OverlapDataset sample. The 784-pixel MNIST batches from `loader` cannot pass
# through its first Linear layer (shape-mismatch RuntimeError), so the model
# is trained on the overlap data instead.
train_loader = DataLoader(overlapped, batch_size=32, shuffle=True)

for epoch in range(epochs):
    for batch in train_loader:

        # Flatten to (batch, features) and cast to float32: the dataset yields
        # float64 values while the model weights are float32. The name `image`
        # is kept because the plotting cell below reads it.
        image = batch['features']
        image = image.reshape(image.shape[0], -1).float()

        # Output of Autoencoder
        hidden, reconstructed = model(image)

        # Calculating the loss function
        loss = loss_function(reconstructed, image)

        # The gradients are set to zero,
        # the gradient is computed and stored.
        # .step() performs parameter update
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # Store plain floats for plotting, and detached reconstructions so
        # the autograd graph of every batch is not kept alive. The current
        # epoch index is logged (previously the constant `epochs` was stored).
        losses.append(loss.item())
        outputs.append((epoch, image, reconstructed.detach()))

# Defining the Plot Style
plt.style.use('fivethirtyeight')
plt.xlabel('Iterations')
plt.ylabel('Loss')

# Plotting the last 100 values
plt.plot(losses[-100:])

In [None]:
def _as_image(vector):
    """Reshape one flat sample to 2-D for imshow: square if possible, else a single row."""
    flat = vector.detach().reshape(-1)
    side = int(round(flat.numel() ** 0.5))
    if side * side == flat.numel():
        return flat.reshape(side, side)
    return flat.reshape(1, -1)


# Show the last sample of the final batch next to its reconstruction.
# The previous loops drew every sample onto the same axes (so only the last
# one stayed visible) and never displayed the reconstruction at all.
fig, (ax_input, ax_recon) = plt.subplots(1, 2, figsize=(8, 4))
ax_input.imshow(_as_image(image[-1]))
ax_input.set_title('Input')
ax_recon.imshow(_as_image(reconstructed[-1]))
ax_recon.set_title('Reconstruction')
plt.show()
