# <center> Autoencoder to encode Three Meter Dataset
## <center> Using a simple 2-layer encoder
### <center> Saumya Rawat
    

In [1]:
import os
import numpy as np
import torch
import torchvision
from torch import nn
from torch.autograd import Variable
from torch.utils.data import Dataset, DataLoader
from sklearn.preprocessing import normalize

# Dataset introduction and loading

The dataset consists of magnetic field data from the Three Meter experiment at IREAP, University of Maryland, collected from the surface of a spinning ball of molten sodium designed to simulate the inner core of the Earth.

The data is fit with a simple feed-forward autoencoder using MAE (L1) loss. Training runs for 10 epochs with a batch size of 60 and a learning rate of 0.00001.


In [2]:
# Training configuration
num_epochs = 10         # number of full passes over the data loader
batch_size = 60         # samples per mini-batch handed to the DataLoader
learning_rate = 1e-5    # step size given to the Adam optimizer

In [3]:
# Parse the comma-separated measurement file into a NumPy array.
in_data = np.genfromtxt('Three Meter/data.csv', delimiter=',')

# Feature count is taken from the length of the first row.
in_dim = len(in_data[0])

# Rescale each row (axis=1) to unit L1 norm.
data = normalize(in_data, norm='l1', axis=1)

class ThreeMeterDataset(Dataset):
    """In-memory dataset that serves one measurement row per item.

    Args:
        samples: Optional 2-D array-like of shape (n_samples, n_features).
            When omitted, the module-level ``data`` array is used, which is
            the original zero-argument behaviour. Passing the array
            explicitly avoids depending on that global, which can be
            rebound elsewhere in the notebook.
    """

    def __init__(self, samples=None):
        if samples is None:
            samples = data
        # One bulk copy to a float32 tensor instead of building and stacking
        # a separate tensor for every row.
        self.X = torch.tensor(np.asarray(samples), dtype=torch.float32)

    def __len__(self):
        """Return the number of rows held by the dataset."""
        return len(self.X)

    def __getitem__(self, index):
        """Return the row at ``index`` as a float32 tensor."""
        return self.X[index]


# Wrap the dataset in a loader that yields shuffled mini-batches.
dataset = ThreeMeterDataset()
data_loader = DataLoader(dataset, batch_size=batch_size, shuffle=True)

The autoencoder below compresses each sample down to a bottleneck (lowest) dimension of 4.


In [4]:
class Autoencoder(nn.Module):
    """Fully connected autoencoder: in_dim -> 20 -> 16 -> latent_dim and back.

    Args:
        in_dim: Number of features in each input sample.
        latent_dim: Width of the bottleneck code. Defaults to 4, the value
            that was previously hard-coded.
    """

    def __init__(self, in_dim=in_dim, latent_dim=4):
        super().__init__()

        # Layer order and Sequential indices are kept as before so that
        # seeded initialisation and state_dict keys stay the same.
        self.encoder = nn.Sequential(
            nn.Linear(in_dim, 20),
            nn.ReLU(),
            nn.Linear(20, 16),
            nn.ReLU(),
            nn.Linear(16, latent_dim),
            nn.ReLU(),
        )

        self.decoder = nn.Sequential(
            nn.Linear(latent_dim, 16),
            nn.ReLU(),
            nn.Linear(16, 20),
            nn.ReLU(),
            nn.Linear(20, in_dim),
            # Sigmoid confines every reconstructed value to (0, 1).
            # NOTE(review): confirm the normalized inputs never fall outside
            # that range, otherwise they cannot be reconstructed exactly.
            nn.Sigmoid(),
        )

    def forward(self, x):
        """Encode ``x`` to the bottleneck code and decode it again.

        Args:
            x: Tensor whose last dimension has size ``in_dim``.

        Returns:
            The reconstruction produced by the decoder; its last dimension
            has size ``in_dim``.
        """
        code = self.encoder(x)
        return self.decoder(code)

def MAELoss(output, data):
    """Return the total absolute error divided by the batch size.

    The element-wise absolute differences between ``output`` and ``data``
    are summed over every element, then divided by the size of the first
    dimension of ``output``.
    """
    n_samples = output.shape[0]
    total_abs_error = (output - data).abs().sum()
    return total_abs_error / n_samples

# Build the network and move it to the GPU when CUDA is available.
model = Autoencoder(in_dim=in_dim)
if torch.cuda.is_available():
    model = model.cuda()

# Mean absolute error objective, optimised with Adam.
criterion = torch.nn.L1Loss()
optimizer = torch.optim.Adam(params=model.parameters(), lr=learning_rate)

MAE loss achieved: 0.0001

In [5]:
# Train the autoencoder to reconstruct its own input.
for epoch in range(num_epochs):
    # The loop variable is named ``batch`` so that the module-level ``data``
    # array is not overwritten by the last mini-batch.
    for batch in data_loader:
        if torch.cuda.is_available():
            # Tensors no longer need the deprecated Variable wrapper.
            batch = batch.cuda()
        # ===================forward=====================
        output = model(batch)
        loss = criterion(output, batch)
        # Alternative objective: loss = MAELoss(output, batch)
        # ===================backward====================
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
    # ===================log========================
    # The reported value is the loss of the final mini-batch of the epoch.
    print('epoch [{}/{}], loss:{:.4f}'
          .format(epoch+1, num_epochs, loss.item()))


epoch [1/10], loss:0.4333
epoch [2/10], loss:0.3957
epoch [3/10], loss:0.3398
epoch [4/10], loss:0.2645
epoch [5/10], loss:0.1764
epoch [6/10], loss:0.0863
epoch [7/10], loss:0.0220
epoch [8/10], loss:0.0045
epoch [9/10], loss:0.0005
epoch [10/10], loss:0.0001
