<a href="https://colab.research.google.com/github/AntonYermilov/deep-unsupervised-learning/blob/task1/Task_1.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [0]:
import numpy as np

def sample_data(count=10000, seed=0):
    rand = np.random.RandomState(seed)
    a = 0.3 + 0.1 * rand.randn(count)
    b = 0.8 + 0.05 * rand.randn(count)
    mask = rand.rand(count) < 0.5
    samples = np.clip(a * mask + b * (1 - mask), 0.0, 1.0)
    return np.digitize(samples, np.linspace(0.0, 1.0, 100))

In [0]:
data = sample_data() - 1

In [0]:
import torch
import torch.nn.functional as F
import torch.nn as nn
from torch.utils.data import TensorDataset, DataLoader, RandomSampler, SequentialSampler
from torch.optim import Adam
from torch.nn import NLLLoss, CrossEntropyLoss
from sklearn.model_selection import train_test_split
from tqdm import tqdm, trange

import plotly.offline as py
import plotly.graph_objs as go
import plotly.express as px

In [0]:
train_input, val_input = train_test_split(data, random_state=1, test_size=0.2)
train_input, val_input = torch.tensor(train_input), torch.tensor(val_input)

In [0]:
batch_size = 32

train_data = TensorDataset(train_input)
train_sampler = RandomSampler(train_data)
train_dataloader = DataLoader(train_data, sampler=train_sampler, batch_size=batch_size)

val_data = TensorDataset(val_input)
val_sampler = SequentialSampler(val_data)
val_dataloader = DataLoader(val_data, sampler=val_sampler, batch_size=val_input.shape[0])

In [0]:
theta = torch.randn(100)
theta.requires_grad = True
params = [theta]

In [0]:
optimizer = Adam(params, lr=0.001, betas=(0.9, 0.999), eps=1e-8)
loss_fn = NLLLoss()

In [100]:
train_losses, val_losses = [0], [0]

for i, batch in enumerate(train_dataloader):
    target = batch[0]

    with torch.no_grad():
        pred = F.log_softmax(theta)
        pred = pred.repeat(target.shape[0], 1)
        loss = loss_fn(pred, target)

    train_losses[0] += loss.item()

for i, batch in enumerate(val_dataloader):
    target = batch[0]

    with torch.no_grad():
        pred = F.log_softmax(theta)
        pred = pred.repeat(target.shape[0], 1)
        loss = loss_fn(pred, target)

    val_losses[0] += loss.item()

train_losses[0] /= len(train_dataloader)
val_losses[0] /= len(val_dataloader)


Implicit dimension choice for log_softmax has been deprecated. Change the call to include dim=X as an argument.


Implicit dimension choice for log_softmax has been deprecated. Change the call to include dim=X as an argument.



In [101]:
epochs = 50

for epoch in range(epochs):
    nlll_train, nlll_val = 0, 0

    for i, batch in enumerate(train_dataloader):
        target = batch[0]
        optimizer.zero_grad()

        pred = F.log_softmax(theta)
        pred = pred.repeat(target.shape[0], 1)
        loss = loss_fn(pred, target)
        nlll_train += loss.item()

        loss.backward()
        optimizer.step()

    for i, batch in enumerate(val_dataloader):
        target = batch[0]

        with torch.no_grad():
            pred = F.log_softmax(theta)
            pred = pred.repeat(target.shape[0], 1)
            loss = loss_fn(pred, target)

        nlll_val += loss.item()

    train_losses.append(nlll_train / len(train_dataloader))
    val_losses.append(nlll_val / len(val_dataloader))
    if (epoch + 1) % 5 == 0:
        print(f'Epoch {epoch + 1}: NLLL_Train={train_losses[-1]:.8f}, NLLL_Val={val_losses[-1]:.8f}')


Implicit dimension choice for log_softmax has been deprecated. Change the call to include dim=X as an argument.


Implicit dimension choice for log_softmax has been deprecated. Change the call to include dim=X as an argument.



Epoch 50: NLLL_Train=4.03852664, NLLL_Val=4.03409338


In [0]:
def plot(data, theta, epochs, train_losses, val_losses):
    train = go.Scatter(x=np.arange(epochs + 1), y=train_losses, mode='lines', name='train loss')
    val = go.Scatter(x=np.arange(epochs + 1), y=val_losses, mode='lines', name='validation loss')
    traces = [train, val]
    layout = go.Layout(title='Losses', xaxis={'title': 'Epoch'}, yaxis={'title': 'NLLLoss'})
    figure = go.Figure(data=traces, layout=layout)
    figure.show()

    probs = F.softmax(theta).detach().numpy()
    samples = np.random.choice(data, size=1000)
    samples = np.bincount(samples) / 1000

    figure = go.Figure(data=[
        go.Bar(name='sample distribution', x=np.arange(100), y=samples),
        go.Bar(name='predicted distribution', x=np.arange(100), y=probs)
    ])
    # figure.update_layout(title='Distribution', barmode='stack')
    figure.update_layout(title='Distribution')
    figure.show()

In [103]:
plot(data, theta, epochs, train_losses, val_losses)


Implicit dimension choice for softmax has been deprecated. Change the call to include dim=X as an argument.

