In [1]:
import torch
import torch.nn as nn
import torch.optim as optim

import torchtext
from torchtext.legacy.datasets import TranslationDataset, Multi30k
from torchtext.legacy.data import Field, BucketIterator

import spacy

import random
import math
import time

import matplotlib
matplotlib.rcParams.update({'figure.figsize': (16, 12), 'font.size': 14})
import matplotlib.pyplot as plt
%matplotlib inline
from IPython.display import clear_output

from nltk.tokenize import WordPunctTokenizer
from subword_nmt.learn_bpe import learn_bpe
from subword_nmt.apply_bpe import BPE
import scipy
import numpy as np

Microsoft Visual C++ Redistributable is not installed, this may lead to the DLL load failure.
                 It can be downloaded at https://aka.ms/vs/16/release/vc_redist.x64.exe


In [2]:
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

In [3]:
device

device(type='cpu')

In [3]:
device = torch.device('cuda:0')

In [4]:
class Encoder(nn.Module):
    def __init__(self, input_dim, emb_dim, enc_hid_dim, dec_hid_dim, n_layers, dropout):
        super().__init__()
#         self.embedding = nn.Embedding(input_dim, emb_dim)
        self.rnn = nn.GRU(input_dim, enc_hid_dim, n_layers, bidirectional = True)
        self.linear = nn.Linear(enc_hid_dim * 2, dec_hid_dim)
        self.dropout = nn.Dropout(dropout)
        self.hid_dim=enc_hid_dim
        self.n_layers=n_layers
        
    def forward(self, src):
#         embedded = self.embedding(src)
#         embedded = self.dropout(embedded)     
#         print(embedded.shape)
        outputs, hidden = self.rnn(src)
#         print('enc hidden={}'.format(hidden.shape))
        concat_hiddens = torch.cat((hidden[-2,:,:], hidden[-1,:,:]), dim = 1)
        concat_hiddens = self.linear(concat_hiddens)
        concat_hiddens = torch.tanh(concat_hiddens)
        return outputs, concat_hiddens

class Attention(nn.Module):
    def __init__(self, enc_hid_dim, dec_hid_dim):
        super().__init__()
        self.attn = nn.Linear((enc_hid_dim * 2) + dec_hid_dim, dec_hid_dim)
        self.v = nn.Parameter(torch.rand(dec_hid_dim))
        self.softmax = nn.Softmax(dim=1)
        
    def forward(self, hidden, encoder_outputs):
        batch_size = encoder_outputs.shape[1]
        src_len = encoder_outputs.shape[0]
        # print(hidden.shape)
        hidden = hidden.unsqueeze(1)
        # print(hidden.shape)
        hidden = hidden.repeat(1, src_len, 1)
        encoder_outputs = encoder_outputs.permute(1, 0, 2)
        energy = torch.tanh(self.attn(torch.cat((hidden, encoder_outputs), dim = 2))) 
        energy = energy.permute(0, 2, 1)
        v = self.v.repeat(batch_size, 1).unsqueeze(1)
        attention = torch.bmm(v, energy).squeeze(1)
        attention = self.softmax(attention)
        return attention

In [13]:
def batch_generator(X,D,count_of_delays_elements,batch_size=10):
    for j in range(0,X.shape[0],batch_size):
        yield torch.cat(([X[i-count_of_delays_elements:i+1,:,:] if i > count_of_delays_elements else \
        torch.cat((torch.zeros(count_of_delays_elements-i,1,2),X[:i+1,:,:]),dim=0) 
            for i in range(j,batch_size+j)]),dim=1),\
        torch.cat(([D[i-count_of_delays_elements:i+1,:,:] if i > count_of_delays_elements else \
        torch.cat((torch.zeros(count_of_delays_elements-i,1,2),D[:i+1,:,:]),dim=0) 
            for i in range(j,batch_size+j)]),dim=1)
#         D[:,i:i+1,:]

In [14]:
name = 'Data/BlackBoxData_80'
# name = 'BlackBoxData'
# name = '../BlackBoxData/data1'
mat = scipy.io.loadmat(name)
x = np.array(mat['x']).reshape(-1,1)/2**15
d = np.array(mat['y']).reshape(-1,1)/2**15
# x = np.array(mat['xE']).reshape(-1,1)/2**15
# d = np.array(mat['d']).reshape(-1,1)/2**15
# x, d = mat['xE'], mat['d']
x_real, x_imag = torch.from_numpy(np.real(x)), torch.from_numpy(np.imag(x))
d_real, d_imag = torch.from_numpy(np.real(d)), torch.from_numpy(np.imag(d))
X = torch.DoubleTensor(torch.cat((x_real, x_imag))).reshape(2,-1,1).type(torch.FloatTensor).permute(1,2,0)
D = torch.DoubleTensor(torch.cat((d_real, d_imag))).reshape(2,-1,1).type(torch.FloatTensor).permute(1,2,0)

In [15]:
class Seq2Seq(nn.Module):
    def __init__(self, encoder, decoder, device):
        super().__init__()
        
        self.encoder = encoder
        self.decoder = decoder
        self.device = device
        
    def forward(self, src, trg, teacher_forcing_ratio = 0.5):
        
        batch_size = src.shape[1]
        max_len = trg.shape[0]
        trg_vocab_size = self.decoder.output_dim
        
        outputs = torch.zeros_like( trg).to(self.device)
 
        encoder_outputs, hidden = self.encoder(src)
#         print('seq2seq enc_outputs={} hidden={}'.format(encoder_outputs.shape,hidden.shape))
                
        input = trg[0]
        
        for t in range(1, max_len):
#             print('seq2seq_input={}'.format(input.shape))
            output, hidden = self.decoder(input, hidden, encoder_outputs)
#             print(output.shape)
            outputs[t,:,:] = output
            
            teacher_force = random.random() < teacher_forcing_ratio
            
            top1 = output
#             print('seq2seq_top1={}'.format(top1))

            input = trg[t] if teacher_force else top1

        return outputs

In [16]:
# import my_network
# Encoder = Encoder
# Decoder = Decoder
# Seq2Seq = Seq2Seq

In [17]:
device=torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')

In [18]:
class Decoder(nn.Module):
    def __init__(self, output_dim, emb_dim, enc_hid_dim, dec_hid_dim,n_layers, dropout):
        super().__init__()
        self.output_dim = output_dim
        device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
        self.attention = Attention(enc_hid_dim, dec_hid_dim).to(device)
#         self.embedding = nn.Embedding(output_dim, emb_dim)
        self.rnn = nn.GRU((enc_hid_dim)+output_dim + emb_dim, dec_hid_dim,n_layers)
        self.out = nn.Linear((enc_hid_dim*2) + output_dim + dec_hid_dim , output_dim)
        self.dropout = nn.Dropout(dropout)
        self.hid_dim=enc_hid_dim
        self.n_layers=n_layers
        
    def forward(self, input, hidden, encoder_outputs):
        
        input = input.unsqueeze(0)
#         print('inp={}'.format(input))
#         embedded = self.embedding(input)
#         embedded = self.dropout(embedded)
        
        att = self.attention(hidden, encoder_outputs)
#         print('hid={},enc_outputs={},input={},att={}'.format(hidden.shape,encoder_outputs.shape,input.shape,att.shape))

        att = att.unsqueeze(1)

        encoder_outputs = encoder_outputs.permute(1, 0, 2)        
        weighted = torch.bmm(att, encoder_outputs)
#         print('dec enc_out={}, weighted={}, att={}'.format(encoder_outputs.shape,weighted.shape,att.shape))
        weighted = weighted.permute(1, 0, 2)
#         print('weight={}'.format(torch.unique(weighted)))
        rnn_input = torch.cat((input, weighted), dim = 2)        
#         print('dec weighted={}, rnn_inp={}'.format(weighted.shape,rnn_input.shape))

        output, hidden = self.rnn(rnn_input, hidden.unsqueeze(0))
#         print(torch.unique(rnn_input))
#         print(torch.unique(weighted))
#         print('kjnfrkjernfkjen')
#         embedded = embedded.squeeze(0)
        output = output.squeeze(0)
        weighted = weighted.squeeze(0)
        input = input.squeeze(0)
#         print('dec weighted={}, output={}'.format(weighted.shape,output.shape))

#         print('out_shape={}'.format(torch.cat((output, weighted, input), dim = 1).shape))
        output = self.out(torch.cat((output, weighted, input), dim = 1))
#         print('dec_out={}'.format(output))
        
        return output, hidden.squeeze(0)


In [19]:
INPUT_DIM = 2
OUTPUT_DIM = 2
ENC_EMB_DIM = 2
DEC_EMB_DIM = 256
HID_DIM = 256
N_LAYERS = 2
ENC_DROPOUT = 0.5
DEC_DROPOUT = 0.5

enc = Encoder(INPUT_DIM, ENC_EMB_DIM, HID_DIM, HID_DIM, N_LAYERS, ENC_DROPOUT)
dec = Decoder(OUTPUT_DIM, DEC_EMB_DIM, HID_DIM, HID_DIM,1, DEC_DROPOUT)

# dont forget to put the model to the right device
model = Seq2Seq(enc, dec, device).to(device)
train_dataloader=batch_generator(X,D,10,6)
# src,v=train_dataloader.__next__()
# model(src, v, teacher_forcing_ratio = 0.9)

In [20]:
def init_weights(m):
    # <YOUR CODE HERE>
    for name, param in m.named_parameters():
        nn.init.uniform_(param, -0.008, 0.008)
        
model.apply(init_weights)

Seq2Seq(
  (encoder): Encoder(
    (rnn): GRU(2, 256, num_layers=2, bidirectional=True)
    (linear): Linear(in_features=512, out_features=256, bias=True)
    (dropout): Dropout(p=0.5, inplace=False)
  )
  (decoder): Decoder(
    (attention): Attention(
      (attn): Linear(in_features=768, out_features=256, bias=True)
      (softmax): Softmax(dim=1)
    )
    (rnn): GRU(514, 256)
    (out): Linear(in_features=770, out_features=2, bias=True)
    (dropout): Dropout(p=0.5, inplace=False)
  )
)

In [28]:
def train(model, iterator, optimizer, criterion, clip, train_history=None, valid_history=None):
    model.train()
    
    epoch_loss = 0
    history = []
    output_sig=[]
    output_sig_for_acc=torch.zeros((1,2))
    i=1
    for i, batch in enumerate(iterator):
        
        src = batch[0]
        trg = batch[1]
        
        optimizer.zero_grad()
        
        output = model(src, trg)
        output_sig=np.hstack((output_sig,
                              np.apply_along_axis(lambda args: [complex(*args)],
                                                  1, (output[-1,:,:].data)).reshape(-1)))
        output_sig_for_acc=torch.cat((output_sig_for_acc,output[-1,:,:]))
        output = output.view(-1)
#         print(output.shape)
        trg = trg.view(-1)
#         print(trg.shape)
        loss = criterion(output, trg)
        
        loss.backward()
        
        torch.nn.utils.clip_grad_norm_(model.parameters(), clip)
        
        optimizer.step()
        
        epoch_loss += loss.item()
        
        history.append(loss.item())
#         print('i={}'.format(i))

    return epoch_loss / i, output_sig, output_sig_for_acc

In [22]:
def epoch_time(start_time, end_time):
    elapsed_time = end_time - start_time
    elapsed_mins = int(elapsed_time / 60)
    elapsed_secs = int(elapsed_time - (elapsed_mins * 60))
    return elapsed_mins, elapsed_secs

In [23]:
train_history = []
valid_history = []

N_EPOCHS = 10
CLIP = 1

# best_valid_loss = float('inf')

# PAD_IDX = TRG.vocab.stoi['<pad>']
# criterion = nn.CrossEntropyLoss(ignore_index = PAD_IDX)
loss_fn = nn.MSELoss()


optimizer = optim.Adam(model.parameters())
scheduler = optim.lr_scheduler.MultiStepLR(optimizer, milestones=[2,4,6,8,10,12], gamma=0.8)

In [24]:
def NMSE(X, E):
    return 10 * torch.log10((torch.pow((E).norm(dim=2), 2)).sum() / (torch.pow((X).norm(dim=2), 2)).sum())

In [29]:
accuracy=[]
train_dataloader=batch_generator(X,D,10,6)
best_valid_loss=0
for epoch in range(N_EPOCHS):
    
    start_time = time.time()
    
    train_loss , signal_for_drawing,signal= train(model, train_dataloader, optimizer, loss_fn, CLIP, train_history, valid_history)
#     valid_loss = evaluate(model, valid_iterator, criterion)
    scheduler.step()
    end_time = time.time()
    epoch_mins, epoch_secs = epoch_time(start_time, end_time)
    signal=signal.unsqueeze(1)
    accuracy.append(NMSE(X,D-signal).item())
    fig, ax = plt.subplots(nrows=1, ncols=2, figsize=(12, 8))
    
    clear_output(True)
    ax[0].plot(accuracy, label='train NMSE')
    ax[0].set_xlabel('Batch')
    ax[0].set_title('Train loss')
    ax[1].psd(x.reshape(-1),NFFT=2048,label='X')
    ax[1].psd(d.reshape(-1),NFFT=2048,label='D')
#     outputs=torch.complex(outputs[0,:,:],outputs[1,:,:]).detach().cpu().view(-1)
    ax[1].psd(no,NFFT=2048,label='output')
    ax[1].psd(d.reshape(-1)-no,NFFT=2048,label='eOut')
    ax[1].legend()
    ax[1].grid()
    # Draw figure on canvas
#     fig.canvas.draw()
#     if train_history is not None:
#         ax[1].plot(train_history, label='general train history')
#         ax[1].set_xlabel('Epoch')
#     if valid_history is not None:
#         ax[1].plot(valid_history, label='general valid history')
#     plt.legend()

#     plt.show()
    
    if accuracy[-1] < best_valid_loss:
        best_valid_loss = accuracy[-1] 
        torch.save(model.state_dict(), './experiment_data/att_dpd/tut1-model.pt')
    
    train_history.append(train_loss)
    valid_history.append(valid_loss)
    print(f'Epoch: {epoch+1:02} | Time: {epoch_mins}m {epoch_secs}s')
    print(f'\tTrain Loss: {accuracy[-1]:.3f}')

RuntimeError: The size of tensor a (99840) must match the size of tensor b (99841) at non-singleton dimension 0

In [32]:
signal.shape

torch.Size([99841, 1, 2])

In [41]:
d.reshape(-1)

(99840,)

In [34]:
NMSE(X,D-signal[1:,:,:])

tensor(-16.2566, grad_fn=<MulBackward0>)

__Let's take a look at our network quality__:

In [None]:
model.load_state_dict(torch.load('tut1-model.pt'))

<All keys matched successfully>

In [None]:
import utils
import imp
imp.reload(utils)
generate_translation = utils.generate_translation
remove_tech_tokens = utils.remove_tech_tokens
get_text = utils.get_text
flatten = utils.flatten

In [None]:
batch = next(iter(test_iterator))

In [None]:
for idx in [1,2]:
    src = batch.src[:, idx:idx+1]
    trg = batch.trg[:, idx:idx+1]
    generate_translation(src, trg, model, TRG.vocab)

Original: the property offers free parking .
Generated: wichita free parking is available on site . parking is provided .

Original: you will find a kettle in the room .
Generated: wichita you will find a kettle in the room . room . kettle . kettle



In [None]:
from nltk.translate.bleu_score import corpus_bleu

In [None]:
import tqdm

In [None]:
original_text = []
generated_text = []
model.eval()
with torch.no_grad():

    for i, batch in tqdm.tqdm(enumerate(test_iterator)):

        src = batch.src
        trg = batch.trg

        output = model(src, trg, 0) #turn off teacher forcing

        output = output.argmax(dim=-1)
        
        original_text.extend([get_text(x, TRG.vocab) for x in trg.cpu().numpy().T])
        generated_text.extend([get_text(x, TRG.vocab) for x in output.detach().cpu().numpy().T])

59it [00:07,  3.69it/s]


In [None]:
corpus_bleu([[text] for text in original_text], generated_text) * 100

25.092799185499636


Baseline solution BLEU score is quite low. Try to achieve at least __18__ BLEU on the test set. 
The checkpoints are:

* __18__ - minimal score to submit the homework, 30% of points

* __20__ - good score, 70% of points

* __25__ - excellent score, 100% of points

In [119]:
#oold version
def batch_generator(X,D,count_of_delays_elements,batch_size=10):
    for j in range(0,X.shape[1],10):
        yield X[:,i-count_of_delays_elements:i+1,:]if i > count_of_delays_elements else \
        torch.cat((torch.zeros(1,count_of_delays_elements-i,2),X[:,:i+1,:]),dim=1),\
        D[:,i-count_of_delays_elements:i+1,:]if i > count_of_delays_elements else \
        torch.cat((torch.zeros(1,count_of_delays_elements-i,2),D[:,:i+1,:]),dim=1)
#         D[:,i:i+1,:]