In [1]:
#coding=utf-8
import os
import re
import sys
import yaml
import argparse
from datetime import datetime

import numpy as np
import pandas as pd

import torch
from torch.autograd import Variable
import torch.nn as nn
from torch.utils.data import DataLoader

# utils
from utils import get_embedding, load_embed, save_embed, data_preprocessing
# data
from data import myDS, mytestDS
# model
from model import Siamese_lstm

In [2]:
!which python

/Users/liushijing/anaconda3/bin/python


In [3]:
config = {
    'experiment_name': 'siamese-transfer-baseline',
    'task': 'train',
    'make_dict': True,
    'data_preprocessing': True,

    'ckpt_dir': 'ckpt/',

    'training':{
        'num_epochs': 20,
        'learning_rate': 0.01,
        'optimizer': 'sgd'
    },
    
    'embedding':{
        'full_embedding_path': 'input/wiki.es.vec',
        'cur_embedding_path': 'input/embedding.pkl',
    },
        
    'model':{
        'fc_dropout': 0.1,
        'fc_dim': 100,
        'name': 'siamese',
        'embed_size': 300,
        'batch_size': 1,
        'embedding_freeze': False,
        'encoder':{
            'hidden_size': 150,
            'num_layers': 1,
            'bidirectional': False,
            'dropout': 0.5,
        },  
    },   
    
    'result':{
        'filename':'result.txt',
        'filepath':'res/',
    }
}

## Read Data

In [37]:
# Training data
df_train_en_sp = pd.read_csv('./input/cikm_english_train_20180516.txt', sep='	', header=None,
                             error_bad_lines=False)
df_train_sp_en = pd.read_csv('./input/cikm_spanish_train_20180516.txt', sep='	', header=None,
                             error_bad_lines=False)
df_train_en_sp.columns = ['english1', 'spanish1', 'english2', 'spanish2', 'result']
df_train_sp_en.columns = ['spanish1', 'english1', 'spanish2', 'english2', 'result']

sp1 = pd.DataFrame(pd.concat([df_train_en_sp['spanish1'], df_train_sp_en['spanish1']], axis=0))
sp2 = pd.DataFrame(pd.concat([df_train_en_sp['spanish2'], df_train_sp_en['spanish2']], axis=0))
sp = pd.concat([sp1, sp2], axis=1).reset_index()
sp = sp.drop(['index'], axis=1)

en1 = pd.DataFrame(pd.concat([df_train_en_sp['english1'], df_train_sp_en['english1']], axis=0))
en2 = pd.DataFrame(pd.concat([df_train_en_sp['english2'], df_train_sp_en['english2']], axis=0))
en = pd.concat([en1, en2], axis=1).reset_index()
en = en.drop(['index'], axis=1)

result = pd.DataFrame(pd.concat([df_train_en_sp['result'], df_train_sp_en['result']], axis=0)).reset_index()
result = result.drop(['index'], axis=1)
# pd.get_dummies(result['result']).head()
sp['label'] = result
en['label'] = result

In [38]:
# Evaluation data
test_data = pd.read_csv('./input/cikm_test_a_20180516.txt', sep='	', header=None, error_bad_lines=False)
test_data.columns = ['spanish1', 'spanish2']

## Clean Up

In [39]:
def clean_sent(sent):
    sent = sent.lower()
    sent = re.sub(u"[_'\-;%()|+&=*%.,!?:#$@\[\]/]",' ',sent)
    sent = re.sub('¡',' ',sent)
    sent = re.sub('¿',' ',sent)
    sent = re.sub('Á','á',sent)
    sent = re.sub('Ó','ó',sent)
    sent = re.sub('Ú','ú',sent)
    sent = re.sub('É','é',sent)
    sent = re.sub('Í','í',sent)
    return sent
def cleanSpanish(df):
    if (sys.version_info > (3, 0)):
        df['spanish1'] = df.spanish1.map(lambda x: ' '.join([word for word in
                                                             nltk.word_tokenize(clean_sent(x))]))
        df['spanish2'] = df.spanish2.map(lambda x: ' '.join([word for word in
                                                             nltk.word_tokenize(clean_sent(x))]))
    else:
        df['spanish1'] = df.spanish1.map(lambda x: ' '.join([ word for word in
                                                             nltk.word_tokenize(clean_sent(x).decode('utf-8'))]).encode('utf-8'))
        df['spanish2'] = df.spanish2.map(lambda x: ' '.join([ word for word in
                                                             nltk.word_tokenize(clean_sent(x).decode('utf-8'))]).encode('utf-8'))
def removeSpanishStopWords(df, stop):
    if (sys.version_info > (3, 0)):
        df['spanish1'] = df.spanish1.map(lambda x: ' '.join([word for word in nltk.word_tokenize(x)
                                                             if word not in stop]))
        df['spanish2'] = df.spanish2.map(lambda x: ' '.join([word for word in nltk.word_tokenize(x)
                                                             if word not in stop]))
    else:
        df['spanish1'] = df.spanish1.map(lambda x: ' '.join([word for word in nltk.word_tokenize(x.decode('utf-8'))
                                                             if word not in stop]).encode('utf-8'))
        df['spanish2'] = df.spanish2.map(lambda x: ' '.join([word for word in nltk.word_tokenize(x.decode('utf-8'))
                                                             if word not in stop]).encode('utf-8'))
        
def cleanEnglish(df):
    df['english1'] = df.english1.map(lambda x: ' '.join([word for word in
                                                     nltk.word_tokenize(clean_sent(x))]))
    df['english2'] = df.english2.map(lambda x: ' '.join([word for word in
                                                     nltk.word_tokenize(clean_sent(x))]))

def removeEnglishStopWords(df, stop):
    df['english1'] = df.english1.map(lambda x: ' '.join([word for word in nltk.word_tokenize(x) 
                                                               if word not in stop]))
    df['english2'] = df.english2.map(lambda x: ' '.join([word for word in nltk.word_tokenize(x) 
                                                               if word not in stop]))

In [40]:
import nltk
from nltk.corpus import stopwords
sp_stops = set(stopwords.words("spanish"))
en_stops = set(stopwords.words("english"))

In [41]:
cleanSpanish(sp)
removeSpanishStopWords(sp, sp_stops)

In [42]:
cleanEnglish(en)
removeEnglishStopWords(en, en_stops)

In [43]:
cleanSpanish(test_data)
removeSpanishStopWords(test_data, sp_stops)

## Check Empty Data

In [44]:
def check_dirty_train(df):
    df.replace('', np.nan, inplace=True)
    dirty_data = df[df.isnull().any(axis=1)]
    print('dirty sample count:', dirty_data.shape[0])
    print('positive dirty training sample:', len(dirty_data[dirty_data['label'] == 1]))
    print('negative dirty training sample:', len(dirty_data[dirty_data['label'] == 0]))
    print('Before Clean:', len(df))
    df = df.dropna()
    print('After Clean:', len(df))
    return df

In [45]:
sp = check_dirty_train(sp)

dirty sample count: 73
positive dirty training sample: 5
negative dirty training sample: 68
Before Clean: 21400
After Clean: 21327


In [46]:
en = check_dirty_train(en)

dirty sample count: 192
positive dirty training sample: 20
negative dirty training sample: 172
Before Clean: 21400
After Clean: 21208


In [47]:
test_data.replace('', np.nan, inplace=True)
# refill the empty row
test_data.iloc[1712, 0] = 'hola'
test_data.iloc[2349, 0] = 'hola'
test_data = test_data.dropna()
print('English sample count:', en.shape[0], '\nSpanish sample count:', sp.shape[0], '\nTest sample count:', test_data.shape[0])

English sample count: 21208 
Spanish sample count: 21327 
Test sample count: 5000


In [48]:
sp.to_csv("input/cleaned_sp.csv", index=False)
en.to_csv("input/cleaned_en.csv", index=False)
test_data.to_csv("input/cleaned_test.csv", index=False)

### Embedding

In [58]:
full_en_embed_path = 'input/wiki.en.vec'
cur_en_embed_path = 'input/en_embed.pkl'

print('Making embedding...')
en_embed_dict = get_embedding(enDS.vocab._id2word, full_en_embed_path)
save_embed(en_embed_dict, cur_en_embed_path)
print('Saved generated en embedding.')

Making embedding...
Found 2606/2685 words with embedding vectors
Missing Ratio: 2.94%
Filled missing words' embeddings.
Embedding Matrix Size:  2685
Embedding saved
Saved generated en embedding.


In [60]:
full_sp_embed_path = 'input/wiki.es.vec'
cur_sp_embed_path = 'input/sp_embed.pkl'

print('Making embedding...')
sp_embed_dict = get_embedding(spDS.vocab._id2word, full_sp_embed_path)
save_embed(sp_embed_dict, cur_sp_embed_path)
print('Saved generated sp embedding.')

Making embedding...
Found 5142/5767 words with embedding vectors
Missing Ratio: 10.84%
Filled missing words' embeddings.
Embedding Matrix Size:  5767
Embedding saved
Saved generated sp embedding.


## Sencond Start

In [4]:
en = pd.read_csv("input/cleaned_en.csv")
sp = pd.read_csv("input/cleaned_sp.csv")
test_data = pd.read_csv("input/cleaned_test.csv")

### Dataset

In [5]:
en.columns = ['s1', 's2', 'label']
# split dataset
msk = np.random.rand(len(en)) < 0.8
en_train = en[msk]
en_valid = en[~msk]
en_all_sents = en['s1'].tolist() + en['s2'].tolist()

# dataset
en_trainDS = myDS(en_train, en_all_sents)
en_validDS = myDS(en_valid, en_all_sents)

In [6]:
sp.columns = ['s1', 's2', 'label']
# split dataset
msk = np.random.rand(len(sp)) < 0.8
sp_train = sp[msk]
sp_valid = sp[~msk]
sp_all_sents = sp['s1'].tolist() + sp['s2'].tolist()

# dataset
sp_trainDS = myDS(sp_train, sp_all_sents)
sp_validDS = myDS(sp_valid, sp_all_sents)

### Embedding

In [7]:
en_embed_dict = load_embed('input/en_embed.pkl')
sp_embed_dict = load_embed('input/sp_embed.pkl')

In [8]:
embed_size = 300
en_embed_list = []
for word in en_validDS.vocab._id2word:
    en_embed_list.append(en_embed_dict[word])
en_vocab_size = len(en_embed_list)
    

sp_embed_list = []
for word in sp_trainDS.vocab._id2word:
    sp_embed_list.append(sp_embed_dict[word])
sp_vocab_size = len(sp_embed_list)

In [9]:
"""
Input: English and Spanish embed list
Output: English and Spanish aligned Embedding weight
"""
def align_embeddings(en_embed_list, sp_embed_list, embed_size):
    print('English Vocab Size:{}, Spanish Vocab Size:{}'.format(len(en_embed_list), len(sp_embed_list)))
    dif = abs(len(en_embed_list) - len(sp_embed_list))
    compensate = []
    for i in range(dif):
        compensate.append(np.zeros(embed_size))
    # shorter one aligned to longer one
    if len(en_embed_list) < len(sp_embed_list):
        en_embed_list.extend(compensate)
    else: sp_embed_list.extend(compensate)
    
    if len(en_embed_list) == len(sp_embed_list):
        print('-> Aligned to', len(en_embed_list))
    
    en_weight = nn.Parameter(torch.from_numpy(np.array(en_embed_list)).type(torch.FloatTensor), requires_grad = False)
    sp_weight = nn.Parameter(torch.from_numpy(np.array(sp_embed_list)).type(torch.FloatTensor), requires_grad = False)

    return en_weight, sp_weight

In [10]:
aligned_size = max(en_vocab_size,sp_vocab_size)
en_embedding = nn.Embedding(aligned_size, embed_size)
sp_embedding = nn.Embedding(aligned_size, embed_size)

en_embedding.weight, sp_embedding.weight = align_embeddings(en_embed_list, sp_embed_list, config['model']['embed_size'])

English Vocab Size:2685, Spanish Vocab Size:4101
-> Aligned to 4101


## Model

In [11]:
config['embedding_matrix'] = en_embedding
# model
siamese_en = Siamese_lstm(config)
print(siamese_en)

Siamese_lstm(
  (encoder): LSTMEncoder(
    (embedding): Embedding(4101, 300)
    (lstm): LSTM(300, 150, dropout=0.5)
  )
  (classifier): Sequential(
    (0): Dropout(p=0.1)
    (1): Linear(in_features=600, out_features=100, bias=True)
    (2): Tanh()
    (3): Dropout(p=0.1)
    (4): Linear(in_features=100, out_features=2, bias=True)
  )
)


  "num_layers={}".format(dropout, num_layers))


In [12]:
siamese_en.state_dict()

OrderedDict([('encoder.embedding.weight',
              tensor([[ 0.0545, -0.8301,  0.1130,  ...,  0.3013, -0.2740,  0.7586],
                      [ 0.2216, -0.8387,  0.4687,  ...,  0.8663,  0.3699,  0.2862],
                      [ 0.8881,  0.5238,  0.6115,  ..., -0.7655,  0.5403, -0.6247],
                      ...,
                      [ 0.0000,  0.0000,  0.0000,  ...,  0.0000,  0.0000,  0.0000],
                      [ 0.0000,  0.0000,  0.0000,  ...,  0.0000,  0.0000,  0.0000],
                      [ 0.0000,  0.0000,  0.0000,  ...,  0.0000,  0.0000,  0.0000]])),
             ('encoder.lstm.weight_ih_l0',
              tensor([[ 0.0138, -0.0523, -0.0013,  ..., -0.0419,  0.0260,  0.0701],
                      [-0.0621,  0.0590, -0.0237,  ..., -0.0580,  0.0659,  0.0563],
                      [-0.0009, -0.0701, -0.0804,  ..., -0.0771, -0.0558,  0.0595],
                      ...,
                      [ 0.0055,  0.0710,  0.0561,  ..., -0.0546,  0.0438,  0.0002],
                  

In [16]:
# loaded
siamese_en.state_dict()

OrderedDict([('encoder.embedding.weight',
              tensor([[ 0.0545, -0.8301,  0.1130,  ...,  0.3013, -0.2740,  0.7586],
                      [ 0.2216, -0.8387,  0.4687,  ...,  0.8663,  0.3699,  0.2862],
                      [ 0.8881,  0.5238,  0.6115,  ..., -0.7655,  0.5403, -0.6247],
                      ...,
                      [ 0.0000,  0.0000,  0.0000,  ...,  0.0000,  0.0000,  0.0000],
                      [ 0.0000,  0.0000,  0.0000,  ...,  0.0000,  0.0000,  0.0000],
                      [ 0.0000,  0.0000,  0.0000,  ...,  0.0000,  0.0000,  0.0000]])),
             ('encoder.lstm.weight_ih_l0',
              tensor([[ 0.0361, -0.0769,  0.0482,  ..., -0.0454,  0.0273, -0.0062],
                      [ 0.0088, -0.0274,  0.0335,  ..., -0.0576, -0.0711, -0.0272],
                      [ 0.0027,  0.0797,  0.0173,  ..., -0.0505,  0.0132,  0.0350],
                      ...,
                      [ 0.0568,  0.0336, -0.0606,  ...,  0.0338,  0.0600,  0.0458],
                  

In [14]:
siamese_en.encoder.embedding

Embedding(4101, 300)

In [15]:
# loss func
loss_weights = Variable(torch.FloatTensor([1, 3]))
if torch.cuda.is_available():
    loss_weights = loss_weights.cuda()
criterion = torch.nn.CrossEntropyLoss(loss_weights)

# optimizer
learning_rate = config['training']['learning_rate']
if config['training']['optimizer'] == 'sgd':
    optimizer = torch.optim.SGD(filter(lambda x: x.requires_grad, siamese_en.parameters()), lr=learning_rate)
elif config['training']['optimizer'] == 'adam':
    optimizer = torch.optim.Adam(filter(lambda x: x.requires_grad, siamese_en.parameters()), lr=learning_rate)
elif config['training']['optimizer'] == 'adadelta':
    optimizer = torch.optim.Adadelta(filter(lambda x: x.requires_grad, siamese_en.parameters()), lr=learning_rate)
elif config['training']['optimizer'] == 'rmsprop':
    optimizer = torch.optim.RMSprop(filter(lambda x: x.requires_grad, siamese_en.parameters()), lr=learning_rate)
print('Optimizer:', config['training']['optimizer'])
print('Learning rate:', config['training']['learning_rate'])

# log info
train_log_string = '%s :: Epoch %i :: Iter %i / %i :: train loss: %0.4f'
valid_log_string = '%s :: Epoch %i :: valid loss: %0.4f\n'

# Restore saved model (if one exists).
ckpt_path = os.path.join(config['ckpt_dir'], config['experiment_name']+'.pt')
if os.path.exists(ckpt_path):
    print('Loading checkpoint: %s' % ckpt_path)
    ckpt = torch.load(ckpt_path)
    epoch = ckpt['epoch']
    siamese_en.load_state_dict(ckpt['siamese'])
    optimizer.load_state_dict(ckpt['optimizer'])
else:
    epoch = 1
    print('Fresh start!\n')

Optimizer: sgd
Learning rate: 0.01
Loading checkpoint: ckpt/siamese-transfer-baseline.pt


## Train English Model

In [14]:
""" Train """

if config['task'] == 'train':

    # save every epoch for visualization
    train_loss_record = []
    valid_loss_record = []
    best_record = 10.0

    # training
    print('Experiment: {}\n'.format(config['experiment_name']))

    while epoch < config['training']['num_epochs']:

        print('Start Epoch {} Training...'.format(epoch))

        # loss
        train_loss = []
        train_loss_sum = []
        # dataloader
        train_dataloader = DataLoader(dataset=en_trainDS, shuffle=True, num_workers=2, batch_size=1)

        for idx, data in enumerate(train_dataloader, 0):

            # get data
            s1, s2, label = data

            # clear gradients
            optimizer.zero_grad()

            # input
            output = siamese_en(s1, s2)
            output = output.squeeze(0)

            # label cuda
            label = Variable(label)
            if torch.cuda.is_available():
                label = label.cuda()

            # loss backward
            loss = criterion(output, label)
            loss.backward()
            optimizer.step()
            train_loss.append(loss.data.cpu())
            train_loss_sum.append(loss.data.cpu())

            # Every once and a while check on the loss
            if ((idx + 1) % 5000) == 0:
                print(train_log_string % (datetime.now(), epoch, idx + 1, len(en_train), np.mean(train_loss)))
                train_loss = []

        # Record at every epoch
        print('Train Loss at epoch {}: {}\n'.format(epoch, np.mean(train_loss_sum)))
        train_loss_record.append(np.mean(train_loss_sum))

        # Valid
        print('Epoch {} Validating...'.format(epoch))

        # loss
        valid_loss = []
        # dataloader
        valid_dataloader = DataLoader(dataset=en_validDS, shuffle=True, num_workers=2, batch_size=1)

        for idx, data in enumerate(valid_dataloader, 0):
            # get data
            s1, s2, label = data

            # input
            output = siamese_en(s1, s2)
            output = output.squeeze(0)

            # label cuda
            label = Variable(label)
            if torch.cuda.is_available():
                label = label.cuda()

            # loss
            loss = criterion(output, label)
            valid_loss.append(loss.data.cpu())

        print(valid_log_string % (datetime.now(), epoch, np.mean(valid_loss)))
        # Record
        valid_loss_record.append(np.mean(valid_loss))
        epoch += 1

        if np.mean(valid_loss)-np.mean(train_loss_sum) > 0.02:
             print("Early Stopping!")
             break

        # Keep track of best record
        if np.mean(valid_loss) < best_record:
            best_record = np.mean(valid_loss)
            # save the best model
            state_dict = {
                'epoch': epoch,
                'siamese': siamese_en.state_dict(),
                'optimizer': optimizer.state_dict(),
            }
            torch.save(state_dict, ckpt_path)
            print('Model saved!\n')

Experiment: siamese-transfer-baseline

Start Epoch 1 Training...
2018-07-28 15:40:36.333335 :: Epoch 1 :: Iter 5000 / 17029 :: train loss: 0.5641
2018-07-28 15:41:53.099676 :: Epoch 1 :: Iter 10000 / 17029 :: train loss: 0.5486
2018-07-28 15:43:08.376094 :: Epoch 1 :: Iter 15000 / 17029 :: train loss: 0.5131
Train Loss at epoch 1: 0.5361940264701843

Epoch 1 Validating...
2018-07-28 15:44:05.876684 :: Epoch 1 :: valid loss: 0.4803

Model saved!

Start Epoch 2 Training...
2018-07-28 15:45:18.723418 :: Epoch 2 :: Iter 5000 / 17029 :: train loss: 0.4701
2018-07-28 15:46:35.313879 :: Epoch 2 :: Iter 10000 / 17029 :: train loss: 0.4556
2018-07-28 15:47:52.252918 :: Epoch 2 :: Iter 15000 / 17029 :: train loss: 0.4445
Train Loss at epoch 2: 0.45427393913269043

Epoch 2 Validating...
2018-07-28 15:48:47.985039 :: Epoch 2 :: valid loss: 0.4464

Model saved!

Start Epoch 3 Training...
2018-07-28 15:50:01.436991 :: Epoch 3 :: Iter 5000 / 17029 :: train loss: 0.4134
2018-07-28 15:51:14.784408 :: E

## Transfer weights to Train Spanish Model

In [11]:
ckpt_path = 'ckpt/siamese-transfer-baseline.pt'

In [12]:
config['experiment_name'] = config['experiment_name'] + '-Spain'
config['embedding_matrix'] = sp_embedding
siamese_sp = Siamese_lstm(config)

  "num_layers={}".format(dropout, num_layers))


In [21]:
# Restore saved English model 

print('Transfering English Model from: %s' % ckpt_path)
ckpt = torch.load(ckpt_path)
en_epoch = ckpt['epoch']
epoch = 1
siamese_sp.load_state_dict(ckpt['siamese'])

# Another path to save sp model
ckpt_path = os.path.join(config['ckpt_dir'], config['experiment_name']+'.pt')

Transfering English Model from: ckpt/siamese-transfer-baseline.pt


In [13]:
print(siamese_sp)

Siamese_lstm(
  (encoder): LSTMEncoder(
    (embedding): Embedding(4101, 300)
    (lstm): LSTM(300, 150, dropout=0.5)
  )
  (classifier): Sequential(
    (0): Dropout(p=0.1)
    (1): Linear(in_features=600, out_features=100, bias=True)
    (2): Tanh()
    (3): Dropout(p=0.1)
    (4): Linear(in_features=100, out_features=2, bias=True)
  )
)


In [14]:
config['training']['learning_rate'] = 0.001
config['training']['optimizer'] = 'adadelta'

In [16]:
# loss func
loss_weights = Variable(torch.FloatTensor([1, 3]))
if torch.cuda.is_available():
    loss_weights = loss_weights.cuda()
criterion = torch.nn.CrossEntropyLoss(loss_weights)

# optimizer
learning_rate = config['training']['learning_rate']
if config['training']['optimizer'] == 'sgd':
    optimizer = torch.optim.SGD(filter(lambda x: x.requires_grad, siamese_sp.parameters()), lr=learning_rate)
elif config['training']['optimizer'] == 'adam':
    optimizer = torch.optim.Adam(filter(lambda x: x.requires_grad, siamese_sp.parameters()), lr=learning_rate)
elif config['training']['optimizer'] == 'adadelta':
    optimizer = torch.optim.Adadelta(filter(lambda x: x.requires_grad, siamese_sp.parameters()), lr=learning_rate)
elif config['training']['optimizer'] == 'rmsprop':
    optimizer = torch.optim.RMSprop(filter(lambda x: x.requires_grad, siamese_sp.parameters()), lr=learning_rate)
print('Optimizer:', config['training']['optimizer'])
print('Learning rate:', config['training']['learning_rate'])


Optimizer: adadelta
Learning rate: 0.001


In [20]:
epoch = 1
# log info
train_log_string = '%s :: Epoch %i :: Iter %i / %i :: train loss: %0.4f'
valid_log_string = '%s :: Epoch %i :: valid loss: %0.4f\n'

In [87]:
train_dataloader = DataLoader(dataset=sp_trainDS, shuffle=True, batch_size=1)

for idx, data in enumerate(train_dataloader, 0):
    s1, s2, label = data
    print(s1,'\n',s2)
    output = siamese_sp(s1, s2)
    output = output.squeeze(0)
    print('out',output)
    
    # clear gradients
    optimizer.zero_grad()
    
    # loss backward
    loss = criterion(output, label)
    print(loss)
    loss.backward()
    optimizer.step()
    break

[tensor([1]), tensor([9]), tensor([1362]), tensor([2])] 
 [tensor([1]), tensor([4]), tensor([5]), tensor([40]), tensor([9]), tensor([21]), tensor([554]), tensor([2])]
out tensor([[ 0.6446, -0.6029]], grad_fn=<SqueezeBackward1>)
tensor(0.2525, grad_fn=<NllLossBackward>)


In [21]:
""" Train """

if config['task'] == 'train':

    # save every epoch for visualization
    train_loss_record = []
    valid_loss_record = []
#     best_record = 10.0

    # training
    print('Experiment: {}\n'.format(config['experiment_name']))

    while epoch < config['training']['num_epochs']:

        print('Start Epoch {} Training...'.format(epoch))

        # loss
        train_loss = []
        train_loss_sum = []
        # dataloader
        train_dataloader = DataLoader(dataset=sp_trainDS, shuffle=True, num_workers=2, batch_size=1)

        for idx, data in enumerate(train_dataloader, 0):

            # get data
            s1, s2, label = data

            # clear gradients
            optimizer.zero_grad()

            # input
            output = siamese_sp(s1, s2)
            output = output.squeeze(0)

            # label cuda
            label = Variable(label)
            if torch.cuda.is_available():
                label = label.cuda()

            # loss backward
            loss = criterion(output, label)
            loss.backward()
            optimizer.step()
            train_loss.append(loss.data.cpu())
            train_loss_sum.append(loss.data.cpu())

            # Every once and a while check on the loss
            if ((idx + 1) % 5000) == 0:
                print(train_log_string % (datetime.now(), epoch, idx + 1, len(sp_train), np.mean(train_loss)))
                train_loss = []

        # Record at every epoch
        print('Train Loss at epoch {}: {}\n'.format(epoch, np.mean(train_loss_sum)))
        train_loss_record.append(np.mean(train_loss_sum))

        # Valid
        print('Epoch {} Validating...'.format(epoch))

        # loss
        valid_loss = []
        # dataloader
        valid_dataloader = DataLoader(dataset=sp_validDS, shuffle=True, num_workers=2, batch_size=1)

        for idx, data in enumerate(valid_dataloader, 0):
            # get data
            s1, s2, label = data

            # input
            output = siamese_sp(s1, s2)
            output = output.squeeze(0)

            # label cuda
            label = Variable(label)
            if torch.cuda.is_available():
                label = label.cuda()

            # loss
            loss = criterion(output, label)
            valid_loss.append(loss.data.cpu())

        print(valid_log_string % (datetime.now(), epoch, np.mean(valid_loss)))
        # Record
        valid_loss_record.append(np.mean(valid_loss))
        epoch += 1

        if np.mean(valid_loss)-np.mean(train_loss_sum) > 0.02:
             print("Early Stopping!")
             break

        # Keep track of best record
        if np.mean(valid_loss) < best_record:
            best_record = np.mean(valid_loss)
            # save the best model
            state_dict = {
                'epoch': epoch,
                'siamese': siamese_sp.state_dict(),
                'optimizer': optimizer.state_dict(),
            }
            torch.save(state_dict, ckpt_path)
            print('Model saved!\n')

Experiment: siamese-transfer-baseline-Spain

Start Epoch 1 Training...
2018-07-28 17:31:06.542327 :: Epoch 1 :: Iter 5000 / 17063 :: train loss: 0.5542
2018-07-28 17:32:40.870212 :: Epoch 1 :: Iter 10000 / 17063 :: train loss: 0.5594
2018-07-28 17:34:16.170620 :: Epoch 1 :: Iter 15000 / 17063 :: train loss: 0.5645
Train Loss at epoch 1: 0.5586052536964417

Epoch 1 Validating...
2018-07-28 17:35:22.929568 :: Epoch 1 :: valid loss: 0.5712



NameError: name 'best_record' is not defined