## Wandb

In [1]:
!wandb login ***

wandb: Appending key for api.wandb.ai to your netrc file: C:\Users\Mubuky\.netrc


## Import packages

In [2]:
import os
import re
import torch
import wandb
import random
import warnings
import collections
import numpy as np
import pandas as pd
import torch.nn as nn
import torch.optim as optim
import torch.utils.data as Data
import torch.nn.functional as F

from tqdm.notebook import tqdm
from sklearn.metrics import f1_score

In [3]:
warnings.filterwarnings("ignore")

## Configurations

In [4]:
dtype = torch.FloatTensor
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

config = {
	'seed': 5201314,
	'batch_size': 64,
	'learning_rate': 5e-4,
	'num_workers': 8,
	'save_path': './models/',
#	'output_path': './pred.csv',
	'n_epochs': 10,
    'clip_norm': 0.75,
    'padding_length': 108,
    'num_classes': 4  # -2, -1, 0, 1
}

In [5]:
wandb.init(
    project = "SA",
    name = "LSTM",
    config = config
)

[34m[1mwandb[0m: Currently logged in as: [33mmubuky[0m ([33macsdf[0m). Use [1m`wandb login --relogin`[0m to force relogin


## Read CSV

In [6]:
data_path = './data/'
df_train = pd.read_csv(data_path + 'train.csv')
df_valid = pd.read_csv(data_path + 'valid.csv')
df_testa = pd.read_csv(data_path + 'testa.csv')

## Data Analysis

In [7]:
df_train.head(1)

Unnamed: 0,id,content,location_traffic_convenience,location_distance_from_business_district,location_easy_to_find,service_wait_time,service_waiters_attitude,service_parking_convenience,service_serving_speed,price_level,...,environment_decoration,environment_noise,environment_space,environment_cleaness,dish_portion,dish_taste,dish_look,dish_recommendation,others_overall_experience,others_willing_to_consume_again
0,0,"""吼吼吼，萌死人的棒棒糖，中了大众点评的霸王餐，太可爱了。一直就好奇这个棒棒糖是怎么个东西，...",-2,-2,-2,-2,1,-2,-2,-2,...,-2,-2,-2,-2,-2,-2,1,-2,1,-2


In [8]:
df_train.describe()

Unnamed: 0,id,location_traffic_convenience,location_distance_from_business_district,location_easy_to_find,service_wait_time,service_waiters_attitude,service_parking_convenience,service_serving_speed,price_level,price_cost_effective,...,environment_decoration,environment_noise,environment_space,environment_cleaness,dish_portion,dish_taste,dish_look,dish_recommendation,others_overall_experience,others_willing_to_consume_again
count,105000.0,105000.0,105000.0,105000.0,105000.0,105000.0,105000.0,105000.0,105000.0,105000.0,...,105000.0,105000.0,105000.0,105000.0,105000.0,105000.0,105000.0,105000.0,105000.0,105000.0
mean,52499.5,-1.360267,-1.407095,-1.402276,-1.749895,-0.496495,-1.846952,-1.661457,-0.9758,-1.379238,...,-0.6716,-1.203162,-1.06541,-1.033552,-0.907552,0.389181,-1.275771,-1.484181,0.537771,-0.981248
std,30311.03347,1.210342,1.185697,1.145902,0.739263,1.358622,0.6187,0.866716,1.151532,1.166656,...,1.41387,1.266776,1.282662,1.337833,1.308446,0.781373,1.224178,1.095536,0.740198,1.372485
min,0.0,-2.0,-2.0,-2.0,-2.0,-2.0,-2.0,-2.0,-2.0,-2.0,...,-2.0,-2.0,-2.0,-2.0,-2.0,-2.0,-2.0,-2.0,-2.0,-2.0
25%,26249.75,-2.0,-2.0,-2.0,-2.0,-2.0,-2.0,-2.0,-2.0,-2.0,...,-2.0,-2.0,-2.0,-2.0,-2.0,0.0,-2.0,-2.0,0.0,-2.0
50%,52499.5,-2.0,-2.0,-2.0,-2.0,0.0,-2.0,-2.0,-2.0,-2.0,...,-2.0,-2.0,-2.0,-2.0,-2.0,1.0,-2.0,-2.0,1.0,-2.0
75%,78749.25,-2.0,-2.0,-2.0,-2.0,1.0,-2.0,-2.0,0.0,-2.0,...,1.0,0.0,0.0,1.0,1.0,1.0,-1.0,-2.0,1.0,1.0
max,104999.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,...,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0


In [9]:
for column in df_train:
    print(column)
    prt = max(
        sum(df_train[column] == -2), 
        sum(df_train[column] == -1), 
        sum(df_train[column] == 0), 
        sum(df_train[column] == 1)
    )
    print(prt)

id
1
content
0
location_traffic_convenience
81382
location_distance_from_business_district
83680
location_easy_to_find
80605
service_wait_time
92763
service_waiters_attitude
42410
service_parking_convenience
98276
service_serving_speed
88700
price_level
52820
price_cost_effective
80242
price_discount
64243
environment_decoration
53916
environment_noise
73445
environment_space
65398
environment_cleaness
66598
dish_portion
56917
dish_taste
55367
dish_look
75975
dish_recommendation
84767
others_overall_experience
70070
others_willing_to_consume_again
65600


In [10]:
df_train['service_waiters_attitude'].value_counts()

service_waiters_attitude
-2    42410
 1    41372
 0    12534
-1     8684
Name: count, dtype: int64

In [11]:
df_train = df_train.loc[ : , ['content', 'service_waiters_attitude']]
df_valid = df_valid.loc[ : , ['content', 'service_waiters_attitude']]
df_testa = df_testa.loc[ : , ['content', 'service_waiters_attitude']]

In [12]:
df_train.head(3)

Unnamed: 0,content,service_waiters_attitude
0,"""吼吼吼，萌死人的棒棒糖，中了大众点评的霸王餐，太可爱了。一直就好奇这个棒棒糖是怎么个东西，...",1
1,"""第三次参加大众点评网霸王餐的活动。这家店给人整体感觉一般。首先环境只能算中等，其次霸王餐提...",-2
2,"""4人同行 点了10个小吃\n榴莲酥 榴莲味道不足 松软 奶味浓\n虾饺 好吃 两颗大虾仁\...",0


## Pretreatment

### 繁体转简体

In [13]:
from opencc import OpenCC

cc = OpenCC('t2s')

In [14]:
df_train['content'] = df_train['content'].apply(lambda x:cc.convert(x))
df_valid['content'] = df_valid['content'].apply(lambda x:cc.convert(x))
df_testa['content'] = df_testa['content'].apply(lambda x:cc.convert(x))

### 正则

In [15]:
def regular_sentence(content):
    decimal_regex = re.compile(r"[^a-zA-Z]\d+")

    content = content.replace("\r\n", " ").replace("\n", " ")
    content = decimal_regex.sub(r"", content)

    #return "".join(re.findall('[\u4e00-\u9fa5]+', content, re.S))
    return content

In [16]:
df_train['content'] = df_train['content'].apply(lambda x:regular_sentence(x))
df_valid['content'] = df_valid['content'].apply(lambda x:regular_sentence(x))
df_testa['content'] = df_testa['content'].apply(lambda x:regular_sentence(x))

In [17]:
df_train.head(3)

Unnamed: 0,content,service_waiters_attitude
0,"""吼吼吼，萌死人的棒棒糖，中了大众点评的霸王餐，太可爱了。一直就好奇这个棒棒糖是怎么个东西，...",1
1,"""第三次参加大众点评网霸王餐的活动。这家店给人整体感觉一般。首先环境只能算中等，其次霸王餐提...",-2
2,人同行 点个小吃 榴莲酥 榴莲味道不足 松软 奶味浓 虾饺 好吃 两颗大虾仁 皮蛋粥 皮蛋多...,0


### 分词

In [18]:
import jieba

#filePath = 'D:\\Jupyter Notebook\\sa\\jieba_dict\\'
#filelist = os.listdir(filePath)
#for i in range(len(filelist)):
#    jieba.load_userdict(filePath + filelist[i])
#jieba.load_userdict("add_words_ch.txt")

def separate(content):
    return " ".join(jieba.cut(content))

In [19]:
df_train['sep_content'] = df_train['content'].apply(lambda x:separate(x))
df_valid['sep_content'] = df_valid['content'].apply(lambda x:separate(x))
df_testa['sep_content'] = df_testa['content'].apply(lambda x:separate(x))

Building prefix dict from the default dictionary ...
Loading model from cache C:\Users\Mubuky\AppData\Local\Temp\jieba.cache
Loading model cost 0.492 seconds.
Prefix dict has been built successfully.


In [20]:
df_train.head(3)

Unnamed: 0,content,service_waiters_attitude,sep_content
0,"""吼吼吼，萌死人的棒棒糖，中了大众点评的霸王餐，太可爱了。一直就好奇这个棒棒糖是怎么个东西，...",1,""" 吼吼 吼 ， 萌死 人 的 棒棒糖 ， 中 了 大众 点评 的 霸王餐 ， 太 可爱 了..."
1,"""第三次参加大众点评网霸王餐的活动。这家店给人整体感觉一般。首先环境只能算中等，其次霸王餐提...",-2,""" 第三次 参加 大众 点评 网 霸王餐 的 活动 。 这家 店 给 人 整体 感觉 一般 ..."
2,人同行 点个小吃 榴莲酥 榴莲味道不足 松软 奶味浓 虾饺 好吃 两颗大虾仁 皮蛋粥 皮蛋多...,0,人 同行 点个 小吃 榴莲 酥 榴莲 味道 不足 松软 奶味 浓 虾...


### 删去单字

In [21]:
def excludeunique(sentence):
    segs = [word for word in sentence.split() if len(word) > 1]
    return " ".join(segs)

In [22]:
df_train['sep_content'] = df_train['sep_content'].apply(lambda x:excludeunique(x))
df_valid['sep_content'] = df_valid['sep_content'].apply(lambda x:excludeunique(x))
df_testa['sep_content'] = df_testa['sep_content'].apply(lambda x:excludeunique(x))

In [23]:
df_train.head(3)

Unnamed: 0,content,service_waiters_attitude,sep_content
0,"""吼吼吼，萌死人的棒棒糖，中了大众点评的霸王餐，太可爱了。一直就好奇这个棒棒糖是怎么个东西，...",1,吼吼 萌死 棒棒糖 大众 点评 霸王餐 可爱 一直 好奇 这个 棒棒糖 怎么 东西 大众 点...
1,"""第三次参加大众点评网霸王餐的活动。这家店给人整体感觉一般。首先环境只能算中等，其次霸王餐提...",-2,第三次 参加 大众 点评 霸王餐 活动 这家 整体 感觉 一般 首先 环境 只能 中等 其次...
2,人同行 点个小吃 榴莲酥 榴莲味道不足 松软 奶味浓 虾饺 好吃 两颗大虾仁 皮蛋粥 皮蛋多...,0,同行 点个 小吃 榴莲 榴莲 味道 不足 松软 奶味 虾饺 好吃 两颗 虾仁 皮蛋 皮蛋 但...


### 去除停用词

In [24]:
def excludestops(sentence, stop_words):
    segs = [word for word in sentence.split() if word not in stop_words]
    return " ".join(segs)

In [25]:
stop_words = set()
with open('stopwords.txt', encoding='utf-8') as f:
    con = f.readlines()
    for i in con:
        i = i.replace("\r\n", "").replace("\n", "")
        stop_words.add(i)

In [26]:
len(stop_words)

789

In [27]:
df_train['sep_content'] = df_train['sep_content'].apply(lambda x:excludestops(x, stop_words))
df_valid['sep_content'] = df_valid['sep_content'].apply(lambda x:excludestops(x, stop_words))
df_testa['sep_content'] = df_testa['sep_content'].apply(lambda x:excludestops(x, stop_words))

In [28]:
df_train.head(3)

Unnamed: 0,content,service_waiters_attitude,sep_content
0,"""吼吼吼，萌死人的棒棒糖，中了大众点评的霸王餐，太可爱了。一直就好奇这个棒棒糖是怎么个东西，...",1,萌死 棒棒糖 可爱 好奇 棒棒糖 东西 土老冒 见识 机会 介绍 棒棒糖 德国 不会 很甜 ...
1,"""第三次参加大众点评网霸王餐的活动。这家店给人整体感觉一般。首先环境只能算中等，其次霸王餐提...",-2,参加 活动 这家 整体 感觉 环境 只能 中等 提供 菜品 不是 很多 当然 商家 避免 参...
2,人同行 点个小吃 榴莲酥 榴莲味道不足 松软 奶味浓 虾饺 好吃 两颗大虾仁 皮蛋粥 皮蛋多...,0,同行 点个 小吃 榴莲 榴莲 味道 不足 松软 奶味 虾饺 好吃 两颗 虾仁 皮蛋 皮蛋 奶...


### 删去词频较低的词

In [29]:
sentences_train = list(df_train['sep_content'])
sentences_valid = list(df_valid['sep_content'])
sentences_testa = list(df_testa['sep_content'])
sentences = sentences_train + sentences_valid + sentences_testa
word_list = " ".join(sentences).split()

In [30]:
counts = collections.Counter(word_list)

expand_stop_words = set()
for key in counts:
    if counts[key] <= 2:
        expand_stop_words.add(key)

In [31]:
len(expand_stop_words)

155313

In [32]:
print(counts.most_common(100))

[('味道', 159960), ('不错', 156026), ('感觉', 118353), ('好吃', 107818), ('没有', 96754), ('喜欢', 70502), ('服务员', 69611), ('环境', 61992), ('这家', 60543), ('里面', 59997), ('服务', 58274), ('不是', 53692), ('特别', 52421), ('觉得', 50659), ('口味', 47891), ('推荐', 46540), ('很多', 46263), ('菜品', 40383), ('新鲜', 37286), ('价格', 35879), ('口感', 35609), ('牛肉', 35431), ('朋友', 34881), ('位置', 31320), ('最后', 30369), ('团购', 29397), ('他家', 29045), ('知道', 28272), ('看到', 26308), ('起来', 26276), ('干净', 25690), ('装修', 25630), ('餐厅', 25300), ('老板', 25295), ('个人', 24601), ('不会', 24074), ('店里', 23698), ('这次', 23295), ('总体', 22816), ('海鲜', 22753), ('特色', 22638), ('地方', 22329), ('其实', 21561), ('东西', 21429), ('可能', 21333), ('来说', 20396), ('芝士', 20232), ('火锅', 19457), ('套餐', 19311), ('大家', 19147), ('方便', 19062), ('之前', 19028), ('很大', 18638), ('便宜', 18545), ('选择', 18234), ('入味', 18131), ('现在', 18109), ('附近', 17984), ('出来', 17526), ('热情', 17436), ('活动', 17377), ('以后', 17272), ('豆腐', 17017), ('超级', 16943), ('店家', 16887), ('适合', 16782), ('饮料

In [33]:
df_train['sep_content'] = df_train['sep_content'].apply(lambda x:excludestops(x, expand_stop_words))
df_valid['sep_content'] = df_valid['sep_content'].apply(lambda x:excludestops(x, expand_stop_words))
df_testa['sep_content'] = df_testa['sep_content'].apply(lambda x:excludestops(x, expand_stop_words))

In [34]:
df_train.head(3)

Unnamed: 0,content,service_waiters_attitude,sep_content
0,"""吼吼吼，萌死人的棒棒糖，中了大众点评的霸王餐，太可爱了。一直就好奇这个棒棒糖是怎么个东西，...",1,萌死 棒棒糖 可爱 好奇 棒棒糖 东西 土老冒 见识 机会 介绍 棒棒糖 德国 不会 很甜 ...
1,"""第三次参加大众点评网霸王餐的活动。这家店给人整体感觉一般。首先环境只能算中等，其次霸王餐提...",-2,参加 活动 这家 整体 感觉 环境 只能 中等 提供 菜品 不是 很多 当然 商家 避免 参...
2,人同行 点个小吃 榴莲酥 榴莲味道不足 松软 奶味浓 虾饺 好吃 两颗大虾仁 皮蛋粥 皮蛋多...,0,同行 点个 小吃 榴莲 榴莲 味道 不足 松软 奶味 虾饺 好吃 两颗 虾仁 皮蛋 皮蛋 奶...


### Padding

In [35]:
sentences_train = list(df_train['sep_content'])
sentences_valid = list(df_valid['sep_content'])
sentences_testa = list(df_testa['sep_content'])
sentences = sentences_train + sentences_valid + sentences_testa
word_list = " ".join(sentences).split()
lenlist = [len(sentence.split()) for sentence in sentences]
print('max:{}\nmin:{}\nmean:{}'.format(np.max(lenlist), np.min(lenlist), np.mean(lenlist)))

cnt = collections.Counter(lenlist)
sum_sentences, sum_overlength = 0, 0
for key in cnt:
    if key > config['padding_length']:
        sum_overlength += cnt[key]
    sum_sentences += cnt[key]
print('Overlength: {} / {}'.format(sum_overlength, sum_sentences))
print(sum_sentences)

max:829
min:2
mean:84.99480740740741
Overlength: 25129 / 135000
135000


In [36]:
def padding(sentence):
    PAD = ' <PAD>'
    pad_size = config['padding_length']
    senlist = sentence.split()
    sentence_len = len(senlist)
    if sentence_len < pad_size:
        sentence += PAD * (pad_size - sentence_len)
    else:
        sentence = " ".join(senlist[-pad_size:])
    return sentence

In [37]:
df_train['sep_content'] = df_train['sep_content'].apply(lambda x:padding(x))
df_valid['sep_content'] = df_valid['sep_content'].apply(lambda x:padding(x))
df_testa['sep_content'] = df_testa['sep_content'].apply(lambda x:padding(x))

## Building Vocab

In [38]:
sentences_train = list(df_train['sep_content'])
sentences_valid = list(df_valid['sep_content'])
sentences_testa = list(df_testa['sep_content'])
sentences = sentences_train + sentences_valid + sentences_testa
word_list = " ".join(sentences).split()

In [39]:
vocab = list(set(word_list))
word2idx = {w: i for i, w in enumerate(vocab)}
vocab_size = len(vocab)

In [40]:
vocab_size

90822

## Set Seeds

In [41]:
def same_seed(seed):
    torch.use_deterministic_algorithms(True)
    torch.backends.cudnn.enabled = False
    torch.backends.cudnn.benchmark = False
    
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)

    os.environ['PYTHONHASHSEED'] = str(seed)
    os.environ['CUBLAS_WORKSPACE_CONFIG'] = ':4096:8'

same_seed(config['seed'])

#def seed_worker(worker_id):
#    worker_seed = torch.initial_seed() % 2**32
#    numpy.random.seed(worker_seed)
#    random.seed(worker_seed)

#g = torch.Generator()
#g.manual_seed(config['seed'])

## Data

In [42]:
labels_train = list(df_train['service_waiters_attitude'])
labels_train = [i + 2 for i in labels_train]
labels_valid = list(df_valid['service_waiters_attitude'])
labels_valid = [i + 2 for i in labels_valid]

In [43]:
def make_data(sentences, labels):
    inputs = []
    for sen in sentences:
        inputs.append([word2idx[n] for n in sen.split()])

    targets = []
    for out in labels:
        targets.append(out) # To using Torch Softmax Loss function
    return torch.LongTensor(inputs), torch.LongTensor(targets)

In [44]:
input_train, target_train = make_data(sentences_train, labels_train)
input_valid, target_valid = make_data(sentences_valid, labels_valid)

In [45]:
train_dataset = Data.TensorDataset(torch.tensor(input_train), torch.tensor(target_train))
valid_dataset = Data.TensorDataset(torch.tensor(input_valid), torch.tensor(target_valid))

In [46]:
train_loader = Data.DataLoader(
    dataset=train_dataset,
    batch_size=config['batch_size'],
    shuffle=True,
    num_workers=config['num_workers'],
    drop_last=True,
#    worker_init_fn=seed_worker,
#    generator=g,
)
valid_loader = Data.DataLoader(
    dataset=valid_dataset,
    batch_size=config['batch_size'],
    shuffle=True,
    num_workers=config['num_workers'],
    drop_last=True,
#    worker_init_fn=seed_worker,
#    generator=g,
)

## Model

In [47]:
class SALSTM(nn.Module):
    def __init__(self):
        super(SALSTM, self).__init__()
        self.n_layers = 2
        self.hidden_dim = 512
        self.embedding_dim = 512
        self.dropout = 0.5
        self.n_vocab = vocab_size
        self.num_classes = config['num_classes']

        self.embedding = nn.Embedding(self.n_vocab, self.embedding_dim, padding_idx = word2idx['<PAD>'])

        self.lstm = nn.LSTM(
            self.embedding_dim,
            self.hidden_dim,
            self.n_layers,
            dropout = self.dropout, 
            batch_first = True,
        )

        self.fc = nn.Linear(
            in_features = self.hidden_dim,
            out_features = self.num_classes
        ) 
        self.dropout = nn.Dropout(self.dropout)
        
    def forward(self, x, hidden):
        # x.size() = (batch_size, padding_size)
        # h.size() = c.size() = (n_layers, batch_size, hidden_dim)
        
        out = self.embedding(x) # out.size() = (batch_size, padding_size, embedding_dim)
        out, hidden = self.lstm(out, hidden) # out.size() = (batch_size, padding_size, hidden_dim)
        out = out[:, -1, :].squeeze(1)
        out = self.dropout(out)
        out = self.fc(out) # out.size() = (batch_size, num_classes)
        
        return out, hidden
    
    def init_hidden(self, batch_size):
        hidden = (
            torch.zeros(self.n_layers, batch_size, self.hidden_dim).to(device),
            torch.zeros(self.n_layers, batch_size, self.hidden_dim).to(device)
        )
        return hidden


## Training

In [48]:
def get_f1_score(labels, pred):
    return f1_score(labels, pred, labels=[0, 1, 2, 3], average='macro')

In [49]:
def trainer(train_loader, valid_loader, model, config, device):
    #criterion = AMSoftmax()
    criterion = nn.CrossEntropyLoss()#.to(device)
    optimizer = optim.AdamW(model.parameters(), lr=config['learning_rate'])
#	scheduler = torch.optim.lr_scheduler.CosineAnnealingWarmRestarts(
#		optimizer,
#		eta_min=config['learning_rate']/50.0,
#		T_0=config['n_epochs']
#	)

    if not os.path.isdir(config['save_path']):
        os.mkdir(config['save_path'])
    
    best_f1 = 0.0
    n_epochs = config['n_epochs']
    
    for epoch in range(n_epochs):

        # train
        model.train()
        
        acc_record, loss_record, record_count = 0.0, 0.0, 0
        prediction = []
        groundtruth = []
        train_pbar = tqdm(train_loader, position=0, leave=True)
        train_pbar.set_description(f'Epoch [{epoch+1}/{n_epochs}]')
        
        for data, labels in train_pbar:

            optimizer.zero_grad()
            data = data.to(device)
            labels = labels.to(device)

            hidden = model.init_hidden(config['batch_size'])
            pred, hidden = model(data, hidden)
            
            loss = criterion(pred, labels)
            pred_flate = pred.argmax(dim = 1)
            acc = torch.mean((pred_flate == labels).float())
            f1 = get_f1_score(labels.tolist(), pred_flate.tolist())
            prediction += pred_flate.tolist()
            groundtruth += labels.tolist()
            
            loss.backward()
            gnorm = nn.utils.clip_grad_norm_(model.parameters(), config['clip_norm'])
            
            optimizer.step()
            
            record_count += 1
            loss_record += loss.item()
            acc_record += acc.item()
            train_pbar.set_postfix({'loss': loss.item(), 'acc': acc.item(), 'f1': f1})
            wandb.log({"train/acc": acc, "train/loss": loss, "train/f1": f1, "train/grad_norm": gnorm})
            
        train_acc = acc_record / record_count
        train_loss = loss_record / record_count
        train_f1 = get_f1_score(groundtruth, prediction)
        
        print('TRAIN: epoch:{}, loss:{:.3f}, acc:{:.3f}, f1_score:{:.3f}'.format(epoch + 1, train_loss, train_acc, train_f1))

        # valid
        model.eval()
        
        acc_record, loss_record, record_count = 0.0, 0.0, 0
        prediction = []
        groundtruth = []
        valid_pbar = tqdm(valid_loader, position=0, leave=True)
        valid_pbar.set_description(f'Epoch [{epoch+1}/{n_epochs}]')
        for data, labels in valid_pbar:

            data = data.to(device)
            labels = labels.to(device)

            with torch.no_grad():
                hidden = model.init_hidden(config['batch_size'])
                pred, hidden = model(data, hidden)
                loss = criterion(pred, labels)
                pred_flate = pred.argmax(dim=1)
                acc = torch.mean((pred_flate == labels).float())
                f1 = get_f1_score(labels.tolist(), pred_flate.tolist())
                prediction += pred_flate.tolist()
                groundtruth += labels.tolist()

            record_count += 1
            loss_record += loss.item()
            acc_record += acc.item()
            valid_pbar.set_postfix({'loss': loss.item(), 'acc': acc.item(), 'f1': f1})
        
        valid_acc = acc_record / record_count
        valid_loss = loss_record / record_count
        valid_f1 = get_f1_score(groundtruth, prediction)

        #scheduler.step()

        print('VALID: epoch:{}, loss:{:.3f}, acc:{:.3f}, f1_score:{:.3f}'.format(epoch + 1, valid_loss, valid_acc, valid_f1))
        
        if valid_f1 > best_f1:
            best_f1 = valid_f1
            torch.save(model.state_dict(), config['save_path'] + 'model.ckpt')
            print('Saving model with f1 {:.5f}'.format(best_f1))


In [50]:
model = SALSTM().to(device)
trainer(train_loader, valid_loader, model, config, device)
wandb.finish()

  0%|          | 0/1640 [00:00<?, ?it/s]

TRAIN: epoch:1, loss:0.818, acc:0.679, f1_score:0.486


  0%|          | 0/234 [00:00<?, ?it/s]

VALID: epoch:1, loss:0.594, acc:0.779, f1_score:0.650
Saving model with f1 0.64972


  0%|          | 0/1640 [00:00<?, ?it/s]

TRAIN: epoch:2, loss:0.551, acc:0.798, f1_score:0.687


  0%|          | 0/234 [00:00<?, ?it/s]

VALID: epoch:2, loss:0.542, acc:0.792, f1_score:0.678
Saving model with f1 0.67777


  0%|          | 0/1640 [00:00<?, ?it/s]

TRAIN: epoch:3, loss:0.451, acc:0.837, f1_score:0.752


  0%|          | 0/234 [00:00<?, ?it/s]

VALID: epoch:3, loss:0.573, acc:0.792, f1_score:0.690
Saving model with f1 0.69022


  0%|          | 0/1640 [00:00<?, ?it/s]

TRAIN: epoch:4, loss:0.349, acc:0.879, f1_score:0.815


  0%|          | 0/234 [00:00<?, ?it/s]

VALID: epoch:4, loss:0.631, acc:0.786, f1_score:0.689


  0%|          | 0/1640 [00:00<?, ?it/s]

TRAIN: epoch:5, loss:0.247, acc:0.919, f1_score:0.874


  0%|          | 0/234 [00:00<?, ?it/s]

VALID: epoch:5, loss:0.721, acc:0.784, f1_score:0.685


  0%|          | 0/1640 [00:00<?, ?it/s]

TRAIN: epoch:6, loss:0.181, acc:0.945, f1_score:0.913


  0%|          | 0/234 [00:00<?, ?it/s]

VALID: epoch:6, loss:0.768, acc:0.780, f1_score:0.681


  0%|          | 0/1640 [00:00<?, ?it/s]

TRAIN: epoch:7, loss:0.135, acc:0.961, f1_score:0.938


  0%|          | 0/234 [00:00<?, ?it/s]

VALID: epoch:7, loss:0.973, acc:0.770, f1_score:0.673


  0%|          | 0/1640 [00:00<?, ?it/s]

TRAIN: epoch:8, loss:0.105, acc:0.972, f1_score:0.956


  0%|          | 0/234 [00:00<?, ?it/s]

VALID: epoch:8, loss:1.029, acc:0.772, f1_score:0.676


  0%|          | 0/1640 [00:00<?, ?it/s]

TRAIN: epoch:9, loss:0.084, acc:0.978, f1_score:0.965


  0%|          | 0/234 [00:00<?, ?it/s]

VALID: epoch:9, loss:1.082, acc:0.773, f1_score:0.665


  0%|          | 0/1640 [00:00<?, ?it/s]

TRAIN: epoch:10, loss:0.068, acc:0.982, f1_score:0.973


  0%|          | 0/234 [00:00<?, ?it/s]

VALID: epoch:10, loss:1.154, acc:0.774, f1_score:0.675


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
train/acc,▁▅▇▄▆▆▆▅▇▆▇▆▇▇▇▇▇▇▇▇▇███████████████████
train/f1,▁▃▅▃▅▅▅▅▆▄▅▆▇▆▆▅▇▇▇▅▇█▇█▇███████████████
train/grad_norm,▂▅▂▄▂▂▂▄▃▄▂▄▂▄▃▃█▄▂▅▄▃▄▃▃▅▁▄▂▂▆▃▄▇▆▂▁▃▃▁
train/loss,█▅▃▅▄▄▃▅▃▃▃▄▂▃▂▃▂▂▂▂▂▁▂▁▂▁▁▁▁▂▂▁▁▁▁▁▁▁▁▁

0,1
train/acc,0.95312
train/f1,0.91012
train/grad_norm,2.01077
train/loss,0.21086
