# Import libraries and classes, set parameters

In [1]:
import re
from itertools import product

from tqdm.auto import tqdm

import pandas as pd
import numpy as np

import torch
from torch import nn
from torch.utils.data import DataLoader

from sklearn.preprocessing import MinMaxScaler

import transformers

from models.TweetDataset import TweetDataset
from models.Wd_Xlm_T import Wd_Xlm_T

In [2]:
# Directory holding the input CSV and the train/valid/test CSVs written by this notebook.
DATA_PATH = "./data/"
# Path prefix under which model checkpoints are saved during training.
CHECKPOINT_DIR = "./checkpoints/"
# Use the GPU when one is available, otherwise fall back to the CPU.
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"

print(DEVICE)

cuda


In [3]:
# Columns of the raw dataset, in file order and grouped by what they describe.
orig_features = [
    # --- tweet features ---
    'text_tokens',
    'hashtags',
    'tweet_id',
    'media',
    'links',
    'domains',
    'tweet_type',
    'language',
    'timestamp',
    # --- engaged-with user features ---
    'engaged_with_user_id',
    'engaged_with_user_follower_count',
    'engaged_with_user_following_count',
    'engaged_with_user_is_verified',
    'engaged_with_user_account_creation',
    # --- engaging user features ---
    'engaging_user_id',
    'engaging_user_follower_count',
    'engaging_user_following_count',
    'engaging_user_is_verified',
    'engaging_user_account_creation',
    # --- engagement features ---
    'engagee_follows_engager',
    # --- targets (must stay the last four entries, see target_features) ---
    'reply',
    'retweet',
    'retweet_comment',
    'like',
]

# The four engagement targets are the trailing entries of the column list.
target_features = orig_features[-4:]
numerical_features = [
    'engaged_with_user_follower_count',
    'engaged_with_user_following_count',
    'engaging_user_follower_count',
    'engaging_user_following_count',
    'url_cnt',
]
categorical_features = ['language', 'engaged_with_user_id', 'engaging_user_id']

# One target-encoded column name per (categorical feature, target) pair,
# e.g. "language_reply_TE"; the iterator is exhausted by the comprehension.
cat_target_prod = product(categorical_features, target_features)
features = [cat + "_" + target + "_TE" for (cat, target) in cat_target_prod]

# m is passed to the target encoder as its smoothing weight;
# MAX_LEN is passed to TweetDataset as max_len.
m = 20
MAX_LEN = 100

# Load and preprocess data

In [4]:
# Load the filtered sample of the dataset.
orig_df = pd.read_csv(DATA_PATH+"dataset_filtered_small.csv")
# Order rows by timestamp; the train/valid/test split later slices rows by position.
orig_df.sort_values(by=['timestamp'], inplace=True)
# Drop the "Unnamed: 0" column (presumably the index saved when the CSV was written).
orig_df.drop("Unnamed: 0", axis=1, inplace=True)
orig_df.head()

Unnamed: 0,text_tokens,hashtags,tweet_id,media,links,domains,tweet_type,language,timestamp,engaged_with_user_id,...,engaging_user_id,engaging_user_follower_count,engaging_user_following_count,engaging_user_is_verified,engaging_user_account_creation,engagee_follows_engager,reply,retweet,retweet_comment,like
1120596,101\t12148\t11675\t14707\t117\t17924\t16266\t2...,E8EC1049F02FE3900B1E45D1BDD52BEF\tF0F29CEE3668...,20C5F3A6F47B4E7A85A1443CC3D12B0C,Video,,,TopLevel,B0FA488F2911701DD8EC5B1EA5E322D8,1612396800,E400C001A195BD92CB74AA0B2E2BB522,...,7327C6D707CE1C0F8DC376B8F6AA6B3F,49,237,False,1590030909,False,,,,
6744778,101\t14120\t131\t120\t120\t188\t119\t11170\t12...,,ABC98352C6238B3129A1772122532156,GIF,,,TopLevel,313ECD3A1E5BB07406E4249475C2D6D6,1612396800,80C3FD8645A74F589C103A1F9A3C40E5,...,75CC5A829BFDBECDFCDD0868EFAC04FF,79,163,False,1404873502,False,,,,1612414000.0
1536740,101\t14120\t131\t120\t120\t188\t119\t11170\t12...,,120E4BA71617DDFA622E0263783D04B2,Photo\tPhoto,,,TopLevel,313ECD3A1E5BB07406E4249475C2D6D6,1612396800,3413E19B696E35FC1B6567C260ED2E0B,...,85351873930358043D54DEC159B30E59,252,728,False,1470684663,False,,1612397000.0,,
5782609,101\t14200\t10182\t24248\t10125\t45411\t119\t1...,,AC3770889BCE6879E01ACC3D675CCD5C,Photo,,,TopLevel,B0FA488F2911701DD8EC5B1EA5E322D8,1612396800,C530D3D968FBD0D08537C5EDEEDEE542,...,943B164BB3B9EE48C8F1487BCF731994,138,1108,False,1260577260,False,,,,1612399000.0
2289432,101\t20452\t10142\t14908\t10841\t17565\t80677\...,,AC97A848CC9CE41F42D566B2C5EAAB45,Video,,,TopLevel,488B32D24BD4BB44172EB981C1BCA6FA,1612396800,E7A2CEC020385D583CA1F15C53E671DD,...,50E8C399099EEAB5363E786D020917A7,101,294,False,1532466558,False,,,,1612465000.0


#### Get text and count urls

In [8]:
# Tokenizer used to decode the dataset's BERT token ids back into tweet text.
bert_multilingual_tokenizer = transformers.BertTokenizer.from_pretrained("bert-base-multilingual-cased")

In [6]:
# URL as it appears in decoded BERT text, e.g. "https : / / t. co / kJysqCq6UR".
# Written as a raw string so the regex escapes are not treated as (invalid)
# Python string escapes; the pattern itself is unchanged.
# NOTE(review): the repeated "\s\w+" part also swallows plain words that follow a
# URL, so several URLs separated only by single spaces are counted as one match.
_URL_PATTERN = re.compile(r"(https : \/(\s(\/\s)?\w+(\.)?)+)")


def transform_tokens_count_urls(text_tokens, bert_tokenizer, xlm_t_tokenizer=None):
    """Decode a tweet's token ids to text and count the URLs in it.

    Args:
        text_tokens: Tab-separated token ids as a single string.
        bert_tokenizer: Object whose ``decode(list_of_int_ids)`` returns the text.
        xlm_t_tokenizer: Unused; kept (now optional) so existing three-argument
            calls keep working.

    Returns:
        Tuple ``(tweet_text, link_cnt)`` with the decoded text and the number of
        non-overlapping URL pattern matches found in it.
    """
    token_ids = [int(s) for s in text_tokens.split('\t')]
    tweet_text = bert_tokenizer.decode(token_ids)

    link_cnt = sum(1 for _ in _URL_PATTERN.finditer(tweet_text))

    return (tweet_text, link_cnt)
    
# transform_tokens_count_urls(orig_df['text_tokens'][3], bert_multilingual_tokenizer, xlm_t_tokenizer)

In [7]:
# Decode every tweet and count its URLs. None is passed as the third argument because
# transform_tokens_count_urls never uses it: the XLM-T tokenizer is only created in the
# model-training section, so referencing it here raises NameError on a fresh kernel.
text_tokens_url_cnts = orig_df['text_tokens'].map(
    lambda x: transform_tokens_count_urls(x, bert_multilingual_tokenizer, None))

#### Add extracted text and url count to dataframe

In [8]:
# Unzip the (text, url_count) pairs into two parallel tuples.
text, counts = zip(*text_tokens_url_cnts)

# Attach both as new columns; the raw token-id column is no longer needed.
orig_df['url_cnt'] = counts
orig_df['text'] = text
orig_df.drop(columns=['text_tokens'], inplace=True)

# Release the large intermediates before continuing.
del text_tokens_url_cnts, text, counts
orig_df.head()

Unnamed: 0,hashtags,tweet_id,media,links,domains,tweet_type,language,timestamp,engaged_with_user_id,engaged_with_user_follower_count,...,engaging_user_following_count,engaging_user_is_verified,engaging_user_account_creation,engagee_follows_engager,reply,retweet,retweet_comment,like,url_cnt,text
1120596,E8EC1049F02FE3900B1E45D1BDD52BEF\tF0F29CEE3668...,20C5F3A6F47B4E7A85A1443CC3D12B0C,Video,,,TopLevel,B0FA488F2911701DD8EC5B1EA5E322D8,1612396800,E400C001A195BD92CB74AA0B2E2BB522,6460161,...,237,False,1590030909,False,,,,,1,"[CLS] Una vez dentro, sólo habrá una salida. ¶..."
6744778,,ABC98352C6238B3129A1772122532156,GIF,,,TopLevel,313ECD3A1E5BB07406E4249475C2D6D6,1612396800,80C3FD8645A74F589C103A1F9A3C40E5,53204,...,163,False,1404873502,False,,,,1612414000.0,1,[CLS] https : / / t. co / kJysqCq6UR [SEP]
1536740,,120E4BA71617DDFA622E0263783D04B2,Photo\tPhoto,,,TopLevel,313ECD3A1E5BB07406E4249475C2D6D6,1612396800,3413E19B696E35FC1B6567C260ED2E0B,96565,...,728,False,1470684663,False,,1612397000.0,,,1,[CLS] https : / / t. co / r9ltA3k5k9 [SEP]
5782609,,AC3770889BCE6879E01ACC3D675CCD5C,Photo,,,TopLevel,B0FA488F2911701DD8EC5B1EA5E322D8,1612396800,C530D3D968FBD0D08537C5EDEEDEE542,6569,...,1108,False,1260577260,False,,,,1612399000.0,1,[CLS] Como los trata el calor.... a mi asi! ht...
2289432,,AC97A848CC9CE41F42D566B2C5EAAB45,Video,,,TopLevel,488B32D24BD4BB44172EB981C1BCA6FA,1612396800,E7A2CEC020385D583CA1F15C53E671DD,3600646,...,294,False,1532466558,False,,,,1612465000.0,1,[CLS] Never forget when Pop Smoke dropped two ...


In [9]:
#based on https://maxhalford.github.io/blog/target-encoding/
###TODO move this
class TargetEncoder():
    """Smoothed target (mean) encoder for categorical columns.

    For every category of every configured column the encoding is
    ``(count * category_mean + w * overall_mean) / (count + w)``, where the
    statistics are those of the target column on the frame passed to ``fit``.
    """

    def __init__(self, cols, w):
        """
        Args:
            cols: Column name or list of column names to encode.
            w: Smoothing weight given to the overall target mean.
        """
        if isinstance(cols, str):
            self.cols = [cols]
        else:
            self.cols = cols

        self.w = w

    def fit(self, X, y):
        """Learn the smoothed per-category target means.

        Args:
            X: DataFrame containing both the columns to encode and the target.
            y: Name of the target column in ``X``.

        Returns:
            self

        Raises:
            ValueError: If the target or any configured column is missing from
                ``X``. When a configured column is missing, ``self.maps`` is left
                empty so a later ``transform`` adds no columns.
        """
        if y not in X.columns:
            raise ValueError('Column: {} not in dataframe'.format(y))

        self.target = y
        self.target_mean = X[self.target].mean()

        # Mapping per column: category -> smoothed target mean.
        self.maps = {}
        maps = {}
        for col in self.cols:
            if col not in X:
                raise ValueError('Column: {} not in dataframe'.format(col))

            aggr = X.groupby(col)[self.target].agg(['count', 'mean'])
            counts = aggr['count']
            means = aggr['mean']

            maps[col] = (counts * means + self.w * self.target_mean) / (counts + self.w)

        self.maps = maps
        return self

    def transform(self, X, y=None):
        """Return a copy of ``X`` with one ``<col>_<target>_TE`` column per fitted column.

        Categories not seen during ``fit`` (and missing values) receive the
        overall target mean. ``y`` is accepted for API symmetry and is unused.
        """
        X_new = X.copy()
        for col, smooth in self.maps.items():
            new_col_name = col + '_' + self.target + "_TE"
            # Assign the filled result back instead of calling fillna(inplace=True)
            # on the selected column: that chained in-place call acts on a temporary
            # under pandas Copy-on-Write and would leave the NaNs in place.
            X_new[new_col_name] = X_new[col].map(smooth).fillna(self.target_mean)
        return X_new

    def fit_transform(self, X, y):
        """Fit on ``X`` with target column ``y`` and return the encoded copy of ``X``."""
        return self.fit(X, y).transform(X, y)

In [10]:
# Toy frame for checking TargetEncoder by hand: two categorical columns and a
# binary target whose overall mean is 0.5.
df_test = pd.DataFrame({
    'x_0': ['a'] * 5 + ['b'] * 5,
    'x_1': ['c'] * 9 + ['d'] * 1,
    'y': [1, 1, 1, 1, 0, 1, 0, 0, 0, 0]
})

# Encode both columns with smoothing weight 10.
te = TargetEncoder(['x_0','x_1'], 10)
df_test

Unnamed: 0,x_0,x_1,y
0,a,c,1
1,a,c,1
2,a,c,1
3,a,c,1
4,a,c,0
5,b,c,1
6,b,c,0
7,b,c,0
8,b,c,0
9,b,d,0


In [11]:
# Fit on the toy frame and append one "<col>_y_TE" column per encoded column.
new_df = te.fit_transform(df_test, 'y')
new_df

Unnamed: 0,x_0,x_1,y,x_0_y_TE,x_1_y_TE
0,a,c,1,0.6,0.526316
1,a,c,1,0.6,0.526316
2,a,c,1,0.6,0.526316
3,a,c,1,0.6,0.526316
4,a,c,0,0.6,0.526316
5,b,c,1,0.4,0.526316
6,b,c,0,0.4,0.526316
7,b,c,0,0.4,0.526316
8,b,c,0,0.4,0.526316
9,b,d,0,0.4,0.454545


In [12]:
### Next steps:
# 1.split - 60/20/20? -- done
# 2.apply targetencoding where needed in train
# 3.transform text to correct ids for XLM-T - already have tokenizers -- just text!
# 4.create dataset object
# 5.finish the wide & deep model architecture
# 6.create trainer object & train 
# 7.use validate to monitor loss on every ??? steps(use tensorboard??)
# 8.add evaluation metrics as in contest, eval on test set

### To think about:
# - create the dataset only for a subset of users (the full data is too big to train on);
#   this way the table still holds all the information for those specific users
# - use a hash function for user ids?
# - reduce the size of the embeddings from the MLM
# - 

#### Drop columns which won't be used

In [13]:
# Remove the raw columns that are not used by any later step of this notebook.
orig_df.drop(
    columns=[
        'hashtags', 'tweet_id', 'media', 'links',
        'domains', 'tweet_type', 'timestamp',
        'engaging_user_account_creation', 'engaged_with_user_account_creation',
    ],
    inplace=True,
)
print(orig_df.columns)

Index(['language', 'engaged_with_user_id', 'engaged_with_user_follower_count',
       'engaged_with_user_following_count', 'engaged_with_user_is_verified',
       'engaging_user_id', 'engaging_user_follower_count',
       'engaging_user_following_count', 'engaging_user_is_verified',
       'engagee_follows_engager', 'reply', 'retweet', 'retweet_comment',
       'like', 'url_cnt', 'text'],
      dtype='object')


#### Change targets to be binary $\in$ {0,1} instead of timestamps

In [14]:
def fix_target(df, col):
    """Binarize an engagement-timestamp column of ``df`` in place.

    Missing values become 0 and every value greater than 0 becomes 1; other
    values are left as they are. Returns None.

    The result is assigned back to ``df[col]`` rather than produced with
    ``inplace=True`` on the selected column, because such chained in-place calls
    operate on a temporary under pandas Copy-on-Write and leave ``df`` unchanged.
    """
    filled = df[col].fillna(0)
    df[col] = filled.mask(filled > 0, 1)
    
# Binarize every engagement target column of the full frame.
for col in target_features:
    fix_target(orig_df, col)

orig_df.head(2)

Unnamed: 0,language,engaged_with_user_id,engaged_with_user_follower_count,engaged_with_user_following_count,engaged_with_user_is_verified,engaging_user_id,engaging_user_follower_count,engaging_user_following_count,engaging_user_is_verified,engagee_follows_engager,reply,retweet,retweet_comment,like,url_cnt,text
1120596,B0FA488F2911701DD8EC5B1EA5E322D8,E400C001A195BD92CB74AA0B2E2BB522,6460161,254,True,7327C6D707CE1C0F8DC376B8F6AA6B3F,49,237,False,False,0.0,0.0,0.0,0.0,1,"[CLS] Una vez dentro, sólo habrá una salida. ¶..."
6744778,313ECD3A1E5BB07406E4249475C2D6D6,80C3FD8645A74F589C103A1F9A3C40E5,53204,132,False,75CC5A829BFDBECDFCDD0868EFAC04FF,79,163,False,False,0.0,0.0,0.0,1.0,1,[CLS] https : / / t. co / kJysqCq6UR [SEP]


#### Change 'engagee_follows_engager', 'engaged_with_user_is_verified', 'engaging_user_is_verified' to be numeric 

In [15]:
def replace_boolean_numeric(df, column):
    """Replace True/False in ``df[column]`` with 1.0/0.0, modifying ``df`` in place.

    Values other than True and False are left untouched. Returns None.

    The replaced column is assigned back instead of calling
    ``replace(..., inplace=True)`` on the selected column, because that chained
    in-place call acts on a temporary under pandas Copy-on-Write.
    """
    df[column] = df[column].replace({True: 1.0, False: 0.0})
    
# Convert the three boolean flag columns to 1.0 / 0.0.
for col in ['engagee_follows_engager', 'engaged_with_user_is_verified', 'engaging_user_is_verified']:
    replace_boolean_numeric(orig_df, col)

orig_df.head(2)

Unnamed: 0,language,engaged_with_user_id,engaged_with_user_follower_count,engaged_with_user_following_count,engaged_with_user_is_verified,engaging_user_id,engaging_user_follower_count,engaging_user_following_count,engaging_user_is_verified,engagee_follows_engager,reply,retweet,retweet_comment,like,url_cnt,text
1120596,B0FA488F2911701DD8EC5B1EA5E322D8,E400C001A195BD92CB74AA0B2E2BB522,6460161,254,1.0,7327C6D707CE1C0F8DC376B8F6AA6B3F,49,237,0.0,0.0,0.0,0.0,0.0,0.0,1,"[CLS] Una vez dentro, sólo habrá una salida. ¶..."
6744778,313ECD3A1E5BB07406E4249475C2D6D6,80C3FD8645A74F589C103A1F9A3C40E5,53204,132,0.0,75CC5A829BFDBECDFCDD0868EFAC04FF,79,163,0.0,0.0,0.0,0.0,0.0,1.0,1,[CLS] https : / / t. co / kJysqCq6UR [SEP]


#### Split the dataframe into train, validation and test dataframes

In [16]:
def train_valid_test_split(df, valid_ratio=0.2, test_ratio=0.2):
    """Split ``df`` by row position into train, validation and test frames.

    The first rows form the training set, followed by the validation rows and
    finally the test rows, so the row order of ``df`` decides what lands where.

    Args:
        df: DataFrame to split.
        valid_ratio: Fraction of rows for validation (count is rounded down).
        test_ratio: Fraction of rows for testing (count is rounded down).

    Returns:
        Tuple ``(train, valid, test)`` of independent copies.

    Raises:
        ValueError: If a ratio is negative or the two ratios sum to 1.0 or more.
    """
    if valid_ratio < 0 or test_ratio < 0 or valid_ratio + test_ratio >= 1.0:
        raise ValueError("Invalid valid and test ratio")

    all_point_cnt = df.shape[0]
    val_cnt = int(valid_ratio * all_point_cnt)
    test_cnt = int(test_ratio * all_point_cnt)

    # Use absolute boundaries: negative slicing breaks when a count is 0, because
    # iloc[-0:] selects the whole frame and iloc[:-0] selects nothing.
    train_end = all_point_cnt - val_cnt - test_cnt
    valid_end = all_point_cnt - test_cnt

    train_points = df.iloc[:train_end].copy()
    valid_points = df.iloc[train_end:valid_end].copy()
    test_points = df.iloc[valid_end:].copy()

    return train_points, valid_points, test_points

# Default 60/20/20 split by row position; rows were sorted by timestamp after loading,
# so the validation and test frames hold the latest rows.
train_df, valid_df, test_df = train_valid_test_split(orig_df)

#### Scale numerical features

In [17]:
# Pull the numerical feature columns of each split out as plain arrays for the scaler.
train_numerical_feat = train_df[numerical_features].to_numpy()
valid_numerical_feat = valid_df[numerical_features].to_numpy()
test_numerical_feat = test_df[numerical_features].to_numpy()

In [18]:
# Learn the min/max of every numerical feature on the training split only, then
# apply that same scaling to all three splits.
scaler = MinMaxScaler()
scaler.fit(train_numerical_feat)

train_scaled = scaler.transform(train_numerical_feat)
valid_scaled = scaler.transform(valid_numerical_feat)
test_scaled = scaler.transform(test_numerical_feat)
print(train_scaled.shape)

(4178184, 5)


In [19]:
# Write the scaled values back column by column. Plain column assignment replaces the
# column (and its dtype); ".loc[:, feat] = floats" instead tries to write into the
# existing integer column, which pandas deprecated as an incompatible-dtype set.
for i, feat in enumerate(numerical_features):
    train_df[feat] = train_scaled[:, i]
    valid_df[feat] = valid_scaled[:, i]
    test_df[feat] = test_scaled[:, i]

train_df[numerical_features].head(2)

Unnamed: 0,engaged_with_user_follower_count,engaged_with_user_following_count,engaging_user_follower_count,engaging_user_following_count,url_cnt
1120596,0.049986,6e-05,2e-05,0.001692,0.1
6744778,0.000412,3.1e-05,3.2e-05,0.001162,0.1


#### Apply target encoding

In [20]:
def target_encode(train_df, valid_df, test_df, features, m, targets=None):
    """Add smoothed target-encoded columns for every (feature, target) pair.

    For each target a fresh TargetEncoder is fitted on the training frame only
    and then applied to all three frames, so validation and test rows never
    contribute to the encodings.

    NOTE(review): the training frame is encoded with statistics that include each
    row's own target value (plain fit_transform, no out-of-fold scheme).

    Args:
        train_df, valid_df, test_df: Frames to encode; they are not modified.
        features: Categorical column name(s) to encode.
        m: Smoothing weight passed to TargetEncoder.
        targets: Target column names; defaults to the notebook-level
            ``target_features`` when omitted.

    Returns:
        Tuple ``(train_df, valid_df, test_df)`` of new frames with the added
        ``<feature>_<target>_TE`` columns.
    """
    if targets is None:
        targets = target_features

    for target in targets:
        te = TargetEncoder(features, m)

        train_df = te.fit_transform(train_df, target)
        valid_df = te.transform(valid_df, target)
        test_df = te.transform(test_df, target)

    return train_df, valid_df, test_df

# Encode every categorical column against every target; m is the smoothing weight.
train_df, valid_df, test_df = target_encode(train_df, valid_df, test_df, categorical_features, m)
train_df.head(2)

Unnamed: 0,language,engaged_with_user_id,engaged_with_user_follower_count,engaged_with_user_following_count,engaged_with_user_is_verified,engaging_user_id,engaging_user_follower_count,engaging_user_following_count,engaging_user_is_verified,engagee_follows_engager,...,engaging_user_id_reply_TE,language_retweet_TE,engaged_with_user_id_retweet_TE,engaging_user_id_retweet_TE,language_retweet_comment_TE,engaged_with_user_id_retweet_comment_TE,engaging_user_id_retweet_comment_TE,language_like_TE,engaged_with_user_id_like_TE,engaging_user_id_like_TE
1120596,B0FA488F2911701DD8EC5B1EA5E322D8,E400C001A195BD92CB74AA0B2E2BB522,0.049986,6e-05,1.0,7327C6D707CE1C0F8DC376B8F6AA6B3F,2e-05,0.001692,0.0,0.0,...,0.005168,0.112059,0.031386,0.010432,0.007492,0.011705,0.000156,0.404614,0.127174,0.665901
6744778,313ECD3A1E5BB07406E4249475C2D6D6,80C3FD8645A74F589C103A1F9A3C40E5,0.000412,3.1e-05,0.0,75CC5A829BFDBECDFCDD0868EFAC04FF,3.2e-05,0.001162,0.0,0.0,...,0.000525,0.091135,0.15746,0.137603,0.006282,0.000967,0.002331,0.429628,0.680971,0.63683


#### Drop the raw categorical columns (replaced by their target encodings)

In [21]:
# The raw categorical columns are no longer needed once their encodings exist.
train_df.drop(columns=categorical_features, inplace=True)
valid_df.drop(columns=categorical_features, inplace=True)
test_df.drop(columns=categorical_features, inplace=True)

In [22]:
def write_file(df, name):
    """Save ``df`` as ``<DATA_PATH><name>.csv`` without writing the index."""
    target_path = "{}{}.csv".format(DATA_PATH, name)
    df.to_csv(target_path, index=False)
    
# Persist the processed splits so the training section can reload them from disk
# instead of repeating the preprocessing.
write_file(train_df, "train")
write_file(valid_df, "valid")
write_file(test_df, "test")

# Model training

In [4]:
## Entry point for running only the training part of the notebook (after the import
## and parameter cells): any split that is not already in memory is read from disk.
## At cell top level locals() is globals(), so a single namespace check is enough.

if 'train_df' not in globals():
    train_df = pd.read_csv(f"{DATA_PATH}train.csv")

if 'valid_df' not in globals():
    valid_df = pd.read_csv(f"{DATA_PATH}valid.csv")

if 'test_df' not in globals():
    test_df = pd.read_csv(f"{DATA_PATH}test.csv")

# Tokenizer matching the pretrained XLM-T checkpoint used for the model.
xlm_t_tokenizer = transformers.XLMRobertaTokenizer.from_pretrained("cardiffnlp/twitter-xlm-roberta-base")

In [5]:
def create_dataset(df, tokenizer, numerical_features=numerical_features,
                   features=features, targets=target_features, max_len=MAX_LEN):
    """Build a TweetDataset from one preprocessed split.

    Args:
        df: Frame with a 'text' column plus the feature and target columns.
        tokenizer: Tokenizer handed to TweetDataset.
        numerical_features: Names of the scaled numerical columns.
        features: Names of the target-encoded columns.
        targets: Names of the target columns.
        max_len: Maximum length handed to TweetDataset.

    Returns:
        TweetDataset over the texts, the feature matrix (numerical columns first,
        then the encoded ones) and the target matrix.
    """
    feature_columns = numerical_features + features

    tweet_texts = df['text'].tolist()
    feature_matrix = df[feature_columns].to_numpy()
    target_matrix = df[targets].to_numpy()

    return TweetDataset(tweet_texts, feature_matrix, target_matrix, tokenizer, max_len)
    

# One dataset per split, all tokenized with the XLM-T tokenizer.
train_dataset = create_dataset(train_df, xlm_t_tokenizer)
valid_dataset = create_dataset(valid_df, xlm_t_tokenizer)
test_dataset = create_dataset(test_df, xlm_t_tokenizer)

In [6]:
# from_pretrained is a classmethod: call it on the class instead of first building a
# throwaway default config instance.
config = transformers.XLMRobertaConfig.from_pretrained("cardiffnlp/twitter-xlm-roberta-base")

# Wide & deep model on top of the pretrained XLM-T encoder; the tabular input width is
# the number of numerical plus target-encoded features.
model = Wd_Xlm_T.from_pretrained(
    "cardiffnlp/twitter-xlm-roberta-base",
    config=config,
    dim_features=len(numerical_features + features),
    dim_hidden=[512, 256],
)

model = model.to(DEVICE)

Some weights of Wd_Xlm_T were not initialized from the model checkpoint at cardiffnlp/twitter-xlm-roberta-base and are newly initialized: ['deep.net.dense_layer_0.0.weight', 'wide.bias', 'deep.net.dense_layer_1.0.bias', 'roberta.pooler.dense.weight', 'deep.net.dense_layer_0.0.bias', 'deep.net.dense_layer_2.0.weight', 'wide.weight', 'roberta.pooler.dense.bias', 'deep.net.dense_layer_2.0.bias', 'deep.net.dense_layer_1.0.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [7]:
# Training hyperparameters; they are the defaults of train_model.
EPOCH_COUNT = 3
BATCH_SIZE = 16

# Adam over all model parameters with a single learning rate.
optimizer = torch.optim.Adam(model.parameters(), lr=1e-5)
# BCEWithLogitsLoss applies the sigmoid itself, so the model output is used as raw logits.
loss_fn = nn.BCEWithLogitsLoss()

In [10]:
def calc_valid_loss(model, valid_loss, batch_size, loss_fn):
    """Return the mean per-batch loss of ``model`` over a validation dataset.

    Args:
        model: Model called as ``model(input_ids, attention_mask, features)``.
        valid_loss: The validation dataset (the parameter name is historical and
            kept for compatibility). It is now actually used, instead of the
            notebook-level ``valid_dataset`` being read regardless of the argument.
        batch_size: Batch size of the evaluation DataLoader.
        loss_fn: Callable ``loss_fn(logits, labels)`` returning a scalar tensor.

    Returns:
        Mean of the per-batch loss values.

    The model is put in eval mode for the pass and is always switched back to
    train mode afterwards, even when the pass fails.
    """
    dataloader = DataLoader(valid_loss, batch_size=batch_size,
                            shuffle=False, drop_last=False)
    loss_list = []

    model.eval()
    try:
        # No gradients are needed for evaluation; skipping the autograd graph saves
        # memory and time.
        with torch.no_grad():
            for data in tqdm(dataloader):
                input_ids = data['input_ids'].to(DEVICE)
                attention_mask = data['attention_mask'].to(DEVICE)
                features = data['features'].to(DEVICE)

                labels = data['labels'].to(DEVICE)

                logits = model(input_ids, attention_mask, features)
                loss = loss_fn(logits, labels)

                loss_list.append(loss.item())
    finally:
        model.train()

    return np.mean(loss_list)


def train_model(model, train_dataset, valid_datset, optimizer, loss_fn,
                batch_size=BATCH_SIZE, epochs=EPOCH_COUNT, eval_every=25000):
    """Train ``model`` and checkpoint it at regular step intervals.

    Every ``eval_every`` steps (step 0 excluded) the running mean training loss of
    the current epoch and the validation loss are printed, a checkpoint named
    ``<CHECKPOINT_DIR>_<epoch>_<step>`` is saved, and the model is also saved as
    ``<CHECKPOINT_DIR>best_valid_loss`` when the validation loss is the lowest
    seen so far.

    Args:
        model: Model called as ``model(input_ids, attention_mask, features)`` and
            providing ``save_pretrained``.
        train_dataset: Dataset yielding dicts with 'input_ids', 'attention_mask',
            'features' and 'labels'.
        valid_datset: Validation dataset (spelling kept for compatibility). It is
            now the dataset that gets evaluated, instead of the notebook-level
            ``valid_dataset`` being read regardless of the argument.
        optimizer: Optimizer over the model parameters.
        loss_fn: Callable ``loss_fn(logits, labels)`` returning a scalar tensor.
        batch_size: Batch size for training and validation.
        epochs: Number of passes over ``train_dataset``.
        eval_every: Number of steps between evaluations/checkpoints.

    Raises:
        ValueError: If ``eval_every`` is not positive.
    """
    if eval_every <= 0:
        raise ValueError("eval_every must be a positive number of steps")

    model.train()

    # Tracked across epochs: resetting it per epoch would let the first evaluation
    # of a new epoch overwrite a better "best" checkpoint from an earlier epoch.
    best_valid_loss = None

    for epoch in range(epochs):
        dataloader = DataLoader(train_dataset, batch_size=batch_size,
                                shuffle=True, drop_last=False)
        avg_loss = []
        for step, data in enumerate(tqdm(dataloader)):
            input_ids = data['input_ids'].to(DEVICE)
            attention_mask = data['attention_mask'].to(DEVICE)
            features = data['features'].to(DEVICE)

            labels = data['labels'].to(DEVICE)

            # Clear gradients left over from the previous step before the new pass.
            optimizer.zero_grad()

            logits = model(input_ids, attention_mask, features)
            loss = loss_fn(logits, labels)

            loss.backward()
            optimizer.step()

            # Keep plain floats so the loss history does not hold on to tensors.
            avg_loss.append(loss.item())

            if step > 0 and step % eval_every == 0:
                train_loss = np.mean(avg_loss)
                valid_loss = calc_valid_loss(model, valid_datset, batch_size, loss_fn)
                print("Epoch: {}, step: {}, train_loss: {}, valid_loss: {}"
                      .format(epoch, step, train_loss, valid_loss))
                model.save_pretrained((CHECKPOINT_DIR+"_{}_{}").format(epoch, step))

                if best_valid_loss is None or valid_loss < best_valid_loss:
                    model.save_pretrained(CHECKPOINT_DIR+"best_valid_loss")
                    best_valid_loss = valid_loss
        
# Run training with the default batch size and epoch count.
train_model(model, train_dataset, valid_dataset, optimizer, loss_fn)

  0%|          | 0/261137 [00:00<?, ?it/s]

  0%|          | 0/87046 [00:00<?, ?it/s]

Epoch: 0, step: 25000, train_loss: 0.26758432388305664, valid_loss: 0.26314738392829895


  0%|          | 0/87046 [00:00<?, ?it/s]

Epoch: 0, step: 50000, train_loss: 0.26383233070373535, valid_loss: 0.2600317597389221


  0%|          | 0/87046 [00:00<?, ?it/s]

Epoch: 0, step: 75000, train_loss: 0.2616996467113495, valid_loss: 0.25870105624198914


  0%|          | 0/87046 [00:00<?, ?it/s]

Epoch: 0, step: 100000, train_loss: 0.2599082589149475, valid_loss: 0.2562841773033142


  0%|          | 0/87046 [00:00<?, ?it/s]

Epoch: 0, step: 125000, train_loss: 0.2585690915584564, valid_loss: 0.25417235493659973


  0%|          | 0/87046 [00:00<?, ?it/s]

Epoch: 0, step: 150000, train_loss: 0.25699394941329956, valid_loss: 0.2522868514060974


  0%|          | 0/87046 [00:00<?, ?it/s]

Epoch: 0, step: 175000, train_loss: 0.2558172047138214, valid_loss: 0.25166869163513184


  0%|          | 0/87046 [00:00<?, ?it/s]

Epoch: 0, step: 200000, train_loss: 0.25462085008621216, valid_loss: 0.25066447257995605


  0%|          | 0/87046 [00:00<?, ?it/s]

Epoch: 0, step: 225000, train_loss: 0.2535407543182373, valid_loss: 0.248321533203125


  0%|          | 0/87046 [00:00<?, ?it/s]

Epoch: 0, step: 250000, train_loss: 0.2524455189704895, valid_loss: 0.24863219261169434


  0%|          | 0/261137 [00:00<?, ?it/s]

  0%|          | 0/87046 [00:00<?, ?it/s]

Epoch: 1, step: 25000, train_loss: 0.23966403305530548, valid_loss: 0.24735558032989502


KeyboardInterrupt: 