In [1]:
%load_ext autoreload
%autoreload 2
%matplotlib inline
%config InlineBackend.figure_format = 'retina'

import os, math
import numpy as np, pandas as pd
import matplotlib.pyplot as plt, seaborn as sns
from tqdm import tqdm, tqdm_notebook
from pathlib import Path
# Display options: let wide feature frames render in full when inspected.
pd.set_option('display.max_columns', 1000)
pd.set_option('display.max_rows', 400)
sns.set()

# Move two directories up so that `src` is importable and the relative
# `data/...` paths below resolve (presumably the project root).
# Guarded so that re-running this cell does not climb two more levels each time.
if not Path('src').is_dir():
    os.chdir('../..')
from src import utils

In [2]:
# Data directories, all expressed relative to the current working directory.
DATA = Path('data')
RAW, INTERIM, PROCESSED, SUBMISSIONS = (
    DATA.joinpath(subdir)
    for subdir in ('raw', 'interim', 'processed', 'submissions')
)

In [3]:
# Raw tables, read in the order challenge -> customer -> isin -> trade.
# low_memory=False disables pandas' chunked parsing so that column dtypes are
# inferred from the whole file.
challenge, customer, isin, trade = (
    pd.read_csv(RAW/csv_name, low_memory=False)
    for csv_name in ('Challenge_20180423.csv', 'Customer.csv',
                     'Isin.csv', 'Trade.csv')
)
# Not loaded in this notebook:
# submission = pd.read_csv(RAW/'sample_submission.csv', low_memory=False)
# market     = pd.read_csv(RAW/'Market.csv', low_memory=False)

In [4]:
from src.utils import get_weeks, week_num
# Keep only the labels from index 104 onwards of the 121 requested; the printed
# output below shows the 17 remaining labels, 20180101 through 20180423.
week_labels = get_weeks(day_from=20160104, num_weeks=121)[104:]

In [5]:
print(week_labels)

[20180101, 20180108, 20180115, 20180122, 20180129, 20180205, 20180212, 20180219, 20180226, 20180305, 20180312, 20180319, 20180326, 20180402, 20180409, 20180416, 20180423]


In [6]:
import pickle
# NOTE: pickle.load can execute arbitrary code while deserialising; only load
# files that were produced by this project's own pipeline.
# `interests` is indexed further down with (CustomerIdx, IsinIdx, BuySell)
# tuples to fetch one sequence per row.
with open(INTERIM/'interest_sequences.pkl', 'rb') as f:
    interests = pickle.load(f)

In [7]:
isin.head()

Unnamed: 0,IsinIdx,TickerIdx,ActualMaturityDateKey,IssueDateKey,Seniority,Currency,ActivityGroup,Region,Activity,RiskCaptain,Owner,CompositeRating,IndustrySector,IndustrySubgroup,MarketIssue,IssuedAmount,CouponType
0,0,238,20381231,20051129,GOV,USD,FLOW LOCAL MARKET,AMERICAS,ARGENTINA,ARGENTINA,EMK ARGENTINA,NR,Government,Sovereign,Domestic,1246002000.0,STEP CPN
1,1,238,20331231,20051129,GOV,USD,FLOW LOCAL MARKET,AMERICAS,ARGENTINA,ARGENTINA,EMK ARGENTINA,NR,Government,Sovereign,Domestic,4901086000.0,FIXED
2,2,238,20331231,20051129,GOV,ARS,FLOW LOCAL MARKET,AMERICAS,ARGENTINA,ARGENTINA,EMK ARGENTINA,NR,Government,Sovereign,Domestic,15012450000.0,FIXED
3,3,236,20170417,20070417,GOV,USD,FLOW LOCAL MARKET,AMERICAS,ARGENTINA,ARGENTINA,EMK ARGENTINA,B,Government,Sovereign,Domestic,7340076000.0,FIXED
4,4,234,20221004,20100222,GOV,ARS,FLOW LOCAL MARKET,AMERICAS,ARGENTINA,ARGENTINA,EMK ARGENTINA,NR,Government,Sovereign,Domestic,3058452000.0,FLOATING


In [8]:
%%time
# Read every training week once and concatenate a single time. Growing the
# frame with pd.concat inside the loop re-copies all previously loaded rows on
# each iteration (quadratic work).
# The last two week labels are left out here; they are loaded separately as
# the validation and test frames.
train = pd.concat([
    pd.read_feather(PROCESSED/f'SVD_17-18_72f/week_{name}_SVD_diffscount.feather')
    for name in week_labels[:-2]
])

CPU times: user 1min 26s, sys: 44.9 s, total: 2min 11s
Wall time: 2min 26s


In [15]:
# Second-to-last week label -> validation frame; last week label -> test frame.
val = pd.read_feather(PROCESSED/f'SVD_17-18_72f/week_{week_labels[-2]}_SVD_diffscount.feather')
test = pd.read_feather(PROCESSED/f'SVD_17-18_72f/week_{week_labels[-1]}_SVD_diffscount.feather')

In [9]:
train.head()

Unnamed: 0,TradeDateKey,CustomerIdx,IsinIdx,BuySell,CustomerInterest,DaysSinceBuySell,DaysSinceTransaction,DaysSinceCustomerActivity,DaysSinceBondActivity,DaysCountBuySell,DaysCountTransaction,DaysCountCustomerActivity,DaysCountBondActivity,SVD_CustomerBias,SVD_IsinBuySellBias,SVD_Recommend,SVD_CustomerFactor00,SVD_CustomerFactor01,SVD_CustomerFactor02,SVD_CustomerFactor03,SVD_CustomerFactor04,SVD_CustomerFactor05,SVD_CustomerFactor06,SVD_CustomerFactor07,SVD_CustomerFactor08,SVD_CustomerFactor09,SVD_CustomerFactor10,SVD_CustomerFactor11,SVD_CustomerFactor12,SVD_CustomerFactor13,SVD_CustomerFactor14,SVD_IsinBuySellFactor00,SVD_IsinBuySellFactor01,SVD_IsinBuySellFactor02,SVD_IsinBuySellFactor03,SVD_IsinBuySellFactor04,SVD_IsinBuySellFactor05,SVD_IsinBuySellFactor06,SVD_IsinBuySellFactor07,SVD_IsinBuySellFactor08,SVD_IsinBuySellFactor09,SVD_IsinBuySellFactor10,SVD_IsinBuySellFactor11,SVD_IsinBuySellFactor12,SVD_IsinBuySellFactor13,SVD_IsinBuySellFactor14,Year,Month,Day,Sector,Subsector,Region_x,Country,TickerIdx,ActualMaturityDateKey,IssueDateKey,Seniority,Currency,ActivityGroup,Region_y,Activity,RiskCaptain,Owner,CompositeRating,IndustrySector,IndustrySubgroup,MarketIssue,IssuedAmount,CouponType,BondDuration,BondRemaining,BondLife
0,20180101,0,21856,Buy,0.0,12,12,12,11,2,2,28,54,0.145856,0.066204,0.514251,-0.029115,0.016691,-0.056704,0.096195,0.045666,0.092239,0.08126,0.055171,-0.115695,-0.020836,-0.071208,0.011179,0.004409,0.016432,0.023712,0.012533,0.11176,0.005203,0.036013,0.046625,0.177593,0.047611,0.083235,-0.051471,0.006236,-0.021289,0.080423,0.056338,0.122781,-0.062886,2018,18,1,Asset Managers & Hedge Funds,Independent Asset Manager,Asia Pacific,HONG KONG,1930,20190114,20140114,SEN,USD,FLOW LOCAL MARKET,ASIA-TOKYO,ASIA MARKET MAKING,ASIA HIGH YIELD,ASIA HIGH YIELD,14,Financial,Real Estate Oper/Develop,Euro-dollar,600000000.0,FIXED,1826,378,1448
1,20180101,0,21856,Sell,0.0,915,12,12,11,0,2,28,54,0.145856,0.054329,0.515428,-0.029115,0.016691,-0.056704,0.096195,0.045666,0.092239,0.08126,0.055171,-0.115695,-0.020836,-0.071208,0.011179,0.004409,0.016432,0.023712,0.050487,-0.006822,0.050556,0.100922,0.190385,0.112586,0.102837,0.07735,-0.131995,-0.041197,0.008692,0.154851,-0.077626,0.02806,-0.025874,2018,18,1,Asset Managers & Hedge Funds,Independent Asset Manager,Asia Pacific,HONG KONG,1930,20190114,20140114,SEN,USD,FLOW LOCAL MARKET,ASIA-TOKYO,ASIA MARKET MAKING,ASIA HIGH YIELD,ASIA HIGH YIELD,14,Financial,Real Estate Oper/Develop,Euro-dollar,600000000.0,FIXED,1826,378,1448
2,20180101,0,24944,Buy,0.0,915,915,12,18,0,0,28,76,0.145856,0.269019,0.71723,-0.029115,0.016691,-0.056704,0.096195,0.045666,0.092239,0.08126,0.055171,-0.115695,-0.020836,-0.071208,0.011179,0.004409,0.016432,0.023712,-0.083098,-0.10414,-0.195715,-0.057268,0.241214,0.092401,0.040741,0.042378,-0.137947,0.155237,0.03617,0.030057,0.051076,-0.056409,-0.017747,2018,18,1,Asset Managers & Hedge Funds,Independent Asset Manager,Asia Pacific,HONG KONG,1333,20230817,20160817,SEN,USD,FLOW LOCAL MARKET,ASIA-TOKYO,ASIA MARKET MAKING,ASIA HIGH YIELD,ASIA HIGH YIELD,0,Financial,Property/Casualty Ins,Euro-dollar,590000000.0,FIXED,2556,2054,502
3,20180101,0,24944,Sell,0.0,915,915,12,18,0,0,28,76,0.145856,0.384511,0.806818,-0.029115,0.016691,-0.056704,0.096195,0.045666,0.092239,0.08126,0.055171,-0.115695,-0.020836,-0.071208,0.011179,0.004409,0.016432,0.023712,-0.063905,0.071768,-0.06642,-0.07605,-0.02591,0.096641,-0.007377,0.166042,-0.052101,-0.057384,0.100283,0.024377,0.001089,0.075338,-0.104738,2018,18,1,Asset Managers & Hedge Funds,Independent Asset Manager,Asia Pacific,HONG KONG,1333,20230817,20160817,SEN,USD,FLOW LOCAL MARKET,ASIA-TOKYO,ASIA MARKET MAKING,ASIA HIGH YIELD,ASIA HIGH YIELD,0,Financial,Property/Casualty Ins,Euro-dollar,590000000.0,FIXED,2556,2054,502
4,20180101,0,25992,Buy,0.0,915,915,12,34,0,0,28,106,0.145856,0.201663,0.687187,-0.029115,0.016691,-0.056704,0.096195,0.045666,0.092239,0.08126,0.055171,-0.115695,-0.020836,-0.071208,0.011179,0.004409,0.016432,0.023712,0.070514,-0.180122,-0.122018,0.306007,0.251894,0.047554,-0.048932,0.02566,-0.156877,0.045887,-0.20226,0.255278,0.058086,-0.17821,0.07298,2018,18,1,Asset Managers & Hedge Funds,Independent Asset Manager,Asia Pacific,HONG KONG,744,20200601,20170601,SEN,USD,FLOW LOCAL MARKET,ASIA-TOKYO,ASIA MARKET MAKING,ASIA HIGH YIELD,ASIA HIGH YIELD,17,Communications,Internet Connectiv Svcs,Euro-dollar,500000000.0,FIXED,1096,882,214


In [11]:
# Keep one row (the first encountered) per customer / bond / direction triple
# in every split.
dedup_keys = ['CustomerIdx', 'IsinIdx', 'BuySell']
train, val, test = (
    frame.drop_duplicates(dedup_keys) for frame in (train, val, test)
)

In [17]:
train.shape, val.shape, test.shape

((983172, 72), (493590, 72), (484758, 73))

In [18]:
%%time
# Persist the three splits; the index is reset first because feather files
# require a default RangeIndex.
for frame, file_name in ((train, 'metadata_train.feather'),
                         (val, 'metadata_val.feather'),
                         (test, 'metadata_test.feather')):
    frame.reset_index(drop=True).to_feather(INTERIM/file_name)

CPU times: user 4.66 s, sys: 1.73 s, total: 6.38 s
Wall time: 5.98 s


## Checkpoint

In [91]:
# Reload the splits written by the checkpoint cell above.
# Fix: `test` used to be read from metadata_val.feather, which silently turned
# the test frame into a copy of the validation frame.
train = pd.read_feather(INTERIM/'metadata_train.feather')
val = pd.read_feather(INTERIM/'metadata_val.feather')
test = pd.read_feather(INTERIM/'metadata_test.feather')

In [92]:
# Categorical columns: label-encoded below and used to size the embeddings.
cat_cols = ['Sector', 'Subsector', 'Region_x', 'Country', 
            'TickerIdx', 'Seniority', 'Currency', 'ActivityGroup', 
            'Region_y', 'Activity', 'RiskCaptain', 'Owner', 
            'IndustrySector', 'IndustrySubgroup', 'MarketIssue', 'CouponType']
# Continuous columns: standard-scaled below. 'BuySell' is appended to this list
# later, after it has been converted to a 0/1 flag.
num_cols = ['ActualMaturityDateKey', 'IssueDateKey', 'CompositeRating', 
            'IssuedAmount', 'BondDuration']
# Key columns identifying one (customer, bond, direction) row.
id_cols = ['CustomerIdx', 'IsinIdx', 'BuySell']

In [93]:
%%time
# Label encode cats
from src.utils import to_cat_codes, apply_cats
# train is converted first; val and test are then passed through apply_cats
# together with train (presumably to reuse train's category vocabulary).
to_cat_codes(train, cat_cols)
for held_out in (val, test):
    apply_cats(held_out, train)

# Replace every categorical column with its integer codes.
for frame in (train, val, test):
    for col in cat_cols:
        frame[col] = frame[col].cat.codes

CPU times: user 6.18 s, sys: 432 ms, total: 6.61 s
Wall time: 6.61 s


In [94]:
# Categorical columns in which at least one split contains a negative code.
nan_cols = [col for col in cat_cols
            if min(frame[col].min() for frame in (train, val, test)) < 0]

In [95]:
nan_cols

['Subsector', 'TickerIdx', 'IndustrySector', 'IndustrySubgroup', 'MarketIssue']

In [96]:
# Shift the codes of those columns up by one in every split, so that the
# negative code becomes 0.
for frame in (train, val, test):
    for col in nan_cols:
        frame[col] = frame[col] + 1

In [97]:
%%time
# Scale conts
from sklearn.preprocessing import StandardScaler
# NOTE(review): the scaler statistics are computed over train, val and test
# together, so the held-out splits influence the normalisation applied to the
# training data — confirm this is intended.
scaler = StandardScaler().fit(pd.concat([train[num_cols], 
                              val[num_cols], test[num_cols]]))

def scale_features(df, scaler, num_cols):
    """Overwrite the `num_cols` columns of `df` with their scaled values.

    Args:
        df: DataFrame modified in place.
        scaler: fitted object whose `transform` returns a 2-D array with one
            column per entry of `num_cols`.
        num_cols: names of the columns to scale, in the scaler's column order.

    Returns:
        None; `df` is updated in place.
    """
    transformed = scaler.transform(df[num_cols])
    # Walk the transformed matrix column by column, alongside the column names.
    for name, column in zip(num_cols, transformed.T):
        df[name] = column

# Apply the fitted scaler to each split in place.
for frame in (train, val, test):
    scale_features(frame, scaler, num_cols)

CPU times: user 440 ms, sys: 516 ms, total: 956 ms
Wall time: 952 ms


In [98]:
import torch.utils.data
from torch.utils.data import DataLoader

In [146]:
from collections import namedtuple

class MultimodalDataset(torch.utils.data.Dataset):
    """Dataset pairing per-row tabular features with a sequence and its targets.

    Args:
        cats: object with a `.values` array (e.g. a DataFrame) of categorical
            codes; stored as int64.
        conts: object with a `.values` array of continuous features; stored as
            float32.
        seqs: array-like of input sequences; stored as float32.
        targets: optional array-like of targets; stored as float32. When
            omitted, an all-zero array shaped like `seqs` is used instead.
    """

    def __init__(self, cats, conts, seqs, targets=None):
        self.cats = cats.values.astype(np.int64)
        self.conts = conts.values.astype(np.float32)
        self.seqs = np.array(seqs).astype(np.float32)
        if targets is None:
            # Placeholder targets so that every item has the same four fields.
            self.targets = np.zeros(self.seqs.shape, dtype=np.float32)
        else:
            self.targets = np.array(targets).astype(np.float32)

    def __len__(self):
        return self.cats.shape[0]

    def __getitem__(self, idx):
        # Item layout: [cats, conts, seqs, targets]
        fields = (self.cats, self.conts, self.seqs, self.targets)
        return [field[idx] for field in fields]

In [100]:
# Fetch the interest sequence of every train / val row by its
# (CustomerIdx, IsinIdx, BuySell) key. BuySell must still hold its original
# values at this point, since they are part of the lookup key.
train_seqs = np.array([
    interests[key]
    for key in zip(train.CustomerIdx, train.IsinIdx, train.BuySell)
])
val_seqs = np.array([
    interests[key]
    for key in zip(val.CustomerIdx, val.IsinIdx, val.BuySell)
])

In [101]:
# Turn BuySell into a 0/1 flag (1 == 'Buy') and treat it as a continuous input.
# Both steps are guarded so that re-running the cell is harmless: without the
# guards a second run would compare the already-encoded integers with 'Buy'
# (zeroing the whole column) and append 'BuySell' to num_cols a second time.
for frame in (train, val, test):
    if not pd.api.types.is_numeric_dtype(frame['BuySell']):
        frame['BuySell'] = (frame['BuySell'] == 'Buy').astype(np.int64)
if 'BuySell' not in num_cols:
    num_cols.append('BuySell')

In [103]:
train_seqs.shape, val_seqs.shape

((983172, 16), (493590, 16))

In [147]:
# Inputs and targets are the same sequences shifted by one step.
# Training stops one step earlier than validation, so the final step of each
# sequence is only ever used as a validation target.
train_inputs, train_targets = train_seqs[:, :-2], train_seqs[:, 1:-1]
val_inputs, val_targets = val_seqs[:, :-1], val_seqs[:, 1:]

# Note: despite the `_ds` suffix these are DataLoaders, not Datasets.
train_ds = DataLoader(
    MultimodalDataset(train[cat_cols], train[num_cols],
                      train_inputs, train_targets),
    batch_size=128, shuffle=True)
val_ds = DataLoader(
    MultimodalDataset(val[cat_cols], val[num_cols],
                      val_inputs, val_targets),
    batch_size=128)

In [145]:
Batch = namedtuple('Batch', ['cats', 'conts', 'seqs', 'targets'])

In [148]:
x = next(iter(train_ds))

In [106]:
# Cardinality of each categorical column (largest code seen in train, plus one)
cat_szs = [int(train[col].max()) + 1 for col in cat_cols]
# (cardinality, embedding width) pairs; the width is half the cardinality
# (rounded up), capped at 50.
emb_szs = [(n_cat, min(50, (n_cat + 1) // 2)) for n_cat in cat_szs]

In [418]:
import torch.nn as nn  # needed here: no earlier cell in file order binds `nn`


class MultimodalClassifier(nn.Module):
    """Structured-feature network that produces the initial state of an RNN.

    `forward` only runs the structured network; callers then drive
    `self.sequential_net` one timestep at a time, starting from the returned
    state.

    Args:
        emb_szs, n_cont, emb_drop, szs, drops: forwarded unchanged to NeuralNet.
        rnn_hidden_sz: hidden size of the recurrent network.
        rnn_input_sz: input size of the recurrent network.
        rnn_n_layers: number of recurrent layers.
        rnn_drop: dropout forwarded to LSTMClassifier.
    """

    def __init__(self, emb_szs, n_cont, emb_drop, szs, drops, 
                 rnn_hidden_sz, rnn_input_sz, rnn_n_layers, rnn_drop):
        super().__init__()
        # The output width is chosen so that forward() can reshape it into
        # (2, n_layers, batch, hidden_sz).
        self.structured_net = NeuralNet(emb_szs, n_cont=n_cont, 
                        emb_drop=emb_drop, szs=szs, drops=drops, 
                        out_sz=rnn_hidden_sz * rnn_n_layers * 2)
        
        self.sequential_net = LSTMClassifier(input_sz=rnn_input_sz,
                        hidden_sz=rnn_hidden_sz, n_layers=rnn_n_layers, 
                        drop=rnn_drop)  
        self.rnn_n_layers = rnn_n_layers
        self.rnn_hidden_sz = rnn_hidden_sz
        
    def forward(self, cats, conts):
        """Return a tensor of shape (2, rnn_n_layers, batch, rnn_hidden_sz).

        The leading axis of size 2 presumably separates the LSTM hidden state
        from the cell state — TODO confirm against LSTMClassifier.
        """
        out = self.structured_net(cats, conts)
        # (batch, 2 * layers * hidden) -> (batch, 2, layers, hidden)
        #                              -> (2, layers, batch, hidden)
        # A single permute is equivalent to transpose(0,1).transpose(1,2).
        return out.view(-1, 2, self.rnn_n_layers, self.rnn_hidden_sz) \
                    .permute(1, 2, 0, 3)

In [213]:
from src.neuralnet import NeuralNet
n_layers = 2
hidden_sz = 64
# out_sz is sized so that the output can later be reshaped to
# (batch, 2, n_layers, hidden_sz) and handed to the recurrent net as its
# starting state (the factor 2 is presumably hidden + cell state — TODO confirm).
structured_net = NeuralNet(emb_szs, n_cont=len(num_cols), emb_drop=0.04,
                    szs=[1000, 500], drops=[0.001, 0.01], 
                    out_sz=hidden_sz * n_layers * 2)

In [214]:
from src.lstm import LSTMClassifier
sequential_net = LSTMClassifier(input_sz=1, hidden_sz=hidden_sz, 
                                n_layers=n_layers, drop=0.04)

In [215]:
out = structured_net(x[0], x[1])

In [269]:
import torch.nn as nn
import torch.nn.functional as F
from torch import optim
criterion = nn.BCEWithLogitsLoss()

In [217]:
out.size()

torch.Size([128, 256])

In [245]:
# `out` is (128, 256) per the output above: the view gives
# (batch, 2, n_layers, hidden_sz) and the two transposes reorder it to
# (2, n_layers, batch, hidden_sz).
hidden = out.view(-1, 2, n_layers, hidden_sz).transpose(0,1).transpose(1,2)
# Swap the first two axes so that indexing by i selects one timestep for the
# whole batch.
seqs = x[2].transpose(0,1)
targets = x[3].transpose(0,1)
output = None
loss = 0
for i in range(len(seqs)): # for each timestep
    # Feed a single timestep, reshaped to (1, batch, 1), and carry `hidden` over.
    output, hidden = sequential_net(seqs[i].unsqueeze(0).unsqueeze(2), hidden)
    loss += criterion(output, targets[i].unsqueeze(1))
# Mean per-timestep loss as a plain Python float.
loss = loss.item() / len(seqs)

In [261]:
model = MultimodalClassifier(emb_szs, n_cont=len(num_cols), emb_drop=0.04,
                    szs=[1000, 500], drops=[0.001, 0.01], 
                    rnn_hidden_sz=32, rnn_input_sz=1, rnn_n_layers=2, 
                    rnn_drop=0.04)

In [262]:
hidden = model(x[0], x[1])

In [263]:
# Same manual unroll as before, this time through the combined model:
# make the sequences time-major, step the recurrent net once per timestep and
# accumulate the per-step loss.
seqs, targets = x[2].transpose(0, 1), x[3].transpose(0, 1)
output, loss = None, 0
for step_idx, step_input in enumerate(seqs):
    step_input = step_input.unsqueeze(0).unsqueeze(2)  # -> (1, batch, 1)
    output, hidden = model.sequential_net(step_input, hidden)
    loss += criterion(output, targets[step_idx].unsqueeze(1))
# Mean per-timestep loss as a plain Python float.
loss = loss.item() / len(seqs)

In [266]:
def train_step(model, cats, conts, seqs, targets, optimizer, criterion):
    """Run one optimisation step on a single batch.

    The structured part of `model` produces the initial recurrent state; the
    recurrent part (`model.sequential_net`) is then stepped once per timestep
    and the per-step losses are summed before back-propagation.

    Args:
        model: module callable as `model(cats, conts)` and exposing
            `sequential_net(step_input, hidden) -> (output, hidden)`.
        cats, conts: structured inputs for the batch.
        seqs: tensor indexed as [batch, timestep].
        targets: tensor with the same layout as `seqs`.
        optimizer: optimizer over the model parameters.
        criterion: loss applied to every timestep's output.

    Returns:
        The summed loss divided by the number of timesteps, as a float.
    """
    model.train()
    optimizer.zero_grad()
    hidden = model(cats, conts)

    # Time-major views: iterating now yields one timestep per item.
    step_inputs = seqs.transpose(0, 1)
    step_targets = targets.transpose(0, 1)

    total_loss = 0
    for t, step_in in enumerate(step_inputs):
        # (batch,) -> (1, batch, 1) for the recurrent net
        output, hidden = model.sequential_net(step_in[None, :, None], hidden)
        total_loss += criterion(output, step_targets[t][:, None])

    total_loss.backward()
    optimizer.step()
    return total_loss.item() / len(step_inputs)

In [420]:
model = MultimodalClassifier(emb_szs, n_cont=len(num_cols), emb_drop=0.04,
                    szs=[1000, 500], drops=[0.001, 0.01], 
                    rnn_hidden_sz=32, rnn_input_sz=1, rnn_n_layers=2, 
                    rnn_drop=0.04)

In [421]:
optimizer = optim.Adam(model.parameters(), lr=1e-3)
criterion = nn.BCEWithLogitsLoss()

In [422]:
train_step(model, x[0], x[1], x[2], x[3], optimizer, criterion)

0.6249370574951172

In [414]:
import torch

In [402]:
def evaluate(model, cats, conts, seqs):
    """Predict the probability that follows the last timestep of each sequence.

    Puts the model in eval mode (it is left in eval mode afterwards), runs the
    structured part to get the initial recurrent state, steps the recurrent
    part through every timestep without tracking gradients, and applies a
    sigmoid to the output of the final step.

    Args:
        model: module callable as `model(cats, conts)` and exposing
            `sequential_net(step_input, hidden) -> (output, hidden)`.
        cats, conts: structured inputs for the batch.
        seqs: tensor indexed as [batch, timestep].

    Returns:
        1-D tensor with one probability per batch element.

    Raises:
        ValueError: if `seqs` has no timesteps (previously this surfaced as an
            unbound-variable error on `output`).
    """
    model.eval()
    with torch.no_grad():
        hidden = model(cats, conts)
        output = None
        for step_in in seqs.transpose(0, 1):  # one timestep per iteration
            # (batch,) -> (1, batch, 1) for the recurrent net
            output, hidden = model.sequential_net(
                step_in.unsqueeze(0).unsqueeze(2), hidden)
        if output is None:
            raise ValueError('seqs must contain at least one timestep')
        # torch.sigmoid replaces F.sigmoid, which is the deprecated spelling.
        return torch.sigmoid(output).view(-1)

In [403]:
preds = evaluate(model, x[0], x[1], x[2])

In [437]:
def get_predictions(model, data_loader, print_every=800, USE_CUDA=False):
    """Collect last-timestep targets and predictions over a whole loader.

    Args:
        model: model accepted by `evaluate`.
        data_loader: yields (cats, conts, seqs, targets) batches.
        print_every: a progress line is printed whenever the batch index is a
            multiple of this value (so batch 0 always prints).
        USE_CUDA: move every batch tensor to the GPU before evaluating.

    Returns:
        (all_targets, all_preds): two equally long Python lists holding, for
        each sample, the target at the last timestep and the predicted
        probability.
    """
    all_targets, all_preds = [], []
    for batch_idx, batch in enumerate(data_loader):
        cats, conts, seqs, targets = batch
        with torch.no_grad():
            if USE_CUDA:
                cats, conts, seqs, targets = (
                    tensor.cuda() for tensor in (cats, conts, seqs, targets))
            batch_preds = evaluate(model, cats, conts, seqs)
            # Only the final timestep of each target sequence is scored.
            last_step_targets = targets.cpu().numpy()[:, -1]
            all_targets.extend(last_step_targets)
            all_preds.extend(batch_preds.cpu().numpy())
            assert len(all_targets) == len(all_preds)
            if batch_idx % print_every != 0:
                continue
            print('[{}/{} ({:.0f}%)]'.format(
                    batch_idx * len(seqs), len(data_loader.dataset),
                    100. * batch_idx / len(data_loader)))
    return all_targets, all_preds

In [423]:
targets, preds = get_predictions(model, val_ds)



In [424]:
len(targets), len(preds)

(493590, 493590)

In [425]:
from sklearn.metrics import roc_auc_score

In [428]:
nn.BCELoss()(torch.Tensor(preds), torch.Tensor(targets)).item()

0.6180896759033203

In [426]:
roc_auc_score(targets, preds)

0.44819258198698575

In [410]:
len(cat_cols), len(num_cols)

(16, 6)

In [447]:
def train_model(model, train_loader, val_loader, optimizer, criterion,
                n_epochs, print_every=200, val_every=5, USE_CUDA=False):
    """Train `model`, logging the training loss and validating periodically.

    Args:
        model: model accepted by `train_step` / `get_predictions`.
        train_loader: yields (cats, conts, seqs, targets) training batches.
        val_loader: loader passed to `get_predictions` for validation.
        optimizer: optimizer over the model parameters.
        criterion: per-timestep training loss.
        n_epochs: number of passes over `train_loader`.
        print_every: log the mean training loss whenever the (non-zero) batch
            index is a multiple of this value.
        val_every: validate every `val_every * print_every` batches.
        USE_CUDA: move the model and every batch to the GPU.

    Returns:
        (model, train_losses, val_losses, val_auc_scores), where the three
        lists hold one entry per logging / validation event.
    """
    if USE_CUDA:
        model = model.cuda()
    train_losses, val_losses, val_auc_scores = [], [], []
    val_every *= print_every  # validation cadence expressed in batches
    for epoch in range(n_epochs):
        # Running sum of per-batch losses and how many batches it covers.
        # The count is tracked explicitly because the first logging window of
        # an epoch spans batches 0..print_every, i.e. print_every + 1 batches;
        # dividing by print_every overstated that first average.
        running_loss, n_accumulated = 0.0, 0
        for batch_idx, (cats, conts, seqs, targets) in enumerate(train_loader):
            if USE_CUDA:
                cats, conts, seqs, targets = cats.cuda(), conts.cuda(), \
                                             seqs.cuda(), targets.cuda()
            running_loss += train_step(model, cats, conts, seqs, targets,
                                       optimizer, criterion)
            n_accumulated += 1

            if batch_idx > 0 and batch_idx % print_every == 0:
                mean_loss = running_loss / n_accumulated
                print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(
                        epoch + 1, batch_idx * len(seqs), len(train_loader.dataset),
                        100. * batch_idx / len(train_loader), mean_loss))
                train_losses.append(mean_loss)
                running_loss, n_accumulated = 0.0, 0

            if batch_idx > 0 and batch_idx % val_every == 0:
                # Separate names so the training batch's `targets` is not shadowed.
                val_targets, val_preds = get_predictions(model, val_loader,
                                                         USE_CUDA=USE_CUDA)
                val_loss = nn.BCELoss()(torch.Tensor(val_preds),
                                        torch.Tensor(val_targets)).item()
                val_losses.append(val_loss)
                val_auc = roc_auc_score(val_targets, val_preds)
                val_auc_scores.append(val_auc)
                print(f'ROC AUC Score: {val_auc:.6f}') 
                print(f'Validation Loss: {val_loss:.6f}')
        print()
    return model, train_losses, val_losses, val_auc_scores

In [465]:
from src.multimodal import MultimodalClassifier, MultimodalDataset, train_model

In [476]:
# Rebuild the loaders with the MultimodalDataset imported from src.multimodal.
# Inputs and targets are the same sequences shifted by one step; training stops
# one step earlier than validation, so the final step of each sequence is only
# ever used as a validation target.
train_ds = DataLoader(MultimodalDataset(
                train[cat_cols], train[num_cols], 
                train_seqs[:,:-2], train_seqs[:,1:-1]), 
                batch_size=128, shuffle=True)
val_ds = DataLoader(MultimodalDataset(
                val[cat_cols], val[num_cols], 
                val_seqs[:,:-1], val_seqs[:,1:]), 
                batch_size=128)

In [495]:
# Model used for the training run below: a 2-layer recurrent net with hidden
# size 64 on a scalar input per timestep, fed by a structured net with hidden
# layers of 1000 and 500 units.
model = MultimodalClassifier(emb_szs, n_cont=len(num_cols), emb_drop=0.04,
                    szs=[1000, 500], drops=[0.001, 0.01], 
                    rnn_hidden_sz=64, rnn_input_sz=1, rnn_n_layers=2, 
                    rnn_drop=0.04)

In [496]:
# Use the GPU when one is available; a hard-coded True makes model.cuda()
# raise on CPU-only machines.
USE_CUDA = torch.cuda.is_available()
if USE_CUDA:
    model = model.cuda()

In [497]:
optimizer = optim.Adam(model.parameters(), lr=1e-3)
criterion = nn.BCEWithLogitsLoss()

In [498]:
%%time
# Two epochs of training. train_model here is the one imported from
# src.multimodal; assuming it matches the definition earlier in this notebook,
# val_every=10 with the default print_every=200 validates every 2000 batches —
# TODO confirm.
model, train_losses, val_losses, val_auc_scores = train_model(
                model, train_ds, val_ds, optimizer, criterion,
                n_epochs=2, USE_CUDA=USE_CUDA, val_every=10)

ROC AUC Score: 0.574720
Validation Loss: 0.156681
ROC AUC Score: 0.596517
Validation Loss: 0.152969
ROC AUC Score: 0.608238
Validation Loss: 0.154524

ROC AUC Score: 0.612254
Validation Loss: 0.152280
ROC AUC Score: 0.625959
Validation Loss: 0.151150
ROC AUC Score: 0.631268
Validation Loss: 0.154673

CPU times: user 12min 27s, sys: 19.4 s, total: 12min 47s
Wall time: 12min 46s


In [499]:
from src.multimodal import get_predictions

In [500]:
targets, preds = get_predictions(model, val_ds, USE_CUDA=USE_CUDA)



In [501]:
roc_auc_score(targets, preds)

0.6153719275149232