## 环境配置

In [1]:
import sys
sys.path.append("../")

import warnings 
warnings.filterwarnings("ignore") 

import os
import random
import janestreet
import pandas as pd
import numpy as np
from sklearn.metrics import log_loss, roc_auc_score

In [2]:
import torch
from torch import nn
from torch.utils.data import DataLoader

In [3]:
def Seed(SEED=42):
    """Seed every random number generator this notebook relies on.

    Sets ``PYTHONHASHSEED`` and seeds Python's ``random``, NumPy and PyTorch.
    When CUDA is available it also seeds the CUDA generators and switches
    cuDNN to deterministic, non-benchmarking mode.

    Args:
        SEED: Integer seed applied to all generators.
    """
    os.environ['PYTHONHASHSEED'] = str(SEED)
    for seeder in (random.seed, np.random.seed, torch.manual_seed):
        seeder(SEED)

    if not torch.cuda.is_available():
        return

    torch.cuda.manual_seed(SEED)
    torch.cuda.manual_seed_all(SEED)
    cudnn = torch.backends.cudnn
    cudnn.benchmark = False
    cudnn.deterministic = True


Seed(SEED=42)

## Load Data

In [4]:
# Directory holding the competition files (machine-specific absolute path).
DATA_PATH = '/home/chencheng/kaggle/jane-street-market-prediction/'
# Read the whole training table into a single in-memory DataFrame.
DATA = pd.read_csv("{}train.csv".format(DATA_PATH))

## Preprocessing

In [5]:
# Keep only rows with strictly positive weight and renumber the index from 0.
DATA = DATA[DATA.weight > 0].reset_index(drop=True)

In [6]:
def Fill_NaN(df, begin_date=86):
    """Fill missing values day by day with the previous day's column means.

    Starting at ``begin_date``, each day's rows have their NaNs replaced by the
    per-column mean of the rows dated one day earlier in the *original* ``df``
    (so earlier fills never feed later ones). Processing stops at the first
    date that has no rows; rows dated before ``begin_date`` or after that gap
    are not part of the result.

    Args:
        df: DataFrame with a ``date`` column. It is not modified.
        begin_date: First date to process; must be at least 1 because the
            previous day's rows are needed for the fill values.

    Returns:
        A DataFrame with the processed rows (original index labels kept), or
        an empty DataFrame when ``begin_date`` has no rows.
    """
    assert begin_date >= 1, "begin_date must be at least 1."
    filled_days = []
    while True:
        day_rows = df[df.date == begin_date]
        if len(day_rows) == 0:
            break
        prev_day_mean = df[df.date == begin_date - 1].mean()
        # fillna returns a new frame, so the filtered slice is never mutated.
        filled_days.append(day_rows.fillna(prev_day_mean))
        begin_date += 1

    if not filled_days:
        return pd.DataFrame()
    # One concat at the end instead of the removed, quadratic DataFrame.append.
    return pd.concat(filled_days)

In [7]:
# Fill NaNs day by day using Fill_NaN's default begin_date (86).
DATA = Fill_NaN(DATA)
# Keep only rows dated after day 85 and renumber the index from 0.
DATA = DATA[DATA.date > 85].reset_index(drop=True)

In [34]:
import pickle
# Persist the preprocessed DataFrame so later runs can skip the steps above.
with open("../model_02_18/DATA.pkl","wb") as file:
    pickle.dump(DATA, file)

In [8]:
def ds_standardize_zscore(df, mean=None, std=None):
    """Z-score standardize a Series or DataFrame.

    Args:
        df: pandas Series or DataFrame to standardize.
        mean: Precomputed mean. Used only when ``std`` is also given.
        std: Precomputed standard deviation. Used only when ``mean`` is also
            given.

    Returns:
        ``(df - mean) / std`` alone when both ``mean`` and ``std`` are
        supplied; otherwise the tuple ``(standardized, mean, std)`` with the
        statistics computed from ``df``.

    Raises:
        AssertionError: When statistics are computed here and any standard
            deviation is zero (the data, or one of its columns, is constant).
    """
    if mean is not None and std is not None:
        return (df - mean) / std

    std = df.std()
    # Every std must be non-zero; the previous `.any()` let a DataFrame with
    # constant columns through and produced inf/NaN for those columns.
    assert np.all(std != 0), "series is constant"
    mean = df.mean()
    return (df - mean) / std, mean, std

In [9]:
# Two engineered features: the sum of features 41-43 and the ratio feature_1 / feature_2.
DATA['feature_41_42_43'] = DATA['feature_41'] + DATA['feature_42'] + DATA['feature_43']
# The 1e-5 offset keeps the denominator away from an exact zero.
DATA['feature_1_2'] = DATA['feature_1'] / (DATA['feature_2'] + 1e-5)
# Standardize both new columns and keep the statistics in module-level variables.
DATA['feature_41_42_43'], feature_41_42_43_mean, feature_41_42_43_std = ds_standardize_zscore(DATA['feature_41_42_43'])
DATA['feature_1_2'], feature_1_2_mean, feature_1_2_std = ds_standardize_zscore(DATA['feature_1_2'])

In [10]:
# Every column whose name contains 'feature', including the two engineered ones.
fea_cols = [c for c in DATA.columns if 'feature' in c]

In [35]:
# Store the per-feature means on disk next to the model artifacts.
np.save('../model_02_18/f_mean.npy', DATA[fea_cols].mean())

In [11]:
from scipy.optimize import minimize
from sklearn.neighbors import KernelDensity

def mpPDF(var,q,pts):
    """Evaluate the Marcenko-Pastur density on an evenly spaced grid.

    Args:
        var: Variance parameter (scalar or length-1 array).
        q: Ratio T/N of observations to variables.
        pts: Number of grid points between the lower and upper edge.

    Returns:
        pandas Series of density values indexed by the eigenvalue grid.
    """
    root = (1./q)**.5
    eMin = var*(1-root)**2
    eMax = var*(1+root)**2
    grid = np.linspace(eMin,eMax,pts)
    density = q/(2*np.pi*var*grid)*((eMax-grid)*(grid-eMin))**.5
    # Flatten both pieces: an array-valued `var` yields column-shaped arrays.
    return pd.Series(density.reshape(-1,), index=grid.reshape(-1,))


def getPCA(matrix):
    """Eigendecompose a Hermitian matrix, largest eigenvalue first.

    Args:
        matrix: Hermitian (e.g. symmetric real) square matrix.

    Returns:
        Tuple ``(eVal, eVec)``: ``eVal`` is a diagonal matrix of eigenvalues
        in descending order and ``eVec`` holds the matching eigenvectors as
        columns.
    """
    values, vectors = np.linalg.eigh(matrix)
    order = values.argsort()[::-1]  # descending eigenvalue order
    return np.diagflat(values[order]), vectors[:, order]

def fitKDE(obs,bWidth=.25,kernel='gaussian',x=None):
    """Fit a kernel density estimate to ``obs`` and evaluate it on ``x``.

    Args:
        obs: Array of observations; a 1-D array is treated as one feature.
        bWidth: Kernel bandwidth.
        kernel: Kernel name passed to ``KernelDensity``.
        x: Points at which to evaluate the density. Defaults to the unique
            values of ``obs``.

    Returns:
        pandas Series of density values indexed by the evaluation points.
    """
    if len(obs.shape)==1:
        obs=obs.reshape(-1,1)
    estimator=KernelDensity(kernel=kernel,bandwidth=bWidth)
    estimator.fit(obs)

    points=np.unique(obs).reshape(-1,) if x is None else x
    if len(points.shape)==1:
        points=points.reshape(-1,1)

    # score_samples returns log-densities; exponentiate to get the pdf.
    density=np.exp(estimator.score_samples(points))
    return pd.Series(density,index=points.flatten())

def cov2corr(cov):
    """Convert a covariance matrix into a correlation matrix.

    Args:
        cov: Square covariance matrix as a NumPy array.

    Returns:
        Correlation matrix with entries limited to ``[-1, 1]`` to absorb
        floating-point overshoot. ``cov`` itself is left untouched.
    """
    sigma=np.sqrt(np.diag(cov))
    scaled=cov/np.outer(sigma,sigma)
    return np.clip(scaled,-1,1) # numerical error

def errPDFs(var,eVal,q,bWidth,pts=1000):
    """Sum of squared errors between the Marcenko-Pastur pdf and the empirical pdf.

    Args:
        var: Variance parameter of the theoretical distribution.
        eVal: Observed eigenvalues.
        q: Ratio T/N of observations to variables.
        bWidth: Bandwidth for the kernel density estimate of ``eVal``.
        pts: Number of grid points on which both densities are compared.

    Returns:
        The summed squared difference between the two densities.
    """
    theoretical=mpPDF(var,q,pts)
    # Evaluate the empirical density on the same grid as the theoretical one.
    empirical=fitKDE(eVal,bWidth,x=theoretical.index.values)
    residual=empirical-theoretical
    return np.sum(residual**2)

def findMaxEval(eVal,q,bWidth):
    """Fit the Marcenko-Pastur variance and return the largest random eigenvalue.

    Args:
        eVal: Observed eigenvalues.
        q: Ratio T/N of observations to variables.
        bWidth: Bandwidth forwarded to the density-fit error function.

    Returns:
        Tuple ``(eMax, var)``: the upper edge of the fitted distribution and
        the fitted variance. ``var`` falls back to 1 when the optimizer does
        not report success.
    """
    fit=minimize(errPDFs,.5,args=(eVal,q,bWidth),
                 bounds=((1E-5,1-1E-5),))
    var=fit['x'][0] if fit['success'] else 1
    eMax=var*(1+(1./q)**.5)**2
    return eMax,var

def denoisedCorr(eVal,eVec,nFacts):
    """Denoise a correlation matrix by flattening its random eigenvalues.

    Eigenvalues from position ``nFacts`` onward are replaced by their average
    before the matrix is rebuilt and rescaled to a correlation matrix.

    Args:
        eVal: Diagonal matrix of eigenvalues.
        eVec: Matrix whose columns are the matching eigenvectors.
        nFacts: Number of leading eigenvalues kept as-is.

    Returns:
        The denoised correlation matrix.
    """
    spectrum=np.diag(eVal).copy()
    n_noise=float(spectrum.shape[0] - nFacts)
    spectrum[nFacts:]=spectrum[nFacts:].sum()/n_noise
    rebuilt=np.dot(eVec,np.diag(spectrum)).dot(eVec.T)
    return cov2corr(rebuilt)

def denoisedCorr2(eVal,eVec,nFacts,alpha=0):
    """Denoise a correlation matrix through targeted shrinkage.

    The matrix is split into a part spanned by the first ``nFacts``
    eigenvectors and a part spanned by the rest. Only the second part is
    shrunk toward its own diagonal.

    Args:
        eVal: Diagonal matrix of eigenvalues.
        eVec: Matrix whose columns are the matching eigenvectors.
        nFacts: Number of leading eigen-pairs left unshrunk.
        alpha: Weight of the unshrunk remainder; ``1 - alpha`` goes to its
            diagonal.

    Returns:
        The shrunk matrix.
    """
    signal_vals,signal_vecs=eVal[:nFacts,:nFacts],eVec[:,:nFacts]
    noise_vals,noise_vecs=eVal[nFacts:,nFacts:],eVec[:,nFacts:]
    signal=signal_vecs.dot(signal_vals).dot(signal_vecs.T)
    noise=noise_vecs.dot(noise_vals).dot(noise_vecs.T)
    noise_diag=np.diag(np.diag(noise))
    return signal+alpha*noise+(1-alpha)*noise_diag

from sklearn.base import BaseEstimator, TransformerMixin

#@njit
def fillna_npwhere_njit(array, values):
    """Replace NaNs in ``array`` with ``values``.

    The sum of the array is used as a cheap NaN probe; when it is not NaN the
    input is returned unchanged (same object).

    Args:
        array: NumPy array to clean.
        values: Replacement for NaN entries (scalar or broadcastable array).

    Returns:
        The original array, or a new array with NaNs replaced.
    """
    if not np.isnan(array.sum()):
        return array
    return np.where(np.isnan(array), values, array)

class RMTDenoising(BaseEstimator, TransformerMixin):
    """Project columns onto the leading eigenvectors of a denoised correlation matrix.

    ``fit`` estimates a correlation matrix from a row sample, fits a
    Marcenko-Pastur distribution to its eigenvalues to decide how many
    eigenvalues stand out from noise, shrinks the remaining part, and keeps
    the leading eigenvectors of the result. ``transform`` projects the data
    onto those eigenvectors.

    Args:
        bWidth: Bandwidth for the eigenvalue kernel density estimate.
        alpha: Shrinkage weight passed to ``denoisedCorr2``.
        feature_0: When True, the ``feature_0`` column is excluded from the
            decomposition and copied through to the output unchanged.
        sample: Fraction of rows used to estimate the covariance.
        seed: Random state for the row sample.
    """

    def __init__(self, bWidth=.01, alpha=.5, feature_0=True, sample=0.3, seed=2021):
        self.bWidth = bWidth
        self.alpha = alpha
        self.feature_0 = feature_0
        self.sample = sample
        self.seed = seed

    def denoise(self, X):
        """Return the retained eigenvectors (as columns) of the denoised correlation of ``X``."""
        sample = X.sample(frac=self.sample, random_state=self.seed)
        # NOTE(review): q is computed from the full row count although the
        # covariance below comes from `sample`; confirm whether
        # sample.shape[0] was intended.
        q = X.shape[0] / X.shape[1]
        cov = sample.cov().values
        corr0 = cov2corr(cov)

        eVal0, eVec0 = getPCA(corr0)
        eMax0, var0 = findMaxEval(np.diag(eVal0), q, bWidth=self.bWidth)
        # Eigenvalues are reversed to ascending order for searchsorted, so
        # this counts the eigenvalues at or above the fitted upper edge eMax0.
        nFacts0 = eVal0.shape[0] - np.diag(eVal0)[::-1].searchsorted(eMax0)
        corr1 = denoisedCorr2(eVal0, eVec0, nFacts0, alpha=self.alpha)
        eVal1, eVec1 = getPCA(corr1)
        return eVec1[:, :nFacts0]

    def fit(self, X, y=None):
        """Learn the projection matrix ``W_`` from DataFrame ``X``; returns ``self``."""
        if self.feature_0:
            self.cols_ = [c for c in X.columns if c != 'feature_0']
        else:
            self.cols_ = list(X.columns)
        self.W_ = self.denoise(X[self.cols_])
        self.dim_W_ = self.W_.shape[1]
        return self

    def transform(self, X, y=None):
        """Project ``X`` onto the fitted eigenvectors.

        NaNs in the projected columns are treated as 0.

        Returns:
            DataFrame with columns ``proj_0 .. proj_{k-1}`` (plus ``feature_0``
            when enabled), on a fresh 0-based index in the row order of ``X``.
        """
        names = [f'proj_{i}' for i in range(self.dim_W_)]
        filled = fillna_npwhere_njit(X[self.cols_].values, 0)
        projection = pd.DataFrame(filled.dot(self.W_), columns=names)
        if self.feature_0:
            # Assign by position: `projection` has a fresh RangeIndex, so
            # label alignment would give NaN or shuffled values whenever X
            # carries any other index.
            projection['feature_0'] = X['feature_0'].values
        return projection

In [12]:
# Build a denoised response from the five resp columns: fit the RMT projection
# and take its first component. feature_0 is passed through, not projected.
target_tf = RMTDenoising(sample=0.8)
target_tf.fit(DATA[['resp', 'resp_1','resp_2','resp_3','resp_4'] + ['feature_0']])
# NOTE(review): the leading minus sign assumes a particular eigenvector sign,
# which an eigendecomposition does not fix — confirm dresp is positively
# related to resp on this data.
DATA['dresp'] = -target_tf.transform(DATA[['resp', 'resp_1','resp_2','resp_3','resp_4'] + ['feature_0']]).proj_0
# Binary targets: 1 where the (denoised or raw) response is positive.
DATA["action"]  =  (DATA['dresp'] > 0).astype('int') 
DATA['action_0'] = (DATA['resp'] > 0 ).astype('int') 
DATA['action_1'] = (DATA['resp_1'] > 0).astype('int')
DATA['action_2'] = (DATA['resp_2'] > 0).astype('int')
DATA['action_3'] = (DATA['resp_3'] > 0).astype('int')
DATA['action_4'] = (DATA['resp_4'] > 0).astype('int')
# Label column order matters: downstream code reads column 0 ("action") as the trading decision.
lab_cols = ["action", "action_0", "action_1", "action_2", "action_3", "action_4"]

In [13]:
from sklearn.decomposition import PCA
# Whitened PCA over all feature columns; 'mle' lets sklearn choose the number of components.
PcA = PCA(n_components='mle', whiten=True)
PcA.fit(DATA[fea_cols].values)

PCA(n_components='mle', whiten=True)

In [14]:
def Get_K_Fold_Data(i, K, feature: np.ndarray, label: np.ndarray):
    """Split arrays into K contiguous folds and return fold ``i`` as validation.

    Folds have ``len(feature) // K`` rows each and follow the row order of the
    input. Rows beyond ``K * fold_size`` (the remainder) belong to no fold and
    appear in neither output.

    Args:
        i: Index of the validation fold, ``0 <= i < K``.
        K: Number of folds; must be larger than 1.
        feature: Feature array, split along axis 0.
        label: Label array, split along axis 0 with the same boundaries.

    Returns:
        Tuple ``(train_feature, train_label, valid_feature, valid_label)``.

    Raises:
        AssertionError: If ``K <= 1``.
        ValueError: If ``i`` is not a valid fold index.
    """
    assert K > 1, "K must be larger than 1."
    if not 0 <= i < K:
        # Previously this surfaced as an UnboundLocalError at the return.
        raise ValueError(f"fold index i={i} must satisfy 0 <= i < K={K}")

    Fold_size = feature.shape[0] // K
    folds = [slice(j * Fold_size, (j + 1) * Fold_size) for j in range(K)]
    train_folds = [fold for j, fold in enumerate(folds) if j != i]

    valid_feature, valid_label = feature[folds[i]], label[folds[i]]
    # Concatenate once rather than growing the arrays fold by fold.
    train_feature = np.concatenate([feature[fold] for fold in train_folds], axis=0)
    train_label = np.concatenate([label[fold] for fold in train_folds], axis=0)

    return train_feature, train_label, valid_feature, valid_label

In [15]:
class dataset:
    """Map-style dataset pairing one feature row with one label row.

    Args:
        feature: Array of features, indexed along axis 0.
        label: Array of labels, indexed along axis 0.
    """

    def __init__(self, feature: np.ndarray, label: np.ndarray):
        self.feature, self.label = feature, label

    def __len__(self):
        n_rows = self.feature.shape[0]
        return n_rows

    def __getitem__(self, idx):
        """Return the sample at ``idx`` as a dict of float tensors."""
        feature_row = torch.tensor(self.feature[idx], dtype=torch.float)
        label_row = torch.tensor(self.label[idx], dtype=torch.float)
        return {'feature': feature_row, 'label': label_row}

In [16]:
def dataloader(features, labels, BATCH_SIZE, shuffle=True):
    """Wrap feature and label arrays in a batched ``DataLoader``.

    Args:
        features: Feature array.
        labels: Label array.
        BATCH_SIZE: Number of samples per batch.
        shuffle: Whether the loader reshuffles the samples.

    Returns:
        A ``DataLoader`` yielding dicts with ``'feature'`` and ``'label'``.
    """
    return DataLoader(dataset(features, labels), batch_size=BATCH_SIZE, shuffle=shuffle)

## Encoder-Decoder

In [17]:
class GaussianNoise(nn.Module):
    """Add zero-mean Gaussian noise to the input.

    The layer is controlled only by its ``on`` flag; it does not look at the
    module's train/eval mode, so callers switch it off explicitly by setting
    ``layer.on = False``.

    Args:
        noise: Standard deviation of the added noise.
        on: Whether noise is added. Defaults to True.
    """

    def __init__(self, noise, on=True):
        super().__init__()
        self.noise = noise
        # Honor the constructor argument (it was previously forced to True).
        self.on = on

    def forward(self, inputs):
        if not self.on:
            return inputs
        # randn_like samples on the input's own device and dtype, so the
        # layer works on CPU or any GPU without a host-to-device copy.
        return inputs + torch.randn_like(inputs) * self.noise

In [18]:
class Encoder(nn.Module):
    """Noisy two-layer MLP encoder.

    The input is first perturbed by a ``GaussianNoise`` layer and then passed
    through two BatchNorm -> Dropout -> Linear -> ReLU stages.

    Args:
        input_shape: Number of input features.
        hidden_size: Widths of the two linear layers; the second is the
            encoding size.
        noise: Standard deviation handed to the noise layer.
        dropout_rate: Dropout probability used in both stages.
    """

    def __init__(self, input_shape, hidden_size=[64, 16], noise=0.05, dropout_rate=0.5):
        super().__init__()
        self.noise = noise
        self.dropout_rate = dropout_rate
        self.Gaussiannoise = GaussianNoise(noise, on=True)

        wide, narrow = hidden_size[0], hidden_size[1]
        # Layer order fixes the state_dict keys (encoder.0, encoder.2, ...).
        stages = [
            nn.BatchNorm1d(input_shape),
            nn.Dropout(self.dropout_rate),
            nn.Linear(input_shape, wide),
            nn.ReLU(),
            nn.BatchNorm1d(wide),
            nn.Dropout(self.dropout_rate),
            nn.Linear(wide, narrow),
            nn.ReLU(),
        ]
        self.encoder = nn.Sequential(*stages)

    def forward(self, x):
        noisy = self.Gaussiannoise(x)
        return self.encoder(noisy)

In [19]:
class Decoder(nn.Module):
    """Two-layer MLP decoder that maps an encoding back to ``output_shape`` values.

    Args:
        output_shape: Number of output features.
        hidden_size: Same layout as the encoder's: ``hidden_size[1]`` is the
            incoming encoding size and ``hidden_size[0]`` the hidden width.
        dropout_rate: Dropout probability used before both linear layers.
    """

    def __init__(self, output_shape, hidden_size=[64, 16], dropout_rate=0.2):
        super().__init__()
        self.dropout_rate = dropout_rate

        wide, narrow = hidden_size[0], hidden_size[1]
        # Layer order fixes the state_dict keys (decoder.0, decoder.2, ...).
        stages = [
            nn.BatchNorm1d(narrow),
            nn.Dropout(dropout_rate),
            nn.Linear(narrow, wide),
            nn.ReLU(),
            nn.BatchNorm1d(wide),
            nn.Dropout(self.dropout_rate),
            nn.Linear(wide, output_shape),
        ]
        self.decoder = nn.Sequential(*stages)

    def forward(self, x):
        return self.decoder(x)

In [20]:
class Encoder_Decoder(nn.Module):
    """Autoencoder made of an ``Encoder`` followed by a ``Decoder``.

    Args:
        input_shape: Number of input features.
        output_shape: Number of reconstructed features.
        hidden_size: Layer widths shared by encoder and decoder.
        dropout_rate: Accepted but not forwarded; the encoder and decoder
            use their own default dropout rates.
    """

    def __init__(self, input_shape, output_shape, hidden_size=[64, 16], dropout_rate=0):
        super().__init__()
        self.encoder = Encoder(input_shape, hidden_size)
        self.decoder = Decoder(output_shape, hidden_size)

    def forward(self, x):
        return self.decoder(self.encoder(x))

In [21]:
def train_fn(model, optimizer, train_dataloader, loss_fn, device):
    """Run one reconstruction-training epoch and return the mean batch loss.

    The target of every batch is the batch's own ``"feature"`` tensor, i.e.
    the model is trained as an autoencoder.

    Args:
        model: Module called with the feature batch.
        optimizer: Optimizer stepping the model's parameters.
        train_dataloader: Iterable of dicts with a ``"feature"`` tensor.
        loss_fn: Callable ``loss_fn(output, target)``.
        device: Device the batches are moved to.

    Returns:
        Average of the per-batch loss values.
    """
    model.train()
    batch_losses = []
    for batch in train_dataloader:
        optimizer.zero_grad()
        inputs = batch["feature"].to(device)
        targets = batch["feature"].to(device)

        loss = loss_fn(model(inputs), targets)
        loss.backward()
        optimizer.step()
        batch_losses.append(loss.item())
    return sum(batch_losses) / len(train_dataloader)

In [22]:
def inference_fn(model, valid_dataloader, device):
    """Run the model over a dataloader in eval mode and collect its outputs.

    Args:
        model: Module called with each feature batch.
        valid_dataloader: Iterable of dicts with a ``"feature"`` tensor.
        device: Device the batches are moved to.

    Returns:
        2-D NumPy array with one row per sample and as many columns as the
        model emits per sample.
    """
    model.eval()
    preds = []
    with torch.no_grad():
        for valid_data in valid_dataloader:
            features = valid_data["feature"].to(device)
            preds.append(model(features).detach().cpu().numpy())
    preds = np.concatenate(preds)
    # Take the width from the model output instead of a hard-coded 130, which
    # no longer matches the feature count once extra columns are added.
    return preds.reshape(preds.shape[0], -1)

In [23]:
# Settings for pre-training the denoising autoencoder.
TRAIN_MODE = True
EPOCHS = 50
DEVICE = "cuda:0"
BATCH_SIZE = 10240
LEARNING_RATE = 0.001
WEIGHT_DECAY = 0.0001
K = 1  # one pass only: the autoencoder is fit on every row, with no validation fold

if TRAIN_MODE:
    for i in range(K):
        # The commented-out lines below are a disabled K-fold / validation variant.
        #train_feature, train_label, valid_feature, valid_label =  Get_K_Fold_Data(K, i, DATA[fea_cols].values, DATA[lab_cols].values)
        #print(train_feature, train_label, valid_feature, valid_label)
        train_dataloader = dataloader(DATA[fea_cols].values, DATA[lab_cols].values, BATCH_SIZE=BATCH_SIZE, shuffle=True)
        #valid_dataloader = dataloader(valid_feature, valid_label, BATCH_SIZE=BATCH_SIZE, shuffle=False)
        torch.cuda.empty_cache()
        # Input and output widths are equal: the network reconstructs its own input.
        encoder_decoder = Encoder_Decoder(len(fea_cols), len(fea_cols)).to(DEVICE)
        opt = torch.optim.Adam(encoder_decoder.parameters(), lr=LEARNING_RATE, weight_decay=WEIGHT_DECAY)
        loss_fn = nn.MSELoss()
        for epo in range(EPOCHS):
            # train_fn uses the feature batch as both input and target; labels in the loader are unused here.
            train_loss = train_fn(encoder_decoder, opt, train_dataloader, loss_fn, DEVICE)
            #valid_pred = inference_fn(encoder_decoder, valid_dataloader, DEVICE)
            #valid_loss = float((loss_fn(torch.FloatTensor(valid_pred), torch.FloatTensor(valid_feature))).detach().cpu().numpy())
            #print(f"FOLD{i:2} EPOCH:{epo:3} train_loss={train_loss:.5f} valid_loss={valid_loss:.5f}")
            print(f"FOLD{i:2} EPOCH:{epo:3} train_loss={train_loss:.5f}")

FOLD 0 EPOCH:  0 train_loss=3.50765
FOLD 0 EPOCH:  1 train_loss=2.70289
FOLD 0 EPOCH:  2 train_loss=2.54063
FOLD 0 EPOCH:  3 train_loss=2.46370
FOLD 0 EPOCH:  4 train_loss=2.41318
FOLD 0 EPOCH:  5 train_loss=2.39544
FOLD 0 EPOCH:  6 train_loss=2.36164
FOLD 0 EPOCH:  7 train_loss=2.34239
FOLD 0 EPOCH:  8 train_loss=2.31756
FOLD 0 EPOCH:  9 train_loss=2.29953
FOLD 0 EPOCH: 10 train_loss=2.28404
FOLD 0 EPOCH: 11 train_loss=2.27806
FOLD 0 EPOCH: 12 train_loss=2.27266
FOLD 0 EPOCH: 13 train_loss=2.25307
FOLD 0 EPOCH: 14 train_loss=2.25179
FOLD 0 EPOCH: 15 train_loss=2.24575
FOLD 0 EPOCH: 16 train_loss=2.24151
FOLD 0 EPOCH: 17 train_loss=2.23947
FOLD 0 EPOCH: 18 train_loss=2.23573
FOLD 0 EPOCH: 19 train_loss=2.23563
FOLD 0 EPOCH: 20 train_loss=2.23033
FOLD 0 EPOCH: 21 train_loss=2.22364
FOLD 0 EPOCH: 22 train_loss=2.21957
FOLD 0 EPOCH: 23 train_loss=2.22002
FOLD 0 EPOCH: 24 train_loss=2.21650
FOLD 0 EPOCH: 25 train_loss=2.20013
FOLD 0 EPOCH: 26 train_loss=2.19691
FOLD 0 EPOCH: 27 train_loss=

In [24]:
# Move the trained autoencoder to CPU and save its weights to disk.
encoder_decoder.eval()
encoder_decoder.to("cpu")
Encoder_Decoder_Save_Path = "../model_02_18/encoder_decoder_02_18.pth"
torch.save(encoder_decoder.state_dict(), Encoder_Decoder_Save_Path)


DEVICE = "cuda:0"
 
# Disabled alternative: rebuild the model and reload the saved weights
# instead of reusing the in-memory instance.
#encoder_decoder = Encoder_Decoder(len(fea_cols), len(fea_cols)).to(DEVICE)

#encoder_decoder.load_state_dict(torch.load(Encoder_Decoder_Save_Path)) 
# Put the in-memory model back on the GPU, in eval mode, for the next stage.
encoder_decoder.to(DEVICE)
encoder_decoder.eval()

Encoder_Decoder(
  (encoder): Encoder(
    (Gaussiannoise): GaussianNoise()
    (encoder): Sequential(
      (0): BatchNorm1d(132, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (1): Dropout(p=0.5, inplace=False)
      (2): Linear(in_features=132, out_features=64, bias=True)
      (3): ReLU()
      (4): BatchNorm1d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (5): Dropout(p=0.5, inplace=False)
      (6): Linear(in_features=64, out_features=16, bias=True)
      (7): ReLU()
    )
  )
  (decoder): Decoder(
    (decoder): Sequential(
      (0): BatchNorm1d(16, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (1): Dropout(p=0.2, inplace=False)
      (2): Linear(in_features=16, out_features=64, bias=True)
      (3): ReLU()
      (4): BatchNorm1d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (5): Dropout(p=0.2, inplace=False)
      (6): Linear(in_features=64, out_features=132, bias=True)
    )

## Resnet 

In [25]:
class ResnetBlock(nn.Module):
    """Two BatchNorm -> Dropout -> Linear -> ReLU stages applied in sequence.

    Args:
        input_shape: Number of input features.
        hidden_size: Widths of the two linear layers; the second is the
            block's output width.
        noise: Accepted but unused (the noise layers are commented out).
        dropout_rate: Dropout probability used in both stages.
    """

    def __init__(self, input_shape, hidden_size=[64, 32], noise=0.05, dropout_rate=0.5):
        super().__init__()
        first, second = hidden_size[0], hidden_size[1]
        # Layer order fixes the state_dict keys (block.0, block.2, ...).
        stages = [
            nn.BatchNorm1d(input_shape),
            #GaussianNoise(noise),
            nn.Dropout(dropout_rate),
            nn.Linear(input_shape, first),
            nn.ReLU(),
            nn.BatchNorm1d(first),
            #GaussianNoise(noise),
            nn.Dropout(dropout_rate),
            nn.Linear(first, second),
            nn.ReLU(),
        ]
        self.block = nn.Sequential(*stages)

    def forward(self, x):
        return self.block(x)

In [26]:
class Resnet(nn.Module):
    """Classifier that re-injects an autoencoder encoding between MLP blocks.

    The raw features go through the encoder of an ``Encoder_Decoder``; the
    second input goes through ``block_1``. The encoding is concatenated to
    the running hidden state before ``block_2`` and again before ``block_3``,
    and a final linear layer produces the outputs.

    Args:
        hidden_size: ``[block_2 input width, block_3 input width,
            dense input width]``.
        input_shape: ``[raw feature count, width of the second input]``.
        output_shape: Number of outputs of the final linear layer.
    """

    def __init__(self, hidden_size, input_shape, output_shape):
        super().__init__()
        raw_dim, second_dim = input_shape[0], input_shape[1]
        self.encoder_decoder = Encoder_Decoder(raw_dim, raw_dim)
        self.block_1 = ResnetBlock(second_dim)
        self.block_2 = ResnetBlock(hidden_size[0])
        self.block_3 = ResnetBlock(hidden_size[1])
        self.dense = nn.Linear(hidden_size[2], output_shape)

    def forward(self, raw_x, x):
        # Only the encoder half of the autoencoder is used here.
        encoding = self.encoder_decoder.encoder(raw_x)
        hidden = self.block_1(x)
        for block in (self.block_2, self.block_3):
            hidden = block(torch.cat([hidden, encoding], axis=1))
        return self.dense(hidden)

In [27]:
def train_fn_(model, optimizer, train_dataloader, loss_fn, device, pca):
    """Run one supervised training epoch and return the mean batch loss.

    Each batch's features are fed to the model twice: raw, and after
    ``pca.transform``.

    Args:
        model: Module called as ``model(raw_features, pca_features)``.
        optimizer: Optimizer stepping the model's parameters.
        train_dataloader: Iterable of dicts with ``"feature"`` and
            ``"label"`` tensors.
        loss_fn: Callable ``loss_fn(output, label)``.
        device: Device the tensors are moved to.
        pca: Object with a ``transform`` method taking a NumPy array.

    Returns:
        Average of the per-batch loss values.
    """
    model.train()
    batch_losses = []
    for batch in train_dataloader:
        optimizer.zero_grad()
        cpu_features = batch["feature"]
        label = batch["label"].to(device)
        # The PCA transform runs on the NumPy copy before anything moves to the device.
        projected = torch.FloatTensor(pca.transform(cpu_features.numpy())).to(device)
        raw_features = batch["feature"].to(device)

        loss = loss_fn(model(raw_features, projected), label)
        loss.backward()
        optimizer.step()
        batch_losses.append(loss.item())
    return sum(batch_losses) / len(train_dataloader)

In [28]:
def inference_fn_(model, valid_dataloader, device, pca):
    """Run the two-input model over a dataloader in eval mode and collect its outputs.

    Args:
        model: Module called as ``model(raw_features, pca_features)``.
        valid_dataloader: Iterable of dicts with a ``"feature"`` tensor.
            A ``"label"`` entry may be present but is not read.
        device: Device the tensors are moved to.
        pca: Object with a ``transform`` method taking a NumPy array.

    Returns:
        2-D NumPy array with one row per sample and as many columns as the
        model emits per sample.
    """
    model.eval()
    preds = []
    with torch.no_grad():
        for valid_data in valid_dataloader:
            cpu_features = valid_data["feature"]
            # The PCA transform runs on the NumPy copy before anything moves to the device.
            features = torch.FloatTensor(pca.transform(cpu_features.numpy())).to(device)
            raw_features = cpu_features.to(device)
            output = model(raw_features, features)
            preds.append(output.detach().cpu().numpy())
    preds = np.concatenate(preds)
    # Take the width from the model output instead of a hard-coded 6, so the
    # function follows the number of label columns.
    return preds.reshape(preds.shape[0], -1)

In [29]:
def Get_K_Fold_Data_(i, K, df: pd.DataFrame, fea_cols, lab_cols):
    """Split a DataFrame into K contiguous folds and return fold ``i`` as validation.

    Folds have ``len(df) // K`` rows each and follow the row order of ``df``.
    Rows beyond ``K * fold_size`` (the remainder) belong to no fold and appear
    in none of the outputs.

    Args:
        i: Index of the validation fold, ``0 <= i < K``.
        K: Number of folds; must be larger than 1.
        df: Source DataFrame.
        fea_cols: Names of the feature columns.
        lab_cols: Names of the label columns.

    Returns:
        Tuple ``(train_feature, train_label, valid_feature, valid_label,
        df_valid)``: the first four are NumPy arrays, ``df_valid`` is the
        validation fold's rows with all columns of ``df``.

    Raises:
        AssertionError: If ``K <= 1``.
        ValueError: If ``i`` is not a valid fold index.
    """
    assert K > 1, "K must be larger than 1."
    if not 0 <= i < K:
        # Previously this surfaced as an UnboundLocalError at the return.
        raise ValueError(f"fold index i={i} must satisfy 0 <= i < K={K}")

    Fold_size = len(df) // K
    feature = df[fea_cols].values
    label = df[lab_cols].values
    folds = [slice(j * Fold_size, (j + 1) * Fold_size) for j in range(K)]
    train_folds = [fold for j, fold in enumerate(folds) if j != i]

    valid_feature, valid_label = feature[folds[i]], label[folds[i]]
    # iloc makes the row selection positional whatever the index type is.
    df_valid = df.iloc[folds[i]]
    # Concatenate once rather than growing the arrays fold by fold.
    train_feature = np.concatenate([feature[fold] for fold in train_folds], axis=0)
    train_label = np.concatenate([label[fold] for fold in train_folds], axis=0)

    return train_feature, train_label, valid_feature, valid_label, df_valid

In [30]:
def utility_score_bincount(date, weight, resp, action):
    """Compute the utility score from per-day sums of ``weight * resp * action``.

    Args:
        date: Non-negative integer day index per row.
        weight: Per-row weight.
        resp: Per-row response.
        action: Per-row action (0 or 1).

    Returns:
        ``clip(t, 0, 6) * sum(p)``, where ``p`` holds the per-day sums and
        ``t = sum(p) / sqrt(sum(p**2)) * sqrt(250 / number_of_distinct_days)``.
    """
    n_days = len(np.unique(date))
    daily = np.bincount(date, weight * resp * action)
    total = np.sum(daily)
    t = total / np.sqrt(np.sum(daily ** 2)) * np.sqrt(250 / n_days)
    return np.clip(t, 0, 6) * total

In [31]:
# Settings for the K-fold training of the Resnet classifier.
TRAIN_MODE = True
EPOCHS = 30
DEVICE = "cuda:0"
BATCH_SIZE = 10240
LEARNING_RATE = 0.001
WEIGHT_DECAY = 0.0001
K = 10


if TRAIN_MODE:
    for i in range(K):
        # Fold i is held out for validation; the remaining folds form the training set.
        train_feature, train_label, valid_feature, valid_label, df_valid =  Get_K_Fold_Data_(i, K,  DATA, fea_cols, lab_cols)
        train_dataloader = dataloader(train_feature, train_label, BATCH_SIZE=BATCH_SIZE, shuffle=True)
        valid_dataloader = dataloader(valid_feature, valid_label, BATCH_SIZE=BATCH_SIZE, shuffle=False)
        torch.cuda.empty_cache()
        # Per Resnet.__init__: 132 sizes the autoencoder input, 131 sizes block_1's
        # input (presumably the PCA output width — confirm against PcA), 6 is the
        # number of label columns.
        model = Resnet([48, 48, 32], [132, 131], 6)
        
        # Reuse the pre-trained autoencoder: copy its weights, turn the noise layer off and freeze it.
        # NOTE(review): train_fn_ calls model.train(), so the frozen encoder's Dropout
        # and BatchNorm layers still run in training mode — confirm this is intended.
        model.encoder_decoder.load_state_dict(encoder_decoder.state_dict())
        model.encoder_decoder.encoder.Gaussiannoise.on = False
        model.encoder_decoder.requires_grad_(False)

        model.to(DEVICE)
        
        opt = torch.optim.Adam(model.parameters(), lr=LEARNING_RATE, weight_decay=WEIGHT_DECAY)
        loss_fn = nn.BCEWithLogitsLoss()
        
        for epo in range(EPOCHS):
            train_loss = train_fn_(model, opt, train_dataloader, loss_fn, DEVICE, PcA)
            # valid_pred holds raw logits, one column per entry of lab_cols.
            valid_pred = inference_fn_(model, valid_dataloader, DEVICE, PcA)
            valid_loss = float((loss_fn(torch.FloatTensor(valid_pred), torch.FloatTensor(valid_label))).detach().cpu().numpy())
            valid_auc = roc_auc_score(valid_label, torch.sigmoid(torch.FloatTensor(valid_pred)).detach().cpu().numpy())
            #valid_pred = np.median(torch.sigmoid(torch.FloatTensor(valid_pred)).detach().cpu().numpy(), axis=1)
            # The trading decision uses only column 0 (the "action" label), thresholded at 0.5.
            valid_pred = torch.sigmoid(torch.FloatTensor(valid_pred)[:, 0]).detach().cpu().numpy()
            valid_pred = np.where(valid_pred >= 0.5, 1, 0).astype(int)
            valid_u_score = utility_score_bincount(date=df_valid.date.values, weight=df_valid.weight.values,
                                                   resp=df_valid.resp.values, action=valid_pred)
            
            # Weighted sum of responses over the rows where the model chose to act.
            cum_return = sum(df_valid.resp.values * valid_pred * df_valid.weight.values)
            print(f"FOLD{i:2} EPOCH:{epo:3} train_loss={train_loss:.5f} valid_loss={valid_loss:.5f} valid_auc={valid_auc:.5f} cum_return={cum_return:.5f} valid_uility_score={valid_u_score:.5f}")
        
        # Save this fold's model (weights of the last epoch) from CPU.
        model.eval()
        model.to("cpu")
        Resnet_Save_Path = "../model_02_18/Resnet_02_18_new_feature_v{}.pth".format(i)
        torch.save(model.state_dict(), Resnet_Save_Path)

FOLD 0 EPOCH:  0 train_loss=0.69533 valid_loss=0.69235 valid_auc=0.51685 cum_return=9.14193 valid_uility_score=2.65453
FOLD 0 EPOCH:  1 train_loss=0.69291 valid_loss=0.69188 valid_auc=0.52449 cum_return=43.84440 valid_uility_score=79.71580
FOLD 0 EPOCH:  2 train_loss=0.69241 valid_loss=0.69118 valid_auc=0.53314 cum_return=80.22290 valid_uility_score=247.40573
FOLD 0 EPOCH:  3 train_loss=0.69187 valid_loss=0.69082 valid_auc=0.53709 cum_return=93.64360 valid_uility_score=366.30887
FOLD 0 EPOCH:  4 train_loss=0.69165 valid_loss=0.69068 valid_auc=0.53774 cum_return=98.91283 valid_uility_score=429.73428
FOLD 0 EPOCH:  5 train_loss=0.69146 valid_loss=0.69054 valid_auc=0.53832 cum_return=104.88398 valid_uility_score=459.11625
FOLD 0 EPOCH:  6 train_loss=0.69138 valid_loss=0.69049 valid_auc=0.53926 cum_return=98.36925 valid_uility_score=421.15095
FOLD 0 EPOCH:  7 train_loss=0.69129 valid_loss=0.69042 valid_auc=0.53975 cum_return=113.42351 valid_uility_score=472.53449
FOLD 0 EPOCH:  8 train_los

FOLD 2 EPOCH:  7 train_loss=0.69123 valid_loss=0.69084 valid_auc=0.53569 cum_return=116.01646 valid_uility_score=381.04689
FOLD 2 EPOCH:  8 train_loss=0.69118 valid_loss=0.69061 valid_auc=0.53657 cum_return=113.60272 valid_uility_score=327.69475
FOLD 2 EPOCH:  9 train_loss=0.69103 valid_loss=0.69061 valid_auc=0.53701 cum_return=128.12081 valid_uility_score=448.03970
FOLD 2 EPOCH: 10 train_loss=0.69100 valid_loss=0.69050 valid_auc=0.53762 cum_return=114.47786 valid_uility_score=315.88708
FOLD 2 EPOCH: 11 train_loss=0.69098 valid_loss=0.69055 valid_auc=0.53771 cum_return=101.82529 valid_uility_score=255.04370
FOLD 2 EPOCH: 12 train_loss=0.69096 valid_loss=0.69050 valid_auc=0.53838 cum_return=128.13740 valid_uility_score=444.79266
FOLD 2 EPOCH: 13 train_loss=0.69096 valid_loss=0.69048 valid_auc=0.53820 cum_return=146.37285 valid_uility_score=503.71633
FOLD 2 EPOCH: 14 train_loss=0.69098 valid_loss=0.69058 valid_auc=0.53823 cum_return=143.52169 valid_uility_score=501.42229
FOLD 2 EPOCH: 15

FOLD 4 EPOCH: 14 train_loss=0.69090 valid_loss=0.69069 valid_auc=0.53612 cum_return=12.90550 valid_uility_score=4.10051
FOLD 4 EPOCH: 15 train_loss=0.69094 valid_loss=0.69072 valid_auc=0.53525 cum_return=-23.53029 valid_uility_score=-0.00000
FOLD 4 EPOCH: 16 train_loss=0.69088 valid_loss=0.69050 valid_auc=0.53682 cum_return=-8.31815 valid_uility_score=-0.00000
FOLD 4 EPOCH: 17 train_loss=0.69088 valid_loss=0.69050 valid_auc=0.53618 cum_return=-2.03816 valid_uility_score=-0.00000
FOLD 4 EPOCH: 18 train_loss=0.69083 valid_loss=0.69052 valid_auc=0.53673 cum_return=-13.60046 valid_uility_score=-0.00000
FOLD 4 EPOCH: 19 train_loss=0.69086 valid_loss=0.69050 valid_auc=0.53635 cum_return=20.06804 valid_uility_score=9.82645
FOLD 4 EPOCH: 20 train_loss=0.69090 valid_loss=0.69064 valid_auc=0.53560 cum_return=-8.03175 valid_uility_score=-0.00000
FOLD 4 EPOCH: 21 train_loss=0.69080 valid_loss=0.69057 valid_auc=0.53712 cum_return=-4.11172 valid_uility_score=-0.00000
FOLD 4 EPOCH: 22 train_loss=0.69

FOLD 6 EPOCH: 21 train_loss=0.69086 valid_loss=0.69046 valid_auc=0.53640 cum_return=150.65311 valid_uility_score=866.28569
FOLD 6 EPOCH: 22 train_loss=0.69082 valid_loss=0.69042 valid_auc=0.53678 cum_return=158.28022 valid_uility_score=949.68130
FOLD 6 EPOCH: 23 train_loss=0.69082 valid_loss=0.69042 valid_auc=0.53843 cum_return=176.77404 valid_uility_score=1060.64423
FOLD 6 EPOCH: 24 train_loss=0.69089 valid_loss=0.69041 valid_auc=0.53701 cum_return=148.14188 valid_uility_score=825.40162
FOLD 6 EPOCH: 25 train_loss=0.69089 valid_loss=0.69050 valid_auc=0.53784 cum_return=160.51971 valid_uility_score=963.11824
FOLD 6 EPOCH: 26 train_loss=0.69085 valid_loss=0.69044 valid_auc=0.53733 cum_return=140.42309 valid_uility_score=842.53856
FOLD 6 EPOCH: 27 train_loss=0.69086 valid_loss=0.69041 valid_auc=0.53718 cum_return=174.77545 valid_uility_score=1048.65272
FOLD 6 EPOCH: 28 train_loss=0.69089 valid_loss=0.69041 valid_auc=0.53698 cum_return=157.01275 valid_uility_score=875.65947
FOLD 6 EPOCH: 

FOLD 8 EPOCH: 28 train_loss=0.69091 valid_loss=0.68987 valid_auc=0.54372 cum_return=142.50387 valid_uility_score=809.65577
FOLD 8 EPOCH: 29 train_loss=0.69089 valid_loss=0.68976 valid_auc=0.54364 cum_return=133.37518 valid_uility_score=715.91969
FOLD 9 EPOCH:  0 train_loss=0.69463 valid_loss=0.69221 valid_auc=0.51974 cum_return=161.69425 valid_uility_score=735.56642
FOLD 9 EPOCH:  1 train_loss=0.69281 valid_loss=0.69173 valid_auc=0.52590 cum_return=95.94807 valid_uility_score=411.64363
FOLD 9 EPOCH:  2 train_loss=0.69239 valid_loss=0.69092 valid_auc=0.53609 cum_return=149.57623 valid_uility_score=897.45738
FOLD 9 EPOCH:  3 train_loss=0.69189 valid_loss=0.69026 valid_auc=0.54229 cum_return=220.48709 valid_uility_score=1322.92252
FOLD 9 EPOCH:  4 train_loss=0.69165 valid_loss=0.69031 valid_auc=0.54332 cum_return=247.39677 valid_uility_score=1484.38061
FOLD 9 EPOCH:  5 train_loss=0.69155 valid_loss=0.69010 valid_auc=0.54504 cum_return=252.55104 valid_uility_score=1515.30625
FOLD 9 EPOCH: 