In [1]:
import pandas as pd
import numpy as np
from torch import nn
import torch.optim as optim
import json, copy, pickle, torch
from tqdm import tqdm
from sklearn.preprocessing import MinMaxScaler

!set PYTORCH_CUDA_ALLOC_CONF=max_split_size_mb:128

# Data Merge

In [2]:
info_raw = pd.read_csv("data/inverse/info.csv")
ccba_raw = pd.read_csv("data/inverse/ccba.csv")
cdtx_raw = pd.read_csv("data/inverse/cdtx.csv")
dp_raw = pd.read_csv("data/inverse/dp.csv")
remit_raw = pd.read_csv("data/inverse/remit.csv")

import seaborn as sns
import matplotlib.pyplot as plt

a = 1; b = 1
fig, ax = plt.subplots(a, b, figsize = (5, 4))
sns.countplot(x = "sar_flag", data = info_raw, ax = ax)

for p in ax.patches:
    if np.isnan(p.get_height()):
        continue
    ax.annotate(f'\n{int(p.get_height())}', (p.get_x()+0.325, p.get_height()+100), size=10)

In [3]:
info_raw["month"] = info_raw["date"].apply(lambda X: X[:7])
ccba_raw["month"] = ccba_raw["byymm"].apply(lambda X: X[:7])
info_raw = pd.merge(info_raw, ccba_raw, on = ["cust_id", "month"], how = "left")
info_raw = info_raw.drop(["month", "byymm"], axis = 1)
info_raw.head()

Unnamed: 0,alert_key,date,sar_flag,cust_id,risk_rank,occupation_code,total_asset,AGE,lupay,cycam,usgam,clamt,csamt,inamt,cucsm,cucah
0,171142,2021-04-01,0.0,a39fea9aec90969fe66a2b2b4d1b86368a2d38e8b8d4bf...,3,12.0,241719.0,3,12565.0,150744.0,82748.0,0.0,0.0,12477.0,12477.0,0.0
1,171152,2021-04-01,0.0,7e42b5dca9b28ee8e5545beb834361e90e6197d176b389...,3,13.0,599497.0,6,3581.0,324783.0,64363.0,0.0,0.0,0.0,4981.0,0.0
2,171177,2021-04-01,0.0,a6cdf6302aead77112013168c6d546d2df3bcb551956d2...,1,19.0,51160.0,4,,,,,,,,
3,171178,2021-04-01,0.0,1a3efa69705f611c7ef2384a715c8142e2ee801cfec9df...,3,9.0,3634343.0,6,829364.0,7666339.0,2343836.0,0.0,0.0,781279.0,781279.0,0.0
4,171180,2021-04-01,0.0,67f8cbb64dd3d447e992b1b299e0ceed3372188e47c88e...,1,17.0,4076287.0,4,636.0,256134.0,3538.0,0.0,0.0,0.0,3410.0,0.0


In [29]:
# dp以小時為單位，合併時需以當日最後的時間往前推
info_raw["date"] = pd.to_datetime(info_raw["date"] + " 23")

In [30]:
data_raw = [[cdtx_raw, 0, "date"], [dp_raw, 1, "tx_date"], [remit_raw, 2, "trans_date"], [info_raw, 3, "date"]]

In [31]:
def cyclical_feat_encode(df):
    df["date"] = pd.to_datetime(df["date"])

    df['month'] = df["date"].dt.month
    df['day'] = df["date"].dt.day

    df['month_sin'] = np.sin(2 * np.pi *  df['month']/ df["month"].max())
    df['month_cos'] = np.cos(2 * np.pi * df['month'] / df["month"].max())

    df['day_sin'] = np.sin(2 * np.pi * df['day'] / df["day"].max())
    df['day_cos'] = np.cos(2 * np.pi * df['day'] / df["day"].max())

    df = df.drop(["month", "day"], axis = 1)
    
    return df

In [32]:
with open("feats_type.json", newline='') as file:
    feats_type = json.load(file)

In [33]:
def process_catgorical(df, col):
    df[col].fillna('NULL', inplace=True)
    map_dict = {v:i for i, v in enumerate(set(df[col].unique()))} #set可排序，NULL放最後
    df[col] = df[col].map(map_dict)
    return df

In [34]:
cust_id = {k: i for i, k in enumerate(info_raw["cust_id"].unique())}
scaler = MinMaxScaler()

for i in range(len(data_raw)): # 依序處理各來源資料
    df = data_raw[i][0].copy()

    df = df.rename(columns = {data_raw[i][2]: "date"}) # 統一日期名稱
    df = df.drop(df[df["date"].isnull()].index)
    df["date"] = pd.to_datetime(df["date"]) #日期type轉換

    df = cyclical_feat_encode(df) # 時間特徵生成
    
    source = data_raw[i][1]
    df["source"] = source # 資料源

    df["cust_id"] = df["cust_id"].map(cust_id) # label encoding
    
    # 缺失值處理
    for col in df.columns:
        if col in ["source"]:
            continue

        if (feats_type[str(source)][col] == "category"):
            df = process_catgorical(df, col)
        elif (feats_type[str(source)][col] in ["int", "float"]):
            df[col].fillna(0, inplace=True)
            df[col] = scaler.fit_transform(df[col].to_numpy().reshape(-1, 1))
    
    data_raw[i][0] = df

In [35]:
# 各類別的種類數
category_num = {}
for i in range(4):
    for col, v in data_raw[i][0].items():
        if col == "source":
            continue
        if (feats_type[str(i)][col] == "category"):
            category_num[col] = v.nunique()
category_num

{'cust_id': 7708,
 'country': 128,
 'cur_type': 51,
 'debit_credit': 2,
 'tx_type': 3,
 'info_asset_code': 22,
 'fiscTxId': 30,
 'txbranch': 350,
 'cross_bank': 2,
 'ATM': 2,
 'trans_no': 5,
 'sar_flag': 3,
 'risk_rank': 4,
 'occupation_code': 22,
 'AGE': 11}

In [36]:
with open('category_num.json', 'w') as file:
    json.dump(category_num, file)

In [37]:
# 分群
data_g = copy.deepcopy(data_raw)
for df_g in data_g:
    df_g[0] = df_g[0].groupby("cust_id")

In [38]:
max_len = 256

In [39]:
# 每個id每個info資料所涵蓋的區間之所有資料
data = {i:{} for i in cust_id.values()}

for id_ in cust_id.values():
    # 抓出id的所有資料
    cust_data1 = []
    for df_g, name, date_col in data_g:
        if id_ in df_g.groups:
            df1 = df_g.get_group(id_)
            cust_data1.extend(df1.to_dict('records'))

    # 依日期、資料源排序
    cust_data1 = sorted(cust_data1, key = lambda X: (X["date"].timestamp(), X["source"]))
    cust_data1 = np.array(cust_data1)

    #抓出各區間的資料
    cust_data2 = {}
    idx = 0
    for i, s in enumerate(cust_data1):
        if s["source"] == 3:
            cust_data2[idx] = {}

            if cust_data1[i]["sar_flag"] != 2: # 2為missing value
                cust_data2[idx]["sar"] = cust_data1[i]["sar_flag"]
                cust_data2[idx]["data_type"] = "train"
            else:
                cust_data2[idx]["data_type"] = "test"

            end = i + 1
            start = max(0, end - max_len)

            cust_data2[idx]["data"] = cust_data1[start:end]
            idx += 1

    data[id_] = cust_data2

pickle.dump(data, open('data/inverse/cust_id2.pkl', 'wb'))

# Data Process for Embedding

In [2]:
data = pickle.load(open('data/inverse/cust_id2.pkl', 'rb'))

In [3]:
max_len = 256

In [4]:
def data_process(data, mode = "train"):
    data_X = []
    data_Y = []
    for k1, v1 in data.items():
        for k2, v2 in v1.items():
            # 判斷要抓的是train data還是test data
            if mode == "train":
                if v2["data_type"] == "test":
                    continue
            else:
                if v2["data_type"] == "train":
                    continue
            
            idx = [[] for _ in range(4)]
            data2 = [[] for _ in range(4)]

            max_idx = len(v2["data"]) - 1

            for i, trade in enumerate(v2["data"]):
                source = trade["source"]
                trade1 = {k: v for k, v in trade.items() if k not in ["date", "source", "alert_key"]} #刪除日期跟資料來源

                if i == max_idx:
                    data_Y.append(trade["sar_flag"])
                    trade1["sar_flag"] = 2
                

                idx[source].append(i)
                data2[source].append(list(trade1.values()))

            data_X.append([idx, data2, trade["alert_key"]])


    # train - X shape = (總樣本數(23906), 該樣本下的交易index、內容和alert_key(3), 資料源(4), 數據)
    print(f"Mode: {mode}, Total sample: {len(data_X)}")

    return data_X, data_Y

In [5]:
X_train, y_train = data_process(data, mode = "train")
X_test, y_test = data_process(data, mode = "test")

Mode: train, Total sample: 23906
Mode: test, Total sample: 1845


In [6]:
sar1 = sum(y_train)
sar0 = len(y_train) - sum(y_train)
total = len(y_train)

print(f"train - 0: {sar1}, 1: {sar0}, total: {total}, 0/1: {round(sar0/sar1)}")

train - 0: 234, 1: 23672, total: 23906, 0/1: 101


In [7]:
from sklearn.model_selection import train_test_split
X_train, X_valid, y_train, y_valid = train_test_split(X_train, y_train, test_size = 0.2, shuffle = True, stratify = y_train, random_state = 99)
print(f"X_train: {len(X_train)}, X_valid: {len(X_valid)}, X_test: {len(X_test)}\ny_train: {len(y_train)}, y_valid: {len(y_valid)}, y_test: {len(y_test)}")

X_train: 19124, X_valid: 4782, X_test: 1845
y_train: 19124, y_valid: 4782, y_test: 1845


In [8]:
print(f"train - 0: {len(y_train) - sum(y_train)}, 1: {sum(y_train)}, total: {len(y_train)}")
print(f"valid - 0: {len(y_valid) - sum(y_valid)}, 1: {sum(y_valid)}, total: {len(y_valid)}")

train - 0: 18937, 1: 187, total: 19124
valid - 0: 4735, 1: 47, total: 4782


In [9]:
import gc

del data
gc.collect()

32

In [10]:
from torch.utils.data import DataLoader, Dataset
class Dataset_transform(Dataset):
    def __init__(self, X, y):
        self.n_samples = len(y)
        self.X = X
        self.y = torch.tensor(y).float().reshape(-1, 1)
                                            
    def __len__(self):
        return self.n_samples

    def __getitem__(self, idx):
        seq_idx = self.X[idx][0]
        x1 = self.X[idx][1]
        alert_key = self.X[idx][2]
        y1 = self.y[idx]


        return [torch.tensor(s).long() for s in seq_idx], [torch.tensor(x2) for x2 in x1], y1, alert_key

In [11]:
train_dataset = Dataset_transform(X_train, y_train)
valid_dataset = Dataset_transform(X_valid, y_valid)
test_dataset = Dataset_transform(X_test, y_test)
# seq_idx, x1, y1, alert_key = train_dataset.__getitem__(2)

In [12]:
def BatchCollate(data):
    batch_idxs = [torch.tensor([], dtype = torch.long) for i in range(4)]
    seq_idxs = [torch.tensor([], dtype = torch.long) for i in range(4)]
    xs = [torch.tensor([]) for i in range(4)]
    targets = torch.tensor([])
    alert_keys = []

    for batch, d in enumerate(data):
        for i in range(4):
            seq_idxs[i] = torch.cat((seq_idxs[i], d[0][i]))
            xs[i] = torch.cat((xs[i], d[1][i]))
            
            batch1 = torch.tensor([batch] * len(d[0][i])).long()
            batch_idxs[i] = torch.cat((batch_idxs[i], batch1))

        targets = torch.cat((targets, d[2]))
        alert_keys.append(d[3])


    return [batch_idxs, seq_idxs, xs], targets.reshape(-1, 1), alert_keys

In [13]:
batch_size = 64
train_dataloader = DataLoader(train_dataset, batch_size = batch_size, shuffle = True, collate_fn = BatchCollate)
valid_dataloader = DataLoader(valid_dataset, batch_size = len(valid_dataset), collate_fn = BatchCollate)
test_dataloader = DataLoader(test_dataset, batch_size = len(test_dataset), collate_fn = BatchCollate)

In [14]:
with open("feats_type.json", newline='') as file:
    feats_type = json.load(file)

with open("category_num.json", newline='') as file:
    category_num = json.load(file)

In [15]:
import torch.nn.functional as F

class Encoder(nn.Module):
    """
    src: https://github.com/baosenguo/Kaggle-MoA-2nd-Place-Solution/blob/main/training/1d-cnn-train.ipynb
    """
    def __init__(self, num_features, embed_output=128, hidden_size=512, dropout=0.3):
        super().__init__()
        cha_1 = 64
        cha_2 = 128
        cha_3 = 128

        cha_1_reshape = int(hidden_size/cha_1)
        cha_po_1 = int(hidden_size/cha_1/2)
        cha_po_2 = int(hidden_size/cha_1/2/2) * cha_3

        self.cha_1 = cha_1
        self.cha_2 = cha_2
        self.cha_3 = cha_3
        self.cha_1_reshape = cha_1_reshape
        self.cha_po_1 = cha_po_1
        self.cha_po_2 = cha_po_2

        self.batch_norm1 = nn.BatchNorm1d(num_features)
        self.dropout1 = nn.Dropout(dropout)
        self.dense1 = nn.utils.weight_norm(nn.Linear(num_features, hidden_size))

        self.batch_norm_c1 = nn.BatchNorm1d(cha_1)
        self.dropout_c1 = nn.Dropout(dropout*0.9)
        self.conv1 = nn.utils.weight_norm(nn.Conv1d(cha_1,cha_2, kernel_size = 5, stride = 1, padding=2,  bias=False),dim=None)

        self.ave_po_c1 = nn.AdaptiveAvgPool1d(output_size = cha_po_1)

        self.batch_norm_c2 = nn.BatchNorm1d(cha_2)
        self.dropout_c2 = nn.Dropout(dropout*0.8)
        self.conv2 = nn.utils.weight_norm(nn.Conv1d(cha_2,cha_2, kernel_size = 3, stride = 1, padding=1, bias=True),dim=None)

        self.batch_norm_c2_1 = nn.BatchNorm1d(cha_2)
        self.dropout_c2_1 = nn.Dropout(dropout*0.6)
        self.conv2_1 = nn.utils.weight_norm(nn.Conv1d(cha_2,cha_2, kernel_size = 3, stride = 1, padding=1, bias=True),dim=None)

        self.batch_norm_c2_2 = nn.BatchNorm1d(cha_2)
        self.dropout_c2_2 = nn.Dropout(dropout*0.5)
        self.conv2_2 = nn.utils.weight_norm(nn.Conv1d(cha_2,cha_3, kernel_size = 5, stride = 1, padding=2, bias=True),dim=None)

        self.max_po_c2 = nn.MaxPool1d(kernel_size=4, stride=2, padding=1)

        self.flt = nn.Flatten()
        
        self.batch_norm3 = nn.BatchNorm1d(cha_po_2)
        self.dropout3 = nn.Dropout(dropout)
        self.dense3 = nn.utils.weight_norm(nn.Linear(cha_po_2, embed_output))

    def forward(self, x):

        x = self.batch_norm1(x)
        x = self.dropout1(x)
        x = F.celu(self.dense1(x), alpha=0.06)

        x = x.reshape(x.shape[0],self.cha_1,
                        self.cha_1_reshape)

        x = self.batch_norm_c1(x)
        x = self.dropout_c1(x)
        x = F.relu(self.conv1(x))

        x = self.ave_po_c1(x)

        x = self.batch_norm_c2(x)
        x = self.dropout_c2(x)
        x = F.relu(self.conv2(x))
        x_s = x

        x = self.batch_norm_c2_1(x)
        x = self.dropout_c2_1(x)
        x = F.relu(self.conv2_1(x))

        x = self.batch_norm_c2_2(x)
        x = self.dropout_c2_2(x)
        x = F.relu(self.conv2_2(x))
        x =  x * x_s

        x = self.max_po_c2(x)

        x = self.flt(x)

        x = self.batch_norm3(x)
        x = self.dropout3(x)
        x = self.dense3(x)

        return x

In [16]:
from traceback import format_exc

In [17]:
class FeatureEmbedder(torch.nn.Module):
    def __init__(self, feat_type, category_num, embed_dim = 4, embed_output = 32, hidden_size = 256, dropout = 0.3):
        super().__init__()

        feat_type = {k: v for k, v in feat_type.items() if v in ["category", "int", "float"]}

        layers = []
        for k, v in feat_type.items():
            if v == "category":
                layers.append(nn.Embedding(category_num[k], embed_dim))
            else:
                layers.append(nn.Linear(1, embed_dim))
        self.embeddings = torch.nn.ModuleList(layers)

        self.encoder = Encoder(
            num_features = len(feat_type) * embed_dim, 
            embed_output = embed_output, 
            hidden_size = hidden_size,
            dropout = dropout
        )

        self.feat_type = feat_type
    
    def forward(self, x):
        # 透過embedding把每個feature的數據轉成(embed_dim)維的向量
        embs = []
        
        for i, (type_, emb_layer) in enumerate(zip(self.feat_type.values(), self.embeddings)):
            if type_ == "category": # 類別變數
                x1 = emb_layer(x[:, i].long())
            else: # 連續變數
                x1 = emb_layer(x[:, i].reshape(-1, 1))
            
            embs.append(x1)
        
        
        embs = torch.cat(embs, dim=1)# 把所有feature的向量合併
        embs = self.encoder(embs) # 透過encoder轉換，統一輸出維度(embed_output)
            
        return embs


In [18]:
embed_dim = 4
embed_output = 32
embed_hidden_size = 256
embed_dropout = 0.3

In [19]:
# 每個資料源建一個embedder
layers = []
for k, v in feats_type.items():
    embedder = FeatureEmbedder(v, category_num, embed_dim, embed_output, embed_hidden_size, embed_dropout)
    layers.append(embedder)
embedders = torch.nn.ModuleList(layers)

class Model(nn.Module):
    def __init__(self, embedders, input_size, max_len, hidden_size):
        super().__init__()
        self.embedders = embedders
        
        self.max_len = max_len
        self.input_size = input_size
        
        self.rnn = nn.LSTM(input_size, hidden_size)
        self.norm = nn.BatchNorm1d(hidden_size)
        self.sigmoid = nn.Sigmoid()
        self.linear = nn.Linear(hidden_size, 1)
    
    def forward(self, x, batch):
        with torch.no_grad():
            # 透過embedder統一各資料源的feature數量
            for s in range(4):
                if len(x[2][s]) == 1:
                    x[2][s] = torch.zeros(1, self.input_size)# 只有1個sample，沒辦法做batch_norm1，encoder會出錯，直接給0
                else:
                    x[2][s] = self.embedders[s](x[2][s])
            

            x1 = torch.zeros(batch, self.max_len, self.input_size) # shape: (batch, max_len, features)
            
            # 合併各資料源的資料
            for s in range(4):
                for i in range(len(x[0][s])):
                    batch_idx, seq_idx, features = x[0][s][i], x[1][s][i], x[2][s][i]
                    x1[batch_idx][seq_idx] = features


        h0 = torch.randn(1, self.max_len, 16)
        c0 = torch.randn(1, self.max_len, 16)


        out, (hn, cn) = self.rnn(x1, (h0, c0))
        out = self.norm(out[:, -1, :])
        out = self.sigmoid(out)
        out = self.linear(out)
        out = self.sigmoid(out)

        return out

In [20]:
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
print(device)

cuda:0


In [21]:
from temporal_aggregator import *

class Model(nn.Module):
    def __init__(self, embedders, input_size, max_len, hidden_size, temporal_aggregator_type="TemporalDebertaAggregator", 
                temporal_aggregator_args={
                    "hidden_size": 32,
                    "num_layers": 3,
                    "dropout": 0.3,
                    "max_len": 512
                }
                ):
        super().__init__()
        self.embedders = embedders
        
        self.max_len = max_len
        self.input_size = input_size

        self.temporal_aggregator = eval(
            f"{temporal_aggregator_type}")(**temporal_aggregator_args)
        self.classifier = nn.Sequential(
            nn.Linear(temporal_aggregator_args["hidden_size"], 1),
            nn.Sigmoid()
        )
    
    def forward(self, x, batch):
        with torch.no_grad():
            # 透過embedder統一各資料源的feature數量
            for s in range(4):
                if len(x[2][s]) == 0:
                    continue
                elif len(x[2][s]) == 1:
                    x[2][s] = torch.zeros(1, self.input_size)# 只有1個sample，沒辦法做batch_norm1，encoder會出錯，直接給0
                else:
                    x[2][s] = self.embedders[s](x[2][s].to(device))
            

            x1 = torch.zeros(batch, self.max_len, self.input_size).to(device) # shape: (batch, max_len, features)
            mask = torch.zeros((batch, self.max_len)).long().to(device)
            
            # 合併各資料源的資料
            for s in range(4):
                for i in range(len(x[0][s])):
                    batch_idx, seq_idx, features = x[0][s][i], x[1][s][i], x[2][s][i]
                    x1[batch_idx][seq_idx] = features
                    mask[batch_idx, seq_idx] = 1


        out = self.temporal_aggregator(x1, mask)
        out = self.classifier(out).squeeze(-1)

        return out.reshape(-1, 1)

In [22]:
hidden_size = 16

In [23]:
epochs = 100
model = Model(embedders, embed_output, max_len, hidden_size).to(device)
criterion = nn.BCELoss()
optimizer = optim.Adam(model.parameters(), lr = 1e-3)

In [24]:
def recall_n(output, target):
    comb = list(zip(output, target))
    comb.sort(key=lambda x:x[0])
    flag = False
    for i, (out, gt) in enumerate(comb):
        if gt == 1:
            if flag:
                break
            flag = True

    recall = ((sum(target)-1) / (len(target)-i))
    
    return recall.item()

In [25]:
def loss_weight(labels):
    weight = []
    for label in labels:
        if label == 1:
            weight.append(75)
        elif label == 0:
            weight.append(1)
            
    return torch.tensor(weight).reshape(-1, 1)

In [26]:
# validate、test預測後的損失函數，以及相關分數
def eval_score(dataloader, model, criterion, mode = "eval"):
    with torch.no_grad():
        losses = 0
        pred1, y1 = torch.Tensor([]).to(device), torch.Tensor([]).to(device)
        for batch, batch_data in enumerate(dataloader):
            X, y, alert_key = batch_data
            y = y.to(device)

            pred = model(X, len(y)) #預測

            if mode == "train":
                weight = loss_weight(y)
                criterion = nn.BCELoss(weight = weight).to(device)
                loss = criterion(pred, y) #計算損失函數
                losses += loss.item()

            pred1 = torch.concat([pred1, pred])
            y1 = torch.concat([y1, y])


        if mode == "train":
            losses /= (batch + 1)
            
            recall = recall_n(pred1, y1)

            return losses, recall
            
    return pred1, alert_key

In [27]:
#初始化權重，使其符合常態分布
for m in model.modules():
    if isinstance(m, (nn.Linear)):
        nn.init.kaiming_normal_(m.weight)
        nn.init.constant_(m.bias, 0)
    elif isinstance(m, nn.BatchNorm1d):
        nn.init.constant_(m.weight, 1)
        nn.init.constant_(m.bias, 0)
    elif isinstance(m, nn.LSTM):
        for name, param in m.named_parameters():
            if name.startswith("weight"):
                nn.init.kaiming_normal_(param)
            else:
                nn.init.constant_(param, 0)

In [28]:
# 建模
best_loss = np.inf
paitence = 5
remain_patience = 0
train_losses = []
valid_losses = []
for epoch in range(epochs):
    train_loss = 0
    valid_loss = 0
    # train model
    model.train() # 模型為訓練模式
    
    with tqdm(total = len(train_dataloader), ncols = 200) as _tqdm: # 使用需要的參數對tqdm進行初始化
        _tqdm.set_description('epoch: {}/{}'.format(epoch + 1, epochs))# 設置前綴 一般為epoch的信息
        
        for batch, batch_data in enumerate(train_dataloader):
            X_train1, y_train1, alert_key = batch_data
            y_train1 = y_train1.to(device)

            train_pred = model(X_train1, len(y_train1)) #預測

            if train_pred == "error":
                break

            weight = loss_weight(y_train1)
            criterion = nn.BCELoss(weight = weight).to(device)
            loss = criterion(train_pred, y_train1) #計算損失函數
            

            _tqdm.set_postfix({"train_loss" : loss.item()})
            _tqdm.update(1)
            
            optimizer.zero_grad() # 梯度在反向傳播前先清零
            loss.backward() # 反向傳播，計算權重對損失函數的梯度
            optimizer.step()  # 根據梯度更新權重
            train_loss += loss.item()

        if train_pred == "error":
            break
            
        train_loss /= (batch + 1)
        train_losses.append(train_loss)

        # validate model
        model.eval()# 模型為評估模式
        valid_loss, valid_recall = eval_score(valid_dataloader, model, criterion, mode = "train")
        valid_losses.append(valid_loss)

       

        # 損失函數連續30個epoches都沒下降的話就終止訓練
        if valid_loss < best_loss:
            best_loss = valid_loss
            remain_patience = paitence
            _tqdm.set_postfix({"train_loss" : train_loss, "valid_loss": valid_loss, "best_loss": best_loss, "valid_recall": valid_recall})# 設置想在本次循環監視變量，可作後綴打印出來
        else:
            _tqdm.set_postfix({"train_loss" : train_loss, "valid_loss": valid_loss, "best_loss": best_loss, "valid_recall": valid_recall})# 設置想在本次循環監視變量，可作後綴打印出來
            remain_patience -= 1
            if remain_patience == 0:
                print('early stop!')
                break
        
print("Done!")

epoch: 1/100:  23%|██████████████████████████████▍                                                                                                   | 70/299 [00:51<02:48,  1.36it/s, train_loss=0.583]


KeyboardInterrupt: 

In [None]:
torch.save(model.state_dict(), "model/20221201.pt") # 儲存權重

In [None]:
pred, alert_key = eval_score(test_dataloader, model, criterion, mode = "eval")

In [None]:
result = pd.DataFrame(zip(alert_key, pred.reshape(-1).tolist()), columns = ["alert_key", "probability"])
result = result.sort_values("probability", ascending = False)

In [None]:
submit = pd.read_csv("data\\submit\\預測的案件名單及提交檔案範例.csv")
submit = submit[["alert_key"]]
submit = pd.merge(submit, result, on = "alert_key", how = "left")
# submit["probability"] = submit["probability"].astype(float)
submit = submit.fillna(1e-6)
submit = submit.sort_values("probability", ascending = False)
submit

In [None]:
submit.to_csv("data/submit/1202_1.csv", index = False)