## Import

In [6]:
import random
import pandas as pd
import numpy as np
import os
import cv2
from tqdm.auto import tqdm

import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader

import torchvision
from torchvision import transforms as T
import albumentations

import albumentations as A
from albumentations.pytorch import transforms

import torchvision.models as models

from sklearn import metrics
from sklearn.preprocessing import StandardScaler, LabelEncoder

from sklearn.model_selection import train_test_split

import warnings
warnings.filterwarnings(action='ignore') 

In [7]:
from albumentations.pytorch.transforms import ToTensorV2

In [8]:
device = torch.device('mps') if torch.backends.mps.is_available() else torch.device('cpu')

In [9]:
torch.backends.mps.is_available()

True

## Hyperparameter Setting

In [10]:
CFG = {
    'IMG_SIZE':800,
    'IMG_SIZE_D':800,
    'EPOCHS':40,
    'LEARNING_RATE':1e-4,
    'BATCH_SIZE':10,
    'SEED':41
}

## Fixed RandomSeed

In [11]:
def seed_everything(seed):
    random.seed(seed)
    os.environ['PYTHONHASHSEED'] = str(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    #torch.cuda.manual_seed(seed)
    torch.backends.mps.deterministic = True
    torch.backends.mps.benchmark = True

seed_everything(CFG['SEED']) # Seed 고정

## Data Pre-processing
#### 1. Load Dataframe
#### 2. 결측치 보완
#### 3. Train / Validation Split
#### 4. Numeric Feature Scaling / Categorical Featrue Label-Encoding

In [12]:
train_df = pd.read_csv('/Users/krc/Documents/breast_dacon/open/train.csv')
test_df = pd.read_csv('/Users/krc/Documents/breast_dacon/open/test.csv')

In [13]:
train_df.rename(columns = {'나이':'age',
                           '수술연월일':'yr_mn_dt',
                           '진단명':'diag_cat',
                           '암의 위치':'location',
                           '암의 개수':'sng_mul',
                           '암의 장경':'size_mm',
                           'DCIS_or_LCIS_여부':'DCIS_or_LCIS'},inplace=True)

test_df.rename(columns = {'나이':'age',
                           '수술연월일':'yr_mn_dt',
                           '진단명':'diag_cat',
                           '암의 위치':'location',
                           '암의 개수':'sng_mul',
                           '암의 장경':'size_mm',
                           'DCIS_or_LCIS_여부':'DCIS_or_LCIS'},inplace=True)

In [17]:
train_df, val_df, train_labels, val_labels = train_test_split(
                                                    train_df.drop(columns=['N_category']), 
                                                    train_df['N_category'], 
                                                    test_size=0.1,  # 0.2 
                                                    random_state=CFG['SEED']
                                                )

In [18]:
def get_values(value):
    return value.values.reshape(-1, 1)

numeric_cols = ['age', 'size_mm', 'ER_Allred_score', 'PR_Allred_score', 'KI-67_LI_percent', 'HER2_SISH_ratio']
ignore_cols = ['ID', 'img_path', 'mask_path', 'yr_mn_dt', 'N_category']

for col in train_df.columns:
    if col in ignore_cols:
        continue
    if col in numeric_cols:
        scaler = StandardScaler()
        train_df[col] = scaler.fit_transform(get_values(train_df[col]))
        val_df[col] = scaler.transform(get_values(val_df[col]))
        test_df[col] = scaler.transform(get_values(test_df[col]))
    else:
        le = LabelEncoder()
        train_df[col] = le.fit_transform(get_values(train_df[col]))
        val_df[col] = le.transform(get_values(val_df[col]))
        test_df[col] = le.transform(get_values(test_df[col]))

## CustomDataset

In [19]:
os.getcwd()

'/Users/krc/Documents/breast_dacon/dacon_bc_prediction'

In [20]:
base = '/Users/krc/Documents/breast_dacon/open'

In [21]:
base + train_df['img_path'].iloc[0][1:]

'/Users/krc/Documents/breast_dacon/open/train_imgs/BC_01_2853.png'

In [22]:
class CustomDataset(Dataset):
    def __init__(self, medical_df, labels, transforms=None):
        self.medical_df = medical_df
        self.transforms = transforms
        self.labels = labels
        
    def __getitem__(self, index):
        img_path = base + self.medical_df['img_path'].iloc[index][1:]
        # print(img_path)
        image = cv2.imread(img_path)
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
        
        if self.transforms is not None:
            image = self.transforms(image=image)['image']
                
        if self.labels is not None:
            label = self.labels[index]
            return image,  label #tabular = 2nd
        else:
            return image #, tabular
          
    def __len__(self):
        return len(self.medical_df)

In [23]:
train_transforms = A.Compose([
                            A.HorizontalFlip(),
                            A.VerticalFlip(),
                            A.Rotate(limit=180, border_mode=cv2.BORDER_CONSTANT,p=0.3),
                            A.GaussNoise(p=0.5), #NOISE ADD 
                            A.Resize(CFG['IMG_SIZE_D'],CFG['IMG_SIZE']),
                            #밝기/대비 조정
                            A.augmentations.transforms.ColorJitter(brightness=0.2, contrast=0.2, saturation=0.2, hue=0.2, always_apply=False, p=0.5),
                            A.Normalize(mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225), max_pixel_value=255.0, always_apply=False, p=1.0),
                            A.pytorch.transforms.ToTensorV2()
                            ])

test_transforms = A.Compose([
                            A.Resize(CFG['IMG_SIZE_D'],CFG['IMG_SIZE']),
                            A.Normalize(mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225), max_pixel_value=255.0, always_apply=False, p=1.0),

                            A.pytorch.transforms.ToTensorV2()
                            ])

In [24]:
train_dataset = CustomDataset(train_df, train_labels.values, train_transforms)
train_loader = DataLoader(train_dataset, batch_size = CFG['BATCH_SIZE'], shuffle=True, num_workers=0)

val_dataset = CustomDataset(val_df, val_labels.values, test_transforms)
val_loader = DataLoader(val_dataset, batch_size=CFG['BATCH_SIZE'], shuffle=False, num_workers=0)

## Model Architecture

In [26]:
class ImgFeatureExtractor(nn.Module):
    def __init__(self):
        super(ImgFeatureExtractor, self).__init__()
        self.backbone = models.resnet50(weights=True)
#         for i, param in enumerate(self.backbone.parameters()):
#               if i >139:
#                 param.requires_grad= True
#        self.embedding = nn.Linear(1000,512)
        
    def forward(self, x):
        x = self.backbone(x)

#        x = self.embedding(x)
        return x
    
    def freeze(self):
        # To freeze the residual layers
        for param in self.backbone.parameters():
            param.require_grad = False
        for param in self.backbone.fc.parameters():
            param.require_grad = True
    
    def unfreeze(self):
        # Unfreeze all layers
        for param in self.backbone.parameters():
            param.require_grad = True

In [27]:
class ClassificationModel(nn.Module):
    def __init__(self):
        super(ClassificationModel, self).__init__()
        self.img_feature_extractor = ImgFeatureExtractor()
        self.classifier = nn.Sequential(
            nn.Linear(in_features=1000, out_features=1024),
            nn.BatchNorm1d(1024),
            nn.LeakyReLU(),
            
            nn.Linear(in_features=1024, out_features=512),
            nn.BatchNorm1d(512),
            nn.LeakyReLU(),
            
            nn.Linear(in_features=512, out_features=256),
            nn.BatchNorm1d(256),
            nn.LeakyReLU(),

            nn.Linear(in_features=256, out_features=128),
            nn.BatchNorm1d(128),
            nn.LeakyReLU(),

            nn.Linear(in_features=128, out_features=64),
            nn.BatchNorm1d(64),
            nn.LeakyReLU(),
            
            nn.Linear(in_features=64, out_features=1),
            nn.Sigmoid(),
        )
        
    def forward(self, img): 
        img_feature = self.img_feature_extractor(img)
        output = self.classifier(img_feature) 
        return output

## Train

In [28]:
def train(model, optimizer, train_loader, val_loader, scheduler, device):
    model.to(device)
    criterion = nn.BCEWithLogitsLoss().to(device)
    
    best_score = 0
    best_epcoh = 0 
    best_model = None
    
    for epoch in range(1, CFG['EPOCHS']+1):
        model.train()
        train_loss = []
        
        for img, label in tqdm(iter(train_loader)): 
            img = img.float().to(device)
            label = label.float().to(device)
            
            optimizer.zero_grad()
            
            model_pred = model(img)
            print(model_pred)
            
            loss = criterion(model_pred, label.reshape(-1,1))
            
            loss.backward()
            optimizer.step()
            
            train_loss.append(loss.item())
        
        val_loss, val_score = validation(model, criterion, val_loader, device)
        print(f'Epoch [{epoch}], Train Loss : [{np.mean(train_loss):.5f}] Val Loss : [{val_loss:.5f}] Val Score : [{val_score:.5f}]')
        
        if scheduler is not None: #epoch 마다 업데이트
            scheduler.step(val_loss) #val_loss
        
        if best_score < val_score:
            best_epoch = epoch
            best_score = val_score
            best_model = model
            torch.save(best_model.state_dict(), f'./{epoch}_model_{best_score}.pth')
    
    return best_model

In [29]:
def validation(model, criterion, val_loader, device):
    model.eval()
    pred_labels = []
    true_labels = []
    val_loss = []
    threshold = 0.5
    with torch.no_grad():
        for img, label in tqdm(iter(val_loader)): 
            true_labels += label.tolist()
            
            img = img.float().to(device)
            label = label.float().to(device)
            
            model_pred = model(img)
            
            loss = criterion(model_pred, label.reshape(-1,1))
            
            val_loss.append(loss.item())
            
            model_pred = model_pred.squeeze(1).to('cpu')  
            pred_labels += model_pred.tolist()
    
    pred_labels = np.where(np.array(pred_labels) > threshold, 1, 0)
    val_score = metrics.f1_score(y_true=true_labels, y_pred=pred_labels, average='macro')
    return np.mean(val_loss), val_score

## WANDB setting

In [32]:
import wandb

wandb.init()

[34m[1mwandb[0m: Currently logged in as: [33mrumj[0m. Use [1m`wandb login --relogin`[0m to force relogin


VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.01668932776665315, max=1.0)…

In [33]:
api = wandb.Api()

run = api.run("rumj/dacon_bc_prediction/78r04zfs")
run.config["size"] = 800
run.config["epochs"] = 30
run.config["initial_lr"] = 1e-4
run.config["batch_size"] = 5
run.config["scheduler"] = "CosineAnnealingLR"
history = run.scan_history(keys=["Loss"])
losses = [row["Loss"] for row in history]

run.update()

## Run!!

In [34]:
model = nn.DataParallel(ClassificationModel())
wandb.watch(model)
model.eval()
optimizer = torch.optim.Adam(params = model.parameters(), lr = CFG["LEARNING_RATE"])
scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer = optimizer, T_max = CFG['EPOCHS'], verbose = True)

infer_model = train(model, optimizer, train_loader, val_loader, scheduler, device)

Adjusting learning rate of group 0 to 1.0000e-04.


  0%|          | 0/90 [00:00<?, ?it/s]

tensor([[0.4558],
        [0.3228],
        [0.4044],
        [0.4685],
        [0.3751],
        [0.4456],
        [0.5401],
        [0.3749],
        [0.2760],
        [0.3385]], device='mps:0', grad_fn=<SigmoidBackward0>)
tensor([[0.2932],
        [0.2942],
        [0.3464],
        [0.4634],
        [0.4953],
        [0.5664],
        [0.3652],
        [0.3007],
        [0.4683],
        [0.3780]], device='mps:0', grad_fn=<SigmoidBackward0>)
tensor([[0.4232],
        [0.4928],
        [0.3779],
        [0.5050],
        [0.4086],
        [0.3158],
        [0.3119],
        [0.4581],
        [0.2770],
        [0.3905]], device='mps:0', grad_fn=<SigmoidBackward0>)
tensor([[0.2749],
        [0.4679],
        [0.4495],
        [0.3032],
        [0.4989],
        [0.3490],
        [0.4566],
        [0.4627],
        [0.3418],
        [0.3902]], device='mps:0', grad_fn=<SigmoidBackward0>)
tensor([[0.3944],
        [0.4589],
        [0.5450],
        [0.3853],
        [0.3554],
        [0

tensor([[0.3976],
        [0.3918],
        [0.5206],
        [0.2836],
        [0.5684],
        [0.5994],
        [0.2013],
        [0.3200],
        [0.3806],
        [0.3044]], device='mps:0', grad_fn=<SigmoidBackward0>)
tensor([[0.2296],
        [0.4340],
        [0.1230],
        [0.3495],
        [0.3580],
        [0.3545],
        [0.6718],
        [0.5459],
        [0.3581],
        [0.5969]], device='mps:0', grad_fn=<SigmoidBackward0>)
tensor([[0.5514],
        [0.3317],
        [0.1764],
        [0.3804],
        [0.4956],
        [0.4490],
        [0.1867],
        [0.3766],
        [0.6468],
        [0.3866]], device='mps:0', grad_fn=<SigmoidBackward0>)
tensor([[0.5218],
        [0.1206],
        [0.4436],
        [0.2918],
        [0.4127],
        [0.5508],
        [0.5821],
        [0.3946],
        [0.2811],
        [0.4011]], device='mps:0', grad_fn=<SigmoidBackward0>)
tensor([[0.5101],
        [0.4246],
        [0.1959],
        [0.4789],
        [0.2448],
        [0

tensor([[0.4509],
        [0.3288],
        [0.7947],
        [0.2262],
        [0.2658],
        [0.4354],
        [0.1070],
        [0.3541],
        [0.6775],
        [0.3452]], device='mps:0', grad_fn=<SigmoidBackward0>)
tensor([[0.3302],
        [0.4408],
        [0.8000],
        [0.1183],
        [0.3647],
        [0.2502],
        [0.3609],
        [0.3479],
        [0.7280],
        [0.2415]], device='mps:0', grad_fn=<SigmoidBackward0>)
tensor([[0.1951],
        [0.5511],
        [0.1189],
        [0.4365],
        [0.2969],
        [0.2487],
        [0.2539],
        [0.4610],
        [0.8615],
        [0.5000]], device='mps:0', grad_fn=<SigmoidBackward0>)
tensor([[0.2415],
        [0.3518],
        [0.2154],
        [0.2245],
        [0.8496],
        [0.7830],
        [0.1043],
        [0.3567],
        [0.3781],
        [0.3966]], device='mps:0', grad_fn=<SigmoidBackward0>)
tensor([[0.1584],
        [0.5906],
        [0.2005],
        [0.4213],
        [0.5566],
        [0

  0%|          | 0/10 [00:00<?, ?it/s]

Epoch [1], Train Loss : [0.69223] Val Loss : [0.67809] Val Score : [0.61390]
Epoch 0.68: adjusting learning rate of group 0 to 9.9929e-05.


  0%|          | 0/90 [00:00<?, ?it/s]

tensor([[0.1591],
        [0.4936],
        [0.1463],
        [0.8548],
        [0.4836],
        [0.7510],
        [0.0634],
        [0.4647],
        [0.3127],
        [0.3878]], device='mps:0', grad_fn=<SigmoidBackward0>)
tensor([[0.2802],
        [0.4224],
        [0.1517],
        [0.1269],
        [0.2465],
        [0.7405],
        [0.5765],
        [0.8486],
        [0.2164],
        [0.3607]], device='mps:0', grad_fn=<SigmoidBackward0>)
tensor([[0.1281],
        [0.1958],
        [0.8634],
        [0.4331],
        [0.2099],
        [0.4668],
        [0.6885],
        [0.1922],
        [0.2225],
        [0.5233]], device='mps:0', grad_fn=<SigmoidBackward0>)
tensor([[0.3017],
        [0.5338],
        [0.1450],
        [0.2660],
        [0.2490],
        [0.9049],
        [0.5344],
        [0.4543],
        [0.1167],
        [0.3427]], device='mps:0', grad_fn=<SigmoidBackward0>)
tensor([[0.5179],
        [0.5410],
        [0.8816],
        [0.1761],
        [0.4169],
        [0

tensor([[0.3011],
        [0.3444],
        [0.7355],
        [0.2133],
        [0.1410],
        [0.6650],
        [0.5154],
        [0.6872],
        [0.1005],
        [0.2491]], device='mps:0', grad_fn=<SigmoidBackward0>)
tensor([[0.6237],
        [0.3442],
        [0.8199],
        [0.4370],
        [0.3880],
        [0.2293],
        [0.1971],
        [0.0888],
        [0.7701],
        [0.0868]], device='mps:0', grad_fn=<SigmoidBackward0>)
tensor([[0.1969],
        [0.3730],
        [0.0706],
        [0.6795],
        [0.2756],
        [0.8500],
        [0.1993],
        [0.2381],
        [0.7494],
        [0.3416]], device='mps:0', grad_fn=<SigmoidBackward0>)
tensor([[0.2012],
        [0.0868],
        [0.0944],
        [0.3046],
        [0.2597],
        [0.5957],
        [0.5795],
        [0.7018],
        [0.3247],
        [0.8314]], device='mps:0', grad_fn=<SigmoidBackward0>)
tensor([[0.6059],
        [0.4068],
        [0.2146],
        [0.0626],
        [0.2585],
        [0

tensor([[0.0924],
        [0.1961],
        [0.6641],
        [0.1854],
        [0.4762],
        [0.4236],
        [0.0889],
        [0.4845],
        [0.9264],
        [0.3303]], device='mps:0', grad_fn=<SigmoidBackward0>)
tensor([[0.8213],
        [0.4932],
        [0.1722],
        [0.2399],
        [0.3149],
        [0.1251],
        [0.1731],
        [0.1164],
        [0.9084],
        [0.3598]], device='mps:0', grad_fn=<SigmoidBackward0>)
tensor([[0.1729],
        [0.1933],
        [0.8163],
        [0.1572],
        [0.2870],
        [0.7113],
        [0.0585],
        [0.2000],
        [0.8534],
        [0.4963]], device='mps:0', grad_fn=<SigmoidBackward0>)
tensor([[0.5612],
        [0.1689],
        [0.1291],
        [0.4116],
        [0.0917],
        [0.5399],
        [0.1310],
        [0.1919],
        [0.9295],
        [0.6975]], device='mps:0', grad_fn=<SigmoidBackward0>)
tensor([[0.1771],
        [0.1604],
        [0.5444],
        [0.1071],
        [0.0885],
        [0

  0%|          | 0/10 [00:00<?, ?it/s]

Epoch [2], Train Loss : [0.67016] Val Loss : [0.68082] Val Score : [0.64714]
Epoch 0.68: adjusting learning rate of group 0 to 9.9929e-05.


  0%|          | 0/90 [00:00<?, ?it/s]

tensor([[0.9537],
        [0.1391],
        [0.1345],
        [0.4783],
        [0.1146],
        [0.1845],
        [0.1293],
        [0.6651],
        [0.3274],
        [0.5762]], device='mps:0', grad_fn=<SigmoidBackward0>)
tensor([[0.0432],
        [0.3028],
        [0.1312],
        [0.4438],
        [0.1119],
        [0.9260],
        [0.3997],
        [0.6860],
        [0.1921],
        [0.6951]], device='mps:0', grad_fn=<SigmoidBackward0>)
tensor([[0.1173],
        [0.1867],
        [0.2761],
        [0.8792],
        [0.2412],
        [0.2638],
        [0.0995],
        [0.5106],
        [0.1908],
        [0.8970]], device='mps:0', grad_fn=<SigmoidBackward0>)
tensor([[0.3518],
        [0.4949],
        [0.4635],
        [0.2759],
        [0.1416],
        [0.3148],
        [0.2759],
        [0.2002],
        [0.1170],
        [0.9481]], device='mps:0', grad_fn=<SigmoidBackward0>)
tensor([[0.6086],
        [0.9429],
        [0.5611],
        [0.0956],
        [0.1840],
        [0

tensor([[0.4529],
        [0.6798],
        [0.0921],
        [0.6427],
        [0.3310],
        [0.8943],
        [0.1682],
        [0.2132],
        [0.2221],
        [0.0840]], device='mps:0', grad_fn=<SigmoidBackward0>)
tensor([[0.4480],
        [0.9350],
        [0.4263],
        [0.1764],
        [0.6147],
        [0.5425],
        [0.1159],
        [0.1238],
        [0.2651],
        [0.0984]], device='mps:0', grad_fn=<SigmoidBackward0>)
tensor([[0.2189],
        [0.1306],
        [0.0951],
        [0.8572],
        [0.2294],
        [0.1140],
        [0.9134],
        [0.4994],
        [0.2366],
        [0.4157]], device='mps:0', grad_fn=<SigmoidBackward0>)
tensor([[0.8767],
        [0.2789],
        [0.0648],
        [0.3889],
        [0.1228],
        [0.4307],
        [0.0992],
        [0.4265],
        [0.9081],
        [0.2054]], device='mps:0', grad_fn=<SigmoidBackward0>)
tensor([[0.9541],
        [0.5995],
        [0.6264],
        [0.2041],
        [0.1272],
        [0

tensor([[0.1458],
        [0.2617],
        [0.0483],
        [0.6794],
        [0.9229],
        [0.2488],
        [0.3329],
        [0.0789],
        [0.3194],
        [0.7561]], device='mps:0', grad_fn=<SigmoidBackward0>)
tensor([[0.6235],
        [0.1703],
        [0.1484],
        [0.2043],
        [0.4377],
        [0.9271],
        [0.0748],
        [0.0412],
        [0.6993],
        [0.6362]], device='mps:0', grad_fn=<SigmoidBackward0>)
tensor([[0.9678],
        [0.3581],
        [0.0412],
        [0.0625],
        [0.3831],
        [0.2532],
        [0.4932],
        [0.3174],
        [0.2380],
        [0.4879]], device='mps:0', grad_fn=<SigmoidBackward0>)
tensor([[0.7928],
        [0.6518],
        [0.1409],
        [0.1346],
        [0.0629],
        [0.2186],
        [0.1166],
        [0.7920],
        [0.1555],
        [0.8651]], device='mps:0', grad_fn=<SigmoidBackward0>)
tensor([[0.9050],
        [0.1344],
        [0.1033],
        [0.5493],
        [0.5815],
        [0

  0%|          | 0/10 [00:00<?, ?it/s]

Epoch [3], Train Loss : [0.66722] Val Loss : [0.66618] Val Score : [0.66667]
Epoch 0.67: adjusting learning rate of group 0 to 9.9932e-05.


  0%|          | 0/90 [00:00<?, ?it/s]

tensor([[0.1567],
        [0.4950],
        [0.1246],
        [0.5713],
        [0.4089],
        [0.1652],
        [0.0505],
        [0.0984],
        [0.9549],
        [0.7230]], device='mps:0', grad_fn=<SigmoidBackward0>)
tensor([[0.7604],
        [0.1664],
        [0.0442],
        [0.1486],
        [0.7958],
        [0.1975],
        [0.0533],
        [0.4110],
        [0.8239],
        [0.6752]], device='mps:0', grad_fn=<SigmoidBackward0>)
tensor([[0.6622],
        [0.0339],
        [0.0395],
        [0.7634],
        [0.2459],
        [0.2148],
        [0.7556],
        [0.3412],
        [0.4317],
        [0.7004]], device='mps:0', grad_fn=<SigmoidBackward0>)
tensor([[0.5566],
        [0.9287],
        [0.7445],
        [0.0541],
        [0.1476],
        [0.1038],
        [0.5494],
        [0.2356],
        [0.0589],
        [0.5713]], device='mps:0', grad_fn=<SigmoidBackward0>)
tensor([[0.0508],
        [0.6995],
        [0.1211],
        [0.3961],
        [0.1634],
        [0

tensor([[0.6019],
        [0.9070],
        [0.1008],
        [0.6872],
        [0.0525],
        [0.1983],
        [0.6216],
        [0.1756],
        [0.0390],
        [0.7218]], device='mps:0', grad_fn=<SigmoidBackward0>)
tensor([[0.1541],
        [0.6422],
        [0.1598],
        [0.0905],
        [0.1017],
        [0.9126],
        [0.0342],
        [0.3327],
        [0.8247],
        [0.6748]], device='mps:0', grad_fn=<SigmoidBackward0>)
tensor([[0.0842],
        [0.0853],
        [0.1179],
        [0.1504],
        [0.4272],
        [0.2171],
        [0.3559],
        [0.8128],
        [0.9574],
        [0.4123]], device='mps:0', grad_fn=<SigmoidBackward0>)
tensor([[0.9429],
        [0.1037],
        [0.0833],
        [0.1052],
        [0.7879],
        [0.1729],
        [0.3407],
        [0.1636],
        [0.5709],
        [0.4015]], device='mps:0', grad_fn=<SigmoidBackward0>)
tensor([[0.0428],
        [0.1096],
        [0.6690],
        [0.5066],
        [0.1664],
        [0

tensor([[0.0742],
        [0.2483],
        [0.1865],
        [0.9178],
        [0.1840],
        [0.0677],
        [0.4750],
        [0.8985],
        [0.2826],
        [0.2085]], device='mps:0', grad_fn=<SigmoidBackward0>)
tensor([[0.0790],
        [0.2248],
        [0.0830],
        [0.6784],
        [0.9070],
        [0.5549],
        [0.0693],
        [0.5613],
        [0.0386],
        [0.8127]], device='mps:0', grad_fn=<SigmoidBackward0>)
tensor([[0.6224],
        [0.0635],
        [0.8433],
        [0.7997],
        [0.1829],
        [0.2485],
        [0.0496],
        [0.0829],
        [0.8271],
        [0.1889]], device='mps:0', grad_fn=<SigmoidBackward0>)
tensor([[0.1125],
        [0.1762],
        [0.3439],
        [0.1154],
        [0.9431],
        [0.1194],
        [0.8564],
        [0.2047],
        [0.4462],
        [0.1109]], device='mps:0', grad_fn=<SigmoidBackward0>)
tensor([[0.5978],
        [0.4908],
        [0.0872],
        [0.3621],
        [0.4521],
        [0

  0%|          | 0/10 [00:00<?, ?it/s]

Epoch [4], Train Loss : [0.65988] Val Loss : [0.65917] Val Score : [0.66372]
Epoch 0.66: adjusting learning rate of group 0 to 9.9933e-05.


  0%|          | 0/90 [00:00<?, ?it/s]

tensor([[0.6425],
        [0.1647],
        [0.3803],
        [0.9373],
        [0.0631],
        [0.4621],
        [0.8064],
        [0.0798],
        [0.1670],
        [0.0682]], device='mps:0', grad_fn=<SigmoidBackward0>)
tensor([[0.7713],
        [0.2628],
        [0.6945],
        [0.3695],
        [0.0519],
        [0.1274],
        [0.3011],
        [0.9265],
        [0.1353],
        [0.0726]], device='mps:0', grad_fn=<SigmoidBackward0>)
tensor([[0.6384],
        [0.3744],
        [0.8708],
        [0.7692],
        [0.0708],
        [0.7489],
        [0.0394],
        [0.1505],
        [0.1226],
        [0.1915]], device='mps:0', grad_fn=<SigmoidBackward0>)
tensor([[0.0524],
        [0.0852],
        [0.3261],
        [0.8707],
        [0.1264],
        [0.8705],
        [0.6246],
        [0.3449],
        [0.5310],
        [0.0586]], device='mps:0', grad_fn=<SigmoidBackward0>)
tensor([[0.8105],
        [0.5206],
        [0.9081],
        [0.3580],
        [0.1105],
        [0

tensor([[0.3965],
        [0.3512],
        [0.0472],
        [0.0720],
        [0.0559],
        [0.8230],
        [0.1637],
        [0.9408],
        [0.2398],
        [0.6579]], device='mps:0', grad_fn=<SigmoidBackward0>)
tensor([[0.9463],
        [0.0591],
        [0.1034],
        [0.5002],
        [0.1551],
        [0.6321],
        [0.0588],
        [0.3300],
        [0.8000],
        [0.0882]], device='mps:0', grad_fn=<SigmoidBackward0>)
tensor([[0.0925],
        [0.4812],
        [0.1658],
        [0.0618],
        [0.3426],
        [0.3018],
        [0.4250],
        [0.9736],
        [0.5333],
        [0.0504]], device='mps:0', grad_fn=<SigmoidBackward0>)
tensor([[0.7137],
        [0.0694],
        [0.0564],
        [0.0832],
        [0.0734],
        [0.3685],
        [0.1240],
        [0.6136],
        [0.6457],
        [0.9487]], device='mps:0', grad_fn=<SigmoidBackward0>)
tensor([[0.1691],
        [0.1009],
        [0.1966],
        [0.0572],
        [0.2540],
        [0

tensor([[0.1676],
        [0.1137],
        [0.9638],
        [0.0520],
        [0.2815],
        [0.7576],
        [0.5413],
        [0.1982],
        [0.0407],
        [0.4834]], device='mps:0', grad_fn=<SigmoidBackward0>)
tensor([[0.7246],
        [0.3140],
        [0.0675],
        [0.6861],
        [0.1130],
        [0.8798],
        [0.9002],
        [0.0474],
        [0.0797],
        [0.0667]], device='mps:0', grad_fn=<SigmoidBackward0>)
tensor([[0.2293],
        [0.4932],
        [0.0139],
        [0.9645],
        [0.3493],
        [0.6100],
        [0.6052],
        [0.2312],
        [0.1108],
        [0.1625]], device='mps:0', grad_fn=<SigmoidBackward0>)
tensor([[0.1679],
        [0.6297],
        [0.5347],
        [0.0581],
        [0.5056],
        [0.3579],
        [0.3750],
        [0.0764],
        [0.0255],
        [0.9667]], device='mps:0', grad_fn=<SigmoidBackward0>)
tensor([[0.0495],
        [0.4702],
        [0.9232],
        [0.2160],
        [0.8990],
        [0

  0%|          | 0/10 [00:00<?, ?it/s]

Epoch [5], Train Loss : [0.66106] Val Loss : [0.64404] Val Score : [0.71492]
Epoch 0.64: adjusting learning rate of group 0 to 9.9936e-05.


  0%|          | 0/90 [00:00<?, ?it/s]

tensor([[0.4373],
        [0.0914],
        [0.0290],
        [0.7009],
        [0.0296],
        [0.5581],
        [0.3123],
        [0.1029],
        [0.7639],
        [0.9409]], device='mps:0', grad_fn=<SigmoidBackward0>)
tensor([[0.0281],
        [0.8953],
        [0.7679],
        [0.1178],
        [0.0298],
        [0.1930],
        [0.3078],
        [0.2141],
        [0.8799],
        [0.4547]], device='mps:0', grad_fn=<SigmoidBackward0>)
tensor([[0.4985],
        [0.2143],
        [0.9448],
        [0.3545],
        [0.2547],
        [0.2513],
        [0.0857],
        [0.9022],
        [0.0722],
        [0.0273]], device='mps:0', grad_fn=<SigmoidBackward0>)
tensor([[0.9362],
        [0.8987],
        [0.4724],
        [0.0921],
        [0.0885],
        [0.1988],
        [0.6100],
        [0.1113],
        [0.0533],
        [0.0980]], device='mps:0', grad_fn=<SigmoidBackward0>)
tensor([[0.9458],
        [0.1226],
        [0.4513],
        [0.1251],
        [0.0502],
        [0

tensor([[0.4198],
        [0.0497],
        [0.0786],
        [0.0624],
        [0.2627],
        [0.7616],
        [0.5728],
        [0.9658],
        [0.0804],
        [0.3109]], device='mps:0', grad_fn=<SigmoidBackward0>)
tensor([[0.1262],
        [0.2038],
        [0.0209],
        [0.2003],
        [0.7214],
        [0.1501],
        [0.2603],
        [0.2984],
        [0.4241],
        [0.9752]], device='mps:0', grad_fn=<SigmoidBackward0>)
tensor([[0.9126],
        [0.7990],
        [0.1403],
        [0.1702],
        [0.1170],
        [0.8979],
        [0.0861],
        [0.3624],
        [0.0301],
        [0.1381]], device='mps:0', grad_fn=<SigmoidBackward0>)
tensor([[0.0911],
        [0.0610],
        [0.1077],
        [0.6361],
        [0.9719],
        [0.2611],
        [0.2765],
        [0.1577],
        [0.0782],
        [0.6793]], device='mps:0', grad_fn=<SigmoidBackward0>)
tensor([[0.0731],
        [0.4133],
        [0.0671],
        [0.3521],
        [0.1922],
        [0

tensor([[0.0245],
        [0.8838],
        [0.2137],
        [0.9432],
        [0.4906],
        [0.1542],
        [0.0421],
        [0.1579],
        [0.1381],
        [0.6485]], device='mps:0', grad_fn=<SigmoidBackward0>)
tensor([[0.0368],
        [0.1281],
        [0.2050],
        [0.1738],
        [0.9277],
        [0.3796],
        [0.0862],
        [0.0431],
        [0.8414],
        [0.8582]], device='mps:0', grad_fn=<SigmoidBackward0>)
tensor([[0.0543],
        [0.2085],
        [0.0548],
        [0.4935],
        [0.8906],
        [0.9129],
        [0.1006],
        [0.0624],
        [0.8110],
        [0.1054]], device='mps:0', grad_fn=<SigmoidBackward0>)
tensor([[0.5232],
        [0.1242],
        [0.6873],
        [0.5545],
        [0.0692],
        [0.8289],
        [0.0274],
        [0.0379],
        [0.9440],
        [0.1010]], device='mps:0', grad_fn=<SigmoidBackward0>)
tensor([[0.3924],
        [0.1918],
        [0.8884],
        [0.0498],
        [0.0613],
        [0

  0%|          | 0/10 [00:00<?, ?it/s]

Epoch [6], Train Loss : [0.65622] Val Loss : [0.66214] Val Score : [0.47246]
Epoch 0.66: adjusting learning rate of group 0 to 9.9932e-05.


  0%|          | 0/90 [00:00<?, ?it/s]

tensor([[0.5190],
        [0.0604],
        [0.9463],
        [0.0953],
        [0.0339],
        [0.6223],
        [0.0181],
        [0.3376],
        [0.6387],
        [0.7468]], device='mps:0', grad_fn=<SigmoidBackward0>)
tensor([[0.1161],
        [0.1746],
        [0.2952],
        [0.4935],
        [0.3169],
        [0.7588],
        [0.0304],
        [0.9772],
        [0.0823],
        [0.0777]], device='mps:0', grad_fn=<SigmoidBackward0>)
tensor([[0.2739],
        [0.5948],
        [0.9252],
        [0.0354],
        [0.9342],
        [0.1022],
        [0.0719],
        [0.0809],
        [0.1135],
        [0.4045]], device='mps:0', grad_fn=<SigmoidBackward0>)
tensor([[0.2803],
        [0.7466],
        [0.2288],
        [0.1247],
        [0.9787],
        [0.0699],
        [0.1494],
        [0.4783],
        [0.0331],
        [0.1655]], device='mps:0', grad_fn=<SigmoidBackward0>)
tensor([[0.0412],
        [0.7969],
        [0.1421],
        [0.0201],
        [0.9494],
        [0

tensor([[0.0430],
        [0.5518],
        [0.6367],
        [0.6778],
        [0.9685],
        [0.0418],
        [0.5193],
        [0.0517],
        [0.2036],
        [0.0480]], device='mps:0', grad_fn=<SigmoidBackward0>)
tensor([[0.0818],
        [0.0540],
        [0.8492],
        [0.1554],
        [0.5451],
        [0.9250],
        [0.0281],
        [0.8353],
        [0.0386],
        [0.3556]], device='mps:0', grad_fn=<SigmoidBackward0>)
tensor([[0.3281],
        [0.4907],
        [0.8951],
        [0.0329],
        [0.0902],
        [0.7426],
        [0.0603],
        [0.0396],
        [0.3259],
        [0.8862]], device='mps:0', grad_fn=<SigmoidBackward0>)
tensor([[0.6077],
        [0.9070],
        [0.9582],
        [0.2675],
        [0.0485],
        [0.0660],
        [0.0772],
        [0.0815],
        [0.0853],
        [0.3687]], device='mps:0', grad_fn=<SigmoidBackward0>)
tensor([[0.1125],
        [0.4052],
        [0.0356],
        [0.2468],
        [0.0746],
        [0

tensor([[0.0557],
        [0.0449],
        [0.0360],
        [0.9090],
        [0.3238],
        [0.0648],
        [0.7664],
        [0.5535],
        [0.9288],
        [0.0864]], device='mps:0', grad_fn=<SigmoidBackward0>)
tensor([[0.0609],
        [0.9445],
        [0.0624],
        [0.0421],
        [0.3883],
        [0.5518],
        [0.5208],
        [0.0621],
        [0.9219],
        [0.0563]], device='mps:0', grad_fn=<SigmoidBackward0>)
tensor([[0.9509],
        [0.0710],
        [0.8482],
        [0.1164],
        [0.5866],
        [0.0656],
        [0.3059],
        [0.0275],
        [0.7428],
        [0.0551]], device='mps:0', grad_fn=<SigmoidBackward0>)
tensor([[0.0398],
        [0.0441],
        [0.4177],
        [0.0421],
        [0.5952],
        [0.1633],
        [0.9122],
        [0.4683],
        [0.9431],
        [0.0585]], device='mps:0', grad_fn=<SigmoidBackward0>)
tensor([[0.4933],
        [0.1651],
        [0.6836],
        [0.0222],
        [0.9669],
        [0

  0%|          | 0/10 [00:00<?, ?it/s]

Epoch [7], Train Loss : [0.65270] Val Loss : [0.63417] Val Score : [0.71264]
Epoch 0.63: adjusting learning rate of group 0 to 9.9938e-05.


  0%|          | 0/90 [00:00<?, ?it/s]

tensor([[0.0474],
        [0.6354],
        [0.9231],
        [0.7545],
        [0.8684],
        [0.0293],
        [0.5062],
        [0.0884],
        [0.0401],
        [0.0617]], device='mps:0', grad_fn=<SigmoidBackward0>)
tensor([[0.5298],
        [0.0457],
        [0.0838],
        [0.0454],
        [0.0619],
        [0.0803],
        [0.9040],
        [0.7599],
        [0.2548],
        [0.9348]], device='mps:0', grad_fn=<SigmoidBackward0>)
tensor([[0.1142],
        [0.0840],
        [0.4051],
        [0.9432],
        [0.0651],
        [0.1802],
        [0.5206],
        [0.0571],
        [0.0807],
        [0.9372]], device='mps:0', grad_fn=<SigmoidBackward0>)
tensor([[0.0810],
        [0.0266],
        [0.2195],
        [0.0413],
        [0.9363],
        [0.7815],
        [0.2140],
        [0.8577],
        [0.6317],
        [0.0443]], device='mps:0', grad_fn=<SigmoidBackward0>)
tensor([[0.0858],
        [0.3145],
        [0.0796],
        [0.0577],
        [0.3728],
        [0

tensor([[0.8918],
        [0.0193],
        [0.0463],
        [0.6525],
        [0.8070],
        [0.0805],
        [0.0429],
        [0.0687],
        [0.8253],
        [0.6243]], device='mps:0', grad_fn=<SigmoidBackward0>)
tensor([[0.6779],
        [0.0461],
        [0.0394],
        [0.9727],
        [0.2490],
        [0.4733],
        [0.2003],
        [0.7689],
        [0.0210],
        [0.0914]], device='mps:0', grad_fn=<SigmoidBackward0>)
tensor([[0.0523],
        [0.6560],
        [0.1085],
        [0.1479],
        [0.9829],
        [0.0641],
        [0.0665],
        [0.3673],
        [0.6629],
        [0.0955]], device='mps:0', grad_fn=<SigmoidBackward0>)
tensor([[0.7233],
        [0.0273],
        [0.9655],
        [0.1066],
        [0.2396],
        [0.6196],
        [0.0349],
        [0.1716],
        [0.7661],
        [0.0301]], device='mps:0', grad_fn=<SigmoidBackward0>)
tensor([[0.0727],
        [0.1686],
        [0.0393],
        [0.9689],
        [0.6537],
        [0

tensor([[0.0412],
        [0.3781],
        [0.9847],
        [0.0314],
        [0.0856],
        [0.1131],
        [0.6666],
        [0.2879],
        [0.3585],
        [0.2170]], device='mps:0', grad_fn=<SigmoidBackward0>)
tensor([[0.6388],
        [0.0262],
        [0.1537],
        [0.0360],
        [0.0529],
        [0.7029],
        [0.9727],
        [0.1472],
        [0.1161],
        [0.6873]], device='mps:0', grad_fn=<SigmoidBackward0>)
tensor([[0.1333],
        [0.5695],
        [0.1215],
        [0.0421],
        [0.0207],
        [0.3729],
        [0.4103],
        [0.8112],
        [0.0442],
        [0.9723]], device='mps:0', grad_fn=<SigmoidBackward0>)
tensor([[0.1213],
        [0.0610],
        [0.0708],
        [0.9774],
        [0.0421],
        [0.1069],
        [0.7974],
        [0.4272],
        [0.5729],
        [0.0712]], device='mps:0', grad_fn=<SigmoidBackward0>)
tensor([[0.0528],
        [0.0736],
        [0.1557],
        [0.2131],
        [0.6279],
        [0

  0%|          | 0/10 [00:00<?, ?it/s]

Epoch [8], Train Loss : [0.65038] Val Loss : [0.66523] Val Score : [0.43574]
Epoch 0.67: adjusting learning rate of group 0 to 9.9932e-05.


  0%|          | 0/90 [00:00<?, ?it/s]

tensor([[0.7133],
        [0.1584],
        [0.5477],
        [0.0411],
        [0.1011],
        [0.0916],
        [0.9777],
        [0.0770],
        [0.6241],
        [0.0341]], device='mps:0', grad_fn=<SigmoidBackward0>)
tensor([[0.0637],
        [0.3862],
        [0.0228],
        [0.0362],
        [0.6342],
        [0.3714],
        [0.0204],
        [0.5766],
        [0.9766],
        [0.6877]], device='mps:0', grad_fn=<SigmoidBackward0>)
tensor([[0.0254],
        [0.0171],
        [0.0218],
        [0.5106],
        [0.3811],
        [0.9114],
        [0.0950],
        [0.6492],
        [0.5525],
        [0.9168]], device='mps:0', grad_fn=<SigmoidBackward0>)
tensor([[0.1091],
        [0.1663],
        [0.5953],
        [0.9069],
        [0.1178],
        [0.7794],
        [0.0642],
        [0.9198],
        [0.0801],
        [0.0114]], device='mps:0', grad_fn=<SigmoidBackward0>)
tensor([[0.8905],
        [0.9021],
        [0.0291],
        [0.7753],
        [0.0369],
        [0

tensor([[0.0424],
        [0.0396],
        [0.0260],
        [0.2916],
        [0.5585],
        [0.7685],
        [0.9136],
        [0.9253],
        [0.0335],
        [0.2230]], device='mps:0', grad_fn=<SigmoidBackward0>)


wandb: Network error (ConnectionError), entering retry loop.


tensor([[0.0820],
        [0.0352],
        [0.8728],
        [0.9378],
        [0.0512],
        [0.5241],
        [0.5644],
        [0.0489],
        [0.0313],
        [0.7082]], device='mps:0', grad_fn=<SigmoidBackward0>)
tensor([[0.0408],
        [0.0651],
        [0.4257],
        [0.0849],
        [0.0857],
        [0.9027],
        [0.7766],
        [0.0405],
        [0.9419],
        [0.1583]], device='mps:0', grad_fn=<SigmoidBackward0>)
tensor([[0.0783],
        [0.0517],
        [0.9584],
        [0.2288],
        [0.0605],
        [0.5700],
        [0.7277],
        [0.8620],
        [0.0602],
        [0.0377]], device='mps:0', grad_fn=<SigmoidBackward0>)
tensor([[0.6158],
        [0.0362],
        [0.5791],
        [0.2181],
        [0.0340],
        [0.7338],
        [0.0203],
        [0.3636],
        [0.9774],
        [0.0700]], device='mps:0', grad_fn=<SigmoidBackward0>)
tensor([[0.4721],
        [0.0244],
        [0.2056],
        [0.8667],
        [0.0530],
        [0

tensor([[0.1758],
        [0.3974],
        [0.0280],
        [0.9833],
        [0.0393],
        [0.0272],
        [0.6098],
        [0.3646],
        [0.1156],
        [0.6379]], device='mps:0', grad_fn=<SigmoidBackward0>)
tensor([[0.8186],
        [0.5549],
        [0.0663],
        [0.0281],
        [0.6554],
        [0.6199],
        [0.0872],
        [0.0690],
        [0.0117],
        [0.9555]], device='mps:0', grad_fn=<SigmoidBackward0>)
tensor([[0.0180],
        [0.5321],
        [0.3183],
        [0.0072],
        [0.7526],
        [0.3787],
        [0.0129],
        [0.7247],
        [0.6853],
        [0.9299]], device='mps:0', grad_fn=<SigmoidBackward0>)
tensor([[0.0962],
        [0.0699],
        [0.0740],
        [0.1759],
        [0.5700],
        [0.6771],
        [0.0842],
        [0.7029],
        [0.0168],
        [0.9803]], device='mps:0', grad_fn=<SigmoidBackward0>)
tensor([[0.0207],
        [0.0137],
        [0.9276],
        [0.9040],
        [0.0579],
        [0

  0%|          | 0/10 [00:00<?, ?it/s]

Epoch [9], Train Loss : [0.65573] Val Loss : [0.65318] Val Score : [0.61279]
Epoch 0.65: adjusting learning rate of group 0 to 9.9934e-05.


  0%|          | 0/90 [00:00<?, ?it/s]

tensor([[0.3432],
        [0.5645],
        [0.0152],
        [0.0425],
        [0.9723],
        [0.8709],
        [0.0354],
        [0.0595],
        [0.5182],
        [0.1847]], device='mps:0', grad_fn=<SigmoidBackward0>)
tensor([[0.2059],
        [0.8194],
        [0.4668],
        [0.8464],
        [0.8768],
        [0.8170],
        [0.0177],
        [0.0511],
        [0.0142],
        [0.0342]], device='mps:0', grad_fn=<SigmoidBackward0>)
tensor([[0.0774],
        [0.0365],
        [0.0653],
        [0.0336],
        [0.0813],
        [0.9150],
        [0.9269],
        [0.7642],
        [0.0959],
        [0.6229]], device='mps:0', grad_fn=<SigmoidBackward0>)
tensor([[0.9721],
        [0.1509],
        [0.0405],
        [0.2010],
        [0.7544],
        [0.2189],
        [0.7834],
        [0.0244],
        [0.0176],
        [0.4870]], device='mps:0', grad_fn=<SigmoidBackward0>)
tensor([[0.7749],
        [0.0254],
        [0.4877],
        [0.0482],
        [0.9715],
        [0

tensor([[0.0308],
        [0.9651],
        [0.0420],
        [0.0541],
        [0.6035],
        [0.1875],
        [0.9282],
        [0.2034],
        [0.0445],
        [0.3581]], device='mps:0', grad_fn=<SigmoidBackward0>)
tensor([[0.5492],
        [0.2896],
        [0.9796],
        [0.8573],
        [0.1224],
        [0.1248],
        [0.0402],
        [0.0514],
        [0.0810],
        [0.1183]], device='mps:0', grad_fn=<SigmoidBackward0>)
tensor([[0.9729],
        [0.0678],
        [0.1089],
        [0.0464],
        [0.0786],
        [0.7311],
        [0.2258],
        [0.8528],
        [0.0477],
        [0.1592]], device='mps:0', grad_fn=<SigmoidBackward0>)
tensor([[0.9634],
        [0.4609],
        [0.0644],
        [0.2084],
        [0.0952],
        [0.0335],
        [0.1144],
        [0.0263],
        [0.4210],
        [0.9387]], device='mps:0', grad_fn=<SigmoidBackward0>)
tensor([[0.0511],
        [0.9700],
        [0.6564],
        [0.0426],
        [0.2249],
        [0

tensor([[0.0317],
        [0.0407],
        [0.0236],
        [0.5933],
        [0.9743],
        [0.4634],
        [0.1874],
        [0.8916],
        [0.1160],
        [0.0762]], device='mps:0', grad_fn=<SigmoidBackward0>)
tensor([[0.0425],
        [0.7633],
        [0.0336],
        [0.0326],
        [0.0491],
        [0.5009],
        [0.9381],
        [0.3187],
        [0.0414],
        [0.9412]], device='mps:0', grad_fn=<SigmoidBackward0>)
tensor([[0.3569],
        [0.3340],
        [0.0186],
        [0.9772],
        [0.0603],
        [0.0388],
        [0.1260],
        [0.0453],
        [0.5893],
        [0.8832]], device='mps:0', grad_fn=<SigmoidBackward0>)
tensor([[0.6332],
        [0.1820],
        [0.1538],
        [0.0223],
        [0.9776],
        [0.0394],
        [0.6057],
        [0.8060],
        [0.0459],
        [0.0370]], device='mps:0', grad_fn=<SigmoidBackward0>)
tensor([[0.0444],
        [0.0511],
        [0.0414],
        [0.4720],
        [0.9677],
        [0

  0%|          | 0/10 [00:00<?, ?it/s]

Epoch [10], Train Loss : [0.64536] Val Loss : [0.65810] Val Score : [0.50000]
Epoch 0.66: adjusting learning rate of group 0 to 9.9933e-05.


  0%|          | 0/90 [00:00<?, ?it/s]

tensor([[0.0237],
        [0.5138],
        [0.0558],
        [0.0721],
        [0.2386],
        [0.9151],
        [0.1181],
        [0.9745],
        [0.3663],
        [0.0342]], device='mps:0', grad_fn=<SigmoidBackward0>)
tensor([[0.9627],
        [0.7924],
        [0.0617],
        [0.0352],
        [0.3853],
        [0.9001],
        [0.0501],
        [0.0710],
        [0.0940],
        [0.0576]], device='mps:0', grad_fn=<SigmoidBackward0>)
tensor([[0.2183],
        [0.2036],
        [0.9600],
        [0.1230],
        [0.0123],
        [0.1296],
        [0.9196],
        [0.0688],
        [0.7705],
        [0.0511]], device='mps:0', grad_fn=<SigmoidBackward0>)
tensor([[0.0459],
        [0.0676],
        [0.5382],
        [0.9581],
        [0.5482],
        [0.9403],
        [0.0159],
        [0.2143],
        [0.0612],
        [0.0846]], device='mps:0', grad_fn=<SigmoidBackward0>)
tensor([[0.0369],
        [0.0204],
        [0.9481],
        [0.1575],
        [0.0410],
        [0

tensor([[0.0502],
        [0.0638],
        [0.4181],
        [0.8838],
        [0.0311],
        [0.0779],
        [0.6437],
        [0.0591],
        [0.9755],
        [0.0816]], device='mps:0', grad_fn=<SigmoidBackward0>)
tensor([[0.0306],
        [0.1020],
        [0.5804],
        [0.0496],
        [0.9315],
        [0.9711],
        [0.0698],
        [0.2123],
        [0.1208],
        [0.0742]], device='mps:0', grad_fn=<SigmoidBackward0>)
tensor([[0.0467],
        [0.0788],
        [0.0515],
        [0.6985],
        [0.2149],
        [0.9799],
        [0.0642],
        [0.0883],
        [0.0761],
        [0.8591]], device='mps:0', grad_fn=<SigmoidBackward0>)
tensor([[0.0270],
        [0.2172],
        [0.9852],
        [0.1305],
        [0.0283],
        [0.0649],
        [0.6625],
        [0.0297],
        [0.7278],
        [0.4901]], device='mps:0', grad_fn=<SigmoidBackward0>)
tensor([[0.4504],
        [0.9037],
        [0.0273],
        [0.0447],
        [0.9722],
        [0

tensor([[0.0148],
        [0.0813],
        [0.6451],
        [0.8521],
        [0.0900],
        [0.9424],
        [0.0184],
        [0.9008],
        [0.0678],
        [0.1350]], device='mps:0', grad_fn=<SigmoidBackward0>)
tensor([[0.0523],
        [0.0188],
        [0.0624],
        [0.0225],
        [0.4129],
        [0.8955],
        [0.5090],
        [0.0209],
        [0.9398],
        [0.8946]], device='mps:0', grad_fn=<SigmoidBackward0>)
tensor([[0.7662],
        [0.0211],
        [0.0882],
        [0.7790],
        [0.0407],
        [0.0228],
        [0.1500],
        [0.9326],
        [0.9090],
        [0.0500]], device='mps:0', grad_fn=<SigmoidBackward0>)
tensor([[0.4372],
        [0.8286],
        [0.2672],
        [0.0682],
        [0.0144],
        [0.3137],
        [0.1501],
        [0.9863],
        [0.0428],
        [0.0895]], device='mps:0', grad_fn=<SigmoidBackward0>)
tensor([[0.9416],
        [0.4073],
        [0.5110],
        [0.2587],
        [0.1476],
        [0

  0%|          | 0/10 [00:00<?, ?it/s]

Epoch [11], Train Loss : [0.65655] Val Loss : [0.64020] Val Score : [0.71820]
Epoch 0.64: adjusting learning rate of group 0 to 9.9937e-05.


  0%|          | 0/90 [00:00<?, ?it/s]

tensor([[0.6982],
        [0.9521],
        [0.0767],
        [0.0283],
        [0.1724],
        [0.1612],
        [0.9465],
        [0.4470],
        [0.0172],
        [0.0254]], device='mps:0', grad_fn=<SigmoidBackward0>)
tensor([[0.3220],
        [0.8311],
        [0.3170],
        [0.9635],
        [0.0168],
        [0.1905],
        [0.8743],
        [0.0597],
        [0.0269],
        [0.0272]], device='mps:0', grad_fn=<SigmoidBackward0>)
tensor([[0.9822],
        [0.0618],
        [0.4644],
        [0.3273],
        [0.8002],
        [0.0417],
        [0.6678],
        [0.0155],
        [0.1905],
        [0.0145]], device='mps:0', grad_fn=<SigmoidBackward0>)
tensor([[0.5223],
        [0.2950],
        [0.1510],
        [0.0148],
        [0.0898],
        [0.9799],
        [0.8392],
        [0.6327],
        [0.0130],
        [0.0407]], device='mps:0', grad_fn=<SigmoidBackward0>)
tensor([[0.0846],
        [0.0190],
        [0.0222],
        [0.3237],
        [0.9543],
        [0

NotImplementedError: The operator 'aten::histc' is not currently implemented for the MPS device. If you want this op to be added in priority during the prototype phase of this feature, please comment on https://github.com/pytorch/pytorch/issues/77764. As a temporary fix, you can set the environment variable `PYTORCH_ENABLE_MPS_FALLBACK=1` to use the CPU as a fallback for this op. WARNING: this will be slower than running natively on MPS.

wandb: Network error (ReadTimeout), entering retry loop.
wandb: Network error (ReadTimeout), entering retry loop.
wandb: ERROR Error while calling W&B API: internal database error (<Response [500]>)
wandb: Network error (ConnectionError), entering retry loop.
wandb: Network error (ConnectionError), entering retry loop.
wandb: Network error (ConnectionError), entering retry loop.
wandb: Network error (ConnectionError), entering retry loop.
wandb: Network error (ConnectionError), entering retry loop.
wandb: Network error (ConnectionError), entering retry loop.
wandb: Network error (ConnectionError), entering retry loop.
wandb: Network error (ConnectionError), entering retry loop.
wandb: Network error (ConnectionError), entering retry loop.
wandb: Network error (ConnectionError), entering retry loop.
wandb: Network error (ConnectionError), entering retry loop.
wandb: Network error (ConnectionError), entering retry loop.
wandb: Network error (ConnectionError), entering retry loop.
wandb: N

## Inference

In [38]:
model.state_dict(torch.load('best_model.pth'))

OrderedDict([('module.img_feature_extractor.backbone.conv1.weight',
              tensor([[[[ 1.1621e-02,  1.3460e-02, -1.7270e-02,  ..., -4.1604e-02,
                         -4.3836e-02, -7.1067e-02],
                        [ 2.8087e-03,  4.7682e-03,  1.3908e-02,  ...,  2.4000e-03,
                         -2.0863e-02, -3.8716e-02],
                        [ 2.0766e-02,  2.2789e-02,  1.4497e-02,  ...,  1.0222e-01,
                          6.1504e-02,  5.0362e-02],
                        ...,
                        [-2.5452e-03,  2.7358e-02, -1.1304e-02,  ..., -1.2676e-01,
                         -7.6393e-02,  7.2464e-03],
                        [ 2.3338e-03,  4.6975e-02,  6.1181e-02,  ...,  2.3610e-02,
                         -3.3929e-02, -1.6875e-02],
                        [-8.1019e-02, -3.2434e-02, -1.8621e-02,  ...,  3.4472e-02,
                          2.1279e-02,  3.9584e-04]],
              
                       [[-2.0022e-02,  1.0747e-02,  2.2347e-02,  ...,  5.4279

In [35]:
test_dataset = CustomDataset(test_df, None, test_transforms)
test_loader = DataLoader(test_dataset, batch_size=CFG['BATCH_SIZE'], shuffle=False, num_workers=0)

In [36]:
def inference(model, test_loader, device):
    model.to(device)
    model.eval()
    preds = []
    threshold = 0.5
    
    with torch.no_grad():
        for img in tqdm(iter(test_loader)): # , tabular   = 2nd
            img = img.float().to(device)
            # tabular = tabular.float().to(device)
            
            model_pred = model(img)
            
            model_pred = model_pred.squeeze(1).to('cpu')
            
            preds += model_pred.tolist()
    
    preds = np.where(np.array(preds) > threshold, 1, 0)
    
    return preds

In [37]:
preds = inference(model, test_loader, device)

  0%|          | 0/25 [00:00<?, ?it/s]

## Submission

In [38]:
submit = pd.read_csv('/Users/krc/Documents/breast_dacon/open/sample_submission.csv')

In [39]:
submit['N_category'] = preds
submit.to_csv('./submit_1110_size800.csv', index=False)

In [60]:
os.getcwd()

'/Users/krc/Documents/breast_dacon/dacon_bc_prediction'