In [1]:
import os
import sys

In [3]:
import warnings
warnings.filterwarnings("ignore")

In [2]:
import pandas as pd
import cv2
import numpy as np
import torch
import albumentations
import matplotlib.pyplot as plt
import glob
import math
from PIL import Image as Image
import torchvision
from torchvision import transforms
import sklearn.metrics
from sklearn.model_selection import StratifiedKFold
import torch.nn as nn
import torch.nn.functional as FT
import torch.optim as optim
from torch.optim import lr_scheduler
import matplotlib.pyplot as plt
%matplotlib inline
from tqdm import tqdm as tqdm
from albumentations.core.transforms_interface import ImageOnlyTransform
from albumentations.augmentations import functional as F
import albumentations as A
device = torch.device("cuda")
import time
import random

In [4]:
from albumentations.core.transforms_interface import DualTransform
from albumentations.augmentations import functional as F

from PIL import Image, ImageOps, ImageEnhance
from albumentations.core.transforms_interface import ImageOnlyTransform

import numpy as np
from PIL import Image, ImageOps, ImageEnhance
class GridMask(DualTransform):
    def __init__(self, num_grid=3, fill_value=0, rotate=0, mode=0, always_apply=False, p=0.5):
        super(GridMask, self).__init__(always_apply, p)
        if isinstance(num_grid, int):
            num_grid = (num_grid, num_grid)
        if isinstance(rotate, int):
            rotate = (-rotate, rotate)
        self.num_grid = num_grid
        self.fill_value = fill_value
        self.rotate = rotate
        self.mode = mode
        self.masks = None
        self.rand_h_max = []
        self.rand_w_max = []

    def init_masks(self, height, width):
        if self.masks is None:
            self.masks = []
            n_masks = self.num_grid[1] - self.num_grid[0] + 1
            for n, n_g in enumerate(range(self.num_grid[0], self.num_grid[1] + 1, 1)):
                grid_h = height / n_g
                grid_w = width / n_g
                # this_mask = np.ones((int((n_g + 1) * grid_h), int((n_g + 1) * grid_w))).astype(np.uint8)
                this_mask = np.zeros((int((n_g + 1) * grid_h), int((n_g + 1) * grid_w))).astype(np.uint8)

                for i in range(n_g + 1):
                    for j in range(n_g + 1):
                        this_mask[
                             int(i * grid_h) : int(i * grid_h + grid_h / 2),
                             int(j * grid_w) : int(j * grid_w + grid_w / 2)
                        ] = self.fill_value
                        if self.mode == 2:
                            this_mask[
                                 int(i * grid_h + grid_h / 2) : int(i * grid_h + grid_h),
                                 int(j * grid_w + grid_w / 2) : int(j * grid_w + grid_w)
                            ] = self.fill_value
                
                if self.mode == 1:
                    this_mask = 1 - this_mask

                self.masks.append(this_mask)
                self.rand_h_max.append(grid_h)
                self.rand_w_max.append(grid_w)

    def apply(self, image, mask, rand_h, rand_w, angle, **params):
        h, w = image.shape[:2]
        mask = F.rotate(mask, angle) if self.rotate[1] > 0 else mask
        mask = mask[:,:,np.newaxis] if image.ndim == 3 else mask
        image =np.maximum(image , mask[rand_h:rand_h+h, rand_w:rand_w+w].astype(image.dtype))
        # image =image | mask[rand_h:rand_h+h, rand_w:rand_w+w].astype(image.dtype)
        return image

    def get_params_dependent_on_targets(self, params):
        img = params['image']
        height, width = img.shape[:2]
        self.init_masks(height, width)

        mid = np.random.randint(len(self.masks))
        mask = self.masks[mid]
        rand_h = np.random.randint(self.rand_h_max[mid])
        rand_w = np.random.randint(self.rand_w_max[mid])
        angle = np.random.randint(self.rotate[0], self.rotate[1]) if self.rotate[1] > 0 else 0

        return {'mask': mask, 'rand_h': rand_h, 'rand_w': rand_w, 'angle': angle}

    @property
    def targets_as_params(self):
        return ['image']

    def get_transform_init_args_names(self):
        return ('num_grid', 'fill_value', 'rotate', 'mode')

In [5]:
class Albumentations_cls():
    def __init__(self, augmentations):
        self.augmentations  = A.Compose(augmentations)
    
    def __call__(self, image):
        return self.augmentations(image = image)["image"]

In [6]:
class Dataset(torch.utils.data.Dataset):
    def __init__(self, images_path, df, folds, mode, transform = None, transforms_orig= None ):
        df = df[df.fold.isin(folds)].reset_index(drop = True)
        self.image_ids = df["Frame_ID"].values
        self.labels = None
        if mode !="test":
            self.labels = df["Emotion_id"].values
        self.images_path = images_path
        self.mode = mode
        self.transforms = transform
        self.transforms_orig = transforms_orig
        
    def __len__(self):
        return len(self.image_ids)
    def __getitem__(self, index):
        image = cv2.imread(data_dir + self.images_path + "/" + self.image_ids[index])
        image_orig = image.astype(np.float32).copy()
        
        if self.transforms:
            image = self.transforms(image)
            image = transforms.Normalize(MEAN, STD)(image)
        
        if self.transforms_orig:
            image_orig = self.transforms(image_orig)
            image_orig = transforms.Normalize(MEAN, STD)(image_orig)
            
            
        if self.mode != "test":
            label = self.labels[index]
            return torch.tensor(image), torch.tensor(image_orig), torch.tensor(label)
        
        return torch.tensor(image), torch.tensor(image_orig)
            

In [7]:
freeze_layers = 600
FREEZE = True
if FREEZE:
    if freeze_layers == None:
        kernel_type = "efficientnet-b7_base_freeze_gridmask_augmix_fulltrain_extra_dataset"
    else:
        kernel_type = f"efficientnet-b7_{freeze_layers}_freeze_gridmask_augmix_fulltrain_extra_dataset"
else:
    kernel_type = "efficientnet-b7_gridmask_augmix_fulltrain_extra_dataset"
backbone = "resnet-18"
RANDOM_STATE = 47
MEAN = [0.485, 0.456, 0.406] 
STD = [0.229, 0.224, 0.225]
n_folds = 5
n_epochs = 380
HEIGHT = 224
WIDTH = 224
num_workers = 0
batch_size = 32
data_dir = "./Dataset/"
idx2class = {i:class_name for i,class_name in enumerate(["angry", "happy", "sad", "surprised", "Unknown"]) }
class2idx = {class_name:i for i,class_name in enumerate(["angry", "happy", "sad", "surprised", "Unknown"]) }
out_dim = len(["angry", "happy", "sad", "surprised", "Unknown"])
fold = 6
out_dim  = len(idx2class)
init_lr = 0.01

In [8]:
df = pd.read_csv(data_dir + "train_corrected_balanced.csv")
df.loc[df.Emotion == "suprised", "Emotion"] = "surprised"
df["Emotion_id"] = df.Emotion.map(class2idx)
skf = StratifiedKFold(n_folds, shuffle = True, random_state = RANDOM_STATE)
df = df[df.Emotion != "Unknown"]
df = df[df.Frame_ID.isin(os.listdir(data_dir + "train"))].reset_index(drop = True)
for i_fold, (train_idx, val_idx) in enumerate(skf.split(df, df.Emotion_id)):
    df.loc[val_idx, "fold"] = i_fold
df.fold = df.fold.astype(np.int)


In [9]:
preprocess = [
    A.Resize(height = HEIGHT, width = WIDTH, always_apply=True),
]

augmentations = [
    A.OneOf([
        A.MotionBlur(blur_limit=3),
        A.MedianBlur(blur_limit=3),
        A.GaussianBlur(blur_limit=3),
    ], p=0.65),
    # A.OneOf([
    #     A.OpticalDistortion(distort_limit=0.5),
    #     A.GridDistortion(num_steps=2, distort_limit=0.5),
    # ], p=0.6),
    A.imgaug.transforms.IAAAffine(shear=5, mode='constant', cval=255, p = 0.65),
    A.OneOf([
        A.ShiftScaleRotate(rotate_limit=90, border_mode=cv2.BORDER_CONSTANT, value=[255, 255, 255], mask_value=[255, 255, 255], always_apply=True),
        GridMask(mode = 0, always_apply= True),
        GridMask(mode = 1, always_apply= True),
        GridMask(mode = 2, always_apply= True)
    ], 0.75)
]
transforms_train = transforms.Compose([
    np.uint8,    
    Albumentations_cls(preprocess + augmentations),
    transforms.ToTensor(),
    
])

transforms_val = transforms.Compose([
    np.uint8,
    # transforms.Normalize(mean = MEAN, std = STD),
    Albumentations_cls(preprocess),
    transforms.ToTensor(),
    
])
transforms_orig = transforms.Compose([
    np.uint8,    
    Albumentations_cls(preprocess ),
    transforms.ToTensor(),
])

In [11]:
pretrained_dict = {f'efficientnet-b{i}': path for i,path in enumerate(sorted(glob.glob('../input/efficientnet-pytorch/*pth')))}
from efficientnet_pytorch import EfficientNet
import torchvision.models as models
sigmoid = nn.Sigmoid()
class Swish(torch.autograd.Function):
    @staticmethod
    def forward(ctx, i):
        result = i * sigmoid(i)
        ctx.save_for_backward(i)
        return result
    
    def backward(ctx, grad_output):
        i = ctx.saved_variables[0]
        sigmoid_i = sigmoid(i)
        return grad_output * (sigmoid_i + i*(1 - sigmoid_i))
swish = Swish.apply
class Swish_module(nn.Module):
    def forward(self, x):
        return swish(x)
    
swish_layer = Swish_module()

def relu_fn(x):
    return swish_layer(x)

class GlobalAvgPool(nn.Module):
        def __init__(self):
            super(GlobalAvgPool, self).__init__()
        def forward(self, x):
            return x.view(*(x.shape[:-2]),-1).mean(-1)


class Seq_Ex_Block(nn.Module):
        def __init__(self, in_ch, r):
            super(Seq_Ex_Block, self).__init__()
            self.se = nn.Sequential(
                GlobalAvgPool(),
                nn.Linear(in_ch, in_ch//r),
                nn.ReLU(inplace=True),
                nn.Linear(in_ch//r, in_ch),
                nn.Sigmoid()
            )

        def forward(self, x):
            se_weight = self.se(x).unsqueeze(-1).unsqueeze(-1)
            #print(f'x:{x.sum()}, x_se:{x.mul(se_weight).sum()}')
            return x.mul(se_weight)

class Flatten(nn.Module):
    def forward(self, input):
        return input.view(input.size(0), -1)
      
class ClassifierNew(nn.Module):
    def __init__(self, inp = 2208, h1=1024, out = 102, d=0.35):
        super().__init__()
        self.ap = nn.AdaptiveAvgPool2d((1,1))
        self.mp = nn.AdaptiveMaxPool2d((1,1))
        self.fla = Flatten()
        self.bn0 = nn.BatchNorm1d(inp*2,eps=1e-05, momentum=0.1, affine=True)
        self.dropout0 = nn.Dropout(d)
        self.fc1 = nn.Linear(inp*2, h1)
        self.bn1 = nn.BatchNorm1d(h1,eps=1e-05, momentum=0.1, affine=True)
        self.dropout1 = nn.Dropout(d)
        self.fc2 = nn.Linear(h1, out)
        self.activation = nn.Softmax()
        
    def forward(self, x):
        ap = self.ap(x)
        mp = self.mp(x)
        x = torch.cat((ap,mp),dim=1)
        x = self.fla(x)
        x = self.bn0(x)
        x = self.dropout0(x)
        x = FT.relu(self.fc1(x))
        x = self.bn1(x)
        x = self.dropout1(x)         
        x = self.fc2(x)
        x = self.activation(x)
        return x
class NeuralNet(nn.Module):
    def __init__(self, pretrained = True, Freeze_base = False, layers_freeze = None):
        super(NeuralNet, self).__init__()
        # self.cnn = models.resnet101(pretrained= pretrained)
        # self.cnn = models.resnet50(pretrained= pretrained)
        self.cnn = EfficientNet.from_pretrained('efficientnet-b7')
        self.cnn = nn.Sequential(*list(self.cnn.children())[:-2])
        if Freeze_base:
            if layers_freeze == None:
                for p in self.cnn.parameters():
                    p.requires_grad = False
            else:
                c = 0
                for p in self.cnn.parameters():
                    c+=1
                    if c < layers_freeze:
                        p.requires_grad = False
                    else:
                        p.requires_grad = True
                        

        self.fc = ClassifierNew(2048, 1024, 4, 0.35)
    def forward(self, input):
        x = self.cnn(input)
        x = self.fc(x)
        return x
class EfficientNet_NeuralNet(nn.Module):
    def __init__(self, pretrained = True, Freeze_base = False, layers_freeze = None):
        super(EfficientNet_NeuralNet, self).__init__()
        
        self.cnn = EfficientNet.from_pretrained('efficientnet-b7')
        self.cnn._avg_pooling = nn.Identity()
        self.cnn._dropout = nn.Identity()
        self.cnn._swish = nn.Identity()
        if Freeze_base:
            if layers_freeze == None:
                for p in self.cnn.parameters():
                    p.requires_grad = False
            else:
                c = 0
                for p in self.cnn.parameters():
                    c+=1
                    if c < layers_freeze:
                        p.requires_grad = False
                    else:
                        p.requires_grad = True
        self.fc = ClassifierNew(2560, 1024, 4, 0.35)
        self.cnn._fc = nn.Identity()
    def forward(self, input):
        x = self.cnn.extract_features(input)
        x = self.fc(x)
        return x

In [10]:
from torch.autograd import Variable

class FocalLoss(nn.Module):
    def __init__(self, gamma=0, alpha=None, size_average=True):
        super(FocalLoss, self).__init__()
        self.gamma = gamma
        self.alpha = alpha
        if isinstance(alpha,(float,int)): self.alpha = torch.Tensor([alpha,1-alpha])
        if isinstance(alpha,list): self.alpha = torch.Tensor(alpha)
        self.size_average = size_average

    def forward(self, input, target):
        if input.dim()>2:
            input = input.view(input.size(0),input.size(1),-1)  # N,C,H,W => N,C,H*W
            input = input.transpose(1,2)    # N,C,H*W => N,H*W,C
            input = input.contiguous().view(-1,input.size(2))   # N,H*W,C => N*H*W,C
        target = target.view(-1,1)

        logpt = FT.log_softmax(input)
        logpt = logpt.gather(1,target)
        logpt = logpt.view(-1)
        pt = Variable(logpt.data.exp())

        if self.alpha is not None:
            if self.alpha.type()!=input.data.type():
                self.alpha = self.alpha.type_as(input.data)
            at = self.alpha.gather(0,target.data.view(-1))
            logpt = logpt * Variable(at)

        loss = -1 * (1-pt)**self.gamma * logpt
        if self.size_average: return loss.mean()
        else: return loss.sum()

In [12]:
class F1_Loss(nn.Module):

    def __init__(self, epsilon=1e-7):
        super().__init__()
        self.epsilon = epsilon
        
    def forward(self, y_pred, y_true,):
        assert y_pred.ndim == 2
        assert y_true.ndim == 1
        y_true_unique = torch.tensor([0,1,2,3])
        w = torch.stack([(y_true==x_u).sum() for x_u in y_true_unique])

        y_true = FT.one_hot(y_true, 4).to(torch.float32)
        y_pred = FT.softmax(y_pred, dim=1)


        tp = (y_true * y_pred).sum(dim=0).to(torch.float32)
        tn = ((1 - y_true) * (1 - y_pred)).sum(dim=0).to(torch.float32)
        fp = ((1 - y_true) * y_pred).sum(dim=0).to(torch.float32)
        fn = (y_true * (1 - y_pred)).sum(dim=0).to(torch.float32)

        precision = tp / (tp + fp + self.epsilon)
        recall = tp / (tp + fn + self.epsilon)

        f1 = 2* (precision*recall) / (precision + recall + self.epsilon)
        f1 = f1.clamp(min=self.epsilon, max=1-self.epsilon)
        return 1 - ((f1 * w).sum(dim = 0))/(w.sum(dim = 0))
        # return 1 - f1.mean()


In [13]:
# logits = torch.from_numpy(np.asarray([[1.0, 0.0, 0.0, 0.0], [1.0, 0.0, 0.0, 0.0]]))
# targets = torch.from_numpy(np.asarray([0,2])).to(torch.int64)
# loss = F1_Loss()(logits, targets)
# print(loss)

In [14]:
from sklearn.metrics import f1_score
def criterion(logits, targets):
    return FocalLoss()(logits, targets)
    # return nn.CrossEntropyLoss(weight = cls_wts)(logits, targets)
    # return nn.CrossEntropyLoss()(logits, targets)
    # return F1_Loss()(logits, targets)



def get_score(submission, solution):
    y_pred = submission.Emotion_id.values
    y_true = solution.Emotion_id.values
    return 100 * (f1_score(y_pred, y_true, average = "weighted"))

In [15]:
from sklearn.metrics import confusion_matrix


def train_epoch(loader, optimizer):
    model.train()
    bar = tqdm(loader)
    train_loss = []
    
    for (data, data_orig, targets) in bar:
        optimizer.zero_grad()
        data, data_orig, targets = data.to(device), data_orig.to(device), targets.to(device)
        
        loss_func = criterion

        cutmix_threshold = random.uniform(0,1)
        
        logits = model(data)
        
        loss = loss_func(logits, targets)
        
        loss.backward()
        optimizer.step()
        
        loss_np = loss.detach().cpu().numpy()
        train_loss.append(loss_np)
        smooth_loss = sum(train_loss[-20:])/min(len(train_loss) ,20)
        
        bar.set_description('loss: %.5f, smth: %.5f' % (loss_np, smooth_loss))
        
    return train_loss

def val_epoch(loader, get_output = False):
    model.eval()
    val_loss = []
    outputs = []
    LOGITS = []
    acc = 0
    pred = []
    with torch.no_grad():
        for (data, data_orig, target) in tqdm(loader):
            data, data_orig, target = data.to(device), data_orig.to(device), target.to(device)
            logits = model(data)
            loss = criterion(logits, target)
            pred = logits.argmax(1).detach()
            outputs.append(pred)
            acc += (target == pred).sum().detach().cpu().numpy()
            if get_output:
                LOGITS.append(logits)
            val_loss.append(loss.cpu().numpy())
        val_loss = np.mean(val_loss)
        acc /= len(dataset_valid)
    solution = df.iloc[valid_idx]
    preds = torch.cat(outputs).cpu().numpy()
    score = get_score(pd.DataFrame({"Emotion_id": preds}),pd.DataFrame({"Emotion_id": solution.Emotion_id.values}) )
    if DEBUG:
        print()
        print(confusion_matrix(solution.Emotion_id.values, preds, ))
        print()
    if get_output:
        LOGITS = torch.cat(LOGITS).detach().cpu().numpy()
        return LOGITS
    else:
        return val_loss, acc, score

In [17]:
for i in range(n_folds):
    print(df[df.fold == i].Emotion_id.value_counts())

3    19
2    19
1    19
0    18
Name: Emotion_id, dtype: int64
3    19
2    19
1    19
0    18
Name: Emotion_id, dtype: int64
2    19
1    19
0    19
3    18
Name: Emotion_id, dtype: int64
3    19
0    19
2    18
1    18
Name: Emotion_id, dtype: int64
3    19
2    19
1    18
0    18
Name: Emotion_id, dtype: int64


In [16]:
DEBUG = True

record = [{'train_loss': [], 'val_loss': [], "score": []} for x in range(n_folds)]


i_fold = fold
folds = [i for i in range(n_folds)]
train_idx, valid_idx = np.where((df['fold'] != i_fold))[0], np.where((df['fold'] == i_fold))[0]

train_folds = []
val_folds = []
for i in range(n_folds):
    if i == fold:
        val_folds.append(i)
    else:
        train_folds.append(i)

dataset_train = Dataset("train", df, train_folds,"train",transforms_train, transforms_orig )
dataset_valid = Dataset("train", df, val_folds,"validation",transforms_val, transforms_orig )


train_loader = torch.utils.data.DataLoader(dataset_train, batch_size=batch_size, shuffle=True,num_workers=num_workers)
valid_loader = torch.utils.data.DataLoader(dataset_valid, batch_size=batch_size, shuffle=False, sampler=None, num_workers=num_workers)

model = EfficientNet_NeuralNet(pretrained= True, Freeze_base= FREEZE, layers_freeze = freeze_layers)
model = model.to(device)

max_score = 0
model_file = f'{kernel_type}_best_fold{i_fold}.pth'
# model.load_state_dict(torch.load("./resnet-101_base_freeze_gridmask_augmix_fulltrain_best_fold0_epoch60.pth"))

# optimizer = torch.optim.SGD(model.parameters(), lr = init_lr, momentum = 0.9, )
# optimizer = torch.optim.Adadelta(model.parameters(), lr = init_lr )
optimizer = torch.optim.Adam(model.parameters(), lr = init_lr)




Loaded pretrained weights for efficientnet-b7


In [18]:
scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode='min', factor=0.5, patience=10, verbose=False, threshold=0.0001, threshold_mode='rel', cooldown=0, min_lr=0, eps=1e-08)

lmbda = lambda epoch: 0.95
scheduler_mul = torch.optim.lr_scheduler.MultiplicativeLR(optimizer, lr_lambda=lmbda)
scheduler_cosine = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, 120,  last_epoch= -1)
print('Training All Layers...\n')

for epoch in range(1, n_epochs+1):
    print(time.ctime(), 'Epoch:', epoch)
    
    scheduler_cosine.step(epoch-1)
    train_loss = train_epoch(train_loader, optimizer)
    
    # val_loss, acc, score = val_epoch(valid_loader)
    
    # scheduler.step(val_loss)
    # content = time.ctime() + ' ' + f'Epoch {epoch}, lr: {optimizer.param_groups[0]["lr"]:.7f}, train loss: {np.mean(train_loss):.5f}, val loss: {np.mean(val_loss):.5f}, acc: {(acc):.5f}, score: {(score):.6f}'
    
    
    content = time.ctime() + ' ' + f'Epoch {epoch}, lr: {optimizer.param_groups[0]["lr"]:.7f}, train loss: {np.mean(train_loss):.5f}'
    
    scheduler_mul.step()

    print(content)
    with open(f'log_{kernel_type}.txt', 'a') as appender:
        appender.write(content + '\n')
        
    if (epoch+1) % 125==0 or (epoch+1)%120 == 0:
        print(f"Saving model on epoch {epoch+1}")
        torch.save(model.state_dict(), f'{kernel_type}_best_fold{i_fold}_epoch{epoch+1}.pth')

    # if score > max_score:
    #     print('score ({:.6f} --> {:.6f}).  Saving model ...'.format(max_score, score))
    #     torch.save(model.state_dict(), f'{kernel_type}_best_fold{i_fold}.pth')
    #     max_score = score
        
    # record[i_fold]['train_loss'].append(np.mean(train_loss))
    # record[i_fold]['val_loss'].append(val_loss)
    # record[i_fold]['score'].append(score)
    
torch.save(model.state_dict(), os.path.join(f'{kernel_type}_model_fold{i_fold}.pth'))

2/12 [00:12<00:00,  1.07s/it]
  0%|          | 0/12 [00:00<?, ?it/s]Fri Apr 24 22:00:29 2020 Epoch 291, lr: 0.0062941, train loss: 0.86568
Fri Apr 24 22:00:29 2020 Epoch: 292
loss: 0.93414, smth: 0.87541: 100%|██████████| 12/12 [00:12<00:00,  1.06s/it]
  0%|          | 0/12 [00:00<?, ?it/s]Fri Apr 24 22:00:42 2020 Epoch 292, lr: 0.0061672, train loss: 0.87541
Fri Apr 24 22:00:42 2020 Epoch: 293
loss: 0.83891, smth: 0.88185: 100%|██████████| 12/12 [00:12<00:00,  1.06s/it]
  0%|          | 0/12 [00:00<?, ?it/s]Fri Apr 24 22:00:55 2020 Epoch 293, lr: 0.0060396, train loss: 0.88185
Fri Apr 24 22:00:55 2020 Epoch: 294
loss: 0.93414, smth: 0.90651: 100%|██████████| 12/12 [00:13<00:00,  1.09s/it]
  0%|          | 0/12 [00:00<?, ?it/s]Fri Apr 24 22:01:08 2020 Epoch 294, lr: 0.0059112, train loss: 0.90651
Fri Apr 24 22:01:08 2020 Epoch: 295
loss: 0.88653, smth: 0.85294: 100%|██████████| 12/12 [00:12<00:00,  1.06s/it]
  0%|          | 0/12 [00:00<?, ?it/s]Fri Apr 24 22:01:21 2020 Epoch 295, lr: 