lumina37 · lumina37 · Jan 20, 2023 · Jan 20, 2023 · Jan 20, 2023
diff --git a/.gitignore b/.gitignore
@@ -18,3 +18,6 @@ traditional_method.py
 
 !models/1
 !models/1/**
+
+__pycache__
+.ipynb_checkpoints
diff --git a/config.yaml b/config.yaml
@@ -6,16 +6,15 @@ dataset:
   test_ratio: 0.05
 
 train:
-  lr: 0.0008
+  lr: 0.0004
   lr_scheduler:
     T_0: 5
     T_mult: 2
   loss:
     lambda_cos: 0.24
     exponent: 2
-  epoches: 35
-  batch_size: 48
-  steps: 135
+  epoches: 3000
+  batch_size: 128
 
 evaluate:
-  batch_size: 64
+  batch_size: 128
diff --git a/models/1/34.pth b/models/1/34.pth
diff --git a/models/1/eval_loss.png b/models/1/eval_loss.png
diff --git a/models/1/lr.png b/models/1/lr.png
diff --git a/models/1/train.log b/models/1/train.log
diff --git a/models/1/train_loss.png b/models/1/train_loss.png
diff --git a/rotate_captcha_crack/model.py b/rotate_captcha_crack/model.py
@@ -20,19 +20,27 @@ def __init__(self, train: bool = True) -> None:
         self.backbone = models.regnet_x_1_6gf(pretrained=train)
 
         fc_channels = self.backbone.fc.in_features
-        self.backbone.fc = nn.Linear(fc_channels, 1)
+        self.fc0 = nn.Linear(fc_channels, fc_channels)
+        self.act = nn.LeakyReLU()
+        self.fc1 = nn.Linear(fc_channels, 1)
+        del self.backbone.fc
 
         if train:
-            nn.init.normal_(self.backbone.fc.weight, mean=0.0, std=0.01)
-            nn.init.zeros_(self.backbone.fc.bias)
+            nn.init.normal_(self.fc0.weight, mean=0.0, std=0.01)
+            nn.init.normal_(self.fc1.weight, mean=0.0, std=0.01)
+            nn.init.zeros_(self.fc0.bias)
+            nn.init.zeros_(self.fc1.bias)
 
     def forward(self, x: torch.Tensor) -> torch.Tensor:
         x = self.backbone.stem(x)
         x = self.backbone.trunk_output(x)
 
         x = self.backbone.avgpool(x)
         x = x.flatten(start_dim=1)
-        x = self.backbone.fc(x)
+
+        x = self.fc0(x)
+        x = self.act(x)
+        x = self.fc1(x)
 
         x.squeeze_(dim=1)
         return x
diff --git a/train.py b/train.py
@@ -1,4 +1,5 @@
 import time
+import os
 from pathlib import Path
 
 import numpy as np
@@ -10,7 +11,6 @@
 
 batch_size: int = CONFIG['train']['batch_size']
 epoches: int = CONFIG['train']['epoches']
-steps: int = CONFIG['train']['steps']
 lr: float = CONFIG['train']['lr']
 lambda_cos: float = CONFIG['train']['loss']['lambda_cos']
 exponent: float = CONFIG['train']['loss']['exponent']
@@ -39,10 +39,14 @@
 lr_vec = np.empty(epoches, dtype=np.float64)
 train_loss_vec = np.empty(epoches, dtype=np.float64)
 eval_loss_vec = np.empty(epoches, dtype=np.float64)
+best_eval_loss = float("inf")  # "no best yet" sentinel: any finite first eval loss becomes the best
+previous_checkpoint_path = None
 
 for epoch_idx in range(epoches):
     model.train()
     total_train_loss: float = 0
+    steps = 0
+
     for step_idx, (source, target) in enumerate(train_dataloader):
         source: torch.Tensor = source.to(device)
         target: torch.Tensor = target.to(device)
@@ -53,13 +57,11 @@
         loss.backward()
         total_train_loss += loss.cpu().item()
         optmizer.step()
-
-        if step_idx + 1 == steps:
-            break
+        steps += 1
 
     scheduler.step()
     lr_vec[epoch_idx] = scheduler.get_last_lr()[0]
-
+
     train_loss = total_train_loss / steps
     train_loss_vec[epoch_idx] = train_loss
 
@@ -81,9 +83,16 @@
     LOG.info(
         f"Epoch#{epoch_idx}. time_cost: {time.time()-start_time:.2f} s. train_loss: {train_loss:.8f}. eval_loss: {eval_loss:.4f} degrees"
     )
-
-    if epoch_idx >= epoches / 2:
-        torch.save(model.state_dict(), str(model_dir / f"{epoch_idx}.pth"))
+
+    torch.save(model.state_dict(), str(model_dir / "last.pth"))
+    if eval_loss < best_eval_loss:
+        best_eval_loss = eval_loss
+        new_checkpoint_path = str(model_dir / f"{epoch_idx}_{eval_loss:.4f}.pth")
+        torch.save(model.state_dict(), new_checkpoint_path)
+        if previous_checkpoint_path is not None:
+            os.remove(previous_checkpoint_path)
+
+        previous_checkpoint_path = new_checkpoint_path
 
 x = np.arange(epoches, dtype=np.int16)