## Baseline for DL


### 数据
- 区分本地/Kaggle/Colab
- 数据集导入加载
- 数据预处理

### 模型
- Pre-Trained Model, Pre-Processing & Post-Processing
- 常见模型：
    - ResNet
    - Transformer
    - BERT
    - Auto-Encoder
    - GAN
    - Self-Attention
    - LSTM
    - VAE
- 常见训练手段： 
    - Automatic Mixed Precision
    - Gradient accumulation
    - Learning Rate Decay
    - 手动L1,L2正则化
- 常见可视化手段： 
    - loss-curve & acc-curve


借鉴：
- [深度学习框架]PyTorch常用代码段 - Jack Stark的文章 - 知乎 https://zhuanlan.zhihu.com/p/104019160

- Kaggle知识点：深度学习代码规范 - 阿水的文章 - 知乎 https://zhuanlan.zhihu.com/p/399681621

- 【深度学习】深度学习手写代码汇总（建议收藏，面试用） - 机器学习社区的文章 - 知乎 https://zhuanlan.zhihu.com/p/404749749

- 一文搞定深度学习中的规范化BN,LN,IN,GN,CBN - 凡心所向素履所往的文章 - 知乎   https://zhuanlan.zhihu.com/p/115949091

- 深度强化学习库的设计思想（还没写完） - 曾伊言的文章 - 知乎  https://zhuanlan.zhihu.com/p/343559335

In [1]:
# Global run configuration shared by every cell of the notebook.
config = {
    # Task name; appended to the Kaggle/Colab base directory to form the
    # per-task working directory.
    'task_name': 'classifier',

    # Runtime environment; the setup cell branches on 'kaggle' and 'colab'.
    'env': 'local',
    # 'env': 'kaggle',
    # 'env': 'colab',

    # When True, the torchvision / PIL image dependencies are imported.
    'use_image': False,

    # Pipeline switches. Not read in the visible part of the notebook;
    # presumably they enable the matching stage — TODO confirm.
    'is_renew': False,
    'is_train': False,
    'is_valid': False,
    'is_test': False,
    'is_early_stop': False,
    'is_do_semi': False,


    # NOTE(review): 'epoches' is spelled this way (sic); readers of this key
    # must use the same spelling.
    'epoches' : 1,
    # Presumably the early-stopping patience — TODO confirm the unit.
    'early_stop': 5,

    # Step-based schedule settings (presumably counted in optimizer steps —
    # TODO confirm against the training loop).
    'max_steps': 100000,
    'train_steps': 1000,
    'log_steps': 100,
    'valid_steps': 100,
    'test_steps': 100,
    'warmup_steps': 10000,
    'decay_steps': 50000,

    # Dataset locations.
    # NOTE(review): 'test_path' has no trailing slash, unlike the other paths.
    'dataset_dir': './data/',
    'train_path': './data/',
    'valid_path': './data/',
    'test_path': './data',
    
    # Presumably the fraction of training data held out for validation.
    'valid_ratio': 0.15,

    # Optimizer hyper-parameters.
    # NOTE(review): 'weight_dacay_l2' is misspelled (sic, "dacay"); renaming it
    # would break any code that reads this key, so it is left unchanged here.
    'learning_rate': 1e-3,
    'weight_decay_l1': 1e-4,
    'weight_dacay_l2': 1e-5,

    'momentum': 0.9,
    'batch_size': 16,
    'n_workers': 0,


    # Checkpoint directories for loading and saving model weights.
    'load_model_path': './checkpoint/',
    'save_model_path': './checkpoint/',

    # Together with a non-local 'env', enables the Accelerate fp16 setup.
    'fp16_training': False,
}


In [None]:
# import Packages
# Standard library
import csv
import json
import os
import random
import sys
from pathlib import Path

# Third-party
import matplotlib.pyplot as plt
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.autograd import Variable
from torch.nn.utils.rnn import pad_sequence
# The class is spelled `DataLoader`; `Dataloader` raises ImportError.
from torch.utils.data import ConcatDataset, DataLoader, Dataset, Subset
# The package is `tqdm`; `tdqm` raises ModuleNotFoundError.
from tqdm.auto import tqdm

# Image dependencies are only imported when the task actually uses images.
if config['use_image']:
    from PIL import Image
    from torchvision.datasets import DatasetFolder
    from torchvision.transforms import transforms

In [None]:
if config['env'] == 'kaggle':
    !pip install transforms

    task_path = '/kaggle/working/' + config['task_name']
    if not os.path.exists(task_path):
        os.makedirs(task_path)
    
    !pwd
    !ls

elif config['env'] == 'colab':
    !pip install transforms

    from google.drive import drive
    task_path = '/content/drive/MyDrive/'+config['task_name']
    if not os.path.exists(task_path):
        os.makedirs(task_path)
    drive.mount(task_path)

    if not os.path.exists(task_path + '/data'):
        !gdown 
        !unzip


    !pwd
    !ls


import transforms


In [None]:
# Common Functions
def refresh_gpu_cache():
    """Ask PyTorch to release its unused cached CUDA memory.

    Returns:
        Whatever ``torch.cuda.empty_cache()`` returns (``None``).
    """
    outcome = torch.cuda.empty_cache()
    return outcome


def random_seed(seed=0):
    """Seed every random number generator the notebook uses.

    Seeds Python's ``random`` module, NumPy, and PyTorch (CPU and all CUDA
    devices), then switches cuDNN to deterministic mode.

    Args:
        seed: Integer seed applied to every generator.
    """
    # `random` is imported by the file but was never seeded, so anything
    # drawing from it was not reproducible.
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)

    # Request deterministic cuDNN kernels and turn off the auto-tuner, which
    # may otherwise pick different algorithms from run to run.
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False


def get_device():
    """Return the CUDA device when one is available, otherwise the CPU device."""
    if torch.cuda.is_available():
        return torch.device('cuda')
    return torch.device('cpu')



random_seed(0)
device = get_device()

if config['fp16_training'] and config['env'] != 'local':
    !pip install accelerate==0.2.0:
    from accelerate import Accelerator
    accelerator = Accelerator(fp16=True)
    device = accelerator.device

In [3]:
# DataSet and Dataloader
class MyDataset(Dataset):
    """Generic in-memory dataset for the train / valid / test splits.

    In 'train' and 'valid' mode an item is a ``(sample, label)`` pair; in any
    other mode only the sample is returned.

    Args:
        path: Dataset path. It is only stored as ``data_dir``; loading from
            the path is not implemented here.
        data: Indexable collection of samples.
        label: Indexable collection of labels aligned with ``data``; needed
            in 'train' and 'valid' mode.
        transform: Optional callable applied to a sample on access.
        transform_flag: ``transform`` is applied only when this is true.
        mode: 'train', 'valid', or anything else for an unlabeled split.
        func: Optional user hook; stored as ``func`` and not called here.

    Raises:
        ValueError: If neither ``path`` nor ``data`` is given.
    """

    def __init__(self, path=None, data=None, label=None,
                 transform=None, transform_flag=False, mode='train',
                 func=None):
        super().__init__()

        # The original assertion was inverted: it failed whenever a path or
        # data WAS supplied. Reject only the case where both are missing.
        if path is None and data is None:
            raise ValueError("Data path and Data are both None")

        self.data_dir = path
        self.data = data
        self.label = label

        self.transform = transform
        self.transform_flag = transform_flag
        self.mode = mode
        self.func = func

    def __getitem__(self, idx):
        """Return the item at ``idx``, shaped according to ``self.mode``."""
        sample = self.data[idx]
        if self.transform is not None and self.transform_flag:
            sample = self.transform(sample)

        # `mode` must be read from the instance; the bare name was undefined.
        if self.mode in ('train', 'valid'):
            return sample, self.label[idx]
        return sample

    def __len__(self):
        """Return the number of samples."""
        return len(self.data)




'pwd' 不是内部或外部命令，也不是可运行的程序
或批处理文件。
