# 读入数据集

In [3]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split

import torch
import torch.nn as nn
import torch.nn.functional as F
import torchvision
from torch.utils.data import Dataset, DataLoader
import pytorch_lightning as pl
import torchmetrics as tm

In [4]:
# Read both CSV tables; the shapes printed below show the training table
# carries one more column than the test table (the 'label' column).
test = pd.read_csv('./MNIST/data/test.csv')
train_origin = pd.read_csv('./MNIST/data/train.csv')
print(train_origin.shape, test.shape)

# Hold out 5% of the labelled rows for validation; the fixed random_state
# keeps the split identical across runs.
train, val = train_test_split(train_origin, random_state=42, test_size=0.05)
print(train.shape, val.shape)

(42000, 785) (28000, 784)
(39900, 785) (2100, 785)


# 创建Dataset, Dataloader

In [5]:
# Dataset with no augmentation
# for training
class MNISTDataset(Dataset):
    """Labelled image dataset backed by a dataframe of flattened pixels.

    Args:
        df: dataframe holding the pixel columns and the label column.
        X_col: list of pixel column names (784 per row, reshaped to 28x28).
        y_col: name of the label column.

    Each item is a ``(image, label)`` pair of float tensors: ``image`` has
    shape (1, 28, 28) with pixel values divided by 255, ``label`` has shape (1,).
    """

    def __init__(self, df, X_col, y_col):
        pixels = df[X_col].values
        labels = df[y_col].values
        # Scale raw pixel intensities by 1/255.
        self.features = pixels / 255
        # Keep labels as a column vector so each item yields a length-1 array.
        self.target = labels.reshape((-1, 1))

    def __len__(self):
        return len(self.features)

    def __getitem__(self, idx):
        flat_pixels = self.features[idx]
        image_tensor = torch.FloatTensor(flat_pixels.reshape((1, 28, 28)))
        label_tensor = torch.FloatTensor(self.target[idx])
        return image_tensor, label_tensor

# for inference
class MNISTInferenceDataset(Dataset):
    """Unlabelled image dataset backed by a dataframe of flattened pixels.

    Args:
        df: dataframe holding the pixel columns.
        X_col: list of pixel column names (784 per row, reshaped to 28x28).

    Each item is a float tensor of shape (1, 28, 28) with pixel values
    divided by 255.
    """

    def __init__(self, df, X_col):
        # Apply the same 1/255 scaling that MNISTDataset uses for training;
        # without it the model would see inputs on a different scale at
        # prediction time than the one it was fitted on.
        self.features = df[X_col].values / 255

    def __len__(self):
        return len(self.features)

    def __getitem__(self, idx):
        image = self.features[idx].reshape((1, 28, 28))
        return torch.FloatTensor(image)

In [6]:
# Column roles: 'label' is the target; every other column of `train`
# is treated as a pixel feature.
y_col = 'label'
X_col = [name for name in train.columns if name != y_col]

# Wrap each dataframe in its Dataset class; the test table is wrapped by
# the inference variant, which takes no label column.
test_set = MNISTInferenceDataset(test, X_col)
validation_set = MNISTDataset(val, X_col, y_col)
training_set = MNISTDataset(train, X_col, y_col)

# 定义模型

In [7]:
class Classifier(pl.LightningModule):
    """Small CNN image classifier packaged as a LightningModule.

    Architecture: two blocks of (5x5 conv -> LeakyReLU -> batch norm ->
    2x2 max pool), then flatten -> linear(1024, 256) -> batch norm ->
    linear(256, num_classes). ``fc1`` expects 64 * 4 * 4 features, which is
    what 1x28x28 inputs produce after the two unpadded conv/pool blocks.

    Args:
        hparams: dict with the keys 'num_classes', 'lr', 'batch_size'
            and 'weight_decay'.

    The dataloader hooks read the notebook-level ``training_set``,
    ``validation_set`` and ``test_set`` objects, so those must exist before
    the trainer asks for data.
    """

    def __init__(self, hparams):
        super().__init__()

        self.params = hparams
        self.num_classes = self.params['num_classes']
        self.lr = self.params['lr']
        self.batch_size = self.params['batch_size']
        self.weight_decay = self.params['weight_decay']
        self.train_acc = tm.Accuracy()
        self.val_acc = tm.Accuracy()

        # Convolution block 1: 1 -> 32 channels, 5x5 kernel, no padding.
        self.conv1 = nn.Conv2d(in_channels=1, out_channels=32, kernel_size=(5,5), padding=0)
        # Shared activation, reused after both convolutions.
        self.actv = nn.LeakyReLU()
        self.batchnorm1 = nn.BatchNorm2d(32)
        # Shared 2x2 max pool, reused after both convolution blocks.
        self.maxpool = nn.MaxPool2d(kernel_size=(2,2))
        # NOTE(review): this dropout layer is registered but never applied in
        # forward(); confirm whether it was meant to follow the first pool.
        self.dropout = nn.Dropout(0.25)
        # Convolution block 2: 32 -> 64 channels, 5x5 kernel, no padding.
        self.conv2 = nn.Conv2d(in_channels=32, out_channels=64, kernel_size=(5,5), padding=0)
        self.batchnorm2 = nn.BatchNorm2d(64)
        self.flatten = nn.Flatten()
        self.fc1 = nn.Linear(64 * 4 * 4, 256)
        # 1-D batch norm because it runs on the flattened features.
        self.batchnorm3 = nn.BatchNorm1d(256)
        # Output head sized from the hyper-parameter rather than a literal 10,
        # so 'num_classes' actually controls the number of logits.
        self.classifier = nn.Linear(256, self.num_classes)

    @staticmethod
    def _flatten_labels(y):
        """Return the labels as a 1-D long tensor of shape (batch,).

        ``reshape(-1)`` is used instead of ``squeeze()`` because squeezing a
        (1, 1) label tensor (a batch holding a single sample) yields a 0-dim
        tensor, which no longer lines up with the (1, num_classes) logits.
        """
        return y.long().reshape(-1)

    def forward(self, x):
        """Map a batch of images to per-class logits."""
        # conv1 block
        x = self.conv1(x)
        x = self.actv(x)
        x = self.batchnorm1(x)
        x = self.maxpool(x)
        # conv2 block
        x = self.conv2(x)
        x = self.actv(x)
        x = self.batchnorm2(x)
        x = self.maxpool(x)
        # flatten
        x = self.flatten(x)
        # NOTE(review): no activation sits between fc1 and the classifier
        # head; confirm that is intentional.
        x = self.fc1(x)
        x = self.batchnorm3(x)
        out = self.classifier(x)
        return out

    def configure_optimizers(self):
        """Adam over all parameters with the configured lr and weight decay."""
        optimizer = torch.optim.Adam(self.parameters(), lr=self.lr, weight_decay=self.weight_decay)
        return optimizer

    def training_step(self, batch, batch_idx):
        """Compute cross-entropy loss for one batch and log loss/accuracy."""
        x, y = batch
        target = self._flatten_labels(y)
        y_hat = self(x)
        loss = F.cross_entropy(y_hat, target)

        self.train_acc(y_hat, target)
        # Log the current epoch under the key 'step' alongside the metrics.
        self.log('step', self.trainer.current_epoch)
        self.log('train_acc', self.train_acc)
        self.log('loss', loss)
        return loss

    def validation_step(self, batch, batch_idx):
        """Update and log validation accuracy for one batch."""
        x, y = batch
        target = self._flatten_labels(y)
        y_hat = self(x)
        self.val_acc(y_hat, target)
        self.log('step', self.trainer.current_epoch)
        self.log('val_acc', self.val_acc)

    def train_dataloader(self):
        """Shuffled loader over the notebook-level ``training_set``."""
        return DataLoader(training_set, batch_size=self.batch_size, shuffle=True)

    def val_dataloader(self):
        """Ordered loader over the notebook-level ``validation_set``."""
        return DataLoader(validation_set, batch_size=self.batch_size, shuffle=False)

    def test_dataloader(self):
        """Ordered loader over the notebook-level ``test_set``."""
        return DataLoader(test_set, batch_size=self.batch_size, shuffle=False)

# 训练并预测

In [8]:
# !rmdir /s/q lightning_logs

In [9]:
# Hyper-parameters consumed by Classifier.__init__.
hparams = {
    'num_classes': 10,
    'lr': 0.01,
    'batch_size': 128,
    'weight_decay': 0
}

# Seed the random number generators before the model is built, so weight
# initialisation and batch shuffling start from the same state on every run.
pl.seed_everything(42)

model = Classifier(hparams)
# Use one GPU when CUDA is present; otherwise fall back to the Trainer's
# default (CPU) instead of failing on machines without a GPU.
trainer = pl.Trainer(
    gpus=1 if torch.cuda.is_available() else None,
    max_epochs=50,
    check_val_every_n_epoch=1,
)
trainer.fit(model)

GPU available: True, used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

   | Name       | Type        | Params
--------------------------------------------
0  | train_acc  | Accuracy    | 0     
1  | val_acc    | Accuracy    | 0     
2  | conv1      | Conv2d      | 832   
3  | actv       | LeakyReLU   | 0     
4  | batchnorm1 | BatchNorm2d | 64    
5  | maxpool    | MaxPool2d   | 0     
6  | dropout    | Dropout     | 0     
7  | conv2      | Conv2d      | 51.3 K
8  | batchnorm2 | BatchNorm2d | 128   
9  | flatten    | Flatten     | 0     
10 | fc1        | Linear      | 262 K 
11 | batchnorm3 | BatchNorm1d | 512   
12 | classifier | Linear      | 2.6 K 
--------------------------------------------
317 K     Trainable params
0         Non-trainable params
317 K     Total params
1.271     Total estimated model params size (MB)


Validation sanity check: 0it [00:00, ?it/s]

  rank_zero_warn(
  rank_zero_warn(


Training: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

In [10]:
# Run the fitted model over the test loader; `predictions` holds one
# output per batch.
test_loader = model.test_dataloader()
predictions = trainer.predict(model, test_loader)

LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
  rank_zero_warn(


Predicting: 312it [00:00, ?it/s]

In [13]:
# Join the per-batch outputs into one tensor, take the highest-scoring class
# for each row, and move the result to the CPU before converting it to a
# plain Python list.
preds = torch.cat(predictions, dim=0).argmax(dim=-1).cpu().tolist()

submission = pd.read_csv("./MNIST/data/sample_submission.csv")
# Fail with a clear message if the predictions do not line up one-to-one
# with the rows of the submission template.
assert len(preds) == len(submission), (
    f"expected {len(submission)} predictions, got {len(preds)}"
)
submission["Label"] = preds

submission.to_csv('submission.csv', index = False)
submission.head()

Unnamed: 0,ImageId,Label
0,1,2
1,2,0
2,3,9
3,4,9
4,5,3
