<a href="https://colab.research.google.com/github/Re14m/isk/blob/master/2022_0223_digit_recognizer.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
#Google Driveをマウント
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [2]:
#Kaggle APIのインストール
!pip install kaggle



In [3]:
#jsonで認証
from google.colab import files
uploaded = files.upload()

for fn in uploaded.keys():
  print('User uploaded file "{name}" with length {length} bytes'.format(
      name=fn, length=len(uploaded[fn])))

# Then move kaggle.json into the folder where the API expects to find it.
!mkdir -p ~/.kaggle/ && mv kaggle.json ~/.kaggle/ && chmod 600 ~/.kaggle/kaggle.json

Saving kaggle.json to kaggle.json
User uploaded file "kaggle.json" with length 62 bytes


In [4]:
#datasetダウンロード
!kaggle competitions download -c digit-recognizer

Downloading train.csv.zip to /content
 76% 7.00M/9.16M [00:00<00:00, 62.1MB/s]
100% 9.16M/9.16M [00:00<00:00, 54.3MB/s]
Downloading sample_submission.csv to /content
  0% 0.00/235k [00:00<?, ?B/s]
100% 235k/235k [00:00<00:00, 109MB/s]
Downloading test.csv.zip to /content
 82% 5.00M/6.09M [00:00<00:00, 49.8MB/s]
100% 6.09M/6.09M [00:00<00:00, 49.2MB/s]


In [7]:
#zip解凍
!unzip train.csv.zip
!unzip test.csv.zip

Archive:  train.csv.zip
  inflating: train.csv               
Archive:  test.csv.zip
  inflating: test.csv                


In [5]:
# ライブラリをインポート
import  numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import os
from tqdm import tqdm
import seaborn as sns
from sklearn.model_selection import train_test_split
import warnings
warnings.filterwarnings("ignore")

import torch
import torch.nn as nn
import torchvision
from torch.utils.data import DataLoader, Dataset
from torchvision import transforms

In [8]:
# CSVファイルを読み込み、正解ラベルのデータを分ける
data = pd.read_csv('train.csv', dtype = np.float32)
labels = data.pop('label').astype('int64')

In [9]:
# 0～255のピクセルデータを0～1に変換
data = data.to_numpy() / 255.0
labels = labels.to_numpy()

In [10]:
data = data.reshape(-1, 28, 28, 1)
labels = labels.reshape(-1,1)
print(labels.shape)

(42000, 1)


In [11]:
# 訓練データと評価データに分ける
x_train, x_val, y_train, y_val = train_test_split(data, labels, test_size=0.2)
print(f'x_train.shape: {x_train.shape}, x_val.shape: {x_val.shape}')

x_train.shape: (33600, 28, 28, 1), x_val.shape: (8400, 28, 28, 1)


In [12]:
# カスタムデータセットの定義
class MNISTDataset(Dataset):
        
    def __init__(self, images, labels, transform = None):
        """Method to initilaize variables.""" 
        self.images = images
        self.labels = labels
        self.transform = transform

    def __getitem__(self, index):
        label = self.labels[index]
        image = self.images[index]
        
        if self.transform is not None:
            image = self.transform(image)
        image = image.repeat(3, 1, 1)
        return image, label

    def __len__(self):
        return len(self.images)

In [13]:
# データを0～1のテンソルに変換
train_set = MNISTDataset(x_train, y_train, transform=transforms.Compose([transforms.ToTensor()]))
val_set = MNISTDataset(x_val, y_val, transform=transforms.Compose([transforms.ToTensor()]))
all_data = MNISTDataset(data, labels, transform=transforms.Compose([transforms.ToTensor()]))

In [14]:
train_loader = DataLoader(train_set, batch_size=32)
val_loader = DataLoader(val_set, batch_size=32)
all_data_loader = DataLoader(all_data, batch_size=32)

In [15]:
# GPUが使えればGPUを使用
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

In [16]:
learning_rate = 0.001
num_classes = 10
num_epochs = 10

In [17]:
# TorchvisionからResNet-18（畳み込みニューラル ネットワーク）モデルをダウンロード
model = torchvision.models.resnet18(pretrained=True)
num_ftrs = model.fc.in_features

Downloading: "https://download.pytorch.org/models/resnet18-f37072fd.pth" to /root/.cache/torch/hub/checkpoints/resnet18-f37072fd.pth


  0%|          | 0.00/44.7M [00:00<?, ?B/s]

In [18]:
# 全結合を行う3層構造のニューラルネットワークを生成
model.fc = nn.Linear(num_ftrs, num_classes)

In [19]:
model.to(device)

ResNet(
  (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
  (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu): ReLU(inplace=True)
  (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (layer1): Sequential(
    (0): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    )
    (1): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
  

In [None]:
criterion = nn.CrossEntropyLoss()

In [None]:
# 全てのパラメータが最適化されることを観察
optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate, momentum=0.9)

In [None]:
# ７エポックごとにLRを0.1ずつ減らす
# LR range test：初期学習率を決める手段で、ある幅で学習率を徐々に増加させながらAccuracyないしLossを観察し決定する手法
exp_lr_scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=7, gamma=0.1)

In [None]:
# 損失関数と最適化アルゴリズムを生成
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate)

In [None]:
# 学習率を更新する
def update_lr(optimizer, lr):
    for param_group in optimizer.param_groups:
        param_group['lr'] = lr

In [None]:
# 学習する
total_step = len(train_loader)
curr_lr = learning_rate
for epoch in range(num_epochs):
    for i, (images, labels) in enumerate(train_loader):
        images = images.to(device)
        labels = labels.to(device)

        # Forward pass（順伝搬：初期の入力を層ごとに処理して出力に向けて送ること）
        outputs = model(images)
        loss = criterion(outputs, labels.flatten())

        # Backward and optimize（逆伝播と最適化を行う）
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        if (i + 1) % 300 == 0:
            print(f'Epoch: {epoch + 1}/{num_epochs}, Loss: {loss.item()}')

Epoch: 1/10, Loss: 0.2998724579811096
Epoch: 1/10, Loss: 0.319784015417099
Epoch: 1/10, Loss: 0.15973897278308868
Epoch: 2/10, Loss: 0.02989523485302925
Epoch: 2/10, Loss: 0.13289044797420502
Epoch: 2/10, Loss: 0.06090101599693298
Epoch: 3/10, Loss: 0.013707166537642479
Epoch: 3/10, Loss: 0.061143454164266586
Epoch: 3/10, Loss: 0.021016106009483337
Epoch: 4/10, Loss: 0.007110942155122757
Epoch: 4/10, Loss: 0.028975604102015495
Epoch: 4/10, Loss: 0.009665836580097675
Epoch: 5/10, Loss: 0.004808831959962845
Epoch: 5/10, Loss: 0.018560420721769333
Epoch: 5/10, Loss: 0.005073982756584883
Epoch: 6/10, Loss: 0.003653968684375286
Epoch: 6/10, Loss: 0.013183138333261013
Epoch: 6/10, Loss: 0.0032614616211503744
Epoch: 7/10, Loss: 0.0028419967275112867
Epoch: 7/10, Loss: 0.009468715637922287
Epoch: 7/10, Loss: 0.002478603273630142
Epoch: 8/10, Loss: 0.002291696146130562
Epoch: 8/10, Loss: 0.006992284674197435
Epoch: 8/10, Loss: 0.0020673214457929134
Epoch: 9/10, Loss: 0.0018868263578042388
Epoch

In [None]:
# 評価する
model.eval()
with torch.no_grad():
    correct = 0
    total = 0
    for images, labels in val_loader:
        images = images.to(device)
        labels = labels.to(device)

        outputs = model(images)
        _, predicted = torch.max(outputs, 1)
        total += labels.size(0)
        correct += (predicted == labels.flatten()).sum()

        print(f'Test acc: {100 * correct / total}')

Test acc: 100.0
Test acc: 98.4375
Test acc: 98.95833587646484
Test acc: 98.4375
Test acc: 98.75
Test acc: 98.4375
Test acc: 98.21428680419922
Test acc: 98.046875
Test acc: 98.2638931274414
Test acc: 98.4375
Test acc: 98.57955169677734
Test acc: 98.69792175292969
Test acc: 98.55769348144531
Test acc: 98.66072082519531
Test acc: 98.54167175292969
Test acc: 98.4375
Test acc: 98.34558868408203
Test acc: 98.4375
Test acc: 98.51973724365234
Test acc: 98.4375
Test acc: 98.21428680419922
Test acc: 98.29545593261719
Test acc: 98.23369598388672
Test acc: 98.30729675292969
Test acc: 98.125
Test acc: 98.0769271850586
Test acc: 98.14814758300781
Test acc: 98.21428680419922
Test acc: 98.27586364746094
Test acc: 98.22917175292969
Test acc: 98.08467102050781
Test acc: 98.046875
Test acc: 98.10606384277344
Test acc: 98.1617660522461
Test acc: 98.21428680419922
Test acc: 98.2638931274414
Test acc: 98.22635650634766
Test acc: 98.19078826904297
Test acc: 98.2371826171875
Test acc: 98.28125
Test acc: 98.32

In [None]:
test_data = pd.read_csv('test.csv', dtype=np.float32)
test_data = test_data.to_numpy() / 255.0
test_data = test_data.reshape(-1, 28, 28, 1)  # reshapeの一つのサイズが決まっているとき、もう一方を-1とすると、-1には元の形

In [None]:
test_tensor = torch.from_numpy(test_data).permute(0, 3, 1, 2)  # numpy ndarrayからPytorch tensorに変換し、次元の入れ替え(permute)
test_tensor = test_tensor.repeat(1, 3, 1, 1)  # データセットの繰り返し

In [None]:
images = test_tensor.to(device)
outputs = model(images)
_, predictions = torch.max(outputs, 1)  # 配列の最大値の要素を返す

In [None]:
predictions = predictions.cpu()
submission = pd.DataFrame({'ImageId': np.arange(1, (predictions.size(0) + 1)), 'Label': predictions})
submission.to_csv("submission.csv", index = False)
print("# 終了 #")

# 終了 #
