<a href="https://colab.research.google.com/github/SunghoonSeok/Study/blob/master/noval.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Colab 구독 최대한 활용하기



In [1]:
import os
from google.colab import drive
drive.mount('/content/drive/')

Mounted at /content/drive/


In [8]:

import cv2
import numpy as np
from matplotlib import pyplot as plt

for i in range(50000):
    image_path = '/content/drive/My Drive/dirty_mnist_/dirty_mnist_2nd/%05d.png'%i
    image = cv2.imread(image_path)
    image2 = np.where((image <= 254) & (image != 0), 0, image)
    image3 = cv2.dilate(image2, kernel=np.ones((2, 2), np.uint8), iterations=1)
    image4 = cv2.medianBlur(src=image3, ksize= 5)
    cv2.imwrite('/content/drive/My Drive/dirty_mnist_/dirty_mnist_clean/%05d.png'%i, image4)

for i in range(50000,55000):
    image_path = '/content/drive/My Drive/dirty_mnist_/test_dirty_mnist_2nd/%05d.png'%i
    image = cv2.imread(image_path)
    image2 = np.where((image <= 254) & (image != 0), 0, image)
    image3 = cv2.dilate(image2, kernel=np.ones((2, 2), np.uint8), iterations=1)
    image4 = cv2.medianBlur(src=image3, ksize= 5)
    cv2.imwrite('/content/drive/My Drive/dirty_mnist_/test_dirty_mnist_clean/%05d.png'%i, image4)



In [17]:
import os
from typing import Tuple, Sequence, Callable
import csv
import cv2
import numpy as np
import pandas as pd
from PIL import Image
import torch
import torch.optim as optim
from torch import nn, Tensor
from torch.utils.data import Dataset, DataLoader
# from torchinfo import summary

from torchvision import transforms, utils
from torchvision.models import resnet50
from skimage import io, transform

class MnistDataset(Dataset):
    def __init__(self,dir: os.PathLike,image_ids: os.PathLike,transforms: Sequence[Callable]) -> None:
        self.dir = dir
        self.transforms = transforms

        self.labels = {}
        with open(image_ids, 'r') as f:
            reader = csv.reader(f)
            next(reader)
            for row in reader:
                self.labels[int(row[0])] = list(map(int, row[1:]))

        self.image_ids = list(self.labels.keys())

    def __len__(self) -> int:
        return len(self.image_ids)

    def __getitem__(self, index: int) -> Tuple[Tensor]:
        image_id = self.image_ids[index]
        image = Image.open(
            os.path.join(
                self.dir, f'{str(image_id).zfill(5)}.png')).resize((128,128)).convert('RGB')
        target = np.array(self.labels.get(image_id)).astype(np.float32)

        if self.transforms is not None:
            image = self.transforms(image)

        return image, target

transforms_train = transforms.Compose([
    transforms.RandomHorizontalFlip(p=0.5),
    transforms.RandomVerticalFlip(p=0.5),
    transforms.ToTensor(),
    transforms.Normalize(
        [0.485, 0.456, 0.406],
        [0.229, 0.224, 0.225]
    )
])

transforms_test = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize(
        [0.485, 0.456, 0.406],
        [0.229, 0.224, 0.225]
    )
])

trainset = MnistDataset('/content/drive/My Drive/dirty_mnist_/dirty_mnist_clean/', '/content/drive/My Drive/dirty_mnist_/dirty_mnist_2nd_answer.csv', transforms_train)
testset = MnistDataset('/content/drive/My Drive/dirty_mnist_/test_dirty_mnist_clean/', '/content/drive/My Drive/dirty_mnist_/sample_submission.csv', transforms_test)

train_loader = DataLoader(trainset, batch_size=128, num_workers=8)
test_loader = DataLoader(testset, batch_size=32, num_workers=4)

class MnistModel(nn.Module):
    def __init__(self) -> None:
        super().__init__()
        self.resnet = resnet50(pretrained=True)
        self.classifier = nn.Linear(1000, 26)

    def forward(self, x):
        x = self.resnet(x)
        x = self.classifier(x)

        return x

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model = MnistModel().to(device)
# print(summary(model, input_size=(1, 3, 128, 128), verbose=1))

optimizer = optim.Adam(model.parameters(), lr=1e-3)
criterion = nn.MultiLabelSoftMarginLoss()

num_epochs = 40
model.train()

for epoch in range(num_epochs):
    for i, (images, targets) in enumerate(train_loader):
        optimizer.zero_grad()

        images = images.to(device)
        targets = targets.to(device)

        outputs = model(images)
        loss = criterion(outputs, targets)

        loss.backward()
        optimizer.step()

        if (i+1) % 10 == 0:
            outputs = outputs > 0.5
            acc = (outputs == targets).float().mean()
            torch.save(model.state_dict(), os.path.join('/content/drive/My Drive/dirty_mnist_/checkpoint/', f'pretrained_model{epoch}.pth'))
            print(f'{epoch}: {loss.item():.5f}, {acc.item():.5f}')

submit = pd.read_csv('/content/drive/My Drive/dirty_mnist_/sample_submission.csv')

model.eval()
batch_size = test_loader.batch_size
batch_index = 0
for i, (images, targets) in enumerate(test_loader):
    images = images.to(device)
    targets = targets.to(device)
    outputs = model(images)
    outputs = outputs > 0.5
    batch_index = i * batch_size
    submit.iloc[batch_index:batch_index+batch_size, 1:] = \
        outputs.long().squeeze(0).detach().cpu().numpy()
    
submit.to_csv('/content/drive/My Drive/dirty_mnist_/submission_torch2.csv', index=False)

0: 0.71403, 0.54056
0: 0.69937, 0.54056
0: 0.69390, 0.53125
0: 0.69379, 0.53486
0: 0.68992, 0.54117
0: 0.69495, 0.54778
0: 0.68688, 0.53756
0: 0.68675, 0.55439
0: 0.69036, 0.54387
0: 0.68282, 0.54207
0: 0.69159, 0.54177
0: 0.68101, 0.54117
0: 0.67897, 0.55379
0: 0.68765, 0.54177
0: 0.68182, 0.54688
0: 0.68139, 0.54958
0: 0.67884, 0.54657
0: 0.68079, 0.55379
0: 0.68251, 0.54657
0: 0.68540, 0.54237
0: 0.67930, 0.55919
0: 0.68167, 0.54928
0: 0.67254, 0.56070
0: 0.68224, 0.55589
0: 0.68435, 0.55349
0: 0.67895, 0.55679
0: 0.67511, 0.55980
0: 0.67283, 0.54718
0: 0.67094, 0.55679
0: 0.67316, 0.55499
0: 0.67786, 0.55258
0: 0.66940, 0.55529
0: 0.66766, 0.57422
0: 0.66432, 0.56340
0: 0.66336, 0.56911
0: 0.66800, 0.56611
0: 0.66448, 0.56911
0: 0.66642, 0.56941
0: 0.66321, 0.56520
1: 0.66650, 0.57091
1: 0.66259, 0.56911
1: 0.66486, 0.56581
1: 0.66672, 0.55980
1: 0.65718, 0.57061
1: 0.66870, 0.56190
1: 0.66314, 0.57242
1: 0.66303, 0.58023
1: 0.66955, 0.57242
1: 0.66353, 0.57091
1: 0.66108, 0.57452


In [33]:
import os
from typing import Tuple, Sequence, Callable
import csv
import cv2
import numpy as np
import pandas as pd
from PIL import Image
import torch
import torch.optim as optim
from torch import nn, Tensor
from torch.utils.data import Dataset, DataLoader
# from torchinfo import summary

from torchvision import transforms, utils
from torchvision.models import resnet50
from skimage import io, transform

class MnistDataset(Dataset):
    def __init__(self,dir: os.PathLike,image_ids: os.PathLike,transforms: Sequence[Callable]) -> None:
        self.dir = dir
        self.transforms = transforms

        self.labels = {}
        with open(image_ids, 'r') as f:
            reader = csv.reader(f)
            next(reader)
            for row in reader:
                self.labels[int(row[0])] = list(map(int, row[1:]))

        self.image_ids = list(self.labels.keys())

    def __len__(self) -> int:
        return len(self.image_ids)

    def __getitem__(self, index: int) -> Tuple[Tensor]:
        image_id = self.image_ids[index]
        image = Image.open(
            os.path.join(
                self.dir, f'{str(image_id).zfill(5)}.png')).resize((128,128)).convert('RGB')
        target = np.array(self.labels.get(image_id)).astype(np.float32)

        if self.transforms is not None:
            image = self.transforms(image)

        return image, target

transforms_train = transforms.Compose([
    transforms.RandomHorizontalFlip(p=0.5),
    transforms.RandomVerticalFlip(p=0.5),
    transforms.ToTensor(),
    transforms.Normalize(
        [0.485, 0.456, 0.406],
        [0.229, 0.224, 0.225]
    )
])

transforms_test = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize(
        [0.485, 0.456, 0.406],
        [0.229, 0.224, 0.225]
    )
])

trainset = MnistDataset('/content/drive/My Drive/dirty_mnist_/dirty_mnist_2nd/', '/content/drive/My Drive/dirty_mnist_/dirty_mnist_2nd_answer.csv', transforms_train)
testset = MnistDataset('/content/drive/My Drive/dirty_mnist_/test_dirty_mnist_2nd/', '/content/drive/My Drive/dirty_mnist_/sample_submission.csv', transforms_test)

train_loader = DataLoader(trainset, batch_size=128, num_workers=8)
test_loader = DataLoader(testset, batch_size=32, num_workers=4)

class MnistModel(nn.Module):
    def __init__(self) -> None:
        super().__init__()
        self.resnet = resnet50(pretrained=True)
        self.classifier = nn.Linear(1000, 26)

    def forward(self, x):
        x = self.resnet(x)
        x = self.classifier(x)

        return x

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model = MnistModel().to(device)
# print(summary(model, input_size=(1, 3, 128, 128), verbose=1))

optimizer = optim.Adam(model.parameters(), lr=1e-3)
criterion = nn.MultiLabelSoftMarginLoss()

num_epochs = 30
model.train()

for epoch in range(num_epochs):
    for i, (images, targets) in enumerate(train_loader):
        optimizer.zero_grad()

        images = images.to(device)
        targets = targets.to(device)

        outputs = model(images)
        loss = criterion(outputs, targets)

        loss.backward()
        optimizer.step()

        if (i+1) % 10 == 0:
            outputs = outputs > 0.5
            acc = (outputs == targets).float().mean()
            torch.save(model.state_dict(), os.path.join('/content/drive/My Drive/dirty_mnist_/checkpoint/', f'pretrained_model2_{epoch}.pth'))
            print(f'{epoch}: {loss.item():.5f}, {acc.item():.5f}')

submit = pd.read_csv('/content/drive/My Drive/dirty_mnist_/sample_submission.csv')
# model2 = MnistModel().to(device)
# model2.load_state_dict(torch.load('/content/drive/My Drive/dirty_mnist_/checkpoint/pretrained_model19.pth'))
model.eval()
batch_size = test_loader.batch_size
batch_index = 0
for i, (images, targets) in enumerate(test_loader):
    images = images.to(device)
    targets = targets.to(device)
    outputs = model(images)
    outputs = outputs > 0.5
    batch_index = i * batch_size
    submit.iloc[batch_index:batch_index+batch_size, 1:] = \
        outputs.long().squeeze(0).detach().cpu().numpy()
    
submit.to_csv('/content/drive/My Drive/dirty_mnist_/submission_torch7.csv', index=False)

KeyboardInterrupt: ignored

In [7]:
gpu_info = !nvidia-smi
gpu_info = '\n'.join(gpu_info)
if gpu_info.find('failed') >= 0:
  print('Select the Runtime > "Change runtime type" menu to enable a GPU accelerator, ')
  print('and then re-execute this cell.')
else:
  print(gpu_info)

Fri Feb 12 02:37:20 2021       
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 460.39       Driver Version: 460.32.03    CUDA Version: 11.2     |
|-------------------------------+----------------------+----------------------+
| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|                               |                      |               MIG M. |
|   0  Tesla V100-SXM2...  Off  | 00000000:00:04.0 Off |                    0 |
| N/A   35C    P0    23W / 300W |      0MiB / 16160MiB |      0%      Default |
|                               |                      |                  N/A |
+-------------------------------+----------------------+----------------------+
                                                                               
+-----------------------------------------------------------------------------+
| Proces

메모장에서 GPU를 사용하려면 런타임 &#62; 런타임 유형 변경 메뉴를 선택한 다음 하드웨어 가속기 드롭다운을 GPU로 설정하세요.

## 추가 메모리

<p>Colab Pro를 구독하면 사용 가능한 경우 고용량 메모리 VM에 액세스할 수 있습니다. 고용량 메모리 런타임을 사용하도록 메모장 환경설정을 지정하려면 런타임 &#62; '런타임 유형 변경' 메뉴를 선택한 다음 런타임 구성 드롭다운에서 고용량 RAM을 선택하세요.</p>
<p>다음 코드를 실행하여 언제든지 사용 가능한 메모리 용량을 확인할 수 있습니다.</p>

In [None]:
from psutil import virtual_memory
ram_gb = virtual_memory().total / 1e9
print('Your runtime has {:.1f} gigabytes of available RAM\n'.format(ram_gb))

if ram_gb < 20:
  print('To enable a high-RAM runtime, select the Runtime > "Change runtime type"')
  print('menu, and then select High-RAM in the Runtime shape dropdown. Then, ')
  print('re-execute this cell.')
else:
  print('You are using a high-RAM runtime!')