# 재구축 데이터셋 Scratch



In [22]:
import torch
import torch.utils.data as data
import torch.nn as nn
import torch.nn.functional as F
from torchvision import transforms, datasets, models
import torch.optim as optim
import torchvision.transforms as transforms
from torch.utils.data import TensorDataset
from PIL import Image
import numpy as np
from tqdm import tqdm

In [23]:
# 구축된 .npy파일을 Pytorch DataLoader을 사용할 수 있도록 CUSTOM DATASET을 만듬.
import numpy as np
from google.colab import drive
from sklearn.model_selection import train_test_split

default_path = "/content/drive/MyDrive/인공지능 수업/final/"

CUB200_TYPE_TRAIN = 1
CUB200_TYPE_TEST = 2
CUB200_TYPE_SUBMIT = 3

drive.mount('/content/drive')
class CUB200(data.Dataset):

    def __init__(self, type, transform = None):
        super(CUB200, self).__init__()
        """
        type : int = 1, 2, 3
        """

        # train_data = np.load(default_path + 'train_image.npy')
        # train_label = np.load(default_path + 'train_label.npy')

        original_train_data = np.load(default_path + 'train_image.npy')
        original_train_label = np.load(default_path + 'train_label.npy')

        train_data, test_data, train_label, test_label = train_test_split(
            original_train_data,
            original_train_label,
            test_size = 0.3,
            random_state = 1)
        
        if type == CUB200_TYPE_TRAIN:
          self.image = train_data
          self.label = train_label
        elif type == CUB200_TYPE_TEST:
          self.image = test_data
          self.label = test_label
        elif type == CUB200_TYPE_SUBMIT:
          self.image = np.load(default_path + 'test_image.npy')
          self.label = np.zeros(500)
        
        self.transform = transform

    def __getitem__(self, index):
        img, target = self.image[index], self.label[index]
        img = Image.fromarray(img)

        if self.transform is not None:
            img = self.transform(img)

        return img, target

    def __len__(self):
        return len(self.image)

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [24]:
trainCUB = CUB200(CUB200_TYPE_TRAIN)
print(trainCUB.image.shape)
print(trainCUB.label.shape)

testCUB = CUB200(CUB200_TYPE_TEST)
print(testCUB.image.shape)
print(testCUB.label.shape)
print(np.max(testCUB.label), np.min(testCUB.label))

submitCUB = CUB200(CUB200_TYPE_SUBMIT)
print(submitCUB.image.shape)

(626, 256, 256, 3)
(626,)
(269, 256, 256, 3)
(269,)
49 0
(500, 256, 256, 3)


In [25]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(device)

cuda


In [26]:
# train_data에만 data augmentaion을 적용
transform_train = transforms.Compose([        
        transforms.Resize(550),
        transforms.RandomCrop(512),
        transforms.RandomHorizontalFlip(),
        transforms.ColorJitter(),
        transforms.ToTensor(),
        transforms.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225))])

transform_test = transforms.Compose([
        transforms.Resize(550),
        transforms.RandomCrop(512),
        transforms.ToTensor(),
        transforms.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225))])

In [27]:
# CUSTOM DATASET을 이용하여 train_loader, test_loader을 구축

batch_size = 48

train_loader = torch.utils.data.DataLoader(
    dataset = CUB200(CUB200_TYPE_TRAIN, transform = transform_train),
    batch_size = batch_size,
    shuffle = True
)

test_loader = torch.utils.data.DataLoader(
    dataset = CUB200(CUB200_TYPE_TEST, transform = transform_test),
    batch_size = batch_size,
    shuffle = False
)

submit_loader = torch.utils.data.DataLoader(
    dataset = CUB200(CUB200_TYPE_SUBMIT, transform = transform_test),
    batch_size = batch_size,
    shuffle = False
)

In [28]:
def training_model(model, criterion, optimizer, scheduler, num_epochs = 25):

    for epoch in range(num_epochs):
        scheduler.step()

        running_loss = 0.0
        for i, data in enumerate(train_loader, 0):
            inputs, labels = data
            inputs, labels = inputs.to(device), labels.to(device)

            optimizer.zero_grad()

            outputs = model(inputs)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()

            running_loss += loss.item()

            if i % 60 == 59:
                print('[%d, %5d] loss: %.7f' %
                    (epoch + 1, (i + 1), running_loss / 20))
                running_loss = 0.0
        
        train_correct = 0
        train_total = 0
        for i, data in enumerate(train_loader, 0):
            inputs, labels = data
            inputs = inputs.squeeze()
            inputs, labels = inputs.to(device), labels.to(device)
            outputs = model(inputs)

            _, predicted = torch.max(outputs.data, 1)
            train_total += labels.size(0)
            train_correct += (predicted == labels).sum().item()

        print('[%d epoch] Accuracy of the network on the train images: %d %%' %
              (epoch + 1, 100 * train_correct / train_total))
        
    print("End Training do it eval_accuracy")
    return model

In [29]:
def eval_accuracy(model):
    class_correct = list(0. for i in range(50))
    class_total = list(0. for i in range(50))

    correct = 0
    total = 0
    
    model.eval()
    with torch.no_grad():
        for i, data in enumerate(test_loader, 0):
            images, labels = data
            images, labels = images.to(device), labels.to(device)
            outputs = model(images)
            _, predicted = torch.max(outputs, 1)
            c = (predicted == labels).squeeze()
                    
            for i in range(labels.shape[0]):
                label = labels[i]
                class_correct[label] += c[i].item()
                class_total[label] += 1
                
                total += labels.size(0)
                correct += (predicted == labels).sum().item()

    print('Accuracy of the network on test images: %d %%' % (
        100 * correct / total))            
                
    return 

In [30]:
linear1 = nn.Linear(2048, 50, bias=True)
linear1_1 = nn.Linear(2048, 50, bias=True)
linear1_2 = nn.Linear(2048, 50, bias=True)
linear1_3 = nn.Linear(2048, 50, bias=True)
linear2 = nn.Linear(50, 50, bias=True)
linear3 = nn.Linear(512, 50, bias=True)
relu = nn.ReLU()

# xavier initialization
nn.init.xavier_uniform_(linear1.weight)
nn.init.xavier_uniform_(linear1_1.weight)
nn.init.xavier_uniform_(linear1_2.weight)
nn.init.xavier_uniform_(linear1_3.weight)
nn.init.xavier_uniform_(linear2.weight)
nn.init.xavier_uniform_(linear3.weight)

Parameter containing:
tensor([[-0.0563,  0.0842, -0.0787,  ...,  0.0952,  0.0243,  0.0907],
        [-0.0765, -0.0640,  0.0694,  ...,  0.0681, -0.0153, -0.1019],
        [ 0.0466,  0.0698,  0.0490,  ...,  0.0358,  0.0484,  0.0821],
        ...,
        [-0.0838,  0.0308,  0.0213,  ..., -0.0355,  0.0524, -0.0363],
        [-0.0250, -0.0145, -0.0475,  ..., -0.0096, -0.0100, -0.0800],
        [ 0.0567,  0.0869,  0.0738,  ..., -0.0896, -0.0378, -0.0028]],
       requires_grad=True)

In [31]:
#resnext101
model_rsn = models.resnext101_32x8d(pretrained=True)
for param in model_rsn.parameters():
  param.requires_grad = False
print(model_rsn)
model_rsn.fc = nn.Sequential(
        linear1
    )
print(model_rsn)

num_epochs = 25
model_rsn.to(device)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model_rsn.parameters(), lr = 0.0008)
lr_scheduler = optim.lr_scheduler.StepLR(optimizer, step_size = 16, gamma = 0.1)

model_rs = training_model(model_rsn, criterion, optimizer, lr_scheduler, num_epochs)

eval_accuracy(model_rsn)

ResNet(
  (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
  (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu): ReLU(inplace=True)
  (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (layer1): Sequential(
    (0): Bottleneck(
      (conv1): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn1): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv2): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=32, bias=False)
      (bn2): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv3): Conv2d(256, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn3): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (downsample): Sequential(
        (0): Conv2d(64, 256, kernel_size=(1



[1 epoch] Accuracy of the network on the train images: 5 %
[2 epoch] Accuracy of the network on the train images: 21 %
[3 epoch] Accuracy of the network on the train images: 39 %
[4 epoch] Accuracy of the network on the train images: 51 %
[5 epoch] Accuracy of the network on the train images: 58 %
[6 epoch] Accuracy of the network on the train images: 65 %
[7 epoch] Accuracy of the network on the train images: 67 %
[8 epoch] Accuracy of the network on the train images: 72 %
[9 epoch] Accuracy of the network on the train images: 74 %
[10 epoch] Accuracy of the network on the train images: 78 %
[11 epoch] Accuracy of the network on the train images: 79 %
[12 epoch] Accuracy of the network on the train images: 79 %
[13 epoch] Accuracy of the network on the train images: 79 %
[14 epoch] Accuracy of the network on the train images: 83 %
[15 epoch] Accuracy of the network on the train images: 82 %
[16 epoch] Accuracy of the network on the train images: 84 %
[17 epoch] Accuracy of the network

In [None]:
#resnet152
model_rs = models.resnet152(pretrained=True)
for param in model_rs.parameters():
  param.requires_grad = False
print(model_rs)
model_rs.fc = nn.Sequential(
        linear1_1
    )
print(model_rs)

num_epochs = 16
model_rs.to(device)
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(model_rs.parameters(), lr=0.0075, momentum=0.9)
lr_scheduler = optim.lr_scheduler.StepLR(optimizer, step_size = 8, gamma = 0.1)

model_rs = training_model(model_rs, criterion, optimizer, lr_scheduler, num_epochs)

eval_accuracy(model_rs)

ResNet(
  (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
  (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu): ReLU(inplace=True)
  (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (layer1): Sequential(
    (0): Bottleneck(
      (conv1): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv3): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn3): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (downsample): Sequential(
        (0): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 



[1 epoch] Accuracy of the network on the train images: 3 %
[2 epoch] Accuracy of the network on the train images: 11 %


In [None]:
#resnet101
model_rs1 = models.resnet101(pretrained=True)
for param in model_rs1.parameters():
  param.requires_grad = False
print(model_rs1)
model_rs1.fc = nn.Sequential(
        linear1_2
    )
print(model_rs1)

num_epochs = 16
model_rs1.to(device)
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(model_rs1.parameters(), lr=0.0075, momentum=0.9)
lr_scheduler = optim.lr_scheduler.StepLR(optimizer, step_size = 16, gamma = 0.1)

model_rs1 = training_model(model_rs1, criterion, optimizer, lr_scheduler, num_epochs)

eval_accuracy(model_rs1)

In [None]:
#resnext50
model_rsn5 = models.resnext50_32x4d(pretrained=True)
for param in model_rsn5.parameters():
  param.requires_grad = False
print(model_rsn5)
model_rsn5.fc = nn.Sequential(
        linear1_3
    )
print(model_rsn5)

num_epochs = 25
model_rsn5.to(device)
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(model_rsn5.parameters(), lr=0.005, momentum=0.9)
lr_scheduler = optim.lr_scheduler.StepLR(optimizer, step_size = 16, gamma = 0.1)

model_rsn5 = training_model(model_rsn5, criterion, optimizer, lr_scheduler, num_epochs)

eval_accuracy(model_rsn5)

In [None]:
# num_epochs = 25
# model_ft.to(device)
# criterion = nn.CrossEntropyLoss()
# #optimizer = optim.Adam(model_ft.parameters(), lr = 0.005)
# optimizer = optim.SGD(model_ft.parameters(), lr=0.005, momentum=0.9)
# lr_scheduler = optim.lr_scheduler.StepLR(optimizer, step_size = 16, gamma = 0.1)

# model_ft = training_model(model_ft, criterion, optimizer, lr_scheduler, num_epochs)

In [None]:
# eval_accuracy(model_rsn)

In [None]:
# eval_accuracy(model_rs)

In [None]:
# eval_accuracy(model_rs1)

In [None]:
# eval_accuracy(model_rsn5)

In [None]:
# eval_accuracy2(model_rsn,model_rs,model_rs1,model_rsn5)

In [None]:
class MyEnsemble(nn.Module):

    def __init__(self, modelA, modelB, modelC,modelD, input):
        super(MyEnsemble, self).__init__()
        self.modelA = modelA
        self.modelB = modelB
        self.modelC = modelC
        self.modelD = modelD

        self.fc1 = linear2

    def forward(self, x):
        out1 = self.modelA(x)
        out2 = self.modelB(x)
        out3 = self.modelC(x)
        out4 = self.modelD(x)

        out = out1 + out2 + out3 + out4

        x = self.fc1(out)
        return x

In [None]:
model = MyEnsemble(model_rs, model_rsn, model_rs1, model_rsn5, 50)

num_epochs = 32
model.to(device)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(),lr=0.003)
lr_scheduler = optim.lr_scheduler.StepLR(optimizer, step_size = 16, gamma = 0.1)

model = training_model(model, criterion, optimizer, lr_scheduler, num_epochs)

In [None]:
eval_accuracy(model)

In [None]:
# import itertools

# def get_result(model1, model2, model3, model4):
#   result=[]
#   model1.eval()
#   model2.eval()
#   model3.eval()
#   model4.eval()
#   with torch.no_grad():
#     for i, data in enumerate(submit_loader, 0):
#       images, _ = data
#       images = images.to(device)
#       outputs1 = model1(images)
#       outputs2 = model2(images)
#       outputs3 = model3(images)
#       outputs4 = model4(images)
#       outputs = (outputs1+outputs2+outputs3+outputs4)/4
#       _, predicted = torch.max(outputs, 1)
#       result.append(predicted.cpu().numpy())
#   return list(itertools.chain(*result))

In [None]:
# submit_result = get_result(model_rs,model_rsn,model_rsn5,model_rs1)

In [None]:
import itertools

def get_result(model):
  result=[]
  model.eval()
  with torch.no_grad():
    for i, data in enumerate(submit_loader, 0):
      images, _ = data
      images = images.to(device)
      outputs = model(images)
      _, predicted = torch.max(outputs, 1)
      result.append(predicted.cpu().numpy())
  return list(itertools.chain(*result))

In [None]:
submit_result = get_result(model)

In [None]:
pip install pycryptodomex --no-binary :all:

In [None]:
import json
from base64 import b64encode
from Cryptodome.Cipher import AES
from Cryptodome.Util.Padding import pad

def read_txt(fileName):
    with open(fileName, 'rt') as f:
        list_data = [a.strip('\n\r') for a in f.readlines()]
    return list_data

def write_json(fileName, data):
    with open(fileName, 'w', encoding='utf-8') as f:
        json.dump(data, f, ensure_ascii=False, indent=4)

def load_key(key_path):
    with open(key_path, "rb") as f:
        key = f.read()
    return key

def encrypt_data(key_path, ans_list, encrypt_store_path='ans.json'):
    key = load_key(key_path)
    print(key)
    data = " ".join([str(i) for i in ans_list])
    encode_data = data.encode()
    cipher = AES.new(key, AES.MODE_CBC)
    ct_bytes = cipher.encrypt(pad(encode_data, AES.block_size))
    iv = b64encode(cipher.iv).decode('utf-8')
    ct = b64encode(ct_bytes).decode('utf-8')
    write_json(encrypt_store_path, {'iv':iv, 'ciphertext':ct})

if __name__=="__main__":
    # 1.이메일을 통해서 전달 받은 키 파일의 경로 입력
    #key_path = default_path + "team9.pem"
    key_path = "/content/drive/MyDrive/인공지능 수업/team9.pem"
    # 2. 예측한 결과를 텍스트 파일로 저장했을 경우 리스트로 다시 불러오기
    # 본인이 원하는 방식으로 리스트 형태로 예측 값을 불러오기만 하면 됨(순서를 지킬것)
    #raw_ans_path = "ans.txt"
    #ans = read_txt(raw_ans_path)
    #ans에 result 저장한 리스트 넣기
    ans = submit_result
    # 3. 암호화된 파일을 저장할 위치
    encrypt_ans_path = default_path + "ai_새앙상블_answer.json"
    # 4. 암호화!(pycrytodome 설치)
    encrypt_data(key_path, ans, encrypt_ans_path)
    print("finished!")

0 0 0 0 ... 50
...
500

np.torch([0 0 0 0 0])

