# 생육 기간 예측 프로젝트

### 목적 및 배경
* 한 쌍의 이미지를 입력받아 작물의 생육 기간을 예측하는 모델 개발<br/>
 ※ 이후 환경 변수 데이터가 추가 확보되는 시점에는 작물의 효율적인 생육을 위한 최적의 환경을 도출하는 작업으로 연계도 가능할 것으로 전망

### 데이터 정보 및 학습 진행 방식
* DACON의 "생육 기간 예측 경진대회"에서 제공된 데이터로 진행
* 2개 작물(청경채, 적상추)에 대한 생육 기간 경과일자별 이미지 데이터 저장<br/>
\- 총 753개(청경채 353개, 적상추 400개)
* 작물별 이미지 2장씩을 다양하게 조합하여 2장의 이미지간 경과일을 기준으로 학습 및 평가 진행 예정

### 모델 평가 기준
* RMSE(Root Mean Squared Error)

In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.utils.data import Dataset
from torch.utils.data import DataLoader
from torchvision import transforms

In [2]:
import os
import random
from PIL import Image
from glob import glob
from tqdm.notebook import tqdm

In [3]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


#### 기본 셋팅

In [26]:
# seed 고정 함수 정의 => seed 고정을 통해 재현성을 확보하기 위함
def seed_everything(seed):
    # 파이토치 및 넘파이, random 등 관련 모듈에 대한 seed 일괄 설정
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)  # if use multi-GPU
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False
    np.random.seed(seed)
    random.seed(seed)

# seed 고정
seed_everything(2048)

is_cuda = torch.cuda.is_available()
device = torch.device('cuda' if is_cuda else 'cpu')
print(device)

LEARNING_RATE = 0.00005
EPOCHS = 20  # 초기 설정 10
BATCH_SIZE = 64  # 초기 설정 64
VALID_BATCH_SIZE = 50  # 초기 설정 50 => 체크 예정

cuda


# 2개 작물별 데이터 정리 및 DataFrame 저장

#### 데이터프레임 생성 관련 함수 정의

In [5]:
def get_image_path(root_path=None, resize_path=None):
    # 2개 작물별 디렉토리명 list 추출
    if root_path is None:  # None일 경우, 현재 경로 기준의 하위 디렉토리명 추출
        if resize_path: # resize 옵션값 체크
          bc_directories = glob('./BC_Resize/*')
          lt_directories = glob('./LT_Resize/*')
        else:
          bc_directories = glob('./BC/*')
          lt_directories = glob('./LT/*')
    else:
        if resize_path: # resize 옵션값 체크
          bc_directories = glob(root_path + 'BC_Resize/*')
          lt_directories = glob(root_path + 'LT_Resize/*')  
        else:
          bc_directories = glob(root_path + 'BC/*')
          lt_directories = glob(root_path + 'LT/*')
    
    # 2개 작물별 모든 이미지 파일 경로 list로 저장
    bc_image_path = []
    for bc_directory in bc_directories:
        images = glob(bc_directory + '/*.png')
        bc_image_path.extend(images)
    lt_image_path = []
    for lt_directory in lt_directories:
        images = glob(lt_directory + '/*.png')
        lt_image_path.extend(images)
    
    return bc_image_path, lt_image_path

def get_dataframe(root_path=None, resize_path=None):
    # 2개 작물별 이미지 파일 경로 list로 저장(BC : 청경채, LT : 적상추)
    bc_image_path, lt_image_path = get_image_path(root_path, resize_path)
    
    # 각 파일명에서 기준일자(day) 정보 추출 및 np.array로 저장 => 파일명에서 마지막 2자리 숫자 정보 추출
    bc_day_array = np.array([int(path.split('.')[-2][-2:]) for path in bc_image_path])
    lt_day_array = np.array([int(path.split('.')[-2][-2:]) for path in lt_image_path])
    
    # 데이터프레임 생성
    bc_df = pd.DataFrame({'image_path' : bc_image_path, 'day' : bc_day_array})
    bc_df['species'] = 'bc'
    lt_df = pd.DataFrame({'image_path' : lt_image_path, 'day' : lt_day_array})
    lt_df['species'] = 'lt'
    
    total_df = pd.concat([bc_df, lt_df]).reset_index(drop=True)
    
    return total_df

#### 데이터프레임 생성 => "total_df"

In [6]:
TRAIN_FOLDER = '/content/drive/MyDrive/ds_study/data1/open/train_dataset/'  # 구글 코랩 기준 경로
TEST_FOLDER = '/content/drive/MyDrive/ds_study/data1/open/test_dataset/'  # 구글 코랩 기준 경로

# total_df = get_dataframe(root_path=TRAIN_FOLDER)
# len(total_df), total_df.head()  # 총 753개 데이터 저장 결과 확인

#### 이미지 사이즈 체크 및 Resize 진행

In [None]:
# img = Image.open(total_df['image_path'][0])
# print(img.size)
# print(img.mode)

(3280, 2464)
RGB


> 이미지 사이즈가 매우 크므로, 코랩 환경 등을 고려하여 모델에 적용할 사이즈로 resize 및 별도 폴더로 저장 후 진행하고자 함

In [None]:
# dir_list = ['BC', 'LT']

# # train_dataset 내 Resize 폴더 생성
# for dir in dir_list:
#   os.mkdir(TRAIN_FOLDER + dir + '_Resize')
#   for sub_dir in os.listdir(TRAIN_FOLDER + dir): # 서브 폴더 생성
#     os.mkdir(TRAIN_FOLDER + dir + '_Resize/' + sub_dir)
#     for image_path in glob(TRAIN_FOLDER + dir + '/' + sub_dir + '/*'): # 이미지 resize 및 저장
#       image_file_name = image_path.split('/')[-1]
#       img = Image.open(image_path)
#       img = img.resize((224, 224))
#       img.save(TRAIN_FOLDER + dir + '_Resize/' + sub_dir + '/' + image_file_name)

# # test_dataset 내 Resize 폴더 생성
# for dir in dir_list:
#   os.mkdir(TEST_FOLDER + dir + '_Resize')
#   for sub_dir in os.listdir(TEST_FOLDER + dir): # 서브 폴더 생성
#     os.mkdir(TEST_FOLDER + dir + '_Resize/' + sub_dir)
#     for image_path in glob(TEST_FOLDER + dir + '/' + sub_dir + '/*'): # 이미지 resize 및 저장
#       image_file_name = image_path.split('/')[-1]
#       img = Image.open(image_path)
#       img = img.resize((224, 224))
#       img.save(TEST_FOLDER + dir + '_Resize/' + sub_dir + '/' + image_file_name)

#### 변경된 경로("Resize") 기준 데이터프레임 다시 생성 => "total_df_v2"

In [7]:
total_df_v2 = get_dataframe(root_path=TRAIN_FOLDER, resize_path=True) # resize 경로로 반영
len(total_df_v2), total_df_v2.head()  # 총 753개 데이터 저장 결과 확인

(753,                                           image_path  day species
 0  /content/drive/MyDrive/ds_study/data1/open/tra...    2      bc
 1  /content/drive/MyDrive/ds_study/data1/open/tra...    1      bc
 2  /content/drive/MyDrive/ds_study/data1/open/tra...    3      bc
 3  /content/drive/MyDrive/ds_study/data1/open/tra...    6      bc
 4  /content/drive/MyDrive/ds_study/data1/open/tra...   10      bc)

In [8]:
# 이미지 사이즈 재확인 => (224, 224)로 반영된 것을 확인함
img = Image.open(total_df_v2['image_path'][0])
print(img.size)
print(img.mode)

(224, 224)
RGB


#### 작물의 생육 상태가 이상치로 판단되는 이미지 제외 처리 후 데이터프레임 다시 생성 => "total_df_v3"
\- 이상치로 판단되는 작물 이미지 폴더명 : "BC_03"<br/>
  => 테스트 결과, 제외 처리 전보다 오히려 성능이 떨어진 것으로 확인되어 제외 처리하지 않기로 함

In [9]:
total_df_v3 = total_df_v2.copy()
# total_df_v3.loc[:, 'folder'] = ''

# # 'folder' 컬럼에 폴더명 정보 저장
# for idx, row in total_df_v3.iterrows():
#   check_name = row['image_path'].split('/')[-2]
#   total_df_v3.loc[idx, 'folder'] = check_name

# total_df_v3.head()

In [10]:
len(total_df_v3)

753

In [11]:
# "BC_03"(41개 이미지) 제외 후 현황 시각화
# total_df_v3 = total_df_v3[total_df_v3['folder'] != 'BC_03']
len(total_df_v3)

753

In [None]:
# 폴더별 개수 현황 시각화 => 일부 차이가 있긴 하나, 대부분 폴더별 40개 수준임
# sns.countplot(x=total_df_v3['folder'])
# plt.xticks(rotation=90)
# plt.show()

In [None]:
# 'folder' 컬럼 drop
# total_df_v3.drop('folder', axis=1, inplace=True)
# total_df_v3.head()

Unnamed: 0,image_path,day,species
0,/content/drive/MyDrive/ds_study/data1/open/tra...,10,bc
1,/content/drive/MyDrive/ds_study/data1/open/tra...,4,bc
2,/content/drive/MyDrive/ds_study/data1/open/tra...,1,bc
3,/content/drive/MyDrive/ds_study/data1/open/tra...,5,bc
4,/content/drive/MyDrive/ds_study/data1/open/tra...,9,bc


# 모델 선언
* 우선 baseline 셋팅 완료 후, 다양한 모델을 적용해 평가 및 테스트 진행 예정

In [12]:
from torchvision.models import mobilenet_v2

class CompareCNN(nn.Module):
    
    def __init__(self):
        super(CompareCNN, self).__init__()
        self.mobile_net = mobilenet_v2(pretrained=True)
        self.fc_layer = nn.Linear(1000, 1)
    
    def forward(self, input):
        x = self.mobile_net(input)
        output = self.fc_layer(x)
        return output

class CompareNet(nn.Module):
    
    def __init__(self):
        super(CompareNet, self).__init__()
        self.before_net = CompareCNN()
        self.after_net = CompareCNN()
    
    def forward(self, before_input, after_input):
        before = self.before_net(before_input)
        after = self.after_net(after_input)
        delta = after - before
        return delta

# 학습을 위한 데이터셋 생성

#### 작물별 이미지 조합 및 train, valid 데이터 저장

In [13]:
# 동일 작물 내 샘플링된 2개 이미지별 time_delta 산출한 데이터프레임 생성 함수
def get_combination_df(length, species, df):
  before_image_path = []
  after_image_path = []
  time_delta = []

  for i in range(length):
    sample = df[df['species'] == species].sample(2)
    before = sample[sample['day'] == min(sample['day'])].reset_index(drop=True)
    after = sample[sample['day'] == max(sample['day'])].reset_index(drop=True)

    before_image_path.append(before.iloc[0]['image_path'])
    after_image_path.append(after.iloc[0]['image_path'])
    delta = after.iloc[0]['day'] - before.iloc[0]['day']
    time_delta.append(delta)
  
  combination_df = pd.DataFrame({
      'before_image_path' : before_image_path,
      'after_image_path' : after_image_path,
      'time_delta' : time_delta
  })

  combination_df['species'] = species

  return combination_df

In [14]:
data_length = 5000  # 추출할 조합의 개수
valid_size = 0.1  # validation 데이터 비율
train_data_length = int(data_length*(1-valid_size))

# 이미지 조합 데이터프레임 생성
bc_comb_df = get_combination_df(data_length, 'bc', total_df_v3) # total_df_v3 기준
lt_comb_df = get_combination_df(data_length, 'lt', total_df_v3) # total_df_v3 기준

# train, valid 각각 분리
bc_train = bc_comb_df[:train_data_length]
lt_train = lt_comb_df[:train_data_length]

bc_valid = bc_comb_df[train_data_length:]
lt_valid = lt_comb_df[train_data_length:]

# train_set, valid_set 저장
train_data = pd.concat([bc_train, lt_train])
valid_data = pd.concat([bc_valid, lt_valid])

In [15]:
train_data.reset_index(drop=True, inplace=True)
train_data.tail()

Unnamed: 0,before_image_path,after_image_path,time_delta,species
8995,/content/drive/MyDrive/ds_study/data1/open/tra...,/content/drive/MyDrive/ds_study/data1/open/tra...,27,lt
8996,/content/drive/MyDrive/ds_study/data1/open/tra...,/content/drive/MyDrive/ds_study/data1/open/tra...,19,lt
8997,/content/drive/MyDrive/ds_study/data1/open/tra...,/content/drive/MyDrive/ds_study/data1/open/tra...,6,lt
8998,/content/drive/MyDrive/ds_study/data1/open/tra...,/content/drive/MyDrive/ds_study/data1/open/tra...,16,lt
8999,/content/drive/MyDrive/ds_study/data1/open/tra...,/content/drive/MyDrive/ds_study/data1/open/tra...,6,lt


In [16]:
valid_data.reset_index(drop=True, inplace=True)
valid_data.tail()

Unnamed: 0,before_image_path,after_image_path,time_delta,species
995,/content/drive/MyDrive/ds_study/data1/open/tra...,/content/drive/MyDrive/ds_study/data1/open/tra...,28,lt
996,/content/drive/MyDrive/ds_study/data1/open/tra...,/content/drive/MyDrive/ds_study/data1/open/tra...,15,lt
997,/content/drive/MyDrive/ds_study/data1/open/tra...,/content/drive/MyDrive/ds_study/data1/open/tra...,8,lt
998,/content/drive/MyDrive/ds_study/data1/open/tra...,/content/drive/MyDrive/ds_study/data1/open/tra...,17,lt
999,/content/drive/MyDrive/ds_study/data1/open/tra...,/content/drive/MyDrive/ds_study/data1/open/tra...,12,lt


In [17]:
# train, valid 데이터 csv파일로 저장
train_data.to_csv('/content/drive/MyDrive/ds_study/save/train_data.csv', index=False)
valid_data.to_csv('/content/drive/MyDrive/ds_study/save/valid_data.csv', index=False)

In [18]:
# train, valid 데이터 csv파일 불러오기
train_data = pd.read_csv('/content/drive/MyDrive/ds_study/save/train_data.csv')
valid_data = pd.read_csv('/content/drive/MyDrive/ds_study/save/valid_data.csv')

In [19]:
train_data.tail() # train_data 확인

Unnamed: 0,before_image_path,after_image_path,time_delta,species
8995,/content/drive/MyDrive/ds_study/data1/open/tra...,/content/drive/MyDrive/ds_study/data1/open/tra...,27,lt
8996,/content/drive/MyDrive/ds_study/data1/open/tra...,/content/drive/MyDrive/ds_study/data1/open/tra...,19,lt
8997,/content/drive/MyDrive/ds_study/data1/open/tra...,/content/drive/MyDrive/ds_study/data1/open/tra...,6,lt
8998,/content/drive/MyDrive/ds_study/data1/open/tra...,/content/drive/MyDrive/ds_study/data1/open/tra...,16,lt
8999,/content/drive/MyDrive/ds_study/data1/open/tra...,/content/drive/MyDrive/ds_study/data1/open/tra...,6,lt


In [20]:
valid_data.tail()  # valid_data 확인

Unnamed: 0,before_image_path,after_image_path,time_delta,species
995,/content/drive/MyDrive/ds_study/data1/open/tra...,/content/drive/MyDrive/ds_study/data1/open/tra...,28,lt
996,/content/drive/MyDrive/ds_study/data1/open/tra...,/content/drive/MyDrive/ds_study/data1/open/tra...,15,lt
997,/content/drive/MyDrive/ds_study/data1/open/tra...,/content/drive/MyDrive/ds_study/data1/open/tra...,8,lt
998,/content/drive/MyDrive/ds_study/data1/open/tra...,/content/drive/MyDrive/ds_study/data1/open/tra...,17,lt
999,/content/drive/MyDrive/ds_study/data1/open/tra...,/content/drive/MyDrive/ds_study/data1/open/tra...,12,lt


#### 데이터셋 만들기
\- Normalize 추가 적용<br/>
\- RandomHorizontalFlip 및 RandomVerticalFlip 추가 적용<br/>
\- RandomRotation 추가 적용

In [None]:
# # 현재 이미지 데이터 기준, RGB 평균 및 표준편차 구하기
# mean_rgb = []
# std_rgb = []
# for _, row in total_df_v3.iterrows():
#   img = Image.open(row['image_path'])
#   mean_rgb.append(np.mean(np.array(img), axis=(0,1)) / 255.0)
#   std_rgb.append(np.std(np.array(img), axis=(0,1)) / 255.0)

# len(mean_rgb), mean_rgb[0], len(std_rgb), std_rgb[0]

(753,
 array([0.9183464 , 0.90566094, 0.95120634]),
 753,
 array([0.11392699, 0.12514097, 0.09008806]))

In [None]:
# mean_r = np.mean([rgb[0] for rgb in mean_rgb])
# mean_g = np.mean([rgb[1] for rgb in mean_rgb])
# mean_b = np.mean([rgb[2] for rgb in mean_rgb])

# std_r = np.mean([rgb[0] for rgb in std_rgb])
# std_g = np.mean([rgb[1] for rgb in std_rgb])
# std_b = np.mean([rgb[2] for rgb in std_rgb])

# print(mean_r, mean_g, mean_b) # 전체 이미지 데이터 기준 평균
# print(std_r, std_g, std_b) # 전체 이미지 데이터 기준 표준편차

0.7485439488295317 0.7608712307549181 0.8175421300450789
0.17449895204237378 0.17197505433483895 0.18335567523917604


In [21]:
# 현재 이미지 데이터 기준으로 산출한 RGB 평균 및 표준편차 활용 시
mean_r, mean_g, mean_b = 0.7485439488295317, 0.7608712307549181, 0.8175421300450789
std_r, std_g, std_b = 0.17449895204237378, 0.17197505433483895, 0.18335567523917604

# # Imagenet 데이터셋을 통해 산출된 평균 및 표준편차 활용 시
# mean_r, mean_g, mean_b = 0.485, 0.456, 0.406
# std_r, std_g, std_b = 0.229, 0.224, 0.225

In [22]:
# 사용자 정의 데이터셋 클래스
class ImageDataset(Dataset):
  def __init__(self, combination_df, is_test=None):
    self.combination_df = combination_df
    if is_test:
      self.transform = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize([mean_r, mean_g, mean_b], [std_r, std_g, std_b])
      ])
    else:
      self.transform = transforms.Compose([
        # transforms.Resize(224),  # 이미지 resize 기적용 완료
        transforms.ToTensor(),
        transforms.Normalize([mean_r, mean_g, mean_b], [std_r, std_g, std_b]),  # Normalize 적용
        transforms.RandomHorizontalFlip(p=0.5),  # 좌우반전 적용
        transforms.RandomVerticalFlip(p=0.5),  # 상하반전 적용
        transforms.RandomRotation(90)  # 90도 회전 적용
      ])
    self.is_test = is_test
  
  def __len__(self):
    return len(self.combination_df)

  def __getitem__(self, idx):
    before_image = Image.open(self.combination_df.iloc[idx]['before_image_path'])
    after_image = Image.open(self.combination_df.iloc[idx]['after_image_path'])

    before_image = self.transform(before_image)
    after_image = self.transform(after_image)

    if self.is_test:
      return before_image, after_image
    
    time_delta = self.combination_df.iloc[idx]['time_delta']

    return before_image, after_image, time_delta

In [23]:
train_dataset = ImageDataset(train_data)
valid_dataset = ImageDataset(valid_data)

# 학습 진행

#### 미니 배치 구성

In [24]:
train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True)
valid_loader = DataLoader(valid_dataset, batch_size=VALID_BATCH_SIZE)

In [25]:
SAVE_FOLDER = '/content/drive/MyDrive/ds_study/save/'

model = CompareNet().to(device)
optimizer = optim.Adam(model.parameters(), lr=LEARNING_RATE)

Downloading: "https://download.pytorch.org/models/mobilenet_v2-b0353104.pth" to /root/.cache/torch/hub/checkpoints/mobilenet_v2-b0353104.pth


  0%|          | 0.00/13.6M [00:00<?, ?B/s]

In [27]:
# 저장된 체크포인트 불러오기
model_state = torch.load(SAVE_FOLDER + 'checkpoint_14.pt')
model.load_state_dict(model_state['model'])
optimizer.load_state_dict(model_state['optimizer'])

In [28]:
for epoch in tqdm(range(15, EPOCHS)):  # 체크포인트 이후 시점부터 재개
  print(f'============ epoch : {epoch} ===============')
  for step, (before_image, after_image, time_delta) in tqdm(enumerate(train_loader)):
    before_image = before_image.to(device)
    after_image = after_image.to(device)
    time_delta = time_delta.to(device)

    optimizer.zero_grad()
    logit = model(before_image, after_image)
    train_loss = torch.sum(torch.abs(logit.squeeze(1).float() - time_delta.float())) / torch.LongTensor([BATCH_SIZE]).squeeze(0).to(device)
    train_loss.backward()
    optimizer.step()

    if step % 15 == 0:
      print(f'------------ step : {step} ------------')
      print('MAE_loss :', train_loss.detach().cpu().numpy())
  
  valid_losses = []
  with torch.no_grad():
    for valid_before, valid_after, time_delta in tqdm(valid_loader):
      valid_before = valid_before.to(device)
      valid_after = valid_after.to(device)
      valid_time_delta = time_delta.to(device)


      logit = model(valid_before, valid_after)
      valid_loss = torch.sum(torch.abs(logit.squeeze(1).float() - valid_time_delta.float())) / torch.LongTensor([VALID_BATCH_SIZE]).squeeze(0).to(device)
      valid_losses.append(valid_loss.detach().cpu())


  print(f'VALIDATION_LOSS MAE : {sum(valid_losses)/len(valid_losses)}')

  checkpoint = {
      'model' : model.state_dict(),
      'optimizer' : optimizer.state_dict()
  }

  torch.save(checkpoint, SAVE_FOLDER + f'checkpoint_{epoch}.pt')

  0%|          | 0/5 [00:00<?, ?it/s]



0it [00:00, ?it/s]

------------ step : 0 ------------
MAE_loss : 3.678786
------------ step : 15 ------------
MAE_loss : 1.6537251
------------ step : 30 ------------
MAE_loss : 1.3774213
------------ step : 45 ------------
MAE_loss : 1.5778935
------------ step : 60 ------------
MAE_loss : 1.7006482
------------ step : 75 ------------
MAE_loss : 1.1728015
------------ step : 90 ------------
MAE_loss : 1.4686725
------------ step : 105 ------------
MAE_loss : 1.1396562
------------ step : 120 ------------
MAE_loss : 1.3209732
------------ step : 135 ------------
MAE_loss : 1.4523358


  0%|          | 0/20 [00:00<?, ?it/s]

VALIDATION_LOSS MAE : 1.9529073238372803


0it [00:00, ?it/s]

------------ step : 0 ------------
MAE_loss : 1.5277071
------------ step : 15 ------------
MAE_loss : 1.5457041
------------ step : 30 ------------
MAE_loss : 1.6245532
------------ step : 45 ------------
MAE_loss : 2.1999755
------------ step : 60 ------------
MAE_loss : 1.2561488
------------ step : 75 ------------
MAE_loss : 1.2370738
------------ step : 90 ------------
MAE_loss : 2.3420196
------------ step : 105 ------------
MAE_loss : 1.1050222
------------ step : 120 ------------
MAE_loss : 1.3024696
------------ step : 135 ------------
MAE_loss : 1.342889


  0%|          | 0/20 [00:00<?, ?it/s]

VALIDATION_LOSS MAE : 2.406151533126831


0it [00:00, ?it/s]

------------ step : 0 ------------
MAE_loss : 1.6308144
------------ step : 15 ------------
MAE_loss : 1.8526118
------------ step : 30 ------------
MAE_loss : 1.2537949
------------ step : 45 ------------
MAE_loss : 1.4220133
------------ step : 60 ------------
MAE_loss : 1.3432188
------------ step : 75 ------------
MAE_loss : 1.6195791
------------ step : 90 ------------
MAE_loss : 1.3611968
------------ step : 105 ------------
MAE_loss : 1.1959219
------------ step : 120 ------------
MAE_loss : 1.8475444
------------ step : 135 ------------
MAE_loss : 1.472232


  0%|          | 0/20 [00:00<?, ?it/s]

VALIDATION_LOSS MAE : 2.0510923862457275


0it [00:00, ?it/s]

------------ step : 0 ------------
MAE_loss : 1.934201
------------ step : 15 ------------
MAE_loss : 1.3716815
------------ step : 30 ------------
MAE_loss : 1.1546961
------------ step : 45 ------------
MAE_loss : 1.0135908
------------ step : 60 ------------
MAE_loss : 1.131278
------------ step : 75 ------------
MAE_loss : 1.1231098
------------ step : 90 ------------
MAE_loss : 1.4998944
------------ step : 105 ------------
MAE_loss : 1.6272643
------------ step : 120 ------------
MAE_loss : 1.5159802
------------ step : 135 ------------
MAE_loss : 1.2811649


  0%|          | 0/20 [00:00<?, ?it/s]

VALIDATION_LOSS MAE : 1.795701265335083


0it [00:00, ?it/s]

------------ step : 0 ------------
MAE_loss : 1.1377861
------------ step : 15 ------------
MAE_loss : 1.554024
------------ step : 30 ------------
MAE_loss : 1.3478649
------------ step : 45 ------------
MAE_loss : 1.3503861
------------ step : 60 ------------
MAE_loss : 2.448057
------------ step : 75 ------------
MAE_loss : 3.6223843
------------ step : 90 ------------
MAE_loss : 1.2394737
------------ step : 105 ------------
MAE_loss : 1.5270433
------------ step : 120 ------------
MAE_loss : 2.9312263
------------ step : 135 ------------
MAE_loss : 2.2056956


  0%|          | 0/20 [00:00<?, ?it/s]

VALIDATION_LOSS MAE : 2.1652724742889404


# 테스트 데이터 기준 예측

In [29]:
# 테스트 데이터 csv파일 불러오기 및 데이터셋 생성
test_set = pd.read_csv(TEST_FOLDER + 'test_data.csv')
test_set['l_root'] = test_set['before_file_path'].map(lambda x: TEST_FOLDER + x.split('_')[1] + '_Resize/' + x.split('_')[2])
test_set['r_root'] = test_set['after_file_path'].map(lambda x: TEST_FOLDER + x.split('_')[1] + '_Resize/' + x.split('_')[2])
test_set['before_image_path'] = test_set['l_root'] + '/' + test_set['before_file_path'] + '.png'
test_set['after_image_path'] = test_set['r_root'] + '/' + test_set['after_file_path'] + '.png'
test_dataset = ImageDataset(test_set, is_test=True)
test_data_loader = DataLoader(test_dataset, batch_size=BATCH_SIZE)

test_data_loader

<torch.utils.data.dataloader.DataLoader at 0x7fdfcb598dd0>

In [30]:
# 예측결과 저장
test_value = []
with torch.no_grad():
  for test_before, test_after in tqdm(test_data_loader):
    test_before = test_before.to(device)
    test_after = test_after.to(device)
    logit = model(test_before, test_after)
    value = logit.squeeze(1).detach().cpu().float()
    
    test_value.extend(value)

  0%|          | 0/62 [00:00<?, ?it/s]

In [31]:
len(test_value), test_value[:5] # 3960건 예측결과 저장 확인

(3960,
 [tensor(28.3087),
  tensor(36.0444),
  tensor(4.6426),
  tensor(6.7505),
  tensor(27.3961)])

In [32]:
# 제출양식에 반영
sub = pd.read_csv('/content/drive/MyDrive/ds_study/data1/open/sample_submission.csv')
sub['time_delta'] = np.array(test_value)
sub.head()

Unnamed: 0,idx,time_delta
0,0,28.308681
1,1,36.044361
2,2,4.642614
3,3,6.750463
4,4,27.39608


In [33]:
# 예측결과 csv파일 저장
sub.to_csv('/content/drive/MyDrive/ds_study/save/submission_v1.1.csv', index=False)

In [34]:
# "VALIDATION_LOSS MAE"가 가장 낮았던 시점의 체크포인트 불러오기
model_state = torch.load(SAVE_FOLDER + 'checkpoint_18.pt')
model.load_state_dict(model_state['model'])
optimizer.load_state_dict(model_state['optimizer'])

In [35]:
# 예측결과 저장
test_value = []
with torch.no_grad():
  for test_before, test_after in tqdm(test_data_loader):
    test_before = test_before.to(device)
    test_after = test_after.to(device)
    logit = model(test_before, test_after)
    value = logit.squeeze(1).detach().cpu().float()
    
    test_value.extend(value)

  0%|          | 0/62 [00:00<?, ?it/s]

In [36]:
len(test_value), test_value[:5] # 3960건 예측결과 저장 확인

(3960,
 [tensor(26.9913),
  tensor(36.8421),
  tensor(4.2060),
  tensor(6.2892),
  tensor(26.2449)])

In [37]:
# 제출양식에 반영
sub = pd.read_csv('/content/drive/MyDrive/ds_study/data1/open/sample_submission.csv')
sub['time_delta'] = np.array(test_value)

# 예측결과 csv파일 저장
sub.to_csv('/content/drive/MyDrive/ds_study/save/submission_v1.11.csv', index=False)