# 생육 기간 예측 프로젝트

### 목적 및 배경
* 한 쌍의 이미지를 입력받아 작물의 생육 기간을 예측하는 모델 개발<br/>
 ※ 이후 환경 변수 데이터가 추가 확보되는 시점에는 작물의 효율적인 생육을 위한 최적의 환경을 도출하는 작업으로 연계도 가능할 것으로 전망

### 데이터 정보 및 학습 진행 방식
* DACON의 "생육 기간 예측 경진대회"에서 제공된 데이터로 진행
* 2개 작물(청경채, 적상추)에 대한 생육 기간 경과일자별 이미지 데이터 저장<br/>
\- 총 753개(청경채 353개, 적상추 400개)
* 작물별 이미지 2장씩을 다양하게 조합하여 2장의 이미지간 경과일을 기준으로 학습 및 평가 진행 예정

### 모델 평가 기준
* RMSE(Root Mean Squared Error)

In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.utils.data import Dataset
from torch.utils.data import DataLoader
from torchvision import transforms

In [2]:
import os
import random
from PIL import Image
from glob import glob
from tqdm.notebook import tqdm

In [3]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


#### 기본 셋팅

In [4]:
# seed 고정 함수 정의 => seed 고정을 통해 재현성을 확보하기 위함
def seed_everything(seed):
    # 파이토치 및 넘파이, random 등 관련 모듈에 대한 seed 일괄 설정
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)  # if use multi-GPU
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False
    np.random.seed(seed)
    random.seed(seed)

# seed 고정
seed_everything(2048)

is_cuda = torch.cuda.is_available()
device = torch.device('cuda' if is_cuda else 'cpu')
print(device)

LEARNING_RATE = 0.00005
EPOCHS = 10  # 초기 설정 10
BATCH_SIZE = 64  # 초기 설정 64
VALID_BATCH_SIZE = 50  # 초기 설정 50 => 체크 예정

cuda


# 2개 작물별 데이터 정리 및 DataFrame 저장

#### 데이터프레임 생성 관련 함수 정의

In [5]:
def get_image_path(root_path=None, resize_path=None):
    # 2개 작물별 디렉토리명 list 추출
    if root_path is None:  # None일 경우, 현재 경로 기준의 하위 디렉토리명 추출
        if resize_path: # resize 옵션값 체크
          bc_directories = glob('./BC_Resize/*')
          lt_directories = glob('./LT_Resize/*')  
        else:
          bc_directories = glob('./BC/*')
          lt_directories = glob('./LT/*')
    else:
        if resize_path: # resize 옵션값 체크
          bc_directories = glob(root_path + 'BC_Resize/*')
          lt_directories = glob(root_path + 'LT_Resize/*')  
        else:
          bc_directories = glob(root_path + 'BC/*')
          lt_directories = glob(root_path + 'LT/*')
    
    # 2개 작물별 모든 이미지 파일 경로 list로 저장
    bc_image_path = []
    for bc_directory in bc_directories:
        images = glob(bc_directory + '/*.png')
        bc_image_path.extend(images)
    lt_image_path = []
    for lt_directory in lt_directories:
        images = glob(lt_directory + '/*.png')
        lt_image_path.extend(images)
    
    return bc_image_path, lt_image_path

def get_dataframe(root_path=None, resize_path=None):
    # 2개 작물별 이미지 파일 경로 list로 저장(BC : 청경채, LT : 적상추)
    bc_image_path, lt_image_path = get_image_path(root_path, resize_path)
    
    # 각 파일명에서 기준일자(day) 정보 추출 및 np.array로 저장 => 파일명에서 마지막 2자리 숫자 정보 추출
    bc_day_array = np.array([int(path.split('.')[-2][-2:]) for path in bc_image_path])
    lt_day_array = np.array([int(path.split('.')[-2][-2:]) for path in lt_image_path])
    
    # 데이터프레임 생성
    bc_df = pd.DataFrame({'image_path' : bc_image_path, 'day' : bc_day_array})
    bc_df['species'] = 'bc'
    lt_df = pd.DataFrame({'image_path' : lt_image_path, 'day' : lt_day_array})
    lt_df['species'] = 'lt'
    
    total_df = pd.concat([bc_df, lt_df]).reset_index(drop=True)
    
    return total_df

#### 데이터프레임 생성 => "total_df"

In [24]:
TRAIN_FOLDER = '/content/drive/MyDrive/ds_study/data1/open/train_dataset/'  # 구글 코랩 기준 경로
TEST_FOLDER = '/content/drive/MyDrive/ds_study/data1/open/test_dataset/'  # 구글 코랩 기준 경로

# total_df = get_dataframe(root_path=TRAIN_FOLDER)
# len(total_df), total_df.head()  # 총 753개 데이터 저장 결과 확인

#### 이미지 사이즈 체크 및 Resize 진행

In [None]:
# img = Image.open(total_df['image_path'][0])
# print(img.size)
# print(img.mode)

(3280, 2464)
RGB


> 이미지 사이즈가 매우 크므로, 코랩 환경 등을 고려하여 모델에 적용할 사이즈로 resize 및 별도 폴더로 저장 후 진행하고자 함

In [None]:
# dir_list = ['BC', 'LT']

# # train_dataset 내 Resize 폴더 생성
# for dir in dir_list:
#   os.mkdir(TRAIN_FOLDER + dir + '_Resize')
#   for sub_dir in os.listdir(TRAIN_FOLDER + dir): # 서브 폴더 생성
#     os.mkdir(TRAIN_FOLDER + dir + '_Resize/' + sub_dir)
#     for image_path in glob(TRAIN_FOLDER + dir + '/' + sub_dir + '/*'): # 이미지 resize 및 저장
#       image_file_name = image_path.split('/')[-1]
#       img = Image.open(image_path)
#       img = img.resize((224, 224))
#       img.save(TRAIN_FOLDER + dir + '_Resize/' + sub_dir + '/' + image_file_name)

# # test_dataset 내 Resize 폴더 생성
# for dir in dir_list:
#   os.mkdir(TEST_FOLDER + dir + '_Resize')
#   for sub_dir in os.listdir(TEST_FOLDER + dir): # 서브 폴더 생성
#     os.mkdir(TEST_FOLDER + dir + '_Resize/' + sub_dir)
#     for image_path in glob(TEST_FOLDER + dir + '/' + sub_dir + '/*'): # 이미지 resize 및 저장
#       image_file_name = image_path.split('/')[-1]
#       img = Image.open(image_path)
#       img = img.resize((224, 224))
#       img.save(TEST_FOLDER + dir + '_Resize/' + sub_dir + '/' + image_file_name)

#### 변경된 경로("Resize") 기준 데이터프레임 다시 생성 => "total_df_v2"

In [9]:
total_df_v2 = get_dataframe(root_path=TRAIN_FOLDER, resize_path=True) # resize 경로로 반영
len(total_df_v2), total_df_v2.head()  # 총 753개 데이터 저장 결과 확인

(753,                                           image_path  day species
 0  /content/drive/MyDrive/ds_study/data1/open/tra...   10      bc
 1  /content/drive/MyDrive/ds_study/data1/open/tra...    4      bc
 2  /content/drive/MyDrive/ds_study/data1/open/tra...    1      bc
 3  /content/drive/MyDrive/ds_study/data1/open/tra...    5      bc
 4  /content/drive/MyDrive/ds_study/data1/open/tra...    9      bc)

In [10]:
# 이미지 사이즈 재확인 => (224, 224)로 반영된 것을 확인함
img = Image.open(total_df_v2['image_path'][0])
print(img.size)
print(img.mode)

(224, 224)
RGB


# 모델 선언
* 우선 baseline 셋팅 완료 후, 다양한 모델을 적용해 평가 및 테스트 진행 예정

In [11]:
from torchvision.models import mobilenet_v2

class CompareCNN(nn.Module):
    
    def __init__(self):
        super(CompareCNN, self).__init__()
        self.mobile_net = mobilenet_v2(pretrained=True)
        self.fc_layer = nn.Linear(1000, 1)
    
    def forward(self, input):
        x = self.mobile_net(input)
        output = self.fc_layer(x)
        return output

class CompareNet(nn.Module):
    
    def __init__(self):
        super(CompareNet, self).__init__()
        self.before_net = CompareCNN()
        self.after_net = CompareCNN()
    
    def forward(self, before_input, after_input):
        before = self.before_net(before_input)
        after = self.after_net(after_input)
        delta = after - before
        return delta

# 학습을 위한 데이터셋 생성

#### 작물별 이미지 조합 및 train, valid 데이터 저장

In [12]:
# 동일 작물 내 샘플링된 2개 이미지별 time_delta 산출한 데이터프레임 생성 함수
def get_combination_df(length, species, df):
  before_image_path = []
  after_image_path = []
  time_delta = []

  for i in range(length):
    sample = df[df['species'] == species].sample(2)
    before = sample[sample['day'] == min(sample['day'])].reset_index(drop=True)
    after = sample[sample['day'] == max(sample['day'])].reset_index(drop=True)

    before_image_path.append(before.iloc[0]['image_path'])
    after_image_path.append(after.iloc[0]['image_path'])
    delta = after.iloc[0]['day'] - before.iloc[0]['day']
    time_delta.append(delta)
  
  combination_df = pd.DataFrame({
      'before_image_path' : before_image_path,
      'after_image_path' : after_image_path,
      'time_delta' : time_delta
  })

  combination_df['species'] = species

  return combination_df

In [13]:
data_length = 5000  # 추출할 조합의 개수
valid_size = 0.1  # validation 데이터 비율
train_data_length = int(data_length*(1-valid_size))

# 이미지 조합 데이터프레임 생성
bc_comb_df = get_combination_df(data_length, 'bc', total_df_v2) # total_df_v2(resize된 이미지) 기준
lt_comb_df = get_combination_df(data_length, 'lt', total_df_v2) # total_df_v2(resize된 이미지) 기준

# train, valid 각각 분리
bc_train = bc_comb_df[:train_data_length]
lt_train = lt_comb_df[:train_data_length]

bc_valid = bc_comb_df[train_data_length:]
lt_valid = lt_comb_df[train_data_length:]

# train_set, valid_set 저장
train_data = pd.concat([bc_train, lt_train])
valid_data = pd.concat([bc_valid, lt_valid])

In [14]:
train_data.tail()

Unnamed: 0,before_image_path,after_image_path,time_delta,species
4495,/content/drive/MyDrive/ds_study/data1/open/tra...,/content/drive/MyDrive/ds_study/data1/open/tra...,28,lt
4496,/content/drive/MyDrive/ds_study/data1/open/tra...,/content/drive/MyDrive/ds_study/data1/open/tra...,21,lt
4497,/content/drive/MyDrive/ds_study/data1/open/tra...,/content/drive/MyDrive/ds_study/data1/open/tra...,5,lt
4498,/content/drive/MyDrive/ds_study/data1/open/tra...,/content/drive/MyDrive/ds_study/data1/open/tra...,13,lt
4499,/content/drive/MyDrive/ds_study/data1/open/tra...,/content/drive/MyDrive/ds_study/data1/open/tra...,7,lt


In [15]:
valid_data.head()

Unnamed: 0,before_image_path,after_image_path,time_delta,species
4500,/content/drive/MyDrive/ds_study/data1/open/tra...,/content/drive/MyDrive/ds_study/data1/open/tra...,7,bc
4501,/content/drive/MyDrive/ds_study/data1/open/tra...,/content/drive/MyDrive/ds_study/data1/open/tra...,4,bc
4502,/content/drive/MyDrive/ds_study/data1/open/tra...,/content/drive/MyDrive/ds_study/data1/open/tra...,28,bc
4503,/content/drive/MyDrive/ds_study/data1/open/tra...,/content/drive/MyDrive/ds_study/data1/open/tra...,2,bc
4504,/content/drive/MyDrive/ds_study/data1/open/tra...,/content/drive/MyDrive/ds_study/data1/open/tra...,15,bc


#### 데이터셋 만들기
\- Normalize 추가 적용<br/>
\- RandomHorizontalFlip 및 RandomVerticalFlip 추가 적용

In [16]:
# 현재 이미지 데이터 기준, RGB 평균 및 표준편차 구하기
mean_rgb = []
std_rgb = []
for _, row in total_df_v2.iterrows():
  img = Image.open(row['image_path'])
  mean_rgb.append(np.mean(np.array(img), axis=(0,1)) / 255.0)
  std_rgb.append(np.std(np.array(img), axis=(0,1)) / 255.0)

len(mean_rgb), mean_rgb[0], len(std_rgb), std_rgb[0]

(753,
 array([0.9183464 , 0.90566094, 0.95120634]),
 753,
 array([0.11392699, 0.12514097, 0.09008806]))

In [17]:
mean_r = np.mean([rgb[0] for rgb in mean_rgb])
mean_g = np.mean([rgb[1] for rgb in mean_rgb])
mean_b = np.mean([rgb[2] for rgb in mean_rgb])

std_r = np.mean([rgb[0] for rgb in std_rgb])
std_g = np.mean([rgb[1] for rgb in std_rgb])
std_b = np.mean([rgb[2] for rgb in std_rgb])

print(mean_r, mean_g, mean_b) # 전체 이미지 데이터 기준 평균
print(std_r, std_g, std_b) # 전체 이미지 데이터 기준 표준편차

0.7485439488295317 0.7608712307549181 0.8175421300450789
0.17449895204237378 0.17197505433483895 0.18335567523917604


In [38]:
# 사용자 정의 데이터셋 클래스
class ImageDataset(Dataset):
  def __init__(self, combination_df, is_test=None):
    self.combination_df = combination_df
    if is_test:
      self.transform = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize([mean_r, mean_g, mean_b], [std_r, std_g, std_b])
      ])
    else:
      self.transform = transforms.Compose([
        # transforms.Resize(224),  # 이미지 resize 기적용 완료
        transforms.ToTensor(),
        transforms.Normalize([mean_r, mean_g, mean_b], [std_r, std_g, std_b]),  # Normalize 적용
        transforms.RandomHorizontalFlip(p=0.5),  # 좌우반전 적용
        transforms.RandomVerticalFlip(p=0.5)  # 상하반전 적용
      ])
    self.is_test = is_test
  
  def __len__(self):
    return len(self.combination_df)

  def __getitem__(self, idx):
    before_image = Image.open(self.combination_df.iloc[idx]['before_image_path'])
    after_image = Image.open(self.combination_df.iloc[idx]['after_image_path'])

    before_image = self.transform(before_image)
    after_image = self.transform(after_image)

    if self.is_test:
      return before_image, after_image
    
    time_delta = self.combination_df.iloc[idx]['time_delta']

    return before_image, after_image, time_delta

In [31]:
train_dataset = ImageDataset(train_data)
valid_dataset = ImageDataset(valid_data)

# 학습 진행

#### 미니 배치 구성

In [32]:
train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True)
valid_loader = DataLoader(valid_dataset, batch_size=VALID_BATCH_SIZE)

In [33]:
SAVE_FOLDER = '/content/drive/MyDrive/ds_study/save/'

model = CompareNet().to(device)
optimizer = optim.Adam(model.parameters(), lr=LEARNING_RATE)

In [34]:
for epoch in tqdm(range(EPOCHS)):
  print(f'============ epoch : {epoch} ===============')
  for step, (before_image, after_image, time_delta) in tqdm(enumerate(train_loader)):
    before_image = before_image.to(device)
    after_image = after_image.to(device)
    time_delta = time_delta.to(device)

    optimizer.zero_grad()
    logit = model(before_image, after_image)
    train_loss = torch.sum(torch.abs(logit.squeeze(1).float() - time_delta.float())) / torch.LongTensor([BATCH_SIZE]).squeeze(0).to(device)
    train_loss.backward()
    optimizer.step()

    if step % 15 == 0:
      print(f'------------ step : {step} ------------')
      print('MAE_loss :', train_loss.detach().cpu().numpy())
  
  valid_losses = []
  with torch.no_grad():
    for valid_before, valid_after, time_delta in tqdm(valid_loader):
      valid_before = valid_before.to(device)
      valid_after = valid_after.to(device)
      valid_time_delta = time_delta.to(device)


      logit = model(valid_before, valid_after)
      valid_loss = torch.sum(torch.abs(logit.squeeze(1).float() - valid_time_delta.float())) / torch.LongTensor([VALID_BATCH_SIZE]).squeeze(0).to(device)
      valid_losses.append(valid_loss.detach().cpu())


  print(f'VALIDATION_LOSS MAE : {sum(valid_losses)/len(valid_losses)}')

  checkpoint = {
      'model' : model.state_dict(),
      'optimizer' : optimizer.state_dict()
  }

  torch.save(checkpoint, SAVE_FOLDER + f'checkpoint_{epoch}.pt')

  0%|          | 0/10 [00:00<?, ?it/s]



0it [00:00, ?it/s]

------------ step : 0 ------------
MAE_loss : 14.176298
------------ step : 15 ------------
MAE_loss : 4.2550807
------------ step : 30 ------------
MAE_loss : 3.7442236
------------ step : 45 ------------
MAE_loss : 2.8763027
------------ step : 60 ------------
MAE_loss : 3.1525204
------------ step : 75 ------------
MAE_loss : 2.919415
------------ step : 90 ------------
MAE_loss : 2.1976955
------------ step : 105 ------------
MAE_loss : 2.877777
------------ step : 120 ------------
MAE_loss : 2.2899554
------------ step : 135 ------------
MAE_loss : 1.9836807


  0%|          | 0/20 [00:00<?, ?it/s]

VALIDATION_LOSS MAE : 2.3056304454803467


0it [00:00, ?it/s]

------------ step : 0 ------------
MAE_loss : 1.644269
------------ step : 15 ------------
MAE_loss : 1.8890889
------------ step : 30 ------------
MAE_loss : 2.588421
------------ step : 45 ------------
MAE_loss : 2.3327308
------------ step : 60 ------------
MAE_loss : 4.231181
------------ step : 75 ------------
MAE_loss : 1.5941733
------------ step : 90 ------------
MAE_loss : 2.1912217
------------ step : 105 ------------
MAE_loss : 2.1465433
------------ step : 120 ------------
MAE_loss : 1.5836811
------------ step : 135 ------------
MAE_loss : 1.4888389


  0%|          | 0/20 [00:00<?, ?it/s]

VALIDATION_LOSS MAE : 1.9961767196655273


0it [00:00, ?it/s]

------------ step : 0 ------------
MAE_loss : 2.2129784
------------ step : 15 ------------
MAE_loss : 2.1629596
------------ step : 30 ------------
MAE_loss : 2.1470895
------------ step : 45 ------------
MAE_loss : 1.8131067
------------ step : 60 ------------
MAE_loss : 1.6723704
------------ step : 75 ------------
MAE_loss : 1.3001175
------------ step : 90 ------------
MAE_loss : 1.6661005
------------ step : 105 ------------
MAE_loss : 1.3722436
------------ step : 120 ------------
MAE_loss : 1.85504
------------ step : 135 ------------
MAE_loss : 1.544463


  0%|          | 0/20 [00:00<?, ?it/s]

VALIDATION_LOSS MAE : 1.8470284938812256


0it [00:00, ?it/s]

------------ step : 0 ------------
MAE_loss : 1.2659309
------------ step : 15 ------------
MAE_loss : 2.456052
------------ step : 30 ------------
MAE_loss : 1.555707
------------ step : 45 ------------
MAE_loss : 2.114731
------------ step : 60 ------------
MAE_loss : 1.4918704
------------ step : 75 ------------
MAE_loss : 1.6024139
------------ step : 90 ------------
MAE_loss : 1.2803397
------------ step : 105 ------------
MAE_loss : 1.1716096
------------ step : 120 ------------
MAE_loss : 1.3454974
------------ step : 135 ------------
MAE_loss : 2.0354905


  0%|          | 0/20 [00:00<?, ?it/s]

VALIDATION_LOSS MAE : 2.0924599170684814


0it [00:00, ?it/s]

------------ step : 0 ------------
MAE_loss : 2.0238328
------------ step : 15 ------------
MAE_loss : 1.5345767
------------ step : 30 ------------
MAE_loss : 1.3499979
------------ step : 45 ------------
MAE_loss : 1.8076602
------------ step : 60 ------------
MAE_loss : 2.489715
------------ step : 75 ------------
MAE_loss : 1.3377619
------------ step : 90 ------------
MAE_loss : 3.8690596
------------ step : 105 ------------
MAE_loss : 2.5846527
------------ step : 120 ------------
MAE_loss : 1.1777437
------------ step : 135 ------------
MAE_loss : 1.2135873


  0%|          | 0/20 [00:00<?, ?it/s]

VALIDATION_LOSS MAE : 2.378988027572632


0it [00:00, ?it/s]

------------ step : 0 ------------
MAE_loss : 3.1244879
------------ step : 15 ------------
MAE_loss : 1.7113854
------------ step : 30 ------------
MAE_loss : 1.9737432
------------ step : 45 ------------
MAE_loss : 2.1884084
------------ step : 60 ------------
MAE_loss : 1.2309346
------------ step : 75 ------------
MAE_loss : 1.3967475
------------ step : 90 ------------
MAE_loss : 1.0112181
------------ step : 105 ------------
MAE_loss : 1.4971099
------------ step : 120 ------------
MAE_loss : 1.1880605
------------ step : 135 ------------
MAE_loss : 3.4844508


  0%|          | 0/20 [00:00<?, ?it/s]

VALIDATION_LOSS MAE : 1.7660009860992432


0it [00:00, ?it/s]

------------ step : 0 ------------
MAE_loss : 2.0206141
------------ step : 15 ------------
MAE_loss : 1.1482372
------------ step : 30 ------------
MAE_loss : 1.7484517
------------ step : 45 ------------
MAE_loss : 1.1688346
------------ step : 60 ------------
MAE_loss : 1.2529712
------------ step : 75 ------------
MAE_loss : 1.273215
------------ step : 90 ------------
MAE_loss : 2.1195407
------------ step : 105 ------------
MAE_loss : 1.5250819
------------ step : 120 ------------
MAE_loss : 2.072906
------------ step : 135 ------------
MAE_loss : 1.6583841


  0%|          | 0/20 [00:00<?, ?it/s]

VALIDATION_LOSS MAE : 1.9197397232055664


0it [00:00, ?it/s]

------------ step : 0 ------------
MAE_loss : 1.1896784
------------ step : 15 ------------
MAE_loss : 1.0066725
------------ step : 30 ------------
MAE_loss : 1.4476353
------------ step : 45 ------------
MAE_loss : 2.2204683
------------ step : 60 ------------
MAE_loss : 2.7226295
------------ step : 75 ------------
MAE_loss : 1.4224725
------------ step : 90 ------------
MAE_loss : 1.0381631
------------ step : 105 ------------
MAE_loss : 1.0464187
------------ step : 120 ------------
MAE_loss : 1.3349712
------------ step : 135 ------------
MAE_loss : 1.3310611


  0%|          | 0/20 [00:00<?, ?it/s]

VALIDATION_LOSS MAE : 1.5006930828094482


0it [00:00, ?it/s]

------------ step : 0 ------------
MAE_loss : 1.5166268
------------ step : 15 ------------
MAE_loss : 1.079567
------------ step : 30 ------------
MAE_loss : 1.8108974
------------ step : 45 ------------
MAE_loss : 2.5688598
------------ step : 60 ------------
MAE_loss : 2.5206766
------------ step : 75 ------------
MAE_loss : 2.15
------------ step : 90 ------------
MAE_loss : 1.1257646
------------ step : 105 ------------
MAE_loss : 1.7222292
------------ step : 120 ------------
MAE_loss : 1.1005275
------------ step : 135 ------------
MAE_loss : 1.2236466


  0%|          | 0/20 [00:00<?, ?it/s]

VALIDATION_LOSS MAE : 1.840698003768921


0it [00:00, ?it/s]

------------ step : 0 ------------
MAE_loss : 2.3352227
------------ step : 15 ------------
MAE_loss : 1.570321
------------ step : 30 ------------
MAE_loss : 1.1892354
------------ step : 45 ------------
MAE_loss : 1.2143689
------------ step : 60 ------------
MAE_loss : 1.2156353
------------ step : 75 ------------
MAE_loss : 1.0033696
------------ step : 90 ------------
MAE_loss : 0.7624841
------------ step : 105 ------------
MAE_loss : 2.280405
------------ step : 120 ------------
MAE_loss : 0.874768
------------ step : 135 ------------
MAE_loss : 1.628912


  0%|          | 0/20 [00:00<?, ?it/s]

VALIDATION_LOSS MAE : 1.4882996082305908


# 테스트 데이터 기준 예측

In [39]:
# 테스트 데이터 csv파일 불러오기 및 데이터셋 생성
test_set = pd.read_csv(TEST_FOLDER + 'test_data.csv')
test_set['l_root'] = test_set['before_file_path'].map(lambda x: TEST_FOLDER + x.split('_')[1] + '_Resize/' + x.split('_')[2])
test_set['r_root'] = test_set['after_file_path'].map(lambda x: TEST_FOLDER + x.split('_')[1] + '_Resize/' + x.split('_')[2])
test_set['before_image_path'] = test_set['l_root'] + '/' + test_set['before_file_path'] + '.png'
test_set['after_image_path'] = test_set['r_root'] + '/' + test_set['after_file_path'] + '.png'
test_dataset = ImageDataset(test_set, is_test=True)
test_data_loader = DataLoader(test_dataset, batch_size=BATCH_SIZE)

test_data_loader

<torch.utils.data.dataloader.DataLoader at 0x7f7f46c04090>

In [40]:
# 예측결과 저장
test_value = []
with torch.no_grad():
  for test_before, test_after in tqdm(test_data_loader):
    test_before = test_before.to(device)
    test_after = test_after.to(device)
    logit = model(test_before, test_after)
    value = logit.squeeze(1).detach().cpu().float()
    
    test_value.extend(value)

  0%|          | 0/62 [00:00<?, ?it/s]

In [41]:
len(test_value), test_value[:5] # 3960건 예측결과 저장 확인

(3960,
 [tensor(27.2772),
  tensor(30.8148),
  tensor(5.8631),
  tensor(4.8091),
  tensor(26.3175)])

In [42]:
# 제출양식에 반영
sub = pd.read_csv('/content/drive/MyDrive/ds_study/data1/open/sample_submission.csv')
sub['time_delta'] = np.array(test_value)
sub.head()

Unnamed: 0,idx,time_delta
0,0,27.277241
1,1,30.814842
2,2,5.863149
3,3,4.809115
4,4,26.317493


In [43]:
# 예측결과 csv파일 저장
sub.to_csv('/content/drive/MyDrive/ds_study/save/submission_v0.72.csv', index=False)