# 생육 기간 예측 프로젝트

### 목적 및 배경
* 한 쌍의 이미지를 입력받아 작물의 생육 기간을 예측하는 모델 개발<br/>
 ※ 이후 환경 변수 데이터가 추가 확보되는 시점에는 작물의 효율적인 생육을 위한 최적의 환경을 도출하는 작업으로 연계도 가능할 것으로 전망

### 데이터 정보 및 학습 진행 방식
* DACON의 "생육 기간 예측 경진대회"에서 제공된 데이터로 진행
* 2개 작물(청경채, 적상추)에 대한 생육 기간 경과일자별 이미지 데이터 저장<br/>
\- 총 753개(청경채 353개, 적상추 400개)
* 작물별 이미지 2장씩을 다양하게 조합하여 2장의 이미지간 경과일을 기준으로 학습 및 평가 진행 예정

### 모델 평가 기준
* RMSE(Root Mean Squared Error)

In [2]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.utils.data import Dataset
from torch.utils.data import DataLoader
from torchvision import transforms

In [3]:
import os
import random
from PIL import Image
from glob import glob
from tqdm.notebook import tqdm

In [4]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


#### 기본 셋팅

In [5]:
# seed 고정 함수 정의 => seed 고정을 통해 재현성을 확보하기 위함
def seed_everything(seed):
    # 파이토치 및 넘파이, random 등 관련 모듈에 대한 seed 일괄 설정
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)  # if use multi-GPU
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False
    np.random.seed(seed)
    random.seed(seed)

# seed 고정
seed_everything(2048)

is_cuda = torch.cuda.is_available()
device = torch.device('cuda' if is_cuda else 'cpu')
print(device)

lr = 0.00005
epochs = 10  # 초기 설정 10
batch_size = 64  # 초기 설정 64
valid_batch_size = 50  # 초기 설정 50 => 체크 예정

cuda


# 2개 작물별 데이터 정리 및 DataFrame 저장

#### 데이터프레임 생성 관련 함수 정의

In [6]:
def get_image_path(root_path=None, resize_path=None):
    # 2개 작물별 디렉토리명 list 추출
    if root_path is None:  # None일 경우, 현재 경로 기준의 하위 디렉토리명 추출
        if resize_path: # resize 옵션값 체크
          bc_directories = glob('./BC_Resize/*')
          lt_directories = glob('./LT_Resize/*')  
        else:
          bc_directories = glob('./BC/*')
          lt_directories = glob('./LT/*')
    else:
        if resize_path: # resize 옵션값 체크
          bc_directories = glob(root_path + 'BC_Resize/*')
          lt_directories = glob(root_path + 'LT_Resize/*')  
        else:
          bc_directories = glob(root_path + 'BC/*')
          lt_directories = glob(root_path + 'LT/*')
    
    # 2개 작물별 모든 이미지 파일 경로 list로 저장
    bc_image_path = []
    for bc_directory in bc_directories:
        images = glob(bc_directory + '/*.png')
        bc_image_path.extend(images)
    lt_image_path = []
    for lt_directory in lt_directories:
        images = glob(lt_directory + '/*.png')
        lt_image_path.extend(images)
    
    return bc_image_path, lt_image_path

def get_dataframe(root_path=None, resize_path=None):
    # 2개 작물별 이미지 파일 경로 list로 저장(BC : 청경채, LT : 적상추)
    bc_image_path, lt_image_path = get_image_path(root_path, resize_path)
    
    # 각 파일명에서 기준일자(day) 정보 추출 및 np.array로 저장 => 파일명에서 마지막 2자리 숫자 정보 추출
    bc_day_array = np.array([int(path.split('.')[-2][-2:]) for path in bc_image_path])
    lt_day_array = np.array([int(path.split('.')[-2][-2:]) for path in lt_image_path])
    
    # 데이터프레임 생성
    bc_df = pd.DataFrame({'image_path' : bc_image_path, 'day' : bc_day_array})
    bc_df['species'] = 'bc'
    lt_df = pd.DataFrame({'image_path' : lt_image_path, 'day' : lt_day_array})
    lt_df['species'] = 'lt'
    
    total_df = pd.concat([bc_df, lt_df]).reset_index(drop=True)
    
    return total_df

#### 데이터프레임 생성 => "total_df"

In [7]:
TRAIN_FOLDER = '/content/drive/MyDrive/ds_study/data1/open/train_dataset/'  # 구글 코랩 기준 경로
TEST_FOLDER = '/content/drive/MyDrive/ds_study/data1/open/test_dataset/'  # 구글 코랩 기준 경로

total_df = get_dataframe(root_path=TRAIN_FOLDER)
len(total_df), total_df.head()  # 총 753개 데이터 저장 결과 확인

(753,                                           image_path  day species
 0  /content/drive/MyDrive/ds_study/data1/open/tra...    2      bc
 1  /content/drive/MyDrive/ds_study/data1/open/tra...    1      bc
 2  /content/drive/MyDrive/ds_study/data1/open/tra...    3      bc
 3  /content/drive/MyDrive/ds_study/data1/open/tra...    6      bc
 4  /content/drive/MyDrive/ds_study/data1/open/tra...   10      bc)

#### 이미지 사이즈 체크 및 Resize 진행

In [8]:
img = Image.open(total_df['image_path'][0])
print(img.size)
print(img.mode)

(3280, 2464)
RGB


> 이미지 사이즈가 매우 크므로, 코랩 환경 등을 고려하여 모델에 적용할 사이즈로 resize 및 별도 폴더로 저장 후 진행하고자 함

In [None]:
# dir_list = ['BC', 'LT']

# # train_dataset 내 Resize 폴더 생성
# for dir in dir_list:
#   os.mkdir(TRAIN_FOLDER + dir + '_Resize')
#   for sub_dir in os.listdir(TRAIN_FOLDER + dir): # 서브 폴더 생성
#     os.mkdir(TRAIN_FOLDER + dir + '_Resize/' + sub_dir)
#     for image_path in glob(TRAIN_FOLDER + dir + '/' + sub_dir + '/*'): # 이미지 resize 및 저장
#       image_file_name = image_path.split('/')[-1]
#       img = Image.open(image_path)
#       img = img.resize((224, 224))
#       img.save(TRAIN_FOLDER + dir + '_Resize/' + sub_dir + '/' + image_file_name)

# # test_dataset 내 Resize 폴더 생성
# for dir in dir_list:
#   os.mkdir(TEST_FOLDER + dir + '_Resize')
#   for sub_dir in os.listdir(TEST_FOLDER + dir): # 서브 폴더 생성
#     os.mkdir(TEST_FOLDER + dir + '_Resize/' + sub_dir)
#     for image_path in glob(TEST_FOLDER + dir + '/' + sub_dir + '/*'): # 이미지 resize 및 저장
#       image_file_name = image_path.split('/')[-1]
#       img = Image.open(image_path)
#       img = img.resize((224, 224))
#       img.save(TEST_FOLDER + dir + '_Resize/' + sub_dir + '/' + image_file_name)

#### 변경된 경로("Resize") 기준 데이터프레임 다시 생성 => "total_df_v2"

In [9]:
total_df_v2 = get_dataframe(root_path=TRAIN_FOLDER, resize_path=True) # resize 경로로 반영
len(total_df_v2), total_df_v2.head()  # 총 753개 데이터 저장 결과 확인

(753,                                           image_path  day species
 0  /content/drive/MyDrive/ds_study/data1/open/tra...    2      bc
 1  /content/drive/MyDrive/ds_study/data1/open/tra...    1      bc
 2  /content/drive/MyDrive/ds_study/data1/open/tra...    3      bc
 3  /content/drive/MyDrive/ds_study/data1/open/tra...    6      bc
 4  /content/drive/MyDrive/ds_study/data1/open/tra...   10      bc)

In [10]:
# 이미지 사이즈 재확인 => (224, 224)로 반영된 것을 확인함
img = Image.open(total_df_v2['image_path'][0])
print(img.size)
print(img.mode)

(224, 224)
RGB


# 모델 선언
* 우선 baseline 셋팅 완료 후, 다양한 모델을 적용해 평가 및 테스트 진행 예정

In [11]:
from torchvision.models import mobilenet_v2

class CompareCNN(nn.Module):
    
    def __init__(self):
        super(CompareCNN, self).__init__()
        self.mobile_net = mobilenet_v2(pretrained=True)
        self.fc_layer = nn.Linear(1000, 1)
    
    def forward(self, input):
        x = self.mobile_net(input)
        output = self.fc_layer(x)
        return output

class CompareNet(nn.Module):
    
    def __init__(self):
        super(CompareNet, self).__init__()
        self.before_net = CompareCNN()
        self.after_net = CompareCNN()
    
    def forward(self, before_input, after_input):
        before = self.before_net(before_input)
        after = self.after_net(after_input)
        delta = after - before
        return delta

# 학습을 위한 데이터셋 생성

#### 작물별 이미지 조합 및 train, valid 데이터 저장

In [12]:
# 동일 작물 내 샘플링된 2개 이미지별 time_delta 산출한 데이터프레임 생성 함수
def get_combination_df(length, species, df):
  before_image_path = []
  after_image_path = []
  time_delta = []

  for i in range(length):
    sample = df[df['species'] == species].sample(2)
    before = sample[sample['day'] == min(sample['day'])].reset_index(drop=True)
    after = sample[sample['day'] == max(sample['day'])].reset_index(drop=True)

    before_image_path.append(before.iloc[0]['image_path'])
    after_image_path.append(after.iloc[0]['image_path'])
    delta = after.iloc[0]['day'] - before.iloc[0]['day']
    time_delta.append(delta)
  
  combination_df = pd.DataFrame({
      'before_image_path' : before_image_path,
      'after_image_path' : after_image_path,
      'time_delta' : time_delta
  })

  combination_df['species'] = species

  return combination_df

In [13]:
data_length = 5000  # 추출할 조합의 개수
valid_size = 0.1  # validation 데이터 비율
train_data_length = int(data_length*(1-valid_size))

# 이미지 조합 데이터프레임 생성
bc_comb_df = get_combination_df(data_length, 'bc', total_df_v2) # total_df_v2(resize된 이미지) 기준
lt_comb_df = get_combination_df(data_length, 'lt', total_df_v2) # total_df_v2(resize된 이미지) 기준

# train, valid 각각 분리
bc_train = bc_comb_df[:train_data_length]
lt_train = lt_comb_df[:train_data_length]

bc_valid = bc_comb_df[train_data_length:]
lt_valid = lt_comb_df[train_data_length:]

# train_set, valid_set 저장
train_data = pd.concat([bc_train, lt_train])
valid_data = pd.concat([bc_valid, lt_valid])

In [14]:
train_data.tail()

Unnamed: 0,before_image_path,after_image_path,time_delta,species
4495,/content/drive/MyDrive/ds_study/data1/open/tra...,/content/drive/MyDrive/ds_study/data1/open/tra...,27,lt
4496,/content/drive/MyDrive/ds_study/data1/open/tra...,/content/drive/MyDrive/ds_study/data1/open/tra...,19,lt
4497,/content/drive/MyDrive/ds_study/data1/open/tra...,/content/drive/MyDrive/ds_study/data1/open/tra...,6,lt
4498,/content/drive/MyDrive/ds_study/data1/open/tra...,/content/drive/MyDrive/ds_study/data1/open/tra...,16,lt
4499,/content/drive/MyDrive/ds_study/data1/open/tra...,/content/drive/MyDrive/ds_study/data1/open/tra...,6,lt


In [15]:
valid_data.head()

Unnamed: 0,before_image_path,after_image_path,time_delta,species
4500,/content/drive/MyDrive/ds_study/data1/open/tra...,/content/drive/MyDrive/ds_study/data1/open/tra...,8,bc
4501,/content/drive/MyDrive/ds_study/data1/open/tra...,/content/drive/MyDrive/ds_study/data1/open/tra...,0,bc
4502,/content/drive/MyDrive/ds_study/data1/open/tra...,/content/drive/MyDrive/ds_study/data1/open/tra...,30,bc
4503,/content/drive/MyDrive/ds_study/data1/open/tra...,/content/drive/MyDrive/ds_study/data1/open/tra...,7,bc
4504,/content/drive/MyDrive/ds_study/data1/open/tra...,/content/drive/MyDrive/ds_study/data1/open/tra...,32,bc


#### 데이터셋 만들기

In [16]:
# 사용자 정의 데이터셋 클래스
class ImageDataset(Dataset):
  def __init__(self, combination_df, is_test=None):
    self.combination_df = combination_df
    self.transform = transforms.Compose([
      # transforms.Resize(224),  # 이미지 resize 기적용 완료
      transforms.ToTensor()
    ])
    self.is_test = is_test
  
  def __len__(self):
    return len(self.combination_df)

  def __getitem__(self, idx):
    before_image = Image.open(self.combination_df.iloc[idx]['before_image_path'])
    after_image = Image.open(self.combination_df.iloc[idx]['after_image_path'])

    before_image = self.transform(before_image)
    after_image = self.transform(after_image)

    if self.is_test:
      return before_image, after_image
    
    time_delta = self.combination_df.iloc[idx]['time_delta']

    return before_image, after_image, time_delta

In [17]:
train_dataset = ImageDataset(train_data)
valid_dataset = ImageDataset(valid_data)

# 학습 진행

#### 미니 배치 구성

In [18]:
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
valid_loader = DataLoader(valid_dataset, batch_size=valid_batch_size)

In [19]:
SAVE_FOLDER = '/content/drive/MyDrive/ds_study/save/'

model = CompareNet().to(device)
optimizer = optim.Adam(model.parameters(), lr=lr)

Downloading: "https://download.pytorch.org/models/mobilenet_v2-b0353104.pth" to /root/.cache/torch/hub/checkpoints/mobilenet_v2-b0353104.pth


  0%|          | 0.00/13.6M [00:00<?, ?B/s]

In [20]:
for epoch in tqdm(range(epochs)):
  print(f'============ epoch : {epoch} ===============')
  for step, (before_image, after_image, time_delta) in tqdm(enumerate(train_loader)):
    before_image = before_image.to(device)
    after_image = after_image.to(device)
    time_delta = time_delta.to(device)

    optimizer.zero_grad()
    logit = model(before_image, after_image)
    train_loss = torch.sum(torch.abs(logit.squeeze(1).float() - time_delta.float())) / torch.LongTensor([batch_size]).squeeze(0).to(device)
    train_loss.backward()
    optimizer.step()

    if step % 15 == 0:
      print(f'------------ step : {step} ------------')
      print('MAE_loss :', train_loss.detach().cpu().numpy())
  
  valid_losses = []
  with torch.no_grad():
    for valid_before, valid_after, time_delta in tqdm(valid_loader):
      valid_before = valid_before.to(device)
      valid_after = valid_after.to(device)
      valid_time_delta = time_delta.to(device)


      logit = model(valid_before, valid_after)
      valid_loss = torch.sum(torch.abs(logit.squeeze(1).float() - valid_time_delta.float())) / torch.LongTensor([valid_batch_size]).squeeze(0).to(device)
      valid_losses.append(valid_loss.detach().cpu())


  print(f'VALIDATION_LOSS MAE : {sum(valid_losses)/len(valid_losses)}')

  checkpoint = {
      'model' : model.state_dict(),
      'optimizer' : optimizer.state_dict()
  }

  torch.save(checkpoint, SAVE_FOLDER + f'checkpoint_{epoch}.pt')

  0%|          | 0/10 [00:00<?, ?it/s]



0it [00:00, ?it/s]

------------ step : 0 ------------
MAE_loss : 13.082825
------------ step : 15 ------------
MAE_loss : 4.674914
------------ step : 30 ------------
MAE_loss : 2.5435517
------------ step : 45 ------------
MAE_loss : 3.639538
------------ step : 60 ------------
MAE_loss : 1.7493811
------------ step : 75 ------------
MAE_loss : 2.5853605
------------ step : 90 ------------
MAE_loss : 2.6660042
------------ step : 105 ------------
MAE_loss : 2.769632
------------ step : 120 ------------
MAE_loss : 1.5636876
------------ step : 135 ------------
MAE_loss : 1.7426953


  0%|          | 0/20 [00:00<?, ?it/s]

VALIDATION_LOSS MAE : 2.0679173469543457


0it [00:00, ?it/s]

------------ step : 0 ------------
MAE_loss : 1.5244117
------------ step : 15 ------------
MAE_loss : 1.3351536
------------ step : 30 ------------
MAE_loss : 1.8722606
------------ step : 45 ------------
MAE_loss : 2.6144192
------------ step : 60 ------------
MAE_loss : 1.842883
------------ step : 75 ------------
MAE_loss : 1.8866905
------------ step : 90 ------------
MAE_loss : 1.4344754
------------ step : 105 ------------
MAE_loss : 2.1026368
------------ step : 120 ------------
MAE_loss : 1.2495471
------------ step : 135 ------------
MAE_loss : 1.6520405


  0%|          | 0/20 [00:00<?, ?it/s]

VALIDATION_LOSS MAE : 1.8441156148910522


0it [00:00, ?it/s]

------------ step : 0 ------------
MAE_loss : 1.019896
------------ step : 15 ------------
MAE_loss : 1.1014469
------------ step : 30 ------------
MAE_loss : 3.1035023
------------ step : 45 ------------
MAE_loss : 1.6070526
------------ step : 60 ------------
MAE_loss : 1.3035302
------------ step : 75 ------------
MAE_loss : 0.9510591
------------ step : 90 ------------
MAE_loss : 0.9936441
------------ step : 105 ------------
MAE_loss : 2.1424725
------------ step : 120 ------------
MAE_loss : 1.5030007
------------ step : 135 ------------
MAE_loss : 1.0472808


  0%|          | 0/20 [00:00<?, ?it/s]

VALIDATION_LOSS MAE : 1.546873927116394


0it [00:00, ?it/s]

------------ step : 0 ------------
MAE_loss : 1.2390286
------------ step : 15 ------------
MAE_loss : 1.9476026
------------ step : 30 ------------
MAE_loss : 1.288055
------------ step : 45 ------------
MAE_loss : 1.0612323
------------ step : 60 ------------
MAE_loss : 2.2502165
------------ step : 75 ------------
MAE_loss : 1.3730347
------------ step : 90 ------------
MAE_loss : 1.1956897
------------ step : 105 ------------
MAE_loss : 1.0570562
------------ step : 120 ------------
MAE_loss : 3.4486265
------------ step : 135 ------------
MAE_loss : 0.96008


  0%|          | 0/20 [00:00<?, ?it/s]

VALIDATION_LOSS MAE : 1.629639983177185


0it [00:00, ?it/s]

------------ step : 0 ------------
MAE_loss : 0.90636694
------------ step : 15 ------------
MAE_loss : 1.8359201
------------ step : 30 ------------
MAE_loss : 1.6875811
------------ step : 45 ------------
MAE_loss : 1.0113939
------------ step : 60 ------------
MAE_loss : 1.0346506
------------ step : 75 ------------
MAE_loss : 1.9809947
------------ step : 90 ------------
MAE_loss : 1.2224717
------------ step : 105 ------------
MAE_loss : 0.8910876
------------ step : 120 ------------
MAE_loss : 1.7799196
------------ step : 135 ------------
MAE_loss : 1.1452832


  0%|          | 0/20 [00:00<?, ?it/s]

VALIDATION_LOSS MAE : 1.896206259727478


0it [00:00, ?it/s]

------------ step : 0 ------------
MAE_loss : 1.0342519
------------ step : 15 ------------
MAE_loss : 1.8436443
------------ step : 30 ------------
MAE_loss : 1.0619283
------------ step : 45 ------------
MAE_loss : 1.1290756
------------ step : 60 ------------
MAE_loss : 0.9839947
------------ step : 75 ------------
MAE_loss : 1.6280063
------------ step : 90 ------------
MAE_loss : 0.9871635
------------ step : 105 ------------
MAE_loss : 1.1663479
------------ step : 120 ------------
MAE_loss : 1.1904757
------------ step : 135 ------------
MAE_loss : 1.9291267


  0%|          | 0/20 [00:00<?, ?it/s]

VALIDATION_LOSS MAE : 1.5291171073913574


0it [00:00, ?it/s]

------------ step : 0 ------------
MAE_loss : 1.0360467
------------ step : 15 ------------
MAE_loss : 0.79357505
------------ step : 30 ------------
MAE_loss : 2.2964067
------------ step : 45 ------------
MAE_loss : 1.0783143
------------ step : 60 ------------
MAE_loss : 3.634296
------------ step : 75 ------------
MAE_loss : 0.93908393
------------ step : 90 ------------
MAE_loss : 1.8377922
------------ step : 105 ------------
MAE_loss : 1.2119696
------------ step : 120 ------------
MAE_loss : 2.0671504
------------ step : 135 ------------
MAE_loss : 0.80496585


  0%|          | 0/20 [00:00<?, ?it/s]

VALIDATION_LOSS MAE : 1.4445154666900635


0it [00:00, ?it/s]

------------ step : 0 ------------
MAE_loss : 0.9661953
------------ step : 15 ------------
MAE_loss : 3.867584
------------ step : 30 ------------
MAE_loss : 1.3630915
------------ step : 45 ------------
MAE_loss : 1.2790458
------------ step : 60 ------------
MAE_loss : 0.9548712
------------ step : 75 ------------
MAE_loss : 1.7446543
------------ step : 90 ------------
MAE_loss : 2.4975781
------------ step : 105 ------------
MAE_loss : 1.2753758
------------ step : 120 ------------
MAE_loss : 1.2497267
------------ step : 135 ------------
MAE_loss : 1.2052004


  0%|          | 0/20 [00:00<?, ?it/s]

VALIDATION_LOSS MAE : 1.293242335319519


0it [00:00, ?it/s]

------------ step : 0 ------------
MAE_loss : 0.90552557
------------ step : 15 ------------
MAE_loss : 1.2165716
------------ step : 30 ------------
MAE_loss : 2.2330322
------------ step : 45 ------------
MAE_loss : 1.0742078
------------ step : 60 ------------
MAE_loss : 1.088738
------------ step : 75 ------------
MAE_loss : 0.7983378
------------ step : 90 ------------
MAE_loss : 0.74156255
------------ step : 105 ------------
MAE_loss : 0.8747442
------------ step : 120 ------------
MAE_loss : 1.7842927
------------ step : 135 ------------
MAE_loss : 0.91689646


  0%|          | 0/20 [00:00<?, ?it/s]

VALIDATION_LOSS MAE : 1.3216627836227417


0it [00:00, ?it/s]

------------ step : 0 ------------
MAE_loss : 0.760231
------------ step : 15 ------------
MAE_loss : 0.9761988
------------ step : 30 ------------
MAE_loss : 1.8061323
------------ step : 45 ------------
MAE_loss : 0.9069492
------------ step : 60 ------------
MAE_loss : 1.1414512
------------ step : 75 ------------
MAE_loss : 0.8783419
------------ step : 90 ------------
MAE_loss : 1.1235065
------------ step : 105 ------------
MAE_loss : 1.4616439
------------ step : 120 ------------
MAE_loss : 1.0186536
------------ step : 135 ------------
MAE_loss : 0.7057423


  0%|          | 0/20 [00:00<?, ?it/s]

VALIDATION_LOSS MAE : 1.3818230628967285


# 테스트 데이터 기준 예측

In [27]:
# 테스트 데이터 csv파일 불러오기 및 데이터셋 생성
test_set = pd.read_csv(TEST_FOLDER + 'test_data.csv')
test_set['l_root'] = test_set['before_file_path'].map(lambda x: TEST_FOLDER + x.split('_')[1] + '_Resize/' + x.split('_')[2])
test_set['r_root'] = test_set['after_file_path'].map(lambda x: TEST_FOLDER + x.split('_')[1] + '_Resize/' + x.split('_')[2])
test_set['before_image_path'] = test_set['l_root'] + '/' + test_set['before_file_path'] + '.png'
test_set['after_image_path'] = test_set['r_root'] + '/' + test_set['after_file_path'] + '.png'
test_dataset = ImageDataset(test_set, is_test=True)
test_data_loader = DataLoader(test_dataset, batch_size=batch_size)

test_data_loader

<torch.utils.data.dataloader.DataLoader at 0x7f03ffab1810>

In [29]:
# 예측결과 저장
test_value = []
with torch.no_grad():
  for test_before, test_after in tqdm(test_data_loader):
    test_before = test_before.to(device)
    test_after = test_after.to(device)
    logit = model(test_before, test_after)
    value = logit.squeeze(1).detach().cpu().float()
    
    test_value.extend(value)

  0%|          | 0/62 [00:00<?, ?it/s]

In [46]:
len(test_value), test_value[:5] # 3960건 예측결과 저장 확인

(3960,
 [tensor(23.2113),
  tensor(24.2531),
  tensor(3.0924),
  tensor(7.3620),
  tensor(27.8971)])

In [41]:
# 제출양식에 반영
sub = pd.read_csv('/content/drive/MyDrive/ds_study/data1/open/sample_submission.csv')
sub['time_delta'] = np.array(test_value)
sub.head()

Unnamed: 0,idx,time_delta
0,0,23.211262
1,1,24.253082
2,2,3.092441
3,3,7.361996
4,4,27.897129


In [42]:
# 예측결과 csv파일 저장
sub.to_csv('/content/drive/MyDrive/ds_study/submission_v0.5.csv', index=False)