# 라이브러리 호출

In [1]:
!pip install einops
!pip install optuna
import pandas as pd
import numpy as np
from datetime import datetime
from sklearn.metrics import mean_squared_error
import os
import pickle
import random
import torch
import torch.nn.functional as F
from torch.nn.utils import weight_norm
import math
import sys
import argparse
import torch
import torch.nn as nn
from torch.utils.data import DataLoader
from torch.optim.lr_scheduler import CosineAnnealingLR
from tqdm import tqdm
import json
import copy
import torchvision.ops

Collecting einops
  Downloading einops-0.7.0-py3-none-any.whl (44 kB)
     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 44.6/44.6 kB 1.7 MB/s eta 0:00:00
Installing collected packages: einops
Successfully installed einops-0.7.0
Collecting optuna
  Downloading optuna-3.5.0-py3-none-any.whl (413 kB)
     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 413.4/413.4 kB 5.1 MB/s eta 0:00:00
Collecting alembic>=1.5.0 (from optuna)
  Downloading alembic-1.13.1-py3-none-any.whl (233 kB)
     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 233.4/233.4 kB 10.8 MB/s eta 0:00:00
Collecting colorlog (from optuna)
  Downloading colorlog-6.8.0-py3-none-any.whl (11 kB)
Collecting Mako (from alembic>=1.5.0->optuna)
  Downloading Mako-1.3.0-py3-none-any.whl (78 kB)
     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 78.6/78.6 kB 9.3 MB/s eta 0:00:00
Ins

# 깃 클론

In [2]:
!git clone 'https://github.com/edwardhan925192/TimesNet.git'
%cd /content/TimesNet

from times_model import Model
from whole_dataset import TimeSeriesDataset,TimeSeries_ValDataset,TimeSeries_TestDataset
from schedular.scheduler import initialize_scheduler

Cloning into 'TimesNet'...
remote: Enumerating objects: 844, done.
remote: Counting objects: 100% (385/385), done.
remote: Compressing objects: 100% (225/225), done.
remote: Total 844 (delta 221), reused 270 (delta 156), pack-reused 459
Receiving objects: 100% (844/844), 281.87 KiB | 1.61 MiB/s, done.
Resolving deltas: 100% (482/482), done.
/content/TimesNet


# 시드 설정

In [3]:
# Seed every random number generator used in this notebook (PyTorch CPU and
# CUDA, NumPy, and Python's built-in `random`) with the same value.
seed = 0
torch.manual_seed(seed)
torch.cuda.manual_seed(seed)
torch.cuda.manual_seed_all(seed)  # for multi-GPU.
np.random.seed(seed)
random.seed(seed)  # Python's built-in RNG; uses the same `seed` as above

# 함수

In [4]:
from pandas.tseries.offsets import YearEnd
from tqdm import tqdm

# validation 스플릿
def split_train_validation_timeseries(df, validation_ranges):
    """
    Build one (train, validation) pair per requested validation window.

    Parameters:
    df (pd.DataFrame): Frame to slice by row position.
    validation_ranges (list of tuple): ``(start, end)`` positional bounds,
        e.g. ``[(800, 900), (900, 1000)]``; ``end`` is exclusive.

    Returns:
    tuple: ``(train_dfs, validation_dfs)``. For the i-th range,
        ``validation_dfs[i]`` is ``df.iloc[start:end]`` and ``train_dfs[i]``
        is every row before ``start``.

    Raises:
    ValueError: If ``start >= end`` or ``end`` is greater than ``len(df)``.
    """
    row_count = len(df)
    pairs = []

    for val_range in validation_ranges:
        lower, upper = val_range

        # Reject empty/reversed windows and windows running past the data.
        if lower >= upper or upper > row_count:
            raise ValueError("Invalid range: {}".format(val_range))

        pairs.append((df.iloc[:lower], df.iloc[lower:upper]))

    train_dfs = [train_part for train_part, _ in pairs]
    validation_dfs = [validation_part for _, validation_part in pairs]
    return train_dfs, validation_dfs

# 결측치 채우기
def fill_missing_with_ratio(df, ratio, column2, column1):
    """
    Fill gaps in two proportional columns from each other.

    The relationship assumed is ``column1 == column2 * ratio`` (``column2``
    is the larger column). Missing ``column1`` values are filled first; the
    already-filled ``column1`` is then used to fill missing ``column2``
    values. Rows where both columns are missing stay missing.

    Note the positional order of the arguments: ``column2`` comes BEFORE
    ``column1`` in the signature.

    Parameters:
    df (pd.DataFrame): The DataFrame to process. It is modified in place.
    ratio (float): The ratio ``column1 / column2`` used for filling.
    column2 (str): Name of the larger column (filled with ``column1 / ratio``).
    column1 (str): Name of the smaller column (filled with ``column2 * ratio``).

    Returns:
    pd.DataFrame: The same DataFrame object, with filled values.

    Raises:
    ValueError: If either column is not present in ``df``.
    """
    if column1 not in df or column2 not in df:
        raise ValueError("Specified columns must be in the DataFrame")

    # Vectorised fill: avoids a Python-level call per row (df.apply(axis=1))
    # and is well-defined for an empty DataFrame.
    df[column1] = df[column1].fillna(df[column2] * ratio)

    # Uses the column1 values filled just above, like a sequential row pass would.
    df[column2] = df[column2].fillna(df[column1] / ratio)

    return df

# 결측치 채우기2
def interpolate_columns(df, columns):
    """
    Linearly interpolate missing values, column by column, in place.

    Interpolation runs in the forward direction only
    (``limit_direction='forward'``). Requested columns that do not exist are
    reported with a printed message and skipped.

    Parameters:
    df (pd.DataFrame): The DataFrame to process. It is modified in place.
    columns (list of str): Names of the columns to interpolate.

    Returns:
    pd.DataFrame: The same DataFrame object with the interpolated columns.
    """
    existing = df.columns

    for name in columns:
        # Guard clause: report and skip names that are not in the frame.
        if name not in existing:
            print(f"Column '{name}' not found in DataFrame.")
            continue

        series = df[name]
        df[name] = series.interpolate(method='linear', limit_direction='forward', axis=0)

    return df

def convert_datetime_column(df, column_name):
    """
    Expand a column of date/time strings into calendar component columns.

    Adds ``Year``, ``Month``, ``Day`` and ``DayType`` (``'Weekend'`` for
    Saturday/Sunday, otherwise ``'Weekday'``) to ``df``. An ``Hour`` column is
    added only when at least one value carries a time part; values without a
    time part get ``None`` in that column.

    Accepted string layouts: ``YYYY-MM-DD``, ``YYYY-MM-DD HH``,
    ``YYYY-MM-DD HH:MM`` and ``YYYY-MM-DD HH:MM:SS``.

    Parameters:
    df (pd.DataFrame): The DataFrame to process. It is modified in place.
    column_name (str): Name of the column holding the date/time strings.

    Returns:
    pd.DataFrame: The same DataFrame object with the added columns.

    Raises:
    ValueError: If a value matches none of the accepted layouts.
    """
    date_only_format = '%Y-%m-%d'
    # Tried in order; each layout rejects strings with leftover characters,
    # so at most one of them can match a given value.
    candidate_formats = (
        '%Y-%m-%d %H:%M:%S',
        '%Y-%m-%d %H:%M',
        '%Y-%m-%d %H',
        date_only_format,
    )

    def extract_date_time_components(datetime_str):
        # Parse by trial instead of guessing the layout from the presence of
        # ':' or ' ', which selected a format unable to parse minutes.
        for dt_format in candidate_formats:
            try:
                dt_obj = datetime.strptime(datetime_str, dt_format)
            except ValueError:
                continue
            break
        else:
            raise ValueError(f"Unsupported datetime format: {datetime_str!r}")

        # datetime.weekday(): Monday is 0 ... Saturday is 5, Sunday is 6.
        day_type = 'Weekend' if dt_obj.weekday() >= 5 else 'Weekday'
        hour = dt_obj.hour if dt_format != date_only_format else None
        return dt_obj.year, dt_obj.month, dt_obj.day, hour, day_type

    components = df[column_name].apply(extract_date_time_components)

    # With no rows there is nothing to unpack; still add the (empty) columns.
    if components.empty:
        for new_column in ('Year', 'Month', 'Day', 'DayType'):
            df[new_column] = []
        return df

    # Unpack components and create columns conditionally
    df['Year'], df['Month'], df['Day'], hour_component, df['DayType'] = zip(*components)
    if any(hour is not None for hour in hour_component):
        df['Hour'] = hour_component

    return df


# One integer per calendar component column produced by convert_datetime_column.
# NOTE(review): this mapping is not referenced anywhere in the visible code.
# The values look like the maximum of each component (presumably for a
# cyclical/periodic encoding) - confirm the intended use, and whether 'Hour'
# should be 23 (max value) or 24 (period) to be consistent with 'Month'/'Day'.
columns_info = {
    'Month': 12,
    'Hour': 23,
    'Day':31
}

def fill_column_from_dfB_to_dfA(dfA, dfB, column_name):
    """
    Overwrite the leading rows of a column in dfA with values from dfB.

    The first ``min(len(dfA), len(dfB))`` rows are copied by POSITION, so the
    result does not depend on the index labels of either frame (e.g. a
    filtered or re-indexed dfA is handled the same as a fresh RangeIndex).
    Rows of dfA beyond that count are left untouched.

    Parameters:
    dfA (pd.DataFrame): The dataframe to be updated. It is modified in place.
    dfB (pd.DataFrame): The dataframe providing the new values.
    column_name (str): The name of the column to be updated.

    Returns:
    pd.DataFrame: The same dfA object, after the update.

    Raises:
    ValueError: If the column is missing from either dataframe.
    """
    if column_name not in dfA.columns or column_name not in dfB.columns:
        raise ValueError("Column not found in one or both dataframes")

    # Determine the number of rows to update
    num_rows_to_update = min(len(dfA), len(dfB))

    if num_rows_to_update:
        # Positional write with a plain array: no label slicing and no index
        # alignment between the two frames.
        column_position = dfA.columns.get_loc(column_name)
        new_values = dfB[column_name].iloc[:num_rows_to_update].to_numpy()
        dfA.iloc[:num_rows_to_update, column_position] = new_values

    return dfA

# 데이터 결측치 처리

In [5]:
from validation_split import split_train_validation_timeseries
# Load the raw data. NOTE: both paths are placeholders and must be replaced
# with the real CSV locations before running.
train = pd.read_csv('train path')
sample_submission = pd.read_csv('sample submission path')

# Expand the date/time column ('일시') into Year/Month/Day/DayType columns,
# then drop the original string column and DayType.
train = convert_datetime_column(train, '일시')
test = convert_datetime_column(sample_submission.drop('평균기온',axis = 1), '일시')
train = train.drop(['일시','DayType'],axis = 1)
test = test.drop(['일시','DayType'],axis = 1)

# Missing values in max temperature, min temperature, diurnal range and
# mean wind speed: linear interpolation.
# interpolate_columns works in place, so `train_` and `train` are the same object.
train_ = interpolate_columns(train,['최고기온','최저기온', '일교차' , '평균풍속'])

# Missing precipitation ('강수량') is filled with 0
train_['강수량'] = train_['강수량'].fillna(0)

# Missing total sunshine ('일조합') / sunshine rate ('일조율'):
# ratio = sum(일조율) / sum(일조합) over rows where 일조율 > 0, then each column
# is filled from the other using that ratio.
# NOTE(review): pandas sums skip NaN, so rows with 일조율 > 0 but a missing 일조합
# count in the numerator only - confirm this is acceptable.
ratio = train[train['일조율'] > 0]['일조율'].sum() / train[train['일조율'] > 0]['일조합'].sum()
train_ = fill_missing_with_ratio(train_,ratio,'일조합','일조율')

# Total solar radiation ('일사합') is dropped rather than imputed
train_ = train_.drop('일사합',axis =1 )
# NOTE(review): '평균풍속' is listed twice below, and both drops are made redundant
# by the column selection on the last line (only three columns are kept).
train_ = train_.drop(['평균풍속','일교차','평균풍속','일조율','Year','Month','Day'],axis = 1)
# Final model inputs: mean, max and min temperature
train_ = train_[['평균기온','최고기온','최저기온']]

# 겨울 모델 및 스케줄러 파라미터 설정

In [6]:
# 스케줄러 설정
class SchedulerConfig:
    """
    Keyword-argument presets for learning-rate schedulers.

    Each attribute is named after a scheduler and holds a dict of that
    scheduler's parameters. ``get_params`` looks a preset up by name.
    """

    def __init__(self):
        # Author's note: starts from lr and goes down to eta_min in T_0.
        self.CosineAnnealingWarmRestarts = {
            'T_0': 10,
            'T_mult': 1,
            'eta_min': 0.0005,
        }
        self.StepLR = {
            'step_size': 10,
            'gamma': 0.1,
        }
        self.ExponentialLR = {
            'gamma': 0.95,
        }

        # Author's note: steps_per_epoch should be the number of batches and
        # epochs the total number of epochs; starts from a low lr up to max_lr.
        self.OneCycleLR = {
            'max_lr': 0.01,
            'steps_per_epoch': 10,
            'epochs': 20,
        }

        self.CyclicLR = {
            'base_lr': 0.001,
            'max_lr': 0.01,
            'step_size_up': 5,
            'step_size_down': 5,
            'mode': 'triangular',
        }

    def get_params(self, scheduler_name):
        """Return the attribute named ``scheduler_name``, or None if absent."""
        try:
            return getattr(self, scheduler_name)
        except AttributeError:
            return None

# Number of input feature columns; used by Config for enc_in and label_len.
num_features = 3
# Name of the column to forecast (mean temperature); used by Config.target_col.
target_name = '평균기온'

# 모델 파라미터 설정
class Config:
    """
    Model and scheduler settings for the winter model.

    ``seq_range`` selects positions 0-44 and 317-357; the remaining positions
    (45-316) belong to the non-winter configuration defined in a later cell.
    Relies on the module-level ``num_features``, ``target_name`` and
    ``SchedulerConfig``.
    """

    def __init__(self):
        self.task_name = 'short_term_forecast'
        self.seq_len = 365
        self.window_shift = 1
        # Number of input features
        self.enc_in = num_features
        # Embedding dimension
        self.d_model = 22
        # Number of FFT frequencies kept
        self.top_k = 3
        # Convolution output dimension
        self.d_ff = 22
        # Kernels per inception block
        self.num_kernels = 6
        # Dropout rate
        self.dropout = 0.24915726313968972
        # Number of stacked blocks
        self.e_layers = 1
        self.label_len = num_features
        # Name of the target column
        self.target_col = target_name
        self.cnn_type = 'inceptionv1'
        # Prediction length
        self.pred_len = 358
        # Number of output features
        self.c_out = 1
        self.eval_range = 0

        # Positions handled by this model: [0, 45) followed by [317, 358).
        head_positions = np.arange(0, 45)
        tail_positions = np.arange(317, 358)
        self.seq_range = np.concatenate([head_positions, tail_positions])

        self.scheduler_config = SchedulerConfig()
        self.scheduler_name = 'CosineAnnealingWarmRestarts'
        self.scheduler_update_type = 'epoch'

    def update(self, new_params):
        """Overwrite existing attributes from ``new_params``; unknown keys are ignored."""
        for name, new_value in new_params.items():
            if not hasattr(self, name):
                continue
            setattr(self, name, new_value)
# Instantiate now: the next cell redefines Config (and SchedulerConfig) with
# the non-winter settings, so this object keeps the winter values.
winter_configs = Config()

# 봄,여름,가을 모델 및 스케줄러 파라미터 설정

In [7]:
class SchedulerConfig:
    """
    Keyword-argument presets for learning-rate schedulers.

    Redefinition with the same values as the SchedulerConfig declared in the
    winter-model cell. Each attribute is named after a scheduler and holds a
    dict of that scheduler's parameters.
    """

    def __init__(self):
        # Author's note: starts from lr and goes down to eta_min in T_0.
        self.CosineAnnealingWarmRestarts = {
            'T_0': 10,
            'T_mult': 1,
            'eta_min': 0.0005,
        }
        self.StepLR = {
            'step_size': 10,
            'gamma': 0.1,
        }
        self.ExponentialLR = {
            'gamma': 0.95,
        }

        # Author's note: steps_per_epoch should be the number of batches and
        # epochs the total number of epochs; starts from a low lr up to max_lr.
        self.OneCycleLR = {
            'max_lr': 0.01,
            'steps_per_epoch': 10,
            'epochs': 20,
        }

        self.CyclicLR = {
            'base_lr': 0.001,
            'max_lr': 0.01,
            'step_size_up': 5,
            'step_size_down': 5,
            'mode': 'triangular',
        }

    def get_params(self, scheduler_name):
        """Return the attribute named ``scheduler_name``, or None if absent."""
        try:
            return getattr(self, scheduler_name)
        except AttributeError:
            return None

# Number of input feature columns; used by Config for enc_in and label_len.
num_features = 3
# Name of the column to forecast (mean temperature); used by Config.target_col.
target_name = '평균기온'

class Config:
    """
    Model and scheduler settings for the spring/summer/autumn (non-winter) model.

    ``seq_range`` selects positions 45-316, i.e. the positions not covered by
    the winter configuration. Relies on the module-level ``num_features``,
    ``target_name`` and ``SchedulerConfig``.
    """

    def __init__(self):
        # Output features and c_out should be the same when the task is anomaly_detection
        self.task_name = 'short_term_forecast'
        self.seq_len = 365
        self.window_shift = 1
        # Features
        self.enc_in = num_features
        # Convolution embedding dimension AFTER RESHAPING
        self.d_model = 20
        # FFT frequency
        self.top_k = 3
        # Convolution output layer dimension AFTER RESHAPING
        self.d_ff = 20
        # Inception block: number of different grid cells used / if using dcvn set it to 3
        self.num_kernels = 6
        # Dropout rate
        self.dropout = 0.1933493411095017
        # Number of Timeblocks
        self.e_layers = 1
        # Features
        self.label_len = num_features
        # Name of target column
        self.target_col = target_name
        # Options: dcvn (KERNEL = 3), inceptionv1, inceptionv2, res_dcvn,
        # res_inceptionv1, res_inceptionv2
        self.cnn_type = 'inceptionv1'
        # Prediction length
        self.pred_len = 358
        # Output feature
        self.c_out = 1
        self.eval_range = 0
        # Positions handled by this model: [45, 317)
        self.seq_range = np.arange(45, 317)
        self.scheduler_config = SchedulerConfig()
        # Options: 'CosineAnnealingWarmRestarts', 'StepLR', 'ExponentialLR',
        # 'OneCycleLR', 'CyclicLR'
        self.scheduler_name = 'CosineAnnealingWarmRestarts'
        # Options: epoch, batch
        self.scheduler_update_type = 'epoch'

    def update(self, new_params):
        """Overwrite existing attributes from ``new_params``; unknown keys are ignored."""
        for name, new_value in new_params.items():
            if not hasattr(self, name):
                continue
            setattr(self, name, new_value)

# Settings object for the non-winter model, built from the Config class
# redefined in this cell.
xwinter_configs = Config()

# 훈련 파라미터 설정

In [8]:
# Shared training settings read by the two training cells below.
df_train = train_
# With None, the `target_index` lookup in the training cells falls back to -1.
target_col = None
learning_rate = 0.01
num_epochs = 10
# Batch size passed to the training and test DataLoaders
batch_sizes = 30
# Options: winter_configs, xwinter_configs (the non-winter cell reassigns this itself)
configs = winter_configs #winter_configs, xwinter_configs
# The test dataset is built from the same frame as training.
# NOTE(review): presumably TimeSeries_TestDataset uses the last seq_len rows as
# the forecasting input - confirm against whole_dataset.py.
df_test = train_

# 겨울 모델 트레이닝

In [9]:
# Train the winter model (configs == winter_configs) and run inference.
device = 'cuda' if torch.cuda.is_available() else 'cpu'

model_state = None

# NOTE(review): never appended to in this cell; the per-epoch loss is only printed.
training_loss_history = []

col_list = list(df_train.columns)
# NOTE(review): target_index is computed but not used later in this cell.
target_index = col_list.index(target_col) if target_col in col_list else -1

# Mean absolute error
criterion = nn.L1Loss()

model = Model(configs).to(device)

optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)
scheduler = initialize_scheduler(optimizer, configs)

train_dataset = TimeSeriesDataset(df_train, configs.seq_len, configs.pred_len, configs.seq_range, configs.eval_range)
# shuffle=False: batches are drawn in dataset order
train_loader = DataLoader(train_dataset, batch_size=batch_sizes, shuffle=False)

for epoch in range(num_epochs):

    model.train()
    total_loss = 0

    for batch_idx, (batch_data, batch_target) in enumerate(tqdm(train_loader, desc=f"Epoch {epoch + 1}/{num_epochs}")):
        batch_data, batch_target = batch_data.to(device), batch_target.to(device)
        optimizer.zero_grad()
        outputs = model(batch_data)

        loss = criterion(outputs, batch_target)
        loss.backward()
        optimizer.step()
        total_loss += loss.item()

    # Stepped once per epoch (configs.scheduler_update_type is 'epoch')
    scheduler.step()

    # Mean of the per-batch losses for this epoch
    average_training_loss = total_loss / len(train_loader)
    print(f'Epoch {epoch + 1}/{num_epochs}, Training_Loss = {average_training_loss}')
# Snapshot of the weights after the final epoch
model_state = copy.deepcopy(model.state_dict())

test_dataset = TimeSeries_TestDataset(df_test, configs.seq_len)
test_loader = DataLoader(test_dataset, batch_size=batch_sizes, shuffle=False)

model.eval()

with torch.no_grad():
    # NOTE(review): `outputs` is overwritten on every iteration, so only the
    # last batch survives; presumably the test dataset yields a single batch - confirm.
    for batch_test_data in test_loader:
        batch_test_data = batch_test_data.to(device)
        outputs = model(batch_test_data)

# Winter-model prediction, consumed by the cell that merges the seasonal outputs
winter_pred = outputs

Epoch 1/1: 100%|██████████| 743/743 [01:27<00:00,  8.54it/s]

Epoch 1/1, Training_Loss = 3.709547520646497





# 봄,여름,가을 모델 트레이닝

In [10]:
# Train the spring/summer/autumn (non-winter) model and run inference.
configs = xwinter_configs

device = 'cuda' if torch.cuda.is_available() else 'cpu'

model_state = None

# NOTE(review): never appended to in this cell; the per-epoch loss is only printed.
training_loss_history = []

col_list = list(df_train.columns)
# NOTE(review): target_index is computed but not used later in this cell.
target_index = col_list.index(target_col) if target_col in col_list else -1

# Mean absolute error
criterion = nn.L1Loss()

model = Model(configs).to(device)

optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)
scheduler = initialize_scheduler(optimizer, configs)

train_dataset = TimeSeriesDataset(df_train, configs.seq_len, configs.pred_len, configs.seq_range, configs.eval_range)
# shuffle=False: batches are drawn in dataset order
train_loader = DataLoader(train_dataset, batch_size=batch_sizes, shuffle=False)

for epoch in range(num_epochs):

    model.train()
    total_loss = 0

    for batch_idx, (batch_data, batch_target) in enumerate(tqdm(train_loader, desc=f"Epoch {epoch + 1}/{num_epochs}")):
        batch_data, batch_target = batch_data.to(device), batch_target.to(device)
        optimizer.zero_grad()
        outputs = model(batch_data)

        loss = criterion(outputs, batch_target)
        loss.backward()
        optimizer.step()
        total_loss += loss.item()

    # Stepped once per epoch (configs.scheduler_update_type is 'epoch')
    scheduler.step()

    # Mean of the per-batch losses for this epoch
    average_training_loss = total_loss / len(train_loader)
    print(f'Epoch {epoch + 1}/{num_epochs}, Training_Loss = {average_training_loss}')
# Snapshot of the weights after the final epoch
model_state = copy.deepcopy(model.state_dict())

test_dataset = TimeSeries_TestDataset(df_test, configs.seq_len)
test_loader = DataLoader(test_dataset, batch_size=batch_sizes, shuffle=False)

model.eval()

with torch.no_grad():
    # NOTE(review): `outputs` is overwritten on every iteration, so only the
    # last batch survives; presumably the test dataset yields a single batch - confirm.
    for batch_test_data in test_loader:
        batch_test_data = batch_test_data.to(device)
        outputs = model(batch_test_data)


# Non-winter prediction, consumed by the cell that merges the seasonal outputs
xwinter_pred = outputs

Epoch 1/1: 100%|██████████| 743/743 [01:29<00:00,  8.34it/s]


Epoch 1/1, Training_Loss = 3.324030128333963


# 계절별 모델 합치기

In [13]:
# Stitch the seasonal outputs into one sequence, in this order:
# winter_pred[0][:45], xwinter_pred[0], winter_pred[0][45:].
# The split at 45 mirrors winter_configs.seq_range (positions 0-44 and 317-357)
# with xwinter_configs.seq_range (45-316) in between.
# NOTE(review): assumes each model returns only its own seq_range positions - confirm.
first = winter_pred[0][:45].cpu()
third = winter_pred[0][45:].cpu()

second = xwinter_pred[0].cpu()

full_pred = np.concatenate([first.numpy(), second.numpy(), third.numpy()], axis=0)

# 제출

In [None]:
# Write the merged forecast into the sample submission and save it as CSV.
result = pd.DataFrame(full_pred)
# Assigning a single name requires `result` to have exactly one column.
result.columns = ['평균기온']
# fill_column_from_dfB_to_dfA updates sample_submission in place and returns it.
submission_A = fill_column_from_dfB_to_dfA(sample_submission,result,'평균기온')
submission_A.to_csv('submission.csv',index = False)

# Private 점수 복원

In [None]:
# 모델 웨이트 저장공간
# https://drive.google.com/drive/folders/1uwKoVtbX5djdrf6e4pmWj4t64LzHv9BU?usp=drive_link

In [15]:
# Locations of the saved state dicts for the two seasonal models.
# NOTE(review): the winter path has no '.pth' extension while the non-winter
# path does - confirm the actual file name.
winter_model_path = '/content/timesnet_o_winter'
xwinter_model_path = '/content/timesnet_x_winter.pth'

# Resolve the device here so the restore section can run without first
# executing the training cells (same expression as used there).
device = 'cuda' if torch.cuda.is_available() else 'cpu'

# map_location remaps the stored tensors onto the available device; without it,
# weights saved from a CUDA session fail to load on a CPU-only runtime.
# SECURITY: torch.load unpickles the file - only load weights from a trusted source.
winter_model_dict = torch.load(winter_model_path, map_location=device)
xwinter_model_dict = torch.load(xwinter_model_path, map_location=device)

In [16]:
# Rebuild the winter model, load the saved weights and run inference.
configs = winter_configs
model = Model(configs).to(device)

model.load_state_dict(winter_model_dict)

model.eval()

# Prepare the data
test_dataset = TimeSeries_TestDataset(df_test, configs.seq_len)
test_loader = DataLoader(test_dataset, batch_size=1, shuffle=False)

# Predict
with torch.no_grad():
    for batch_test_data in test_loader:
        batch_test_data = batch_test_data.to(device)
        outputs = model(batch_test_data)

        # Reassigned on every iteration, so the last batch's output is kept
        winter_predictions = outputs


In [17]:
# Rebuild the non-winter model, load the saved weights and run inference.
configs = xwinter_configs
model = Model(configs).to(device)
model.load_state_dict(xwinter_model_dict)

model.eval()

# Prepare the data
test_dataset = TimeSeries_TestDataset(df_test, configs.seq_len)
test_loader = DataLoader(test_dataset, batch_size=1, shuffle=False)

# Predict
with torch.no_grad():
    for batch_test_data in test_loader:
        batch_test_data = batch_test_data.to(device)
        outputs = model(batch_test_data)

        # Reassigned on every iteration, so the last batch's output is kept
        xwinter_predictions = outputs

# 계절별 복원값 합치기

In [None]:
# Stitch the restored seasonal outputs into one sequence, in this order:
# winter_predictions[0][:45], xwinter_predictions[0], winter_predictions[0][45:].
# The split at 45 mirrors winter_configs.seq_range (positions 0-44 and 317-357).
# NOTE(review): assumes each model returns only its own seq_range positions - confirm.
first = winter_predictions[0][:45].cpu()
third = winter_predictions[0][45:].cpu()

second = xwinter_predictions[0].cpu()

# "bokwon" is the romanised Korean word for "restoration": the restored prediction
bokwon = np.concatenate([first.numpy(), second.numpy(), third.numpy()], axis=0)
# Bare expression: displays the array as the notebook cell output
bokwon