# 2023 제1회 철도 인공지능 경진대회
- 주어진 주행데이터 및 선로데이터를 이용하여, 탈선계수에 해당하는 이하 4개 항목을 예측하는 모델을 만듭니다.
    - YL_M1_B1_W1: 좌측 전위 차륜 탈선계수
    - YR_M1_B1_W1: 우측 전위 차륜 탈선계수
    - YL_M1_B1_W2: 좌측 후위 차륜 탈선계수
    - YR_M1_B1_W2: 우측 후위 차륜 탈선계수

In [1]:
import torch
import torch.nn as nn
from torch.utils.data import DataLoader, TensorDataset

import numpy as np
import matplotlib.pyplot as plt
import pandas as pd

from tqdm.notebook import tqdm
import time

In [2]:
data_c30 = pd.read_csv('./data/data_c30.csv')
data_c40 = pd.read_csv('./data/data_c40.csv')
data_c50 = pd.read_csv('./data/data_c50.csv')
data_c70 = pd.read_csv('./data/data_c70.csv')
data_c100 = pd.read_csv('./data/data_c100.csv')

lane_data_c = pd.read_csv('./data/lane_data_c.csv')

data_s30 = pd.read_csv('./data/data_s30.csv')
data_s40 = pd.read_csv('./data/data_s40.csv')
data_s50 = pd.read_csv('./data/data_s50.csv')
data_s70 = pd.read_csv('./data/data_s70.csv')
data_s100 = pd.read_csv('./data/data_s100.csv')

lane_data_s = pd.read_csv('./data/lane_data_s.csv')

answer_sample = pd.read_csv('./data/answer_sample.csv')

In [4]:
#train data
s_datas = {
    's30':[data_s30, lane_data_s],
    's40':[data_s40, lane_data_s],
    's50':[data_s50, lane_data_s],
    's70':[data_s70, lane_data_s],
    's100':[data_s100, lane_data_s]
    }

c_datas = {
    'c30':[data_c30, lane_data_c],
    'c40':[data_c40, lane_data_c],
    'c50':[data_c50, lane_data_c],
    'c70':[data_c70, lane_data_c],
    'c100':[data_c100, lane_data_c]
    }

#answer를 위한 data
test_data = {
    's30':None,
    's40':None,
    's50':None,
    's70':None,
    's100':None,
    'c30':None,
    'c40':None,
    'c50':None,
    'c70':None,
    'c100':None,
    }

# 하이퍼파라미터 설정
#TEST0: LSTM        epoch 100, hidden_size 64, num_layers 2, seq_length 100
#TEST1: GRU         epoch 100, hidden_size 64, num_layers 2, seq_length 100
#TEST2: LSTMGRU     epoch 100, lstm_hidden_size 64, lstm_num_layers 2, gru_hidden_size 64, gru_num_layers 2, seq_length 100  
#TEST2: LSTMGRU     epoch 100, lstm_hidden_size 32, lstm_num_layers 2, gru_hidden_size 32, gru_num_layers 2, seq_length 100  -> 0.0014

hyperparameter = {
        'test_number':0,
        'load_number':0,
        'flag':False,
        's_input_size':36,
        'c_input_size':34,
        'output_size':4,
        'hidden_size':128,
        'num_layer':2,
        'epochs':40,
        'learning_rate':0.001,
        'seq_length': 120,
        'batch_size': 100
    }

model_loss = {
    's30_1':None,
    's40_1':None,
    's50_1':None,
    's70_1':None,
    's100_1':None,
    'c30_1':None,
    'c40_1':None,
    'c50_1':None,
    'c70_1':None,
    'c100_1':None,

    's30_2':None,
    's40_2':None,
    's50_2':None,
    's70_2':None,
    's100_2':None,
    'c30_2':None,
    'c40_2':None,
    'c50_2':None,
    'c70_2':None,
    'c100_2':None,

    's30_3':None,
    's40_3':None,
    's50_3':None,
    's70_3':None,
    's100_3':None,
    'c30_3':None,
    'c40_3':None,
    'c50_3':None,
    'c70_3':None,
    'c100_3':None,
}

In [12]:
# LSTM 모델 정의
class LSTMModel(nn.Module):
    def __init__(self, input_size, hidden_size, num_layers, output_size):
        super(LSTMModel, self).__init__()
        self.lstm = nn.LSTM(input_size, hidden_size, num_layers, batch_first=True)
        self.fc = nn.Linear(hidden_size, output_size)
    
    def forward(self, x):
        out, _ = self.lstm(x)
        out = self.fc(out[:,:,:])  # 출력 레이어의 크기를 output_size로 변경
        return out
    
#GRU 모델 정의
class GRUModel(nn.Module):
    def __init__(self, input_size, hidden_size, num_layers, output_size):
        super(GRUModel, self).__init__()
        self.gru = nn.GRU(input_size, hidden_size, num_layers, batch_first=True)
        self.fc = nn.Linear(hidden_size, output_size)
    
    def forward(self, x):
        out, _ = self.gru(x)
        out = self.fc(out)  # 마지막 시간 단계의 출력만 사용
        return out

#LSTM GRU 앙상블 모델 정의
class LSTMGRUModel(nn.Module):
    def __init__(self, input_size, hidden_size_lstm, num_layers_lstm, hidden_size_gru, num_layers_gru, output_size):
        super(LSTMGRUModel, self).__init__()
        self.lstm = nn.LSTM(input_size, hidden_size_lstm, num_layers_lstm, batch_first=True)
        self.gru = nn.GRU(hidden_size_lstm, hidden_size_gru, num_layers_gru, batch_first=True)
        self.fc = nn.Linear(hidden_size_gru, output_size)
    
    def forward(self, x):
        lstm_out, _ = self.lstm(x)
        gru_out, _ = self.gru(lstm_out)
        out = self.fc(gru_out)  # 마지막 시간 단계의 출력만 사용
        return out

class BILSTMModel(nn.Module):
    def __init__(self, input_size, hidden_size, num_layers, output_size, bidirectional=True, dropout_prob=0.2):
        super(BILSTMModel, self).__init__()
        self.lstm = nn.LSTM(input_size, hidden_size, num_layers, batch_first=True, bidirectional=bidirectional, dropout=dropout_prob)
        num_directions = 2 if bidirectional else 1
        self.fc = nn.Linear(hidden_size * num_directions, output_size)
    
    def forward(self, x):
        out, _ = self.lstm(x)
        out = self.fc(out)  # 출력 레이어의 크기를 output_size로 변경
        return out
    
class BILSTMGRUModel(nn.Module):
    def __init__(self, input_size, hidden_size_lstm, num_layers_lstm, hidden_size_gru, num_layers_gru, output_size, bidirectional=True, dropout_prob=0.2):
        super(BILSTMGRUModel, self).__init__()
        self.lstm = nn.LSTM(input_size, hidden_size_lstm, num_layers_lstm, batch_first=True, bidirectional=bidirectional, dropout=dropout_prob)   
        num_directions = 2 if bidirectional else 1
        self.gru = nn.GRU(hidden_size_lstm, hidden_size_gru, num_layers_gru, batch_first=True)
        self.fc = nn.Linear(hidden_size_gru * num_directions, output_size)
    
    def forward(self, x):
        out, _ = self.lstm(x)
        out = self.fc(out)  # 출력 레이어의 크기를 output_size로 변경
        return out
    
class BiLSTMWithCNN(nn.Module):
    def __init__(self, input_size, hidden_size, num_layers, output_size, bidirectional=True, dropout_prob=0.2, cnn_kernel_size=3, cnn_out_channels=64):
        super(BiLSTMWithCNN, self).__init__()
        
        # Bidirectional LSTM
        self.lstm = nn.LSTM(input_size, hidden_size, num_layers, batch_first=True, bidirectional=bidirectional, dropout=dropout_prob)
        lstm_output_size = hidden_size * (2 if bidirectional else 1)
        
        # 1D-CNN layer
        self.cnn = nn.Conv1d(in_channels=lstm_output_size, out_channels=cnn_out_channels, kernel_size=cnn_kernel_size)
        
        # Fully connected layer
        self.fc = nn.Linear(cnn_out_channels, output_size)
    
    def forward(self, x):
        lstm_out, _ = self.lstm(x)
        lstm_out = lstm_out.permute(0, 1, 2)  # LSTM 출력의 차원을 (배치, 시퀀스 길이, hidden_size)에서 (배치, hidden_size, 시퀀스 길이)로 변경
        cnn_out = self.cnn(lstm_out)
        cnn_out = torch.max(cnn_out, dim=1)[0]  # 시퀀스 길이 방향으로 최대 풀링
        fc_out = self.fc(cnn_out)
        return fc_out

In [13]:
def create_tensor(datas, sequence_length =100, stride=10):
    train_data = []

    for interval, data in datas.items():
        
        features = pd.merge(data[0].iloc[:10001, :31], data[1].iloc[:10001], on='Distance', how='inner').values
        targets = data[0].iloc[:10001, 31:].values
        test_features = pd.merge(data[0].iloc[10001:, :31], data[1].iloc[10001:], on='Distance', how='inner').values

        features_tensor = torch.tensor(features, dtype=torch.float32).unfold(0, sequence_length, stride).permute(0,2,1)
        targets_tensor = torch.tensor(targets, dtype=torch.float32).unfold(0, sequence_length, stride).permute(0,2,1)
        test_data[interval] = torch.tensor(test_features, dtype=torch.float32).unsqueeze(1).expand(-1, sequence_length, -1)

        print(features_tensor.shape, targets_tensor.shape)
        train_dataset = TensorDataset(features_tensor, targets_tensor)
        train_loader = DataLoader(dataset=train_dataset, batch_size=hyperparameter['batch_size'])
        train_data.append(train_loader)

    return train_data


In [14]:
s_train_data = create_tensor(s_datas)
c_train_data = create_tensor(c_datas)

torch.Size([991, 100, 36]) torch.Size([991, 100, 4])
torch.Size([991, 100, 36]) torch.Size([991, 100, 4])
torch.Size([991, 100, 36]) torch.Size([991, 100, 4])
torch.Size([991, 100, 36]) torch.Size([991, 100, 4])
torch.Size([991, 100, 36]) torch.Size([991, 100, 4])
torch.Size([991, 100, 34]) torch.Size([991, 100, 4])
torch.Size([991, 100, 34]) torch.Size([991, 100, 4])
torch.Size([991, 100, 34]) torch.Size([991, 100, 4])
torch.Size([991, 100, 34]) torch.Size([991, 100, 4])
torch.Size([991, 100, 34]) torch.Size([991, 100, 4])


In [15]:
s_model1 = LSTMModel(hyperparameter['s_input_size'], hyperparameter['hidden_size'], hyperparameter['num_layer'], hyperparameter['output_size'])
s_model2 = GRUModel(hyperparameter['s_input_size'], hyperparameter['hidden_size'], hyperparameter['num_layer'], hyperparameter['output_size'])
s_model3 = LSTMGRUModel(hyperparameter['s_input_size'], hyperparameter['hidden_size'], hyperparameter['num_layer'], hyperparameter['hidden_size'], hyperparameter['num_layer'], hyperparameter['output_size'])
s_model4 = BILSTMModel(hyperparameter['s_input_size'], hyperparameter['hidden_size'], hyperparameter['num_layer'], hyperparameter['output_size'])
s_model5 = BILSTMGRUModel(hyperparameter['s_input_size'], hyperparameter['hidden_size'], hyperparameter['num_layer'], hyperparameter['hidden_size'], hyperparameter['num_layer'], hyperparameter['output_size'])
s_model6 = BiLSTMWithCNN(hyperparameter['s_input_size'], hyperparameter['hidden_size'], hyperparameter['num_layer'], hyperparameter['output_size'])

c_model1 = LSTMModel(hyperparameter['c_input_size'], hyperparameter['hidden_size'], hyperparameter['num_layer'], hyperparameter['output_size'])
c_model2 = GRUModel(hyperparameter['c_input_size'], hyperparameter['hidden_size'], hyperparameter['num_layer'], hyperparameter['output_size'])
c_model3 = LSTMGRUModel(hyperparameter['c_input_size'], hyperparameter['hidden_size'], hyperparameter['num_layer'], hyperparameter['hidden_size'], hyperparameter['num_layer'], hyperparameter['output_size'])
c_model4 = BILSTMModel(hyperparameter['c_input_size'], hyperparameter['hidden_size'], hyperparameter['num_layer'], hyperparameter['output_size'])
c_model5 = BILSTMGRUModel(hyperparameter['c_input_size'], hyperparameter['hidden_size'], hyperparameter['num_layer'], hyperparameter['hidden_size'], hyperparameter['num_layer'], hyperparameter['output_size'])
c_model6 = BiLSTMWithCNN(hyperparameter['c_input_size'], hyperparameter['hidden_size'], hyperparameter['num_layer'], hyperparameter['output_size'])


In [17]:
#모델 학습 -> s, c 별로 1,3 번 모델 학습

cnt = 4
model_list = [(s_model1, c_model1),(s_model2, c_model2),(s_model3, c_model3),(s_model4, c_model4),(s_model5, c_model5),(s_model6, c_model6)]

for models in model_list[cnt:]:
    for train_data in [s_train_data,c_train_data]:

        if train_data == s_train_data:
            st = 's'
            model = models[0]
        else:
            st = 'c'
            model = models[1]


        criterion = nn.MSELoss()
        optimizer = torch.optim.Adam(model.parameters(), lr=hyperparameter['learning_rate'])

        # 모델 학습
        print(f'start model learning')
        data_loss = 0

        epochs = hyperparameter['epochs']
            
        for epoch in tqdm(range(epochs), desc="epoch", unit='epoch'):
            start_time = time.time()
            for t_data in train_data:
                for input, labels in t_data:
                    outputs = model(input)
                    loss = criterion(outputs, labels)
                    optimizer.zero_grad()
                    loss.backward()
                    optimizer.step()
                            
            end_time = time.time()
            epoch_time = end_time - start_time

            tqdm.write(f'Epoch [{epoch+1}/{epochs}], Loss: {loss.item():.6f}, Time: {epoch_time:.3f} seconds')
            data_loss = loss.item()


            torch.save(model.state_dict(), f'./model/{st}/model{cnt}_weights_ver1.pth')
            
cnt += 1


start model learning


epoch:   0%|          | 0/40 [00:00<?, ?epoch/s]

Epoch [1/40], Loss: 0.000944, Time: 166.587 seconds
Epoch [2/40], Loss: 0.000776, Time: 177.192 seconds
Epoch [3/40], Loss: 0.000634, Time: 144.031 seconds
Epoch [4/40], Loss: 0.000370, Time: 137.477 seconds
Epoch [5/40], Loss: 0.000243, Time: 134.167 seconds
Epoch [6/40], Loss: 0.000294, Time: 134.104 seconds
Epoch [7/40], Loss: 0.000600, Time: 132.911 seconds
Epoch [8/40], Loss: 0.000173, Time: 145.200 seconds
Epoch [9/40], Loss: 0.000183, Time: 147.860 seconds
Epoch [10/40], Loss: 0.000201, Time: 147.754 seconds
Epoch [11/40], Loss: 0.000118, Time: 147.805 seconds
Epoch [12/40], Loss: 0.000135, Time: 148.062 seconds
Epoch [13/40], Loss: 0.000310, Time: 147.813 seconds
Epoch [14/40], Loss: 0.000163, Time: 152.883 seconds
Epoch [15/40], Loss: 0.000135, Time: 148.080 seconds
Epoch [16/40], Loss: 0.000318, Time: 147.455 seconds
Epoch [17/40], Loss: 0.000224, Time: 148.304 seconds
Epoch [18/40], Loss: 0.000154, Time: 150.620 seconds
Epoch [19/40], Loss: 0.000235, Time: 150.599 seconds
Ep

epoch:   0%|          | 0/40 [00:00<?, ?epoch/s]

Epoch [1/40], Loss: 0.001497, Time: 143.680 seconds
Epoch [2/40], Loss: 0.000923, Time: 149.865 seconds
Epoch [3/40], Loss: 0.000954, Time: 148.341 seconds
Epoch [4/40], Loss: 0.000842, Time: 150.907 seconds
Epoch [5/40], Loss: 0.000818, Time: 150.351 seconds
Epoch [6/40], Loss: 0.000968, Time: 152.689 seconds
Epoch [7/40], Loss: 0.000693, Time: 150.249 seconds
Epoch [8/40], Loss: 0.000587, Time: 150.193 seconds
Epoch [9/40], Loss: 0.000849, Time: 149.251 seconds
Epoch [10/40], Loss: 0.000400, Time: 149.827 seconds
Epoch [11/40], Loss: 0.000812, Time: 147.893 seconds
Epoch [12/40], Loss: 0.000407, Time: 148.889 seconds
Epoch [13/40], Loss: 0.000528, Time: 147.903 seconds
Epoch [14/40], Loss: 0.000320, Time: 148.948 seconds
Epoch [15/40], Loss: 0.000555, Time: 148.277 seconds
Epoch [16/40], Loss: 0.000357, Time: 149.447 seconds
Epoch [17/40], Loss: 0.000827, Time: 148.773 seconds
Epoch [18/40], Loss: 0.000267, Time: 152.636 seconds
Epoch [19/40], Loss: 0.000375, Time: 148.384 seconds
Ep

epoch:   0%|          | 0/40 [00:00<?, ?epoch/s]

RuntimeError: Given groups=1, weight of size [64, 256, 3], expected input[100, 100, 256] to have 256 channels, but got 100 channels instead

In [18]:
#모델 weight loading

def LSTMmodel_loading():
    c_weights_path = './model/c/LSTM_weights_ver1.pth'
    c_model1.load_state_dict(torch.load(c_weights_path))
    c_model1.eval()

    s_weights_path = './model/s/LSTM_weights_ver1.pth'
    s_model1.load_state_dict(torch.load(s_weights_path))
    s_model1.eval()

def LSTMGRUmodel_loading():
    c_weights_path = './model/c/model_weights_4.pth'
    c_model3.load_state_dict(torch.load(c_weights_path))
    c_model3.eval()

    s_weights_path = './model/s/model_weights_2.pth'
    s_model3.load_state_dict(torch.load(s_weights_path))
    s_model3.eval()

def BILSTMmodel_loading():
    c_weights_path = './model/c/BILSTM_weights.pth'
    c_model4.load_state_dict(torch.load(c_weights_path))
    c_model4.eval()

    s_weights_path = './model/s/BILSTM_weights.pth'
    s_model4.load_state_dict(torch.load(s_weights_path))
    s_model4.eval()

def BILSTMGRUmodel_loading():
    c_weights_path = './model/c/model4_weights_ver1.pth'
    c_model5.load_state_dict(torch.load(c_weights_path))
    c_model5.eval()

    s_weights_path = './model/s/model4_weights_ver1.pth'
    s_model5.load_state_dict(torch.load(s_weights_path))
    s_model5.eval()

In [19]:
def data_prediction(s_model, c_model, test_data):
    result_tensor = torch.empty(0)

    for column, tensor in test_data.items():
        if column[0] == 's':    
            with torch.no_grad():
                predictions = s_model(tensor)
                print(predictions.shape)
        else:
            with torch.no_grad():
                predictions = c_model(tensor)
                print(predictions.shape)

        result_tensor = torch.cat((result_tensor, predictions), dim=2)

    return result_tensor

In [20]:
#LSTMmodel_loading()
# LSTMGRUmodel_loading()
# BILSTMmodel_loading()
BILSTMGRUmodel_loading()

result_tensor = data_prediction(s_model1, c_model1, test_data)

torch.Size([1999, 100, 4])
torch.Size([1999, 100, 4])
torch.Size([1999, 100, 4])
torch.Size([1999, 100, 4])
torch.Size([1999, 100, 4])
torch.Size([1999, 100, 4])
torch.Size([1999, 100, 4])
torch.Size([1999, 100, 4])
torch.Size([1999, 100, 4])
torch.Size([1999, 100, 4])


In [21]:
result_tensor.shape

torch.Size([1999, 100, 40])

In [22]:
result_array = result_tensor[:,-1,:].numpy()
print(result_array.shape)
result_df = pd.DataFrame(result_array, columns=[
    'YL_M1_B1_W1_s30','YR_M1_B1_W1_s30','YL_M1_B1_W2_s30','YR_M1_B1_W2_s30',
    'YL_M1_B1_W1_s40','YR_M1_B1_W1_s40','YL_M1_B1_W2_s40','YR_M1_B1_W2_s40',
    'YL_M1_B1_W1_s50','YR_M1_B1_W1_s50','YL_M1_B1_W2_s50','YR_M1_B1_W2_s50',
    'YL_M1_B1_W1_s70','YR_M1_B1_W1_s70','YL_M1_B1_W2_s70','YR_M1_B1_W2_s70',
    'YL_M1_B1_W1_s100','YR_M1_B1_W1_s100','YL_M1_B1_W2_s100','YR_M1_B1_W2_s100',
    'YL_M1_B1_W1_c30','YR_M1_B1_W1_c30','YL_M1_B1_W2_c30','YR_M1_B1_W2_c30',
    'YL_M1_B1_W1_c40','YR_M1_B1_W1_c40','YL_M1_B1_W2_c40','YR_M1_B1_W2_c40',
    'YL_M1_B1_W1_c50','YR_M1_B1_W1_c50','YL_M1_B1_W2_c50','YR_M1_B1_W2_c50',
    'YL_M1_B1_W1_c70','YR_M1_B1_W1_c70','YL_M1_B1_W2_c70','YR_M1_B1_W2_c70',
    'YL_M1_B1_W1_c100','YR_M1_B1_W1_c100','YL_M1_B1_W2_c100','YR_M1_B1_W2_c100'
])

(1999, 40)


In [23]:
result_df.head(1)

Unnamed: 0,YL_M1_B1_W1_s30,YR_M1_B1_W1_s30,YL_M1_B1_W2_s30,YR_M1_B1_W2_s30,YL_M1_B1_W1_s40,YR_M1_B1_W1_s40,YL_M1_B1_W2_s40,YR_M1_B1_W2_s40,YL_M1_B1_W1_s50,YR_M1_B1_W1_s50,...,YL_M1_B1_W2_c50,YR_M1_B1_W2_c50,YL_M1_B1_W1_c70,YR_M1_B1_W1_c70,YL_M1_B1_W2_c70,YR_M1_B1_W2_c70,YL_M1_B1_W1_c100,YR_M1_B1_W1_c100,YL_M1_B1_W2_c100,YR_M1_B1_W2_c100
0,0.150981,-0.098702,0.00529,0.086255,0.15069,-0.101257,0.005077,0.086646,0.150823,-0.100702,...,-0.020597,-0.048949,0.007647,0.008993,-0.020594,-0.048955,0.007646,0.008993,-0.020594,-0.048955


In [24]:
distance = lane_data_c['Distance'][10001:]
distance = distance.reset_index(drop=True)
distance

0       2500.25
1       2500.50
2       2500.75
3       2501.00
4       2501.25
         ...   
1994    2998.75
1995    2999.00
1996    2999.25
1997    2999.50
1998    2999.75
Name: Distance, Length: 1999, dtype: float64

In [25]:
result_df = pd.concat([distance,result_df], axis=1)
result_df.head(1)

Unnamed: 0,Distance,YL_M1_B1_W1_s30,YR_M1_B1_W1_s30,YL_M1_B1_W2_s30,YR_M1_B1_W2_s30,YL_M1_B1_W1_s40,YR_M1_B1_W1_s40,YL_M1_B1_W2_s40,YR_M1_B1_W2_s40,YL_M1_B1_W1_s50,...,YL_M1_B1_W2_c50,YR_M1_B1_W2_c50,YL_M1_B1_W1_c70,YR_M1_B1_W1_c70,YL_M1_B1_W2_c70,YR_M1_B1_W2_c70,YL_M1_B1_W1_c100,YR_M1_B1_W1_c100,YL_M1_B1_W2_c100,YR_M1_B1_W2_c100
0,2500.25,0.150981,-0.098702,0.00529,0.086255,0.15069,-0.101257,0.005077,0.086646,0.150823,...,-0.020597,-0.048949,0.007647,0.008993,-0.020594,-0.048955,0.007646,0.008993,-0.020594,-0.048955


In [26]:
result_df.to_csv('./result/answer_BILSTMGRU_ver1.csv',index=False)