<a href="https://colab.research.google.com/github/ByeonJaeseong/DeepLearningProject/blob/main/Train.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [2]:
#!pip install -U keras-tuner

import pandas as pd
import numpy as np
import tensorflow as tf
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, LSTM, Dense, concatenate
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint
from kerastuner.tuners import RandomSearch
from google.colab import files
import shutil
from tensorflow.keras.layers import Dropout

#uploaded = files.upload()



In [None]:
# Dataset suffixes: each data_<suffix>.csv file gets its own tuning, training
# and prediction pass. (Named `dataset_names` so the builtin `list` is not shadowed.)
dataset_names = ['s30', 's40', 's50', 's70', 's100', 'c30', 'c40', 'c50', 'c70', 'c100']

# The four derailment-coefficient columns used as prediction targets.
target_columns = ['YL_M1_B1_W1', 'YR_M1_B1_W1', 'YL_M1_B1_W2', 'YR_M1_B1_W2']

search_epochs = 50      # epochs used for every tuner trial
search_batch_size = 32  # batch size used for every tuner trial
next_samples = 1999     # number of trailing rows to predict for the answer file


def weighted_mape(y_true, y_pred, weights):
    """Return the weighted mean absolute percentage error (in percent) with NumPy.

    Each element's absolute percentage error is multiplied by the matching
    entry of ``weights`` and the sum is normalised by ``np.sum(weights)``.
    Elements where ``y_true`` is 0 are divided by zero.
    """
    return np.sum(weights * np.abs((y_true - y_pred) / y_true)) / np.sum(weights) * 100


def weighted_mape_loss(y_true, y_pred):
    """Return the weighted MAPE (in percent) as a TensorFlow loss.

    The weights are ``|y_true|``. Elements where ``y_true`` is 0 are divided
    by zero.

    NOTE(review): this loss is defined but not passed to ``compile``; the
    tuned model below is trained with mean squared error.
    """
    weights = tf.abs(y_true)
    return tf.reduce_sum(weights * tf.abs((y_true - y_pred) / y_true)) / tf.reduce_sum(weights) * 100


def make_build_model(time_steps, n_features):
    """Return a Keras Tuner ``build_model(hp)`` function for the given input sizes.

    Args:
        time_steps: length of the sequence fed to the LSTM branch.
        n_features: number of columns fed to the dense feature branch.
    """

    def build_model(hp):
        """Build and compile one candidate model from the hyperparameters ``hp``."""
        lstm_units = hp.Int('lstm_units', min_value=32, max_value=128, step=32)
        lstm_activation = hp.Choice('lstm_activation', values=['relu', 'tanh'])
        dropout_rate = hp.Float('dropout_rate', min_value=0.0, max_value=0.5, step=0.005)
        # 'epochs' and 'batch_size' are registered here so they appear in the
        # search space and can be read back from the best trial.
        # NOTE(review): tuner.search() below always trains with search_epochs /
        # search_batch_size, so these two values do not affect a trial's score.
        hp.Int('epochs', min_value=10, max_value=200, step=5)
        num_layers = hp.Int('num_layers', min_value=1, max_value=5, step=1)
        optimizer = hp.Choice('optimizer', values=['adam', 'rmsprop', 'sgd'])
        hp.Int('batch_size', min_value=8, max_value=128, step=8)

        input_time_series = Input(shape=(time_steps, 1), name='input_time_series')
        input_features = Input(shape=(n_features,), name='input_features')

        # LSTM branch: one entry layer, `num_layers` LSTM+Dropout pairs, and a
        # final LSTM that collapses the sequence to a single vector.
        lstm_output = LSTM(units=lstm_units, activation=lstm_activation, return_sequences=True)(input_time_series)
        for _ in range(num_layers):
            lstm_output = LSTM(units=lstm_units, activation=lstm_activation, return_sequences=True)(lstm_output)
            lstm_output = Dropout(dropout_rate)(lstm_output)
        lstm_output = LSTM(units=lstm_units, activation=lstm_activation)(lstm_output)

        # Dense branch for the non-sequential features.
        features_output = Dense(units=32, activation='relu')(input_features)

        concatenated = concatenate([lstm_output, features_output])

        # One output neuron per target column.
        output_layer = Dense(4)(concatenated)

        tuned_model = Model(inputs=[input_time_series, input_features], outputs=output_layer)
        tuned_model.compile(optimizer=optimizer, loss='mean_squared_error')

        return tuned_model

    return build_model


# The lane files do not depend on the loop variable, so read them once.
lane_data_c = pd.read_csv('lane_data_c.csv', encoding='utf-8')
lane_data_s = pd.read_csv('lane_data_s.csv', encoding='utf-8')

# Column 0 of answer_sample.csv holds Distance; each dataset fills the next
# four columns, starting at column 1.
count = 1
for name in dataset_names:
    # Remove the previous tuner directory so this dataset's search does not
    # resume from trials saved for the previous one. The directory does not
    # exist on the very first run, which is fine.
    try:
        shutil.rmtree('tuner_results')
    except FileNotFoundError:
        pass

    # Load this dataset and join it column-wise with the lane data.
    data = pd.read_csv(f'data_{name}.csv', encoding='utf-8')
    data_combined = pd.concat([lane_data_c, lane_data_s, data], axis=1)
    # Columns that occur in more than one file (e.g. Distance) are kept once.
    data_combined = data_combined.loc[:, ~data_combined.columns.duplicated()]
    # Keep only the first row for each Distance value.
    data_combined = data_combined.drop_duplicates(subset='Distance', keep='first')

    # Split into the sequence input (Distance), the feature inputs and the labels.
    X_time_series = data_combined[['Distance']]
    X_features = data_combined.drop(target_columns + ['Distance'], axis=1)
    y = data_combined[target_columns]

    # Scale the features to [0, 1].
    # NOTE(review): the scaler is fitted on all rows, including the validation rows.
    scaler = MinMaxScaler()
    X_features_scaled = scaler.fit_transform(X_features)

    # Chronological 80/20 split (no shuffling).
    X_features_train, X_features_test, X_time_series_train, X_time_series_test, y_train, y_test = train_test_split(
        X_features_scaled, X_time_series, y, test_size=0.2, random_state=42, shuffle=False
    )

    build_model = make_build_model(X_time_series_train.shape[1], X_features_train.shape[1])

    tuner = RandomSearch(
        build_model,
        objective='val_loss',
        max_trials=50,
        executions_per_trial=1,
        directory='tuner_results',
        project_name='model_tuning',
    )
    tuner.search_space_summary()

    # Run the hyperparameter search.
    tuner.search(
        [X_time_series_train, X_features_train], y_train,
        epochs=search_epochs,
        batch_size=search_batch_size,
        validation_data=([X_time_series_test, X_features_test], y_test),
    )

    # Best model and the hyperparameters of the best trial. These are read from
    # the tuner: a Keras Model has no `tuner` attribute.
    best_model = tuner.get_best_models(num_models=1)[0]
    best_hp = tuner.get_best_hyperparameters(num_trials=1)[0]
    best_epochs = best_hp.get('epochs')
    best_batch_size = best_hp.get('batch_size')

    # Continue training the best model with the best trial's epochs/batch size.
    best_model.fit(
        [X_time_series_train, X_features_train], y_train,
        epochs=best_epochs,
        batch_size=best_batch_size,
        validation_data=([X_time_series_test, X_features_test], y_test),
        callbacks=[EarlyStopping(patience=10, restore_best_weights=True)],
    )

    # Predict the last `next_samples` rows. The features must be the scaled
    # ones, because the model was trained on the scaled features.
    X_time_series_predict = X_time_series[-next_samples:]
    X_features_predict = X_features_scaled[-next_samples:]
    predictions = best_model.predict([X_time_series_predict, X_features_predict])

    # Write the four prediction columns for this dataset. The file is read with
    # header=None, so row 0 is the header row and data starts at row 1. Saving
    # on every iteration keeps finished datasets if a later one fails.
    answer_sample = pd.read_csv('answer_sample.csv', header=None)
    answer_sample.iloc[1:, count:count+4] = predictions
    answer_sample.to_csv('answer_sample.csv', index=False, header=False)
    count = count + 4



Trial 1 Complete [00h 03m 10s]
val_loss: 0.0007463643560186028

Best val_loss So Far: 0.0007463643560186028
Total elapsed time: 00h 03m 10s

Search: Running Trial #2

Value             |Best Value So Far |Hyperparameter
128               |64                |lstm_units
relu              |tanh              |lstm_activation
0.04              |0.205             |dropout_rate
85                |90                |epochs
5                 |4                 |num_layers
rmsprop           |sgd               |optimizer
80                |8                 |batch_size

Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50

In [13]:

# Reload the submission file and preview its first rows.
answer_sample = pd.read_csv(
    'answer_sample.csv',
    header=None,  # the file's own header line is kept as data row 0
)
answer_sample.head(n=5)

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,31,32,33,34,35,36,37,38,39,40
0,Distance,YL_M1_B1_W1_s30,YR_M1_B1_W1_s30,YL_M1_B1_W2_s30,YR_M1_B1_W2_s30,YL_M1_B1_W1_s40,YR_M1_B1_W1_s40,YL_M1_B1_W2_s40,YR_M1_B1_W2_s40,YL_M1_B1_W1_s50,...,YL_M1_B1_W2_c50,YR_M1_B1_W2_c50,YL_M1_B1_W1_c70,YR_M1_B1_W1_c70,YL_M1_B1_W2_c70,YR_M1_B1_W2_c70,YL_M1_B1_W1_c100,YR_M1_B1_W1_c100,YL_M1_B1_W2_c100,YR_M1_B1_W2_c100
1,2500.25,3.0383856296539307,3.3745944499969482,3.150264024734497,0.5677508115768433,1.590240240097046,-0.8460798859596252,0.35806435346603394,-0.8905683755874634,1.219680666923523,...,0.06297896802425385,-0.2666114270687103,-0.3706197142601013,2.079420566558838,-0.5998522639274597,-0.005573870614171028,-0.2598481774330139,-0.10763148963451385,0.46673956513404846,0.13719095289707184
2,2500.5,4.388960361480713,4.988764762878418,2.9292871952056885,1.6415818929672241,1.1254503726959229,-0.5474135279655457,-0.14807218313217163,0.043817199766635895,1.1105281114578247,...,-0.8448710441589355,0.9835374355316162,0.04862434044480324,3.5793468952178955,-1.1364562511444092,1.1456530094146729,-0.9671570658683777,0.6867867112159729,-0.9352637529373169,0.5585556030273438
3,2500.75,4.857058048248291,7.091855049133301,1.1406546831130981,1.1984490156173706,1.1719924211502075,-0.15573377907276154,-0.24037331342697144,-0.008829018101096153,-0.20981845259666443,...,-0.7839117646217346,1.1493388414382935,-1.4068650007247925,4.369986057281494,-1.971381425857544,1.2050524950027466,-1.8323396444320679,2.6019110679626465,-1.3256511688232422,0.9894505739212036
4,2501.0,4.885616779327393,7.02304220199585,0.2526165544986725,-0.08797340095043182,0.9910439848899841,0.16068248450756073,0.23632007837295532,-0.9893527030944824,-0.24068251252174377,...,-0.11427594721317291,0.34817755222320557,-1.26677668094635,3.9846315383911133,-0.33597585558891296,-0.21141111850738525,-2.2528817653656006,3.0522849559783936,-0.5561127662658691,0.32541918754577637
