<a href="https://colab.research.google.com/github/ByeonJaeseong/DeepLearningProject/blob/main/Train.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [2]:
!pip install -U keras-tuner

import pandas as pd
import numpy as np
import tensorflow as tf
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, LSTM, Dense, concatenate
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint
from kerastuner.tuners import RandomSearch
from google.colab import files

uploaded = files.upload()



Collecting keras-tuner
  Downloading keras_tuner-1.3.5-py3-none-any.whl (176 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m176.1/176.1 kB[0m [31m2.7 MB/s[0m eta [36m0:00:00[0m
Collecting kt-legacy (from keras-tuner)
  Downloading kt_legacy-1.0.5-py3-none-any.whl (9.6 kB)
Installing collected packages: kt-legacy, keras-tuner
Successfully installed keras-tuner-1.3.5 kt-legacy-1.0.5


  from kerastuner.tuners import RandomSearch


Saving answer_sample.csv to answer_sample.csv
Saving data_c30.csv to data_c30.csv
Saving data_c40.csv to data_c40.csv
Saving data_c50.csv to data_c50.csv
Saving data_c70.csv to data_c70.csv
Saving data_c100.csv to data_c100.csv
Saving data_columns.csv to data_columns.csv
Saving data_s30.csv to data_s30.csv
Saving data_s40.csv to data_s40.csv
Saving data_s50.csv to data_s50.csv
Saving data_s70.csv to data_s70.csv
Saving data_s100.csv to data_s100.csv
Saving lane_data_c.csv to lane_data_c.csv
Saving lane_data_columns.csv to lane_data_columns.csv
Saving lane_data_s.csv to lane_data_s.csv
Saving 차량_및_요댐퍼.xlsx to 차량_및_요댐퍼.xlsx


In [6]:
list = ['s30', 's40', 's50', 's70', 's100', 'c30', 'c40', 'c50', 'c70', 'c100']
count = 1
for i in list :
    # 데이터 불러오기
    lane_data_c = pd.read_csv('lane_data_c.csv', encoding='utf-8')
    lane_data_s = pd.read_csv('lane_data_s.csv', encoding='utf-8')
    data = pd.read_csv('data_'+i+'.csv', encoding='utf-8')
    # 데이터 결합
    data_combined = pd.concat([lane_data_c, lane_data_s, data], axis=1)
    data_combined = data_combined.loc[:, ~data_combined.columns.duplicated()]
    data_combined = data_combined.drop_duplicates(subset='Distance', keep='first')  # 첫 번째 중복 행만 남기기
    # 가중치 계산 함수 정의
    def weighted_mape(y_true, y_pred, weights):
        return np.sum(weights * np.abs((y_true - y_pred) / y_true)) / np.sum(weights) * 100

    # 사용자 정의 Weighted MAPE 손실 함수
    def weighted_mape_loss(weights):
        def loss(y_true, y_pred):
            return tf.reduce_sum(weights * tf.abs((y_true - y_pred) / y_true)) / tf.reduce_sum(weights) * 100
        return loss

    # 입력 변수와 탈선계수 분리
    X_time_series = data_combined[['Distance']]
    X_features = data_combined.drop(['YL_M1_B1_W1', 'YR_M1_B1_W1', 'YL_M1_B1_W2', 'YR_M1_B1_W2', 'Distance'], axis=1)
    y = data_combined[['YL_M1_B1_W1', 'YR_M1_B1_W1', 'YL_M1_B1_W2', 'YR_M1_B1_W2']]


    # 데이터 정규화
    scaler = MinMaxScaler()
    X_features_scaled = scaler.fit_transform(X_features)
    X_time_series_scaled = scaler.fit_transform(X_time_series)

    # 학습 데이터와 테스트 데이터 분할
    X_features_train, X_features_test, X_time_series_train, X_time_series_test, y_train, y_test = train_test_split(X_features_scaled, X_time_series_scaled, y, test_size=0.2, random_state=42)

    # 사용자 정의 Weighted MAPE 손실 함수
    def weighted_mape_loss(weights):
        def loss(y_true, y_pred):
            return tf.reduce_sum(weights * tf.abs((y_true - y_pred) / y_true)) / tf.reduce_sum(weights) * 100
        return loss

    # 가중치 계산
    weights_train = np.abs(y_train)  # 훈련 데이터를 기반으로 가중치 계산

    # 입력 정의
    input_time_series = Input(shape=(X_time_series_train.shape[1], 1), name='input_time_series')
    input_features = Input(shape=(X_features_train.shape[1],), name='input_features')

    # 시계열 데이터 처리를 위한 LSTM 층
    lstm_units = 64
    lstm_output = LSTM(units=lstm_units, activation='relu', return_sequences=True)(input_time_series)
    lstm_output = LSTM(units=lstm_units, activation='relu')(lstm_output)

    # 특성 데이터 처리를 위한 밀집층
    features_output = Dense(units=32, activation='relu')(input_features)

    # LSTM 층과 밀집층을 합치기
    concatenated = concatenate([lstm_output, features_output])

    # 예측을 위한 밀집층 추가
    output_layer = Dense(4)(concatenated)  # 4개의 탈선계수를 예측하므로 출력 뉴런 수는 4

    # 모델 구성
    model = Model(inputs=[input_time_series, input_features], outputs=output_layer)

    # 모델 컴파일
    model.compile(optimizer='adam', loss=weighted_mape_loss(weights_train))

    # 하이퍼파라미터 튜닝을 위한 함수 정의
    def build_model(hp):
        lstm_units = hp.Int('lstm_units', min_value=32, max_value=128, step=32)
        dense_units = hp.Int('dense_units', min_value=32, max_value=128, step=32)

        input_time_series = Input(shape=(X_time_series_train.shape[1], 1), name='input_time_series')
        input_features = Input(shape=(X_features_train.shape[1],), name='input_features')

        lstm_output = LSTM(units=lstm_units, activation='relu', return_sequences=True)(input_time_series)
        lstm_output = LSTM(units=lstm_units, activation='relu')(lstm_output)

        features_output = Dense(units=dense_units, activation='relu')(input_features)

        concatenated = concatenate([lstm_output, features_output])

        output_layer = Dense(4)(concatenated)

        tuned_model = Model(inputs=[input_time_series, input_features], outputs=output_layer)
        tuned_model.compile(optimizer='adam', loss='mean_squared_error')

        return tuned_model

    # 하이퍼파라미터 튜닝
    tuner = RandomSearch(build_model, objective='val_loss', max_trials=5, executions_per_trial=1, directory='tuner_results', project_name='model_tuning')
    tuner.search([X_time_series_train, X_features_train], y_train, epochs=50, batch_size=32, validation_data=([X_time_series_test, X_features_test], y_test))

    # 최적의 모델 선택
    best_model = tuner.get_best_models(num_models=1)[0]

    # 모델 훈련
    best_model.fit([X_time_series_train, X_features_train], y_train, epochs=50, batch_size=32, validation_data=([X_time_series_test, X_features_test], y_test))

    # 다음 1999개의 샘플 예측
    next_samples = 1999
    X_time_series_predict = X_time_series[-next_samples:]
    X_features_predict = X_features[-next_samples:]

    # 모델 예측
    predictions = best_model.predict([X_time_series_predict, X_features_predict])

    answer_sample = pd.read_csv('answer_sample.csv', header=None)
    answer_sample.iloc[1:, count:count+4] = predictions  # 예측 결과 저장
    answer_sample.to_csv('answer_sample.csv', index=False, header=False)  # 결과를 파일에 저장
    count = count+4




Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50
Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/5

In [7]:

answer_sample = pd.read_csv('answer_sample.csv', header=None)
answer_sample.head()

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,31,32,33,34,35,36,37,38,39,40
0,Distance,YL_M1_B1_W1_s30,YR_M1_B1_W1_s30,YL_M1_B1_W2_s30,YR_M1_B1_W2_s30,YL_M1_B1_W1_s40,YR_M1_B1_W1_s40,YL_M1_B1_W2_s40,YR_M1_B1_W2_s40,YL_M1_B1_W1_s50,...,YL_M1_B1_W2_c50,YR_M1_B1_W2_c50,YL_M1_B1_W1_c70,YR_M1_B1_W1_c70,YL_M1_B1_W2_c70,YR_M1_B1_W2_c70,YL_M1_B1_W1_c100,YR_M1_B1_W1_c100,YL_M1_B1_W2_c100,YR_M1_B1_W2_c100
1,2500.25,-3.0934884548187256,-1.4886139631271362,20.01789093017578,-34.83118438720703,-0.3079476058483124,-5.391927242279053,3.5623786449432373,-7.828848361968994,5.366372585296631,...,-3.894071340560913,-36.648765563964844,-58.144840240478516,-52.452613830566406,-25.34734344482422,-45.123416900634766,10.21291732788086,-20.53331184387207,12.387517929077148,-29.0419979095459
2,2500.5,-2.2509000301361084,-1.34923255443573,19.740528106689453,-34.8693962097168,0.4328317940235138,-4.951985836029053,4.133959770202637,-8.332554817199707,6.211326599121094,...,-3.6030831336975098,-36.858070373535156,-57.30297088623047,-50.87465286254883,-24.833837509155273,-45.28736114501953,11.046222686767578,-19.129724502563477,12.802045822143555,-29.59121322631836
3,2500.75,1.4095127582550049,1.2128592729568481,19.660186767578125,-34.815792083740234,3.013756275177002,-3.26092267036438,4.375100135803223,-9.513163566589355,9.22378158569336,...,-2.607984781265259,-37.675514221191406,-54.00553894042969,-47.01079559326172,-23.564468383789062,-46.1656608581543,14.39602279663086,-15.092873573303223,14.033159255981445,-30.982730865478516
4,2501.0,2.6432268619537354,2.2171010971069336,19.893169403076172,-35.54393005371094,4.662353515625,-2.4610416889190674,4.614716529846191,-10.21658992767334,11.517169952392578,...,-1.684582233428955,-38.659664154052734,-52.000579833984375,-45.38255310058594,-22.53877830505371,-47.32369613647461,16.39366340637207,-13.429675102233887,14.918346405029297,-32.055450439453125
