In [1]:
import pandas as pd
import numpy as np
import tensorflow as tf
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import OneHotEncoder
from keras_tuner.tuners import RandomSearch
from tensorflow.keras.layers import Dense, Input, Dropout
from tensorflow.keras.models import Model
from tensorflow.keras.optimizers import Adam

2023-08-16 20:37:05.672512: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 AVX512F AVX512_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


# データの読み込み

In [2]:
train_data = pd.read_csv('../data/preprocessed_data/train_clean.csv')
test_data = pd.read_csv('../data/preprocessed_data/test_clean.csv')

# 追加前処理

In [3]:
# 'id'と'price'を分離
train_ids = train_data['id']
train_y = train_data['price']
test_ids = test_data['id']

train_data.drop(['id', 'price'], axis=1, inplace=True)
test_data.drop(['id'], axis=1, inplace=True)

In [4]:
# 数値でないカテゴリ変数のみをOne-Hotエンコード
encoder = OneHotEncoder(drop='first', sparse=False, handle_unknown='error')
X_train_encoded = encoder.fit_transform(train_data.select_dtypes(exclude=['int64', 'float64']))
X_test_encoded = encoder.transform(test_data.select_dtypes(exclude=['int64', 'float64']))



In [5]:
# 数値変数を結合
X_train = pd.concat([pd.DataFrame(X_train_encoded), train_data.select_dtypes(include=['int64', 'float64'])], axis=1)
X_test = pd.concat([pd.DataFrame(X_test_encoded), test_data.select_dtypes(include=['int64', 'float64'])], axis=1)

# モデル定義

In [6]:
# モデルの定義
def build_model(hp):
    inputs = Input(shape=(X_train.shape[1]))
    x = inputs
    
    for i in range(hp.Int('num_layers', 1, 5)):
        units = hp.Int('units_' + str(i), min_value=32, max_value=512, step=32)
        x = Dense(units, activation='relu')(x)
        x = Dropout(hp.Float('dropout_' + str(i), min_value=0, max_value=0.5, step=0.1))(x)
        outputs = Dense(1)(x)
        model = Model(inputs, outputs)
        model.compile(optimizer=Adam(hp.Choice('learning_rate', [0.01, 0.005, 0.001])), loss=tf.keras.losses.MeanAbsolutePercentageError())
        return model

In [7]:
# ランダムサーチの実施
tuner = RandomSearch(
    build_model,
    objective='val_loss',
    max_trials=50,
    executions_per_trial=3,
    directory='random_search03'
)

# データの分割

In [8]:
# データを訓練用と検証用に分割
X_train_split, X_val_split, y_train_split, y_val_split = train_test_split(X_train, train_y, test_size=0.2, random_state=42)

# チューニングの実行（バッチサイズの調整）
tuner.search(X_train_split, y_train_split, epochs=30, validation_data=(X_val_split, y_val_split), batch_size=512)

Trial 50 Complete [00h 00m 37s]
val_loss: 47.404501597086586

Best val_loss So Far: 46.05416615804037
Total elapsed time: 00h 38m 02s
INFO:tensorflow:Oracle triggered exit


# 学習

In [9]:
# 最適なモデルの取得
best_model = tuner.get_best_models(num_models=1)[0]

In [10]:
# 最適なモデルで予測
predictions = best_model.predict(X_test).flatten()



# 提出ファイルの出力

In [11]:
# 結果をCSVファイルとして出力
result = pd.DataFrame({'id': test_ids, 'price': predictions})
result.to_csv('../submit-file/20230816_02_submit.csv', index=False, header=False)