In [1]:
# 必要なライブラリのインポート
import pandas as pd
import numpy as np
import tensorflow as tf
from tensorflow import keras
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, recall_score
from sklearn.preprocessing import StandardScaler

In [47]:
# データの読み込み
data = pd.read_csv('opt_base_dispatched_data.csv')
data

Unnamed: 0,request_id,user_current_lon,user_current_lat,user_dest_lon,user_dest_lat,bike_id,bike_current_lon,bike_current_lat,owner_lon,owner_lat,distance_user_bike,timestamp,assigned
0,0,40.798561,-73.953014,40.798275,-73.952530,0,40.824145,-73.950062,40.824145,-73.950062,855.755363,2023-01-01 00:00:00,0
1,0,40.798561,-73.953014,40.798275,-73.952530,1,40.723723,-73.808596,40.723723,-73.808596,16284.255259,2023-01-01 00:00:00,0
2,0,40.798561,-73.953014,40.798275,-73.952530,2,40.745380,-73.905415,40.745380,-73.905415,5561.022549,2023-01-01 00:00:00,0
3,0,40.798561,-73.953014,40.798275,-73.952530,3,40.647957,-74.003680,40.647957,-74.003680,7315.766716,2023-01-01 00:00:00,0
4,0,40.798561,-73.953014,40.798275,-73.952530,4,40.620472,-74.011667,40.620472,-74.011667,8541.791325,2023-01-01 00:00:00,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...
747715,76751,40.775714,-73.873364,40.580510,-73.836153,5,40.792480,-73.776282,40.839846,-73.846470,10847.534760,2023-01-01 23:59:57,0
747716,76751,40.775714,-73.873364,40.580510,-73.836153,6,40.665306,-73.912475,40.675732,-73.912434,5545.583935,2023-01-01 23:59:57,0
747717,76751,40.775714,-73.873364,40.580510,-73.836153,7,40.718739,-73.956902,40.708709,-73.970222,9488.592552,2023-01-01 23:59:57,0
747718,76751,40.775714,-73.873364,40.580510,-73.836153,8,40.738719,-73.879460,40.675921,-73.784965,1333.816431,2023-01-01 23:59:57,0


## 学習用にデータを整形

In [48]:
# timestampカラムをdatetime型に変換
data['timestamp'] = pd.to_datetime(data['timestamp'])

# 一日の始まりからの経過分を計算
data['elapsed_time'] = (data['timestamp'] - data['timestamp'].dt.normalize()).dt.total_seconds() / 60

# 元のtimestampカラムを削除（必要に応じて）
data = data.drop('timestamp', axis=1)
data

Unnamed: 0,request_id,user_current_lon,user_current_lat,user_dest_lon,user_dest_lat,bike_id,bike_current_lon,bike_current_lat,owner_lon,owner_lat,distance_user_bike,assigned,elapsed_time
0,0,40.798561,-73.953014,40.798275,-73.952530,0,40.824145,-73.950062,40.824145,-73.950062,855.755363,0,0.00
1,0,40.798561,-73.953014,40.798275,-73.952530,1,40.723723,-73.808596,40.723723,-73.808596,16284.255259,0,0.00
2,0,40.798561,-73.953014,40.798275,-73.952530,2,40.745380,-73.905415,40.745380,-73.905415,5561.022549,0,0.00
3,0,40.798561,-73.953014,40.798275,-73.952530,3,40.647957,-74.003680,40.647957,-74.003680,7315.766716,0,0.00
4,0,40.798561,-73.953014,40.798275,-73.952530,4,40.620472,-74.011667,40.620472,-74.011667,8541.791325,0,0.00
...,...,...,...,...,...,...,...,...,...,...,...,...,...
747715,76751,40.775714,-73.873364,40.580510,-73.836153,5,40.792480,-73.776282,40.839846,-73.846470,10847.534760,0,1439.95
747716,76751,40.775714,-73.873364,40.580510,-73.836153,6,40.665306,-73.912475,40.675732,-73.912434,5545.583935,0,1439.95
747717,76751,40.775714,-73.873364,40.580510,-73.836153,7,40.718739,-73.956902,40.708709,-73.970222,9488.592552,0,1439.95
747718,76751,40.775714,-73.873364,40.580510,-73.836153,8,40.738719,-73.879460,40.675921,-73.784965,1333.816431,0,1439.95


In [49]:
# ==== パラメータ設定 ==== (2 mins)
M = 3  # 1リクエストあたりの最大候補自転車数
include_no_assignment = True  # 割り当てないクラスを追加
# include_no_assignmentをTrueにした場合、クラス数はM+1となる。

# # 各request_idごとに距離が最小のbikeをassigned=1にする
# def assign_min_distance(df):
#     idx = df['distance_user_bike'].idxmin()
#     df.loc[:, 'assigned'] = 0
#     df.loc[idx, 'assigned'] = 1
#     return df

# data = data.groupby('request_id').apply(assign_min_distance)
# data = data.reset_index(drop=True)

# ==== データのマルチクラス化 ====
# request_idごとにグループ化し、M台分のbike候補を抽出（距離が近い順などのルールでソート）
# 今回は単純にdistance_user_bikeでソートして上位M台を使用。
# M台未満ならダミー行を追加してパディング。
def pad_bikes(group):
    # 距離順にソート
    group = group.sort_values(by='distance_user_bike').reset_index(drop=True)

    # クラスラベルは、assigned=1のbikeが何番目か（0-based）
    assigned_idx = group.index[group['assigned']==1]
    if len(assigned_idx) == 0:
        chosen_class = -1  # 割り当てなし
    else:
        chosen_class = assigned_idx[0]

    # M台に満たない場合はダミー自転車を追加
    while len(group) < M:
        dummy = group.iloc[0:1].copy()
        dummy[['bike_id','bike_current_lon','bike_current_lat','owner_lon','owner_lat','distance_user_bike','assigned']] = 0
        group = pd.concat([group, dummy], ignore_index=True)

    # M台より多ければ上位M台のみ残す
    group = group.iloc[:M]

    # chosen_classがMより大きい場合は上位Mに入っていないので割り当てなし
    if chosen_class >= M or chosen_class == -1:
        if include_no_assignment:
            # no-assignmentクラスをM番目のクラスとする
            final_class = M
        else:
            # no assignmentクラスを設けない場合は、該当なし時はM台の中に1がいないので困るが
            # この場合は単純に一番目のbikeをassigned=1にするなどルールが必要
            # ここでは簡略化のため0番目を強制的に1にする
            final_class = 0
    else:
        final_class = chosen_class

    # 特徴量を整形
    # ユーザ側特徴量は共通
    user_feats = group.loc[0, ['user_current_lon','user_current_lat','user_dest_lon','user_dest_lat','elapsed_time']].values
    # 各bikeの特徴量を縦に並べる
    bike_feats = []
    for i in range(M):
        bike_i = group.iloc[i]
        bike_feats.extend([
            bike_i['bike_current_lon'],
            bike_i['bike_current_lat'],
            bike_i['owner_lon'],
            bike_i['owner_lat'],
            bike_i['distance_user_bike']
        ])
    # user_feats + bike_featsが最終特徴量ベクトル
    # ラベルはfinal_class
    return pd.Series(np.concatenate([user_feats, bike_feats, [final_class]]))

agg_data_backup = data.groupby('request_id').apply(pad_bikes)

  agg_data_backup = data.groupby('request_id').apply(pad_bikes)


In [92]:
# aggデータを1からやり直したい場合
agg_data = agg_data_backup.copy()

In [93]:
agg_data

Unnamed: 0_level_0,0,1,2,3,4,5,6,7,8,9,...,11,12,13,14,15,16,17,18,19,20
request_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
0,40.798561,-73.953014,40.798275,-73.952530,0.000000,40.824145,-73.950062,40.824145,-73.950062,855.755363,...,-73.970222,40.708709,-73.970222,3372.330828,40.745380,-73.905415,40.745380,-73.905415,5561.022549,3.0
1,40.734186,-74.005580,40.750201,-73.993104,0.083333,40.647957,-74.003680,40.647957,-74.003680,2661.750174,...,-74.011667,40.620472,-74.011667,3563.539264,40.708709,-73.970222,40.708709,-73.970222,4023.518748,3.0
2,40.726804,-74.007983,40.746448,-74.001412,0.100000,40.647957,-74.003680,40.647957,-74.003680,2473.038144,...,-74.011667,40.620472,-74.011667,3296.583929,40.708709,-73.970222,40.708709,-73.970222,4251.167163,3.0
3,40.798561,-73.953014,40.841371,-73.939586,0.133333,40.824145,-73.950062,40.824145,-73.950062,855.755363,...,-73.970222,40.708709,-73.970222,3372.330828,40.745380,-73.905415,40.745380,-73.905415,5561.022549,3.0
4,40.729269,-73.987361,40.717224,-74.011923,0.150000,40.708709,-73.970222,40.708709,-73.970222,2015.108643,...,-74.003680,40.647957,-74.003680,3095.845899,40.620472,-74.011667,40.620472,-74.011667,4309.647259,3.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
76747,40.775714,-73.873364,40.677871,-73.968473,1439.783333,40.738719,-73.879460,40.675921,-73.784965,1333.816431,...,-73.905353,40.824145,-73.950062,5440.003524,40.665306,-73.912475,40.675732,-73.912434,5545.583935,3.0
76748,40.642948,-73.779373,40.681287,-73.943775,1439.833333,40.605382,-73.755133,40.723723,-73.808596,2948.596773,...,-73.776282,40.839846,-73.846470,4677.394315,40.738719,-73.879460,40.675921,-73.784965,11560.548398,3.0
76749,40.752700,-73.994682,40.752700,-73.994682,1439.866667,40.718739,-73.956902,40.708709,-73.970222,4344.589807,...,-73.954680,40.701492,-73.886803,5315.828433,40.853520,-73.937556,40.745380,-73.905415,7093.589023,3.0
76750,40.734186,-74.005580,40.678420,-73.994802,1439.866667,40.718739,-73.956902,40.708709,-73.970222,5453.701497,...,-73.954680,40.701492,-73.886803,6134.039940,40.605146,-74.056430,40.620472,-74.011667,6922.726447,3.0


In [58]:
# カラム名を設定
user_cols = ['user_current_lon','user_current_lat','user_dest_lon','user_dest_lat','elapsed_time']
bike_cols = []
for i in range(M):
    bike_cols += [f'bike{i+1}_current_lon', f'bike{i+1}_current_lat', f'bike{i+1}_owner_lon', f'bike{i+1}_owner_lat', f'bike{i+1}_distance_user_bike']
label_col = ['label']
agg_data.columns = user_cols + bike_cols + label_col

agg_data = agg_data.reset_index(drop=True)

X = agg_data[user_cols + bike_cols].values
y = agg_data['label'].values.astype(int)

In [59]:
# クラス数
num_classes = M+1 if include_no_assignment else M
y_onehot = tf.keras.utils.to_categorical(y, num_classes=num_classes)

In [60]:
# スケーリング
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

X_train, X_test, y_train, y_test = train_test_split(X_scaled, y_onehot, test_size=0.2, random_state=42, stratify=y_onehot)

# ==== モデル構築 (マルチクラス) ====
model = keras.Sequential([
    keras.layers.Dense(64, activation='relu', input_shape=(X_train.shape[1],)),
    keras.layers.Dropout(0.2),
    keras.layers.Dense(32, activation='relu'),
    keras.layers.Dropout(0.2),
    keras.layers.Dense(num_classes, activation='softmax')
])

model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


In [61]:
model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

model.fit(X_train, y_train, epochs=10, batch_size=32, validation_split=0.1)

y_pred_prob = model.predict(X_test)
y_pred = np.argmax(y_pred_prob, axis=1)
y_true = np.argmax(y_test, axis=1)

acc = accuracy_score(y_true, y_pred)
print("Test Accuracy:", acc)

Epoch 1/10
[1m1683/1683[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 3ms/step - accuracy: 0.9787 - loss: 0.1182 - val_accuracy: 0.9948 - val_loss: 0.0364
Epoch 2/10
[1m1683/1683[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 3ms/step - accuracy: 0.9954 - loss: 0.0361 - val_accuracy: 0.9948 - val_loss: 0.0355
Epoch 3/10
[1m1683/1683[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 5ms/step - accuracy: 0.9959 - loss: 0.0283 - val_accuracy: 0.9948 - val_loss: 0.0345
Epoch 4/10
[1m1683/1683[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 2ms/step - accuracy: 0.9954 - loss: 0.0317 - val_accuracy: 0.9948 - val_loss: 0.0329
Epoch 5/10
[1m1683/1683[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 3ms/step - accuracy: 0.9956 - loss: 0.0283 - val_accuracy: 0.9948 - val_loss: 0.0334
Epoch 6/10
[1m1683/1683[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 2ms/step - accuracy: 0.9954 - loss: 0.0293 - val_accuracy: 0.9948 - val_loss: 0.0313
Epoch 7/10
[1m1

In [63]:
# 新規リクエストに対する予測例
# 同様にユーザ特徴＋M台分のバイク特徴を用意し、モデルに入力する
# ここでは適当にダミーで
new_request_user = np.array([-73.95, 40.75, -73.96, 40.76, 600]) # user側5特徴
new_request_bikes = np.random.uniform(-74.0, -73.9, size=M*5) # M台分×5特徴（bike_current_lon, bike_current_lat, owner_lon, owner_lat, distance_user_bike）


In [64]:
new_request_user

array([-73.95,  40.75, -73.96,  40.76, 600.  ])

In [97]:
new_request_features = np.concatenate([new_request_user, new_request_bikes])
new_request_scaled = scaler.transform([new_request_features])

In [98]:
new_request_scaled

array([[-3.08281118e+03,  1.80793633e+03, -3.07733573e+03,
         2.20807356e+03, -2.27196677e-01, -2.21841323e+03,
         2.57425142e-01, -1.99029482e+03,  1.99055318e-01,
        -1.38887261e+00, -2.13731411e+03,  8.58451779e-01,
        -1.96977164e+03, -7.74433978e-01, -1.82271249e+00,
        -1.91850675e+03,  7.97533636e-01, -1.88509153e+03,
         1.85434240e-01, -1.69258086e+00]])

In [109]:
new_prob = model.predict(new_request_scaled)[0]
pred_class = np.argmax(new_prob)

if pred_class < M:
    print(f"このリクエストには bike{pred_class+1} を割り当てるクラスが選ばれました")
else:
    print("このリクエストには割り当てないことが選ばれました (no assignment)")

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 21ms/step
このリクエストには割り当てないことが選ばれました (no assignment)


In [111]:
# モデルを保存する（HDF5形式）
model.save('trained_multiclass_model.h5')

# 保存したモデルは次のようにして使う
"""
from tensorflow.keras.models import load_model

# 保存したモデルの読み込み
loaded_model = load_model('trained_multiclass_model.h5')

# 読み込んだモデルで予測を実行可能
new_prob = loaded_model.predict(new_request_scaled)[0]
pred_class = np.argmax(new_prob)
"""



"\nfrom tensorflow.keras.models import load_model\n\n# 保存したモデルの読み込み\nloaded_model = load_model('trained_multiclass_model.h5')\n\n# 読み込んだモデルで予測を実行可能\nnew_prob = loaded_model.predict(new_request_scaled)[0]\npred_class = np.argmax(new_prob)\n"