In [1]:
import pandas as pd
import numpy as np

from src.preprocessing import load_data, calculate_rank, filter_valid_stays, discretize_features, train_test_split_by_stay
from src.model import IVVModel
from src.evaluation import calculate_ndcg, calculate_map

## **データの読み込み**

In [2]:
# 1. Load Data
# Source CSV, given relative to the notebook's working directory.
# The whole file is handed to load_data in one call; it is assumed to fit in memory.
filepath = "data/data_01.csv"

print("Loading data...")
df = load_data(filepath)

# Report the row count as a quick sanity check on the load.
print(f"Data loaded: {len(df)} rows.")

Loading data...
Data loaded: 838 rows.


## **データの前処理**

In [3]:
# 2. Preprocessing
# Each stage rebinds `df` to the result of the previous one:
# optional rank calculation -> valid-stay filter -> feature discretization.
print("Preprocessing...")

# calculate_rank only runs when the loaded frame has no 'rank' column yet.
if 'rank' not in df.columns:
    print("Calculating rank...")
    df = df.pipe(calculate_rank)

print("Filtering valid stays...")
df = df.pipe(filter_valid_stays)
print(f"Valid stays: {len(df)} rows.")

print("Discretizing features...")
df = df.pipe(discretize_features)

print("Preprocessing completed.")
# Last expression of the cell: render the first rows of the processed frame.
df.head()

Preprocessing...
Calculating rank...
Filtering valid stays...
Valid stays: 307 rows.
Discretizing features...
Preprocessing completed.


Unnamed: 0,stay_id,day_date,day_of_week_name,holiday_name,started_from_jst,ended_to_jst,stay_log_point,poi_name,category_name_lv3,poi_address,poi_point,distance_from_poi,adid,label,memo,rank,distance_bucket,rank_bucket
19,9,2025/10/7,火,平日,2025/10/7 18:51,2025/10/7 19:07,POINT(139.709364704718 35.6467472578559),アトレ恵比寿西館,その他の大型商業、複合商業施設,東京都渋谷区恵比寿南1-6-1,POINT(139.7096144 35.6465289),33.145138,CE6DB387-CEF3-44AD-A83B-4F0E8A6F50D5,0,,1,9,1
20,9,2025/10/7,火,平日,2025/10/7 18:51,2025/10/7 19:07,POINT(139.709364704718 35.6467472578559),恵比寿駅(JR東日本 埼京線),駅,東京都渋谷区恵比寿南1丁目5-5,POINT(139.7101061 35.64669),67.294348,CE6DB387-CEF3-44AD-A83B-4F0E8A6F50D5,0,降車した（日比谷線利用）,2,18,2
21,9,2025/10/7,火,平日,2025/10/7 18:51,2025/10/7 19:07,POINT(139.709364704718 35.6467472578559),恵比寿駅(JR東日本 山手線(湘南新宿ライン)),駅,東京都渋谷区恵比寿南1丁目5-5,POINT(139.7101061 35.64669),67.294348,CE6DB387-CEF3-44AD-A83B-4F0E8A6F50D5,0,降車した（日比谷線利用）,3,18,3
22,9,2025/10/7,火,平日,2025/10/7 18:51,2025/10/7 19:07,POINT(139.709364704718 35.6467472578559),恵比寿駅(JR東日本 山手線(相鉄・JR直通線)),駅,東京都渋谷区恵比寿南1丁目5-5,POINT(139.7101061 35.64669),67.294348,CE6DB387-CEF3-44AD-A83B-4F0E8A6F50D5,0,降車した（日比谷線利用）,4,18,4
23,9,2025/10/7,火,平日,2025/10/7 18:51,2025/10/7 19:07,POINT(139.709364704718 35.6467472578559),恵比寿駅(東京メトロ 日比谷線),駅,東京都渋谷区恵比寿南1丁目5-5,POINT(139.7101061 35.64669),67.294348,CE6DB387-CEF3-44AD-A83B-4F0E8A6F50D5,1,降車した（日比谷線利用）,5,18,5


In [4]:
# 3. Train/Test Split
# Partition the preprocessed frame into train and test sets with
# train_test_split_by_stay, using a test ratio of 0.2.
print("Splitting data...")
train_df, test_df = train_test_split_by_stay(df, test_ratio=0.2)

# Guard the split before anything is trained on it: the partition row counts
# must add up to the input, and no stay_id may appear on both sides (a shared
# stay would leak candidates of the same stay into the test metrics).
assert len(train_df) + len(test_df) == len(df), "split lost or duplicated rows"
assert set(train_df["stay_id"]).isdisjoint(test_df["stay_id"]), "stay_id leaked across train/test"

print(f"Train set: {len(train_df)} rows, Test set: {len(test_df)} rows.")

Splitting data...
Train set: 262 rows, Test set: 45 rows.


## **モデルの訓練・評価**

In [5]:
# 4. Model Training
# Settings forwarded to IVVModel.train, gathered here so they are easy to tune.
# NOTE(review): in the recorded run the logged LL is still rising at the last
# iteration, so the fit may not have converged — consider more iterations.
train_settings = dict(learning_rate=0.001, iterations=100, verbose=True)

print("Training model...")
model = IVVModel()
model.train(train_df, **train_settings)

# 5. Evaluation
print("\nEvaluating on Test Set...")
# Score the held-out rows; both ranking metrics below read this frame.
test_df_pred = model.predict_proba(test_df)

# Ranking metrics on the test predictions: NDCG at each cutoff, then MAP.
ndcg_cutoffs = [1, 5, 10]
ndcg = calculate_ndcg(test_df_pred, k_list=ndcg_cutoffs)
map_score = calculate_map(test_df_pred)

print("Evaluation Results:")
print(f"MAP: {map_score:.4f}")
for k, score in ndcg.items():
    print(f"NDCG@{k}: {score:.4f}")

# Same MAP metric on the training rows, as a sanity check.
# NOTE(review): a train MAP far above the test MAP would suggest overfitting
# or an unrepresentatively small test set — worth confirming.
print("\n(Sanity Check) Train Set Metrics:")
train_df_pred = model.predict_proba(train_df)
train_map = calculate_map(train_df_pred)
print(f"Train MAP: {train_map:.4f}")

Training model...
Iteration 0: LL = -42.3021
Iteration 10: LL = -41.9917
Iteration 20: LL = -41.6868
Iteration 30: LL = -41.3872
Iteration 40: LL = -41.0928
Iteration 50: LL = -40.8036
Iteration 60: LL = -40.5193
Iteration 70: LL = -40.2399
Iteration 80: LL = -39.9654
Iteration 90: LL = -39.6956
Iteration 99: LL = -39.4567

Evaluating on Test Set...
Evaluation Results:
MAP: 0.5655
NDCG@1: 0.3333
NDCG@5: 0.6154
NDCG@10: 0.6710

(Sanity Check) Train Set Metrics:
Train MAP: 0.8210


In [9]:
# Show the first five scored test rows after ordering them by stay_id.
test_df_pred.sort_values("stay_id", ascending=True).head(n=5)

Unnamed: 0,stay_id,day_date,day_of_week_name,holiday_name,started_from_jst,ended_to_jst,stay_log_point,poi_name,category_name_lv3,poi_address,poi_point,distance_from_poi,adid,label,memo,rank,distance_bucket,rank_bucket,score,proba
286,46,2025/10/19,日,休日,2025/10/19 12:57,2025/10/19 13:02,POINT(139.794422581569 35.7473130572422),まいばすけっと千住緑町3丁目店,ミニスーパー,東京都足立区千住緑町3丁目1-17,POINT(139.7943742 35.7471511),18.530594,CE6DB387-CEF3-44AD-A83B-4F0E8A6F50D5,1,,0,5,0,0.881901,1.0
287,48,2025/10/20,月,平日,2025/10/20 19:09,2025/10/20 19:17,POINT(139.794880956993 35.7423575968589),*ワークマン女子ポンテポルタ千住店,ファスト、カジュアル,東京都足立区千住橋戸町1-13,POINT(139.7953947 35.7424028),46.637823,CE6DB387-CEF3-44AD-A83B-4F0E8A6F50D5,0,ポンテポルタ千住には行った,0,12,0,1.051396,0.339058
288,48,2025/10/20,月,平日,2025/10/20 19:09,2025/10/20 19:17,POINT(139.794880956993 35.7423575968589),ノジマ 千住大橋店,家電量販店,東京都足立区千住橋戸町1-13,POINT(139.79538919 35.74248885),48.13479,CE6DB387-CEF3-44AD-A83B-4F0E8A6F50D5,0,,1,13,1,0.959885,0.309548
289,48,2025/10/20,月,平日,2025/10/20 19:09,2025/10/20 19:17,POINT(139.794880956993 35.7423575968589),ポンテポルタ千住,その他の大型商業、複合商業施設,東京都足立区千住橋戸町1-13,POINT(139.7954427 35.7423959),50.876846,CE6DB387-CEF3-44AD-A83B-4F0E8A6F50D5,1,,2,14,2,1.089648,0.351394
436,66,2025/10/26,日,休日,2025/10/26 14:45,2025/10/26 15:08,POINT(139.795584149864 35.7421079469808),*ワークマン女子ポンテポルタ千住店,ファスト、カジュアル,東京都足立区千住橋戸町1-13,POINT(139.7953947 35.7424028),36.976804,CE6DB387-CEF3-44AD-A83B-4F0E8A6F50D5,0,,5,10,5,1.045036,0.137401
