# Learn to rank model for public leader board

In [1]:
import pandas as pd
import numpy as np
import xgboost as xgb

df = pd.read_csv('./dataset/train_clean_v1_noIDs.csv')
df_test = pd.read_csv('./dataset/test_clean_v1_noIDs.csv')
df_test

Unnamed: 0,srch_id,visitor_hist_starrating,visitor_hist_adr_usd,prop_country_id,prop_id,prop_starrating,prop_review_score,prop_brand_bool,prop_location_score1,prop_log_historical_price,price_usd,promotion_flag,srch_length_of_stay,srch_adults_count,srch_children_count,srch_room_count,srch_saturday_night_bool
0,1,3.374933,177.15073,219,3180,3,4.5,1,2.94,5.03,119.00,0,1,2,0,1,0
1,1,3.374933,177.15073,219,5543,3,4.5,1,2.64,4.93,118.00,0,1,2,0,1,0
2,1,3.374933,177.15073,219,14142,2,3.5,1,2.71,4.16,49.00,0,1,2,0,1,0
3,1,3.374933,177.15073,219,22393,3,4.5,1,2.40,5.03,143.00,0,1,2,0,1,0
4,1,3.374933,177.15073,219,24194,3,4.5,1,2.94,4.72,79.00,0,1,2,0,1,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4959178,332787,3.374933,177.15073,117,32019,4,3.5,0,2.48,4.53,66.07,0,2,1,0,1,0
4959179,332787,3.374933,177.15073,117,33959,4,3.0,1,2.20,4.39,67.10,0,2,1,0,1,0
4959180,332787,3.374933,177.15073,117,35240,4,0.0,0,1.79,4.64,73.91,0,2,1,0,1,0
4959181,332787,3.374933,177.15073,117,94437,4,0.0,0,2.94,4.64,66.07,0,2,1,0,1,0


In [2]:
X_train = df.loc[:, ~df.columns.isin(['srch_id','target_label', 'position', 'prop_id'])]
y_train = df.loc[:, df.columns.isin(['target_label'])]

# groups = train_data.groupby('srch_id').size().to_frame('size')['size'].to_numpy()
groups = df['srch_id'].to_numpy()

#We need to keep the id for later predictions
X_test = df_test.loc[:, ~df_test.columns.isin(['target_label', 'prop_id'])]
y_test = df_test.loc[:, df_test.columns.isin(['target_label'])]


In [3]:
params = {
    'tree_method': 'hist',
    'objective': 'rank:ndcg',  # Use rank:ndcg for optimizing NDCG
    'eval_metric': 'ndcg@5',    # Evaluation metric NDCG@k
    'eta': 0.1,                  # Learning rate
    'max_depth': 6               # Maximum depth of a tree
}

# Initialize the XGBRanker with modified parameters
model = xgb.XGBRanker(**params)

model.fit(X_train.to_numpy(), y_train.to_numpy(), qid = groups)

In [4]:
def predict(model, df):
    return model.predict(df.loc[:, ~df.columns.isin(['srch_id', 'target_label'])])

predictions = (X_test.groupby('srch_id')
               .apply(lambda x: predict(model, x)))


modified_df = X_test.groupby('srch_id').apply(lambda group: group.assign(predicted_reg=predictions[group.name])).reset_index(drop=True)

predictions


  predictions = (X_test.groupby('srch_id')
  modified_df = X_test.groupby('srch_id').apply(lambda group: group.assign(predicted_reg=predictions[group.name])).reset_index(drop=True)


srch_id
1         [-0.036684673, -0.086562656, -0.07009127, -0.2...
3         [0.021246597, 0.15950952, -0.52200633, -0.0506...
6         [-0.18253319, -0.25279087, -0.19910643, -0.256...
7         [-0.26661065, -0.05058336, 0.23019369, 0.08215...
10        [-0.008903268, -0.17327252, -0.4689219, -0.004...
                                ...                        
332781    [-0.3638572, -0.2685232, 0.07931345, -0.190998...
332783    [-0.21547702, 0.6905095, 1.0217726, 0.08779925...
332785    [0.14167956, -0.33033434, 0.18976472, 0.046980...
332786    [-0.28516978, -0.18233618, -0.18081653, 0.1134...
332787    [0.12015524, 0.07274464, 0.033584308, -0.00897...
Length: 199549, dtype: object

In [8]:
result = modified_df.sort_values(by=['srch_id', 'predicted_reg'], ascending=[True, False])
result['prop_id'] = df_test['prop_id']
result[['srch_id', 'prop_id']].reset_index(drop=True).to_csv("dataset/submission6_basic_L2R.csv", index=False)

In [6]:
modified_df

Unnamed: 0,srch_id,visitor_hist_starrating,visitor_hist_adr_usd,prop_country_id,prop_starrating,prop_review_score,prop_brand_bool,prop_location_score1,prop_log_historical_price,price_usd,promotion_flag,srch_length_of_stay,srch_adults_count,srch_children_count,srch_room_count,srch_saturday_night_bool,predicted_reg
0,1,3.374933,177.15073,219,3,4.5,1,2.94,5.03,119.00,0,1,2,0,1,0,-0.036685
1,1,3.374933,177.15073,219,3,4.5,1,2.64,4.93,118.00,0,1,2,0,1,0,-0.086563
2,1,3.374933,177.15073,219,2,3.5,1,2.71,4.16,49.00,0,1,2,0,1,0,-0.070091
3,1,3.374933,177.15073,219,3,4.5,1,2.40,5.03,143.00,0,1,2,0,1,0,-0.214417
4,1,3.374933,177.15073,219,3,4.5,1,2.94,4.72,79.00,0,1,2,0,1,0,0.224278
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4959178,332787,3.374933,177.15073,117,4,3.5,0,2.48,4.53,66.07,0,2,1,0,1,0,0.033584
4959179,332787,3.374933,177.15073,117,4,3.0,1,2.20,4.39,67.10,0,2,1,0,1,0,-0.008975
4959180,332787,3.374933,177.15073,117,4,0.0,0,1.79,4.64,73.91,0,2,1,0,1,0,-0.136365
4959181,332787,3.374933,177.15073,117,4,0.0,0,2.94,4.64,66.07,0,2,1,0,1,0,-0.023196
