In [22]:
import pickle
import numpy as np
import pandas as pd
from lightgbm import LGBMRanker

In [61]:
def handle_time(df):
    end = df.trip_end_date.astype(int) // 10**9
    start = df.trip_start_date.astype(int) // 10**9
    
    df.drop(['trip_end_date', 'trip_start_date'], axis=1, inplace=True)
    df['trip_length_seconds'] = end - start

In [66]:
# Reading in ze data
with open('data/X_test.pickle', 'rb') as cornichon:
    X_test = pickle.load(cornichon)
with open('data/X_train.pickle', 'rb') as cornichon:
    X_train = pickle.load(cornichon)
with open('data/X_val.pickle', 'rb') as cornichon:
    X_val = pickle.load(cornichon)
with open('data/y_test.pickle', 'rb') as cornichon:
    y_test = pickle.load(cornichon)
with open('data/y_train.pickle', 'rb') as cornichon:
    y_train = pickle.load(cornichon)
with open('data/y_val.pickle', 'rb') as cornichon:
    y_val = pickle.load(cornichon)
with open('data/submission_df_preprocessed.pickle', 'rb') as cornichon:
    submission_df = pickle.load(cornichon)

drop_cols = ['date_time', 'date']
X_test.drop(drop_cols, axis=1, inplace=True)
X_train.drop(drop_cols, axis=1, inplace=True)
X_val.drop(drop_cols, axis=1, inplace=True)
submission_df.drop(drop_cols, axis=1, inplace=True)

handle_time(X_test)
handle_time(X_train)
handle_time(X_val)
handle_time(submission_df)

In [73]:
qids_train = X_train.groupby('srch_id')['srch_id'].count().to_numpy()
qids_val = X_val.groupby('srch_id')['srch_id'].count().to_numpy()

In [77]:
clf = LGBMRanker(
        objective="lambdarank",
        metric="ndcg",
        n_estimators=5000,
        learning_rate=0.1,
        max_position=5,
        random_state=42,
        seed=42,
        boosting="dart",
    )

In [78]:
%%time
clf.fit(
    X = X_train,
    y = y_train,
    group = qids_train,
    eval_set=[(X_val, y_val)],
    eval_group=[qids_val],
    verbose=10,
)



[10]	valid_0's ndcg@1: 0.184429	valid_0's ndcg@2: 0.256241	valid_0's ndcg@3: 0.302941	valid_0's ndcg@4: 0.335363	valid_0's ndcg@5: 0.359886
[20]	valid_0's ndcg@1: 0.189909	valid_0's ndcg@2: 0.262859	valid_0's ndcg@3: 0.308927	valid_0's ndcg@4: 0.341746	valid_0's ndcg@5: 0.36672
[30]	valid_0's ndcg@1: 0.191499	valid_0's ndcg@2: 0.264943	valid_0's ndcg@3: 0.309584	valid_0's ndcg@4: 0.34313	valid_0's ndcg@5: 0.368096
[40]	valid_0's ndcg@1: 0.191606	valid_0's ndcg@2: 0.265892	valid_0's ndcg@3: 0.312308	valid_0's ndcg@4: 0.344744	valid_0's ndcg@5: 0.369506
[50]	valid_0's ndcg@1: 0.193046	valid_0's ndcg@2: 0.267601	valid_0's ndcg@3: 0.313634	valid_0's ndcg@4: 0.346174	valid_0's ndcg@5: 0.370646
[60]	valid_0's ndcg@1: 0.192644	valid_0's ndcg@2: 0.268004	valid_0's ndcg@3: 0.313711	valid_0's ndcg@4: 0.346761	valid_0's ndcg@5: 0.371573
[70]	valid_0's ndcg@1: 0.191253	valid_0's ndcg@2: 0.266878	valid_0's ndcg@3: 0.313449	valid_0's ndcg@4: 0.346174	valid_0's ndcg@5: 0.37178
[80]	valid_0's ndcg@1: 

[600]	valid_0's ndcg@1: 0.201403	valid_0's ndcg@2: 0.27673	valid_0's ndcg@3: 0.323224	valid_0's ndcg@4: 0.357295	valid_0's ndcg@5: 0.381292
[610]	valid_0's ndcg@1: 0.201849	valid_0's ndcg@2: 0.277403	valid_0's ndcg@3: 0.32459	valid_0's ndcg@4: 0.357325	valid_0's ndcg@5: 0.381952
[620]	valid_0's ndcg@1: 0.201799	valid_0's ndcg@2: 0.277403	valid_0's ndcg@3: 0.324146	valid_0's ndcg@4: 0.357662	valid_0's ndcg@5: 0.381979
[630]	valid_0's ndcg@1: 0.201751	valid_0's ndcg@2: 0.278084	valid_0's ndcg@3: 0.324326	valid_0's ndcg@4: 0.357721	valid_0's ndcg@5: 0.382285
[640]	valid_0's ndcg@1: 0.202097	valid_0's ndcg@2: 0.278175	valid_0's ndcg@3: 0.324907	valid_0's ndcg@4: 0.357826	valid_0's ndcg@5: 0.382759
[650]	valid_0's ndcg@1: 0.202	valid_0's ndcg@2: 0.277946	valid_0's ndcg@3: 0.324874	valid_0's ndcg@4: 0.358222	valid_0's ndcg@5: 0.383003
[660]	valid_0's ndcg@1: 0.201849	valid_0's ndcg@2: 0.277565	valid_0's ndcg@3: 0.325076	valid_0's ndcg@4: 0.358058	valid_0's ndcg@5: 0.382685
[670]	valid_0's nd

[1190]	valid_0's ndcg@1: 0.205749	valid_0's ndcg@2: 0.280506	valid_0's ndcg@3: 0.327267	valid_0's ndcg@4: 0.36096	valid_0's ndcg@5: 0.384685
[1200]	valid_0's ndcg@1: 0.206098	valid_0's ndcg@2: 0.280527	valid_0's ndcg@3: 0.327614	valid_0's ndcg@4: 0.361162	valid_0's ndcg@5: 0.385004
[1210]	valid_0's ndcg@1: 0.205704	valid_0's ndcg@2: 0.280284	valid_0's ndcg@3: 0.327706	valid_0's ndcg@4: 0.361098	valid_0's ndcg@5: 0.384793
[1220]	valid_0's ndcg@1: 0.205454	valid_0's ndcg@2: 0.28012	valid_0's ndcg@3: 0.327058	valid_0's ndcg@4: 0.360717	valid_0's ndcg@5: 0.384688
[1230]	valid_0's ndcg@1: 0.205803	valid_0's ndcg@2: 0.280232	valid_0's ndcg@3: 0.32743	valid_0's ndcg@4: 0.360996	valid_0's ndcg@5: 0.38517
[1240]	valid_0's ndcg@1: 0.205797	valid_0's ndcg@2: 0.280477	valid_0's ndcg@3: 0.328006	valid_0's ndcg@4: 0.361006	valid_0's ndcg@5: 0.385312
[1250]	valid_0's ndcg@1: 0.205546	valid_0's ndcg@2: 0.280299	valid_0's ndcg@3: 0.32744	valid_0's ndcg@4: 0.360509	valid_0's ndcg@5: 0.385237
[1260]	vali

[1770]	valid_0's ndcg@1: 0.20421	valid_0's ndcg@2: 0.279617	valid_0's ndcg@3: 0.326962	valid_0's ndcg@4: 0.360328	valid_0's ndcg@5: 0.384868
[1780]	valid_0's ndcg@1: 0.204012	valid_0's ndcg@2: 0.279526	valid_0's ndcg@3: 0.327027	valid_0's ndcg@4: 0.360341	valid_0's ndcg@5: 0.38469
[1790]	valid_0's ndcg@1: 0.20391	valid_0's ndcg@2: 0.279381	valid_0's ndcg@3: 0.326802	valid_0's ndcg@4: 0.36029	valid_0's ndcg@5: 0.384892
[1800]	valid_0's ndcg@1: 0.204062	valid_0's ndcg@2: 0.279546	valid_0's ndcg@3: 0.326704	valid_0's ndcg@4: 0.360534	valid_0's ndcg@5: 0.384895
[1810]	valid_0's ndcg@1: 0.203714	valid_0's ndcg@2: 0.279134	valid_0's ndcg@3: 0.326562	valid_0's ndcg@4: 0.360315	valid_0's ndcg@5: 0.384802
[1820]	valid_0's ndcg@1: 0.202616	valid_0's ndcg@2: 0.278912	valid_0's ndcg@3: 0.326552	valid_0's ndcg@4: 0.360187	valid_0's ndcg@5: 0.384414
[1830]	valid_0's ndcg@1: 0.203212	valid_0's ndcg@2: 0.279079	valid_0's ndcg@3: 0.326763	valid_0's ndcg@4: 0.36033	valid_0's ndcg@5: 0.384735
[1840]	vali

[2350]	valid_0's ndcg@1: 0.202222	valid_0's ndcg@2: 0.279624	valid_0's ndcg@3: 0.325898	valid_0's ndcg@4: 0.360052	valid_0's ndcg@5: 0.384075
[2360]	valid_0's ndcg@1: 0.202077	valid_0's ndcg@2: 0.279317	valid_0's ndcg@3: 0.32561	valid_0's ndcg@4: 0.360013	valid_0's ndcg@5: 0.384138
[2370]	valid_0's ndcg@1: 0.201974	valid_0's ndcg@2: 0.279028	valid_0's ndcg@3: 0.325558	valid_0's ndcg@4: 0.359969	valid_0's ndcg@5: 0.383977
[2380]	valid_0's ndcg@1: 0.202274	valid_0's ndcg@2: 0.279249	valid_0's ndcg@3: 0.325996	valid_0's ndcg@4: 0.360236	valid_0's ndcg@5: 0.384131
[2390]	valid_0's ndcg@1: 0.201921	valid_0's ndcg@2: 0.279175	valid_0's ndcg@3: 0.325722	valid_0's ndcg@4: 0.360271	valid_0's ndcg@5: 0.384075
[2400]	valid_0's ndcg@1: 0.20177	valid_0's ndcg@2: 0.279115	valid_0's ndcg@3: 0.325606	valid_0's ndcg@4: 0.36018	valid_0's ndcg@5: 0.384197
[2410]	valid_0's ndcg@1: 0.202071	valid_0's ndcg@2: 0.2792	valid_0's ndcg@3: 0.325649	valid_0's ndcg@4: 0.360294	valid_0's ndcg@5: 0.384083
[2420]	vali

[2930]	valid_0's ndcg@1: 0.201762	valid_0's ndcg@2: 0.278466	valid_0's ndcg@3: 0.326732	valid_0's ndcg@4: 0.360665	valid_0's ndcg@5: 0.384215
[2940]	valid_0's ndcg@1: 0.20186	valid_0's ndcg@2: 0.27871	valid_0's ndcg@3: 0.326635	valid_0's ndcg@4: 0.360808	valid_0's ndcg@5: 0.384234
[2950]	valid_0's ndcg@1: 0.20176	valid_0's ndcg@2: 0.27864	valid_0's ndcg@3: 0.326722	valid_0's ndcg@4: 0.360848	valid_0's ndcg@5: 0.384214
[2960]	valid_0's ndcg@1: 0.20196	valid_0's ndcg@2: 0.278626	valid_0's ndcg@3: 0.326525	valid_0's ndcg@4: 0.36087	valid_0's ndcg@5: 0.384143
[2970]	valid_0's ndcg@1: 0.201413	valid_0's ndcg@2: 0.278396	valid_0's ndcg@3: 0.326207	valid_0's ndcg@4: 0.360428	valid_0's ndcg@5: 0.383777
[2980]	valid_0's ndcg@1: 0.201413	valid_0's ndcg@2: 0.278559	valid_0's ndcg@3: 0.326112	valid_0's ndcg@4: 0.360655	valid_0's ndcg@5: 0.383821
[2990]	valid_0's ndcg@1: 0.201562	valid_0's ndcg@2: 0.278649	valid_0's ndcg@3: 0.326129	valid_0's ndcg@4: 0.360654	valid_0's ndcg@5: 0.383935
[3000]	valid

[3510]	valid_0's ndcg@1: 0.20101	valid_0's ndcg@2: 0.278793	valid_0's ndcg@3: 0.32631	valid_0's ndcg@4: 0.360011	valid_0's ndcg@5: 0.384001
[3520]	valid_0's ndcg@1: 0.20131	valid_0's ndcg@2: 0.278708	valid_0's ndcg@3: 0.326393	valid_0's ndcg@4: 0.360131	valid_0's ndcg@5: 0.384093
[3530]	valid_0's ndcg@1: 0.201112	valid_0's ndcg@2: 0.27871	valid_0's ndcg@3: 0.326176	valid_0's ndcg@4: 0.359823	valid_0's ndcg@5: 0.383953
[3540]	valid_0's ndcg@1: 0.201165	valid_0's ndcg@2: 0.278955	valid_0's ndcg@3: 0.325987	valid_0's ndcg@4: 0.360022	valid_0's ndcg@5: 0.38424
[3550]	valid_0's ndcg@1: 0.201963	valid_0's ndcg@2: 0.279137	valid_0's ndcg@3: 0.32637	valid_0's ndcg@4: 0.359647	valid_0's ndcg@5: 0.384521
[3560]	valid_0's ndcg@1: 0.201712	valid_0's ndcg@2: 0.279294	valid_0's ndcg@3: 0.326277	valid_0's ndcg@4: 0.359693	valid_0's ndcg@5: 0.384164
[3570]	valid_0's ndcg@1: 0.201462	valid_0's ndcg@2: 0.279471	valid_0's ndcg@3: 0.326062	valid_0's ndcg@4: 0.359469	valid_0's ndcg@5: 0.384163
[3580]	valid

[4090]	valid_0's ndcg@1: 0.201561	valid_0's ndcg@2: 0.278553	valid_0's ndcg@3: 0.326108	valid_0's ndcg@4: 0.359315	valid_0's ndcg@5: 0.3837
[4100]	valid_0's ndcg@1: 0.201211	valid_0's ndcg@2: 0.278415	valid_0's ndcg@3: 0.326129	valid_0's ndcg@4: 0.358858	valid_0's ndcg@5: 0.383621
[4110]	valid_0's ndcg@1: 0.20171	valid_0's ndcg@2: 0.278655	valid_0's ndcg@3: 0.326244	valid_0's ndcg@4: 0.359172	valid_0's ndcg@5: 0.383874
[4120]	valid_0's ndcg@1: 0.201411	valid_0's ndcg@2: 0.278365	valid_0's ndcg@3: 0.326049	valid_0's ndcg@4: 0.359114	valid_0's ndcg@5: 0.383515
[4130]	valid_0's ndcg@1: 0.201213	valid_0's ndcg@2: 0.278556	valid_0's ndcg@3: 0.325915	valid_0's ndcg@4: 0.359187	valid_0's ndcg@5: 0.383595
[4140]	valid_0's ndcg@1: 0.201313	valid_0's ndcg@2: 0.278868	valid_0's ndcg@3: 0.325935	valid_0's ndcg@4: 0.359494	valid_0's ndcg@5: 0.383674
[4150]	valid_0's ndcg@1: 0.201461	valid_0's ndcg@2: 0.278643	valid_0's ndcg@3: 0.325816	valid_0's ndcg@4: 0.359224	valid_0's ndcg@5: 0.383685
[4160]	va

[4670]	valid_0's ndcg@1: 0.200875	valid_0's ndcg@2: 0.277183	valid_0's ndcg@3: 0.325124	valid_0's ndcg@4: 0.358092	valid_0's ndcg@5: 0.382818
[4680]	valid_0's ndcg@1: 0.200976	valid_0's ndcg@2: 0.277331	valid_0's ndcg@3: 0.32554	valid_0's ndcg@4: 0.358236	valid_0's ndcg@5: 0.383157
[4690]	valid_0's ndcg@1: 0.200627	valid_0's ndcg@2: 0.277115	valid_0's ndcg@3: 0.325448	valid_0's ndcg@4: 0.357999	valid_0's ndcg@5: 0.382722
[4700]	valid_0's ndcg@1: 0.201078	valid_0's ndcg@2: 0.277628	valid_0's ndcg@3: 0.325563	valid_0's ndcg@4: 0.358168	valid_0's ndcg@5: 0.382921
[4710]	valid_0's ndcg@1: 0.201425	valid_0's ndcg@2: 0.27761	valid_0's ndcg@3: 0.325556	valid_0's ndcg@4: 0.358318	valid_0's ndcg@5: 0.383105
[4720]	valid_0's ndcg@1: 0.201427	valid_0's ndcg@2: 0.277635	valid_0's ndcg@3: 0.32565	valid_0's ndcg@4: 0.358315	valid_0's ndcg@5: 0.382978
[4730]	valid_0's ndcg@1: 0.201525	valid_0's ndcg@2: 0.27762	valid_0's ndcg@3: 0.325518	valid_0's ndcg@4: 0.358402	valid_0's ndcg@5: 0.383
[4740]	valid_

LGBMRanker(boosting='dart', max_position=5, metric='ndcg', n_estimators=5000,
           objective='lambdarank', random_state=42, seed=42)

In [84]:
predictions = clf.predict(submission_df)

In [83]:
predictions

array([-0.59366342, -1.09234092,  1.60536836, ..., -0.60151484,
       -0.16473693, -1.19221794])

In [87]:
submission_df = submission_df[['srch_id', 'prop_id']]

In [89]:
submission_df['ord'] = predictions

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  submission_df['ord'] = predictions


In [92]:
submission_df.sort_values(by=['srch_id', 'ord'], ascending=[True, False], inplace=True)

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  submission_df.sort_values(by=['srch_id', 'ord'], ascending=[True, False], inplace=True)


In [95]:
submission_df.drop(['ord'], axis=1).to_csv('submissions/submission_14.csv', index=False)

In [98]:
with open('pickled_models/LGBM-1.pickle', 'wb') as out_file:
    pickle.dump(clf, out_file)

In [102]:
clf.best_score_

defaultdict(collections.OrderedDict,
            {'valid_0': OrderedDict([('ndcg@1', 0.20107451770636509),
                          ('ndcg@2', 0.27791228048419137),
                          ('ndcg@3', 0.3252187803549771),
                          ('ndcg@4', 0.35792926940784126),
                          ('ndcg@5', 0.38246878023539904)])})

In [99]:
submission_df

Unnamed: 0,srch_id,prop_id,ord
23,1,99484,0.971761
9,1,54937,0.889943
12,1,61934,0.802006
5,1,28181,0.613808
4,1,24194,0.475922
...,...,...,...
4959177,332787,29018,0.763745
4959182,332787,99509,0.704661
4959181,332787,94437,0.446099
4959178,332787,32019,-0.466457
