In [None]:
import numpy as np
import pandas as pd

import matplotlib.pyplot as plt
import seaborn as sns

from tensorflow.keras import Sequential
from tensorflow.keras.layers import Dense, Dropout, BatchNormalization
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.losses import BinaryCrossentropy
# !pip install tensorflow_ranking
# import tensorflow_ranking as tfr

from sklearn.metrics import ndcg_score, classification_report, roc_auc_score
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

from imblearn.over_sampling import SMOTE

In [3]:
# Load the competition's train and test tables.
df_train = pd.read_csv("/kaggle/input/vk-contest-mle/train_df.csv")
df_test = pd.read_csv("/kaggle/input/vk-contest-mle/test_df.csv")

# Feature matrix: every column except the first and the last one (positional).
X_train = df_train.iloc[:, 1:-1].to_numpy()
X_test = df_test.iloc[:, 1:-1].to_numpy()

# Labels are read from the "target" column by name.
y_train = df_train["target"].to_numpy()
y_test = df_test["target"].to_numpy()

# Sanity check: rows must line up between features and labels in each split.
print(*(arr.shape for arr in (X_train, y_train, X_test, y_test)))

(15081, 79) (15081,) (1529, 79) (1529,)


In [4]:
# Standardize the features. The scaler's statistics are learned from the
# training split only and then reused unchanged for the test split.
X_scaler = StandardScaler()
X_train_scaled = X_scaler.fit_transform(X_train)
X_test_scaled = X_scaler.transform(X_test)

In [28]:
# Baseline binary classifier: two identical hidden stages
# (1024-unit ReLU Dense followed by 20% Dropout) and a single sigmoid output.
model = Sequential(
    [
        layer
        for _ in range(2)
        for layer in (Dense(1024, activation="relu"), Dropout(0.2))
    ]
    + [Dense(1, activation="sigmoid")]
)

# Binary cross-entropy matches the sigmoid output; Adam with lr = 1e-3.
model.compile(
    loss='binary_crossentropy',
    optimizer=Adam(learning_rate=1e-3),
)

In [29]:
# Train the baseline on the raw (unscaled) features.
# NOTE: the test split doubles as the validation set here, so val_loss is not
# an independent estimate if it is used for model selection.
history = model.fit(
    x=X_train, y=y_train,
    batch_size=128, epochs=10,
    # Keras documents validation_data as a tuple (x_val, y_val); a list can be
    # interpreted as a multi-input `x` by some versions, so pass a tuple.
    validation_data=(X_test, y_test),
)

Epoch 1/10
[1m118/118[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 25ms/step - loss: 0.3479 - precision: 0.0000e+00 - val_loss: 0.1053 - val_precision: 0.0000e+00
Epoch 2/10
[1m118/118[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 24ms/step - loss: 0.1053 - precision: 0.0000e+00 - val_loss: 0.1068 - val_precision: 0.0000e+00
Epoch 3/10
[1m118/118[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 26ms/step - loss: 0.1005 - precision: 0.0000e+00 - val_loss: 0.1236 - val_precision: 0.0000e+00
Epoch 4/10
[1m118/118[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 23ms/step - loss: 0.1029 - precision: 0.0000e+00 - val_loss: 0.1059 - val_precision: 0.0000e+00
Epoch 5/10
[1m118/118[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 23ms/step - loss: 0.1013 - precision: 0.0000e+00 - val_loss: 0.1064 - val_precision: 0.0000e+00
Epoch 6/10
[1m118/118[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 24ms/step - loss: 0.1030 - precision: 0.0000e+00 - val_los

In [32]:
# Score the test split with the baseline model.
y_pred = model.predict(X_test)

# ndcg_score takes 2-D (n_queries, n_items) arrays, so labels and predicted
# scores are each laid out as one single row.
print(ndcg_score(y_test.reshape(1, -1), y_pred.reshape(1, -1)))

[1m48/48[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step
0.44471734223611226


**Best NDCG score (unscaled features):** 0.44471734223611226
# Scaled

In [33]:
# Experiment: same architecture as the baseline, trained on standardized
# features (X_train_scaled / X_test_scaled come from the scaler cell above).
model = Sequential([
    Dense(1024, activation="relu"),
    Dropout(0.2),
    Dense(1024, activation="relu"),
    Dropout(0.2),
    Dense(1, activation="sigmoid")
])

model.compile(optimizer=Adam(learning_rate=1e-3), loss='binary_crossentropy')

history = model.fit(
    x=X_train_scaled, y=y_train,
    batch_size=128, epochs=10,
    # Keras documents validation_data as a tuple (x_val, y_val); a list can be
    # interpreted as a multi-input `x` by some versions, so pass a tuple.
    validation_data=(X_test_scaled, y_test),
)

# NDCG over the whole test split treated as a single query:
# ndcg_score expects 2-D (n_queries, n_items) inputs, hence the wrapping lists.
y_pred = model.predict(X_test_scaled)
print(ndcg_score([y_test], [y_pred[:, 0]]))

Epoch 1/10
[1m118/118[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 24ms/step - loss: 0.1545 - val_loss: 0.1074
Epoch 2/10
[1m118/118[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 26ms/step - loss: 0.0948 - val_loss: 0.1039
Epoch 3/10
[1m118/118[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 23ms/step - loss: 0.0897 - val_loss: 0.1062
Epoch 4/10
[1m118/118[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 23ms/step - loss: 0.0865 - val_loss: 0.1098
Epoch 5/10
[1m118/118[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 24ms/step - loss: 0.0745 - val_loss: 0.1347
Epoch 6/10
[1m118/118[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 23ms/step - loss: 0.0706 - val_loss: 0.1146
Epoch 7/10
[1m118/118[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 23ms/step - loss: 0.0633 - val_loss: 0.1371
Epoch 8/10
[1m118/118[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 23ms/step - loss: 0.0594 - val_loss: 0.1372
Epoch 9/10
[1m118/118[0m [32m

# SMOTE

In [38]:
# Experiment: oversample the training split with SMOTE, train on raw features.
# Data is reloaded so this cell does not depend on earlier (possibly already
# resampled) X_train / y_train values left in the kernel.
df_train = pd.read_csv("/kaggle/input/vk-contest-mle/train_df.csv")
df_test = pd.read_csv("/kaggle/input/vk-contest-mle/test_df.csv")

# Features: every column except the first and the last one (positional).
X_train = df_train[df_train.columns[1:-1]].to_numpy()
X_test = df_test[df_test.columns[1:-1]].to_numpy()

y_train = df_train["target"].to_numpy()
y_test = df_test["target"].to_numpy()

# Resample the TRAINING split only; the test split keeps its original class
# balance. A fixed random_state makes the synthetic samples reproducible
# between runs (Keras weight init / dropout are still unseeded).
X_train, y_train = SMOTE(random_state=42).fit_resample(X_train, y_train)

print(X_train.shape, y_train.shape, X_test.shape, y_test.shape)

model = Sequential([
    Dense(1024, activation="relu"),
    Dropout(0.2),
    Dense(1024, activation="relu"),
    Dropout(0.2),
    Dense(1, activation="sigmoid")
])

model.compile(optimizer=Adam(learning_rate=1e-3), loss='binary_crossentropy')

history = model.fit(
    x=X_train, y=y_train,
    batch_size=128, epochs=100,
    # Keras documents validation_data as a tuple (x_val, y_val); a list can be
    # interpreted as a multi-input `x` by some versions, so pass a tuple.
    validation_data=(X_test, y_test),
)

# NDCG over the whole test split treated as a single query.
y_pred = model.predict(X_test)
print(ndcg_score([y_test], [y_pred[:, 0]]))

(29518, 79) (29518,) (1529, 79) (1529,)
Epoch 1/100
[1m231/231[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 23ms/step - loss: 1.1096 - val_loss: 0.7286
Epoch 2/100
[1m231/231[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 22ms/step - loss: 0.5593 - val_loss: 0.6779
Epoch 3/100
[1m231/231[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 24ms/step - loss: 0.4603 - val_loss: 0.3606
Epoch 4/100
[1m231/231[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 23ms/step - loss: 0.3748 - val_loss: 0.3964
Epoch 5/100
[1m231/231[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 22ms/step - loss: 0.3058 - val_loss: 0.2224
Epoch 6/100
[1m231/231[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 22ms/step - loss: 0.2484 - val_loss: 0.2440
Epoch 7/100
[1m231/231[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 23ms/step - loss: 0.2165 - val_loss: 0.3403
Epoch 8/100
[1m231/231[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 22ms/step - loss: 0.1930 -

In [37]:
# Experiment: SMOTE oversampling followed by feature standardization.
# Data is reloaded so this cell does not depend on earlier (possibly already
# resampled) X_train / y_train values left in the kernel.
df_train = pd.read_csv("/kaggle/input/vk-contest-mle/train_df.csv")
df_test = pd.read_csv("/kaggle/input/vk-contest-mle/test_df.csv")

# Features: every column except the first and the last one (positional).
X_train = df_train[df_train.columns[1:-1]].to_numpy()
X_test = df_test[df_test.columns[1:-1]].to_numpy()

y_train = df_train["target"].to_numpy()
y_test = df_test["target"].to_numpy()

# Resample the TRAINING split only; a fixed random_state makes the synthetic
# samples reproducible between runs (Keras itself is still unseeded).
X_train, y_train = SMOTE(random_state=42).fit_resample(X_train, y_train)

print(X_train.shape, y_train.shape, X_test.shape, y_test.shape)

# Fit the scaler on training data only. Fitting on train + test (as this cell
# previously did) leaks test-set statistics into preprocessing and makes the
# reported test metric optimistic.
X_scaler = StandardScaler().fit(X_train)

X_train_scaled = X_scaler.transform(X_train)
X_test_scaled = X_scaler.transform(X_test)

model = Sequential([
    Dense(1024, activation="relu"),
    Dropout(0.2),
    Dense(1024, activation="relu"),
    Dropout(0.2),
    Dense(1, activation="sigmoid")
])

model.compile(optimizer=Adam(learning_rate=1e-3), loss='binary_crossentropy')

history = model.fit(
    x=X_train_scaled, y=y_train,
    batch_size=128, epochs=10,
    # Keras documents validation_data as a tuple (x_val, y_val); a list can be
    # interpreted as a multi-input `x` by some versions, so pass a tuple.
    validation_data=(X_test_scaled, y_test),
)

# NDCG over the whole test split treated as a single query.
y_pred = model.predict(X_test_scaled)
print(ndcg_score([y_test], [y_pred[:, 0]]))

(29518, 79) (29518,) (1529, 79) (1529,)
Epoch 1/10
[1m231/231[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 22ms/step - loss: 0.4189 - val_loss: 0.2214
Epoch 2/10
[1m231/231[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 23ms/step - loss: 0.1165 - val_loss: 0.2334
Epoch 3/10
[1m231/231[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 23ms/step - loss: 0.0725 - val_loss: 0.2829
Epoch 4/10
[1m231/231[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 22ms/step - loss: 0.0434 - val_loss: 0.2954
Epoch 5/10
[1m231/231[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 21ms/step - loss: 0.0432 - val_loss: 0.2775
Epoch 6/10
[1m231/231[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 22ms/step - loss: 0.0300 - val_loss: 0.3328
Epoch 7/10
[1m231/231[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 22ms/step - loss: 0.0257 - val_loss: 0.3142
Epoch 8/10
[1m231/231[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 22ms/step - loss: 0.0204 - val_loss