In [None]:
# Enable IPython's autoreload extension; mode 2 re-imports all modules before
# each cell runs, so edits to imported .py files are picked up without a restart.
%load_ext autoreload
%autoreload 2

import sys
from pathlib import Path

import numpy as np
import pandas as pd
from fastai.basic_data import DatasetType
from fastai.callbacks.tracker import EarlyStoppingCallback, SaveModelCallback
from fastai.tabular import FillMissing, Categorify, Normalize, TabularList, tabular_learner
from matplotlib import pyplot as plt
from torch.nn import CrossEntropyLoss as CEloss

# Put the parent directory on the import path before importing the local package.
sys.path.append("..")
import src.train_utils as u

# Raise pandas' display limits: up to 999 columns and 100 rows are rendered
# before a frame is truncated.
pd.options.display.max_columns = 999
pd.options.display.max_rows = 100


# Load data

In [None]:
# Locations of the pickled train / test data, relative to this notebook.
train_path, test_path = '../data/train_1002.pkl', '../data/test_1002.pkl'

# The project helper returns both data sets together with three column
# collections (all / continuous / categorical, judging by their names).
train_full, test, all_cols, cont_cols, cat_cols = u.read_data(
    train_path=train_path,
    test_path=test_path,
)

# Keep at most the first 100,000 rows of each set and renumber the index from 0.
train_full = train_full[:100_000].reset_index(drop=True)
test = test[:100_000].reset_index(drop=True)

# Model training

In [None]:
# Train through the project helper, handing it the truncated train / test sets
# and the categorical / continuous column collections. seed=42 is forwarded to
# the helper (presumably for reproducibility — confirm in src.train_utils).
learn = u.train_fai_model(
    train=train_full,
    test=test,
    cat_cols=cat_cols,
    cont_cols=cont_cols,
    seed=42,
)

# Submit

In [None]:
# Predict on the validation and test sets. Only the first element returned by
# get_preds is kept; anything after it is discarded by the starred target.
# DatasetType is imported from fastai.basic_data in the first cell — without
# that import this cell raises NameError on a fresh kernel.
probas_val, *_ = learn.get_preds(DatasetType.Valid)
probas_test, *_ = learn.get_preds(DatasetType.Test)

# Keep column 1 of each prediction matrix
# (presumably the positive-class probability — confirm the label order).
probas_val = probas_val[:, 1]
probas_test = probas_test[:, 1]

# NOTE(review): `val` is not defined in any cell visible in this notebook, so
# this line fails under Restart & Run All. It must be the validation data
# (exposing a `.y` attribute) that matches the split used inside
# u.train_fai_model — define it explicitly before this cell.
# The trailing comment records the values from a previous run.
th = u.estimate(learn, val, y_true=val.y)  # th: 0.3838, f1 score: 0.1213

In [None]:
# Binarise the test predictions against a slightly lowered cut-off: the
# threshold returned by u.estimate, scaled by th_scale.
th_scale = .95  # we choose th to predict 8118 events
pred_test = np.array(probas_test) > th * th_scale

# Count positives with the vectorised ndarray.sum() instead of the builtin
# sum(), which walks the array element by element in Python. int() yields a
# plain Python integer for the message below and for any later use of n_pred.
n_pred = int(pred_test.sum())
print(f'Predicted events: {n_pred}')

In [None]:
# Build the submission table: the 'datetime x segment_id' values from the test
# set next to the boolean predictions cast to integers (False -> 0, True -> 1).
submit = pd.DataFrame(
    data={'datetime x segment_id': test['datetime x segment_id'].values,
          'prediction': pred_test.astype(int)}
)

# Create the output directory on demand; to_csv does not create missing parent
# folders and would otherwise fail on a fresh checkout.
results_dir = Path('../results')
results_dir.mkdir(parents=True, exist_ok=True)

# The number of predicted events is embedded in the file name.
submit.to_csv(results_dir / f'submit_fai_{n_pred}.csv', index=False)