In [None]:
%load_ext autoreload
%autoreload 2

import sys
from pathlib import Path
sys.path.append("..")

from fastai.tabular import FillMissing, Categorify, Normalize, TabularList, tabular_learner
from fastai.callbacks.tracker import EarlyStoppingCallback, SaveModelCallback
from fastai.basic_data import DatasetType
from torch.nn import CrossEntropyLoss as CEloss
import pandas as pd
import numpy as np
from matplotlib import pyplot as plt
from tqdm.auto import tqdm

import src.train_utils as u

# Raise pandas display limits so wide frames are shown without column
# truncation (up to 999 columns) and up to 100 rows are rendered.
pd.set_option('display.max_columns', 999)
pd.set_option('display.max_rows', 100)

# Fix the seed via the project helper before any data loading or training.
# NOTE(review): presumably seeds python/numpy/torch RNGs — confirm in src.train_utils.
u.random_seed(42)

# Load data

In [None]:
# Pickled train / test tables, addressed relative to this notebook's folder.
train_path, test_path = '../data/train_1002.pkl', '../data/test_1002.pkl'

# The project reader returns five values, unpacked here in order.
# NOTE(review): the last three look like column-name lists (all / continuous /
# categorical) — confirm against src.train_utils.read_data.
train_full, test, all_cols, cont_cols, cat_cols = u.read_data(
    test_path=test_path,
    train_path=train_path,
)

In [None]:
# Build the databunch used for training from the train/test frames and the
# continuous / categorical column lists returned by the loader.
data_bunch = u.create_fai_databunch(
    train=train_full,
    test=test,
    cont_cols=cont_cols,
    cat_cols=cat_cols,
)

# Model training

In [None]:
# Train the model several times and keep the run with the highest score
# (the second value returned by u.estimate; higher is treated as better).
n_attempts = 4

# Start from -inf so the first successfully scored run is always accepted,
# even when every score is zero or negative.
best_learn, best_score = None, float('-inf')

for attempt in tqdm(range(n_attempts)):
    learn = u.train_fai_model(data=data_bunch)
    _, score = u.estimate(learn)

    if score > best_score:
        best_learn = learn
        best_score = score

    # Drop the loop-local reference on every iteration: a discarded model can
    # then be garbage-collected before the next training run starts, while the
    # winning model stays alive through `best_learn`.
    del learn

# Fail with a clear message instead of an AttributeError on None.save(...)
# (reachable when n_attempts == 0 or every score is NaN).
if best_learn is None:
    raise RuntimeError(
        f'No model was selected after {n_attempts} attempts; '
        'check the scores returned by u.estimate.'
    )

best_learn.save('best_model_')

# Submit

In [None]:
# Predict on the test set and keep column 1 of the returned probabilities.
# NOTE(review): column 1 is presumably the positive ("event") class — confirm
# against the label encoding used when the databunch was built.
probas_test, *_ = best_learn.get_preds(DatasetType.Test)
probas_test = probas_test[:, 1]

# Number of test rows to flag as positive. This single constant drives both the
# selection below and the submission file name.
# NOTE(review): 8118 looks like a tuned / expected event count — document its origin.
n_pred = 8118

# Mark the n_pred rows with the highest probability as predicted events
# (argsort on the negated scores yields descending order).
pred_test = np.zeros(len(test), bool)
pred_test[np.argsort(-1 * probas_test)[:n_pred]] = True

# Report the number of flagged rows rather than dumping the whole boolean array.
print(f'Predicted events: {pred_test.sum()}')

In [None]:
# Assemble the submission table: the test key column next to the 0/1 prediction,
# then write it to the results folder with the prediction count in the file name.
submit = pd.DataFrame({
    'datetime x segment_id': test['datetime x segment_id'].values,
    'prediction': pred_test.astype(int),
})
submit.to_csv(f'../results/submit_fai_{n_pred}.csv', index=False)