In [1]:
!pip install tabpfn --no-index --find-links=file:///kaggle/input/pip-packages-icr/pip-packages
!mkdir -p /opt/conda/lib/python3.10/site-packages/tabpfn/models_diff
!cp /kaggle/input/pip-packages-icr/pip-packages/prior_diff_real_checkpoint_n_0_epoch_100.cpkt /opt/conda/lib/python3.10/site-packages/tabpfn/models_diff/

Looking in links: file:///kaggle/input/pip-packages-icr/pip-packages
Processing /kaggle/input/pip-packages-icr/pip-packages/tabpfn-0.1.9-py3-none-any.whl
Installing collected packages: tabpfn
Successfully installed tabpfn-0.1.9
[0m

In [2]:
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.preprocessing import normalize
from sklearn.impute import SimpleImputer
import imblearn
from sklearn.decomposition import PCA
from imblearn.over_sampling import RandomOverSampler,ADASYN
from imblearn.under_sampling import RandomUnderSampler,EditedNearestNeighbours
import xgboost
import inspect
from collections import defaultdict
from tabpfn import TabPFNClassifier
from tqdm.notebook import tqdm
import sys
sys.path.append('/kaggle/input/iterativestratification')
from iterstrat.ml_stratifiers import MultilabelStratifiedKFold

In [3]:
import warnings
warnings.filterwarnings("ignore")

In [4]:
train = pd.read_csv('/kaggle/input/icr-identify-age-related-conditions/train.csv')
test = pd.read_csv('/kaggle/input/icr-identify-age-related-conditions/test.csv')
sample = pd.read_csv('/kaggle/input/icr-identify-age-related-conditions/sample_submission.csv')
greeks = pd.read_csv('/kaggle/input/icr-identify-age-related-conditions/greeks.csv')

In [5]:
first_category = train.EJ.unique()[0]
train.EJ = train.EJ.eq(first_category).astype('int')
test.EJ = test.EJ.eq(first_category).astype('int')

In [6]:
predictor_columns = [n for n in train.columns if n != 'Class' and n != 'Id']
x= train[predictor_columns]
y = train['Class']

In [7]:
from sklearn.model_selection import StratifiedKFold as sKF, KFold as KF, GridSearchCV
cv_outer = sKF(n_splits = 10, shuffle=True, random_state=42)
cv_med = sKF(n_splits = 7, shuffle=True, random_state=42)
cv_inner = KF(n_splits = 5, shuffle=True, random_state=42)
cv_msk = MultilabelStratifiedKFold(n_splits = 7, shuffle=True, random_state=42)

In [8]:
def balanced_log_loss(y_true, y_pred):
    # y_true: correct labels 0, 1
    # y_pred: predicted probabilities of class=1
    # calculate the number of observations for each class
    N_0 = np.sum(1 - y_true)
    N_1 = np.sum(y_true)
    # calculate the weights for each class to balance classes
    w_0 = 1 / N_0
    w_1 = 1 / N_1
    # calculate the predicted probabilities for each class
    p_1 = np.clip(y_pred, 1e-15, 1 - 1e-15)
    p_0 = 1 - p_1
    # calculate the summed log loss for each class
    log_loss_0 = -np.sum((1 - y_true) * np.log(p_0))
    log_loss_1 = -np.sum(y_true * np.log(p_1))
    # calculate the weighted summed logarithmic loss
    # (factgor of 2 included to give same result as LL with balanced input)
    balanced_log_loss = 2*(w_0 * log_loss_0 + w_1 * log_loss_1) / (w_0 + w_1)
    # return the average log loss
    return balanced_log_loss/(N_0+N_1)

In [9]:
class Ensemble():
    def __init__(self):
        self.pca = PCA(n_components = 54,whiten=True,svd_solver = "full", random_state = 42)
#         self.imputer = SimpleImputer(missing_values=np.nan, strategy='median')
        self.classifiers =[xgboost.XGBClassifier(),TabPFNClassifier(N_ensemble_configurations=64)]
    
    def fit(self,X,y):
        y = y.values
        unique_classes, y = np.unique(y, return_inverse=True)
        self.classes_ = unique_classes
#         first_category = X.EJ.unique()[0]
#         X.EJ = X.EJ.eq(first_category).astype('int')
#         X = self.imputer.fit_transform(X)
        X = self.pca.fit_transform(X)
        for classifier in self.classifiers:
            if classifier==self.classifiers[1]:
                classifier.fit(X,y,overwrite_warning =True)
            else :
                classifier.fit(X, y)
     
    def predict_proba(self, x):
#         x = self.imputer.transform(x)
        x= self.pca.transform(x)
        probabilities = np.stack([classifier.predict_proba(x) for classifier in self.classifiers])
        averaged_probabilities = np.mean(probabilities, axis=0)
        class_0_est_instances = averaged_probabilities[:, 0].sum()
        others_est_instances = averaged_probabilities[:, 1:].sum()
        # Weighted probabilities based on class imbalance
        new_probabilities = averaged_probabilities * np.array([[1/(class_0_est_instances if i==0 else others_est_instances) for i in range(averaged_probabilities.shape[1])]])
        return new_probabilities / np.sum(new_probabilities, axis=1, keepdims=1) 

In [10]:
def training(x,y):
    outer_results = list()
    best_loss = np.inf
    fold = 0
    folds = 20
    for train_idx,val_idx in tqdm(cv_outer.split(x,y), total = folds):
        fold+=1
        x_train, x_val = x.iloc[train_idx],x.iloc[val_idx]
        y_train, y_val = y.iloc[train_idx], y.iloc[val_idx]
        
        model = Ensemble()
        model.fit(x_train, y_train)
        y_pred = model.predict_proba(x_val)
        probabilities = np.concatenate((y_pred[:,:1], np.sum(y_pred[:,1:], 1, keepdims=True)), axis=1)
        p0 = probabilities[:,:1]
        p0[p0 > 0.86] = 1
        p0[p0 < 0.14] = 0
        y_p = np.empty((y_pred.shape[0],))
        for i in range(y_pred.shape[0]):
            if p0[i]>=0.5:
                y_p[i]= False
            else :
                y_p[i]=True
        y_p = y_p.astype(int)
        loss = balanced_log_loss(y_val,y_p)

        if loss<best_loss:
            best_model = model
            best_loss = loss
            print('best_model_saved')
        outer_results.append(loss)
        print('>val_loss=%.5f, fold = %.1f' % (loss,fold))
    print('LOSS: %.5f' % (np.mean(outer_results)))
    return np.mean(outer_results), best_model
    

In [11]:
# from datetime import datetime
# times = greeks.Epsilon.copy()
# times[greeks.Epsilon != 'Unknown'] = greeks.Epsilon[greeks.Epsilon != 'Unknown'].map(lambda x: datetime.strptime(x,'%m/%d/%Y').toordinal())
# times[greeks.Epsilon == 'Unknown'] = np.nan

In [12]:
# greeks.Alpha = greeks.Alpha.map({'A' :0, 'B':1,'G' :2, 'D' :3})

In [13]:
# yess = []
# for col in train.drop(['Id','Class'],axis=1).columns:
#     f = train[col].corr(greeks.Alpha)
#     t =  train[col].corr(train.Class)
#     if (f >=0.15 and t>=0.15)  or  (f<=-0.15 and t<=-0.15):
#         yess.append(col)

In [14]:
# len(yess)

In [15]:
# train_pred_and_time = pd.concat((train.drop(['Id','Class'],axis=1), times), axis=1)
train_pred_and_time = train[predictor_columns]
train_pred_and_time['Class'] = train.Class
test_predictors = test[predictor_columns]
# first_category = test_predictors.EJ.unique()[0]
# test_predictors.EJ = test_predictors.EJ.eq(first_category).astype('int')
test_pred_and_time = test_predictors
# test_pred_and_time = np.concatenate((test_predictors, np.zeros((len(test_predictors), 1)) + train_pred_and_time.Epsilon.max() + 1), axis=1)

In [16]:
ros = ADASYN(random_state=42)
enn = EditedNearestNeighbours()

si = SimpleImputer(missing_values=np.nan, strategy='median')
train_pred_and_time11 = si.fit_transform(train_pred_and_time)
# train_pred_and_time11 = normalize(train_pred_and_time11, axis=0)
train_ros, y_ros = ros.fit_resample(train_pred_and_time11, greeks.Alpha)
# x_ros, y_ros = ros.fit_resample(df.drop(['Class'],axis=1), df.Class)

print('Original dataset shape')
print( greeks.Alpha.value_counts())
print('Resample dataset shape')
print( y_ros.value_counts())

Original dataset shape
A    509
B     61
G     29
D     18
Name: Alpha, dtype: int64
Resample dataset shape
G    514
A    509
D    507
B    501
Name: Alpha, dtype: int64


In [17]:
df = pd.DataFrame(train_pred_and_time11,columns =train_pred_and_time.columns)

In [18]:
x_ros = df.drop(['Class'],axis=1)
y_ = df.Class

In [19]:
yt = Ensemble()

Loading model that can be used for inference only
Using a Transformer with 25.82 M parameters


In [20]:
hist = {}
for col in x_ros:
    print("")
    print(f"{col} started")
    loss , m = training(x_ros.drop([col],axis=1),y_)
    hist[col] = loss
    print(f"{col} done")
    print("")


AB started


  0%|          | 0/20 [00:00<?, ?it/s]

Loading model that can be used for inference only
Using a Transformer with 25.82 M parameters
best_model_saved
>val_loss=2.49790, fold = 1.0
Loading model that can be used for inference only
Using a Transformer with 25.82 M parameters
best_model_saved
>val_loss=1.31184, fold = 2.0
Loading model that can be used for inference only
Using a Transformer with 25.82 M parameters
>val_loss=3.54015, fold = 3.0
Loading model that can be used for inference only
Using a Transformer with 25.82 M parameters
best_model_saved
>val_loss=0.79071, fold = 4.0
Loading model that can be used for inference only
Using a Transformer with 25.82 M parameters
>val_loss=4.65431, fold = 5.0
Loading model that can be used for inference only
Using a Transformer with 25.82 M parameters
>val_loss=5.24734, fold = 6.0
Loading model that can be used for inference only
Using a Transformer with 25.82 M parameters
>val_loss=2.10254, fold = 7.0
Loading model that can be used for inference only
Using a Transformer with 25.82 

  self.comm = Comm(**args)


  0%|          | 0/20 [00:00<?, ?it/s]

Loading model that can be used for inference only
Using a Transformer with 25.82 M parameters
best_model_saved
>val_loss=1.18606, fold = 1.0
Loading model that can be used for inference only
Using a Transformer with 25.82 M parameters
best_model_saved
>val_loss=0.39535, fold = 2.0
Loading model that can be used for inference only
Using a Transformer with 25.82 M parameters
>val_loss=2.62367, fold = 3.0
Loading model that can be used for inference only
Using a Transformer with 25.82 M parameters
>val_loss=0.79071, fold = 4.0
Loading model that can be used for inference only
Using a Transformer with 25.82 M parameters
>val_loss=4.45663, fold = 5.0
Loading model that can be used for inference only
Using a Transformer with 25.82 M parameters
>val_loss=2.69558, fold = 6.0
Loading model that can be used for inference only
Using a Transformer with 25.82 M parameters
>val_loss=1.70719, fold = 7.0
Loading model that can be used for inference only
Using a Transformer with 25.82 M parameters
>val

  self.comm = Comm(**args)


  0%|          | 0/20 [00:00<?, ?it/s]

Loading model that can be used for inference only
Using a Transformer with 25.82 M parameters
best_model_saved
>val_loss=1.77909, fold = 1.0
Loading model that can be used for inference only
Using a Transformer with 25.82 M parameters
best_model_saved
>val_loss=0.79071, fold = 2.0
Loading model that can be used for inference only
Using a Transformer with 25.82 M parameters
>val_loss=3.54015, fold = 3.0
Loading model that can be used for inference only
Using a Transformer with 25.82 M parameters
>val_loss=1.90487, fold = 4.0
Loading model that can be used for inference only
Using a Transformer with 25.82 M parameters
>val_loss=4.85199, fold = 5.0
Loading model that can be used for inference only
Using a Transformer with 25.82 M parameters
>val_loss=2.49790, fold = 6.0
Loading model that can be used for inference only
Using a Transformer with 25.82 M parameters
>val_loss=1.90487, fold = 7.0
Loading model that can be used for inference only
Using a Transformer with 25.82 M parameters
>val

  self.comm = Comm(**args)


  0%|          | 0/20 [00:00<?, ?it/s]

Loading model that can be used for inference only
Using a Transformer with 25.82 M parameters
best_model_saved
>val_loss=1.58142, fold = 1.0
Loading model that can be used for inference only
Using a Transformer with 25.82 M parameters
best_model_saved
>val_loss=1.31184, fold = 2.0
Loading model that can be used for inference only
Using a Transformer with 25.82 M parameters
>val_loss=1.70719, fold = 3.0
Loading model that can be used for inference only
Using a Transformer with 25.82 M parameters
>val_loss=2.30022, fold = 4.0
Loading model that can be used for inference only
Using a Transformer with 25.82 M parameters
>val_loss=3.54015, fold = 5.0
Loading model that can be used for inference only
Using a Transformer with 25.82 M parameters
>val_loss=3.41438, fold = 6.0
Loading model that can be used for inference only
Using a Transformer with 25.82 M parameters
>val_loss=1.70719, fold = 7.0
Loading model that can be used for inference only
Using a Transformer with 25.82 M parameters
>val

  self.comm = Comm(**args)


  0%|          | 0/20 [00:00<?, ?it/s]

Loading model that can be used for inference only
Using a Transformer with 25.82 M parameters
best_model_saved
>val_loss=1.77909, fold = 1.0
Loading model that can be used for inference only
Using a Transformer with 25.82 M parameters
best_model_saved
>val_loss=0.59303, fold = 2.0
Loading model that can be used for inference only
Using a Transformer with 25.82 M parameters
>val_loss=2.82135, fold = 3.0
Loading model that can be used for inference only
Using a Transformer with 25.82 M parameters
>val_loss=2.10254, fold = 4.0
Loading model that can be used for inference only
Using a Transformer with 25.82 M parameters
>val_loss=2.62367, fold = 5.0
Loading model that can be used for inference only
Using a Transformer with 25.82 M parameters
>val_loss=4.72622, fold = 6.0
Loading model that can be used for inference only
Using a Transformer with 25.82 M parameters
>val_loss=2.30022, fold = 7.0
Loading model that can be used for inference only
Using a Transformer with 25.82 M parameters
>val

  self.comm = Comm(**args)


  0%|          | 0/20 [00:00<?, ?it/s]

Loading model that can be used for inference only
Using a Transformer with 25.82 M parameters
best_model_saved
>val_loss=1.38374, fold = 1.0
Loading model that can be used for inference only
Using a Transformer with 25.82 M parameters
best_model_saved
>val_loss=0.39535, fold = 2.0
Loading model that can be used for inference only
Using a Transformer with 25.82 M parameters
>val_loss=2.62367, fold = 3.0
Loading model that can be used for inference only
Using a Transformer with 25.82 M parameters
>val_loss=2.82135, fold = 4.0
Loading model that can be used for inference only
Using a Transformer with 25.82 M parameters
>val_loss=4.65431, fold = 5.0
Loading model that can be used for inference only
Using a Transformer with 25.82 M parameters
>val_loss=4.92389, fold = 6.0
Loading model that can be used for inference only
Using a Transformer with 25.82 M parameters
>val_loss=2.30022, fold = 7.0
Loading model that can be used for inference only
Using a Transformer with 25.82 M parameters
>val

  self.comm = Comm(**args)


  0%|          | 0/20 [00:00<?, ?it/s]

Loading model that can be used for inference only
Using a Transformer with 25.82 M parameters
best_model_saved
>val_loss=1.38374, fold = 1.0
Loading model that can be used for inference only
Using a Transformer with 25.82 M parameters
best_model_saved
>val_loss=0.39535, fold = 2.0
Loading model that can be used for inference only
Using a Transformer with 25.82 M parameters
>val_loss=2.62367, fold = 3.0
Loading model that can be used for inference only
Using a Transformer with 25.82 M parameters
>val_loss=0.98839, fold = 4.0
Loading model that can be used for inference only
Using a Transformer with 25.82 M parameters
>val_loss=4.85199, fold = 5.0
Loading model that can be used for inference only
Using a Transformer with 25.82 M parameters
>val_loss=3.61206, fold = 6.0
Loading model that can be used for inference only
Using a Transformer with 25.82 M parameters
>val_loss=2.10254, fold = 7.0
Loading model that can be used for inference only
Using a Transformer with 25.82 M parameters
>val

  self.comm = Comm(**args)


  0%|          | 0/20 [00:00<?, ?it/s]

Loading model that can be used for inference only
Using a Transformer with 25.82 M parameters
best_model_saved
>val_loss=1.18606, fold = 1.0
Loading model that can be used for inference only
Using a Transformer with 25.82 M parameters
best_model_saved
>val_loss=0.59303, fold = 2.0
Loading model that can be used for inference only
Using a Transformer with 25.82 M parameters
>val_loss=2.62367, fold = 3.0
Loading model that can be used for inference only
Using a Transformer with 25.82 M parameters
>val_loss=1.70719, fold = 4.0
Loading model that can be used for inference only
Using a Transformer with 25.82 M parameters
>val_loss=3.73783, fold = 5.0
Loading model that can be used for inference only
Using a Transformer with 25.82 M parameters
>val_loss=5.64270, fold = 6.0
Loading model that can be used for inference only
Using a Transformer with 25.82 M parameters
>val_loss=2.10254, fold = 7.0
Loading model that can be used for inference only
Using a Transformer with 25.82 M parameters
>val

  self.comm = Comm(**args)


  0%|          | 0/20 [00:00<?, ?it/s]

Loading model that can be used for inference only
Using a Transformer with 25.82 M parameters
best_model_saved
>val_loss=1.77909, fold = 1.0
Loading model that can be used for inference only
Using a Transformer with 25.82 M parameters
best_model_saved
>val_loss=0.39535, fold = 2.0
Loading model that can be used for inference only
Using a Transformer with 25.82 M parameters
>val_loss=3.54015, fold = 3.0
Loading model that can be used for inference only
Using a Transformer with 25.82 M parameters
>val_loss=2.10254, fold = 4.0
Loading model that can be used for inference only
Using a Transformer with 25.82 M parameters
>val_loss=4.45663, fold = 5.0
Loading model that can be used for inference only
Using a Transformer with 25.82 M parameters
>val_loss=3.61206, fold = 6.0
Loading model that can be used for inference only
Using a Transformer with 25.82 M parameters
>val_loss=1.90487, fold = 7.0
Loading model that can be used for inference only
Using a Transformer with 25.82 M parameters
>val

  self.comm = Comm(**args)


  0%|          | 0/20 [00:00<?, ?it/s]

Loading model that can be used for inference only
Using a Transformer with 25.82 M parameters
best_model_saved
>val_loss=1.97677, fold = 1.0
Loading model that can be used for inference only
Using a Transformer with 25.82 M parameters
best_model_saved
>val_loss=0.59303, fold = 2.0
Loading model that can be used for inference only
Using a Transformer with 25.82 M parameters
>val_loss=1.31184, fold = 3.0
Loading model that can be used for inference only
Using a Transformer with 25.82 M parameters
>val_loss=1.18606, fold = 4.0
Loading model that can be used for inference only
Using a Transformer with 25.82 M parameters
>val_loss=4.45663, fold = 5.0
Loading model that can be used for inference only
Using a Transformer with 25.82 M parameters
>val_loss=3.21670, fold = 6.0
Loading model that can be used for inference only
Using a Transformer with 25.82 M parameters
>val_loss=1.38374, fold = 7.0
Loading model that can be used for inference only
Using a Transformer with 25.82 M parameters
>val

  self.comm = Comm(**args)


  0%|          | 0/20 [00:00<?, ?it/s]

Loading model that can be used for inference only
Using a Transformer with 25.82 M parameters
best_model_saved
>val_loss=1.18606, fold = 1.0
Loading model that can be used for inference only
Using a Transformer with 25.82 M parameters
best_model_saved
>val_loss=0.19768, fold = 2.0
Loading model that can be used for inference only
Using a Transformer with 25.82 M parameters
>val_loss=3.54015, fold = 3.0
Loading model that can be used for inference only
Using a Transformer with 25.82 M parameters
>val_loss=2.10254, fold = 4.0
Loading model that can be used for inference only
Using a Transformer with 25.82 M parameters
>val_loss=3.73783, fold = 5.0
Loading model that can be used for inference only
Using a Transformer with 25.82 M parameters
>val_loss=2.49790, fold = 6.0
Loading model that can be used for inference only
Using a Transformer with 25.82 M parameters
>val_loss=2.10254, fold = 7.0
Loading model that can be used for inference only
Using a Transformer with 25.82 M parameters
>val

  self.comm = Comm(**args)


  0%|          | 0/20 [00:00<?, ?it/s]

Loading model that can be used for inference only
Using a Transformer with 25.82 M parameters
best_model_saved
>val_loss=2.17445, fold = 1.0
Loading model that can be used for inference only
Using a Transformer with 25.82 M parameters
best_model_saved
>val_loss=0.39535, fold = 2.0
Loading model that can be used for inference only
Using a Transformer with 25.82 M parameters
>val_loss=3.73783, fold = 3.0
Loading model that can be used for inference only
Using a Transformer with 25.82 M parameters
>val_loss=1.18606, fold = 4.0
Loading model that can be used for inference only
Using a Transformer with 25.82 M parameters
>val_loss=4.65431, fold = 5.0
Loading model that can be used for inference only
Using a Transformer with 25.82 M parameters
>val_loss=3.41438, fold = 6.0
Loading model that can be used for inference only
Using a Transformer with 25.82 M parameters
>val_loss=2.30022, fold = 7.0
Loading model that can be used for inference only
Using a Transformer with 25.82 M parameters
>val

  self.comm = Comm(**args)


  0%|          | 0/20 [00:00<?, ?it/s]

Loading model that can be used for inference only
Using a Transformer with 25.82 M parameters
best_model_saved
>val_loss=1.38374, fold = 1.0
Loading model that can be used for inference only
Using a Transformer with 25.82 M parameters
best_model_saved
>val_loss=1.11416, fold = 2.0
Loading model that can be used for inference only
Using a Transformer with 25.82 M parameters
>val_loss=2.42599, fold = 3.0
Loading model that can be used for inference only
Using a Transformer with 25.82 M parameters
>val_loss=3.41438, fold = 4.0
Loading model that can be used for inference only
Using a Transformer with 25.82 M parameters
>val_loss=2.49790, fold = 5.0
Loading model that can be used for inference only
Using a Transformer with 25.82 M parameters
>val_loss=3.21670, fold = 6.0
Loading model that can be used for inference only
Using a Transformer with 25.82 M parameters
>val_loss=1.90487, fold = 7.0
Loading model that can be used for inference only
Using a Transformer with 25.82 M parameters
>val

  self.comm = Comm(**args)


  0%|          | 0/20 [00:00<?, ?it/s]

Loading model that can be used for inference only
Using a Transformer with 25.82 M parameters
best_model_saved
>val_loss=1.58142, fold = 1.0
Loading model that can be used for inference only
Using a Transformer with 25.82 M parameters
best_model_saved
>val_loss=0.39535, fold = 2.0
Loading model that can be used for inference only
Using a Transformer with 25.82 M parameters
>val_loss=3.34248, fold = 3.0
Loading model that can be used for inference only
Using a Transformer with 25.82 M parameters
>val_loss=2.30022, fold = 4.0
Loading model that can be used for inference only
Using a Transformer with 25.82 M parameters
>val_loss=4.45663, fold = 5.0
Loading model that can be used for inference only
Using a Transformer with 25.82 M parameters
>val_loss=4.33086, fold = 6.0
Loading model that can be used for inference only
Using a Transformer with 25.82 M parameters
>val_loss=2.30022, fold = 7.0
Loading model that can be used for inference only
Using a Transformer with 25.82 M parameters
>val

  self.comm = Comm(**args)


  0%|          | 0/20 [00:00<?, ?it/s]

Loading model that can be used for inference only
Using a Transformer with 25.82 M parameters
best_model_saved
>val_loss=1.77909, fold = 1.0
Loading model that can be used for inference only
Using a Transformer with 25.82 M parameters
best_model_saved
>val_loss=0.59303, fold = 2.0
Loading model that can be used for inference only
Using a Transformer with 25.82 M parameters
>val_loss=2.42599, fold = 3.0
Loading model that can be used for inference only
Using a Transformer with 25.82 M parameters
>val_loss=1.58142, fold = 4.0
Loading model that can be used for inference only
Using a Transformer with 25.82 M parameters
>val_loss=4.45663, fold = 5.0
Loading model that can be used for inference only
Using a Transformer with 25.82 M parameters
>val_loss=3.61206, fold = 6.0
Loading model that can be used for inference only
Using a Transformer with 25.82 M parameters
>val_loss=0.98839, fold = 7.0
Loading model that can be used for inference only
Using a Transformer with 25.82 M parameters
>val

  self.comm = Comm(**args)


  0%|          | 0/20 [00:00<?, ?it/s]

Loading model that can be used for inference only
Using a Transformer with 25.82 M parameters
best_model_saved
>val_loss=2.69558, fold = 1.0
Loading model that can be used for inference only
Using a Transformer with 25.82 M parameters
best_model_saved
>val_loss=1.31184, fold = 2.0
Loading model that can be used for inference only
Using a Transformer with 25.82 M parameters
>val_loss=2.62367, fold = 3.0
Loading model that can be used for inference only
Using a Transformer with 25.82 M parameters
>val_loss=1.90487, fold = 4.0
Loading model that can be used for inference only
Using a Transformer with 25.82 M parameters
>val_loss=3.73783, fold = 5.0
Loading model that can be used for inference only
Using a Transformer with 25.82 M parameters
>val_loss=4.72622, fold = 6.0
Loading model that can be used for inference only
Using a Transformer with 25.82 M parameters
>val_loss=2.30022, fold = 7.0
Loading model that can be used for inference only
Using a Transformer with 25.82 M parameters
>val

  self.comm = Comm(**args)


  0%|          | 0/20 [00:00<?, ?it/s]

Loading model that can be used for inference only
Using a Transformer with 25.82 M parameters
best_model_saved
>val_loss=1.77909, fold = 1.0
Loading model that can be used for inference only
Using a Transformer with 25.82 M parameters
best_model_saved
>val_loss=0.39535, fold = 2.0
Loading model that can be used for inference only
Using a Transformer with 25.82 M parameters
>val_loss=2.62367, fold = 3.0
Loading model that can be used for inference only
Using a Transformer with 25.82 M parameters
>val_loss=3.01903, fold = 4.0
Loading model that can be used for inference only
Using a Transformer with 25.82 M parameters
>val_loss=3.01903, fold = 5.0
Loading model that can be used for inference only
Using a Transformer with 25.82 M parameters
>val_loss=2.30022, fold = 6.0
Loading model that can be used for inference only
Using a Transformer with 25.82 M parameters
>val_loss=2.10254, fold = 7.0
Loading model that can be used for inference only
Using a Transformer with 25.82 M parameters
>val

  self.comm = Comm(**args)


  0%|          | 0/20 [00:00<?, ?it/s]

Loading model that can be used for inference only
Using a Transformer with 25.82 M parameters
best_model_saved
>val_loss=2.49790, fold = 1.0
Loading model that can be used for inference only
Using a Transformer with 25.82 M parameters
best_model_saved
>val_loss=2.10254, fold = 2.0
Loading model that can be used for inference only
Using a Transformer with 25.82 M parameters
best_model_saved
>val_loss=1.90487, fold = 3.0
Loading model that can be used for inference only
Using a Transformer with 25.82 M parameters
>val_loss=3.21670, fold = 4.0
Loading model that can be used for inference only
Using a Transformer with 25.82 M parameters
>val_loss=4.65431, fold = 5.0
Loading model that can be used for inference only
Using a Transformer with 25.82 M parameters
>val_loss=4.92389, fold = 6.0
Loading model that can be used for inference only
Using a Transformer with 25.82 M parameters
>val_loss=3.73783, fold = 7.0
Loading model that can be used for inference only
Using a Transformer with 25.82 

  self.comm = Comm(**args)


  0%|          | 0/20 [00:00<?, ?it/s]

Loading model that can be used for inference only
Using a Transformer with 25.82 M parameters
best_model_saved
>val_loss=1.58142, fold = 1.0
Loading model that can be used for inference only
Using a Transformer with 25.82 M parameters
best_model_saved
>val_loss=0.39535, fold = 2.0
Loading model that can be used for inference only
Using a Transformer with 25.82 M parameters
>val_loss=3.54015, fold = 3.0
Loading model that can be used for inference only
Using a Transformer with 25.82 M parameters
>val_loss=2.49790, fold = 4.0
Loading model that can be used for inference only
Using a Transformer with 25.82 M parameters
>val_loss=4.65431, fold = 5.0
Loading model that can be used for inference only
Using a Transformer with 25.82 M parameters
>val_loss=3.80973, fold = 6.0
Loading model that can be used for inference only
Using a Transformer with 25.82 M parameters
>val_loss=1.38374, fold = 7.0
Loading model that can be used for inference only
Using a Transformer with 25.82 M parameters
>val

  self.comm = Comm(**args)


  0%|          | 0/20 [00:00<?, ?it/s]

Loading model that can be used for inference only
Using a Transformer with 25.82 M parameters
best_model_saved
>val_loss=1.38374, fold = 1.0
Loading model that can be used for inference only
Using a Transformer with 25.82 M parameters
best_model_saved
>val_loss=0.39535, fold = 2.0
Loading model that can be used for inference only
Using a Transformer with 25.82 M parameters
>val_loss=1.90487, fold = 3.0
Loading model that can be used for inference only
Using a Transformer with 25.82 M parameters
>val_loss=1.50951, fold = 4.0
Loading model that can be used for inference only
Using a Transformer with 25.82 M parameters
>val_loss=5.04967, fold = 5.0
Loading model that can be used for inference only
Using a Transformer with 25.82 M parameters
>val_loss=3.21670, fold = 6.0
Loading model that can be used for inference only
Using a Transformer with 25.82 M parameters
>val_loss=2.30022, fold = 7.0
Loading model that can be used for inference only
Using a Transformer with 25.82 M parameters
>val

  self.comm = Comm(**args)


  0%|          | 0/20 [00:00<?, ?it/s]

Loading model that can be used for inference only
Using a Transformer with 25.82 M parameters
best_model_saved
>val_loss=1.18606, fold = 1.0
Loading model that can be used for inference only
Using a Transformer with 25.82 M parameters
best_model_saved
>val_loss=0.59303, fold = 2.0
Loading model that can be used for inference only
Using a Transformer with 25.82 M parameters
>val_loss=2.42599, fold = 3.0
Loading model that can be used for inference only
Using a Transformer with 25.82 M parameters
>val_loss=2.10254, fold = 4.0
Loading model that can be used for inference only
Using a Transformer with 25.82 M parameters
>val_loss=4.45663, fold = 5.0
Loading model that can be used for inference only
Using a Transformer with 25.82 M parameters
>val_loss=4.52854, fold = 6.0
Loading model that can be used for inference only
Using a Transformer with 25.82 M parameters
>val_loss=1.70719, fold = 7.0
Loading model that can be used for inference only
Using a Transformer with 25.82 M parameters
>val

  self.comm = Comm(**args)


  0%|          | 0/20 [00:00<?, ?it/s]

Loading model that can be used for inference only
Using a Transformer with 25.82 M parameters
best_model_saved
>val_loss=1.58142, fold = 1.0
Loading model that can be used for inference only
Using a Transformer with 25.82 M parameters
best_model_saved
>val_loss=0.79071, fold = 2.0
Loading model that can be used for inference only
Using a Transformer with 25.82 M parameters
>val_loss=1.90487, fold = 3.0
Loading model that can be used for inference only
Using a Transformer with 25.82 M parameters
>val_loss=2.10254, fold = 4.0
Loading model that can be used for inference only
Using a Transformer with 25.82 M parameters
>val_loss=4.45663, fold = 5.0
Loading model that can be used for inference only
Using a Transformer with 25.82 M parameters
>val_loss=5.84037, fold = 6.0
Loading model that can be used for inference only
Using a Transformer with 25.82 M parameters
>val_loss=3.21670, fold = 7.0
Loading model that can be used for inference only
Using a Transformer with 25.82 M parameters
>val

  self.comm = Comm(**args)


  0%|          | 0/20 [00:00<?, ?it/s]

Loading model that can be used for inference only
Using a Transformer with 25.82 M parameters
best_model_saved
>val_loss=2.89325, fold = 1.0
Loading model that can be used for inference only
Using a Transformer with 25.82 M parameters
best_model_saved
>val_loss=1.70719, fold = 2.0
Loading model that can be used for inference only
Using a Transformer with 25.82 M parameters
>val_loss=3.34248, fold = 3.0
Loading model that can be used for inference only
Using a Transformer with 25.82 M parameters
>val_loss=1.70719, fold = 4.0
Loading model that can be used for inference only
Using a Transformer with 25.82 M parameters
>val_loss=2.82135, fold = 5.0
Loading model that can be used for inference only
Using a Transformer with 25.82 M parameters
>val_loss=3.61206, fold = 6.0
Loading model that can be used for inference only
Using a Transformer with 25.82 M parameters
>val_loss=2.10254, fold = 7.0
Loading model that can be used for inference only
Using a Transformer with 25.82 M parameters
>val

  self.comm = Comm(**args)


  0%|          | 0/20 [00:00<?, ?it/s]

Loading model that can be used for inference only
Using a Transformer with 25.82 M parameters
best_model_saved
>val_loss=1.58142, fold = 1.0
Loading model that can be used for inference only
Using a Transformer with 25.82 M parameters
best_model_saved
>val_loss=0.79071, fold = 2.0
Loading model that can be used for inference only
Using a Transformer with 25.82 M parameters
>val_loss=3.54015, fold = 3.0
Loading model that can be used for inference only
Using a Transformer with 25.82 M parameters
>val_loss=1.90487, fold = 4.0
Loading model that can be used for inference only
Using a Transformer with 25.82 M parameters
>val_loss=3.93551, fold = 5.0
Loading model that can be used for inference only
Using a Transformer with 25.82 M parameters
>val_loss=2.49790, fold = 6.0
Loading model that can be used for inference only
Using a Transformer with 25.82 M parameters
>val_loss=3.01903, fold = 7.0
Loading model that can be used for inference only
Using a Transformer with 25.82 M parameters
>val

  self.comm = Comm(**args)


  0%|          | 0/20 [00:00<?, ?it/s]

Loading model that can be used for inference only
Using a Transformer with 25.82 M parameters
best_model_saved
>val_loss=1.58142, fold = 1.0
Loading model that can be used for inference only
Using a Transformer with 25.82 M parameters
best_model_saved
>val_loss=0.19768, fold = 2.0
Loading model that can be used for inference only
Using a Transformer with 25.82 M parameters
>val_loss=3.73783, fold = 3.0
Loading model that can be used for inference only
Using a Transformer with 25.82 M parameters
>val_loss=2.10254, fold = 4.0
Loading model that can be used for inference only
Using a Transformer with 25.82 M parameters
>val_loss=3.93551, fold = 5.0
Loading model that can be used for inference only
Using a Transformer with 25.82 M parameters
>val_loss=4.52854, fold = 6.0
Loading model that can be used for inference only
Using a Transformer with 25.82 M parameters
>val_loss=0.98839, fold = 7.0
Loading model that can be used for inference only
Using a Transformer with 25.82 M parameters
>val

  self.comm = Comm(**args)


  0%|          | 0/20 [00:00<?, ?it/s]

Loading model that can be used for inference only
Using a Transformer with 25.82 M parameters
best_model_saved
>val_loss=2.69558, fold = 1.0
Loading model that can be used for inference only
Using a Transformer with 25.82 M parameters
best_model_saved
>val_loss=0.39535, fold = 2.0
Loading model that can be used for inference only
Using a Transformer with 25.82 M parameters
>val_loss=1.90487, fold = 3.0
Loading model that can be used for inference only
Using a Transformer with 25.82 M parameters
>val_loss=2.69558, fold = 4.0
Loading model that can be used for inference only
Using a Transformer with 25.82 M parameters
>val_loss=3.54015, fold = 5.0
Loading model that can be used for inference only
Using a Transformer with 25.82 M parameters
>val_loss=4.13318, fold = 6.0
Loading model that can be used for inference only
Using a Transformer with 25.82 M parameters
>val_loss=2.30022, fold = 7.0
Loading model that can be used for inference only
Using a Transformer with 25.82 M parameters
>val

  self.comm = Comm(**args)


  0%|          | 0/20 [00:00<?, ?it/s]

Loading model that can be used for inference only
Using a Transformer with 25.82 M parameters
best_model_saved
>val_loss=1.97677, fold = 1.0
Loading model that can be used for inference only
Using a Transformer with 25.82 M parameters
best_model_saved
>val_loss=1.50951, fold = 2.0
Loading model that can be used for inference only
Using a Transformer with 25.82 M parameters
>val_loss=3.54015, fold = 3.0
Loading model that can be used for inference only
Using a Transformer with 25.82 M parameters
>val_loss=2.10254, fold = 4.0
Loading model that can be used for inference only
Using a Transformer with 25.82 M parameters
>val_loss=4.85199, fold = 5.0
Loading model that can be used for inference only
Using a Transformer with 25.82 M parameters
>val_loss=3.61206, fold = 6.0
Loading model that can be used for inference only
Using a Transformer with 25.82 M parameters
>val_loss=1.90487, fold = 7.0
Loading model that can be used for inference only
Using a Transformer with 25.82 M parameters
>val

  self.comm = Comm(**args)


  0%|          | 0/20 [00:00<?, ?it/s]

Loading model that can be used for inference only
Using a Transformer with 25.82 M parameters
best_model_saved
>val_loss=1.77909, fold = 1.0
Loading model that can be used for inference only
Using a Transformer with 25.82 M parameters
best_model_saved
>val_loss=0.79071, fold = 2.0
Loading model that can be used for inference only
Using a Transformer with 25.82 M parameters
>val_loss=2.82135, fold = 3.0
Loading model that can be used for inference only
Using a Transformer with 25.82 M parameters
>val_loss=1.18606, fold = 4.0
Loading model that can be used for inference only
Using a Transformer with 25.82 M parameters
>val_loss=2.62367, fold = 5.0
Loading model that can be used for inference only
Using a Transformer with 25.82 M parameters
>val_loss=4.33086, fold = 6.0
Loading model that can be used for inference only
Using a Transformer with 25.82 M parameters
>val_loss=2.30022, fold = 7.0
Loading model that can be used for inference only
Using a Transformer with 25.82 M parameters
>val

  self.comm = Comm(**args)


  0%|          | 0/20 [00:00<?, ?it/s]

Loading model that can be used for inference only
Using a Transformer with 25.82 M parameters
best_model_saved
>val_loss=3.09093, fold = 1.0
Loading model that can be used for inference only
Using a Transformer with 25.82 M parameters
best_model_saved
>val_loss=0.39535, fold = 2.0
Loading model that can be used for inference only
Using a Transformer with 25.82 M parameters
>val_loss=2.62367, fold = 3.0
Loading model that can be used for inference only
Using a Transformer with 25.82 M parameters
>val_loss=0.79071, fold = 4.0
Loading model that can be used for inference only
Using a Transformer with 25.82 M parameters
>val_loss=4.85199, fold = 5.0
Loading model that can be used for inference only
Using a Transformer with 25.82 M parameters
>val_loss=3.21670, fold = 6.0
Loading model that can be used for inference only
Using a Transformer with 25.82 M parameters
>val_loss=2.10254, fold = 7.0
Loading model that can be used for inference only
Using a Transformer with 25.82 M parameters
>val

  self.comm = Comm(**args)


  0%|          | 0/20 [00:00<?, ?it/s]

Loading model that can be used for inference only
Using a Transformer with 25.82 M parameters
best_model_saved
>val_loss=1.77909, fold = 1.0
Loading model that can be used for inference only
Using a Transformer with 25.82 M parameters
best_model_saved
>val_loss=1.70719, fold = 2.0
Loading model that can be used for inference only
Using a Transformer with 25.82 M parameters
>val_loss=2.22832, fold = 3.0
Loading model that can be used for inference only
Using a Transformer with 25.82 M parameters
>val_loss=3.21670, fold = 4.0
Loading model that can be used for inference only
Using a Transformer with 25.82 M parameters
>val_loss=4.65431, fold = 5.0
Loading model that can be used for inference only
Using a Transformer with 25.82 M parameters
>val_loss=4.52854, fold = 6.0
Loading model that can be used for inference only
Using a Transformer with 25.82 M parameters
>val_loss=2.10254, fold = 7.0
Loading model that can be used for inference only
Using a Transformer with 25.82 M parameters
>val

  self.comm = Comm(**args)


  0%|          | 0/20 [00:00<?, ?it/s]

Loading model that can be used for inference only
Using a Transformer with 25.82 M parameters
best_model_saved
>val_loss=1.97677, fold = 1.0
Loading model that can be used for inference only
Using a Transformer with 25.82 M parameters
best_model_saved
>val_loss=0.39535, fold = 2.0
Loading model that can be used for inference only
Using a Transformer with 25.82 M parameters
>val_loss=3.54015, fold = 3.0
Loading model that can be used for inference only
Using a Transformer with 25.82 M parameters
>val_loss=1.90487, fold = 4.0
Loading model that can be used for inference only
Using a Transformer with 25.82 M parameters
>val_loss=4.65431, fold = 5.0
Loading model that can be used for inference only
Using a Transformer with 25.82 M parameters
>val_loss=3.41438, fold = 6.0
Loading model that can be used for inference only
Using a Transformer with 25.82 M parameters
>val_loss=0.98839, fold = 7.0
Loading model that can be used for inference only
Using a Transformer with 25.82 M parameters
>val

  self.comm = Comm(**args)


  0%|          | 0/20 [00:00<?, ?it/s]

Loading model that can be used for inference only
Using a Transformer with 25.82 M parameters
best_model_saved
>val_loss=2.89325, fold = 1.0
Loading model that can be used for inference only
Using a Transformer with 25.82 M parameters
best_model_saved
>val_loss=0.79071, fold = 2.0
Loading model that can be used for inference only
Using a Transformer with 25.82 M parameters
>val_loss=1.70719, fold = 3.0
Loading model that can be used for inference only
Using a Transformer with 25.82 M parameters
>val_loss=1.90487, fold = 4.0
Loading model that can be used for inference only
Using a Transformer with 25.82 M parameters
>val_loss=3.73783, fold = 5.0
Loading model that can be used for inference only
Using a Transformer with 25.82 M parameters
>val_loss=4.13318, fold = 6.0
Loading model that can be used for inference only
Using a Transformer with 25.82 M parameters
>val_loss=2.10254, fold = 7.0
Loading model that can be used for inference only
Using a Transformer with 25.82 M parameters
>val

  self.comm = Comm(**args)


  0%|          | 0/20 [00:00<?, ?it/s]

Loading model that can be used for inference only
Using a Transformer with 25.82 M parameters
best_model_saved
>val_loss=2.49790, fold = 1.0
Loading model that can be used for inference only
Using a Transformer with 25.82 M parameters
best_model_saved
>val_loss=1.31184, fold = 2.0
Loading model that can be used for inference only
Using a Transformer with 25.82 M parameters
>val_loss=3.54015, fold = 3.0
Loading model that can be used for inference only
Using a Transformer with 25.82 M parameters
>val_loss=2.30022, fold = 4.0
Loading model that can be used for inference only
Using a Transformer with 25.82 M parameters
>val_loss=4.13318, fold = 5.0
Loading model that can be used for inference only
Using a Transformer with 25.82 M parameters
>val_loss=4.52854, fold = 6.0
Loading model that can be used for inference only
Using a Transformer with 25.82 M parameters
>val_loss=3.61206, fold = 7.0
Loading model that can be used for inference only
Using a Transformer with 25.82 M parameters
>val

  self.comm = Comm(**args)


  0%|          | 0/20 [00:00<?, ?it/s]

Loading model that can be used for inference only
Using a Transformer with 25.82 M parameters
best_model_saved
>val_loss=1.58142, fold = 1.0
Loading model that can be used for inference only
Using a Transformer with 25.82 M parameters
best_model_saved
>val_loss=0.59303, fold = 2.0
Loading model that can be used for inference only
Using a Transformer with 25.82 M parameters
>val_loss=3.54015, fold = 3.0
Loading model that can be used for inference only
Using a Transformer with 25.82 M parameters
>val_loss=0.98839, fold = 4.0
Loading model that can be used for inference only
Using a Transformer with 25.82 M parameters
>val_loss=4.13318, fold = 5.0
Loading model that can be used for inference only
Using a Transformer with 25.82 M parameters
>val_loss=2.30022, fold = 6.0
Loading model that can be used for inference only
Using a Transformer with 25.82 M parameters
>val_loss=2.30022, fold = 7.0
Loading model that can be used for inference only
Using a Transformer with 25.82 M parameters
>val

  self.comm = Comm(**args)


  0%|          | 0/20 [00:00<?, ?it/s]

Loading model that can be used for inference only
Using a Transformer with 25.82 M parameters
best_model_saved
>val_loss=2.49790, fold = 1.0
Loading model that can be used for inference only
Using a Transformer with 25.82 M parameters
best_model_saved
>val_loss=1.31184, fold = 2.0
Loading model that can be used for inference only
Using a Transformer with 25.82 M parameters
>val_loss=2.62367, fold = 3.0
Loading model that can be used for inference only
Using a Transformer with 25.82 M parameters
>val_loss=1.90487, fold = 4.0
Loading model that can be used for inference only
Using a Transformer with 25.82 M parameters
>val_loss=3.54015, fold = 5.0
Loading model that can be used for inference only
Using a Transformer with 25.82 M parameters
>val_loss=4.13318, fold = 6.0
Loading model that can be used for inference only
Using a Transformer with 25.82 M parameters
>val_loss=2.10254, fold = 7.0
Loading model that can be used for inference only
Using a Transformer with 25.82 M parameters
>val

  self.comm = Comm(**args)


  0%|          | 0/20 [00:00<?, ?it/s]

Loading model that can be used for inference only
Using a Transformer with 25.82 M parameters
best_model_saved
>val_loss=2.10254, fold = 1.0
Loading model that can be used for inference only
Using a Transformer with 25.82 M parameters
best_model_saved
>val_loss=1.50951, fold = 2.0
Loading model that can be used for inference only
Using a Transformer with 25.82 M parameters
>val_loss=3.73783, fold = 3.0
Loading model that can be used for inference only
Using a Transformer with 25.82 M parameters
>val_loss=1.90487, fold = 4.0
Loading model that can be used for inference only
Using a Transformer with 25.82 M parameters
>val_loss=3.54015, fold = 5.0
Loading model that can be used for inference only
Using a Transformer with 25.82 M parameters
>val_loss=5.64270, fold = 6.0
Loading model that can be used for inference only
Using a Transformer with 25.82 M parameters
>val_loss=2.10254, fold = 7.0
Loading model that can be used for inference only
Using a Transformer with 25.82 M parameters
>val

  self.comm = Comm(**args)


  0%|          | 0/20 [00:00<?, ?it/s]

Loading model that can be used for inference only
Using a Transformer with 25.82 M parameters
best_model_saved
>val_loss=1.18606, fold = 1.0
Loading model that can be used for inference only
Using a Transformer with 25.82 M parameters
best_model_saved
>val_loss=0.39535, fold = 2.0
Loading model that can be used for inference only
Using a Transformer with 25.82 M parameters
>val_loss=2.62367, fold = 3.0
Loading model that can be used for inference only
Using a Transformer with 25.82 M parameters
>val_loss=1.90487, fold = 4.0
Loading model that can be used for inference only
Using a Transformer with 25.82 M parameters
>val_loss=5.04967, fold = 5.0
Loading model that can be used for inference only
Using a Transformer with 25.82 M parameters
>val_loss=3.80973, fold = 6.0
Loading model that can be used for inference only
Using a Transformer with 25.82 M parameters
>val_loss=2.30022, fold = 7.0
Loading model that can be used for inference only
Using a Transformer with 25.82 M parameters
>val

  self.comm = Comm(**args)


  0%|          | 0/20 [00:00<?, ?it/s]

Loading model that can be used for inference only
Using a Transformer with 25.82 M parameters
best_model_saved
>val_loss=1.58142, fold = 1.0
Loading model that can be used for inference only
Using a Transformer with 25.82 M parameters
best_model_saved
>val_loss=0.39535, fold = 2.0
Loading model that can be used for inference only
Using a Transformer with 25.82 M parameters
>val_loss=3.54015, fold = 3.0
Loading model that can be used for inference only
Using a Transformer with 25.82 M parameters
>val_loss=2.10254, fold = 4.0
Loading model that can be used for inference only
Using a Transformer with 25.82 M parameters
>val_loss=3.73783, fold = 5.0
Loading model that can be used for inference only
Using a Transformer with 25.82 M parameters
>val_loss=2.89325, fold = 6.0
Loading model that can be used for inference only
Using a Transformer with 25.82 M parameters
>val_loss=1.90487, fold = 7.0
Loading model that can be used for inference only
Using a Transformer with 25.82 M parameters
>val

  self.comm = Comm(**args)


  0%|          | 0/20 [00:00<?, ?it/s]

Loading model that can be used for inference only
Using a Transformer with 25.82 M parameters
best_model_saved
>val_loss=1.38374, fold = 1.0
Loading model that can be used for inference only
Using a Transformer with 25.82 M parameters
best_model_saved
>val_loss=0.39535, fold = 2.0
Loading model that can be used for inference only
Using a Transformer with 25.82 M parameters
>val_loss=3.54015, fold = 3.0
Loading model that can be used for inference only
Using a Transformer with 25.82 M parameters
>val_loss=2.10254, fold = 4.0
Loading model that can be used for inference only
Using a Transformer with 25.82 M parameters
>val_loss=4.65431, fold = 5.0
Loading model that can be used for inference only
Using a Transformer with 25.82 M parameters
>val_loss=4.33086, fold = 6.0
Loading model that can be used for inference only
Using a Transformer with 25.82 M parameters
>val_loss=2.49790, fold = 7.0
Loading model that can be used for inference only
Using a Transformer with 25.82 M parameters
>val

  self.comm = Comm(**args)


  0%|          | 0/20 [00:00<?, ?it/s]

Loading model that can be used for inference only
Using a Transformer with 25.82 M parameters
best_model_saved
>val_loss=1.77909, fold = 1.0
Loading model that can be used for inference only
Using a Transformer with 25.82 M parameters
best_model_saved
>val_loss=0.39535, fold = 2.0
Loading model that can be used for inference only
Using a Transformer with 25.82 M parameters
>val_loss=2.82135, fold = 3.0
Loading model that can be used for inference only
Using a Transformer with 25.82 M parameters
>val_loss=0.98839, fold = 4.0
Loading model that can be used for inference only
Using a Transformer with 25.82 M parameters
>val_loss=3.54015, fold = 5.0
Loading model that can be used for inference only
Using a Transformer with 25.82 M parameters
>val_loss=3.21670, fold = 6.0
Loading model that can be used for inference only
Using a Transformer with 25.82 M parameters
>val_loss=2.30022, fold = 7.0
Loading model that can be used for inference only
Using a Transformer with 25.82 M parameters
>val

  self.comm = Comm(**args)


  0%|          | 0/20 [00:00<?, ?it/s]

Loading model that can be used for inference only
Using a Transformer with 25.82 M parameters
best_model_saved
>val_loss=2.89325, fold = 1.0
Loading model that can be used for inference only
Using a Transformer with 25.82 M parameters
best_model_saved
>val_loss=0.39535, fold = 2.0
Loading model that can be used for inference only
Using a Transformer with 25.82 M parameters
>val_loss=2.82135, fold = 3.0
Loading model that can be used for inference only
Using a Transformer with 25.82 M parameters
>val_loss=2.10254, fold = 4.0
Loading model that can be used for inference only
Using a Transformer with 25.82 M parameters
>val_loss=4.65431, fold = 5.0
Loading model that can be used for inference only
Using a Transformer with 25.82 M parameters
>val_loss=3.01903, fold = 6.0
Loading model that can be used for inference only
Using a Transformer with 25.82 M parameters
>val_loss=1.18606, fold = 7.0
Loading model that can be used for inference only
Using a Transformer with 25.82 M parameters
>val

  self.comm = Comm(**args)


  0%|          | 0/20 [00:00<?, ?it/s]

Loading model that can be used for inference only
Using a Transformer with 25.82 M parameters
best_model_saved
>val_loss=2.49790, fold = 1.0
Loading model that can be used for inference only
Using a Transformer with 25.82 M parameters
best_model_saved
>val_loss=0.98839, fold = 2.0
Loading model that can be used for inference only
Using a Transformer with 25.82 M parameters
>val_loss=3.54015, fold = 3.0
Loading model that can be used for inference only
Using a Transformer with 25.82 M parameters
>val_loss=1.18606, fold = 4.0
Loading model that can be used for inference only
Using a Transformer with 25.82 M parameters
>val_loss=3.54015, fold = 5.0
Loading model that can be used for inference only
Using a Transformer with 25.82 M parameters
>val_loss=3.41438, fold = 6.0
Loading model that can be used for inference only
Using a Transformer with 25.82 M parameters
>val_loss=1.70719, fold = 7.0
Loading model that can be used for inference only
Using a Transformer with 25.82 M parameters
>val

  self.comm = Comm(**args)


  0%|          | 0/20 [00:00<?, ?it/s]

Loading model that can be used for inference only
Using a Transformer with 25.82 M parameters
best_model_saved
>val_loss=2.49790, fold = 1.0
Loading model that can be used for inference only
Using a Transformer with 25.82 M parameters
best_model_saved
>val_loss=1.31184, fold = 2.0
Loading model that can be used for inference only
Using a Transformer with 25.82 M parameters
>val_loss=3.54015, fold = 3.0
Loading model that can be used for inference only
Using a Transformer with 25.82 M parameters
>val_loss=2.10254, fold = 4.0
Loading model that can be used for inference only
Using a Transformer with 25.82 M parameters
>val_loss=3.54015, fold = 5.0
Loading model that can be used for inference only
Using a Transformer with 25.82 M parameters
>val_loss=4.52854, fold = 6.0
Loading model that can be used for inference only
Using a Transformer with 25.82 M parameters
>val_loss=1.70719, fold = 7.0
Loading model that can be used for inference only
Using a Transformer with 25.82 M parameters
>val

  self.comm = Comm(**args)


  0%|          | 0/20 [00:00<?, ?it/s]

Loading model that can be used for inference only
Using a Transformer with 25.82 M parameters
best_model_saved
>val_loss=1.58142, fold = 1.0
Loading model that can be used for inference only
Using a Transformer with 25.82 M parameters
best_model_saved
>val_loss=0.39535, fold = 2.0
Loading model that can be used for inference only
Using a Transformer with 25.82 M parameters
>val_loss=3.73783, fold = 3.0
Loading model that can be used for inference only
Using a Transformer with 25.82 M parameters
>val_loss=2.10254, fold = 4.0
Loading model that can be used for inference only
Using a Transformer with 25.82 M parameters
>val_loss=4.65431, fold = 5.0
Loading model that can be used for inference only
Using a Transformer with 25.82 M parameters
>val_loss=4.13318, fold = 6.0
Loading model that can be used for inference only
Using a Transformer with 25.82 M parameters
>val_loss=2.30022, fold = 7.0
Loading model that can be used for inference only
Using a Transformer with 25.82 M parameters
>val

  self.comm = Comm(**args)


  0%|          | 0/20 [00:00<?, ?it/s]

Loading model that can be used for inference only
Using a Transformer with 25.82 M parameters
best_model_saved
>val_loss=2.49790, fold = 1.0
Loading model that can be used for inference only
Using a Transformer with 25.82 M parameters
best_model_saved
>val_loss=0.79071, fold = 2.0
Loading model that can be used for inference only
Using a Transformer with 25.82 M parameters
>val_loss=2.62367, fold = 3.0
Loading model that can be used for inference only
Using a Transformer with 25.82 M parameters
>val_loss=2.10254, fold = 4.0
Loading model that can be used for inference only
Using a Transformer with 25.82 M parameters
>val_loss=3.73783, fold = 5.0
Loading model that can be used for inference only
Using a Transformer with 25.82 M parameters
>val_loss=1.18606, fold = 6.0
Loading model that can be used for inference only
Using a Transformer with 25.82 M parameters
>val_loss=2.30022, fold = 7.0
Loading model that can be used for inference only
Using a Transformer with 25.82 M parameters
>val

  self.comm = Comm(**args)


  0%|          | 0/20 [00:00<?, ?it/s]

Loading model that can be used for inference only
Using a Transformer with 25.82 M parameters
best_model_saved
>val_loss=1.38374, fold = 1.0
Loading model that can be used for inference only
Using a Transformer with 25.82 M parameters
best_model_saved
>val_loss=0.79071, fold = 2.0
Loading model that can be used for inference only
Using a Transformer with 25.82 M parameters
>val_loss=4.25896, fold = 3.0
Loading model that can be used for inference only
Using a Transformer with 25.82 M parameters
>val_loss=2.49790, fold = 4.0
Loading model that can be used for inference only
Using a Transformer with 25.82 M parameters
>val_loss=5.76847, fold = 5.0
Loading model that can be used for inference only
Using a Transformer with 25.82 M parameters
>val_loss=3.41438, fold = 6.0
Loading model that can be used for inference only
Using a Transformer with 25.82 M parameters
>val_loss=2.10254, fold = 7.0
Loading model that can be used for inference only
Using a Transformer with 25.82 M parameters
>val

  self.comm = Comm(**args)


  0%|          | 0/20 [00:00<?, ?it/s]

Loading model that can be used for inference only
Using a Transformer with 25.82 M parameters
best_model_saved
>val_loss=2.49790, fold = 1.0
Loading model that can be used for inference only
Using a Transformer with 25.82 M parameters
best_model_saved
>val_loss=0.39535, fold = 2.0
Loading model that can be used for inference only
Using a Transformer with 25.82 M parameters
>val_loss=3.73783, fold = 3.0
Loading model that can be used for inference only
Using a Transformer with 25.82 M parameters
>val_loss=2.30022, fold = 4.0
Loading model that can be used for inference only
Using a Transformer with 25.82 M parameters
>val_loss=4.85199, fold = 5.0
Loading model that can be used for inference only
Using a Transformer with 25.82 M parameters
>val_loss=3.21670, fold = 6.0
Loading model that can be used for inference only
Using a Transformer with 25.82 M parameters
>val_loss=1.90487, fold = 7.0
Loading model that can be used for inference only
Using a Transformer with 25.82 M parameters
>val

  self.comm = Comm(**args)


  0%|          | 0/20 [00:00<?, ?it/s]

Loading model that can be used for inference only
Using a Transformer with 25.82 M parameters
best_model_saved
>val_loss=1.38374, fold = 1.0
Loading model that can be used for inference only
Using a Transformer with 25.82 M parameters
>val_loss=1.70719, fold = 2.0
Loading model that can be used for inference only
Using a Transformer with 25.82 M parameters
>val_loss=2.82135, fold = 3.0
Loading model that can be used for inference only
Using a Transformer with 25.82 M parameters
>val_loss=1.90487, fold = 4.0
Loading model that can be used for inference only
Using a Transformer with 25.82 M parameters
>val_loss=3.54015, fold = 5.0
Loading model that can be used for inference only
Using a Transformer with 25.82 M parameters
>val_loss=3.93551, fold = 6.0
Loading model that can be used for inference only
Using a Transformer with 25.82 M parameters
>val_loss=1.90487, fold = 7.0
Loading model that can be used for inference only
Using a Transformer with 25.82 M parameters
>val_loss=2.15348, fo

  self.comm = Comm(**args)


  0%|          | 0/20 [00:00<?, ?it/s]

Loading model that can be used for inference only
Using a Transformer with 25.82 M parameters
best_model_saved
>val_loss=1.77909, fold = 1.0
Loading model that can be used for inference only
Using a Transformer with 25.82 M parameters
best_model_saved
>val_loss=0.39535, fold = 2.0
Loading model that can be used for inference only
Using a Transformer with 25.82 M parameters
>val_loss=3.54015, fold = 3.0
Loading model that can be used for inference only
Using a Transformer with 25.82 M parameters
>val_loss=2.30022, fold = 4.0
Loading model that can be used for inference only
Using a Transformer with 25.82 M parameters
>val_loss=3.73783, fold = 5.0
Loading model that can be used for inference only
Using a Transformer with 25.82 M parameters
>val_loss=3.01903, fold = 6.0
Loading model that can be used for inference only
Using a Transformer with 25.82 M parameters
>val_loss=2.10254, fold = 7.0
Loading model that can be used for inference only
Using a Transformer with 25.82 M parameters
>val

  self.comm = Comm(**args)


  0%|          | 0/20 [00:00<?, ?it/s]

Loading model that can be used for inference only
Using a Transformer with 25.82 M parameters
best_model_saved
>val_loss=1.97677, fold = 1.0
Loading model that can be used for inference only
Using a Transformer with 25.82 M parameters
best_model_saved
>val_loss=0.59303, fold = 2.0
Loading model that can be used for inference only
Using a Transformer with 25.82 M parameters
>val_loss=2.62367, fold = 3.0
Loading model that can be used for inference only
Using a Transformer with 25.82 M parameters
>val_loss=1.90487, fold = 4.0
Loading model that can be used for inference only
Using a Transformer with 25.82 M parameters
>val_loss=4.85199, fold = 5.0
Loading model that can be used for inference only
Using a Transformer with 25.82 M parameters
>val_loss=4.52854, fold = 6.0
Loading model that can be used for inference only
Using a Transformer with 25.82 M parameters
>val_loss=2.10254, fold = 7.0
Loading model that can be used for inference only
Using a Transformer with 25.82 M parameters
>val

  self.comm = Comm(**args)


  0%|          | 0/20 [00:00<?, ?it/s]

Loading model that can be used for inference only
Using a Transformer with 25.82 M parameters
best_model_saved
>val_loss=1.38374, fold = 1.0
Loading model that can be used for inference only
Using a Transformer with 25.82 M parameters
best_model_saved
>val_loss=0.59303, fold = 2.0
Loading model that can be used for inference only
Using a Transformer with 25.82 M parameters
>val_loss=3.73783, fold = 3.0
Loading model that can be used for inference only
Using a Transformer with 25.82 M parameters
>val_loss=2.10254, fold = 4.0
Loading model that can be used for inference only
Using a Transformer with 25.82 M parameters
>val_loss=3.01903, fold = 5.0
Loading model that can be used for inference only
Using a Transformer with 25.82 M parameters
>val_loss=2.69558, fold = 6.0
Loading model that can be used for inference only
Using a Transformer with 25.82 M parameters
>val_loss=2.10254, fold = 7.0
Loading model that can be used for inference only
Using a Transformer with 25.82 M parameters
>val

  self.comm = Comm(**args)


  0%|          | 0/20 [00:00<?, ?it/s]

Loading model that can be used for inference only
Using a Transformer with 25.82 M parameters
best_model_saved
>val_loss=1.77909, fold = 1.0
Loading model that can be used for inference only
Using a Transformer with 25.82 M parameters
best_model_saved
>val_loss=1.50951, fold = 2.0
Loading model that can be used for inference only
Using a Transformer with 25.82 M parameters
>val_loss=3.54015, fold = 3.0
Loading model that can be used for inference only
Using a Transformer with 25.82 M parameters
>val_loss=1.90487, fold = 4.0
Loading model that can be used for inference only
Using a Transformer with 25.82 M parameters
>val_loss=3.93551, fold = 5.0
Loading model that can be used for inference only
Using a Transformer with 25.82 M parameters
>val_loss=4.13318, fold = 6.0
Loading model that can be used for inference only
Using a Transformer with 25.82 M parameters
>val_loss=2.30022, fold = 7.0
Loading model that can be used for inference only
Using a Transformer with 25.82 M parameters
>val

  self.comm = Comm(**args)


  0%|          | 0/20 [00:00<?, ?it/s]

Loading model that can be used for inference only
Using a Transformer with 25.82 M parameters
best_model_saved
>val_loss=1.58142, fold = 1.0
Loading model that can be used for inference only
Using a Transformer with 25.82 M parameters
best_model_saved
>val_loss=0.39535, fold = 2.0
Loading model that can be used for inference only
Using a Transformer with 25.82 M parameters
>val_loss=3.34248, fold = 3.0
Loading model that can be used for inference only
Using a Transformer with 25.82 M parameters
>val_loss=2.30022, fold = 4.0
Loading model that can be used for inference only
Using a Transformer with 25.82 M parameters
>val_loss=2.62367, fold = 5.0
Loading model that can be used for inference only
Using a Transformer with 25.82 M parameters
>val_loss=6.03805, fold = 6.0
Loading model that can be used for inference only
Using a Transformer with 25.82 M parameters
>val_loss=2.10254, fold = 7.0
Loading model that can be used for inference only
Using a Transformer with 25.82 M parameters
>val

  self.comm = Comm(**args)


  0%|          | 0/20 [00:00<?, ?it/s]

Loading model that can be used for inference only
Using a Transformer with 25.82 M parameters
best_model_saved
>val_loss=2.89325, fold = 1.0
Loading model that can be used for inference only
Using a Transformer with 25.82 M parameters
best_model_saved
>val_loss=1.31184, fold = 2.0
Loading model that can be used for inference only
Using a Transformer with 25.82 M parameters
>val_loss=1.70719, fold = 3.0
Loading model that can be used for inference only
Using a Transformer with 25.82 M parameters
>val_loss=2.10254, fold = 4.0
Loading model that can be used for inference only
Using a Transformer with 25.82 M parameters
>val_loss=3.54015, fold = 5.0
Loading model that can be used for inference only
Using a Transformer with 25.82 M parameters
>val_loss=4.72622, fold = 6.0
Loading model that can be used for inference only
Using a Transformer with 25.82 M parameters
>val_loss=2.10254, fold = 7.0
Loading model that can be used for inference only
Using a Transformer with 25.82 M parameters
>val

  self.comm = Comm(**args)


  0%|          | 0/20 [00:00<?, ?it/s]

Loading model that can be used for inference only
Using a Transformer with 25.82 M parameters
best_model_saved
>val_loss=1.77909, fold = 1.0
Loading model that can be used for inference only
Using a Transformer with 25.82 M parameters
best_model_saved
>val_loss=0.79071, fold = 2.0
Loading model that can be used for inference only
Using a Transformer with 25.82 M parameters
>val_loss=3.93551, fold = 3.0
Loading model that can be used for inference only
Using a Transformer with 25.82 M parameters
>val_loss=1.70719, fold = 4.0
Loading model that can be used for inference only
Using a Transformer with 25.82 M parameters
>val_loss=4.65431, fold = 5.0
Loading model that can be used for inference only
Using a Transformer with 25.82 M parameters
>val_loss=2.49790, fold = 6.0
Loading model that can be used for inference only
Using a Transformer with 25.82 M parameters
>val_loss=2.30022, fold = 7.0
Loading model that can be used for inference only
Using a Transformer with 25.82 M parameters
>val

  self.comm = Comm(**args)


  0%|          | 0/20 [00:00<?, ?it/s]

Loading model that can be used for inference only
Using a Transformer with 25.82 M parameters
best_model_saved
>val_loss=1.58142, fold = 1.0
Loading model that can be used for inference only
Using a Transformer with 25.82 M parameters
best_model_saved
>val_loss=1.50951, fold = 2.0
Loading model that can be used for inference only
Using a Transformer with 25.82 M parameters
>val_loss=2.42599, fold = 3.0
Loading model that can be used for inference only
Using a Transformer with 25.82 M parameters
>val_loss=2.49790, fold = 4.0
Loading model that can be used for inference only
Using a Transformer with 25.82 M parameters
>val_loss=3.73783, fold = 5.0
Loading model that can be used for inference only
Using a Transformer with 25.82 M parameters
>val_loss=2.49790, fold = 6.0
Loading model that can be used for inference only
Using a Transformer with 25.82 M parameters
best_model_saved
>val_loss=0.98839, fold = 7.0
Loading model that can be used for inference only
Using a Transformer with 25.82 

In [21]:
hist

{'AB': 2.677231714253453,
 'AF': 1.974013240878865,
 'AH': 2.2189171028969077,
 'AM': 2.163296902389891,
 'AR': 2.3202355280844142,
 'AX': 2.51731439768356,
 'AY': 2.2213969911656446,
 'AZ': 2.3700760523661826,
 'BC': 2.3861286330929294,
 'BD ': 1.9230018882343505,
 'BN': 2.199223217030874,
 'BP': 2.432304827461249,
 'BQ': 2.2213969911656446,
 'BR': 2.49634146506894,
 'BZ': 2.167427780564563,
 'CB': 2.5352234458111313,
 'CC': 2.1495165405816334,
 'CD ': 2.9869758956124692,
 'CF': 2.3933190120439334,
 'CH': 2.162643468364575,
 'CL': 2.3441871515744044,
 'CR': 2.631655915978884,
 'CS': 2.4442287218686825,
 'CU': 2.3897099119174086,
 'CW ': 2.2752645589469607,
 'DA': 2.4292453266849163,
 'DE': 2.554991153194885,
 'DF': 2.266370152158456,
 'DH': 2.2585563523395393,
 'DI': 2.7790997817687075,
 'DL': 2.348318029749076,
 'DN': 2.256046450913399,
 'DU': 2.9702398677030026,
 'DV': 2.1507217658125,
 'DY': 2.3980525275003624,
 'EB': 2.6982024550127153,
 'EE': 2.3321616143472053,
 'EG': 2.29500225

In [22]:
# m.fit(df.drop(['Class'],axis=1),y_ros)

In [23]:
# y_ros.value_counts(normalize = True)

In [24]:
# y_pred = m.predict_proba(test_pred_and_time)
# probabilities = np.concatenate((y_pred[:,:1], np.sum(y_pred[:,1:], 1, keepdims=True)), axis=1)
# p0 = probabilities[:,:1]
# p0[p0 > 0.86] = 1
# p0[p0 < 0.14] = 0

In [25]:
# submission = pd.DataFrame(test["Id"], columns=["Id"])
# submission["class_0"] = p0
# submission["class_1"] = 1 - p0
# submission.to_csv('submission.csv', index=False)

In [26]:
# submission_df = pd.read_csv('submission.csv')
# submission_df