# Regression Baseline

In [1]:
import os
import pandas as pd
from iterstrat.ml_stratifiers import MultilabelStratifiedKFold

from configs.regressor0_cfg import SimpleRegressorConfig
from src.models.regressors import BaselineRegressor
from src.utils.trainer import Trainer

In [2]:
# Instantiate the experiment configuration with its default field values.
# The cells below read cfg.data_dir, cfg.n_fold, cfg.seed, cfg.target_columns,
# cfg.training_dir and cfg.device from this single object.
cfg = SimpleRegressorConfig()
# Bare expression: the notebook echoes the config repr so the exact settings
# used for this run are recorded alongside the results.
cfg

SimpleRegressorConfig(seed=1997, n_fold=5, target_columns=['cohesion', 'syntax', 'vocabulary', 'phraseology', 'grammar', 'conventions'], data_dir='./data/fb3', training_dir='./outputs', model='microsoft/deberta-v3-base', criterion='l1', plm_size=768, gradient_checkpointing=True, apex=True, num_workers=4, epoch=5, batch_size=2, max_len=512, encoder_lr=1e-05, decoder_lr=0.0001, weight_decay=0.01, eps=1e-06, betas=(0.9, 0.999), scheduler='cosine', warmup_ratio=0.1, num_cycles=5, device='cpu')

In [4]:
# Assign every training row to one of cfg.n_fold cross-validation folds.
# The splitter is stratified on the cfg.target_columns labels, and cfg.seed is
# passed as random_state so re-running the cell reproduces the same split.
train_df = pd.read_csv(os.path.join(cfg.data_dir, 'train.csv'))
Fold = MultilabelStratifiedKFold(n_splits=cfg.n_fold, shuffle=True, random_state=cfg.seed)

# Create the column up front with an integer sentinel instead of letting the
# first assignment create it as float/NaN; -1 means "not assigned yet".
train_df['fold'] = -1
fold_col = train_df.columns.get_loc('fold')
for n, (train_index, val_index) in enumerate(Fold.split(train_df, train_df[cfg.target_columns])):
    # The splitter follows the scikit-learn convention of yielding positional
    # row indices, so write through .iloc: label-based .loc is only equivalent
    # while train_df keeps its default RangeIndex.
    train_df.iloc[val_index, fold_col] = n
train_df['fold'] = train_df['fold'].astype(int)

# Each row must be the validation sample of some fold; fail loudly here rather
# than silently training on a frame with unassigned rows.
assert (train_df['fold'] >= 0).all(), 'some rows were not assigned to a fold'

train_df.head()

Unnamed: 0,text_id,full_text,cohesion,syntax,vocabulary,phraseology,grammar,conventions,fold
0,0016926B079C,I think that students would benefit from learn...,3.5,3.5,3.0,3.0,4.0,3.0,1
1,0022683E9EA5,When a problem is a change you have to let it ...,2.5,2.5,3.0,2.0,2.0,2.5,0
2,00299B378633,"Dear, Principal\n\nIf u change the school poli...",3.0,3.5,3.0,3.0,3.0,2.5,3
3,003885A45F42,The best time in life is when you become yours...,4.5,4.5,4.5,4.5,4.0,5.0,3
4,0049B1DF5CCC,Small act of kindness can impact in other peop...,2.5,3.0,3.0,3.0,2.5,2.5,0


In [6]:
# Train one model per cross-validation fold.
# N_FOLDS_TO_TRAIN caps how many folds are actually run: 1 keeps this notebook
# a quick single-fold baseline (fold 0 only); set it to cfg.n_fold to train
# every fold.
N_FOLDS_TO_TRAIN = 1

for fold_id in range(min(N_FOLDS_TO_TRAIN, cfg.n_fold)):
    # Rows tagged with fold_id are held out for validation; all other rows
    # form the training set. Indices are reset so both frames start at 0.
    fold_train = train_df[train_df['fold'] != fold_id].reset_index(drop=True)
    fold_valid = train_df[train_df['fold'] == fold_id].reset_index(drop=True)

    # A new model is built inside the loop so each fold starts from its own
    # freshly constructed instance.
    model = BaselineRegressor(cfg, cfg_path=None, use_pretrained=True)

    trainer = Trainer(
        cfg=cfg,
        model=model,
        fold=fold_id,
        out_dir=cfg.training_dir,
        train_samples=fold_train,
        val_samples=fold_valid,
        test_samples=None,  # no held-out test set is passed in this baseline
        device=cfg.device
    )

    trainer.fit()


Some weights of the model checkpoint at microsoft/deberta-v3-base were not used when initializing DebertaV2Model: ['mask_predictions.LayerNorm.weight', 'mask_predictions.dense.weight', 'mask_predictions.classifier.weight', 'lm_predictions.lm_head.LayerNorm.bias', 'lm_predictions.lm_head.bias', 'lm_predictions.lm_head.LayerNorm.weight', 'mask_predictions.classifier.bias', 'mask_predictions.LayerNorm.bias', 'mask_predictions.dense.bias', 'lm_predictions.lm_head.dense.bias', 'lm_predictions.lm_head.dense.weight']
- This IS expected if you are initializing DebertaV2Model from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing DebertaV2Model from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


ProxyError: HTTPSConnectionPool(host='huggingface.co', port=443): Max retries exceeded with url: /microsoft/deberta-v3-base/resolve/main/added_tokens.json (Caused by ProxyError('Cannot connect to proxy.', TimeoutError('_ssl.c:980: The handshake operation timed out')))