In [6]:
import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from datetime import datetime
from tqdm import tqdm
from scipy.interpolate import interp1d
from sklearn.preprocessing import RobustScaler
from sklearn.metrics import f1_score, mean_absolute_error
import math
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.nn import TransformerEncoder, TransformerEncoderLayer, LayerNorm
from torch.utils.data import TensorDataset, DataLoader, WeightedRandomSampler

In [7]:
dataDic = {"train": pd.read_csv("../src/train_timeseries/train_timeseries.csv"),
           "test": pd.read_csv("../src/test_timeseries/test_timeseries.csv"),
           "validation": pd.read_csv("../src/validation_timeseries/validation_timeseries.csv"),
           "soil" : pd.read_csv("../src/soil_data.csv"),
           }

In [8]:
class2id = {
    'None': 0,
    'D0': 1,
    'D1': 2,
    'D2': 3,
    'D3': 4,
    'D4': 5,
}
id2class = {v: k for k, v in class2id.items()}

In [9]:
dfs = {
    k: dataDic[k].set_index(['fips', 'date'])
    for k in dataDic.keys() if k != "soil"
}

In [10]:
def date_encode(date):
    if isinstance(date, str):
        date = datetime.strptime(date, "%Y-%m-%d")
    return (
        np.sin(2 * np.pi * date.timetuple().tm_yday / 366),
        np.cos(2 * np.pi * date.timetuple().tm_yday / 366),
    )

In [11]:
def interpolate_nans(padata, pkind='linear'):
    """
    see: https://stackoverflow.com/a/53050216/2167159
    """
    aindexes = np.arange(padata.shape[0])
    agood_indexes, = np.where(np.isfinite(padata))
    f = interp1d(agood_indexes
               , padata[agood_indexes]
               , bounds_error=False
               , copy=False
               , fill_value="extrapolate"
               , kind=pkind)
    return f(aindexes)

In [12]:
def loadXY(
    df,
    random_state=42,
    window_size=180, # how many days in the past (default/competition: 180)
    target_size=6, # how many weeks into the future (default/competition: 6)
    fuse_past=True, # add the past drought observations? (default: True)
    return_fips=False, # return the county identifier (do not use for predictions)
    encode_season=True, # encode the season using the function above (default: True) 
    use_prev_year=False, # add observations from 1 year prior?
):
    df = dfs[df]
    soil_df = dfs["soil"]
    time_data_cols = sorted(
        [c for c in df.columns if c not in ["fips", "date", "score"]]
    )
    static_data_cols = sorted(
        [c for c in soil_df.columns if c not in ["soil", "lat", "lon"]]
    )
    count = 0
    score_df = df.dropna(subset=["score"])
    X_static = np.empty((len(df) // window_size, len(static_data_cols)))
    X_fips_date = []
    add_dim = 0
    if use_prev_year:
        add_dim += len(time_data_cols)
    if fuse_past:
        add_dim += 1
        if use_prev_year:
            add_dim += 1
    if encode_season:
        add_dim += 2
    X_time = np.empty(
        (len(df) // window_size, window_size, len(time_data_cols) + add_dim)
    )
    y_past = np.empty((len(df) // window_size, window_size))
    y_target = np.empty((len(df) // window_size, target_size))
    if random_state is not None:
        np.random.seed(random_state)
    for fips in tqdm(score_df.index.get_level_values(0).unique()):
        if random_state is not None:
            start_i = np.random.randint(1, window_size)
        else:
            start_i = 1
        fips_df = df[(df.index.get_level_values(0) == fips)]
        X = fips_df[time_data_cols].values
        y = fips_df["score"].values
        X_s = soil_df[soil_df["fips"] == fips][static_data_cols].values[0]
        for i in range(start_i, len(y) - (window_size + target_size * 7), window_size):
            X_fips_date.append((fips, fips_df.index[i : i + window_size][-1]))
            X_time[count, :, : len(time_data_cols)] = X[i : i + window_size]
            if use_prev_year:
                if i < 365 or len(X[i - 365 : i + window_size - 365]) < window_size:
                    continue
                X_time[count, :, -len(time_data_cols) :] = X[
                    i - 365 : i + window_size - 365
                ]
            if not fuse_past:
                y_past[count] = interpolate_nans(y[i : i + window_size])
            else:
                X_time[count, :, len(time_data_cols)] = interpolate_nans(
                    y[i : i + window_size]
                )
            if encode_season:
                enc_dates = [
                    date_encode(d) for f, d in fips_df.index[i : i + window_size].values
                ]
                d_sin, d_cos = [s for s, c in enc_dates], [c for s, c in enc_dates]
                X_time[count, :, len(time_data_cols) + (add_dim - 2)] = d_sin
                X_time[count, :, len(time_data_cols) + (add_dim - 2) + 1] = d_cos
            temp_y = y[i + window_size : i + window_size + target_size * 7]
            y_target[count] = np.array(temp_y[~np.isnan(temp_y)][:target_size])
            X_static[count] = X_s
            count += 1
    print(f"loaded {count} samples")
    results = [X_static[:count], X_time[:count], y_target[:count]]
    if not fuse_past:
        results.append(y_past[:count])
    if return_fips:
        results.append(X_fips_date)
    return results

In [13]:
scaler_dict = {}
scaler_dict_static = {}
scaler_dict_past = {}


def normalize(X_static, X_time, y_past=None, fit=False):
    for index in tqdm(range(X_time.shape[-1])):
        if fit:
            scaler_dict[index] = RobustScaler().fit(X_time[:, :, index].reshape(-1, 1))
        X_time[:, :, index] = (
            scaler_dict[index]
            .transform(X_time[:, :, index].reshape(-1, 1))
            .reshape(-1, X_time.shape[-2])
        )
    for index in tqdm(range(X_static.shape[-1])):
        if fit:
            scaler_dict_static[index] = RobustScaler().fit(
                X_static[:, index].reshape(-1, 1)
            )
        X_static[:, index] = (
            scaler_dict_static[index]
            .transform(X_static[:, index].reshape(-1, 1))
            .reshape(1, -1)
        )
    index = 0
    if y_past is not None:
        if fit:
            scaler_dict_past[index] = RobustScaler().fit(y_past.reshape(-1, 1))
        y_past[:, :] = (
            scaler_dict_past[index]
            .transform(y_past.reshape(-1, 1))
            .reshape(-1, y_past.shape[-1])
        )
        return X_static, X_time, y_past
    return X_static, X_time

In [15]:
import pickle
with open("../data/data.pkl", "rb") as f:
    data = pickle.load(f)
    X_tabular_train = data["X_tabular_train"]
    X_time_train = data["X_time_train"]
    y_target_train = data["y_target_train"]
    X_tabular_validation = data["X_tabular_validation"]
    X_time_valid = data["X_time_valid"]
    y_target_valid = data["y_target_valid"]
    valid_fips = data["valid_fips"]
    X_tabular_test = data["X_tabular_test"]
    X_time_test = data["X_time_test"]
    y_target_test = data["y_target_test"]
    test_fips = data["test_fips"]

In [16]:
X_tabular_train, X_time_train = normalize(X_tabular_train, X_time_train, fit=True)
X_tabular_validation, X_time_valid = normalize(X_tabular_validation, X_time_valid)
X_tabular_test, X_time_test = normalize(X_tabular_test, X_time_test)

100%|██████████| 40/40 [00:38<00:00,  1.04it/s]
100%|██████████| 30/30 [00:00<00:00, 359.82it/s]
100%|██████████| 40/40 [00:00<00:00, 173.48it/s]
100%|██████████| 30/30 [00:00<00:00, 11999.73it/s]
100%|██████████| 40/40 [00:00<00:00, 178.98it/s]
100%|██████████| 30/30 [00:00<00:00, 13119.50it/s]


In [17]:
print(X_time_train.shape, X_time_valid.shape, X_time_test.shape)

(97099, 180, 40) (2457, 180, 40) (2477, 180, 40)


In [18]:
batch_size = 128
output_weeks = 6

train_data = TensorDataset(
    torch.tensor(X_time_train),
    torch.tensor(X_tabular_train),
    torch.tensor(y_target_train[:, :output_weeks]),
)
train_loader = DataLoader(
    train_data, shuffle=True, batch_size=batch_size, drop_last=False
)
valid_data = TensorDataset(
    torch.tensor(X_time_valid),
    torch.tensor(X_tabular_validation),
    torch.tensor(y_target_valid[:, :output_weeks]),
)
valid_loader = DataLoader(
    valid_data, shuffle=False, batch_size=batch_size, drop_last=False
)

test_data = TensorDataset(
    torch.tensor(X_time_test),
    torch.tensor(X_tabular_test),
    torch.tensor(y_target_test[:, :output_weeks]),
)
test_loader = DataLoader(
    test_data, shuffle=False, batch_size=batch_size, drop_last=False
)


In [19]:
class PositionalEncoding(nn.Module):

    def __init__(self, d_model, dropout=0.1, max_len=5000):
        super(PositionalEncoding, self).__init__()
        self.dropout = nn.Dropout(p=dropout)

        if d_model % 2 != 0:
            pe = torch.zeros(max_len, d_model+1)
        else:
            pe = torch.zeros(max_len, d_model)
        position = torch.arange(0, max_len, dtype=torch.float).unsqueeze(1)
        div_term = torch.exp(torch.arange(0, d_model, 2).float() * (-math.log(10000.0) / d_model))
        print(div_term.shape)
        pe[:, 0::2] = torch.sin(position * div_term)
        pe[:, 1::2] = torch.cos(position * div_term)
        if d_model % 2 != 0:
            pe = pe[:, :-1]
        pe = pe.unsqueeze(0).transpose(0, 1)
        self.register_buffer('pe', pe)

    def forward(self, x):
        x = x + self.pe[:x.size(0), :]
        return self.dropout(x)

class DroughtNetTransformer(nn.Module):

    def __init__(self, output_size, num_input_features, hidden_dim, n_layers, ffnn_layers,
        drop_prob, static_dim, num_heads, input_length, init_dim=128):
        super(DroughtNetTransformer, self).__init__()
        self.model_type = 'Transformer'
        self.init_linear = nn.Linear(num_input_features, init_dim)
        self.pos_encoder = PositionalEncoding(init_dim, drop_prob, input_length)
        encoder_layers = TransformerEncoderLayer(init_dim, num_heads, hidden_dim, drop_prob)
        encoder_norm = LayerNorm(init_dim)
        self.transformer_encoder = TransformerEncoder(encoder_layers, n_layers, encoder_norm)
        self.ninp = num_input_features
        # todo: add several layers given ffnn_layers
        # todo: add static
        self.ffnn_layers = []
        if ffnn_layers == 1:
            self.final = nn.Linear(init_dim*input_length, output_size)
        else:
            self.final = nn.Linear(hidden_dim, output_size)
            
        for i in range(ffnn_layers-1):
            if i == 0:
                self.ffnn_layers.append(nn.Linear(init_dim*input_length+static_dim, hidden_dim))
            else:
                self.ffnn_layers.append(nn.Linear(hidden_dim, hidden_dim))
                
        self.ffnn_layers = nn.ModuleList(self.ffnn_layers)

        self.init_dim = init_dim
        self.input_length = input_length
        self.output_size = output_size
        
        self.init_weights()

    def init_weights(self):
        pass # possibly use initalization here

    def forward(self, x, static=None):
        # todo add static
        batch_size = x.size(0)
        x = x.cuda().to(dtype=torch.float32)
        if static is not None:
            static = static.cuda().to(dtype=torch.float32)
        x = self.init_linear(x)
        x = x * math.sqrt(self.ninp)
        output = self.pos_encoder(x)
        output = self.transformer_encoder(x)
        output = output.reshape(
            batch_size,
            self.init_dim*self.input_length
        )
        for i in range(len(self.ffnn_layers)):
            if i == 0 and static is not None:
                output = self.ffnn_layers[i](torch.cat((output, static), 1))
            else:
                output = self.ffnn_layers[i](output)
        output = self.final(output)
        return output

In [20]:
hidden_dim = 128
n_layers = 4
ffnn_layers = 2
dropout = 0.1
static_dim = X_tabular_train.shape[1]
n_heads = 2
window_size = 180
lr = 7e-5
epochs = 7

In [26]:
is_cuda = torch.cuda.is_available()
if is_cuda:
    device = torch.device("cuda")
    print("using GPU")
else:
    device = torch.device("cpu")
    print("using CPU")

model = DroughtNetTransformer(
            output_weeks,
            X_time_train.shape[-1],
            hidden_dim,
            n_layers,
            ffnn_layers,
            dropout,
            static_dim,
            n_heads,
            window_size
        )
model.to(device)

loss_function = nn.MSELoss()
optimizer = torch.optim.AdamW(model.parameters(), lr=lr)
scheduler = torch.optim.lr_scheduler.OneCycleLR(optimizer, max_lr=lr, 
                                                steps_per_epoch=len(train_loader),
                                                epochs=epochs)

counter = 0
valid_loss_min = np.inf
torch.manual_seed(42)
np.random.seed(42)

for i in range(epochs):
    for k, (inputs, static, labels) in tqdm(
            enumerate(train_loader),
            desc=f"epoch {i+1}/{epochs}",
            total=len(train_loader),
        ):
        model.train()
        counter += 1
        inputs, labels, static = (
                inputs.to(device),
                labels.to(device),
                static.to(device),
            )
        model.zero_grad()
        output = model(inputs, static)
        loss = loss_function(output, labels.float())
        nn.utils.clip_grad_norm_(model.parameters(), 5)
        optimizer.step()
        scheduler.step()

        with torch.no_grad():
            if k == len(train_loader) - 1 or k == (len(train_loader) - 1) // 2:
                val_losses = []
                model.eval()
                labels = []
                preds = []
                raw_labels = []
                raw_preds = []
                for inp, stat, lab in valid_loader:
                    inp, stat, lab = (
                        inp.to(device),
                        stat.to(device),
                        lab.to(device),
                    )
                    out = model(inp, stat)
                    val_loss = loss_function(out, lab.float())
                    val_losses.append(val_loss.item())
                    for labs in lab:
                        labels.append([int(l.round()) for l in labs])
                        raw_labels.append([float(l) for l in labs])
                    for pred in out:
                        preds.append([int(p.round()) for p in pred])
                        raw_preds.append([float(p) for p in pred])

                    # log data
                labels = np.array(labels)
                preds = np.clip(np.array(preds), 0, 5)
                raw_preds = np.array(raw_preds)
                raw_labels = np.array(raw_labels)

                for i in range(output_weeks):
                    log_dict = {
                        "loss": float(loss),
                        "epoch": counter / len(train_loader),
                        "step": counter,
                        "lr": optimizer.param_groups[0]["lr"],
                        "week": i + 1,
                    }
                    # w = f'week_{i+1}_'
                    w = ""
                    log_dict[f"{w}validation_loss"] = np.mean(val_losses)
                    log_dict[f"{w}macro_f1"] = f1_score(
                        labels[:, i], preds[:, i], average="macro"
                    )
                    log_dict[f"{w}micro_f1"] = f1_score(
                        labels[:, i], preds[:, i], average="micro"
                    )
                    log_dict[f"{w}mae"] = mean_absolute_error(
                        raw_labels[:, i], raw_preds[:, i]
                    )
                    print(log_dict)
                    # writer.add_scalars("Loss(MSE)", {'train': loss,
                    #                                  'validation': log_dict[f"{w}validation_loss"]},
                    #                                  counter)
                    # writer.add_scalars("F1(MSE)", {'macro': log_dict[f"{w}macro_f1"],
                    #                                'micro': log_dict[f"{w}micro_f1"]},
                    #                                counter)
                    # writer.add_scalar("MAE", log_dict[f"{w}mae"],
                    #                   counter)
                    # writer.add_scalar("Learning-Rate", log_dict["lr"],
                    #                   counter)
                    for j, f1 in enumerate(
                        f1_score(labels[:, i], preds[:, i], average=None)
                    ):
                        log_dict[f"{w}{id2class[j]}_f1"] = f1
                    model.train()
                if np.mean(val_losses) <= valid_loss_min:
                    torch.save(model.state_dict(), "./Transformer_first_test.pt")
                    print(
                        "Validation loss decreased ({:.6f} --> {:.6f}).  Saving model ...".format(
                            valid_loss_min, np.mean(val_losses)
                        )
                    )
                    valid_loss_min = np.mean(val_losses)



using GPU
torch.Size([64])


epoch 1/7:  51%|█████     | 385/759 [00:12<00:36, 10.35it/s]

{'loss': 1.7980451583862305, 'epoch': 0.5006587615283268, 'step': 380, 'lr': 1.1802751618442933e-05, 'week': 1, 'validation_loss': np.float64(1.1378739580512047), 'macro_f1': np.float64(0.14087242937375957), 'micro_f1': np.float64(0.7057387057387058), 'mae': np.float64(0.6366035369382649)}
{'loss': 1.7980451583862305, 'epoch': 0.5006587615283268, 'step': 380, 'lr': 1.1802751618442933e-05, 'week': 2, 'validation_loss': np.float64(1.1378739580512047), 'macro_f1': np.float64(0.14084210101765637), 'micro_f1': np.float64(0.7138787138787139), 'mae': np.float64(0.7029591009501307)}
{'loss': 1.7980451583862305, 'epoch': 0.5006587615283268, 'step': 380, 'lr': 1.1802751618442933e-05, 'week': 3, 'validation_loss': np.float64(1.1378739580512047), 'macro_f1': np.float64(0.1625640301523715), 'micro_f1': np.float64(0.671957671957672), 'mae': np.float64(0.6085960524943783)}
{'loss': 1.7980451583862305, 'epoch': 0.5006587615283268, 'step': 380, 'lr': 1.1802751618442933e-05, 'week': 4, 'validation_loss'

epoch 1/7: 100%|██████████| 759/759 [00:24<00:00, 31.48it/s]


{'loss': 1.4573363065719604, 'epoch': 1.0, 'step': 759, 'lr': 3.3920537677468094e-05, 'week': 1, 'validation_loss': np.float64(1.1378739580512047), 'macro_f1': np.float64(0.14087242937375957), 'micro_f1': np.float64(0.7057387057387058), 'mae': np.float64(0.6366035369382649)}
{'loss': 1.4573363065719604, 'epoch': 1.0, 'step': 759, 'lr': 3.3920537677468094e-05, 'week': 2, 'validation_loss': np.float64(1.1378739580512047), 'macro_f1': np.float64(0.14084210101765637), 'micro_f1': np.float64(0.7138787138787139), 'mae': np.float64(0.7029591009501307)}
{'loss': 1.4573363065719604, 'epoch': 1.0, 'step': 759, 'lr': 3.3920537677468094e-05, 'week': 3, 'validation_loss': np.float64(1.1378739580512047), 'macro_f1': np.float64(0.1625640301523715), 'micro_f1': np.float64(0.671957671957672), 'mae': np.float64(0.6085960524943783)}
{'loss': 1.4573363065719604, 'epoch': 1.0, 'step': 759, 'lr': 3.3920537677468094e-05, 'week': 4, 'validation_loss': np.float64(1.1378739580512047), 'macro_f1': np.float64(0.1

epoch 2/7:  51%|█████     | 386/759 [00:12<00:38,  9.69it/s]

{'loss': 2.11326265335083, 'epoch': 1.5006587615283267, 'step': 1139, 'lr': 5.741210937787608e-05, 'week': 1, 'validation_loss': np.float64(1.1378739580512047), 'macro_f1': np.float64(0.14087242937375957), 'micro_f1': np.float64(0.7057387057387058), 'mae': np.float64(0.6366035369382649)}
{'loss': 2.11326265335083, 'epoch': 1.5006587615283267, 'step': 1139, 'lr': 5.741210937787608e-05, 'week': 2, 'validation_loss': np.float64(1.1378739580512047), 'macro_f1': np.float64(0.14084210101765637), 'micro_f1': np.float64(0.7138787138787139), 'mae': np.float64(0.7029591009501307)}
{'loss': 2.11326265335083, 'epoch': 1.5006587615283267, 'step': 1139, 'lr': 5.741210937787608e-05, 'week': 3, 'validation_loss': np.float64(1.1378739580512047), 'macro_f1': np.float64(0.1625640301523715), 'micro_f1': np.float64(0.671957671957672), 'mae': np.float64(0.6085960524943783)}
{'loss': 2.11326265335083, 'epoch': 1.5006587615283267, 'step': 1139, 'lr': 5.741210937787608e-05, 'week': 4, 'validation_loss': np.flo

epoch 2/7: 100%|██████████| 759/759 [00:24<00:00, 31.24it/s]


{'loss': 2.3277134895324707, 'epoch': 2.0, 'step': 1518, 'lr': 6.963406348756813e-05, 'week': 1, 'validation_loss': np.float64(1.1378739580512047), 'macro_f1': np.float64(0.14087242937375957), 'micro_f1': np.float64(0.7057387057387058), 'mae': np.float64(0.6366035369382649)}
{'loss': 2.3277134895324707, 'epoch': 2.0, 'step': 1518, 'lr': 6.963406348756813e-05, 'week': 2, 'validation_loss': np.float64(1.1378739580512047), 'macro_f1': np.float64(0.14084210101765637), 'micro_f1': np.float64(0.7138787138787139), 'mae': np.float64(0.7029591009501307)}
{'loss': 2.3277134895324707, 'epoch': 2.0, 'step': 1518, 'lr': 6.963406348756813e-05, 'week': 3, 'validation_loss': np.float64(1.1378739580512047), 'macro_f1': np.float64(0.1625640301523715), 'micro_f1': np.float64(0.671957671957672), 'mae': np.float64(0.6085960524943783)}
{'loss': 2.3277134895324707, 'epoch': 2.0, 'step': 1518, 'lr': 6.963406348756813e-05, 'week': 4, 'validation_loss': np.float64(1.1378739580512047), 'macro_f1': np.float64(0.1

epoch 3/7:  51%|█████     | 386/759 [00:12<00:38,  9.68it/s]

{'loss': 2.2867095470428467, 'epoch': 2.5006587615283267, 'step': 1898, 'lr': 6.884404865301213e-05, 'week': 1, 'validation_loss': np.float64(1.1378739580512047), 'macro_f1': np.float64(0.14087242937375957), 'micro_f1': np.float64(0.7057387057387058), 'mae': np.float64(0.6366035369382649)}
{'loss': 2.2867095470428467, 'epoch': 2.5006587615283267, 'step': 1898, 'lr': 6.884404865301213e-05, 'week': 2, 'validation_loss': np.float64(1.1378739580512047), 'macro_f1': np.float64(0.14084210101765637), 'micro_f1': np.float64(0.7138787138787139), 'mae': np.float64(0.7029591009501307)}
{'loss': 2.2867095470428467, 'epoch': 2.5006587615283267, 'step': 1898, 'lr': 6.884404865301213e-05, 'week': 3, 'validation_loss': np.float64(1.1378739580512047), 'macro_f1': np.float64(0.1625640301523715), 'micro_f1': np.float64(0.671957671957672), 'mae': np.float64(0.6085960524943783)}
{'loss': 2.2867095470428467, 'epoch': 2.5006587615283267, 'step': 1898, 'lr': 6.884404865301213e-05, 'week': 4, 'validation_loss'

epoch 3/7: 100%|██████████| 759/759 [00:24<00:00, 31.03it/s]


{'loss': 2.041424512863159, 'epoch': 3.0, 'step': 2277, 'lr': 6.431696711982113e-05, 'week': 1, 'validation_loss': np.float64(1.1378739580512047), 'macro_f1': np.float64(0.14087242937375957), 'micro_f1': np.float64(0.7057387057387058), 'mae': np.float64(0.6366035369382649)}
{'loss': 2.041424512863159, 'epoch': 3.0, 'step': 2277, 'lr': 6.431696711982113e-05, 'week': 2, 'validation_loss': np.float64(1.1378739580512047), 'macro_f1': np.float64(0.14084210101765637), 'micro_f1': np.float64(0.7138787138787139), 'mae': np.float64(0.7029591009501307)}
{'loss': 2.041424512863159, 'epoch': 3.0, 'step': 2277, 'lr': 6.431696711982113e-05, 'week': 3, 'validation_loss': np.float64(1.1378739580512047), 'macro_f1': np.float64(0.1625640301523715), 'micro_f1': np.float64(0.671957671957672), 'mae': np.float64(0.6085960524943783)}
{'loss': 2.041424512863159, 'epoch': 3.0, 'step': 2277, 'lr': 6.431696711982113e-05, 'week': 4, 'validation_loss': np.float64(1.1378739580512047), 'macro_f1': np.float64(0.14835

epoch 4/7:  51%|█████     | 386/759 [00:12<00:38,  9.66it/s]

{'loss': 2.099496603012085, 'epoch': 3.5006587615283267, 'step': 2657, 'lr': 5.6787505961414953e-05, 'week': 1, 'validation_loss': np.float64(1.1378739580512047), 'macro_f1': np.float64(0.14087242937375957), 'micro_f1': np.float64(0.7057387057387058), 'mae': np.float64(0.6366035369382649)}
{'loss': 2.099496603012085, 'epoch': 3.5006587615283267, 'step': 2657, 'lr': 5.6787505961414953e-05, 'week': 2, 'validation_loss': np.float64(1.1378739580512047), 'macro_f1': np.float64(0.14084210101765637), 'micro_f1': np.float64(0.7138787138787139), 'mae': np.float64(0.7029591009501307)}
{'loss': 2.099496603012085, 'epoch': 3.5006587615283267, 'step': 2657, 'lr': 5.6787505961414953e-05, 'week': 3, 'validation_loss': np.float64(1.1378739580512047), 'macro_f1': np.float64(0.1625640301523715), 'micro_f1': np.float64(0.671957671957672), 'mae': np.float64(0.6085960524943783)}
{'loss': 2.099496603012085, 'epoch': 3.5006587615283267, 'step': 2657, 'lr': 5.6787505961414953e-05, 'week': 4, 'validation_loss'

epoch 4/7: 100%|██████████| 759/759 [00:24<00:00, 30.87it/s]


{'loss': 2.200443744659424, 'epoch': 4.0, 'step': 3036, 'lr': 4.7060118397295637e-05, 'week': 1, 'validation_loss': np.float64(1.1378739580512047), 'macro_f1': np.float64(0.14087242937375957), 'micro_f1': np.float64(0.7057387057387058), 'mae': np.float64(0.6366035369382649)}
{'loss': 2.200443744659424, 'epoch': 4.0, 'step': 3036, 'lr': 4.7060118397295637e-05, 'week': 2, 'validation_loss': np.float64(1.1378739580512047), 'macro_f1': np.float64(0.14084210101765637), 'micro_f1': np.float64(0.7138787138787139), 'mae': np.float64(0.7029591009501307)}
{'loss': 2.200443744659424, 'epoch': 4.0, 'step': 3036, 'lr': 4.7060118397295637e-05, 'week': 3, 'validation_loss': np.float64(1.1378739580512047), 'macro_f1': np.float64(0.1625640301523715), 'micro_f1': np.float64(0.671957671957672), 'mae': np.float64(0.6085960524943783)}
{'loss': 2.200443744659424, 'epoch': 4.0, 'step': 3036, 'lr': 4.7060118397295637e-05, 'week': 4, 'validation_loss': np.float64(1.1378739580512047), 'macro_f1': np.float64(0.1

epoch 5/7:  51%|█████     | 386/759 [00:12<00:38,  9.65it/s]

{'loss': 1.996777057647705, 'epoch': 4.500658761528327, 'step': 3416, 'lr': 3.607761507918826e-05, 'week': 1, 'validation_loss': np.float64(1.1378739580512047), 'macro_f1': np.float64(0.14087242937375957), 'micro_f1': np.float64(0.7057387057387058), 'mae': np.float64(0.6366035369382649)}
{'loss': 1.996777057647705, 'epoch': 4.500658761528327, 'step': 3416, 'lr': 3.607761507918826e-05, 'week': 2, 'validation_loss': np.float64(1.1378739580512047), 'macro_f1': np.float64(0.14084210101765637), 'micro_f1': np.float64(0.7138787138787139), 'mae': np.float64(0.7029591009501307)}
{'loss': 1.996777057647705, 'epoch': 4.500658761528327, 'step': 3416, 'lr': 3.607761507918826e-05, 'week': 3, 'validation_loss': np.float64(1.1378739580512047), 'macro_f1': np.float64(0.1625640301523715), 'micro_f1': np.float64(0.671957671957672), 'mae': np.float64(0.6085960524943783)}
{'loss': 1.996777057647705, 'epoch': 4.500658761528327, 'step': 3416, 'lr': 3.607761507918826e-05, 'week': 4, 'validation_loss': np.flo

epoch 5/7: 100%|██████████| 759/759 [00:24<00:00, 30.82it/s]


{'loss': 2.8801796436309814, 'epoch': 5.0, 'step': 3795, 'lr': 2.501337481423089e-05, 'week': 1, 'validation_loss': np.float64(1.1378739580512047), 'macro_f1': np.float64(0.14087242937375957), 'micro_f1': np.float64(0.7057387057387058), 'mae': np.float64(0.6366035369382649)}
{'loss': 2.8801796436309814, 'epoch': 5.0, 'step': 3795, 'lr': 2.501337481423089e-05, 'week': 2, 'validation_loss': np.float64(1.1378739580512047), 'macro_f1': np.float64(0.14084210101765637), 'micro_f1': np.float64(0.7138787138787139), 'mae': np.float64(0.7029591009501307)}
{'loss': 2.8801796436309814, 'epoch': 5.0, 'step': 3795, 'lr': 2.501337481423089e-05, 'week': 3, 'validation_loss': np.float64(1.1378739580512047), 'macro_f1': np.float64(0.1625640301523715), 'micro_f1': np.float64(0.671957671957672), 'mae': np.float64(0.6085960524943783)}
{'loss': 2.8801796436309814, 'epoch': 5.0, 'step': 3795, 'lr': 2.501337481423089e-05, 'week': 4, 'validation_loss': np.float64(1.1378739580512047), 'macro_f1': np.float64(0.1

epoch 6/7:  51%|█████     | 386/759 [00:12<00:41,  9.09it/s]

{'loss': 1.7360825538635254, 'epoch': 5.500658761528327, 'step': 4175, 'lr': 1.4939780449954502e-05, 'week': 1, 'validation_loss': np.float64(1.1378739580512047), 'macro_f1': np.float64(0.14087242937375957), 'micro_f1': np.float64(0.7057387057387058), 'mae': np.float64(0.6366035369382649)}
{'loss': 1.7360825538635254, 'epoch': 5.500658761528327, 'step': 4175, 'lr': 1.4939780449954502e-05, 'week': 2, 'validation_loss': np.float64(1.1378739580512047), 'macro_f1': np.float64(0.14084210101765637), 'micro_f1': np.float64(0.7138787138787139), 'mae': np.float64(0.7029591009501307)}
{'loss': 1.7360825538635254, 'epoch': 5.500658761528327, 'step': 4175, 'lr': 1.4939780449954502e-05, 'week': 3, 'validation_loss': np.float64(1.1378739580512047), 'macro_f1': np.float64(0.1625640301523715), 'micro_f1': np.float64(0.671957671957672), 'mae': np.float64(0.6085960524943783)}
{'loss': 1.7360825538635254, 'epoch': 5.500658761528327, 'step': 4175, 'lr': 1.4939780449954502e-05, 'week': 4, 'validation_loss'

epoch 6/7: 100%|██████████| 759/759 [00:24<00:00, 30.61it/s]


{'loss': 1.945360779762268, 'epoch': 6.0, 'step': 4554, 'lr': 6.933102286160286e-06, 'week': 1, 'validation_loss': np.float64(1.1378739580512047), 'macro_f1': np.float64(0.14087242937375957), 'micro_f1': np.float64(0.7057387057387058), 'mae': np.float64(0.6366035369382649)}
{'loss': 1.945360779762268, 'epoch': 6.0, 'step': 4554, 'lr': 6.933102286160286e-06, 'week': 2, 'validation_loss': np.float64(1.1378739580512047), 'macro_f1': np.float64(0.14084210101765637), 'micro_f1': np.float64(0.7138787138787139), 'mae': np.float64(0.7029591009501307)}
{'loss': 1.945360779762268, 'epoch': 6.0, 'step': 4554, 'lr': 6.933102286160286e-06, 'week': 3, 'validation_loss': np.float64(1.1378739580512047), 'macro_f1': np.float64(0.1625640301523715), 'micro_f1': np.float64(0.671957671957672), 'mae': np.float64(0.6085960524943783)}
{'loss': 1.945360779762268, 'epoch': 6.0, 'step': 4554, 'lr': 6.933102286160286e-06, 'week': 4, 'validation_loss': np.float64(1.1378739580512047), 'macro_f1': np.float64(0.14835

epoch 7/7:  51%|█████     | 386/759 [00:12<00:38,  9.64it/s]

{'loss': 1.247196912765503, 'epoch': 6.500658761528327, 'step': 4934, 'lr': 1.7693741148608977e-06, 'week': 1, 'validation_loss': np.float64(1.1378739580512047), 'macro_f1': np.float64(0.14087242937375957), 'micro_f1': np.float64(0.7057387057387058), 'mae': np.float64(0.6366035369382649)}
{'loss': 1.247196912765503, 'epoch': 6.500658761528327, 'step': 4934, 'lr': 1.7693741148608977e-06, 'week': 2, 'validation_loss': np.float64(1.1378739580512047), 'macro_f1': np.float64(0.14084210101765637), 'micro_f1': np.float64(0.7138787138787139), 'mae': np.float64(0.7029591009501307)}
{'loss': 1.247196912765503, 'epoch': 6.500658761528327, 'step': 4934, 'lr': 1.7693741148608977e-06, 'week': 3, 'validation_loss': np.float64(1.1378739580512047), 'macro_f1': np.float64(0.1625640301523715), 'micro_f1': np.float64(0.671957671957672), 'mae': np.float64(0.6085960524943783)}
{'loss': 1.247196912765503, 'epoch': 6.500658761528327, 'step': 4934, 'lr': 1.7693741148608977e-06, 'week': 4, 'validation_loss': np

epoch 7/7: 100%|██████████| 759/759 [00:24<00:00, 30.73it/s]

{'loss': 1.837167501449585, 'epoch': 7.0, 'step': 5313, 'lr': 2.9248706347149187e-10, 'week': 1, 'validation_loss': np.float64(1.1378739580512047), 'macro_f1': np.float64(0.14087242937375957), 'micro_f1': np.float64(0.7057387057387058), 'mae': np.float64(0.6366035369382649)}
{'loss': 1.837167501449585, 'epoch': 7.0, 'step': 5313, 'lr': 2.9248706347149187e-10, 'week': 2, 'validation_loss': np.float64(1.1378739580512047), 'macro_f1': np.float64(0.14084210101765637), 'micro_f1': np.float64(0.7138787138787139), 'mae': np.float64(0.7029591009501307)}
{'loss': 1.837167501449585, 'epoch': 7.0, 'step': 5313, 'lr': 2.9248706347149187e-10, 'week': 3, 'validation_loss': np.float64(1.1378739580512047), 'macro_f1': np.float64(0.1625640301523715), 'micro_f1': np.float64(0.671957671957672), 'mae': np.float64(0.6085960524943783)}
{'loss': 1.837167501449585, 'epoch': 7.0, 'step': 5313, 'lr': 2.9248706347149187e-10, 'week': 4, 'validation_loss': np.float64(1.1378739580512047), 'macro_f1': np.float64(0.1


