In [1]:
import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from datetime import datetime
from tqdm import tqdm
from scipy.interpolate import interp1d
from sklearn.preprocessing import RobustScaler
from sklearn.metrics import f1_score, mean_absolute_error

import torch
from torch import nn
from torch.utils.data import TensorDataset, DataLoader
from torch.utils.tensorboard import SummaryWriter


In [37]:
writer = SummaryWriter('../runs/HM_Scratch')

In [32]:
filesList = os.listdir('../src')
print(filesList)

['soil_data.csv', 'train_timeseries', 'counties.geojson', 'test_timeseries', 'validation_timeseries', 'counties.zip']


In [4]:
dataDic = {"train": pd.read_csv("../src/train_timeseries/train_timeseries.csv"),
           "test": pd.read_csv("../src/test_timeseries/test_timeseries.csv"),
           "validation": pd.read_csv("../src/validation_timeseries/validation_timeseries.csv"),
           "soil" : pd.read_csv("../src/soil_data.csv"),
           }

In [5]:
dataDic["train"].columns

Index(['fips', 'date', 'PRECTOT', 'PS', 'QV2M', 'T2M', 'T2MDEW', 'T2MWET',
       'T2M_MAX', 'T2M_MIN', 'T2M_RANGE', 'TS', 'WS10M', 'WS10M_MAX',
       'WS10M_MIN', 'WS10M_RANGE', 'WS50M', 'WS50M_MAX', 'WS50M_MIN',
       'WS50M_RANGE', 'score'],
      dtype='object')

In [6]:
class2id = {
    'None': 0,
    'D0': 1,
    'D1': 2,
    'D2': 3,
    'D3': 4,
    'D4': 5,
}
id2class = {v: k for k, v in class2id.items()}

In [7]:
dfs = {
    k: dataDic[k].set_index(['fips', 'date'])
    for k in dataDic.keys() if k != "soil"
}

dfs["soil"] = dataDic["soil"]

In [40]:
batch_size = 128
output_weeks = 6
hidden_dim = 360
n_layers = 3
ffnn_layers = 4
dropout = 0.4
lr = 7e-5
epochs = 10


In [9]:
def interpolate_nans(padata, pkind='linear'):
    """
    see: https://stackoverflow.com/a/53050216/2167159
    """
    aindexes = np.arange(padata.shape[0])
    agood_indexes, = np.where(np.isfinite(padata))
    f = interp1d(agood_indexes
               , padata[agood_indexes]
               , bounds_error=False
               , copy=False
               , fill_value="extrapolate"
               , kind=pkind)
    return f(aindexes)

In [10]:
def date_encode(date):
    if isinstance(date, str):
        date = datetime.strptime(date, "%Y-%m-%d")
    return (
        np.sin(2 * np.pi * date.timetuple().tm_yday / 366),
        np.cos(2 * np.pi * date.timetuple().tm_yday / 366),
    )

In [11]:
def loadXY(
    df,
    random_state=42,
    window_size=180, # how many days in the past (default/competition: 180)
    target_size=6, # how many weeks into the future (default/competition: 6)
    fuse_past=True, # add the past drought observations? (default: True)
    return_fips=False, # return the county identifier (do not use for predictions)
    encode_season=True, # encode the season using the function above (default: True) 
    use_prev_year=False, # add observations from 1 year prior?
):
    df = dfs[df]
    soil_df = dfs["soil"]
    time_data_cols = sorted(
        [c for c in df.columns if c not in ["fips", "date", "score"]]
    )
    static_data_cols = sorted(
        [c for c in soil_df.columns if c not in ["soil", "lat", "lon"]]
    )
    count = 0
    score_df = df.dropna(subset=["score"])
    X_static = np.empty((len(df) // window_size, len(static_data_cols)))
    X_fips_date = []
    add_dim = 0
    if use_prev_year:
        add_dim += len(time_data_cols)
    if fuse_past:
        add_dim += 1
        if use_prev_year:
            add_dim += 1
    if encode_season:
        add_dim += 2
    X_time = np.empty(
        (len(df) // window_size, window_size, len(time_data_cols) + add_dim)
    )
    y_past = np.empty((len(df) // window_size, window_size))
    y_target = np.empty((len(df) // window_size, target_size))
    if random_state is not None:
        np.random.seed(random_state)
    for fips in tqdm(score_df.index.get_level_values(0).unique()):
        if random_state is not None:
            start_i = np.random.randint(1, window_size)
        else:
            start_i = 1
        fips_df = df[(df.index.get_level_values(0) == fips)]
        X = fips_df[time_data_cols].values
        y = fips_df["score"].values
        X_s = soil_df[soil_df["fips"] == fips][static_data_cols].values[0]
        for i in range(start_i, len(y) - (window_size + target_size * 7), window_size):
            X_fips_date.append((fips, fips_df.index[i : i + window_size][-1]))
            X_time[count, :, : len(time_data_cols)] = X[i : i + window_size]
            if use_prev_year:
                if i < 365 or len(X[i - 365 : i + window_size - 365]) < window_size:
                    continue
                X_time[count, :, -len(time_data_cols) :] = X[
                    i - 365 : i + window_size - 365
                ]
            if not fuse_past:
                y_past[count] = interpolate_nans(y[i : i + window_size])
            else:
                X_time[count, :, len(time_data_cols)] = interpolate_nans(
                    y[i : i + window_size]
                )
            if encode_season:
                enc_dates = [
                    date_encode(d) for f, d in fips_df.index[i : i + window_size].values
                ]
                d_sin, d_cos = [s for s, c in enc_dates], [c for s, c in enc_dates]
                X_time[count, :, len(time_data_cols) + (add_dim - 2)] = d_sin
                X_time[count, :, len(time_data_cols) + (add_dim - 2) + 1] = d_cos
            temp_y = y[i + window_size : i + window_size + target_size * 7]
            y_target[count] = np.array(temp_y[~np.isnan(temp_y)][:target_size])
            X_static[count] = X_s
            count += 1
    print(f"loaded {count} samples")
    results = [X_static[:count], X_time[:count], y_target[:count]]
    if not fuse_past:
        results.append(y_past[:count])
    if return_fips:
        results.append(X_fips_date)
    return results

In [12]:
scaler_dict = {}
scaler_dict_static = {}
scaler_dict_past = {}


def normalize(X_static, X_time, y_past=None, fit=False):
    for index in tqdm(range(X_time.shape[-1])):
        if fit:
            scaler_dict[index] = RobustScaler().fit(X_time[:, :, index].reshape(-1, 1))
        X_time[:, :, index] = (
            scaler_dict[index]
            .transform(X_time[:, :, index].reshape(-1, 1))
            .reshape(-1, X_time.shape[-2])
        )
    for index in tqdm(range(X_static.shape[-1])):
        if fit:
            scaler_dict_static[index] = RobustScaler().fit(
                X_static[:, index].reshape(-1, 1)
            )
        X_static[:, index] = (
            scaler_dict_static[index]
            .transform(X_static[:, index].reshape(-1, 1))
            .reshape(1, -1)
        )
    index = 0
    if y_past is not None:
        if fit:
            scaler_dict_past[index] = RobustScaler().fit(y_past.reshape(-1, 1))
        y_past[:, :] = (
            scaler_dict_past[index]
            .transform(y_past.reshape(-1, 1))
            .reshape(-1, y_past.shape[-1])
        )
        return X_static, X_time, y_past
    return X_static, X_time

In [13]:
X_tabular_train, X_time_train, y_target_train = loadXY("train")
print("train shape", X_time_train.shape)
X_tabular_validation, X_time_valid, y_target_valid, valid_fips = loadXY("validation", return_fips=True)
print("validation shape", X_time_valid.shape)
X_tabular_train, X_time_train = normalize(X_tabular_train, X_time_train, fit=True)
X_tabular_validation, X_time_valid = normalize(X_tabular_validation, X_time_valid)

100%|██████████| 3108/3108 [09:36<00:00,  5.40it/s]


loaded 103390 samples
train shape (103390, 180, 21)


100%|██████████| 3108/3108 [00:48<00:00, 63.55it/s]


loaded 8748 samples
validation shape (8748, 180, 21)


100%|██████████| 21/21 [00:21<00:00,  1.02s/it]
100%|██████████| 30/30 [00:00<00:00, 337.67it/s]
100%|██████████| 21/21 [00:00<00:00, 31.18it/s]
100%|██████████| 30/30 [00:00<00:00, 8448.88it/s]


In [24]:
X_tabular_test, X_time_test, y_target_test, test_fips = loadXY("test", return_fips=True)
print("test shape", X_time_test.shape)
X_tabular_test, X_time_test = normalize(X_tabular_test, X_time_test)

100%|██████████| 3108/3108 [00:46<00:00, 66.58it/s]


loaded 8768 samples
test shape (8768, 180, 21)


100%|██████████| 21/21 [00:00<00:00, 31.13it/s]
100%|██████████| 30/30 [00:00<00:00, 8606.64it/s]


In [28]:
# import pickle
# # export all
# np.save("../data/processed_no_cat_and_no_tensors/X_tabular_train.npy", X_tabular_train)
# np.save("../data/processed_no_cat_and_no_tensors/X_time_train.npy", X_time_train)
# np.save("../data/processed_no_cat_and_no_tensors/y_target_train.npy", y_target_train)
# np.save("../data/processed_no_cat_and_no_tensors/X_tabular_validation.npy", X_tabular_validation)
# np.save("../data/processed_no_cat_and_no_tensors/X_time_valid.npy", X_time_valid)
# np.save("../data/processed_no_cat_and_no_tensors/y_target_valid.npy", y_target_valid)
# np.save("../data/processed_no_cat_and_no_tensors/X_tabular_test.npy", X_tabular_test)
# np.save("../data/processed_no_cat_and_no_tensors/X_time_test.npy", X_time_test)
# np.save("../data/processed_no_cat_and_no_tensors/y_target_test.npy", y_target_test)
# with open(f"../data/processed_no_cat_and_no_tensors/valid_fips.pickle", "wb") as f:
#     pickle.dump(valid_fips, f)
# with open(f"../data/processed_no_cat_and_no_tensors/test_fips.pickle", "wb") as f:
#     pickle.dump(test_fips, f)


In [33]:
train_data = TensorDataset(
    torch.tensor(X_time_train),
    torch.tensor(X_tabular_train),
    torch.tensor(y_target_train[:, :output_weeks]),
)
train_loader = DataLoader(
    train_data, shuffle=True, batch_size=batch_size, drop_last=False
)
valid_data = TensorDataset(
    torch.tensor(X_time_valid),
    torch.tensor(X_tabular_validation),
    torch.tensor(y_target_valid[:, :output_weeks]),
)
valid_loader = DataLoader(
    valid_data, shuffle=False, batch_size=batch_size, drop_last=False
)

In [38]:
class HybridModel(nn.Module):
    def __init__(
        self,
        num_numerical_features,
        num_time_series_features,
        hidden_size,
        num_lstm_layers,
        num_fc_tabular_layers,
        num_fc_combined_layers,
        output_size,
        dropout,
        ablation_TS=False,
        ablation_tabular=False,
        ablation_attention=False,
    ):
        super(HybridModel, self).__init__()
        
        self.ablation_tabular = ablation_tabular
        self.ablation_TS = ablation_TS
        self.ablation_attention = ablation_attention

        if not self.ablation_tabular:
            # Static data branch
            tabular_fc_layers = []
            input_size = num_numerical_features
            for _ in range(num_fc_tabular_layers):
                tabular_fc_layers.append(nn.Linear(input_size, 128))
                tabular_fc_layers.append(nn.ReLU())
                input_size = 128
            self.tabular_fc_layers = nn.Sequential(
                *tabular_fc_layers, nn.Linear(128, 64), nn.ReLU()
            )

        if not self.ablation_TS:
            # TS branch
            self.lstm = nn.LSTM(
                input_size=num_time_series_features,
                hidden_size=hidden_size,
                num_layers=num_lstm_layers,
                batch_first=True,
            )

            # Atenttion
            self.attention = nn.Linear(hidden_size, 1)
            self.dropout = nn.Dropout(dropout)

        # Combined part
        self.fc_after_context = nn.Linear(hidden_size, 64)
        combined_fc_layers = []
        if not self.ablation_tabular and not self.ablation_TS:
            input_dim = 64 + 64  # Assuming 64 from tabular output and 64 from LSTM output after attention
        else:
            input_dim = 64
        for _ in range(num_fc_combined_layers):
            combined_fc_layers.append(nn.Linear(input_dim, 64))
            combined_fc_layers.append(nn.ReLU())
            input_dim = 64
        self.combined_fc_layers = nn.Sequential(
            *combined_fc_layers, nn.Linear(64, 32), nn.ReLU(), nn.Linear(32, output_size)
        )

    def forward(self, time_series_data, numerical_data):
        numerical_data = numerical_data.to(torch.float32)
        time_series_data = time_series_data.to(torch.float32)
        if not self.ablation_tabular:
            # Pass the tabular data through FC layers
            x1 = self.tabular_fc_layers(numerical_data)
        if not self.ablation_TS:
            # Pass the time series data through the LSTM
            lstm_out, (hn, cn) = self.lstm(time_series_data)
            # Pass the data through the attention mechanism
            if not self.ablation_attention:
                attention_weights = torch.softmax(self.attention(lstm_out), dim=1)
                context_vector = torch.sum(attention_weights * lstm_out, dim=1)
            else:
                context_vector = lstm_out[:, -1, :]  # Last time step output
            
            context_vector = self.bn_lstm_out(context_vector)
            droped_out = self.dropout(context_vector)
            x2 = torch.relu(self.fc_after_context(droped_out))

        # Concatenate the outputs from the tabular and the temporal data and pass it through FC layers
        if not self.ablation_tabular and not self.ablation_TS:
            x = torch.cat((x1, x2), dim=1)
        elif not self.ablation_tabular:
            x = x1
        else:
            x = x2

        x = self.combined_fc_layers(x)
        return x

In [None]:
is_cuda = torch.cuda.is_available()
if is_cuda:
    device = torch.device("cuda")
    print("using GPU")
else:
    device = torch.device("cpu")
    print("using CPU")

model = HybridModel(
    num_numerical_features=X_tabular_train.shape[-1],
    num_time_series_features=X_time_train.shape[-1],
    hidden_size=hidden_dim,
    num_lstm_layers=n_layers,
    num_fc_tabular_layers=ffnn_layers,
    num_fc_combined_layers=2,
    output_size=output_weeks,
    dropout=dropout,
)

model.to(device)
loss_function = nn.HubberLoss()

optimizer = torch.optim.AdamW(model.parameters(), lr=lr)
scheduler = torch.optim.lr_scheduler.OneCycleLR(optimizer, max_lr=lr, steps_per_epoch=len(train_loader), epochs=epochs)
counter = 0
valid_loss_min = np.inf
torch.manual_seed(42)
np.random.seed(42)

for i in range(epochs):
    for k, (inputs, static, labels) in tqdm(enumerate(train_loader),
                                            desc=f"epoch {i+1}/{epochs}",
                                            total=len(train_loader),):
        model.train()
        counter += 1
        inputs, labels, static = (
            inputs.to(device),
            labels.to(device),
            static.to(device),
        )
        model.zero_grad()
        output= model(inputs, static)
        loss = loss_function(output, labels.float())
        loss.backward()
        nn.utils.clip_grad_norm_(model.parameters(), 5)
        optimizer.step()
        scheduler.step()

        with torch.no_grad():
            if k == len(train_loader) - 1 or k == (len(train_loader) - 1) // 2:
                val_losses = []
                model.eval()
                labels = []
                preds = []
                raw_labels = []
                raw_preds = []
                for inp, stat, lab in valid_loader:
                    inp, lab, stat = inp.to(device), lab.to(device), stat.to(device)
                    out = model(inp, stat)
                    val_loss = loss_function(out, lab.float())
                    val_losses.append(val_loss.item())
                    for labs in lab:
                        labels.append([int(l.round()) for l in labs])
                        raw_labels.append([float(l) for l in labs])
                    for pred in out:
                        preds.append([int(p.round()) for p in pred])
                        raw_preds.append([float(p) for p in pred])
                # log data
                labels = np.array(labels)
                preds = np.clip(np.array(preds), 0, 5)
                raw_preds = np.array(raw_preds)
                raw_labels = np.array(raw_labels)
                for i in range(output_weeks):
                    log_dict = {
                        "loss": float(loss),
                        "epoch": counter / len(train_loader),
                        "step": counter,
                        "lr": optimizer.param_groups[0]["lr"],
                        "week": i + 1,
                    }
                    # w = f'week_{i+1}_'
                    w = ""
                    log_dict[f"{w}validation_loss"] = np.mean(val_losses)
                    log_dict[f"{w}macro_f1"] = f1_score(
                        labels[:, i], preds[:, i], average="macro"
                    )
                    log_dict[f"{w}micro_f1"] = f1_score(
                        labels[:, i], preds[:, i], average="micro"
                    )
                    log_dict[f"{w}mae"] = mean_absolute_error(
                        raw_labels[:, i], raw_preds[:, i]
                    )
                    print(log_dict)
                    writer.add_scalars("Loss(Hubber)", {'train': loss,
                                                     'validation': log_dict[f"{w}validation_loss"]},
                                                     counter)
                    writer.add_scalars("F1(MSE)", {'macro': log_dict[f"{w}macro_f1"],
                                                   'micro': log_dict[f"{w}micro_f1"]},
                                                   counter)
                    writer.add_scalar("MAE", log_dict[f"{w}mae"],
                                      counter)
                    writer.add_scalar("Learning-Rate", log_dict["lr"],
                                      counter)
                    for j, f1 in enumerate(
                        f1_score(labels[:, i], preds[:, i], average=None)
                    ):
                        log_dict[f"{w}{id2class[j]}_f1"] = f1
                    model.train()
                if np.mean(val_losses) <= valid_loss_min:
                    torch.save(model.state_dict(), "./state_dict.pt")
                    print(
                        "Validation loss decreased ({:.6f} --> {:.6f}).  Saving model ...".format(
                            valid_loss_min, np.mean(val_losses)
                        )
                    )
                    valid_loss_min = np.mean(val_losses)

using GPU


epoch 1/10:  50%|█████     | 404/808 [04:05<37:05,  5.51s/it]

{'loss': 1.536420226097107, 'epoch': 0.5, 'step': 404, 'lr': 7.305177512317032e-06, 'week': 1, 'validation_loss': np.float64(1.1974370693922907), 'macro_f1': np.float64(0.1329893360853113), 'micro_f1': np.float64(0.6628943758573388), 'mae': np.float64(0.5638914506719982)}
{'loss': 1.536420226097107, 'epoch': 0.5, 'step': 404, 'lr': 7.305177512317032e-06, 'week': 2, 'validation_loss': np.float64(1.1974370693922907), 'macro_f1': np.float64(0.1385221992759396), 'micro_f1': np.float64(0.6620941929583904), 'mae': np.float64(0.6818223081362413)}
{'loss': 1.536420226097107, 'epoch': 0.5, 'step': 404, 'lr': 7.305177512317032e-06, 'week': 3, 'validation_loss': np.float64(1.1974370693922907), 'macro_f1': np.float64(0.13290718720645175), 'micro_f1': np.float64(0.6631229995427527), 'mae': np.float64(0.7225723570303477)}
{'loss': 1.536420226097107, 'epoch': 0.5, 'step': 404, 'lr': 7.305177512317032e-06, 'week': 4, 'validation_loss': np.float64(1.1974370693922907), 'macro_f1': np.float64(0.133292172

epoch 1/10: 100%|██████████| 808/808 [08:10<00:00,  1.65it/s]


{'loss': 0.6369934678077698, 'epoch': 1.0, 'step': 808, 'lr': 1.9612577643465342e-05, 'week': 1, 'validation_loss': np.float64(0.6657752779082976), 'macro_f1': np.float64(0.28258583372298246), 'micro_f1': np.float64(0.6547782350251486), 'mae': np.float64(0.46834351842660693)}
{'loss': 0.6369934678077698, 'epoch': 1.0, 'step': 808, 'lr': 1.9612577643465342e-05, 'week': 2, 'validation_loss': np.float64(0.6657752779082976), 'macro_f1': np.float64(0.3265359312885872), 'micro_f1': np.float64(0.6490626428898034), 'mae': np.float64(0.5106135849832998)}
{'loss': 0.6369934678077698, 'epoch': 1.0, 'step': 808, 'lr': 1.9612577643465342e-05, 'week': 3, 'validation_loss': np.float64(0.6657752779082976), 'macro_f1': np.float64(0.18055588242180964), 'micro_f1': np.float64(0.655578417924097), 'mae': np.float64(0.5226629727440415)}
{'loss': 0.6369934678077698, 'epoch': 1.0, 'step': 808, 'lr': 1.9612577643465342e-05, 'week': 4, 'validation_loss': np.float64(0.6657752779082976), 'macro_f1': np.float64(0.

epoch 2/10:  50%|█████     | 404/808 [04:05<37:00,  5.50s/it]

{'loss': 0.5274914503097534, 'epoch': 1.5, 'step': 1212, 'lr': 3.6421782399043904e-05, 'week': 1, 'validation_loss': np.float64(0.39313294042063796), 'macro_f1': np.float64(0.4655164447654623), 'micro_f1': np.float64(0.7516003657978967), 'mae': np.float64(0.298836478499859)}
{'loss': 0.5274914503097534, 'epoch': 1.5, 'step': 1212, 'lr': 3.6421782399043904e-05, 'week': 2, 'validation_loss': np.float64(0.39313294042063796), 'macro_f1': np.float64(0.43168414647503855), 'micro_f1': np.float64(0.7349108367626886), 'mae': np.float64(0.3305892503354407)}
{'loss': 0.5274914503097534, 'epoch': 1.5, 'step': 1212, 'lr': 3.6421782399043904e-05, 'week': 3, 'validation_loss': np.float64(0.39313294042063796), 'macro_f1': np.float64(0.36330098222187496), 'micro_f1': np.float64(0.7136488340192044), 'mae': np.float64(0.3709554733992836)}
{'loss': 0.5274914503097534, 'epoch': 1.5, 'step': 1212, 'lr': 3.6421782399043904e-05, 'week': 4, 'validation_loss': np.float64(0.39313294042063796), 'macro_f1': np.flo

epoch 2/10: 100%|██████████| 808/808 [08:11<00:00,  1.65it/s]


{'loss': 0.21270079910755157, 'epoch': 2.0, 'step': 1616, 'lr': 5.322514587043574e-05, 'week': 1, 'validation_loss': np.float64(0.32792191515150276), 'macro_f1': np.float64(0.44880283518443553), 'micro_f1': np.float64(0.7769775948788294), 'mae': np.float64(0.2539081094172593)}
{'loss': 0.21270079910755157, 'epoch': 2.0, 'step': 1616, 'lr': 5.322514587043574e-05, 'week': 2, 'validation_loss': np.float64(0.32792191515150276), 'macro_f1': np.float64(0.40167446929096245), 'micro_f1': np.float64(0.7411979881115683), 'mae': np.float64(0.3064746330854442)}
{'loss': 0.21270079910755157, 'epoch': 2.0, 'step': 1616, 'lr': 5.322514587043574e-05, 'week': 3, 'validation_loss': np.float64(0.32792191515150276), 'macro_f1': np.float64(0.38031850581623594), 'micro_f1': np.float64(0.7288523090992227), 'mae': np.float64(0.3350773828053125)}
{'loss': 0.21270079910755157, 'epoch': 2.0, 'step': 1616, 'lr': 5.322514587043574e-05, 'week': 4, 'validation_loss': np.float64(0.32792191515150276), 'macro_f1': np.f

epoch 3/10:  50%|█████     | 404/808 [04:05<36:59,  5.49s/it]

{'loss': 0.35108041763305664, 'epoch': 2.5, 'step': 2020, 'lr': 6.551658857891442e-05, 'week': 1, 'validation_loss': np.float64(0.304158723656682), 'macro_f1': np.float64(0.43076240530156423), 'micro_f1': np.float64(0.785893918609968), 'mae': np.float64(0.27297030320464405)}
{'loss': 0.35108041763305664, 'epoch': 2.5, 'step': 2020, 'lr': 6.551658857891442e-05, 'week': 2, 'validation_loss': np.float64(0.304158723656682), 'macro_f1': np.float64(0.38381687959329785), 'micro_f1': np.float64(0.7409693644261546), 'mae': np.float64(0.33923720586156547)}
{'loss': 0.35108041763305664, 'epoch': 2.5, 'step': 2020, 'lr': 6.551658857891442e-05, 'week': 3, 'validation_loss': np.float64(0.304158723656682), 'macro_f1': np.float64(0.35157676597804155), 'micro_f1': np.float64(0.7240512117055327), 'mae': np.float64(0.3618634608527147)}
{'loss': 0.35108041763305664, 'epoch': 2.5, 'step': 2020, 'lr': 6.551658857891442e-05, 'week': 4, 'validation_loss': np.float64(0.304158723656682), 'macro_f1': np.float64(

epoch 3/10: 100%|██████████| 808/808 [08:11<00:00,  1.64it/s]


{'loss': 0.33217960596084595, 'epoch': 3.0, 'step': 2424, 'lr': 6.99999946009513e-05, 'week': 1, 'validation_loss': np.float64(0.5416933061636012), 'macro_f1': np.float64(0.16582348851081682), 'micro_f1': np.float64(0.6762688614540466), 'mae': np.float64(0.3860365805638084)}
{'loss': 0.33217960596084595, 'epoch': 3.0, 'step': 2424, 'lr': 6.99999946009513e-05, 'week': 2, 'validation_loss': np.float64(0.5416933061636012), 'macro_f1': np.float64(0.16307033035286073), 'micro_f1': np.float64(0.6723822588020119), 'mae': np.float64(0.40675130964608763)}
{'loss': 0.33217960596084595, 'epoch': 3.0, 'step': 2424, 'lr': 6.99999946009513e-05, 'week': 3, 'validation_loss': np.float64(0.5416933061636012), 'macro_f1': np.float64(0.16835420823931913), 'micro_f1': np.float64(0.6751257430269776), 'mae': np.float64(0.41673772781807356)}
{'loss': 0.33217960596084595, 'epoch': 3.0, 'step': 2424, 'lr': 6.99999946009513e-05, 'week': 4, 'validation_loss': np.float64(0.5416933061636012), 'macro_f1': np.float64

epoch 4/10:  50%|█████     | 404/808 [04:05<36:52,  5.48s/it]

{'loss': 0.2981545329093933, 'epoch': 3.5, 'step': 2828, 'lr': 6.911814926126814e-05, 'week': 1, 'validation_loss': np.float64(0.33610727843167126), 'macro_f1': np.float64(0.4968798269565699), 'micro_f1': np.float64(0.7323959762231367), 'mae': np.float64(0.3233684248063041)}
{'loss': 0.2981545329093933, 'epoch': 3.5, 'step': 2828, 'lr': 6.911814926126814e-05, 'week': 2, 'validation_loss': np.float64(0.33610727843167126), 'macro_f1': np.float64(0.4564662121974871), 'micro_f1': np.float64(0.6744398719707362), 'mae': np.float64(0.3824414005152019)}
{'loss': 0.2981545329093933, 'epoch': 3.5, 'step': 2828, 'lr': 6.911814926126814e-05, 'week': 3, 'validation_loss': np.float64(0.33610727843167126), 'macro_f1': np.float64(0.42905206293987525), 'micro_f1': np.float64(0.6620941929583904), 'mae': np.float64(0.41217887988791496)}
{'loss': 0.2981545329093933, 'epoch': 3.5, 'step': 2828, 'lr': 6.911814926126814e-05, 'week': 4, 'validation_loss': np.float64(0.33610727843167126), 'macro_f1': np.float6

epoch 4/10:  54%|█████▍    | 440/808 [04:26<03:42,  1.65it/s]


KeyboardInterrupt: 

In [30]:
def predict(x, static):
    out= model(torch.tensor(x), static)
    return out

In [31]:
dict_map = {
    "y_pred": [],
    "y_pred_rounded": [],
    # "fips": [],
    # "date": [],
    "y_true": [],
    "week": [],
}
i = 0
for x, static, y in tqdm(
    valid_loader, # ou test_loader
    desc="validation predictions...",):
    x, static, y = x.to(device), static.to(device), y.to(device)
    with torch.no_grad():
        pred = predict(x, static).clone().detach()
    for w in range(output_weeks):
        dict_map["y_pred"] += [float(p[w]) for p in pred]
        dict_map["y_pred_rounded"] += [int(p.round()[w]) for p in pred]
        # dict_map["fips"] += [f[1][0] for f in valid_fips[i : i + len(x)]]
        # dict_map["date"] += [f[1][1] for f in valid_fips[i : i + len(x)]]
        dict_map["y_true"] += [float(item[w]) for item in y]
        dict_map["week"] += [w] * len(x)
    i += len(x)
df = pd.DataFrame(dict_map)

  out= model(torch.tensor(x), static)
validation predictions...: 100%|██████████| 69/69 [00:11<00:00,  6.00it/s]


In [34]:
for w in range(6):
    wdf = df[df['week']==w]
    mae = mean_absolute_error(wdf['y_true'], wdf['y_pred']).round(3)
    f1 = f1_score(wdf['y_true'].round(),wdf['y_pred'].round(), average='macro').round(3)
    print(f"Week {w+1}", f"MAE {mae}", f"F1 {f1}")

Week 1 MAE 0.148 F1 0.787
Week 2 MAE 0.203 F1 0.715
Week 3 MAE 0.256 F1 0.663
Week 4 MAE 0.31 F1 0.591
Week 5 MAE 0.357 F1 0.545
Week 6 MAE 0.399 F1 0.499


: 

In [32]:
df

Unnamed: 0,y_pred,y_pred_rounded,y_true,week
0,-0.001810,0,0.0000,0
1,0.040762,0,0.0000,0
2,-0.007563,0,0.0000,0
3,0.031373,0,0.0000,0
4,0.878264,1,0.7767,0
...,...,...,...,...
52483,0.100800,0,0.9964,5
52484,1.101720,1,1.9211,5
52485,0.198933,0,0.0000,5
52486,0.209685,0,0.0000,5


In [None]:
# Error distribution
plt.figure(figsize=(10, 6))
plt.hist(abs(df['y_true'] - df['y_pred']), bins=40, alpha=0.7, label='error')

In [None]:
# predicted values vs residuals
plt.figure(figsize=(10, 6))
plt.scatter(df['y_pred'], df['y_true'] - df['y_pred'], alpha=0.4)
plt.xlabel("Predicted")
plt.ylabel("Residuals")

In [None]:
plt.figure(figsize=(10, 6))
plt.scatter(df['y_pred'], df['y_true'], alpha=0.4)
plt.xlabel("Predicted")
plt.ylabel("True")