<a href="https://colab.research.google.com/github/Krankile/npmf/blob/main/notebooks/training_loop.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

#Setup

##Kernel setup

In [1]:
%load_ext autoreload
%autoreload 2

In [2]:
%%capture
!pip install wandb
!git clone https://github.com/Krankile/npmf.git

In [3]:
!wandb login

[34m[1mwandb[0m: You can find your API key in your browser here: https://wandb.ai/authorize
[34m[1mwandb[0m: Paste an API key from your profile and hit enter, or press ctrl+c to quit: 
[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /root/.netrc


##General setup

In [73]:
%%capture
!cd npmf && git pull

import math
import multiprocessing
import os
import pickle
from collections import Counter, defaultdict
from dataclasses import asdict, dataclass
from datetime import datetime, timedelta
from operator import itemgetter
from typing import Callable, List, Tuple

import matplotlib as mpl
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import torch
from npmf.utils.colors import main, main2, main3
from npmf.utils.dataset import TimeDeltaDataset
from npmf.utils.dtypes import fundamental_types
from npmf.utils.eikon import column_mapping
from npmf.utils.tests import pickle_df
from npmf.utils.wandb import get_dataset, put_dataset
from numpy.ma.core import outerproduct
from pandas.tseries.offsets import BDay, Day
from sklearn.preprocessing import MinMaxScaler, minmax_scale
from torch import nn
from torch.utils.data import DataLoader, Dataset
from tqdm import tqdm, trange

import wandb as wb

In [5]:
mpl.rcParams['axes.prop_cycle'] = mpl.cycler(color=[main, main2, main3, "black"])
mpl.rcParams['figure.figsize'] = (6, 4)  # (6, 4) is default and used in the paper

In [6]:
device = "cuda" if torch.cuda.is_available() else "cpu"
print(f"Using {device} device")

Using cpu device


In [7]:
np.random.seed(69)

# Create a Neural network class

# Get some data

In [None]:
%%capture
stock_df = get_dataset("stock-oil-final:latest", project="master-test")
fundamentals_df = get_dataset("fundamentals-oil-final:latest", project="master-test")
meta_df = get_dataset("meta-oil-final:latest", project="master-test")
macro_df = get_dataset("macro-oil-final:latest", project="master-test")

stock_df = stock_df.drop_duplicates(subset=["ticker", "date"])

# Run the loop! (Like Odd-Geir Lademo)

![picture](https://drive.google.com/uc?id=1Y55gFQSi4Baovmi0kUQGhbgGOBTI03E7)


In [10]:
class MultivariateNetwork(nn.Module):
    def __init__(self, lag_len, meta_cont_len, meta_cat_len, macro_len, hidden_dim, out_len, **params):
        super().__init__()

        self.lags = nn.Sequential(
            nn.Linear(lag_len, hidden_dim),
            nn.ReLU(),
        )

        self.meta_cont = nn.Sequential(
            nn.Linear(meta_cont_len, hidden_dim),
            nn.ReLU(),
        )

        self.meta_cat = [nn.Embedding(l, hidden_dim) for l in meta_cat_len]

        self.macro = nn.Sequential(
            nn.Linear(macro_len, hidden_dim),
            nn.ReLU(),
        )

        self.predict = nn.Sequential(
            nn.Linear(3*hidden_dim + 9*hidden_dim, 128),
            nn.ReLU(),
            nn.Linear(128, 64),
            nn.ReLU(),
            nn.Linear(64, out_len),
        )


    def forward(self, lags, meta_cont, meta_cat, macro):

        lags = self.lags(lags)
        meta_cont = self.meta_cont(meta_cont)
        meta_cat = torch.cat([emb(meta_cat[:, i]) for i, emb in enumerate(self.meta_cat)], dim=1)
        macro = self.macro(macro)

        x = torch.cat((lags, meta_cont, meta_cat, macro), dim=1)
        x = self.predict(x)

        return x

In [33]:
torch.Tensor([[1, 2, np.nan], [1, 2, np.nan], [1, 2, np.nan]]).isnan().sum(dim=1)

tensor([0, 0, 3])

In [37]:
def mape_loss(target, y_pred):
    return ((y_pred - target).abs() / (target.abs() + 1e-8)).nanmean(dim=1).mean(dim=0)

In [52]:
@dataclass
class RunParams:
    n_reports: int
    training_w: int
    forecast_w: int
    epochs: int
    loss_fn: Callable[[torch.Tensor, torch.Tensor], torch.Tensor]

    lag_len: int
    meta_cont_len: int
    meta_cat_len: List[int]
    macro_len: int
    out_len: int
    hidden_dim: int
    batch_size: int

In [205]:
def get_stocks_in_timeframe(
    stock_df, stock_dates, scale=True, remove_na=True
) -> pd.DataFrame:
    out = pd.DataFrame(
        data=0, columns=stock_dates, index=stock_df.ticker.unique(), dtype=np.float64
    )
    stock_df = stock_df.pivot(index="ticker", columns="date", values="market_cap")
    out = out.add(stock_df)

    # Remove tickers where data missing for ticker in the whole period
    out = out.dropna(axis=0, how="all")

    if remove_na:
        out: pd.DataFrame = out.ffill(axis=1).replace(np.nan, 0)

    # Perform MinMaxScaling on the full dataset
    if scale:
        out = pd.DataFrame(
            data=minmax_scale(out.values, axis=1),
            index=out.index,
            columns=out.columns,
        )
    return out


def get_historic_dates(current_time, trading_days):
    back_in_time_buffer = timedelta(trading_days + trading_days * 5)

    return pd.date_range(
        start=current_time - back_in_time_buffer, end=current_time, freq="B"
    )[-trading_days:]


def get_forecast_dates(
    current_time: np.datetime64, forecast_window: int
) -> pd.DatetimeIndex:
    forward_in_time_buffer = timedelta(forecast_window + forecast_window * 5)
    return pd.date_range(
        start=current_time + timedelta(1),
        end=current_time + forward_in_time_buffer,
        freq="B",
    )[:forecast_window]


def _get_last_market_cap(stock_df: pd.DataFrame) -> pd.Series:
    return (
        stock_df.dropna(subset=["market_cap"])
        .drop_duplicates(subset=["ticker"], keep="last")
        .set_index("ticker")
        .market_cap.squeeze()
        .astype(np.float64)
    )


def _minmax_scale_series(series: pd.Series) -> pd.Series:
    return pd.Series(
        minmax_scale(series.to_numpy().reshape((-1, 1))).squeeze(),
        index=series.index,
    )


def get_global_local_column(
    stock_df: pd.DataFrame,
) -> Tuple[pd.Series, pd.Series, pd.Series]:
    apple_market_cap = 2.687 * (10**12)  # ish as of may 2022 (USD)

    last_market_cap_col = _get_last_market_cap(stock_df)

    relative_to_global_market_column: pd.Series = last_market_cap_col / apple_market_cap
    relative_to_current_market_column = _minmax_scale_series(last_market_cap_col)

    return (
        relative_to_global_market_column,
        relative_to_current_market_column,
        last_market_cap_col,
    )


def create_fundamental_df(
    fundamentals,
    legal_fundamental_df,
    n_reports,
    relative_to_current_market_column,
    relative_to_global_market_column,
    last_market_cap_col,
):
    fund_columns = []
    for i in range(n_reports):
        fund_columns.extend(
            legal_fundamental_df.loc[0, "revenue":]
            .index.to_series()
            .map(lambda title: f"{title}_q=-{n_reports-i}")
        )
    columns = ["global_relative"] + ["peers_relative"] + fund_columns
    fundamental_df = pd.DataFrame(
        index=legal_fundamental_df.ticker.unique(), columns=columns
    )

    fundamental_df["peers_relative"] = relative_to_current_market_column.loc[
        fundamental_df.index
    ]
    fundamental_df["global_relative"] = relative_to_global_market_column.loc[
        fundamental_df.index
    ]

    fundamental_df.loc[:, f"revenue_q={-n_reports}":"net_income_p_q=-1"] = fundamentals
    for q in range(n_reports, 0, -1):
        fundamental_df.loc[:, f"revenue_q={-q}":f"fcf_q={-q}"] = fundamental_df.loc[
            :, f"revenue_q={-q}":f"fcf_q={-q}"
        ].div(last_market_cap_col, axis=0)
        fundamental_df.loc[
            :, f"total_assets_q={-q}":f"total_current_liabilities_q={-q}"
        ] = fundamental_df.loc[
            :, f"total_assets_q={-q}":f"total_current_liabilities_q={-q}"
        ].div(
            fundamental_df.loc[:, f"total_assets_q={-q}"], axis=0
        )
        fundamental_df = fundamental_df.drop(columns=f"total_assets_q={-q}")

    fundamental_df = fundamental_df.replace(np.nan, 0)

    return fundamental_df


def get_last_q_fundamentals(fundamental_df, q):
    fundamental_df = fundamental_df[~fundamental_df.date.isna()].astype(
        fundamental_types
    )
    tickers = fundamental_df.ticker.unique()

    fundamental_df["rank"] = (
        fundamental_df.groupby("ticker")
        .date.rank(method="first", ascending=False)
        .astype(int)
    )
    fundamental_df = fundamental_df.set_index(["ticker", "rank"])
    fundamental_df = fundamental_df[fundamental_df.index.get_level_values(1) <= 4].loc[
        :, "revenue":
    ]

    multidx = pd.MultiIndex.from_product(
        [tickers, range(q, 0, -1)], names=["ticker", "rank"]
    )
    funds = pd.DataFrame(
        data=0,
        index=multidx,
        columns=fundamental_df.loc[:, "revenue":].columns,
        dtype=fundamental_df.dtypes.values,
    )

    result = funds.add(fundamental_df).sort_index(ascending=[True, False])
    return result


def get_fundamentals(fundamental_df, stock_tickers, current_time, n_reports):
    # Only keep fundamentals for where we have stock data
    legal_fundamental_df = fundamental_df[
        (fundamental_df.announce_date < current_time)
        & (fundamental_df.ticker.isin(stock_tickers))
        & ~fundamental_df.date.isna()
    ]

    # Important dimensions
    n_companies_with_fundamentals = len(legal_fundamental_df.ticker.unique())
    m_fundamentals = legal_fundamental_df.loc[:, "revenue":].shape[1]

    # Get last q fundamentals and return NA rows if they are still missing
    fundamental_df_all_quarters = get_last_q_fundamentals(
        legal_fundamental_df, n_reports
    )
    fundamentals = fundamental_df_all_quarters.to_numpy().reshape(
        (n_companies_with_fundamentals, n_reports * m_fundamentals)
    )

    return fundamentals, legal_fundamental_df


def get_meta_df(meta_df: pd.DataFrame, stocks_and_fundamentals: pd.DataFrame):
    legal_meta_df: pd.DataFrame = meta_df.set_index("ticker")

    # Join meta and stock-fundamentals
    legal_meta_df = legal_meta_df.loc[stocks_and_fundamentals.index, :]
    legal_meta_df.loc[:, "exchange_code":"state_province_hq"] = legal_meta_df.loc[
        :, "exchange_code":"state_province_hq"
    ].astype("category")
    legal_meta_df.loc[:, "economic_sector":"activity"] = legal_meta_df.loc[
        :, "economic_sector":"activity"
    ].astype("category")

    meta_cont = legal_meta_df["founding_year"].astype(np.float64)

    meta_cont = meta_cont.replace(to_replace=np.nan, value=meta_cont.mean(skipna=True))
    meta_cont = (meta_cont / 2000).to_frame()

    cat_cols = legal_meta_df.select_dtypes("category").columns
    meta_cat = legal_meta_df[cat_cols].apply(lambda col: col.cat.codes) + 1

    return meta_cont, meta_cat


def normalize_macro(legal_macro_df, macro_df):
    df = legal_macro_df.copy()
    for column in [c for c in legal_macro_df.columns if ("_fx" not in c)]:
        df[column] = legal_macro_df[column] / (
            int(math.ceil(macro_df[column].max() / 100.0)) * 100
        )
    return df


def get_macro_df(
    macro_df: pd.DataFrame, historic_dates: pd.DatetimeIndex
) -> pd.DataFrame:
    macro_df = macro_df.set_index("date")

    legal_macro_df = macro_df.loc[macro_df.index.isin(historic_dates), :]

    full_macro_df = pd.DataFrame(
        data=legal_macro_df, index=historic_dates, columns=legal_macro_df.columns
    ).ffill(axis=0)
    full_macro_df = normalize_macro(full_macro_df, macro_df).replace(np.nan, 0)
    return full_macro_df


def get_forecast(
    stock_df: pd.DataFrame,
    stocks_and_fundamentals: pd.DataFrame,
    forecast_dates: pd.DatetimeIndex,
    last_market_cap_col: pd.Series,
):

    forecasts: pd.DataFrame = stock_df[stock_df.date.isin(forecast_dates)]

    forecasts_unnormalized = get_stocks_in_timeframe(
        forecasts,
        forecast_dates,
        scale=False,
        remove_na=False,
    )
    tickers = stocks_and_fundamentals.index.intersection(forecasts_unnormalized.index)
    forecasts_unnormalized = forecasts_unnormalized.loc[tickers, :]

    # TODO: Check if using the same MinMax-scaler as for training set is better
    forecasts_normalized = forecasts_unnormalized.div(last_market_cap_col.loc[tickers], axis=0)
 
    forecasts_normalized = forecasts_normalized.astype(np.float64)

    return forecasts_normalized


class TimeDeltaDataset(Dataset):
    def __init__(
        self,
        current_time: pd.Timestamp,
        training_window: int,
        forecast_window: int,
        n_reports: int,
        stock_df: pd.DataFrame,
        fundamental_df: pd.DataFrame,
        meta_df: pd.DataFrame,
        macro_df: pd.DataFrame,
    ):
        # Get the relevant dates for training and forecasting
        historic_dates = get_historic_dates(current_time, training_window)
        forecast_dates = get_forecast_dates(current_time, forecast_window)

        # Get stock df
        legal_stock_df = stock_df.copy().loc[stock_df.date.isin(historic_dates), :]
        formatted_stocks = get_stocks_in_timeframe(
            legal_stock_df, historic_dates, scale=True, remove_na=True
        )

        # Get relative size information
        (
            relative_to_global_market_column,
            relative_to_current_market_column,
            last_market_cap_col,
        ) = get_global_local_column(legal_stock_df)

        # Get fundamentals df
        stock_tickers: np.array = legal_stock_df.ticker.unique()
        fundamentals, legal_fundamental_df = get_fundamentals(
            fundamental_df, stock_tickers, current_time, n_reports
        )
        fundamental_df = create_fundamental_df(
            fundamentals,
            legal_fundamental_df,
            n_reports,
            relative_to_current_market_column,
            relative_to_global_market_column,
            last_market_cap_col,
        )

        # Combine stocks and fundamentals
        # TODO: Review the strategy for dealing with nan values
        stocks_and_fundamentals = formatted_stocks.join(fundamental_df).replace(
            np.nan, 0
        )

        # Get forecasts
        self.forecast = get_forecast(
            stock_df, stocks_and_fundamentals, forecast_dates, last_market_cap_col
        )
        self.stocks_and_fundamentals = stocks_and_fundamentals.loc[self.forecast.index, :]

        # Get meta df
        self.meta_cont, self.meta_cat = get_meta_df(
            meta_df, self.stocks_and_fundamentals
        )

        # Get macro df
        self.macro_df = get_macro_df(macro_df, historic_dates)

    def __len__(self):
        return self.stocks_and_fundamentals.shape[0]

    def __getitem__(self, idx):

        return (
            self.stocks_and_fundamentals.iloc[idx, :].to_numpy(),
            self.meta_cont.iloc[idx, :].to_numpy().astype(np.float64),
            self.meta_cat.iloc[idx, :].to_numpy(),
            self.macro_df.T.to_numpy().ravel(),
            self.forecast.iloc[idx, :].to_numpy(),
        )


In [208]:
def train(model, optimizer, data_train, data_val, device, params: RunParams, pbar):
    train_losses = []
    val_losses = []

    postfix = dict(train_loss=None, val_loss=None, epoch=0)
    for epoch in range(params.epochs):
        postfix = {**postfix, "epoch": epoch}
        pbar.set_postfix(postfix)
        for run_type, dataloader in {"train": data_train, "val": data_val}.items():
            model.train(run_type == "train")
            
            for stocks_and_fundamentals, meta_cont, meta_cat, macro, target in dataloader:
                
                stocks_and_fundamentals = stocks_and_fundamentals.to(torch.float).to(device)
                meta_cont = meta_cont.to(torch.float).to(device)
                meta_cat = meta_cat.to(torch.long).to(device)
                macro = macro.to(torch.float).to(device)
                target = target.to(torch.float).to(device)

                y_pred = model(stocks_and_fundamentals, meta_cont, meta_cat, macro)

                print("NaNs")
                print(stocks_and_fundamentals.isnan().sum(), meta_cont.isnan().sum(), meta_cat.isnan().sum(), macro.isnan().sum(), y_pred.isnan().sum(), target.isnan().sum())
                print(stocks_and_fundamentals.max(), meta_cont.max(), meta_cat.max(), macro.max(), target.max())

                loss = params.loss_fn(target, y_pred)

                print(target)
                print(y_pred)

                l = ((y_pred - target).abs() / (target.abs() + 1e-8)).nanmean(dim=1).mean()
                print("Loss", l)

                if run_type == "train":
                    train_losses.append(loss.item())
                    loss.backward()

                    optimizer.step()
                else:
                    val_losses.append(loss.item())
        postfix = {**postfix, "train_loss": np.mean(train_losses), "val_loss": np.mean(val_losses)}
        pbar.set_postfix(postfix)

    return train_losses, val_losses

In [209]:
def main():
    params = RunParams(
        n_reports=4,
        training_w=240,
        forecast_w=20,
        epochs=5,
        loss_fn=mape_loss,
        lag_len=302,
        meta_cont_len=1,
        meta_cat_len=np.array([89, 5, 70, 185, 1, 3, 5, 10, 44]) + 1, 
        macro_len=1920,
        out_len=20,
        hidden_dim=32,
        batch_size=64,
    )

    cpus = multiprocessing.cpu_count()
    cpus = 0

    model = MultivariateNetwork(**asdict(params))
    model = model.to(device)
    optimizer = torch.optim.Adam(model.parameters(), lr=0.001)

    date_range = pd.date_range(start="2000-12-31", end="2018-10-31", freq="M")
    n_ranges = len(date_range)
    periods = iter(date_range)
    tra = None
    period = next(periods)

    val = TimeDeltaDataset(period, params.training_w, params.forecast_w, params.n_reports, stock_df, fundamentals_df, meta_df, macro_df)

    outer = tqdm(periods, total=(n_ranges-1), desc=f"Period {period.date()}", leave=True, position=0)

    for period in outer:
        outer.set_description(f"Period {period.date()}")
        tra = val
        # TODO Refactor this class to only require the top-level params once
        val = TimeDeltaDataset(period, params.training_w, params.forecast_w, params.n_reports, stock_df, fundamentals_df, meta_df, macro_df)

        tra_loader = DataLoader(tra, params.batch_size, shuffle=True, drop_last=True, num_workers=cpus)
        val_loader = DataLoader(val, params.batch_size, shuffle=False, num_workers=cpus)

        train(model, optimizer, tra_loader, val_loader, device, params, pbar=outer)

main()

Period 2001-01-31:   0%|          | 0/214 [00:00<?, ?it/s, train_loss=None, val_loss=None, epoch=0]

NaNs
tensor(0) tensor(0) tensor(0) tensor(0) tensor(0) tensor(199)
tensor(1216.8677) tensor(1.0075) tensor(92) tensor(0.3349) tensor(nan)
tensor([[   nan,    nan, 1.0012,  ..., 1.0585, 1.0603, 1.0640],
        [   nan, 0.9757, 0.9914,  ..., 1.1402, 1.1416, 1.1617],
        [   nan, 1.0079, 0.9757,  ..., 0.9331, 0.9586, 0.9681],
        ...,
        [   nan, 1.0131, 1.0326,  ...,    nan,    nan, 1.5507],
        [   nan, 1.0046, 1.0001,  ..., 1.0899, 1.0935, 1.0956],
        [   nan, 0.9425, 0.9558,  ..., 0.9912, 1.0044, 1.0221]])
tensor([[ 0.1317,  0.0447, -0.0381,  ...,  0.2179, -0.1232,  0.0799],
        [ 0.0524, -0.0387, -0.1125,  ...,  0.3429, -0.1894,  0.1721],
        [ 0.2152,  0.1556, -0.0535,  ...,  0.2678, -0.1133,  0.0208],
        ...,
        [-0.0285, -0.0891, -0.0439,  ...,  0.2412, -0.0348,  0.0685],
        [-0.0556, -0.0858,  0.0381,  ...,  0.3119, -0.1704,  0.2227],
        [-0.0377, -0.0900, -0.0624,  ...,  0.3189, -0.1293,  0.1695]],
       grad_fn=<AddmmBackward0

Period 2001-01-31:   0%|          | 0/214 [00:01<?, ?it/s, train_loss=nan, val_loss=nan, epoch=1]

tensor(1280) tensor(54)
tensor(1172.5795) tensor(1.0070) tensor(87) tensor(0.3349) tensor(nan)
tensor([[1.0099, 1.0252, 1.0309,  ..., 1.0667, 1.0579, 1.0489],
        [1.0762, 1.0693, 1.1421,  ..., 1.1862, 1.1033, 1.0562],
        [0.9760, 0.9738, 0.9978,  ..., 0.9896, 1.0063, 0.9363],
        ...,
        [1.0031, 1.0018, 0.9941,  ..., 0.9803, 0.9793, 0.9754],
        [   nan, 1.0131, 0.9934,  ..., 1.0526, 1.0526, 0.9539],
        [0.9851, 0.9830, 0.9787,  ...,    nan,    nan, 0.9939]])
tensor([[nan, nan, nan,  ..., nan, nan, nan],
        [nan, nan, nan,  ..., nan, nan, nan],
        [nan, nan, nan,  ..., nan, nan, nan],
        ...,
        [nan, nan, nan,  ..., nan, nan, nan],
        [nan, nan, nan,  ..., nan, nan, nan],
        [nan, nan, nan,  ..., nan, nan, nan]], grad_fn=<AddmmBackward0>)
Loss tensor(nan, grad_fn=<MeanBackward0>)
NaNs
tensor(0) tensor(0) tensor(0) tensor(0) tensor(1280) tensor(52)
tensor(1437.2019) tensor(1.0095) tensor(91) tensor(0.3349) tensor(nan)
tensor([[

Period 2001-01-31:   0%|          | 0/214 [00:01<?, ?it/s, train_loss=nan, val_loss=nan, epoch=2]

tensor(0) tensor(1280) tensor(83)
tensor(37.0132) tensor(1.0065) tensor(90) tensor(0.3349) tensor(nan)
tensor([[   nan,    nan, 0.9667,  ..., 0.9327, 0.9333, 0.9333],
        [   nan,    nan, 0.9552,  ..., 0.8840, 0.8844, 0.8757],
        [1.0379, 1.0155, 1.0114,  ..., 0.9749, 1.0304, 0.9784],
        ...,
        [1.0303,    nan, 0.9392,  ..., 1.1642, 1.3372, 1.2459],
        [1.0092, 1.0044, 1.0074,  ..., 0.9880, 0.9856, 0.9869],
        [1.1703, 1.0853, 1.0769,  ..., 0.8986, 0.8977, 0.8942]])
tensor([[nan, nan, nan,  ..., nan, nan, nan],
        [nan, nan, nan,  ..., nan, nan, nan],
        [nan, nan, nan,  ..., nan, nan, nan],
        ...,
        [nan, nan, nan,  ..., nan, nan, nan],
        [nan, nan, nan,  ..., nan, nan, nan],
        [nan, nan, nan,  ..., nan, nan, nan]], grad_fn=<AddmmBackward0>)
Loss tensor(nan, grad_fn=<MeanBackward0>)
NaNs
tensor(0) tensor(0) tensor(0) tensor(0) tensor(1280) tensor(80)
tensor(4231.9893) tensor(1.0085) tensor(82) tensor(0.3349) tensor(nan)
t

Period 2001-01-31:   0%|          | 0/214 [00:02<?, ?it/s, train_loss=nan, val_loss=nan, epoch=3]

tensor([[nan, nan, nan,  ..., nan, nan, nan],
        [nan, nan, nan,  ..., nan, nan, nan],
        [nan, nan, nan,  ..., nan, nan, nan],
        ...,
        [nan, nan, nan,  ..., nan, nan, nan],
        [nan, nan, nan,  ..., nan, nan, nan],
        [nan, nan, nan,  ..., nan, nan, nan]], grad_fn=<AddmmBackward0>)
Loss tensor(nan, grad_fn=<MeanBackward0>)
NaNs
tensor(0) tensor(0) tensor(0) tensor(0) tensor(1280) tensor(52)
tensor(1437.2019) tensor(1.0095) tensor(91) tensor(0.3349) tensor(nan)
tensor([[1.0171, 1.0098, 0.9794,  ..., 0.8364, 0.8165, 0.8274],
        [0.9930, 0.9601, 0.9178,  ..., 0.6408, 0.6056, 0.5915],
        [1.0031, 1.0018, 0.9941,  ..., 1.1437, 0.9793, 1.6257],
        ...,
        [1.0366, 1.0018, 1.0272,  ..., 1.0195, 1.0250, 1.0535],
        [0.9879, 1.1032, 1.0805,  ..., 1.0364, 1.0358, 1.0128],
        [1.0031, 1.2801, 1.2702,  ..., 1.2526, 1.9043, 2.4386]])
tensor([[nan, nan, nan,  ..., nan, nan, nan],
        [nan, nan, nan,  ..., nan, nan, nan],
        [nan

Period 2001-01-31:   0%|          | 0/214 [00:02<?, ?it/s, train_loss=nan, val_loss=nan, epoch=3]

NaNs
tensor(0) tensor(0) tensor(0) tensor(0) tensor(1280) tensor(83)
tensor(37.0132) tensor(1.0065) tensor(90) tensor(0.3349) tensor(nan)
tensor([[   nan,    nan, 0.9667,  ..., 0.9327, 0.9333, 0.9333],
        [   nan,    nan, 0.9552,  ..., 0.8840, 0.8844, 0.8757],
        [1.0379, 1.0155, 1.0114,  ..., 0.9749, 1.0304, 0.9784],
        ...,
        [1.0303,    nan, 0.9392,  ..., 1.1642, 1.3372, 1.2459],
        [1.0092, 1.0044, 1.0074,  ..., 0.9880, 0.9856, 0.9869],
        [1.1703, 1.0853, 1.0769,  ..., 0.8986, 0.8977, 0.8942]])
tensor([[nan, nan, nan,  ..., nan, nan, nan],
        [nan, nan, nan,  ..., nan, nan, nan],
        [nan, nan, nan,  ..., nan, nan, nan],
        ...,
        [nan, nan, nan,  ..., nan, nan, nan],
        [nan, nan, nan,  ..., nan, nan, nan],
        [nan, nan, nan,  ..., nan, nan, nan]], grad_fn=<AddmmBackward0>)
Loss tensor(nan, grad_fn=<MeanBackward0>)
NaNs
tensor(0) tensor(0) tensor(0) tensor(0) tensor(1280) tensor(80)
tensor(4231.9893) tensor(1.0085) tens




KeyboardInterrupt: ignored