In [3]:
!pip install einops
!pip install gpytorch

Collecting einops
  Downloading einops-0.6.1-py3-none-any.whl (42 kB)
[?25l     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/42.2 kB[0m [31m?[0m eta [36m-:--:--[0m[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m42.2/42.2 kB[0m [31m4.0 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: einops
Successfully installed einops-0.6.1
Collecting gpytorch
  Downloading gpytorch-1.11-py3-none-any.whl (266 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m266.1/266.1 kB[0m [31m24.1 MB/s[0m eta [36m0:00:00[0m
Collecting linear-operator>=0.5.0 (from gpytorch)
  Downloading linear_operator-0.5.0-py3-none-any.whl (172 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m173.0/173.0 kB[0m [31m26.9 MB/s[0m eta [36m0:00:00[0m
Collecting jaxtyping>=0.2.9 (from linear-operator>=0.5.0->gpytorch)
  Downloading jaxtyping-0.2.20-py3-none-any.whl (24 kB)
Collecting typeguard~=2.13.3 (from linear-operator>=0.5.0->gpyto

In [4]:
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader
import torch.optim as opt
from torch.distributions.normal import Normal
import gpytorch as gpt
from einops import rearrange
import pandas as pd
import numpy as np
from scipy.stats import lognorm, norm
from sklearn.preprocessing import MinMaxScaler, LabelEncoder
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score, make_scorer, mean_absolute_percentage_error
from statsmodels.tsa.seasonal import seasonal_decompose
from copy import copy, deepcopy
import plotly
from plotly import tools
import plotly.graph_objs as go
from plotly.subplots import make_subplots
from copy import copy, deepcopy
import dask
from google.colab import drive
drive.mount('/content/drive', force_remount=True)
#torch.cuda.set_device(0)
#torch.backends.cudnn.benchmark = True

Mounted at /content/drive


In [5]:
def plot_gp(mu, lb, ub, test_x, test_y, train_x=None, train_y=None, name='', samples={},
            layout='v', xaxis_title='Time', yaxis_title='Sales', fig_size=[1000,500], w=3, f=10):
    fig = make_subplots(rows=1, cols=1, subplot_titles=("Samples"))
    samples = {'sample '+str(i): s for i, s in enumerate(samples)} if not isinstance(samples, dict) else samples
    if train_x is not None:
        fig.add_trace(go.Scatter(x=train_x, y=train_y, mode='lines', name='History', line=dict(width=w), line_color='#1a76ff'))  # plot training data

    fig.add_trace(
        go.Scatter(x=test_x, y=ub, fill=None, mode='lines', line_color='rgba(199, 19, 19, 0.3)',
                   fillcolor='rgba(249, 129, 37, 0.3)', showlegend=True, name='95% uncertainty interval'))
    fig.add_trace(
        go.Scatter(x=test_x, y=lb, fill='tonexty', mode='lines', line_color='rgba(199, 19, 19, 0.3)',
                   fillcolor='rgba(249, 129, 37, 0.3)', showlegend=True, name='95% uncertainty interval'))

    fig.add_trace(go.Scatter(x=test_x, y=mu, line=dict(color='#c71313', width=w), mode='lines', name='Skyolia Forecast'))  # plot the mean
    fig.add_trace(go.Scatter(x=test_x, y=test_y, line=dict(color='#1a76ff', width=w), mode='lines', name='Observed'))
    for k, v in samples.items():
        fig.add_trace(go.Scatter(x=test_x, y=v, name=k, mode='lines',
                                 line=dict(width=w)))  # plot samples
    fig.update_layout(title_text=name, paper_bgcolor='#343434', plot_bgcolor='#343434', xaxis_title=xaxis_title, yaxis_title=yaxis_title,
                          font=dict(family="Montserrat", color="#fff", size=f), title_x=0.5, hovermode="x")
    fig.update_xaxes(showgrid=True, showline=False, gridcolor='#c9c9c9', gridwidth=0.0005)
    fig.update_yaxes(showgrid=True, showline=False, gridcolor='#c9c9c9', gridwidth=0.0005)
    return fig

def confidence_interval(mu, cov):
    std = np.sqrt(np.diag(cov)) #compute std
    uncertainty = 1.96 * std
    return mu, std, mu-uncertainty, mu+uncertainty

def order_quantity(mu, std, cu, co):
    cf = cu/(cu+co)
    return scipy.stats.norm.ppf(cf, loc=mu, scale=std)

def plot_cov(covs, cols, subplot_titles, labels=None):
    fig = make_subplots(rows=int(len(covs)/cols) + 1, cols=cols, subplot_titles=subplot_titles)
    height = (1000/cols)*2
    for i, cov in enumerate(covs):
        row, col = int(i / cols)+1, (i%cols)+1
        fig.add_trace(go.Heatmap(z=cov, x=labels, y=labels, colorscale='Greys'), row=row, col=col)
    fig.update_layout(title_text='Cov matrix', height=height)#, yaxis1=dict(domain=[0, 1]), yaxis1=dict(domain=[0, 1])
    return fig

def plot_ts_decomposition(df, index, obs, model="additive", features=False, period=None, samples=None):
    df.index = df[index]
    decompose = df[[index, obs]]
    decompose.index = df[index]
    decompose = decompose[[obs]]

    decomposition = seasonal_decompose(decompose, model=model, period=period)
    trend, seasonal, residual = decomposition.trend, decomposition.seasonal, decomposition.resid
    fig = go.Figure()
    fig.add_trace(go.Scatter(x=decompose.index, y=decompose.iloc[:,0], mode='lines', name='observed')) #plot the observed
    fig.add_trace(go.Scatter(x=decompose.index, y=trend.tolist(), mode='lines', name='trend')) #plot the trend
    fig.add_trace(go.Scatter(x=decompose.index, y=seasonal.tolist(), mode='lines', name='seasonal')) #plot the seasonal
    fig.add_trace(go.Scatter(x=decompose.index, y=residual.tolist(), mode='lines', name='residual')) #plot the residual
    if features:
        features = [col for col in list(df.columns) if col not in [index, obs]]
        for col in features:
            fig.add_trace(go.Scatter(x=decompose.index, y=df[col].values, name=col, mode='lines'))
    if samples is not None:
        for i, s in enumerate(samples):
            fig.add_trace(go.Scatter(x=decompose.index, y=s, name='sample '+str(i), mode='lines')) #plot samples
    fig.update_layout(title_text='Decomposition')
    return fig, trend.dropna().values, seasonal.dropna().values, residual.dropna().values

def plot_stl_decomposition(df, index, obs, model="additive", period=None, seasonal=7, samples=None):
    df.index = df[index]
    decompose = df[[index, obs]]
    decompose.index = df[index]
    decompose = decompose[[obs]]

    decomposition = STL(decompose, period=period, seasonal=seasonal).fit()
    trend, seasonal, residual = decomposition.trend, decomposition.seasonal, decomposition.resid
    fig = go.Figure()
    fig.add_trace(go.Scatter(x=decompose.index, y=decompose.iloc[:,0], mode='lines', name='observed')) #plot the observed
    fig.add_trace(go.Scatter(x=decompose.index, y=trend.tolist(), mode='lines', name='trend')) #plot the trend
    fig.add_trace(go.Scatter(x=decompose.index, y=seasonal.tolist(), mode='lines', name='seasonal')) #plot the seasonal
    fig.add_trace(go.Scatter(x=decompose.index, y=residual.tolist(), mode='lines', name='residual')) #plot the residual
    if samples is not None:
        for i, s in enumerate(samples):
            fig.add_trace(go.Scatter(x=decompose.index, y=s, name='sample '+str(i), mode='lines')) #plot samples
    fig.update_layout(title_text='Decomposition')
    return fig, trend.dropna().values, seasonal.dropna().values, residual.dropna().values

def date_encoding(df, dt_column, daily=True, weekly=True, yearly=True):
    df[dt_column], periodic_column = df[dt_column].astype('datetime64[ns]'), []
    if daily:
        df['hourofday'] = df[dt_column].dt.hour
        df['sin_hourofday'] = np.sin(2*np.pi*df['hourofday']/np.max(df['hourofday']))
        df['cos_hourofday'] = np.cos(2*np.pi*df['hourofday']/np.max(df['hourofday']))
        df.drop(columns=['hourofday'], inplace=True), periodic_column.extend(['sin_hourofday', 'cos_hourofday'])

    if weekly:
        df['dayofweek'] = df[dt_column].dt.dayofweek
        df['sin_dayofweek'] = np.sin(2*np.pi*df['dayofweek']/np.max(df['dayofweek']))
        df['cos_dayofweek'] = np.cos(2*np.pi*df['dayofweek']/np.max(df['dayofweek']))
        df.drop(columns=['dayofweek'], inplace=True), periodic_column.extend(['sin_dayofweek', 'cos_dayofweek'])
    if yearly:
        df['dayofyear'] = df[dt_column].dt.dayofyear
        df['sin_dayofyear'] = np.sin(2*np.pi*df['dayofyear']/np.max(df['dayofyear']))
        df['cos_dayofyear'] = np.cos(2*np.pi*df['dayofyear']/np.max(df['dayofyear']))
        df.drop(columns=['dayofyear'], inplace=True), periodic_column.extend(['sin_dayofyear', 'cos_dayofyear'])
    return df, periodic_column


def categorical_encoding(train, valid, test, categorical):
    new_cat, LEncoders = [], {}
    for cat in categorical:
        LE = LabelEncoder()
        train[cat] = LE.fit_transform(train[cat]).astype(int)
        valid[cat] = LE.transform(valid[cat]).astype(int)
        test[cat] = LE.transform(test[cat]).astype(int)
        LEncoders[cat] = LE
    return train, valid, test, LEncoders


def numerical_scaling(train, valid, test, numerical):
    MS = MinMaxScaler(feature_range=(-1, 1))
    scaled_train = MS.fit_transform(train[numerical])
    scaled_valid = MS.transform(valid[numerical])
    scaled_test = MS.transform(test[numerical])
    train[numerical] = scaled_train
    valid[numerical] = scaled_valid
    test[numerical] = scaled_test
    return train, valid, test, MS


def dkl_ts_pipeline(train, valid, test, categorical, numerical, periodic, eps=0):
    _train, _valid, _test = train.copy(), valid.copy(), test.copy()
    _train, _valid, _test, LEncoders = categorical_encoding(_train, _valid, _test, categorical)  # CATEGORICAL ENCODING
    if len(numerical) > 0:
        _train, _valid, _test, MS = numerical_scaling(_train, _valid, _test, numerical)  # NUMERCIAL SCALING
    features = categorical + numerical + periodic
    X_train, T_train = _train[features], _train[time_column]
    X_valid, T_valid = _valid[features], _valid[time_column]
    X_test, T_test = _test[features], _test[time_column]
    #Y_train, Y_valid, Y_test, YScaler = output_scaling(train, valid, test, out_column)
    Y_train, Y_valid, Y_test = np.log(_train[out_column].values+eps), np.log(_valid[out_column].values+eps), _test[out_column].values+eps
    return X_train, T_train, Y_train, X_valid, T_valid, Y_valid, X_test, T_test, Y_test


def shift_df(df, shift, dropna=True):
    origin = df.copy()
    for i in range(1, shift+1):
        shifted_df = origin.shift(i)
        shifted_df = shifted_df.rename(columns=dict(zip(shifted_df.columns, [str(c)+'_'+str(i) for c in shifted_df.columns])))
        df = pd.concat([shifted_df, df], axis=1)
    return df.dropna() if dropna else df

def mixture_quantile(pi, mu, std, q):
    pi, mu, std = torch.from_numpy(pi), torch.from_numpy(mu), torch.from_numpy(std)
    n, solutions, evaluations = pi.shape[0], [], []
    for i in range(n):
        mix = Categorical(pi[i])
        comp = LogNormal(mu[i], std[i], validate_args=None)
        pdf = MixtureSameFamily(mix, comp)
        def objf(x):
            x = torch.from_numpy(x)
            return torch.abs(pdf.cdf(x) - q).data.numpy()
        def bounds():
            res = [LogNormal(m, s, validate_args=None).icdf(torch.tensor(q)).data.numpy() for m, s in zip(mu[i], std[0])]
            return np.min(res), np.max(res)

        lb, ub = bounds()
        result = dual_annealing(objf, list(zip([lb], [ub])),maxiter=1000)
        solution = result['x']
        evaluation = objf(solution)
        solutions.append(solution), evaluations.append(evaluation)
    return np.asarray(solutions).ravel(), np.asarray(evaluations)

In [6]:
df = pd.read_csv('/content/drive/My Drive/Colab Notebooks/time series/rossmann sales/sub_rossmann.csv')
df

Unnamed: 0,Store,Date,Sales,Customers,Open,Promo,StateHoliday,SchoolHoliday,StoreType,Assortment,CompetitionDistance,Promo2,Competition
0,45,2015-07-31,6301,442,1,1,0,1,d,a,9710.0,0,1.0
1,45,2015-07-30,6063,445,1,1,0,1,d,a,9710.0,0,1.0
2,45,2015-07-29,5341,387,1,1,0,1,d,a,9710.0,0,1.0
3,45,2015-07-28,5504,365,1,1,0,1,d,a,9710.0,0,1.0
4,45,2015-07-27,7450,500,1,1,0,1,d,a,9710.0,0,1.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...
4705,1045,2013-01-05,4854,622,1,0,0,0,a,c,26990.0,0,0.0
4706,1045,2013-01-04,6351,769,1,0,0,1,a,c,26990.0,0,0.0
4707,1045,2013-01-03,7582,923,1,0,0,1,a,c,26990.0,0,0.0
4708,1045,2013-01-02,8282,975,1,0,0,1,a,c,26990.0,0,0.0


In [7]:
out_column, time_column = 'Sales', 'Date'
df[time_column] = pd.to_datetime(df[time_column])

to_remove = ['Customers'] + [c for c in df.columns if df[c].nunique()==1]
features = [c for c in df.columns if (c not in [out_column]) and (c not in to_remove)]
categorical = [c for c in features if (df[c].dtype=='object') and (df[c].nunique() >= 2)] + ['Store']
numerical = [c for c in features if (df[c].dtype=='float') or (df[c].dtype=='int') and (c not in categorical)]

fig = make_subplots(rows=1, cols=2, horizontal_spacing = 0.03, subplot_titles=('Label Distribution', "Features Correlation"))
fig.append_trace(go.Histogram(x=df[out_column]), row=1, col=1)
fig.append_trace(go.Heatmap(z=df[numerical+[out_column]].corr(),x=numerical+[out_column],y=numerical+[out_column]), row=1, col=2)
fig.show()

In [None]:
sub = df.loc[(df['Store'] == 1045)]
fig, trend, seasonal, residual = plot_ts_decomposition(sub, time_column, out_column, features=True, period=7)
fig.show()
print(np.mean(trend), np.var(trend), np.std(trend))
print(np.mean(seasonal), np.var(seasonal), np.std(seasonal))
print(np.mean(residual), np.var(residual), np.std(residual))

7056.175518925518 2127566.948610514 1458.618164089051
3.6627465284194023 4387582.072148839 2094.6555975025676
-0.20498168967954772 2400761.7794677755 1549.4391822423283


In [8]:
df, periodic = date_encoding(df, time_column, daily=False, weekly=True, yearly=True)
train = df[df[time_column] < '2015-06-01']
valid = df[(df[time_column] >= '2015-06-01') & (df[time_column] < '2015-06-14')]
test = df[df[time_column]>='2015-06-14']

X_train, T_train, Y_train, X_valid, T_valid, Y_valid, X_test, T_test, Y_test = dkl_ts_pipeline(train, valid, test, categorical, numerical, periodic, eps=1e-5)
X_train.shape, X_test.shape, Y_train.shape, Y_test.shape

((4405, 14), (240, 14), (4405,), (240,))

In [None]:
X_train

Unnamed: 0,StateHoliday,StoreType,Assortment,Store,Open,Promo,SchoolHoliday,CompetitionDistance,Promo2,Competition,sin_dayofweek,cos_dayofweek,sin_dayofyear,cos_dayofyear
61,0,2,0,0,-1.0,-1.0,-1.0,-0.511811,-1.0,1.0,-2.449294e-16,1.0,0.516062,-0.856551
62,0,2,0,0,1.0,-1.0,-1.0,-0.511811,-1.0,1.0,-8.660254e-01,0.5,0.530730,-0.847541
63,0,2,0,0,1.0,-1.0,-1.0,-0.511811,-1.0,1.0,-8.660254e-01,-0.5,0.545240,-0.838280
64,0,2,0,0,1.0,-1.0,-1.0,-0.511811,-1.0,1.0,1.224647e-16,-1.0,0.559589,-0.828770
65,0,2,0,0,1.0,-1.0,-1.0,-0.511811,-1.0,1.0,8.660254e-01,-0.5,0.573772,-0.819015
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4705,0,0,1,4,1.0,-1.0,-1.0,1.000000,-1.0,-1.0,-8.660254e-01,0.5,0.085965,0.996298
4706,0,0,1,4,1.0,-1.0,1.0,1.000000,-1.0,-1.0,-8.660254e-01,-0.5,0.068802,0.997630
4707,0,0,1,4,1.0,-1.0,1.0,1.000000,-1.0,-1.0,1.224647e-16,-1.0,0.051620,0.998667
4708,0,0,1,4,1.0,-1.0,1.0,1.000000,-1.0,-1.0,8.660254e-01,-0.5,0.034422,0.999407


In [17]:
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

from torch import autograd

def sinc(x):
    x = x * np.pi
    return torch.where(x != 0., torch.sin(x) / x, 1.)


class RegrDataLoader(Dataset):
    def __init__(self, X, Y, numerical_col, categorical_col):
        self.X1, self.X2, self.Y = X[numerical_col].values.astype(np.float32), X[categorical_col].values, Y.astype(np.float32)

    def __len__(self):
        return len(self.Y)

    def __getitem__(self, idx):
        return self.X1[idx], self.X2[idx], self.Y[idx]

class Embedder(nn.Module):
    def __init__(self, vocab_size, dim):
        super().__init__()
        self.embeddings = nn.Embedding(vocab_size, dim)

    def forward(self, x):
        return self.embeddings(x)

class Embedder(nn.Module):
    def __init__(self, embedding_sizes):
        super().__init__()
        self.embeddings = nn.ModuleList([nn.Embedding(categories, size) for categories,size in embedding_sizes])

    def forward(self, x):
        x = [e(x[:,i]) for i,e in enumerate(self.embeddings)]
        return torch.cat(x, 1)

class NNModel(nn.Module):
    def __init__(self, input_shape, units=None, factors=None, activ=True, norm=False, dropout=False, slops=None):
        super().__init__()
        self.input_shape = input_shape
        self.units = units
        self.factors = factors
        self.activ, self.norm = activ, norm
        self.network = nn.ModuleList()
        if self.factors:
            self.units = np.round(self.input_shape * np.asarray(self.factors)).astype(int)
        if self.units is not None:
            self.dropout = np.zeros_like(self.units) if not dropout else dropout
            self.slops = np.full(len(self.units), 1) if slops is None else slops
            for i, j, k in zip(self.units, self.dropout, self.slops):
                if i >= 1:
                    block = self.__build_block__(input_shape, i, p=j, slop=k)
                    self.network.extend(block)
                    input_shape = i
        self.output_shape = input_shape
        self.reset_parameters()

    def __build_block__(self, input_shape, units, p, slop):
        block = []
        block.append(nn.Linear(input_shape, units, bias=not self.norm))
        if self.norm:
            #block.append(nn.BatchNorm1d(units))
            block.append(nn.LayerNorm(units, eps=1e-5))
        if self.activ:
            #block.append(nn.LeakyReLU())
            block.append(nn.ELU(slop))
            #block.append(nn.GELU())
        if p > 0:
            block.append(nn.Dropout(p))
        return block

    def forward(self, x):
        for layer in self.network:
          tmp = layer(x)
          x = tmp
        return x

    def reset_parameters(self):
        for layer in self.network:
            if isinstance(layer, nn.Linear):
                nn.init.xavier_normal_(layer.weight)
                if not self.norm:
                    layer.bias.data.fill_(0.1)


class TabMLP(nn.Module):
    def __init__(self, vocab_size, embed_size, numerical, mlp_units, mlp_dropout=0.00001):
        """
        categories: tuple containing the number of unique values within each category
        """
        super().__init__()
        embedding_sizes = list(zip(vocab_size, embed_size))
        self.cat_embedder = Embedder(embedding_sizes)
        self.norm = nn.BatchNorm1d(numerical, eps=1e-5)
        input_size = numerical + sum(embed_size)
        self.mlp = NNModel(int(input_size), units=mlp_units, factors=None, dropout=[mlp_dropout]*len(mlp_units))
        self.output_shape = mlp_units[-1]

    def forward(self, x_cont, x_cat):
        x = x_cont
        if x_cat.nelement() != 0: #skipped if there's no categorical feature
            x_ = self.cat_embedder(x_cat)
            x = self.norm(x)
            x = torch.cat([x_, x], 1)
        return self.mlp(x)

class DKLModel(gpt.models.ExactGP):
    def __init__(self, feature_extractor, ll_units, covar_module, train_x, train_y, likelihood):
        '''train_x = torch.from_numpy(train_x.values.astype(np.float32)) if train_x is not None else None
        train_y = torch.from_numpy(train_y.astype(np.float32)) if train_y is not None else None'''
        super(DKLModel, self).__init__(train_x, train_y, likelihood)
        self.mean_module = gpt.means.ConstantMean()
        self.covar_module = covar_module
        self.feature_extractor = feature_extractor
        self.last_layer = nn.Sequential(nn.Linear(feature_extractor.output_shape, ll_units))

    def forward(self, x_cont, x_cat):
        x = self.feature_extractor(x_cont, x_cat)
        x = self.last_layer(x)
        mean_x = self.mean_module(x)
        covar_x = self.covar_module(x)
        return gpt.distributions.MultivariateNormal(mean_x, covar_x)

class DKL:
    def __init__(self, model, likelihood, numerical_col, categorical_col):
        self.model = model.to(device)
        self.numerical_col, self.categorical_col = numerical_col, categorical_col
        self.losses = {'Epoch': [], 'Train': [], 'Test': [], 'State': [], 'LR': []}
        self.likelihood = likelihood.to(device)
        self.mll = gpt.mlls.ExactMarginalLogLikelihood(self.likelihood, self.model)

    def train_model(self, optim, train_load, grad_clip):
          total_loss = 0
          self.model.train()
          self.likelihood.train()
          for i, (X1, X2, Y) in enumerate(train_load):
              X1, X2, Y = X1.to(device), X2.to(device), Y.to(device)
              self.optim.zero_grad()
              pred = self.model(X1, X2)
              loss = -self.mll(pred, Y)
              loss.backward()
              torch.nn.utils.clip_grad_norm_(self.model.parameters(), grad_clip)
              self.optim.step()
              total_loss += loss.item()
          return total_loss/(i+1)

    def eval_model(self, test_load):
        self.model.eval()
        self.likelihood.eval()
        total_loss = 0
        with torch.no_grad(), gpt.settings.use_toeplitz(False), gpt.settings.fast_pred_var():
            for i, (X1, X2, Y) in enumerate(test_load):
                X1, X2, Y = X1.to(device), X2.to(device), Y.to(device)
                pred = self.model(X1, X2)
                loss = -self.mll(pred, Y)
                total_loss += loss.item()
        return total_loss/(i+1)

    def fit(self, X_train, Y_train, epoch, lr, opt_kwarg, batch_size=None,  grad_clip=100, momentum=0.9, X_test=None, Y_test=None, l2_reg=0, eval=True, verbose=True, save=True):
        batch_size = len(X_train) if batch_size is None else batch_size
        train_load = DataLoader(RegrDataLoader(X_train, Y_train, self.numerical_col, self.categorical_col), batch_size=batch_size, shuffle=False)  # DATALOADER obj
        if X_test is not None:
            test_load = DataLoader(RegrDataLoader(X_test, Y_test, self.numerical_col, self.categorical_col), batch_size=batch_size, shuffle=False)  # DATALOADER obj

        best_loss = 1e100
        self.optim = opt.SGD([{'params': model.feature_extractor.parameters()},
                         {'params': model.covar_module.parameters()},
                         {'params': model.mean_module.parameters()},
                         {'params': model.likelihood.parameters()}], lr=lr, momentum=momentum, nesterov=True)

        '''self.optim = opt.Adam([{'params': model.feature_extractor.parameters()},
                     {'params': model.covar_module.parameters()},
                     {'params': model.mean_module.parameters()},
                     {'params': model.likelihood.parameters()}], lr=lr)'''

        scheduler = None
        scheduler = opt.lr_scheduler.CyclicLR(self.optim, **opt_kwarg)
        #scheduler = opt.lr_scheduler.ReduceLROnPlateau(self.optim, **opt_kwarg)

        for i in range(epoch):
            if verbose:
                print('##### EPOCH ' + str(i) + ' #####')

            try:
                train_loss = self.train_model(self.optim, train_load, grad_clip)
            except gpt.utils.errors.NotPSDError:
                print('EXCEPTION')
                continue

            if verbose:
                print('train loss : ', train_loss)
            self.losses['Epoch'].append(i), self.losses['Train'].append(train_loss)

            if X_test is not None:
                try:
                    valid_loss = self.eval_model(test_load)
                except gpt.utils.errors.NotPSDError:
                    print('EXCEPTION')
                    continue

                if verbose:
                    print('test loss : ', valid_loss)
                self.losses['Test'].append(valid_loss)

                if scheduler is not None:
                    '''scheduler.step(valid_loss)
                    self.losses['LR'].append(self.optim.param_groups[0]['lr'])'''
                    scheduler.step()
                    self.losses['LR'].append(scheduler.get_last_lr()[0])

                if valid_loss < best_loss:
                    self.losses['BState'] = deepcopy(self.model.state_dict())
                    best_loss = valid_loss
                    print('===========SAVE===========')

    def predict(self, X):
        self.model.eval()
        self.likelihood.eval()
        X1 = torch.tensor(X[self.numerical_col].values.astype(np.float32)).to(device)
        X2 = torch.tensor(X[self.categorical_col].values).to(device)
        with torch.no_grad(), gpt.settings.use_toeplitz(False), gpt.settings.fast_pred_var():
            preds = self.likelihood(self.model(X1, X2))
            #-self.mll(preds, torch.from_numpy(y_scaler.transform(test_y[:,np.newaxis].astype(np.float32)).flatten())), preds.covariance_matrix.shape
        mu_test = preds.mean
        std = torch.sqrt(preds.variance)
        cov_test = preds.covariance_matrix
        return mu_test, std, cov_test


def create_kernel(param):
    #kernel = gpt.kernels.ScaleKernel(gpt.kernels.RBFKernel(ard_num_dims=param))
    #kernel = gpt.kernels.ScaleKernel(gpt.kernels.RQKernel(ard_num_dims=param))
    kernel = gpt.kernels.ScaleKernel(gpt.kernels.PiecewisePolynomialKernel(q=0, ard_num_dims=param))
    #kernel = gpt.kernels.ScaleKernel(gpt.kernels.SpectralMixtureKernel(num_mixtures=2, ard_num_dims=param))
    #kernel = gpt.kernels.ScaleKernel(gpt.kernels.PolynomialKernel(power=3))
    return kernel

def gradient_clipper(model: nn.Module, val: float) -> nn.Module:
    def process_grad(grad):
        grad[grad != grad] = 1e-10
        return torch.clamp(grad, -val, val)
    for parameter in model.parameters():
        parameter.register_hook(lambda grad: process_grad(grad))
    return model

In [18]:
ll_units, epoch, lr, batch_size, mlp_d = 16, 50000, 1e-5, None, 1e-6
cyclic_kwarg = {'base_lr': lr, 'max_lr': 1e-2, 'step_size_up':250, 'step_size_down':250}
plateau_kwarg = {'factor':0.5, 'patience':200, 'verbose':True, 'min_lr':1e-7, 'mode':'min'}

embed_size = np.ceil(np.sqrt(list(X_train[categorical].nunique()))).astype(int)
feature_extractor = TabMLP(df[categorical].nunique(), embed_size, len(numerical+periodic), [128, 512, 768, 256], mlp_dropout=mlp_d)
covar_module = create_kernel(ll_units)

likelihood = gpt.likelihoods.GaussianLikelihood()
numerical_torch = torch.from_numpy(X_train[numerical+periodic].values.astype(np.float32))
categorical_torch = torch.from_numpy(X_train[categorical].values.astype(int))
model = gradient_clipper(DKLModel(feature_extractor, ll_units, covar_module, (numerical_torch, categorical_torch), torch.from_numpy(Y_train.astype(np.float32)), likelihood), 10)
dkl = DKL(model, likelihood, numerical+periodic, categorical)
#model.load_state_dict(best_state)
print(model)
print(sum(p.numel() for p in dkl.model.parameters() if p.requires_grad))
dkl.fit(X_train, Y_train, epoch, lr, cyclic_kwarg, batch_size=batch_size, grad_clip=10, momentum=0.9, X_test=X_valid, Y_test=Y_valid, l2_reg=0, eval=True, verbose=True)

DKLModel(
  (likelihood): GaussianLikelihood(
    (noise_covar): HomoskedasticNoise(
      (raw_noise_constraint): GreaterThan(1.000E-04)
    )
  )
  (mean_module): ConstantMean()
  (covar_module): ScaleKernel(
    (base_kernel): PiecewisePolynomialKernel(
      (raw_lengthscale_constraint): Positive()
    )
    (raw_outputscale_constraint): Positive()
  )
  (feature_extractor): TabMLP(
    (cat_embedder): Embedder(
      (embeddings): ModuleList(
        (0): Embedding(4, 2)
        (1): Embedding(3, 2)
        (2): Embedding(2, 2)
        (3): Embedding(5, 3)
      )
    )
    (norm): BatchNorm1d(10, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (mlp): NNModel(
      (network): ModuleList(
        (0): Linear(in_features=19, out_features=128, bias=True)
        (1): ELU(alpha=1)
        (2): Dropout(p=1e-06, inplace=False)
        (3): Linear(in_features=128, out_features=512, bias=True)
        (4): ELU(alpha=1)
        (5): Dropout(p=1e-06, inplace=False)
    


A not p.d., added jitter of 1.0e-06 to the diagonal


A not p.d., added jitter of 1.0e-05 to the diagonal


A not p.d., added jitter of 1.0e-04 to the diagonal



EXCEPTION
##### EPOCH 1466 #####
train loss :  -0.25647711753845215
test loss :  0.058395855128765106
##### EPOCH 1467 #####
train loss :  -0.2520563304424286
test loss :  0.008254652842879295
##### EPOCH 1468 #####
train loss :  -0.3225701153278351
test loss :  0.008810014463961124
##### EPOCH 1469 #####
train loss :  -0.31648972630500793
test loss :  -0.07003302872180939
##### EPOCH 1470 #####
train loss :  -0.24984747171401978
test loss :  -0.0050080521032214165
##### EPOCH 1471 #####
train loss :  -0.3910536468029022
test loss :  -0.08272293955087662
##### EPOCH 1472 #####
train loss :  -0.44096916913986206
test loss :  0.023932941257953644
##### EPOCH 1473 #####
train loss :  -0.37627318501472473
test loss :  -0.03470071777701378
##### EPOCH 1474 #####
train loss :  -0.33753159642219543
test loss :  0.13212914764881134
##### EPOCH 1475 #####
train loss :  -0.473258912563324
test loss :  -0.05661937966942787
##### EPOCH 1476 #####
train loss :  -0.4568047821521759
test loss :  0.00


CG terminated in 1000 iterations with average residual norm 739.7330322265625 which is larger than the tolerance of 0.01 specified by linear_operator.settings.cg_tolerance. If performance is affected, consider raising the maximum number of CG iterations by running code in a linear_operator.settings.max_cg_iterations(value) context.



test loss :  837100608.0
##### EPOCH 1870 #####
train loss :  -0.5219957232475281
test loss :  0.6955903172492981
##### EPOCH 1871 #####
train loss :  -0.4423312544822693
test loss :  0.2790466248989105
##### EPOCH 1872 #####
train loss :  -0.21670453250408173
test loss :  0.19046659767627716
##### EPOCH 1873 #####
train loss :  -0.30382323265075684
test loss :  0.21547546982765198
##### EPOCH 1874 #####
train loss :  -0.19106151163578033
test loss :  0.26218003034591675
##### EPOCH 1875 #####
train loss :  -0.2799169421195984
test loss :  0.3936448395252228
##### EPOCH 1876 #####
train loss :  -0.4995112717151642
test loss :  0.6135106086730957
##### EPOCH 1877 #####
train loss :  -0.23984363675117493
test loss :  0.37087753415107727
##### EPOCH 1878 #####
train loss :  -0.11173991858959198
test loss :  0.7681018710136414
##### EPOCH 1879 #####
train loss :  -0.18831327557563782
test loss :  0.6167978048324585
##### EPOCH 1880 #####
train loss :  -0.25613394379615784
test loss :  1.83


CG terminated in 1000 iterations with average residual norm 216.99169921875 which is larger than the tolerance of 0.01 specified by linear_operator.settings.cg_tolerance. If performance is affected, consider raising the maximum number of CG iterations by running code in a linear_operator.settings.max_cg_iterations(value) context.



test loss :  48265468.0
##### EPOCH 1913 #####
train loss :  -0.3947789967060089
test loss :  2.1939895153045654
##### EPOCH 1914 #####
train loss :  -0.3003493845462799
test loss :  1.888336181640625
##### EPOCH 1915 #####
train loss :  -0.3302593529224396
test loss :  0.7151656150817871
##### EPOCH 1916 #####
train loss :  -0.6287643313407898
test loss :  2.2285563945770264
##### EPOCH 1917 #####
train loss :  -0.33821308612823486
EXCEPTION
##### EPOCH 1918 #####
train loss :  -0.3511289656162262



CG terminated in 1000 iterations with average residual norm 486.8565368652344 which is larger than the tolerance of 0.01 specified by linear_operator.settings.cg_tolerance. If performance is affected, consider raising the maximum number of CG iterations by running code in a linear_operator.settings.max_cg_iterations(value) context.



test loss :  171957408.0
##### EPOCH 1919 #####
train loss :  -0.32098865509033203
test loss :  0.8872455358505249
##### EPOCH 1920 #####
train loss :  -0.20268139243125916
test loss :  1.3456357717514038
##### EPOCH 1921 #####
train loss :  -0.24841676652431488



CG terminated in 1000 iterations with average residual norm 829.847412109375 which is larger than the tolerance of 0.01 specified by linear_operator.settings.cg_tolerance. If performance is affected, consider raising the maximum number of CG iterations by running code in a linear_operator.settings.max_cg_iterations(value) context.



test loss :  737375616.0
##### EPOCH 1922 #####
train loss :  -0.4590213894844055
test loss :  0.5598424077033997
##### EPOCH 1923 #####
train loss :  -0.3706832826137543



CG terminated in 1000 iterations with average residual norm 478.2005310058594 which is larger than the tolerance of 0.01 specified by linear_operator.settings.cg_tolerance. If performance is affected, consider raising the maximum number of CG iterations by running code in a linear_operator.settings.max_cg_iterations(value) context.



test loss :  320208672.0
##### EPOCH 1924 #####
train loss :  -0.5697180032730103



CG terminated in 1000 iterations with average residual norm 3246.17138671875 which is larger than the tolerance of 0.01 specified by linear_operator.settings.cg_tolerance. If performance is affected, consider raising the maximum number of CG iterations by running code in a linear_operator.settings.max_cg_iterations(value) context.



test loss :  9798865920.0
##### EPOCH 1925 #####
train loss :  -0.2944513261318207



CG terminated in 1000 iterations with average residual norm 658.8419799804688 which is larger than the tolerance of 0.01 specified by linear_operator.settings.cg_tolerance. If performance is affected, consider raising the maximum number of CG iterations by running code in a linear_operator.settings.max_cg_iterations(value) context.



test loss :  937514944.0
##### EPOCH 1926 #####
train loss :  -0.17401756346225739



CG terminated in 1000 iterations with average residual norm 802.16796875 which is larger than the tolerance of 0.01 specified by linear_operator.settings.cg_tolerance. If performance is affected, consider raising the maximum number of CG iterations by running code in a linear_operator.settings.max_cg_iterations(value) context.



test loss :  477752448.0
##### EPOCH 1927 #####
train loss :  0.03978610783815384
test loss :  0.46984851360321045
##### EPOCH 1928 #####
train loss :  -0.5703518390655518
test loss :  1.0145173072814941
##### EPOCH 1929 #####
train loss :  -0.3022376596927643
test loss :  0.8331627249717712
##### EPOCH 1930 #####
train loss :  -0.4316359758377075
test loss :  0.7727203369140625
##### EPOCH 1931 #####
train loss :  -0.32431095838546753
test loss :  0.7682474255561829
##### EPOCH 1932 #####
train loss :  -0.36404386162757874
test loss :  1.6014212369918823
##### EPOCH 1933 #####
train loss :  -0.3491806089878082



CG terminated in 1000 iterations with average residual norm 268.9877014160156 which is larger than the tolerance of 0.01 specified by linear_operator.settings.cg_tolerance. If performance is affected, consider raising the maximum number of CG iterations by running code in a linear_operator.settings.max_cg_iterations(value) context.



test loss :  45682452.0
##### EPOCH 1934 #####
train loss :  -0.2511545717716217



CG terminated in 1000 iterations with average residual norm 745.0687255859375 which is larger than the tolerance of 0.01 specified by linear_operator.settings.cg_tolerance. If performance is affected, consider raising the maximum number of CG iterations by running code in a linear_operator.settings.max_cg_iterations(value) context.



test loss :  455395712.0
##### EPOCH 1935 #####
train loss :  -0.40552836656570435



CG terminated in 1000 iterations with average residual norm 491.65948486328125 which is larger than the tolerance of 0.01 specified by linear_operator.settings.cg_tolerance. If performance is affected, consider raising the maximum number of CG iterations by running code in a linear_operator.settings.max_cg_iterations(value) context.



test loss :  174273504.0
##### EPOCH 1936 #####
train loss :  -0.4418475329875946



CG terminated in 1000 iterations with average residual norm 1435.6700439453125 which is larger than the tolerance of 0.01 specified by linear_operator.settings.cg_tolerance. If performance is affected, consider raising the maximum number of CG iterations by running code in a linear_operator.settings.max_cg_iterations(value) context.



test loss :  1281757312.0
##### EPOCH 1937 #####
train loss :  -0.5488135814666748



CG terminated in 1000 iterations with average residual norm 849.1596069335938 which is larger than the tolerance of 0.01 specified by linear_operator.settings.cg_tolerance. If performance is affected, consider raising the maximum number of CG iterations by running code in a linear_operator.settings.max_cg_iterations(value) context.



test loss :  487454784.0
##### EPOCH 1938 #####
train loss :  -0.2839139699935913



CG terminated in 1000 iterations with average residual norm 600.3298950195312 which is larger than the tolerance of 0.01 specified by linear_operator.settings.cg_tolerance. If performance is affected, consider raising the maximum number of CG iterations by running code in a linear_operator.settings.max_cg_iterations(value) context.



test loss :  436799200.0
##### EPOCH 1939 #####
train loss :  -0.2757953107357025



CG terminated in 1000 iterations with average residual norm 376.7977600097656 which is larger than the tolerance of 0.01 specified by linear_operator.settings.cg_tolerance. If performance is affected, consider raising the maximum number of CG iterations by running code in a linear_operator.settings.max_cg_iterations(value) context.



test loss :  136478848.0
##### EPOCH 1940 #####
train loss :  -0.534238338470459



CG terminated in 1000 iterations with average residual norm 520.0645751953125 which is larger than the tolerance of 0.01 specified by linear_operator.settings.cg_tolerance. If performance is affected, consider raising the maximum number of CG iterations by running code in a linear_operator.settings.max_cg_iterations(value) context.



test loss :  258033440.0
##### EPOCH 1941 #####
train loss :  -0.5715895891189575



CG terminated in 1000 iterations with average residual norm 406.3729248046875 which is larger than the tolerance of 0.01 specified by linear_operator.settings.cg_tolerance. If performance is affected, consider raising the maximum number of CG iterations by running code in a linear_operator.settings.max_cg_iterations(value) context.



test loss :  126444640.0
##### EPOCH 1942 #####
train loss :  -0.44336292147636414



CG terminated in 1000 iterations with average residual norm 272622.40625 which is larger than the tolerance of 0.01 specified by linear_operator.settings.cg_tolerance. If performance is affected, consider raising the maximum number of CG iterations by running code in a linear_operator.settings.max_cg_iterations(value) context.



test loss :  64056413126656.0
##### EPOCH 1943 #####
train loss :  -0.3694901168346405



CG terminated in 1000 iterations with average residual norm 639.9567260742188 which is larger than the tolerance of 0.01 specified by linear_operator.settings.cg_tolerance. If performance is affected, consider raising the maximum number of CG iterations by running code in a linear_operator.settings.max_cg_iterations(value) context.



test loss :  348184864.0
##### EPOCH 1944 #####
train loss :  -0.4155743420124054



CG terminated in 1000 iterations with average residual norm 973.1703491210938 which is larger than the tolerance of 0.01 specified by linear_operator.settings.cg_tolerance. If performance is affected, consider raising the maximum number of CG iterations by running code in a linear_operator.settings.max_cg_iterations(value) context.



test loss :  975806272.0
##### EPOCH 1945 #####
train loss :  -0.5336950421333313



CG terminated in 1000 iterations with average residual norm 1436.978515625 which is larger than the tolerance of 0.01 specified by linear_operator.settings.cg_tolerance. If performance is affected, consider raising the maximum number of CG iterations by running code in a linear_operator.settings.max_cg_iterations(value) context.



test loss :  1239058048.0
##### EPOCH 1946 #####
train loss :  -0.5754945874214172



CG terminated in 1000 iterations with average residual norm 1671.9022216796875 which is larger than the tolerance of 0.01 specified by linear_operator.settings.cg_tolerance. If performance is affected, consider raising the maximum number of CG iterations by running code in a linear_operator.settings.max_cg_iterations(value) context.



test loss :  2564740608.0
##### EPOCH 1947 #####
train loss :  -0.22679516673088074



CG terminated in 1000 iterations with average residual norm 810.3216552734375 which is larger than the tolerance of 0.01 specified by linear_operator.settings.cg_tolerance. If performance is affected, consider raising the maximum number of CG iterations by running code in a linear_operator.settings.max_cg_iterations(value) context.



test loss :  704049728.0
##### EPOCH 1948 #####
train loss :  -0.2340073138475418



CG terminated in 1000 iterations with average residual norm 969.2181396484375 which is larger than the tolerance of 0.01 specified by linear_operator.settings.cg_tolerance. If performance is affected, consider raising the maximum number of CG iterations by running code in a linear_operator.settings.max_cg_iterations(value) context.



test loss :  1357066624.0
##### EPOCH 1949 #####
train loss :  -0.39531680941581726



CG terminated in 1000 iterations with average residual norm 4422.92529296875 which is larger than the tolerance of 0.01 specified by linear_operator.settings.cg_tolerance. If performance is affected, consider raising the maximum number of CG iterations by running code in a linear_operator.settings.max_cg_iterations(value) context.



test loss :  10130404352.0
##### EPOCH 1950 #####
train loss :  -0.3719128966331482



CG terminated in 1000 iterations with average residual norm 844.431396484375 which is larger than the tolerance of 0.01 specified by linear_operator.settings.cg_tolerance. If performance is affected, consider raising the maximum number of CG iterations by running code in a linear_operator.settings.max_cg_iterations(value) context.



test loss :  595201280.0
##### EPOCH 1951 #####
train loss :  0.005412661470472813



CG terminated in 1000 iterations with average residual norm 857.6757202148438 which is larger than the tolerance of 0.01 specified by linear_operator.settings.cg_tolerance. If performance is affected, consider raising the maximum number of CG iterations by running code in a linear_operator.settings.max_cg_iterations(value) context.



test loss :  598347712.0
##### EPOCH 1952 #####
train loss :  -0.45989763736724854



CG terminated in 1000 iterations with average residual norm 1740.9786376953125 which is larger than the tolerance of 0.01 specified by linear_operator.settings.cg_tolerance. If performance is affected, consider raising the maximum number of CG iterations by running code in a linear_operator.settings.max_cg_iterations(value) context.



test loss :  3127147264.0
##### EPOCH 1953 #####
train loss :  -0.09094113111495972



CG terminated in 1000 iterations with average residual norm 3716.132080078125 which is larger than the tolerance of 0.01 specified by linear_operator.settings.cg_tolerance. If performance is affected, consider raising the maximum number of CG iterations by running code in a linear_operator.settings.max_cg_iterations(value) context.



test loss :  11081436160.0
##### EPOCH 1954 #####
train loss :  -0.43543514609336853



CG terminated in 1000 iterations with average residual norm 600.3494873046875 which is larger than the tolerance of 0.01 specified by linear_operator.settings.cg_tolerance. If performance is affected, consider raising the maximum number of CG iterations by running code in a linear_operator.settings.max_cg_iterations(value) context.



test loss :  555289856.0
##### EPOCH 1955 #####
train loss :  -0.3794499635696411



CG terminated in 1000 iterations with average residual norm 883.4957275390625 which is larger than the tolerance of 0.01 specified by linear_operator.settings.cg_tolerance. If performance is affected, consider raising the maximum number of CG iterations by running code in a linear_operator.settings.max_cg_iterations(value) context.



test loss :  596855360.0
##### EPOCH 1956 #####
train loss :  -0.40728309750556946



CG terminated in 1000 iterations with average residual norm 808.721923828125 which is larger than the tolerance of 0.01 specified by linear_operator.settings.cg_tolerance. If performance is affected, consider raising the maximum number of CG iterations by running code in a linear_operator.settings.max_cg_iterations(value) context.



test loss :  635719040.0
##### EPOCH 1957 #####
train loss :  -0.3875369131565094



CG terminated in 1000 iterations with average residual norm 1364.559326171875 which is larger than the tolerance of 0.01 specified by linear_operator.settings.cg_tolerance. If performance is affected, consider raising the maximum number of CG iterations by running code in a linear_operator.settings.max_cg_iterations(value) context.



test loss :  1971081600.0
##### EPOCH 1958 #####
train loss :  -0.33522409200668335



CG terminated in 1000 iterations with average residual norm 1680.7557373046875 which is larger than the tolerance of 0.01 specified by linear_operator.settings.cg_tolerance. If performance is affected, consider raising the maximum number of CG iterations by running code in a linear_operator.settings.max_cg_iterations(value) context.



test loss :  2933114624.0
##### EPOCH 1959 #####
train loss :  -0.4007842242717743



CG terminated in 1000 iterations with average residual norm 1234.23974609375 which is larger than the tolerance of 0.01 specified by linear_operator.settings.cg_tolerance. If performance is affected, consider raising the maximum number of CG iterations by running code in a linear_operator.settings.max_cg_iterations(value) context.



test loss :  1365598336.0
##### EPOCH 1960 #####
train loss :  -0.3294905126094818



CG terminated in 1000 iterations with average residual norm 3082.705322265625 which is larger than the tolerance of 0.01 specified by linear_operator.settings.cg_tolerance. If performance is affected, consider raising the maximum number of CG iterations by running code in a linear_operator.settings.max_cg_iterations(value) context.



test loss :  13765917696.0
##### EPOCH 1961 #####
train loss :  -0.4104156196117401



CG terminated in 1000 iterations with average residual norm 5737.38427734375 which is larger than the tolerance of 0.01 specified by linear_operator.settings.cg_tolerance. If performance is affected, consider raising the maximum number of CG iterations by running code in a linear_operator.settings.max_cg_iterations(value) context.



test loss :  32920705024.0
##### EPOCH 1962 #####
train loss :  -0.3289037048816681



CG terminated in 1000 iterations with average residual norm 2243.866943359375 which is larger than the tolerance of 0.01 specified by linear_operator.settings.cg_tolerance. If performance is affected, consider raising the maximum number of CG iterations by running code in a linear_operator.settings.max_cg_iterations(value) context.



test loss :  3546132992.0
##### EPOCH 1963 #####
train loss :  -0.4050430953502655



CG terminated in 1000 iterations with average residual norm 897.6021118164062 which is larger than the tolerance of 0.01 specified by linear_operator.settings.cg_tolerance. If performance is affected, consider raising the maximum number of CG iterations by running code in a linear_operator.settings.max_cg_iterations(value) context.



test loss :  1005293056.0
##### EPOCH 1964 #####
train loss :  -0.41456517577171326



CG terminated in 1000 iterations with average residual norm 2531.45849609375 which is larger than the tolerance of 0.01 specified by linear_operator.settings.cg_tolerance. If performance is affected, consider raising the maximum number of CG iterations by running code in a linear_operator.settings.max_cg_iterations(value) context.



test loss :  5152419328.0
##### EPOCH 1965 #####
train loss :  -0.45693492889404297



CG terminated in 1000 iterations with average residual norm 6429.90576171875 which is larger than the tolerance of 0.01 specified by linear_operator.settings.cg_tolerance. If performance is affected, consider raising the maximum number of CG iterations by running code in a linear_operator.settings.max_cg_iterations(value) context.



test loss :  41101402112.0
##### EPOCH 1966 #####
train loss :  -0.06311708688735962



CG terminated in 1000 iterations with average residual norm 972.9924926757812 which is larger than the tolerance of 0.01 specified by linear_operator.settings.cg_tolerance. If performance is affected, consider raising the maximum number of CG iterations by running code in a linear_operator.settings.max_cg_iterations(value) context.



test loss :  804843008.0
##### EPOCH 1967 #####
train loss :  -0.3256651759147644



CG terminated in 1000 iterations with average residual norm 494.7752380371094 which is larger than the tolerance of 0.01 specified by linear_operator.settings.cg_tolerance. If performance is affected, consider raising the maximum number of CG iterations by running code in a linear_operator.settings.max_cg_iterations(value) context.



test loss :  304254912.0
##### EPOCH 1968 #####
train loss :  -0.22222338616847992



CG terminated in 1000 iterations with average residual norm 1631.7001953125 which is larger than the tolerance of 0.01 specified by linear_operator.settings.cg_tolerance. If performance is affected, consider raising the maximum number of CG iterations by running code in a linear_operator.settings.max_cg_iterations(value) context.



test loss :  1988588416.0
##### EPOCH 1969 #####
train loss :  -0.3830786347389221



CG terminated in 1000 iterations with average residual norm 1051.1517333984375 which is larger than the tolerance of 0.01 specified by linear_operator.settings.cg_tolerance. If performance is affected, consider raising the maximum number of CG iterations by running code in a linear_operator.settings.max_cg_iterations(value) context.



test loss :  1172727552.0
##### EPOCH 1970 #####
train loss :  -0.23311378061771393



CG terminated in 1000 iterations with average residual norm 10467.056640625 which is larger than the tolerance of 0.01 specified by linear_operator.settings.cg_tolerance. If performance is affected, consider raising the maximum number of CG iterations by running code in a linear_operator.settings.max_cg_iterations(value) context.



test loss :  140328796160.0
##### EPOCH 1971 #####
train loss :  -0.0719660073518753



CG terminated in 1000 iterations with average residual norm 3167.70068359375 which is larger than the tolerance of 0.01 specified by linear_operator.settings.cg_tolerance. If performance is affected, consider raising the maximum number of CG iterations by running code in a linear_operator.settings.max_cg_iterations(value) context.



test loss :  9363408896.0
##### EPOCH 1972 #####
train loss :  -0.18278567492961884



CG terminated in 1000 iterations with average residual norm 300.27569580078125 which is larger than the tolerance of 0.01 specified by linear_operator.settings.cg_tolerance. If performance is affected, consider raising the maximum number of CG iterations by running code in a linear_operator.settings.max_cg_iterations(value) context.



test loss :  116424360.0
##### EPOCH 1973 #####
train loss :  -0.25215986371040344



CG terminated in 1000 iterations with average residual norm 745.4142456054688 which is larger than the tolerance of 0.01 specified by linear_operator.settings.cg_tolerance. If performance is affected, consider raising the maximum number of CG iterations by running code in a linear_operator.settings.max_cg_iterations(value) context.



test loss :  439339552.0
##### EPOCH 1974 #####
train loss :  -0.43065473437309265



CG terminated in 1000 iterations with average residual norm 550.8131713867188 which is larger than the tolerance of 0.01 specified by linear_operator.settings.cg_tolerance. If performance is affected, consider raising the maximum number of CG iterations by running code in a linear_operator.settings.max_cg_iterations(value) context.



test loss :  308237920.0
##### EPOCH 1975 #####
train loss :  -0.426970511674881



CG terminated in 1000 iterations with average residual norm 886.541259765625 which is larger than the tolerance of 0.01 specified by linear_operator.settings.cg_tolerance. If performance is affected, consider raising the maximum number of CG iterations by running code in a linear_operator.settings.max_cg_iterations(value) context.



test loss :  797012480.0
##### EPOCH 1976 #####
train loss :  -0.44172826409339905



CG terminated in 1000 iterations with average residual norm 608.3218383789062 which is larger than the tolerance of 0.01 specified by linear_operator.settings.cg_tolerance. If performance is affected, consider raising the maximum number of CG iterations by running code in a linear_operator.settings.max_cg_iterations(value) context.



test loss :  325377920.0
##### EPOCH 1977 #####
train loss :  -0.3849532902240753



CG terminated in 1000 iterations with average residual norm 5099.0263671875 which is larger than the tolerance of 0.01 specified by linear_operator.settings.cg_tolerance. If performance is affected, consider raising the maximum number of CG iterations by running code in a linear_operator.settings.max_cg_iterations(value) context.



test loss :  52228141056.0
##### EPOCH 1978 #####
train loss :  -0.4566478133201599



CG terminated in 1000 iterations with average residual norm 1185.0699462890625 which is larger than the tolerance of 0.01 specified by linear_operator.settings.cg_tolerance. If performance is affected, consider raising the maximum number of CG iterations by running code in a linear_operator.settings.max_cg_iterations(value) context.



test loss :  1450099200.0
##### EPOCH 1979 #####
train loss :  -0.09812701493501663



CG terminated in 1000 iterations with average residual norm 918.4462280273438 which is larger than the tolerance of 0.01 specified by linear_operator.settings.cg_tolerance. If performance is affected, consider raising the maximum number of CG iterations by running code in a linear_operator.settings.max_cg_iterations(value) context.



test loss :  978163200.0
##### EPOCH 1980 #####
train loss :  -0.32288801670074463



CG terminated in 1000 iterations with average residual norm 2077.891845703125 which is larger than the tolerance of 0.01 specified by linear_operator.settings.cg_tolerance. If performance is affected, consider raising the maximum number of CG iterations by running code in a linear_operator.settings.max_cg_iterations(value) context.



test loss :  4682398720.0
##### EPOCH 1981 #####
train loss :  -0.4211357533931732



CG terminated in 1000 iterations with average residual norm 58777.5078125 which is larger than the tolerance of 0.01 specified by linear_operator.settings.cg_tolerance. If performance is affected, consider raising the maximum number of CG iterations by running code in a linear_operator.settings.max_cg_iterations(value) context.



test loss :  4458279862272.0
##### EPOCH 1982 #####
train loss :  -0.4568609893321991



CG terminated in 1000 iterations with average residual norm 5188.9755859375 which is larger than the tolerance of 0.01 specified by linear_operator.settings.cg_tolerance. If performance is affected, consider raising the maximum number of CG iterations by running code in a linear_operator.settings.max_cg_iterations(value) context.



test loss :  39714897920.0
##### EPOCH 1983 #####
train loss :  -0.31652307510375977



CG terminated in 1000 iterations with average residual norm 1884.907958984375 which is larger than the tolerance of 0.01 specified by linear_operator.settings.cg_tolerance. If performance is affected, consider raising the maximum number of CG iterations by running code in a linear_operator.settings.max_cg_iterations(value) context.



test loss :  3920367616.0
##### EPOCH 1984 #####
train loss :  -0.3697644770145416



CG terminated in 1000 iterations with average residual norm 2184.097900390625 which is larger than the tolerance of 0.01 specified by linear_operator.settings.cg_tolerance. If performance is affected, consider raising the maximum number of CG iterations by running code in a linear_operator.settings.max_cg_iterations(value) context.



test loss :  6035462656.0
##### EPOCH 1985 #####
train loss :  -0.34767165780067444



CG terminated in 1000 iterations with average residual norm 1468.9180908203125 which is larger than the tolerance of 0.01 specified by linear_operator.settings.cg_tolerance. If performance is affected, consider raising the maximum number of CG iterations by running code in a linear_operator.settings.max_cg_iterations(value) context.



test loss :  1997536896.0
##### EPOCH 1986 #####
train loss :  -0.3991074562072754



CG terminated in 1000 iterations with average residual norm 16461.953125 which is larger than the tolerance of 0.01 specified by linear_operator.settings.cg_tolerance. If performance is affected, consider raising the maximum number of CG iterations by running code in a linear_operator.settings.max_cg_iterations(value) context.



test loss :  398277967872.0
##### EPOCH 1987 #####
train loss :  0.02736736088991165



CG terminated in 1000 iterations with average residual norm 1437.7745361328125 which is larger than the tolerance of 0.01 specified by linear_operator.settings.cg_tolerance. If performance is affected, consider raising the maximum number of CG iterations by running code in a linear_operator.settings.max_cg_iterations(value) context.



test loss :  2462389760.0
##### EPOCH 1988 #####
train loss :  -0.19482886791229248



CG terminated in 1000 iterations with average residual norm 1203.8212890625 which is larger than the tolerance of 0.01 specified by linear_operator.settings.cg_tolerance. If performance is affected, consider raising the maximum number of CG iterations by running code in a linear_operator.settings.max_cg_iterations(value) context.



test loss :  1907169152.0
##### EPOCH 1989 #####
train loss :  -0.323639452457428



CG terminated in 1000 iterations with average residual norm 1750.0992431640625 which is larger than the tolerance of 0.01 specified by linear_operator.settings.cg_tolerance. If performance is affected, consider raising the maximum number of CG iterations by running code in a linear_operator.settings.max_cg_iterations(value) context.



test loss :  5552774144.0
##### EPOCH 1990 #####
train loss :  -0.5696593523025513



CG terminated in 1000 iterations with average residual norm 2468.6484375 which is larger than the tolerance of 0.01 specified by linear_operator.settings.cg_tolerance. If performance is affected, consider raising the maximum number of CG iterations by running code in a linear_operator.settings.max_cg_iterations(value) context.



test loss :  12538009600.0
##### EPOCH 1991 #####
train loss :  -0.4215894639492035



CG terminated in 1000 iterations with average residual norm 1974.179931640625 which is larger than the tolerance of 0.01 specified by linear_operator.settings.cg_tolerance. If performance is affected, consider raising the maximum number of CG iterations by running code in a linear_operator.settings.max_cg_iterations(value) context.



test loss :  5497974784.0
##### EPOCH 1992 #####
train loss :  -0.30070364475250244



CG terminated in 1000 iterations with average residual norm 3557.201904296875 which is larger than the tolerance of 0.01 specified by linear_operator.settings.cg_tolerance. If performance is affected, consider raising the maximum number of CG iterations by running code in a linear_operator.settings.max_cg_iterations(value) context.



test loss :  31121219584.0
##### EPOCH 1993 #####
train loss :  -0.2924332320690155



CG terminated in 1000 iterations with average residual norm 622.118896484375 which is larger than the tolerance of 0.01 specified by linear_operator.settings.cg_tolerance. If performance is affected, consider raising the maximum number of CG iterations by running code in a linear_operator.settings.max_cg_iterations(value) context.



test loss :  711874560.0
##### EPOCH 1994 #####
train loss :  -0.3120214641094208



CG terminated in 1000 iterations with average residual norm 1560.38623046875 which is larger than the tolerance of 0.01 specified by linear_operator.settings.cg_tolerance. If performance is affected, consider raising the maximum number of CG iterations by running code in a linear_operator.settings.max_cg_iterations(value) context.



test loss :  5568914432.0
##### EPOCH 1995 #####



CG terminated in 1000 iterations with average residual norm 21832.638671875 which is larger than the tolerance of 1 specified by linear_operator.settings.cg_tolerance. If performance is affected, consider raising the maximum number of CG iterations by running code in a linear_operator.settings.max_cg_iterations(value) context.



train loss :  748.1397705078125



CG terminated in 1000 iterations with average residual norm 1252.3310546875 which is larger than the tolerance of 0.01 specified by linear_operator.settings.cg_tolerance. If performance is affected, consider raising the maximum number of CG iterations by running code in a linear_operator.settings.max_cg_iterations(value) context.



test loss :  1142626176.0
##### EPOCH 1996 #####
train loss :  -0.39163413643836975



CG terminated in 1000 iterations with average residual norm 793.24853515625 which is larger than the tolerance of 0.01 specified by linear_operator.settings.cg_tolerance. If performance is affected, consider raising the maximum number of CG iterations by running code in a linear_operator.settings.max_cg_iterations(value) context.



test loss :  881942656.0
##### EPOCH 1997 #####
train loss :  -0.5870782136917114



CG terminated in 1000 iterations with average residual norm 814.9019165039062 which is larger than the tolerance of 0.01 specified by linear_operator.settings.cg_tolerance. If performance is affected, consider raising the maximum number of CG iterations by running code in a linear_operator.settings.max_cg_iterations(value) context.



test loss :  1456762368.0
##### EPOCH 1998 #####
train loss :  -0.40984198451042175



CG terminated in 1000 iterations with average residual norm 688.1104736328125 which is larger than the tolerance of 0.01 specified by linear_operator.settings.cg_tolerance. If performance is affected, consider raising the maximum number of CG iterations by running code in a linear_operator.settings.max_cg_iterations(value) context.



test loss :  834465664.0
##### EPOCH 1999 #####
train loss :  -0.34509944915771484



CG terminated in 1000 iterations with average residual norm 900.4473876953125 which is larger than the tolerance of 0.01 specified by linear_operator.settings.cg_tolerance. If performance is affected, consider raising the maximum number of CG iterations by running code in a linear_operator.settings.max_cg_iterations(value) context.



test loss :  1934388864.0
##### EPOCH 2000 #####
train loss :  -0.6172296404838562



CG terminated in 1000 iterations with average residual norm 2385.025390625 which is larger than the tolerance of 0.01 specified by linear_operator.settings.cg_tolerance. If performance is affected, consider raising the maximum number of CG iterations by running code in a linear_operator.settings.max_cg_iterations(value) context.



test loss :  15170998272.0
##### EPOCH 2001 #####
train loss :  -0.3330323100090027



CG terminated in 1000 iterations with average residual norm 623.7076416015625 which is larger than the tolerance of 0.01 specified by linear_operator.settings.cg_tolerance. If performance is affected, consider raising the maximum number of CG iterations by running code in a linear_operator.settings.max_cg_iterations(value) context.



test loss :  1082604032.0
##### EPOCH 2002 #####
train loss :  -0.42744961380958557



CG terminated in 1000 iterations with average residual norm 579.9700317382812 which is larger than the tolerance of 0.01 specified by linear_operator.settings.cg_tolerance. If performance is affected, consider raising the maximum number of CG iterations by running code in a linear_operator.settings.max_cg_iterations(value) context.



test loss :  592125824.0
##### EPOCH 2003 #####
train loss :  -0.1588802933692932



CG terminated in 1000 iterations with average residual norm 1115.6036376953125 which is larger than the tolerance of 0.01 specified by linear_operator.settings.cg_tolerance. If performance is affected, consider raising the maximum number of CG iterations by running code in a linear_operator.settings.max_cg_iterations(value) context.



test loss :  1277584384.0
##### EPOCH 2004 #####
train loss :  -0.6030040383338928



CG terminated in 1000 iterations with average residual norm 962.6417236328125 which is larger than the tolerance of 0.01 specified by linear_operator.settings.cg_tolerance. If performance is affected, consider raising the maximum number of CG iterations by running code in a linear_operator.settings.max_cg_iterations(value) context.



test loss :  2004944128.0
##### EPOCH 2005 #####
train loss :  -0.3774702250957489



CG terminated in 1000 iterations with average residual norm 2638.81591796875 which is larger than the tolerance of 0.01 specified by linear_operator.settings.cg_tolerance. If performance is affected, consider raising the maximum number of CG iterations by running code in a linear_operator.settings.max_cg_iterations(value) context.



test loss :  18385606656.0
##### EPOCH 2006 #####
train loss :  -0.3284268379211426



CG terminated in 1000 iterations with average residual norm 2014.62646484375 which is larger than the tolerance of 0.01 specified by linear_operator.settings.cg_tolerance. If performance is affected, consider raising the maximum number of CG iterations by running code in a linear_operator.settings.max_cg_iterations(value) context.



test loss :  8274291712.0
##### EPOCH 2007 #####
train loss :  -0.4272153973579407



CG terminated in 1000 iterations with average residual norm 1821.790771484375 which is larger than the tolerance of 0.01 specified by linear_operator.settings.cg_tolerance. If performance is affected, consider raising the maximum number of CG iterations by running code in a linear_operator.settings.max_cg_iterations(value) context.



test loss :  5327607808.0
##### EPOCH 2008 #####



CG terminated in 1000 iterations with average residual norm 26951.421875 which is larger than the tolerance of 1 specified by linear_operator.settings.cg_tolerance. If performance is affected, consider raising the maximum number of CG iterations by running code in a linear_operator.settings.max_cg_iterations(value) context.



train loss :  285.7732849121094



CG terminated in 1000 iterations with average residual norm 888.2386474609375 which is larger than the tolerance of 0.01 specified by linear_operator.settings.cg_tolerance. If performance is affected, consider raising the maximum number of CG iterations by running code in a linear_operator.settings.max_cg_iterations(value) context.



test loss :  2853934848.0
##### EPOCH 2009 #####
train loss :  -0.6023502945899963



CG terminated in 1000 iterations with average residual norm 806.6693725585938 which is larger than the tolerance of 0.01 specified by linear_operator.settings.cg_tolerance. If performance is affected, consider raising the maximum number of CG iterations by running code in a linear_operator.settings.max_cg_iterations(value) context.



test loss :  2108537600.0
##### EPOCH 2010 #####
train loss :  -0.23390178382396698



CG terminated in 1000 iterations with average residual norm 3988.843017578125 which is larger than the tolerance of 0.01 specified by linear_operator.settings.cg_tolerance. If performance is affected, consider raising the maximum number of CG iterations by running code in a linear_operator.settings.max_cg_iterations(value) context.



test loss :  47615918080.0
##### EPOCH 2011 #####
train loss :  -0.40065109729766846



CG terminated in 1000 iterations with average residual norm 712.38330078125 which is larger than the tolerance of 0.01 specified by linear_operator.settings.cg_tolerance. If performance is affected, consider raising the maximum number of CG iterations by running code in a linear_operator.settings.max_cg_iterations(value) context.



test loss :  1053107584.0
##### EPOCH 2012 #####
train loss :  -0.4042987525463104



CG terminated in 1000 iterations with average residual norm 630.8958740234375 which is larger than the tolerance of 0.01 specified by linear_operator.settings.cg_tolerance. If performance is affected, consider raising the maximum number of CG iterations by running code in a linear_operator.settings.max_cg_iterations(value) context.



test loss :  1093034880.0
##### EPOCH 2013 #####
train loss :  -0.6106393337249756



CG terminated in 1000 iterations with average residual norm 1364.8511962890625 which is larger than the tolerance of 0.01 specified by linear_operator.settings.cg_tolerance. If performance is affected, consider raising the maximum number of CG iterations by running code in a linear_operator.settings.max_cg_iterations(value) context.



EXCEPTION
##### EPOCH 2014 #####
train loss :  -0.5523207783699036



CG terminated in 1000 iterations with average residual norm 687.51904296875 which is larger than the tolerance of 0.01 specified by linear_operator.settings.cg_tolerance. If performance is affected, consider raising the maximum number of CG iterations by running code in a linear_operator.settings.max_cg_iterations(value) context.



test loss :  1483002112.0
##### EPOCH 2015 #####
train loss :  -0.40486863255500793



CG terminated in 1000 iterations with average residual norm 1353.49560546875 which is larger than the tolerance of 0.01 specified by linear_operator.settings.cg_tolerance. If performance is affected, consider raising the maximum number of CG iterations by running code in a linear_operator.settings.max_cg_iterations(value) context.



test loss :  5063752192.0
##### EPOCH 2016 #####
train loss :  -0.4653289318084717



CG terminated in 1000 iterations with average residual norm 2258.841552734375 which is larger than the tolerance of 0.01 specified by linear_operator.settings.cg_tolerance. If performance is affected, consider raising the maximum number of CG iterations by running code in a linear_operator.settings.max_cg_iterations(value) context.



test loss :  15767734272.0
##### EPOCH 2017 #####
train loss :  0.051858238875865936



CG terminated in 1000 iterations with average residual norm 393.5793762207031 which is larger than the tolerance of 0.01 specified by linear_operator.settings.cg_tolerance. If performance is affected, consider raising the maximum number of CG iterations by running code in a linear_operator.settings.max_cg_iterations(value) context.



test loss :  296422592.0
##### EPOCH 2018 #####
train loss :  -0.4128180146217346



CG terminated in 1000 iterations with average residual norm 1109.5001220703125 which is larger than the tolerance of 0.01 specified by linear_operator.settings.cg_tolerance. If performance is affected, consider raising the maximum number of CG iterations by running code in a linear_operator.settings.max_cg_iterations(value) context.



EXCEPTION
##### EPOCH 2019 #####
train loss :  -0.3698573708534241



CG terminated in 1000 iterations with average residual norm 736.9036865234375 which is larger than the tolerance of 0.01 specified by linear_operator.settings.cg_tolerance. If performance is affected, consider raising the maximum number of CG iterations by running code in a linear_operator.settings.max_cg_iterations(value) context.



test loss :  815650688.0
##### EPOCH 2020 #####
train loss :  -0.3918451964855194



CG terminated in 1000 iterations with average residual norm 1875.2808837890625 which is larger than the tolerance of 0.01 specified by linear_operator.settings.cg_tolerance. If performance is affected, consider raising the maximum number of CG iterations by running code in a linear_operator.settings.max_cg_iterations(value) context.



test loss :  4148693504.0
##### EPOCH 2021 #####
train loss :  -0.45529159903526306



CG terminated in 1000 iterations with average residual norm 1230.6407470703125 which is larger than the tolerance of 0.01 specified by linear_operator.settings.cg_tolerance. If performance is affected, consider raising the maximum number of CG iterations by running code in a linear_operator.settings.max_cg_iterations(value) context.



test loss :  3106073088.0
##### EPOCH 2022 #####
train loss :  -0.3182167112827301



CG terminated in 1000 iterations with average residual norm 1006.4601440429688 which is larger than the tolerance of 0.01 specified by linear_operator.settings.cg_tolerance. If performance is affected, consider raising the maximum number of CG iterations by running code in a linear_operator.settings.max_cg_iterations(value) context.



test loss :  2138499712.0
##### EPOCH 2023 #####
train loss :  -0.3910474181175232



CG terminated in 1000 iterations with average residual norm 1043.97900390625 which is larger than the tolerance of 0.01 specified by linear_operator.settings.cg_tolerance. If performance is affected, consider raising the maximum number of CG iterations by running code in a linear_operator.settings.max_cg_iterations(value) context.



test loss :  2029560576.0
##### EPOCH 2024 #####
train loss :  -0.46420249342918396



CG terminated in 1000 iterations with average residual norm 1074.0174560546875 which is larger than the tolerance of 0.01 specified by linear_operator.settings.cg_tolerance. If performance is affected, consider raising the maximum number of CG iterations by running code in a linear_operator.settings.max_cg_iterations(value) context.



test loss :  3822315520.0
##### EPOCH 2025 #####
train loss :  -0.4676135778427124



CG terminated in 1000 iterations with average residual norm 2459.996337890625 which is larger than the tolerance of 0.01 specified by linear_operator.settings.cg_tolerance. If performance is affected, consider raising the maximum number of CG iterations by running code in a linear_operator.settings.max_cg_iterations(value) context.



test loss :  13130083328.0
##### EPOCH 2026 #####



CG terminated in 1000 iterations with average residual norm 26322.16796875 which is larger than the tolerance of 1 specified by linear_operator.settings.cg_tolerance. If performance is affected, consider raising the maximum number of CG iterations by running code in a linear_operator.settings.max_cg_iterations(value) context.



train loss :  1020.9481201171875



CG terminated in 1000 iterations with average residual norm 663.9287109375 which is larger than the tolerance of 0.01 specified by linear_operator.settings.cg_tolerance. If performance is affected, consider raising the maximum number of CG iterations by running code in a linear_operator.settings.max_cg_iterations(value) context.



test loss :  234049696.0
##### EPOCH 2027 #####
train loss :  -0.2414122372865677



CG terminated in 1000 iterations with average residual norm 682.5317993164062 which is larger than the tolerance of 0.01 specified by linear_operator.settings.cg_tolerance. If performance is affected, consider raising the maximum number of CG iterations by running code in a linear_operator.settings.max_cg_iterations(value) context.



test loss :  310306208.0
##### EPOCH 2028 #####
train loss :  -0.44848647713661194



CG terminated in 1000 iterations with average residual norm 479.4754943847656 which is larger than the tolerance of 0.01 specified by linear_operator.settings.cg_tolerance. If performance is affected, consider raising the maximum number of CG iterations by running code in a linear_operator.settings.max_cg_iterations(value) context.



test loss :  196797600.0
##### EPOCH 2029 #####
train loss :  -0.3039233088493347



CG terminated in 1000 iterations with average residual norm 473.300048828125 which is larger than the tolerance of 0.01 specified by linear_operator.settings.cg_tolerance. If performance is affected, consider raising the maximum number of CG iterations by running code in a linear_operator.settings.max_cg_iterations(value) context.



test loss :  267985776.0
##### EPOCH 2030 #####
train loss :  -0.5336011648178101



CG terminated in 1000 iterations with average residual norm 598.1278076171875 which is larger than the tolerance of 0.01 specified by linear_operator.settings.cg_tolerance. If performance is affected, consider raising the maximum number of CG iterations by running code in a linear_operator.settings.max_cg_iterations(value) context.



test loss :  522135584.0
##### EPOCH 2031 #####
train loss :  -0.3185708522796631



CG terminated in 1000 iterations with average residual norm 23989.130859375 which is larger than the tolerance of 0.01 specified by linear_operator.settings.cg_tolerance. If performance is affected, consider raising the maximum number of CG iterations by running code in a linear_operator.settings.max_cg_iterations(value) context.



test loss :  979979993088.0
##### EPOCH 2032 #####
train loss :  -0.3787629306316376



CG terminated in 1000 iterations with average residual norm 892.025146484375 which is larger than the tolerance of 0.01 specified by linear_operator.settings.cg_tolerance. If performance is affected, consider raising the maximum number of CG iterations by running code in a linear_operator.settings.max_cg_iterations(value) context.



test loss :  1425857664.0
##### EPOCH 2033 #####
train loss :  -0.3572731018066406



CG terminated in 1000 iterations with average residual norm 6616.3955078125 which is larger than the tolerance of 0.01 specified by linear_operator.settings.cg_tolerance. If performance is affected, consider raising the maximum number of CG iterations by running code in a linear_operator.settings.max_cg_iterations(value) context.



test loss :  72812986368.0
##### EPOCH 2034 #####
train loss :  -0.2246445119380951



CG terminated in 1000 iterations with average residual norm 926.8502197265625 which is larger than the tolerance of 0.01 specified by linear_operator.settings.cg_tolerance. If performance is affected, consider raising the maximum number of CG iterations by running code in a linear_operator.settings.max_cg_iterations(value) context.



test loss :  870617664.0
##### EPOCH 2035 #####
train loss :  -0.4620389938354492



CG terminated in 1000 iterations with average residual norm 769.8961791992188 which is larger than the tolerance of 0.01 specified by linear_operator.settings.cg_tolerance. If performance is affected, consider raising the maximum number of CG iterations by running code in a linear_operator.settings.max_cg_iterations(value) context.



test loss :  808746304.0
##### EPOCH 2036 #####
train loss :  -0.5148912072181702



CG terminated in 1000 iterations with average residual norm 5104.73583984375 which is larger than the tolerance of 0.01 specified by linear_operator.settings.cg_tolerance. If performance is affected, consider raising the maximum number of CG iterations by running code in a linear_operator.settings.max_cg_iterations(value) context.



test loss :  32160989184.0
##### EPOCH 2037 #####
train loss :  -0.40477728843688965



CG terminated in 1000 iterations with average residual norm 6126.0693359375 which is larger than the tolerance of 0.01 specified by linear_operator.settings.cg_tolerance. If performance is affected, consider raising the maximum number of CG iterations by running code in a linear_operator.settings.max_cg_iterations(value) context.



test loss :  60698763264.0
##### EPOCH 2038 #####
train loss :  -0.3702159523963928



CG terminated in 1000 iterations with average residual norm 472.79193115234375 which is larger than the tolerance of 0.01 specified by linear_operator.settings.cg_tolerance. If performance is affected, consider raising the maximum number of CG iterations by running code in a linear_operator.settings.max_cg_iterations(value) context.



test loss :  262228576.0
##### EPOCH 2039 #####
train loss :  -0.475912481546402



CG terminated in 1000 iterations with average residual norm 596.447998046875 which is larger than the tolerance of 0.01 specified by linear_operator.settings.cg_tolerance. If performance is affected, consider raising the maximum number of CG iterations by running code in a linear_operator.settings.max_cg_iterations(value) context.



test loss :  328918944.0
##### EPOCH 2040 #####
train loss :  -0.4097481071949005



CG terminated in 1000 iterations with average residual norm 1012.77587890625 which is larger than the tolerance of 0.01 specified by linear_operator.settings.cg_tolerance. If performance is affected, consider raising the maximum number of CG iterations by running code in a linear_operator.settings.max_cg_iterations(value) context.



test loss :  1057421120.0
##### EPOCH 2041 #####
train loss :  -0.3782731890678406



CG terminated in 1000 iterations with average residual norm 1988.4080810546875 which is larger than the tolerance of 0.01 specified by linear_operator.settings.cg_tolerance. If performance is affected, consider raising the maximum number of CG iterations by running code in a linear_operator.settings.max_cg_iterations(value) context.



test loss :  3169309696.0
##### EPOCH 2042 #####
train loss :  -0.3708738088607788



CG terminated in 1000 iterations with average residual norm 624.488525390625 which is larger than the tolerance of 0.01 specified by linear_operator.settings.cg_tolerance. If performance is affected, consider raising the maximum number of CG iterations by running code in a linear_operator.settings.max_cg_iterations(value) context.



test loss :  405540032.0
##### EPOCH 2043 #####
train loss :  -0.41554751992225647
test loss :  2.1867237091064453
##### EPOCH 2044 #####
train loss :  -0.6286854147911072



CG terminated in 1000 iterations with average residual norm 294.4014587402344 which is larger than the tolerance of 0.01 specified by linear_operator.settings.cg_tolerance. If performance is affected, consider raising the maximum number of CG iterations by running code in a linear_operator.settings.max_cg_iterations(value) context.



test loss :  92675720.0
##### EPOCH 2045 #####
train loss :  -0.20001429319381714



CG terminated in 1000 iterations with average residual norm 361.746826171875 which is larger than the tolerance of 0.01 specified by linear_operator.settings.cg_tolerance. If performance is affected, consider raising the maximum number of CG iterations by running code in a linear_operator.settings.max_cg_iterations(value) context.



test loss :  127978792.0
##### EPOCH 2046 #####
train loss :  -0.2355606108903885



CG terminated in 1000 iterations with average residual norm 1105.1981201171875 which is larger than the tolerance of 0.01 specified by linear_operator.settings.cg_tolerance. If performance is affected, consider raising the maximum number of CG iterations by running code in a linear_operator.settings.max_cg_iterations(value) context.



test loss :  1647130752.0
##### EPOCH 2047 #####
train loss :  -0.2003224492073059



CG terminated in 1000 iterations with average residual norm 2531.248046875 which is larger than the tolerance of 0.01 specified by linear_operator.settings.cg_tolerance. If performance is affected, consider raising the maximum number of CG iterations by running code in a linear_operator.settings.max_cg_iterations(value) context.



test loss :  6220521984.0
##### EPOCH 2048 #####
train loss :  -0.3487975299358368
test loss :  0.43588680028915405
##### EPOCH 2049 #####
train loss :  -0.4778038561344147



CG terminated in 1000 iterations with average residual norm 449.87701416015625 which is larger than the tolerance of 0.01 specified by linear_operator.settings.cg_tolerance. If performance is affected, consider raising the maximum number of CG iterations by running code in a linear_operator.settings.max_cg_iterations(value) context.



test loss :  200890384.0
##### EPOCH 2050 #####
train loss :  -0.40947166085243225
test loss :  1.004386305809021
##### EPOCH 2051 #####
train loss :  -0.03865458071231842



CG terminated in 1000 iterations with average residual norm 456.43048095703125 which is larger than the tolerance of 0.01 specified by linear_operator.settings.cg_tolerance. If performance is affected, consider raising the maximum number of CG iterations by running code in a linear_operator.settings.max_cg_iterations(value) context.



test loss :  238620112.0
##### EPOCH 2052 #####
train loss :  -0.3879319727420807



CG terminated in 1000 iterations with average residual norm 505.66448974609375 which is larger than the tolerance of 0.01 specified by linear_operator.settings.cg_tolerance. If performance is affected, consider raising the maximum number of CG iterations by running code in a linear_operator.settings.max_cg_iterations(value) context.



test loss :  250366448.0
##### EPOCH 2053 #####
train loss :  -0.3318221867084503



CG terminated in 1000 iterations with average residual norm 1717.731689453125 which is larger than the tolerance of 0.01 specified by linear_operator.settings.cg_tolerance. If performance is affected, consider raising the maximum number of CG iterations by running code in a linear_operator.settings.max_cg_iterations(value) context.



test loss :  3036254720.0
##### EPOCH 2054 #####
train loss :  -0.4001453220844269
test loss :  0.8570070862770081
##### EPOCH 2055 #####
train loss :  -0.38082200288772583



CG terminated in 1000 iterations with average residual norm 3779.40478515625 which is larger than the tolerance of 0.01 specified by linear_operator.settings.cg_tolerance. If performance is affected, consider raising the maximum number of CG iterations by running code in a linear_operator.settings.max_cg_iterations(value) context.



test loss :  19828664320.0
##### EPOCH 2056 #####
train loss :  -0.13431015610694885
test loss :  0.7619970440864563
##### EPOCH 2057 #####
train loss :  -0.3213401436805725



CG terminated in 1000 iterations with average residual norm 987.29931640625 which is larger than the tolerance of 0.01 specified by linear_operator.settings.cg_tolerance. If performance is affected, consider raising the maximum number of CG iterations by running code in a linear_operator.settings.max_cg_iterations(value) context.



test loss :  1254009472.0
##### EPOCH 2058 #####
train loss :  -0.43591365218162537



CG terminated in 1000 iterations with average residual norm 424.5675964355469 which is larger than the tolerance of 0.01 specified by linear_operator.settings.cg_tolerance. If performance is affected, consider raising the maximum number of CG iterations by running code in a linear_operator.settings.max_cg_iterations(value) context.



test loss :  163778752.0
##### EPOCH 2059 #####
train loss :  -0.3290655314922333



CG terminated in 1000 iterations with average residual norm 1176.0328369140625 which is larger than the tolerance of 0.01 specified by linear_operator.settings.cg_tolerance. If performance is affected, consider raising the maximum number of CG iterations by running code in a linear_operator.settings.max_cg_iterations(value) context.



test loss :  1141479936.0
##### EPOCH 2060 #####
train loss :  -0.11853428930044174
test loss :  2.1216442584991455
##### EPOCH 2061 #####
train loss :  -0.11699828505516052



CG terminated in 1000 iterations with average residual norm 329.2338562011719 which is larger than the tolerance of 0.01 specified by linear_operator.settings.cg_tolerance. If performance is affected, consider raising the maximum number of CG iterations by running code in a linear_operator.settings.max_cg_iterations(value) context.



test loss :  105018544.0
##### EPOCH 2062 #####
train loss :  -0.27580586075782776



CG terminated in 1000 iterations with average residual norm 336.7380065917969 which is larger than the tolerance of 0.01 specified by linear_operator.settings.cg_tolerance. If performance is affected, consider raising the maximum number of CG iterations by running code in a linear_operator.settings.max_cg_iterations(value) context.



test loss :  64889532.0
##### EPOCH 2063 #####
train loss :  -0.27412906289100647
test loss :  2.355489492416382
##### EPOCH 2064 #####
train loss :  -0.2748483717441559
test loss :  2.93450927734375
##### EPOCH 2065 #####
train loss :  -0.33321020007133484



CG terminated in 1000 iterations with average residual norm 1053.4085693359375 which is larger than the tolerance of 0.01 specified by linear_operator.settings.cg_tolerance. If performance is affected, consider raising the maximum number of CG iterations by running code in a linear_operator.settings.max_cg_iterations(value) context.



test loss :  805988608.0
##### EPOCH 2066 #####
train loss :  -0.23617437481880188



CG terminated in 1000 iterations with average residual norm 1076.5233154296875 which is larger than the tolerance of 0.01 specified by linear_operator.settings.cg_tolerance. If performance is affected, consider raising the maximum number of CG iterations by running code in a linear_operator.settings.max_cg_iterations(value) context.



test loss :  677032768.0
##### EPOCH 2067 #####
train loss :  -0.28126439452171326



CG terminated in 1000 iterations with average residual norm 975.8351440429688 which is larger than the tolerance of 0.01 specified by linear_operator.settings.cg_tolerance. If performance is affected, consider raising the maximum number of CG iterations by running code in a linear_operator.settings.max_cg_iterations(value) context.



test loss :  404252352.0
##### EPOCH 2068 #####
train loss :  -0.4782472550868988



CG terminated in 1000 iterations with average residual norm 399.1549987792969 which is larger than the tolerance of 0.01 specified by linear_operator.settings.cg_tolerance. If performance is affected, consider raising the maximum number of CG iterations by running code in a linear_operator.settings.max_cg_iterations(value) context.



test loss :  100670696.0
##### EPOCH 2069 #####
train loss :  -0.07509920746088028



CG terminated in 1000 iterations with average residual norm 301.1000061035156 which is larger than the tolerance of 0.01 specified by linear_operator.settings.cg_tolerance. If performance is affected, consider raising the maximum number of CG iterations by running code in a linear_operator.settings.max_cg_iterations(value) context.



test loss :  61600440.0
##### EPOCH 2070 #####
train loss :  -0.15593120455741882
test loss :  1.0343836545944214
##### EPOCH 2071 #####
train loss :  -0.41057801246643066



CG terminated in 1000 iterations with average residual norm 452.6383056640625 which is larger than the tolerance of 0.01 specified by linear_operator.settings.cg_tolerance. If performance is affected, consider raising the maximum number of CG iterations by running code in a linear_operator.settings.max_cg_iterations(value) context.



test loss :  145311632.0
##### EPOCH 2072 #####
train loss :  -0.42222559452056885
test loss :  0.4940246641635895
##### EPOCH 2073 #####
train loss :  -0.3867397904396057



CG terminated in 1000 iterations with average residual norm 465.06884765625 which is larger than the tolerance of 0.01 specified by linear_operator.settings.cg_tolerance. If performance is affected, consider raising the maximum number of CG iterations by running code in a linear_operator.settings.max_cg_iterations(value) context.



test loss :  116855288.0
##### EPOCH 2074 #####
train loss :  -0.0942930355668068



CG terminated in 1000 iterations with average residual norm 1328.945068359375 which is larger than the tolerance of 0.01 specified by linear_operator.settings.cg_tolerance. If performance is affected, consider raising the maximum number of CG iterations by running code in a linear_operator.settings.max_cg_iterations(value) context.



test loss :  858099136.0
##### EPOCH 2075 #####
train loss :  -0.26143476366996765



CG terminated in 1000 iterations with average residual norm 1105.7911376953125 which is larger than the tolerance of 0.01 specified by linear_operator.settings.cg_tolerance. If performance is affected, consider raising the maximum number of CG iterations by running code in a linear_operator.settings.max_cg_iterations(value) context.



test loss :  714465024.0
##### EPOCH 2076 #####
train loss :  -0.5136616826057434



CG terminated in 1000 iterations with average residual norm 190.6953582763672 which is larger than the tolerance of 0.01 specified by linear_operator.settings.cg_tolerance. If performance is affected, consider raising the maximum number of CG iterations by running code in a linear_operator.settings.max_cg_iterations(value) context.



test loss :  21175374.0
##### EPOCH 2077 #####
train loss :  -0.3836666941642761



CG terminated in 1000 iterations with average residual norm 673.729248046875 which is larger than the tolerance of 0.01 specified by linear_operator.settings.cg_tolerance. If performance is affected, consider raising the maximum number of CG iterations by running code in a linear_operator.settings.max_cg_iterations(value) context.



test loss :  284576512.0
##### EPOCH 2078 #####
train loss :  -0.5263029932975769
test loss :  0.4464995265007019
##### EPOCH 2079 #####
train loss :  -0.3663507103919983
test loss :  0.6558631062507629
##### EPOCH 2080 #####
train loss :  -0.4162179231643677



CG terminated in 1000 iterations with average residual norm 10277.5732421875 which is larger than the tolerance of 0.01 specified by linear_operator.settings.cg_tolerance. If performance is affected, consider raising the maximum number of CG iterations by running code in a linear_operator.settings.max_cg_iterations(value) context.



test loss :  107989327872.0
##### EPOCH 2081 #####
train loss :  -0.4978283941745758
test loss :  1.0445810556411743
##### EPOCH 2082 #####
train loss :  -0.2489645779132843



CG terminated in 1000 iterations with average residual norm 4583.84326171875 which is larger than the tolerance of 0.01 specified by linear_operator.settings.cg_tolerance. If performance is affected, consider raising the maximum number of CG iterations by running code in a linear_operator.settings.max_cg_iterations(value) context.



test loss :  11687576576.0
##### EPOCH 2083 #####
train loss :  -0.5037627220153809



CG terminated in 1000 iterations with average residual norm 86461.8125 which is larger than the tolerance of 0.01 specified by linear_operator.settings.cg_tolerance. If performance is affected, consider raising the maximum number of CG iterations by running code in a linear_operator.settings.max_cg_iterations(value) context.



test loss :  5770359865344.0
##### EPOCH 2084 #####
train loss :  -0.34447991847991943



CG terminated in 1000 iterations with average residual norm 1253.6751708984375 which is larger than the tolerance of 0.01 specified by linear_operator.settings.cg_tolerance. If performance is affected, consider raising the maximum number of CG iterations by running code in a linear_operator.settings.max_cg_iterations(value) context.



test loss :  614530304.0
##### EPOCH 2085 #####
train loss :  -0.2928909361362457



CG terminated in 1000 iterations with average residual norm 1088.3446044921875 which is larger than the tolerance of 0.01 specified by linear_operator.settings.cg_tolerance. If performance is affected, consider raising the maximum number of CG iterations by running code in a linear_operator.settings.max_cg_iterations(value) context.



test loss :  711881600.0
##### EPOCH 2086 #####
train loss :  -0.3451680541038513



CG terminated in 1000 iterations with average residual norm 3334.143310546875 which is larger than the tolerance of 0.01 specified by linear_operator.settings.cg_tolerance. If performance is affected, consider raising the maximum number of CG iterations by running code in a linear_operator.settings.max_cg_iterations(value) context.



test loss :  5117817856.0
##### EPOCH 2087 #####
train loss :  -0.4788001477718353



CG terminated in 1000 iterations with average residual norm 644.2891235351562 which is larger than the tolerance of 0.01 specified by linear_operator.settings.cg_tolerance. If performance is affected, consider raising the maximum number of CG iterations by running code in a linear_operator.settings.max_cg_iterations(value) context.



test loss :  190593904.0
##### EPOCH 2088 #####
train loss :  -0.42114341259002686



CG terminated in 1000 iterations with average residual norm 998.7597045898438 which is larger than the tolerance of 0.01 specified by linear_operator.settings.cg_tolerance. If performance is affected, consider raising the maximum number of CG iterations by running code in a linear_operator.settings.max_cg_iterations(value) context.



test loss :  507503360.0
##### EPOCH 2089 #####
train loss :  -0.1933673471212387



CG terminated in 1000 iterations with average residual norm 4631.91455078125 which is larger than the tolerance of 0.01 specified by linear_operator.settings.cg_tolerance. If performance is affected, consider raising the maximum number of CG iterations by running code in a linear_operator.settings.max_cg_iterations(value) context.



test loss :  14194125824.0
##### EPOCH 2090 #####
train loss :  -0.32635921239852905



CG terminated in 1000 iterations with average residual norm 9896.2041015625 which is larger than the tolerance of 0.01 specified by linear_operator.settings.cg_tolerance. If performance is affected, consider raising the maximum number of CG iterations by running code in a linear_operator.settings.max_cg_iterations(value) context.



test loss :  100875845632.0
##### EPOCH 2091 #####
train loss :  -0.506275475025177



CG terminated in 1000 iterations with average residual norm 398.4949951171875 which is larger than the tolerance of 0.01 specified by linear_operator.settings.cg_tolerance. If performance is affected, consider raising the maximum number of CG iterations by running code in a linear_operator.settings.max_cg_iterations(value) context.



EXCEPTION
##### EPOCH 2092 #####
train loss :  -0.3640122711658478



CG terminated in 1000 iterations with average residual norm 362.42657470703125 which is larger than the tolerance of 0.01 specified by linear_operator.settings.cg_tolerance. If performance is affected, consider raising the maximum number of CG iterations by running code in a linear_operator.settings.max_cg_iterations(value) context.



test loss :  135130112.0
##### EPOCH 2093 #####
train loss :  -0.08309881389141083



CG terminated in 1000 iterations with average residual norm 648.6088256835938 which is larger than the tolerance of 0.01 specified by linear_operator.settings.cg_tolerance. If performance is affected, consider raising the maximum number of CG iterations by running code in a linear_operator.settings.max_cg_iterations(value) context.



test loss :  306646208.0
##### EPOCH 2094 #####
train loss :  -0.15869605541229248



CG terminated in 1000 iterations with average residual norm 616.4420776367188 which is larger than the tolerance of 0.01 specified by linear_operator.settings.cg_tolerance. If performance is affected, consider raising the maximum number of CG iterations by running code in a linear_operator.settings.max_cg_iterations(value) context.



test loss :  372592320.0
##### EPOCH 2095 #####
train loss :  -0.1614273339509964



CG terminated in 1000 iterations with average residual norm 309.5347595214844 which is larger than the tolerance of 0.01 specified by linear_operator.settings.cg_tolerance. If performance is affected, consider raising the maximum number of CG iterations by running code in a linear_operator.settings.max_cg_iterations(value) context.



test loss :  60860656.0
##### EPOCH 2096 #####
train loss :  -0.2603440284729004



CG terminated in 1000 iterations with average residual norm 2206.87158203125 which is larger than the tolerance of 0.01 specified by linear_operator.settings.cg_tolerance. If performance is affected, consider raising the maximum number of CG iterations by running code in a linear_operator.settings.max_cg_iterations(value) context.



test loss :  2943341312.0
##### EPOCH 2097 #####
train loss :  -0.4124177396297455



CG terminated in 1000 iterations with average residual norm 444.1016540527344 which is larger than the tolerance of 0.01 specified by linear_operator.settings.cg_tolerance. If performance is affected, consider raising the maximum number of CG iterations by running code in a linear_operator.settings.max_cg_iterations(value) context.



test loss :  131377192.0
##### EPOCH 2098 #####
train loss :  -0.2453344464302063



CG terminated in 1000 iterations with average residual norm 2891.62255859375 which is larger than the tolerance of 0.01 specified by linear_operator.settings.cg_tolerance. If performance is affected, consider raising the maximum number of CG iterations by running code in a linear_operator.settings.max_cg_iterations(value) context.



test loss :  10179529728.0
##### EPOCH 2099 #####
train loss :  -0.3743042051792145



CG terminated in 1000 iterations with average residual norm 9098.1943359375 which is larger than the tolerance of 0.01 specified by linear_operator.settings.cg_tolerance. If performance is affected, consider raising the maximum number of CG iterations by running code in a linear_operator.settings.max_cg_iterations(value) context.



test loss :  46101835776.0
##### EPOCH 2100 #####
train loss :  -0.4038196802139282



CG terminated in 1000 iterations with average residual norm 1017.4326782226562 which is larger than the tolerance of 0.01 specified by linear_operator.settings.cg_tolerance. If performance is affected, consider raising the maximum number of CG iterations by running code in a linear_operator.settings.max_cg_iterations(value) context.



test loss :  462907520.0
##### EPOCH 2101 #####
train loss :  -0.3918546140193939



CG terminated in 1000 iterations with average residual norm 484.7009582519531 which is larger than the tolerance of 0.01 specified by linear_operator.settings.cg_tolerance. If performance is affected, consider raising the maximum number of CG iterations by running code in a linear_operator.settings.max_cg_iterations(value) context.



test loss :  248214768.0
##### EPOCH 2102 #####
train loss :  -0.15591402351856232



CG terminated in 1000 iterations with average residual norm 3830.375244140625 which is larger than the tolerance of 0.01 specified by linear_operator.settings.cg_tolerance. If performance is affected, consider raising the maximum number of CG iterations by running code in a linear_operator.settings.max_cg_iterations(value) context.



test loss :  15075143680.0
##### EPOCH 2103 #####
train loss :  -0.4888657331466675



CG terminated in 1000 iterations with average residual norm 422.0593566894531 which is larger than the tolerance of 0.01 specified by linear_operator.settings.cg_tolerance. If performance is affected, consider raising the maximum number of CG iterations by running code in a linear_operator.settings.max_cg_iterations(value) context.



test loss :  101675000.0
##### EPOCH 2104 #####
train loss :  -0.23088186979293823



CG terminated in 1000 iterations with average residual norm 432.7589416503906 which is larger than the tolerance of 0.01 specified by linear_operator.settings.cg_tolerance. If performance is affected, consider raising the maximum number of CG iterations by running code in a linear_operator.settings.max_cg_iterations(value) context.



test loss :  157890896.0
##### EPOCH 2105 #####
train loss :  -0.30728086829185486



CG terminated in 1000 iterations with average residual norm 360.0956726074219 which is larger than the tolerance of 0.01 specified by linear_operator.settings.cg_tolerance. If performance is affected, consider raising the maximum number of CG iterations by running code in a linear_operator.settings.max_cg_iterations(value) context.



test loss :  78537272.0
##### EPOCH 2106 #####
train loss :  -0.3313835561275482



CG terminated in 1000 iterations with average residual norm 443.9606018066406 which is larger than the tolerance of 0.01 specified by linear_operator.settings.cg_tolerance. If performance is affected, consider raising the maximum number of CG iterations by running code in a linear_operator.settings.max_cg_iterations(value) context.



test loss :  152959792.0
##### EPOCH 2107 #####
train loss :  -0.44888630509376526



CG terminated in 1000 iterations with average residual norm 557.9166870117188 which is larger than the tolerance of 0.01 specified by linear_operator.settings.cg_tolerance. If performance is affected, consider raising the maximum number of CG iterations by running code in a linear_operator.settings.max_cg_iterations(value) context.



test loss :  161841760.0
##### EPOCH 2108 #####
train loss :  -0.45937666296958923



CG terminated in 1000 iterations with average residual norm 143632.5 which is larger than the tolerance of 0.01 specified by linear_operator.settings.cg_tolerance. If performance is affected, consider raising the maximum number of CG iterations by running code in a linear_operator.settings.max_cg_iterations(value) context.



test loss :  6056322269184.0
##### EPOCH 2109 #####
train loss :  -0.2592746913433075



CG terminated in 1000 iterations with average residual norm 1548.8055419921875 which is larger than the tolerance of 0.01 specified by linear_operator.settings.cg_tolerance. If performance is affected, consider raising the maximum number of CG iterations by running code in a linear_operator.settings.max_cg_iterations(value) context.



test loss :  1469273472.0
##### EPOCH 2110 #####
train loss :  -0.08835618197917938



CG terminated in 1000 iterations with average residual norm 455.21923828125 which is larger than the tolerance of 0.01 specified by linear_operator.settings.cg_tolerance. If performance is affected, consider raising the maximum number of CG iterations by running code in a linear_operator.settings.max_cg_iterations(value) context.



test loss :  91745672.0
##### EPOCH 2111 #####
train loss :  -0.3182220160961151



CG terminated in 1000 iterations with average residual norm 399.0527038574219 which is larger than the tolerance of 0.01 specified by linear_operator.settings.cg_tolerance. If performance is affected, consider raising the maximum number of CG iterations by running code in a linear_operator.settings.max_cg_iterations(value) context.



test loss :  129693056.0
##### EPOCH 2112 #####
train loss :  -0.23974265158176422
test loss :  1.6189992427825928
##### EPOCH 2113 #####
train loss :  -0.25482162833213806



CG terminated in 1000 iterations with average residual norm 808.5198974609375 which is larger than the tolerance of 0.01 specified by linear_operator.settings.cg_tolerance. If performance is affected, consider raising the maximum number of CG iterations by running code in a linear_operator.settings.max_cg_iterations(value) context.



test loss :  478010016.0
##### EPOCH 2114 #####
train loss :  -0.43651601672172546



CG terminated in 1000 iterations with average residual norm 15605.7705078125 which is larger than the tolerance of 0.01 specified by linear_operator.settings.cg_tolerance. If performance is affected, consider raising the maximum number of CG iterations by running code in a linear_operator.settings.max_cg_iterations(value) context.



test loss :  128677781504.0
##### EPOCH 2115 #####
train loss :  -0.22173942625522614
test loss :  0.3421259820461273
##### EPOCH 2116 #####
train loss :  -0.45411917567253113



CG terminated in 1000 iterations with average residual norm 719.438232421875 which is larger than the tolerance of 0.01 specified by linear_operator.settings.cg_tolerance. If performance is affected, consider raising the maximum number of CG iterations by running code in a linear_operator.settings.max_cg_iterations(value) context.



test loss :  251208832.0
##### EPOCH 2117 #####
train loss :  -0.2137453705072403



CG terminated in 1000 iterations with average residual norm 8963.2197265625 which is larger than the tolerance of 0.01 specified by linear_operator.settings.cg_tolerance. If performance is affected, consider raising the maximum number of CG iterations by running code in a linear_operator.settings.max_cg_iterations(value) context.



test loss :  45554569216.0
##### EPOCH 2118 #####
train loss :  -0.2997538149356842



CG terminated in 1000 iterations with average residual norm 1009.4540405273438 which is larger than the tolerance of 0.01 specified by linear_operator.settings.cg_tolerance. If performance is affected, consider raising the maximum number of CG iterations by running code in a linear_operator.settings.max_cg_iterations(value) context.



test loss :  693733312.0
##### EPOCH 2119 #####
train loss :  -0.4815658926963806



CG terminated in 1000 iterations with average residual norm 1197.666015625 which is larger than the tolerance of 0.01 specified by linear_operator.settings.cg_tolerance. If performance is affected, consider raising the maximum number of CG iterations by running code in a linear_operator.settings.max_cg_iterations(value) context.



test loss :  1295532928.0
##### EPOCH 2120 #####
train loss :  -0.541148841381073



CG terminated in 1000 iterations with average residual norm 685.22802734375 which is larger than the tolerance of 0.01 specified by linear_operator.settings.cg_tolerance. If performance is affected, consider raising the maximum number of CG iterations by running code in a linear_operator.settings.max_cg_iterations(value) context.



test loss :  268450752.0
##### EPOCH 2121 #####
train loss :  -0.21483254432678223



CG terminated in 1000 iterations with average residual norm 384.7521057128906 which is larger than the tolerance of 0.01 specified by linear_operator.settings.cg_tolerance. If performance is affected, consider raising the maximum number of CG iterations by running code in a linear_operator.settings.max_cg_iterations(value) context.



test loss :  83799400.0
##### EPOCH 2122 #####
train loss :  -0.5039351582527161



CG terminated in 1000 iterations with average residual norm 612.71875 which is larger than the tolerance of 0.01 specified by linear_operator.settings.cg_tolerance. If performance is affected, consider raising the maximum number of CG iterations by running code in a linear_operator.settings.max_cg_iterations(value) context.



test loss :  528276928.0
##### EPOCH 2123 #####
train loss :  -0.21855978667736053



CG terminated in 1000 iterations with average residual norm 881.4225463867188 which is larger than the tolerance of 0.01 specified by linear_operator.settings.cg_tolerance. If performance is affected, consider raising the maximum number of CG iterations by running code in a linear_operator.settings.max_cg_iterations(value) context.



test loss :  471608928.0
##### EPOCH 2124 #####
train loss :  -0.25306546688079834



CG terminated in 1000 iterations with average residual norm 409.60137939453125 which is larger than the tolerance of 0.01 specified by linear_operator.settings.cg_tolerance. If performance is affected, consider raising the maximum number of CG iterations by running code in a linear_operator.settings.max_cg_iterations(value) context.



test loss :  66455488.0
##### EPOCH 2125 #####
train loss :  -0.2090461105108261



CG terminated in 1000 iterations with average residual norm 671.6788940429688 which is larger than the tolerance of 0.01 specified by linear_operator.settings.cg_tolerance. If performance is affected, consider raising the maximum number of CG iterations by running code in a linear_operator.settings.max_cg_iterations(value) context.



test loss :  475577024.0
##### EPOCH 2126 #####
train loss :  -0.09105563908815384



CG terminated in 1000 iterations with average residual norm 506.3741455078125 which is larger than the tolerance of 0.01 specified by linear_operator.settings.cg_tolerance. If performance is affected, consider raising the maximum number of CG iterations by running code in a linear_operator.settings.max_cg_iterations(value) context.



test loss :  194425136.0
##### EPOCH 2127 #####
train loss :  -0.2524262070655823
test loss :  0.6552737951278687
##### EPOCH 2128 #####
train loss :  -0.11069396883249283



CG terminated in 1000 iterations with average residual norm 393.6137390136719 which is larger than the tolerance of 0.01 specified by linear_operator.settings.cg_tolerance. If performance is affected, consider raising the maximum number of CG iterations by running code in a linear_operator.settings.max_cg_iterations(value) context.



test loss :  87448392.0
##### EPOCH 2129 #####
train loss :  -0.1302742063999176



CG terminated in 1000 iterations with average residual norm 316.0602722167969 which is larger than the tolerance of 0.01 specified by linear_operator.settings.cg_tolerance. If performance is affected, consider raising the maximum number of CG iterations by running code in a linear_operator.settings.max_cg_iterations(value) context.



test loss :  66410220.0
##### EPOCH 2130 #####
train loss :  -0.20268693566322327



CG terminated in 1000 iterations with average residual norm 306.43878173828125 which is larger than the tolerance of 0.01 specified by linear_operator.settings.cg_tolerance. If performance is affected, consider raising the maximum number of CG iterations by running code in a linear_operator.settings.max_cg_iterations(value) context.



test loss :  40767836.0
##### EPOCH 2131 #####
train loss :  -0.27393776178359985



CG terminated in 1000 iterations with average residual norm 323.37701416015625 which is larger than the tolerance of 0.01 specified by linear_operator.settings.cg_tolerance. If performance is affected, consider raising the maximum number of CG iterations by running code in a linear_operator.settings.max_cg_iterations(value) context.



test loss :  69678544.0
##### EPOCH 2132 #####
train loss :  -0.2911979556083679
test loss :  0.8287874460220337
##### EPOCH 2133 #####
train loss :  -0.45443975925445557



CG terminated in 1000 iterations with average residual norm 284.7326354980469 which is larger than the tolerance of 0.01 specified by linear_operator.settings.cg_tolerance. If performance is affected, consider raising the maximum number of CG iterations by running code in a linear_operator.settings.max_cg_iterations(value) context.



EXCEPTION
##### EPOCH 2134 #####
train loss :  -0.16016721725463867



CG terminated in 1000 iterations with average residual norm 2251.625 which is larger than the tolerance of 0.01 specified by linear_operator.settings.cg_tolerance. If performance is affected, consider raising the maximum number of CG iterations by running code in a linear_operator.settings.max_cg_iterations(value) context.



test loss :  1352837760.0
##### EPOCH 2135 #####
train loss :  -0.14146044850349426



CG terminated in 1000 iterations with average residual norm 190.0037384033203 which is larger than the tolerance of 0.01 specified by linear_operator.settings.cg_tolerance. If performance is affected, consider raising the maximum number of CG iterations by running code in a linear_operator.settings.max_cg_iterations(value) context.



test loss :  13242680.0
##### EPOCH 2136 #####
train loss :  0.0016790556255728006



CG terminated in 1000 iterations with average residual norm 637.9868774414062 which is larger than the tolerance of 0.01 specified by linear_operator.settings.cg_tolerance. If performance is affected, consider raising the maximum number of CG iterations by running code in a linear_operator.settings.max_cg_iterations(value) context.



test loss :  292017216.0
##### EPOCH 2137 #####
train loss :  0.049466490745544434



CG terminated in 1000 iterations with average residual norm 672.888427734375 which is larger than the tolerance of 0.01 specified by linear_operator.settings.cg_tolerance. If performance is affected, consider raising the maximum number of CG iterations by running code in a linear_operator.settings.max_cg_iterations(value) context.



test loss :  226369264.0
##### EPOCH 2138 #####
train loss :  -0.1923237293958664
test loss :  0.40001219511032104
##### EPOCH 2139 #####
train loss :  -0.14906777441501617



CG terminated in 1000 iterations with average residual norm 513.2847290039062 which is larger than the tolerance of 0.01 specified by linear_operator.settings.cg_tolerance. If performance is affected, consider raising the maximum number of CG iterations by running code in a linear_operator.settings.max_cg_iterations(value) context.



test loss :  179331600.0
##### EPOCH 2140 #####
train loss :  -0.006790212821215391



CG terminated in 1000 iterations with average residual norm 1262.8260498046875 which is larger than the tolerance of 0.01 specified by linear_operator.settings.cg_tolerance. If performance is affected, consider raising the maximum number of CG iterations by running code in a linear_operator.settings.max_cg_iterations(value) context.



test loss :  557696448.0
##### EPOCH 2141 #####
train loss :  -0.07655307650566101



CG terminated in 1000 iterations with average residual norm 563.7637939453125 which is larger than the tolerance of 0.01 specified by linear_operator.settings.cg_tolerance. If performance is affected, consider raising the maximum number of CG iterations by running code in a linear_operator.settings.max_cg_iterations(value) context.



test loss :  116778064.0
##### EPOCH 2142 #####
train loss :  -0.11569682508707047



CG terminated in 1000 iterations with average residual norm 2918.285888671875 which is larger than the tolerance of 0.01 specified by linear_operator.settings.cg_tolerance. If performance is affected, consider raising the maximum number of CG iterations by running code in a linear_operator.settings.max_cg_iterations(value) context.



test loss :  2807127040.0
##### EPOCH 2143 #####
train loss :  -0.19157439470291138
test loss :  0.7258927822113037
##### EPOCH 2144 #####
train loss :  -0.2844889461994171



CG terminated in 1000 iterations with average residual norm 399.44659423828125 which is larger than the tolerance of 0.01 specified by linear_operator.settings.cg_tolerance. If performance is affected, consider raising the maximum number of CG iterations by running code in a linear_operator.settings.max_cg_iterations(value) context.



test loss :  47168148.0
##### EPOCH 2145 #####
train loss :  -0.315886914730072
test loss :  0.7862275242805481
##### EPOCH 2146 #####
train loss :  -0.26920878887176514



CG terminated in 1000 iterations with average residual norm 542.0767822265625 which is larger than the tolerance of 0.01 specified by linear_operator.settings.cg_tolerance. If performance is affected, consider raising the maximum number of CG iterations by running code in a linear_operator.settings.max_cg_iterations(value) context.



test loss :  137577856.0
##### EPOCH 2147 #####
train loss :  -0.21564628183841705
test loss :  0.3379409909248352
##### EPOCH 2148 #####
train loss :  -0.29937881231307983
test loss :  1.172367811203003
##### EPOCH 2149 #####
train loss :  -0.31091344356536865
test loss :  0.5808767676353455
##### EPOCH 2150 #####
train loss :  -0.3422960042953491



CG terminated in 1000 iterations with average residual norm 1098.32470703125 which is larger than the tolerance of 0.01 specified by linear_operator.settings.cg_tolerance. If performance is affected, consider raising the maximum number of CG iterations by running code in a linear_operator.settings.max_cg_iterations(value) context.



test loss :  711798784.0
##### EPOCH 2151 #####
train loss :  -0.5153957605361938
test loss :  1.0336450338363647
##### EPOCH 2152 #####
train loss :  -0.2813481092453003



CG terminated in 1000 iterations with average residual norm 1735.27294921875 which is larger than the tolerance of 0.01 specified by linear_operator.settings.cg_tolerance. If performance is affected, consider raising the maximum number of CG iterations by running code in a linear_operator.settings.max_cg_iterations(value) context.



test loss :  1963103232.0
##### EPOCH 2153 #####
train loss :  -0.1800500750541687
test loss :  0.7260108590126038
##### EPOCH 2154 #####
train loss :  -0.4125211536884308



CG terminated in 1000 iterations with average residual norm 1642.069580078125 which is larger than the tolerance of 0.01 specified by linear_operator.settings.cg_tolerance. If performance is affected, consider raising the maximum number of CG iterations by running code in a linear_operator.settings.max_cg_iterations(value) context.



test loss :  1959860352.0
##### EPOCH 2155 #####
train loss :  -0.2553245425224304
test loss :  0.8049846887588501
##### EPOCH 2156 #####
train loss :  -0.24357441067695618



CG terminated in 1000 iterations with average residual norm 1386.7918701171875 which is larger than the tolerance of 0.01 specified by linear_operator.settings.cg_tolerance. If performance is affected, consider raising the maximum number of CG iterations by running code in a linear_operator.settings.max_cg_iterations(value) context.



test loss :  918796928.0
##### EPOCH 2157 #####
train loss :  -0.3323591351509094



CG terminated in 1000 iterations with average residual norm 688.8445434570312 which is larger than the tolerance of 0.01 specified by linear_operator.settings.cg_tolerance. If performance is affected, consider raising the maximum number of CG iterations by running code in a linear_operator.settings.max_cg_iterations(value) context.



test loss :  212330304.0
##### EPOCH 2158 #####
train loss :  -0.5135784149169922



CG terminated in 1000 iterations with average residual norm 458.9017333984375 which is larger than the tolerance of 0.01 specified by linear_operator.settings.cg_tolerance. If performance is affected, consider raising the maximum number of CG iterations by running code in a linear_operator.settings.max_cg_iterations(value) context.



test loss :  158844912.0
##### EPOCH 2159 #####
train loss :  -0.1941673308610916



CG terminated in 1000 iterations with average residual norm 2984.056396484375 which is larger than the tolerance of 0.01 specified by linear_operator.settings.cg_tolerance. If performance is affected, consider raising the maximum number of CG iterations by running code in a linear_operator.settings.max_cg_iterations(value) context.



test loss :  6761787904.0
##### EPOCH 2160 #####
train loss :  -0.27474361658096313



CG terminated in 1000 iterations with average residual norm 2121.722412109375 which is larger than the tolerance of 0.01 specified by linear_operator.settings.cg_tolerance. If performance is affected, consider raising the maximum number of CG iterations by running code in a linear_operator.settings.max_cg_iterations(value) context.



test loss :  3916979200.0
##### EPOCH 2161 #####
train loss :  -0.4154106080532074



CG terminated in 1000 iterations with average residual norm 746694.3125 which is larger than the tolerance of 0.01 specified by linear_operator.settings.cg_tolerance. If performance is affected, consider raising the maximum number of CG iterations by running code in a linear_operator.settings.max_cg_iterations(value) context.



test loss :  378412321472512.0
##### EPOCH 2162 #####
train loss :  -0.320417582988739



CG terminated in 1000 iterations with average residual norm 604.7662353515625 which is larger than the tolerance of 0.01 specified by linear_operator.settings.cg_tolerance. If performance is affected, consider raising the maximum number of CG iterations by running code in a linear_operator.settings.max_cg_iterations(value) context.



test loss :  187490704.0
##### EPOCH 2163 #####
train loss :  -0.43249404430389404



CG terminated in 1000 iterations with average residual norm 519.0816650390625 which is larger than the tolerance of 0.01 specified by linear_operator.settings.cg_tolerance. If performance is affected, consider raising the maximum number of CG iterations by running code in a linear_operator.settings.max_cg_iterations(value) context.



EXCEPTION
##### EPOCH 2164 #####
train loss :  -0.6196495294570923
test loss :  0.5118904709815979
##### EPOCH 2165 #####
train loss :  -0.45664894580841064



CG terminated in 1000 iterations with average residual norm 336.77032470703125 which is larger than the tolerance of 0.01 specified by linear_operator.settings.cg_tolerance. If performance is affected, consider raising the maximum number of CG iterations by running code in a linear_operator.settings.max_cg_iterations(value) context.



test loss :  86505384.0
##### EPOCH 2166 #####
train loss :  0.28036898374557495



CG terminated in 1000 iterations with average residual norm 261.0702209472656 which is larger than the tolerance of 0.01 specified by linear_operator.settings.cg_tolerance. If performance is affected, consider raising the maximum number of CG iterations by running code in a linear_operator.settings.max_cg_iterations(value) context.



test loss :  46276224.0
##### EPOCH 2167 #####
train loss :  -0.1515120565891266



CG terminated in 1000 iterations with average residual norm 4194.82763671875 which is larger than the tolerance of 0.01 specified by linear_operator.settings.cg_tolerance. If performance is affected, consider raising the maximum number of CG iterations by running code in a linear_operator.settings.max_cg_iterations(value) context.



test loss :  18467719168.0
##### EPOCH 2168 #####
train loss :  -0.1456165462732315



CG terminated in 1000 iterations with average residual norm 193.269287109375 which is larger than the tolerance of 0.01 specified by linear_operator.settings.cg_tolerance. If performance is affected, consider raising the maximum number of CG iterations by running code in a linear_operator.settings.max_cg_iterations(value) context.



test loss :  86922104.0
##### EPOCH 2169 #####
train loss :  0.09545150399208069
test loss :  0.2979121208190918
##### EPOCH 2170 #####
train loss :  -0.08313550800085068



CG terminated in 1000 iterations with average residual norm 594.3233642578125 which is larger than the tolerance of 0.01 specified by linear_operator.settings.cg_tolerance. If performance is affected, consider raising the maximum number of CG iterations by running code in a linear_operator.settings.max_cg_iterations(value) context.



test loss :  265152160.0
##### EPOCH 2171 #####
train loss :  -0.1995445191860199



CG terminated in 1000 iterations with average residual norm 761.1048583984375 which is larger than the tolerance of 0.01 specified by linear_operator.settings.cg_tolerance. If performance is affected, consider raising the maximum number of CG iterations by running code in a linear_operator.settings.max_cg_iterations(value) context.



test loss :  215348464.0
##### EPOCH 2172 #####
train loss :  -0.2832840383052826
test loss :  0.3784152567386627
##### EPOCH 2173 #####
train loss :  0.13170835375785828
test loss :  0.1951291561126709
##### EPOCH 2174 #####
train loss :  -0.31343796849250793
test loss :  0.7491651177406311
##### EPOCH 2175 #####
train loss :  -0.21784758567810059
test loss :  0.8946830034255981
##### EPOCH 2176 #####
train loss :  -0.46558454632759094
test loss :  0.2966301441192627
##### EPOCH 2177 #####
train loss :  -0.3523913025856018
test loss :  0.6213346123695374
##### EPOCH 2178 #####
train loss :  -0.46794602274894714
test loss :  1.144389033317566
##### EPOCH 2179 #####
train loss :  -0.5475916266441345
test loss :  1.2171216011047363
##### EPOCH 2180 #####
train loss :  -0.31191572546958923
test loss :  1.790382981300354
##### EPOCH 2181 #####
train loss :  -0.3066953718662262



CG terminated in 1000 iterations with average residual norm 173.61898803710938 which is larger than the tolerance of 0.01 specified by linear_operator.settings.cg_tolerance. If performance is affected, consider raising the maximum number of CG iterations by running code in a linear_operator.settings.max_cg_iterations(value) context.



test loss :  24347468.0
##### EPOCH 2182 #####
train loss :  -0.3006457984447479
test loss :  1.2329204082489014
##### EPOCH 2183 #####
train loss :  -0.581073522567749



CG terminated in 1000 iterations with average residual norm 931.4540405273438 which is larger than the tolerance of 0.01 specified by linear_operator.settings.cg_tolerance. If performance is affected, consider raising the maximum number of CG iterations by running code in a linear_operator.settings.max_cg_iterations(value) context.



test loss :  458882816.0
##### EPOCH 2184 #####
train loss :  -0.5366414785385132



CG terminated in 1000 iterations with average residual norm 351.86328125 which is larger than the tolerance of 0.01 specified by linear_operator.settings.cg_tolerance. If performance is affected, consider raising the maximum number of CG iterations by running code in a linear_operator.settings.max_cg_iterations(value) context.



test loss :  71004512.0
##### EPOCH 2185 #####
train loss :  -0.29735174775123596



CG terminated in 1000 iterations with average residual norm 596.9166870117188 which is larger than the tolerance of 0.01 specified by linear_operator.settings.cg_tolerance. If performance is affected, consider raising the maximum number of CG iterations by running code in a linear_operator.settings.max_cg_iterations(value) context.



test loss :  352691104.0
##### EPOCH 2186 #####
train loss :  -0.512448251247406



CG terminated in 1000 iterations with average residual norm 1880.7568359375 which is larger than the tolerance of 0.01 specified by linear_operator.settings.cg_tolerance. If performance is affected, consider raising the maximum number of CG iterations by running code in a linear_operator.settings.max_cg_iterations(value) context.



test loss :  4915489280.0
##### EPOCH 2187 #####
train loss :  -0.2529762387275696



CG terminated in 1000 iterations with average residual norm 287.5576171875 which is larger than the tolerance of 0.01 specified by linear_operator.settings.cg_tolerance. If performance is affected, consider raising the maximum number of CG iterations by running code in a linear_operator.settings.max_cg_iterations(value) context.



test loss :  85832744.0
##### EPOCH 2188 #####
train loss :  -0.3194931149482727



CG terminated in 1000 iterations with average residual norm 480.53118896484375 which is larger than the tolerance of 0.01 specified by linear_operator.settings.cg_tolerance. If performance is affected, consider raising the maximum number of CG iterations by running code in a linear_operator.settings.max_cg_iterations(value) context.



test loss :  226077184.0
##### EPOCH 2189 #####
train loss :  -0.49969837069511414



CG terminated in 1000 iterations with average residual norm 972.1693115234375 which is larger than the tolerance of 0.01 specified by linear_operator.settings.cg_tolerance. If performance is affected, consider raising the maximum number of CG iterations by running code in a linear_operator.settings.max_cg_iterations(value) context.



test loss :  659071168.0
##### EPOCH 2190 #####
train loss :  -0.2698662281036377



CG terminated in 1000 iterations with average residual norm 1040.9451904296875 which is larger than the tolerance of 0.01 specified by linear_operator.settings.cg_tolerance. If performance is affected, consider raising the maximum number of CG iterations by running code in a linear_operator.settings.max_cg_iterations(value) context.



test loss :  1151128448.0
##### EPOCH 2191 #####
train loss :  -0.3209056258201599



CG terminated in 1000 iterations with average residual norm 2154.563720703125 which is larger than the tolerance of 0.01 specified by linear_operator.settings.cg_tolerance. If performance is affected, consider raising the maximum number of CG iterations by running code in a linear_operator.settings.max_cg_iterations(value) context.



test loss :  11600309248.0
##### EPOCH 2192 #####
train loss :  -0.362877756357193



CG terminated in 1000 iterations with average residual norm 516.2584838867188 which is larger than the tolerance of 0.01 specified by linear_operator.settings.cg_tolerance. If performance is affected, consider raising the maximum number of CG iterations by running code in a linear_operator.settings.max_cg_iterations(value) context.



test loss :  353096736.0
##### EPOCH 2193 #####
train loss :  -0.2675105035305023



CG terminated in 1000 iterations with average residual norm 832.7504272460938 which is larger than the tolerance of 0.01 specified by linear_operator.settings.cg_tolerance. If performance is affected, consider raising the maximum number of CG iterations by running code in a linear_operator.settings.max_cg_iterations(value) context.



test loss :  713234112.0
##### EPOCH 2194 #####
train loss :  -0.6118512153625488



CG terminated in 1000 iterations with average residual norm 258.68060302734375 which is larger than the tolerance of 0.01 specified by linear_operator.settings.cg_tolerance. If performance is affected, consider raising the maximum number of CG iterations by running code in a linear_operator.settings.max_cg_iterations(value) context.



test loss :  62151492.0
##### EPOCH 2195 #####
train loss :  -0.24609020352363586



CG terminated in 1000 iterations with average residual norm 14141.15625 which is larger than the tolerance of 0.01 specified by linear_operator.settings.cg_tolerance. If performance is affected, consider raising the maximum number of CG iterations by running code in a linear_operator.settings.max_cg_iterations(value) context.



test loss :  101996806144.0
##### EPOCH 2196 #####
train loss :  -0.3962402939796448



CG terminated in 1000 iterations with average residual norm 234.63987731933594 which is larger than the tolerance of 0.01 specified by linear_operator.settings.cg_tolerance. If performance is affected, consider raising the maximum number of CG iterations by running code in a linear_operator.settings.max_cg_iterations(value) context.



test loss :  36967440.0
##### EPOCH 2197 #####
train loss :  -0.6401314735412598



CG terminated in 1000 iterations with average residual norm 364.9732360839844 which is larger than the tolerance of 0.01 specified by linear_operator.settings.cg_tolerance. If performance is affected, consider raising the maximum number of CG iterations by running code in a linear_operator.settings.max_cg_iterations(value) context.



test loss :  206402016.0
##### EPOCH 2198 #####
train loss :  -0.44912397861480713



CG terminated in 1000 iterations with average residual norm 328.6584167480469 which is larger than the tolerance of 0.01 specified by linear_operator.settings.cg_tolerance. If performance is affected, consider raising the maximum number of CG iterations by running code in a linear_operator.settings.max_cg_iterations(value) context.



test loss :  89728424.0
##### EPOCH 2199 #####
train loss :  -0.3754110038280487



CG terminated in 1000 iterations with average residual norm 673.4132080078125 which is larger than the tolerance of 0.01 specified by linear_operator.settings.cg_tolerance. If performance is affected, consider raising the maximum number of CG iterations by running code in a linear_operator.settings.max_cg_iterations(value) context.



test loss :  988709824.0
##### EPOCH 2200 #####
train loss :  -0.7609720826148987



CG terminated in 1000 iterations with average residual norm 6162.22509765625 which is larger than the tolerance of 0.01 specified by linear_operator.settings.cg_tolerance. If performance is affected, consider raising the maximum number of CG iterations by running code in a linear_operator.settings.max_cg_iterations(value) context.



test loss :  302635483136.0
##### EPOCH 2201 #####



CG terminated in 1000 iterations with average residual norm 629789.625 which is larger than the tolerance of 1 specified by linear_operator.settings.cg_tolerance. If performance is affected, consider raising the maximum number of CG iterations by running code in a linear_operator.settings.max_cg_iterations(value) context.



train loss :  26914.279296875



CG terminated in 1000 iterations with average residual norm 517.73486328125 which is larger than the tolerance of 0.01 specified by linear_operator.settings.cg_tolerance. If performance is affected, consider raising the maximum number of CG iterations by running code in a linear_operator.settings.max_cg_iterations(value) context.



test loss :  349180096.0
##### EPOCH 2202 #####
train loss :  -0.13916215300559998
test loss :  0.2797988951206207
##### EPOCH 2203 #####
train loss :  -0.09206856042146683
test loss :  0.476553350687027
##### EPOCH 2204 #####
train loss :  -0.014781564474105835
test loss :  0.4579212963581085
##### EPOCH 2205 #####
train loss :  -0.02967597171664238
test loss :  0.4394979178905487
##### EPOCH 2206 #####
train loss :  -0.012514631263911724



CG terminated in 1000 iterations with average residual norm 445.21044921875 which is larger than the tolerance of 0.01 specified by linear_operator.settings.cg_tolerance. If performance is affected, consider raising the maximum number of CG iterations by running code in a linear_operator.settings.max_cg_iterations(value) context.



test loss :  104921536.0
##### EPOCH 2207 #####
train loss :  0.030361615121364594



CG terminated in 1000 iterations with average residual norm 321.1506042480469 which is larger than the tolerance of 0.01 specified by linear_operator.settings.cg_tolerance. If performance is affected, consider raising the maximum number of CG iterations by running code in a linear_operator.settings.max_cg_iterations(value) context.



test loss :  119957744.0
##### EPOCH 2208 #####
train loss :  -0.057762619107961655
test loss :  0.5473861694335938
##### EPOCH 2209 #####
train loss :  -0.14298060536384583
test loss :  1.0386438369750977
##### EPOCH 2210 #####
train loss :  0.05939462035894394
test loss :  0.6097912192344666
##### EPOCH 2211 #####
train loss :  -0.2771647274494171
test loss :  1.4592862129211426
##### EPOCH 2212 #####
train loss :  -0.38989296555519104



CG terminated in 1000 iterations with average residual norm 12624.8740234375 which is larger than the tolerance of 0.01 specified by linear_operator.settings.cg_tolerance. If performance is affected, consider raising the maximum number of CG iterations by running code in a linear_operator.settings.max_cg_iterations(value) context.



test loss :  138164502528.0
##### EPOCH 2213 #####
train loss :  -0.3157278597354889
test loss :  0.4474579989910126
##### EPOCH 2214 #####
train loss :  -0.2845369577407837
test loss :  0.6574176549911499
##### EPOCH 2215 #####
train loss :  -0.43579748272895813
test loss :  1.128703236579895
##### EPOCH 2216 #####
train loss :  -0.4328728914260864
test loss :  1.1817985773086548
##### EPOCH 2217 #####
train loss :  -0.4079163670539856
test loss :  2.3675038814544678
##### EPOCH 2218 #####
train loss :  -0.4590988755226135
test loss :  1.7850637435913086
##### EPOCH 2219 #####
train loss :  -0.3482673466205597



CG terminated in 1000 iterations with average residual norm 458.5561828613281 which is larger than the tolerance of 0.01 specified by linear_operator.settings.cg_tolerance. If performance is affected, consider raising the maximum number of CG iterations by running code in a linear_operator.settings.max_cg_iterations(value) context.



test loss :  187198080.0
##### EPOCH 2220 #####
train loss :  -0.04724833369255066



CG terminated in 1000 iterations with average residual norm 527.5790405273438 which is larger than the tolerance of 0.01 specified by linear_operator.settings.cg_tolerance. If performance is affected, consider raising the maximum number of CG iterations by running code in a linear_operator.settings.max_cg_iterations(value) context.



test loss :  297422720.0
##### EPOCH 2221 #####
train loss :  -0.06864458322525024



CG terminated in 1000 iterations with average residual norm 458.6945495605469 which is larger than the tolerance of 0.01 specified by linear_operator.settings.cg_tolerance. If performance is affected, consider raising the maximum number of CG iterations by running code in a linear_operator.settings.max_cg_iterations(value) context.



test loss :  213738384.0
##### EPOCH 2222 #####
train loss :  -0.30084308981895447



CG terminated in 1000 iterations with average residual norm 822.9166870117188 which is larger than the tolerance of 0.01 specified by linear_operator.settings.cg_tolerance. If performance is affected, consider raising the maximum number of CG iterations by running code in a linear_operator.settings.max_cg_iterations(value) context.



test loss :  599826496.0
##### EPOCH 2223 #####
train loss :  -0.45103919506073



CG terminated in 1000 iterations with average residual norm 4140.7626953125 which is larger than the tolerance of 0.01 specified by linear_operator.settings.cg_tolerance. If performance is affected, consider raising the maximum number of CG iterations by running code in a linear_operator.settings.max_cg_iterations(value) context.



test loss :  18408118272.0
##### EPOCH 2224 #####
train loss :  -0.6888671517372131



CG terminated in 1000 iterations with average residual norm 692.9144897460938 which is larger than the tolerance of 0.01 specified by linear_operator.settings.cg_tolerance. If performance is affected, consider raising the maximum number of CG iterations by running code in a linear_operator.settings.max_cg_iterations(value) context.



test loss :  480462208.0
##### EPOCH 2225 #####
train loss :  -0.5553460121154785



CG terminated in 1000 iterations with average residual norm 1999.650390625 which is larger than the tolerance of 0.01 specified by linear_operator.settings.cg_tolerance. If performance is affected, consider raising the maximum number of CG iterations by running code in a linear_operator.settings.max_cg_iterations(value) context.



test loss :  4085293056.0
##### EPOCH 2226 #####
train loss :  -0.5902704000473022



CG terminated in 1000 iterations with average residual norm 3926.882568359375 which is larger than the tolerance of 0.01 specified by linear_operator.settings.cg_tolerance. If performance is affected, consider raising the maximum number of CG iterations by running code in a linear_operator.settings.max_cg_iterations(value) context.



test loss :  164756733952.0
##### EPOCH 2227 #####
train loss :  -0.4984387159347534
test loss :  1.1283677816390991
##### EPOCH 2228 #####
train loss :  0.14790399372577667
test loss :  0.057482559233903885
##### EPOCH 2229 #####
train loss :  -0.3012533485889435
test loss :  -0.004426281433552504
##### EPOCH 2230 #####
train loss :  -0.09542423486709595
test loss :  -0.005986375268548727
##### EPOCH 2231 #####
train loss :  -0.4032669961452484
test loss :  1.2945510149002075
##### EPOCH 2232 #####
train loss :  -0.31707045435905457
test loss :  0.9201309084892273
##### EPOCH 2233 #####
train loss :  -0.2399265468120575
test loss :  0.0855620726943016
##### EPOCH 2234 #####
train loss :  -0.33787232637405396
test loss :  1.0187509059906006
##### EPOCH 2235 #####
train loss :  -0.12371561676263809
test loss :  0.3042837381362915
##### EPOCH 2236 #####
train loss :  -0.27889615297317505
test loss :  2.050304651260376
##### EPOCH 2237 #####
train loss :  -0.14347709715366364
test loss : 


CG terminated in 1000 iterations with average residual norm 268.7496337890625 which is larger than the tolerance of 0.01 specified by linear_operator.settings.cg_tolerance. If performance is affected, consider raising the maximum number of CG iterations by running code in a linear_operator.settings.max_cg_iterations(value) context.



test loss :  21066742.0
##### EPOCH 2246 #####
train loss :  -0.12937147915363312
test loss :  0.1822858303785324
##### EPOCH 2247 #####
train loss :  -0.23039136826992035
test loss :  1.5168267488479614
##### EPOCH 2248 #####
train loss :  -0.38063734769821167



CG terminated in 1000 iterations with average residual norm 383.4996032714844 which is larger than the tolerance of 0.01 specified by linear_operator.settings.cg_tolerance. If performance is affected, consider raising the maximum number of CG iterations by running code in a linear_operator.settings.max_cg_iterations(value) context.



test loss :  70197088.0
##### EPOCH 2249 #####
train loss :  -0.12768249213695526
test loss :  0.1602393537759781
##### EPOCH 2250 #####
train loss :  -0.4330839514732361
test loss :  0.667702317237854
##### EPOCH 2251 #####
train loss :  -0.24375952780246735
test loss :  0.20464688539505005
##### EPOCH 2252 #####
train loss :  -0.49296119809150696
test loss :  0.28545355796813965
##### EPOCH 2253 #####
train loss :  -0.166807621717453
test loss :  0.09419015794992447
##### EPOCH 2254 #####
train loss :  -0.4203203618526459
test loss :  0.5951533317565918
##### EPOCH 2255 #####
train loss :  -0.31904906034469604



CG terminated in 1000 iterations with average residual norm 1360.1123046875 which is larger than the tolerance of 0.01 specified by linear_operator.settings.cg_tolerance. If performance is affected, consider raising the maximum number of CG iterations by running code in a linear_operator.settings.max_cg_iterations(value) context.



test loss :  1692509824.0
##### EPOCH 2256 #####
train loss :  -0.3591023087501526



CG terminated in 1000 iterations with average residual norm 512.5142822265625 which is larger than the tolerance of 0.01 specified by linear_operator.settings.cg_tolerance. If performance is affected, consider raising the maximum number of CG iterations by running code in a linear_operator.settings.max_cg_iterations(value) context.



test loss :  291917952.0
##### EPOCH 2257 #####
train loss :  -0.4198492765426636



CG terminated in 1000 iterations with average residual norm 2446.231201171875 which is larger than the tolerance of 0.01 specified by linear_operator.settings.cg_tolerance. If performance is affected, consider raising the maximum number of CG iterations by running code in a linear_operator.settings.max_cg_iterations(value) context.



test loss :  14833393664.0
##### EPOCH 2258 #####
train loss :  -0.4624652862548828



CG terminated in 1000 iterations with average residual norm 121.85768127441406 which is larger than the tolerance of 0.01 specified by linear_operator.settings.cg_tolerance. If performance is affected, consider raising the maximum number of CG iterations by running code in a linear_operator.settings.max_cg_iterations(value) context.



test loss :  19795370.0
##### EPOCH 2259 #####
train loss :  0.15128859877586365
test loss :  0.1811186522245407
##### EPOCH 2260 #####
train loss :  -0.4297398030757904
test loss :  0.21431580185890198
##### EPOCH 2261 #####
train loss :  -0.49342110753059387
test loss :  0.7517057061195374
##### EPOCH 2262 #####
train loss :  -0.4343389570713043



CG terminated in 1000 iterations with average residual norm 489.64111328125 which is larger than the tolerance of 0.01 specified by linear_operator.settings.cg_tolerance. If performance is affected, consider raising the maximum number of CG iterations by running code in a linear_operator.settings.max_cg_iterations(value) context.



test loss :  175084176.0
##### EPOCH 2263 #####
train loss :  -0.4155823290348053



CG terminated in 1000 iterations with average residual norm 1559.3458251953125 which is larger than the tolerance of 0.01 specified by linear_operator.settings.cg_tolerance. If performance is affected, consider raising the maximum number of CG iterations by running code in a linear_operator.settings.max_cg_iterations(value) context.



test loss :  3532391424.0
##### EPOCH 2264 #####
train loss :  -0.5593852996826172



CG terminated in 1000 iterations with average residual norm 1434.287353515625 which is larger than the tolerance of 0.01 specified by linear_operator.settings.cg_tolerance. If performance is affected, consider raising the maximum number of CG iterations by running code in a linear_operator.settings.max_cg_iterations(value) context.



test loss :  5019617280.0
##### EPOCH 2265 #####
train loss :  -0.46908751130104065



CG terminated in 1000 iterations with average residual norm 1370.4359130859375 which is larger than the tolerance of 0.01 specified by linear_operator.settings.cg_tolerance. If performance is affected, consider raising the maximum number of CG iterations by running code in a linear_operator.settings.max_cg_iterations(value) context.



test loss :  4491159040.0
##### EPOCH 2266 #####
train loss :  -0.1919468492269516
test loss :  0.060980577021837234
##### EPOCH 2267 #####
train loss :  -0.2211640179157257
test loss :  0.04520903527736664
##### EPOCH 2268 #####
train loss :  -0.1353910267353058
test loss :  0.11891467869281769
##### EPOCH 2269 #####
train loss :  -0.5329309701919556
test loss :  0.17734915018081665
##### EPOCH 2270 #####
train loss :  -0.4981893002986908
test loss :  -0.17337799072265625
##### EPOCH 2271 #####
train loss :  -0.2924940884113312
test loss :  -0.08659574389457703
##### EPOCH 2272 #####
train loss :  -0.3340699374675751
test loss :  0.6771930456161499
##### EPOCH 2273 #####
train loss :  -0.5748919248580933



CG terminated in 1000 iterations with average residual norm 4983.01904296875 which is larger than the tolerance of 0.01 specified by linear_operator.settings.cg_tolerance. If performance is affected, consider raising the maximum number of CG iterations by running code in a linear_operator.settings.max_cg_iterations(value) context.



test loss :  16905740288.0
##### EPOCH 2274 #####
train loss :  -0.26513925194740295



CG terminated in 1000 iterations with average residual norm 320.48382568359375 which is larger than the tolerance of 0.01 specified by linear_operator.settings.cg_tolerance. If performance is affected, consider raising the maximum number of CG iterations by running code in a linear_operator.settings.max_cg_iterations(value) context.



test loss :  137954784.0
##### EPOCH 2275 #####
train loss :  -0.3246328830718994



CG terminated in 1000 iterations with average residual norm 6040.814453125 which is larger than the tolerance of 0.01 specified by linear_operator.settings.cg_tolerance. If performance is affected, consider raising the maximum number of CG iterations by running code in a linear_operator.settings.max_cg_iterations(value) context.



test loss :  32246228992.0
##### EPOCH 2276 #####
train loss :  -0.4061782658100128



CG terminated in 1000 iterations with average residual norm 29555.595703125 which is larger than the tolerance of 0.01 specified by linear_operator.settings.cg_tolerance. If performance is affected, consider raising the maximum number of CG iterations by running code in a linear_operator.settings.max_cg_iterations(value) context.



test loss :  1841469587456.0
##### EPOCH 2277 #####
train loss :  -0.1639776974916458



CG terminated in 1000 iterations with average residual norm 233.79827880859375 which is larger than the tolerance of 0.01 specified by linear_operator.settings.cg_tolerance. If performance is affected, consider raising the maximum number of CG iterations by running code in a linear_operator.settings.max_cg_iterations(value) context.



test loss :  57154420.0
##### EPOCH 2278 #####
train loss :  -0.4583646357059479



CG terminated in 1000 iterations with average residual norm 6727.96728515625 which is larger than the tolerance of 0.01 specified by linear_operator.settings.cg_tolerance. If performance is affected, consider raising the maximum number of CG iterations by running code in a linear_operator.settings.max_cg_iterations(value) context.



test loss :  79391006720.0
##### EPOCH 2279 #####
train loss :  -0.19815693795681



CG terminated in 1000 iterations with average residual norm 575.6806030273438 which is larger than the tolerance of 0.01 specified by linear_operator.settings.cg_tolerance. If performance is affected, consider raising the maximum number of CG iterations by running code in a linear_operator.settings.max_cg_iterations(value) context.



test loss :  434044736.0
##### EPOCH 2280 #####
train loss :  -0.1763322651386261



CG terminated in 1000 iterations with average residual norm 823.2265625 which is larger than the tolerance of 0.01 specified by linear_operator.settings.cg_tolerance. If performance is affected, consider raising the maximum number of CG iterations by running code in a linear_operator.settings.max_cg_iterations(value) context.



test loss :  912683712.0
##### EPOCH 2281 #####
train loss :  -0.5696754455566406



CG terminated in 1000 iterations with average residual norm 1253.96630859375 which is larger than the tolerance of 0.01 specified by linear_operator.settings.cg_tolerance. If performance is affected, consider raising the maximum number of CG iterations by running code in a linear_operator.settings.max_cg_iterations(value) context.



test loss :  3183316480.0
##### EPOCH 2282 #####
train loss :  -0.38494595885276794



CG terminated in 1000 iterations with average residual norm 9429.9443359375 which is larger than the tolerance of 0.01 specified by linear_operator.settings.cg_tolerance. If performance is affected, consider raising the maximum number of CG iterations by running code in a linear_operator.settings.max_cg_iterations(value) context.



test loss :  246892101632.0
##### EPOCH 2283 #####
train loss :  -0.18734382092952728



CG terminated in 1000 iterations with average residual norm 1836.7069091796875 which is larger than the tolerance of 0.01 specified by linear_operator.settings.cg_tolerance. If performance is affected, consider raising the maximum number of CG iterations by running code in a linear_operator.settings.max_cg_iterations(value) context.



test loss :  2502248192.0
##### EPOCH 2284 #####
train loss :  -0.16370978951454163



CG terminated in 1000 iterations with average residual norm 7622.4443359375 which is larger than the tolerance of 0.01 specified by linear_operator.settings.cg_tolerance. If performance is affected, consider raising the maximum number of CG iterations by running code in a linear_operator.settings.max_cg_iterations(value) context.



test loss :  47092486144.0
##### EPOCH 2285 #####
train loss :  0.05496738851070404
test loss :  0.18949878215789795
##### EPOCH 2286 #####
train loss :  -0.424748033285141



CG terminated in 1000 iterations with average residual norm 754.5291137695312 which is larger than the tolerance of 0.01 specified by linear_operator.settings.cg_tolerance. If performance is affected, consider raising the maximum number of CG iterations by running code in a linear_operator.settings.max_cg_iterations(value) context.



test loss :  506743584.0
##### EPOCH 2287 #####
train loss :  -0.4217276871204376



CG terminated in 1000 iterations with average residual norm 654.1551513671875 which is larger than the tolerance of 0.01 specified by linear_operator.settings.cg_tolerance. If performance is affected, consider raising the maximum number of CG iterations by running code in a linear_operator.settings.max_cg_iterations(value) context.



test loss :  304965312.0
##### EPOCH 2288 #####
train loss :  -0.06507974117994308



CG terminated in 1000 iterations with average residual norm 5080.3564453125 which is larger than the tolerance of 0.01 specified by linear_operator.settings.cg_tolerance. If performance is affected, consider raising the maximum number of CG iterations by running code in a linear_operator.settings.max_cg_iterations(value) context.



test loss :  27739469824.0
##### EPOCH 2289 #####
train loss :  -0.15024308860301971



CG terminated in 1000 iterations with average residual norm 408.5018615722656 which is larger than the tolerance of 0.01 specified by linear_operator.settings.cg_tolerance. If performance is affected, consider raising the maximum number of CG iterations by running code in a linear_operator.settings.max_cg_iterations(value) context.



test loss :  102193040.0
##### EPOCH 2290 #####
train loss :  -0.25564444065093994
test loss :  0.4069965183734894
##### EPOCH 2291 #####
train loss :  -0.45171213150024414



CG terminated in 1000 iterations with average residual norm 653.7760009765625 which is larger than the tolerance of 0.01 specified by linear_operator.settings.cg_tolerance. If performance is affected, consider raising the maximum number of CG iterations by running code in a linear_operator.settings.max_cg_iterations(value) context.



test loss :  275683232.0
##### EPOCH 2292 #####
train loss :  -0.3112601637840271
test loss :  0.3067638576030731
##### EPOCH 2293 #####
train loss :  -0.31010881066322327
test loss :  0.3783544600009918
##### EPOCH 2294 #####
train loss :  -0.48608681559562683



CG terminated in 1000 iterations with average residual norm 2861.154296875 which is larger than the tolerance of 0.01 specified by linear_operator.settings.cg_tolerance. If performance is affected, consider raising the maximum number of CG iterations by running code in a linear_operator.settings.max_cg_iterations(value) context.



test loss :  8561598976.0
##### EPOCH 2295 #####
train loss :  -0.5593857169151306



CG terminated in 1000 iterations with average residual norm 937.3203735351562 which is larger than the tolerance of 0.01 specified by linear_operator.settings.cg_tolerance. If performance is affected, consider raising the maximum number of CG iterations by running code in a linear_operator.settings.max_cg_iterations(value) context.



test loss :  556156416.0
##### EPOCH 2296 #####
train loss :  -0.507717490196228



CG terminated in 1000 iterations with average residual norm 7035.10107421875 which is larger than the tolerance of 0.01 specified by linear_operator.settings.cg_tolerance. If performance is affected, consider raising the maximum number of CG iterations by running code in a linear_operator.settings.max_cg_iterations(value) context.



test loss :  44249055232.0
##### EPOCH 2297 #####
train loss :  -0.29073086380958557



CG terminated in 1000 iterations with average residual norm 356.2798156738281 which is larger than the tolerance of 0.01 specified by linear_operator.settings.cg_tolerance. If performance is affected, consider raising the maximum number of CG iterations by running code in a linear_operator.settings.max_cg_iterations(value) context.



test loss :  159895968.0
##### EPOCH 2298 #####
train loss :  -0.49177438020706177



CG terminated in 1000 iterations with average residual norm 1098.3984375 which is larger than the tolerance of 0.01 specified by linear_operator.settings.cg_tolerance. If performance is affected, consider raising the maximum number of CG iterations by running code in a linear_operator.settings.max_cg_iterations(value) context.



test loss :  1359740672.0
##### EPOCH 2299 #####
train loss :  -0.24964672327041626



CG terminated in 1000 iterations with average residual norm 23231.1328125 which is larger than the tolerance of 0.01 specified by linear_operator.settings.cg_tolerance. If performance is affected, consider raising the maximum number of CG iterations by running code in a linear_operator.settings.max_cg_iterations(value) context.



test loss :  1386086662144.0
##### EPOCH 2300 #####
train loss :  -0.4771546423435211



CG terminated in 1000 iterations with average residual norm 227.60562133789062 which is larger than the tolerance of 0.01 specified by linear_operator.settings.cg_tolerance. If performance is affected, consider raising the maximum number of CG iterations by running code in a linear_operator.settings.max_cg_iterations(value) context.



test loss :  53382152.0
##### EPOCH 2301 #####
train loss :  -0.15134888887405396



CG terminated in 1000 iterations with average residual norm 267.8741455078125 which is larger than the tolerance of 0.01 specified by linear_operator.settings.cg_tolerance. If performance is affected, consider raising the maximum number of CG iterations by running code in a linear_operator.settings.max_cg_iterations(value) context.



test loss :  120431376.0
##### EPOCH 2302 #####
train loss :  0.001104535418562591



CG terminated in 1000 iterations with average residual norm 1623.80615234375 which is larger than the tolerance of 0.01 specified by linear_operator.settings.cg_tolerance. If performance is affected, consider raising the maximum number of CG iterations by running code in a linear_operator.settings.max_cg_iterations(value) context.



test loss :  2806081024.0
##### EPOCH 2303 #####
train loss :  -0.33688801527023315



CG terminated in 1000 iterations with average residual norm 322.6773681640625 which is larger than the tolerance of 0.01 specified by linear_operator.settings.cg_tolerance. If performance is affected, consider raising the maximum number of CG iterations by running code in a linear_operator.settings.max_cg_iterations(value) context.



test loss :  87290976.0
##### EPOCH 2304 #####
train loss :  -0.4000718295574188



CG terminated in 1000 iterations with average residual norm 7526.0791015625 which is larger than the tolerance of 0.01 specified by linear_operator.settings.cg_tolerance. If performance is affected, consider raising the maximum number of CG iterations by running code in a linear_operator.settings.max_cg_iterations(value) context.



test loss :  164294361088.0
##### EPOCH 2305 #####
train loss :  -0.5567499995231628



CG terminated in 1000 iterations with average residual norm 13007.9462890625 which is larger than the tolerance of 0.01 specified by linear_operator.settings.cg_tolerance. If performance is affected, consider raising the maximum number of CG iterations by running code in a linear_operator.settings.max_cg_iterations(value) context.



test loss :  359811022848.0
##### EPOCH 2306 #####
train loss :  -0.1441110223531723



CG terminated in 1000 iterations with average residual norm 8802.4365234375 which is larger than the tolerance of 0.01 specified by linear_operator.settings.cg_tolerance. If performance is affected, consider raising the maximum number of CG iterations by running code in a linear_operator.settings.max_cg_iterations(value) context.



test loss :  49726963712.0
##### EPOCH 2307 #####
train loss :  -0.547947347164154



CG terminated in 1000 iterations with average residual norm 4395.5458984375 which is larger than the tolerance of 0.01 specified by linear_operator.settings.cg_tolerance. If performance is affected, consider raising the maximum number of CG iterations by running code in a linear_operator.settings.max_cg_iterations(value) context.



test loss :  37171597312.0
##### EPOCH 2308 #####
train loss :  -0.6323966979980469



CG terminated in 1000 iterations with average residual norm 1345.07373046875 which is larger than the tolerance of 0.01 specified by linear_operator.settings.cg_tolerance. If performance is affected, consider raising the maximum number of CG iterations by running code in a linear_operator.settings.max_cg_iterations(value) context.



test loss :  7507465728.0
##### EPOCH 2309 #####



CG terminated in 1000 iterations with average residual norm 36793.6796875 which is larger than the tolerance of 1 specified by linear_operator.settings.cg_tolerance. If performance is affected, consider raising the maximum number of CG iterations by running code in a linear_operator.settings.max_cg_iterations(value) context.



train loss :  6127.07763671875



CG terminated in 1000 iterations with average residual norm 4358.05126953125 which is larger than the tolerance of 0.01 specified by linear_operator.settings.cg_tolerance. If performance is affected, consider raising the maximum number of CG iterations by running code in a linear_operator.settings.max_cg_iterations(value) context.



test loss :  178520866816.0
##### EPOCH 2310 #####



CG terminated in 1000 iterations with average residual norm 104851.765625 which is larger than the tolerance of 1 specified by linear_operator.settings.cg_tolerance. If performance is affected, consider raising the maximum number of CG iterations by running code in a linear_operator.settings.max_cg_iterations(value) context.



train loss :  58264.24609375



CG terminated in 1000 iterations with average residual norm 4183.7216796875 which is larger than the tolerance of 0.01 specified by linear_operator.settings.cg_tolerance. If performance is affected, consider raising the maximum number of CG iterations by running code in a linear_operator.settings.max_cg_iterations(value) context.



test loss :  57517637632.0
##### EPOCH 2311 #####
train loss :  -0.3462110161781311



CG terminated in 1000 iterations with average residual norm 35766.41015625 which is larger than the tolerance of 0.01 specified by linear_operator.settings.cg_tolerance. If performance is affected, consider raising the maximum number of CG iterations by running code in a linear_operator.settings.max_cg_iterations(value) context.



test loss :  824795660288.0
##### EPOCH 2312 #####
train loss :  -0.11868825554847717
test loss :  0.4454229474067688
##### EPOCH 2313 #####
train loss :  -0.24658668041229248
test loss :  0.5087190866470337
##### EPOCH 2314 #####
train loss :  -0.31929436326026917
test loss :  0.34958183765411377
##### EPOCH 2315 #####
train loss :  -0.16371676325798035
test loss :  0.33823829889297485
##### EPOCH 2316 #####
train loss :  -0.17019090056419373
test loss :  0.22453901171684265
##### EPOCH 2317 #####
train loss :  -0.4442842900753021



CG terminated in 1000 iterations with average residual norm 1180.6187744140625 which is larger than the tolerance of 0.01 specified by linear_operator.settings.cg_tolerance. If performance is affected, consider raising the maximum number of CG iterations by running code in a linear_operator.settings.max_cg_iterations(value) context.



test loss :  491757536.0
##### EPOCH 2318 #####
train loss :  -0.09299901127815247
test loss :  0.3675096333026886
##### EPOCH 2319 #####
train loss :  -0.3122289776802063
test loss :  0.3161098062992096
##### EPOCH 2320 #####
train loss :  -0.3141430914402008
test loss :  0.41750863194465637
##### EPOCH 2321 #####
train loss :  -0.25198760628700256
test loss :  0.4599737823009491
##### EPOCH 2322 #####
train loss :  -0.462481826543808



CG terminated in 1000 iterations with average residual norm 6133.04150390625 which is larger than the tolerance of 0.01 specified by linear_operator.settings.cg_tolerance. If performance is affected, consider raising the maximum number of CG iterations by running code in a linear_operator.settings.max_cg_iterations(value) context.



test loss :  33601189888.0
##### EPOCH 2323 #####
train loss :  -0.445269376039505



CG terminated in 1000 iterations with average residual norm 6667.01953125 which is larger than the tolerance of 0.01 specified by linear_operator.settings.cg_tolerance. If performance is affected, consider raising the maximum number of CG iterations by running code in a linear_operator.settings.max_cg_iterations(value) context.



test loss :  49043054592.0
##### EPOCH 2324 #####
train loss :  -0.32498103380203247



CG terminated in 1000 iterations with average residual norm 22923.55859375 which is larger than the tolerance of 0.01 specified by linear_operator.settings.cg_tolerance. If performance is affected, consider raising the maximum number of CG iterations by running code in a linear_operator.settings.max_cg_iterations(value) context.



test loss :  478947803136.0
##### EPOCH 2325 #####
train loss :  -0.5975584983825684


KeyboardInterrupt: ignored

In [13]:
best_state = deepcopy(dkl.losses['BState'])
dkl.model.load_state_dict(best_state), np.min(dkl.losses['Test'])
print(np.min(dkl.losses['Test']))
fig = make_subplots(rows=2, cols=1)
s = 0
fig.append_trace(go.Scatter(x=dkl.losses['Epoch'][s:], y=dkl.losses['Train'][s:],mode='lines',name='Train'), row=1, col=1)
fig.append_trace(go.Scatter(x=dkl.losses['Epoch'][s:], y=dkl.losses['Test'][s:],mode='lines',name='Test'), row=1, col=1)
fig.append_trace(go.Scatter(x=dkl.losses['Epoch'][s:], y=dkl.losses['LR'][s:],mode='lines',name='LR'), row=2, col=1)
fig.update_layout(height=1000, width=1500, title_text="Stacked Subplots")
fig.show()

-0.2790168225765228


In [14]:
dkl_mu, dkl_std, dkl_cov = dkl.predict(X_test)
dkl_mu, dkl_std = dkl_mu.cpu().data.numpy(), dkl_std.cpu().data.numpy()
mu_test = np.exp(dkl_mu + np.power(dkl_std, 2) / 2)
std_test = np.sqrt((np.exp(np.power(dkl_std, 2)) - 1) * np.exp(2 * dkl_mu + np.power(dkl_std, 2)))
lb, ub = norm.ppf(0.025, mu_test, std_test), norm.ppf(0.975, mu_test, std_test)
pred = pd.DataFrame({"mu": mu_test, "lb": lb, "ub": ub})
pred = pd.concat((test.reset_index(), pred), 1)
pred


In a future version of pandas all arguments of concat except for the argument 'objs' will be keyword-only.



Unnamed: 0,index,Store,Date,Sales,Customers,Open,Promo,StateHoliday,SchoolHoliday,StoreType,...,CompetitionDistance,Promo2,Competition,sin_dayofweek,cos_dayofweek,sin_dayofyear,cos_dayofyear,mu,lb,ub
0,0,45,2015-07-31,6301,442,1,1,0,1,d,...,9710.0,0,1.0,-8.660254e-01,-0.5,-0.486273,-0.873807,5902.127930,3796.495221,8007.760639
1,1,45,2015-07-30,6063,445,1,1,0,1,d,...,9710.0,0,1.0,1.224647e-16,-1.0,-0.471160,-0.882048,5909.078125,3838.291958,7979.864292
2,2,45,2015-07-29,5341,387,1,1,0,1,d,...,9710.0,0,1.0,8.660254e-01,-0.5,-0.455907,-0.890028,5737.235352,3690.963568,7783.507135
3,3,45,2015-07-28,5504,365,1,1,0,1,d,...,9710.0,0,1.0,8.660254e-01,0.5,-0.440519,-0.897743,5605.695312,3469.381942,7742.008683
4,4,45,2015-07-27,7450,500,1,1,0,1,d,...,9710.0,0,1.0,0.000000e+00,1.0,-0.425000,-0.905193,6808.231445,4000.167933,9616.294958
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
235,3811,1045,2015-06-18,8701,1068,1,1,0,0,a,...,26990.0,0,1.0,1.224647e-16,-1.0,0.230306,-0.973118,8653.404297,4879.858028,12426.950566
236,3812,1045,2015-06-17,8305,973,1,1,0,0,a,...,26990.0,0,1.0,8.660254e-01,-0.5,0.247022,-0.969010,8411.341797,4734.319208,12088.364386
237,3813,1045,2015-06-16,9158,1094,1,1,0,0,a,...,26990.0,0,1.0,8.660254e-01,0.5,0.263665,-0.964614,8693.514648,4961.677906,12425.351391
238,3814,1045,2015-06-15,11109,1224,1,1,0,0,a,...,26990.0,0,1.0,0.000000e+00,1.0,0.280231,-0.959933,9852.357422,5356.738362,14347.976482


In [15]:
def mase(train_y, test_y, pred):
    n = train_y.shape[0]
    d = np.abs(np.diff(train_y)).sum()/(n-1)
    errors = np.abs(test_y - pred)
    return errors.mean()/d

def mape(test_y, pred):
    return np.round(np.mean(np.abs(100*(test_y-pred)/(test_y + 1e-9))), 0)

def rmspe(test_y, pred):
    return (np.sqrt(np.mean(np.square((test_y - pred) / (test_y + 1e-9))))) * 100

def persistence(train_y, test_y):
    predictions, history = [], list(np.copy(train_y))
    for i in test_y:
        predictions.append(history[-1])
        history.append(i)
    return np.asarray(predictions)

def mda(actual, predicted):
    """ Mean Directional Accuracy """
    return np.mean((np.sign(actual[1:] - actual[:-1]) == np.sign(predicted[1:] - predicted[:-1])).astype(int))

def wape(true, pred):
    return np.sum(np.abs(true - pred))/np.sum(true)

naive = persistence(Y_train, Y_test)
errors = {'MAE':[mean_absolute_error(Y_test, pred['mu'].values)],
        'RMSE':[mean_squared_error(Y_test, pred['mu'].values)],
        'MAPE':[mean_absolute_percentage_error(Y_test, pred['mu'].values)],
        'R2': [r2_score(Y_test, pred['mu'].values)],
        'MDA': [mda(Y_test, pred['mu'].values)],
        'WAPE':[wape(Y_test, pred['mu'].values)]}
errors = pd.DataFrame(errors, index =['THIS', 'NAIVE'])
errors

Unnamed: 0,MAE,RMSE,MAPE,R2,MDA,WAPE
THIS,945.889404,1874655.0,0.149321,0.794805,0.794979,0.156748
NAIVE,945.889404,1874655.0,0.149321,0.794805,0.794979,0.156748


In [16]:
sub_pred = pred.loc[pred['Store']==45]
sub_ytest, sub_ttest = sub_pred[out_column].values, sub_pred[time_column].values
sub_train = train.loc[train['Store']==45]
sub_ytrain, sub_ttrain = sub_train[out_column].values, sub_train[time_column].values
fig = plot_gp(sub_pred['mu'].values, sub_pred['lb'].values, sub_pred['ub'].values, sub_ttest, sub_ytest, sub_ttrain, sub_ytrain, samples=[], layout='h',
              name='MAE: '+str(mean_absolute_error(sub_ytest, sub_pred['mu'].values))+
              'MAPE: '+str(mean_absolute_percentage_error(sub_ytest, sub_pred['mu'].values))+
              'R2: '+str(r2_score(sub_ytest, sub_pred['mu'].values))+
              'MDA: '+str(mda(sub_ytest, sub_pred['mu'].values))+
              'WAPE: '+str(wape(sub_ytest, sub_pred['mu'].values)))

fig.show()