In [1]:

import torch 
from torch import nn 
import yfinance as yf
import talib as ta
import plotly.graph_objects as go
from plotly.subplots import make_subplots
import plotly
import pandas as pd 
from torch.utils.data import DataLoader
import numpy as np
from sklearn.preprocessing import MinMaxScaler
from torch.utils.data import DataLoader, TensorDataset
from tqdm.auto import tqdm
from torch.optim.lr_scheduler import StepLR
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score



In [2]:
#Finance Indicators 
f_indicators = ["MACD", "RSI", "CMO"," MOM", "Bollinger Bands", "SMA"]
ticker= "BTC-USD"
start_date = "2023-01-01"
end_date = "2025-12-31"

data = yf.download(ticker, start=start_date, end=end_date, auto_adjust=True)
close_price = data["Close"].values.flatten()
data.head(10)
close_price


[*********************100%***********************]  1 of 1 completed


array([16625.08007812, 16688.47070312, 16679.85742188, ...,
       87843.984375  , 86143.7578125 , 84980.875     ], shape=(1083,))

In [3]:
data.isna().sum()

Price   Ticker 
Close   BTC-USD    0
High    BTC-USD    0
Low     BTC-USD    0
Open    BTC-USD    0
Volume  BTC-USD    0
dtype: int64

In [4]:
#Finance Indicators 
f_indicators = ["MACD", "RSI", "CMO"," MOM", "Bollinger Bands", "SMA"]
ticker= "BTC-USD"
start_date = "2023-01-01"
end_date = "2025-12-31"

data = yf.download(ticker, start=start_date, end=end_date, auto_adjust=True)
close_price = data["Close"].values.flatten()
data.head(10)
close_price

# MACD (TA-Lib returns 3 arrays)
macd, signal, hist = ta.MACD(close_price, fastperiod=12, slowperiod=26, signalperiod=9)

# Add to DataFrame
data["MACD"] = macd
data["Signal"] = signal
data["Hist"] = hist

# RSI
data["RSI"] = ta.RSI(close_price, timeperiod=14)

# CMO
data["CMO"] = ta.CMO(close_price, timeperiod=14)

# MOM
data["MOM"] = ta.MOM(close_price, timeperiod=14)

# Bollinger Bands
upper, middle, lower = ta.BBANDS(close_price, timeperiod=14, nbdevup=2, nbdevdn=2, matype=0)

# Add to DataFrame
data["BBU"] = upper    # Upper band
data["BBM"] = middle   # Middle band (SMA)
data["BBL"] = lower    # Lower band

# SMA
data["SMA"] = ta.SMA(close_price, timeperiod=14)
data.head(30)
data.isna().sum()

[*********************100%***********************]  1 of 1 completed


Price   Ticker 
Close   BTC-USD     0
High    BTC-USD     0
Low     BTC-USD     0
Open    BTC-USD     0
Volume  BTC-USD     0
MACD               33
Signal             33
Hist               33
RSI                14
CMO                14
MOM                14
BBU                13
BBM                13
BBL                13
SMA                13
dtype: int64

In [5]:
# Remove all rows with NaNs caused by indicator calculations
plot_data = data.dropna().copy()
plot_data = plot_data.copy()
plot_data.columns = [
    f"{lvl0}" if lvl1 else lvl0
    for lvl0, lvl1 in plot_data.columns
]

plot_data.index = pd.to_datetime(plot_data.index)  # ensure datetime index



In [6]:
plot_data

Unnamed: 0_level_0,Close,High,Low,Open,Volume,MACD,Signal,Hist,RSI,CMO,MOM,BBU,BBM,BBL,SMA
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1
2023-02-03,23449.322266,23678.103516,23279.955078,23469.412109,27083066007,1658.371173,2013.486323,-355.115150,71.870229,43.740458,772.769531,23813.408952,23123.453404,22433.497856,23123.453404
2023-02-04,23331.847656,23556.949219,23291.794922,23446.320312,15639298538,1560.052550,1922.799569,-362.747019,70.054386,40.108773,554.222656,23832.374304,23163.040737,22493.707169,23163.040737
2023-02-05,22955.666016,23423.435547,22841.759766,23332.248047,19564262605,1435.235120,1825.286679,-390.051559,64.439754,28.879508,235.250000,23814.815753,23179.844308,22544.872863,23179.844308
2023-02-06,22760.109375,23119.279297,22692.025391,22954.021484,23825006542,1305.487771,1721.326897,-415.839126,61.672572,23.345143,-174.322266,23827.466311,23167.392718,22507.319124,23167.392718
2023-02-07,23264.291016,23310.974609,22756.257812,22757.267578,27187964471,1229.176130,1622.896744,-393.720614,65.755517,31.511035,627.822266,23803.674808,23212.237165,22620.799522,23212.237165
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2025-12-14,88175.179688,90498.109375,87634.937500,90296.437500,50465972205,-1380.345062,-1765.137777,384.792715,40.374697,-19.250606,-2219.132812,94392.774509,90643.962612,86895.150714,90643.962612
2025-12-15,86419.781250,89983.921875,85304.078125,88171.078125,45559514323,-1606.842662,-1733.478754,126.636092,37.152956,-25.694087,98.210938,94367.639440,90650.977679,86934.315917,90650.977679
2025-12-16,87843.984375,88170.093750,85381.687500,86424.406250,41262178223,-1652.374805,-1717.257964,64.883160,41.249114,-17.501772,-3506.218750,94359.598823,90400.533482,86441.468141,90400.533482
2025-12-17,86143.757812,90264.570312,85316.265625,87847.617188,44243392914,-1804.848263,-1734.776024,-70.072239,38.059932,-23.880137,-7384.046875,93989.460085,89873.101562,85756.743040,89873.101562


In [7]:
# --- Create subplot layout ---
fig = make_subplots(
    rows=3,
    cols=1,
    shared_xaxes=True,
    vertical_spacing=0.1,
    row_heights=[0.5, 0.25, 0.25],
    subplot_titles=[f"{ticker} Price & BB/SMA", "MACD", "RSI"]
)

# --- Row 1: Candlestick ---
fig.add_trace(
    go.Candlestick(
        x=plot_data.index,
        open=plot_data["Open"],
        high=plot_data["High"],
        low=plot_data["Low"],
        close=plot_data["Close"],
        name="Price"
    ),
    row=1, col=1
)

# Bollinger Bands
fig.add_trace(go.Scatter(x=plot_data.index, y=plot_data["BBU"], name="BB Upper"), row=1, col=1)
fig.add_trace(go.Scatter(x=plot_data.index, y=plot_data["BBM"], name="BB Middle"), row=1, col=1)
fig.add_trace(go.Scatter(x=plot_data.index, y=plot_data["BBL"], name="BB Lower"), row=1, col=1)

# SMA
fig.add_trace(go.Scatter(x=plot_data.index, y=plot_data["SMA"], name="SMA"), row=1, col=1)

# --- Row 2: MACD ---
fig.add_trace(go.Scatter(x=plot_data.index, y=plot_data["MACD"], name="MACD"), row=2, col=1)
fig.add_trace(go.Scatter(x=plot_data.index, y=plot_data["Signal"], name="Signal"), row=2, col=1)

# --- Row 3: RSI ---
fig.add_trace(go.Scatter(x=plot_data.index, y=plot_data["RSI"], name="RSI"), row=3, col=1)

# --- Layout ---
fig.update_layout(
    title=f"{ticker} Technical Analysis",
    height=900,
    template="plotly_dark",
    xaxis=dict(rangeslider=dict(visible=False))
)


# RSI reference lines
fig.update_yaxes(range=[0, 100], row=3, col=1)
fig.add_hline(y=70, line_dash="dash", line_color="red", row=3, col=1)
fig.add_hline(y=30, line_dash="dash", line_color="green", row=3, col=1)
fig.show()



In [8]:
plot_data.Close

Date
2023-02-03    23449.322266
2023-02-04    23331.847656
2023-02-05    22955.666016
2023-02-06    22760.109375
2023-02-07    23264.291016
                  ...     
2025-12-14    88175.179688
2025-12-15    86419.781250
2025-12-16    87843.984375
2025-12-17    86143.757812
2025-12-18    84980.875000
Name: Close, Length: 1050, dtype: float64

In [9]:
df = plot_data.copy()

df["label"] = np.where(df["Close"] < df["Close"].shift(1), "down","up")
df.loc[df.index[0],"Close"] = np.nan
df.label

Date
2023-02-03      up
2023-02-04    down
2023-02-05    down
2023-02-06    down
2023-02-07      up
              ... 
2025-12-14    down
2025-12-15    down
2025-12-16      up
2025-12-17    down
2025-12-18    down
Name: label, Length: 1050, dtype: object

In [10]:
features = ["MACD", "RSI", "CMO","MOM", "BBM", "SMA"]

label = "label"
df_input = df[features].copy()
df_output = df[label].copy()

#Transform dataset 
scaler = MinMaxScaler()
df_input[features] = scaler.fit_transform(df_input)
df_output = pd.DataFrame(np.where(df_output== "up", 1, 0), columns=["label"], index=df_output.index)

#Training data 
data_train_x, data_train_y = df_input.loc[:"2024-12-31", :], df_output.loc[:"2024-12-31", :]    
data_test_x, data_test_y = df_input.loc["2024-12-25":, :], df_output.loc["2024-12-25":, :]





In [11]:
data_test_y

Unnamed: 0_level_0,label
Date,Unnamed: 1_level_1
2024-12-25,1
2024-12-26,0
2024-12-27,0
2024-12-28,1
2024-12-29,0
...,...
2025-12-14,0
2025-12-15,0
2025-12-16,1
2025-12-17,0


In [12]:
df_transformed = pd.concat([df_input, df_output], axis=1)
df_transformed.tail(360)

Unnamed: 0_level_0,MACD,RSI,CMO,MOM,BBM,SMA,label
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
2024-12-24,0.527516,0.486694,0.486694,0.491504,0.804062,0.804062,1
2024-12-25,0.527155,0.503631,0.503631,0.397613,0.802681,0.802681,1
2024-12-26,0.504532,0.402666,0.402666,0.340092,0.799550,0.799550,0
2024-12-27,0.476239,0.361299,0.361299,0.266256,0.794173,0.794173,0
2024-12-28,0.459992,0.392800,0.392800,0.292558,0.789597,0.789597,1
...,...,...,...,...,...,...,...
2025-12-14,0.350406,0.310256,0.310256,0.389246,0.706409,0.706409,0
2025-12-15,0.332952,0.263470,0.263470,0.445402,0.706481,0.706481,0
2025-12-16,0.329443,0.322955,0.322955,0.358056,0.703897,0.703897,1
2025-12-17,0.317694,0.276641,0.276641,0.264084,0.698454,0.698454,0


In [13]:
df_output.values

array([[1],
       [0],
       [0],
       ...,
       [1],
       [0],
       [0]], shape=(1050, 1))

In [14]:
def create_dataset(data_x, data_y, window):
    data_x_u = []
    data_y_u = []
    data_y = data_y.reshape(-1)
    
    for i in range(window, len(data_x)):
        # Sequence input
        x_seq = torch.from_numpy(data_x[i - window: i, :]).unsqueeze(0).unsqueeze(0).float()
        data_x_u.append(x_seq)
        
        # Corresponding label as 1D tensor
        y_label = torch.tensor([data_y[i]]).long()
        data_y_u.append(y_label)
    
  
    X_tensor = torch.cat(data_x_u, dim=0)   
    y_tensor = torch.cat(data_y_u, dim=0)   
    
    return X_tensor, y_tensor
X_train, y_train = create_dataset(data_train_x.values, data_train_y.values, window= 6)
X_test, y_test = create_dataset(data_test_x.values, data_test_y.values, window= 6)



In [15]:
y_train.size()

torch.Size([692])

In [16]:
dataset_train = TensorDataset(X_train, y_train)
dataset_test = TensorDataset(X_test, y_test)

dataset_loader_train = DataLoader(dataset_train, batch_size=24, shuffle=False)
dataset_loader_test = DataLoader(dataset_test, batch_size=24, shuffle=False)

In [17]:
#Modeling 
class CNNTA(nn.Module):
    def __init__(self, in_channels,  out_channels, input_size, kernel_size, out_features, dropout):
        super().__init__()
        self.layer1 = nn.Sequential(
            nn.Conv2d(in_channels= in_channels,  out_channels=out_channels, kernel_size=kernel_size),
            nn.BatchNorm2d(out_channels),
            nn.MaxPool2d(kernel_size=2),
            nn.ReLU()
        )
        with torch.no_grad():
            x = torch.zeros(1, in_channels, *input_size)
            x = self.layer1(x)
            flattened_size = int(np.prod(x.size()[1:]))
        
        #Fully connected Layer
        
        self.flatten = nn.Flatten()
        self.dropout = nn.Dropout(dropout)
        self.classifier = nn.Linear(in_features=flattened_size, out_features=out_features)
    
    def forward(self, x: torch.Tensor) -> torch.Tensor:
        x = self.layer1(x)
        x = self.flatten(x)
        x = self.dropout(x)
        x = self.classifier(x)
        return x 
        

In [18]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
device


device(type='cpu')

In [19]:
torch.manual_seed(42)
model = CNNTA(1, 80, (6, 6), 2, 2, 0.3 )
model.to(device)



CNNTA(
  (layer1): Sequential(
    (0): Conv2d(1, 80, kernel_size=(2, 2), stride=(1, 1))
    (1): BatchNorm2d(80, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (2): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (3): ReLU()
  )
  (flatten): Flatten(start_dim=1, end_dim=-1)
  (dropout): Dropout(p=0.3, inplace=False)
  (classifier): Linear(in_features=320, out_features=2, bias=True)
)

In [20]:
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.AdamW(model.parameters(), lr=1e-5)


In [21]:
torch.manual_seed(42)
epochs = 600

for epoch in range(epochs):
    # --- Training ---
    model.train()  # Only once per epoch
    train_loss = 0
    correct_train = 0
    total_train = 0

    for batch_x, batch_y in tqdm(dataset_loader_train, desc=f"Epoch {epoch+1}/{epochs} - Training", leave=False):
        batch_x, batch_y = batch_x.to(device), batch_y.to(device)
        print(batch_y[:10])
        print(batch_y.shape)


        optimizer.zero_grad()
        predicted = model(batch_x)
        loss = criterion(predicted, batch_y)
        loss.backward()
        total_norm = 0
        for p in model.parameters():
            if p.grad is not None:
                total_norm += p.grad.data.norm(2).item() ** 2
        total_norm = total_norm ** 0.5
        print(f"Gradient norm: {total_norm}")
        optimizer.step()

        train_loss += loss.item() * batch_x.size(0)
        _, pred = torch.max(predicted, dim=1)
        correct_train += (pred == batch_y).sum().item()
        total_train += batch_x.size(0)

    avg_train_loss = train_loss / total_train
    train_accuracy = correct_train / total_train
    print(f"Epoch: {epoch+1}| Train Loss: {avg_train_loss: .3f}| Train Accuracy: {train_accuracy: .3f}")


Epoch 1/600 - Training:   0%|          | 0/29 [00:00<?, ?it/s]

tensor([0, 0, 1, 0, 1, 1, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 4.710398310235161
tensor([1, 0, 0, 0, 0, 0, 1, 1, 1, 1])
torch.Size([24])
Gradient norm: 1.6331106948780867
tensor([1, 0, 1, 0, 0, 0, 1, 1, 0, 0])
torch.Size([24])
Gradient norm: 3.144309633771739
tensor([1, 0, 0, 1, 1, 1, 0, 0, 1, 0])
torch.Size([24])
Gradient norm: 3.14106719628486
tensor([0, 1, 0, 1, 1, 0, 1, 1, 0, 1])
torch.Size([24])
Gradient norm: 1.7548971144731464
tensor([0, 0, 1, 0, 1, 0, 1, 1, 1, 0])
torch.Size([24])
Gradient norm: 1.6633271468077042
tensor([1, 0, 0, 0, 1, 0, 0, 1, 1, 0])
torch.Size([24])
Gradient norm: 3.9731600039012522
tensor([0, 1, 1, 0, 0, 1, 0, 1, 0, 0])
torch.Size([24])
Gradient norm: 3.4174187274539825
tensor([1, 0, 0, 1, 0, 0, 0, 1, 1, 1])
torch.Size([24])
Gradient norm: 3.81456017411466
tensor([1, 1, 1, 0, 0, 1, 1, 0, 0, 1])
torch.Size([24])
Gradient norm: 1.6222535428738796
tensor([1, 0, 0, 0, 0, 0, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 1.6332157580953213
tensor([1, 1, 0,

Epoch 2/600 - Training:   0%|          | 0/29 [00:00<?, ?it/s]

tensor([0, 0, 1, 0, 1, 1, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 4.493967710242593
tensor([1, 0, 0, 0, 0, 0, 1, 1, 1, 1])
torch.Size([24])
Gradient norm: 1.7259735423139908
tensor([1, 0, 1, 0, 0, 0, 1, 1, 0, 0])
torch.Size([24])
Gradient norm: 3.7077303300018785
tensor([1, 0, 0, 1, 1, 1, 0, 0, 1, 0])
torch.Size([24])
Gradient norm: 4.593380637474902
tensor([0, 1, 0, 1, 1, 0, 1, 1, 0, 1])
torch.Size([24])
Gradient norm: 1.9274313576895652
tensor([0, 0, 1, 0, 1, 0, 1, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.0051231780591534
tensor([1, 0, 0, 0, 1, 0, 0, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.987839595695394
tensor([0, 1, 1, 0, 0, 1, 0, 1, 0, 0])
torch.Size([24])
Gradient norm: 2.885674966793082
tensor([1, 0, 0, 1, 0, 0, 0, 1, 1, 1])
torch.Size([24])
Gradient norm: 3.2200486700635413
tensor([1, 1, 1, 0, 0, 1, 1, 0, 0, 1])
torch.Size([24])
Gradient norm: 1.6240539884150214
tensor([1, 0, 0, 0, 0, 0, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 1.7735231817751125
tensor([1, 1, 

Epoch 3/600 - Training:   0%|          | 0/29 [00:00<?, ?it/s]

tensor([0, 0, 1, 0, 1, 1, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 3.5357455856980478
tensor([1, 0, 0, 0, 0, 0, 1, 1, 1, 1])
torch.Size([24])
Gradient norm: 1.940233425234248
tensor([1, 0, 1, 0, 0, 0, 1, 1, 0, 0])
torch.Size([24])
Gradient norm: 2.509713391921646
tensor([1, 0, 0, 1, 1, 1, 0, 0, 1, 0])
torch.Size([24])
Gradient norm: 2.372746870388001
tensor([0, 1, 0, 1, 1, 0, 1, 1, 0, 1])
torch.Size([24])
Gradient norm: 1.8472906484317582
tensor([0, 0, 1, 0, 1, 0, 1, 1, 1, 0])
torch.Size([24])
Gradient norm: 1.9616421591219606
tensor([1, 0, 0, 0, 1, 0, 0, 1, 1, 0])
torch.Size([24])
Gradient norm: 3.1920535680396656
tensor([0, 1, 1, 0, 0, 1, 0, 1, 0, 0])
torch.Size([24])
Gradient norm: 3.886115424840492
tensor([1, 0, 0, 1, 0, 0, 0, 1, 1, 1])
torch.Size([24])
Gradient norm: 3.1364340221198375
tensor([1, 1, 1, 0, 0, 1, 1, 0, 0, 1])
torch.Size([24])
Gradient norm: 1.7781619265564184
tensor([1, 0, 0, 0, 0, 0, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 1.8333221586609376
tensor([1, 1, 

Epoch 4/600 - Training:   0%|          | 0/29 [00:00<?, ?it/s]

tensor([0, 0, 1, 0, 1, 1, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 4.390549230958178
tensor([1, 0, 0, 0, 0, 0, 1, 1, 1, 1])
torch.Size([24])
Gradient norm: 1.8388910944459627
tensor([1, 0, 1, 0, 0, 0, 1, 1, 0, 0])
torch.Size([24])
Gradient norm: 2.7692551442735716
tensor([1, 0, 0, 1, 1, 1, 0, 0, 1, 0])
torch.Size([24])
Gradient norm: 2.694191036400364
tensor([0, 1, 0, 1, 1, 0, 1, 1, 0, 1])
torch.Size([24])
Gradient norm: 1.567077083565827
tensor([0, 0, 1, 0, 1, 0, 1, 1, 1, 0])
torch.Size([24])
Gradient norm: 1.681271797570854
tensor([1, 0, 0, 0, 1, 0, 0, 1, 1, 0])
torch.Size([24])
Gradient norm: 4.145074071380347
tensor([0, 1, 1, 0, 0, 1, 0, 1, 0, 0])
torch.Size([24])
Gradient norm: 3.759716794766047
tensor([1, 0, 0, 1, 0, 0, 0, 1, 1, 1])
torch.Size([24])
Gradient norm: 3.3868890149004853
tensor([1, 1, 1, 0, 0, 1, 1, 0, 0, 1])
torch.Size([24])
Gradient norm: 1.6555748462934787
tensor([1, 0, 0, 0, 0, 0, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 1.7534042523331217
tensor([1, 1, 0,

Epoch 5/600 - Training:   0%|          | 0/29 [00:00<?, ?it/s]

tensor([0, 0, 1, 0, 1, 1, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 4.004925931730669
tensor([1, 0, 0, 0, 0, 0, 1, 1, 1, 1])
torch.Size([24])
Gradient norm: 1.6628776245354635
tensor([1, 0, 1, 0, 0, 0, 1, 1, 0, 0])
torch.Size([24])
Gradient norm: 2.2580331915927587
tensor([1, 0, 0, 1, 1, 1, 0, 0, 1, 0])
torch.Size([24])
Gradient norm: 2.9673852091365833
tensor([0, 1, 0, 1, 1, 0, 1, 1, 0, 1])
torch.Size([24])
Gradient norm: 1.7068752208036764
tensor([0, 0, 1, 0, 1, 0, 1, 1, 1, 0])
torch.Size([24])
Gradient norm: 1.927972491417446
tensor([1, 0, 0, 0, 1, 0, 0, 1, 1, 0])
torch.Size([24])
Gradient norm: 3.16240744574997
tensor([0, 1, 1, 0, 0, 1, 0, 1, 0, 0])
torch.Size([24])
Gradient norm: 3.8032122738409355
tensor([1, 0, 0, 1, 0, 0, 0, 1, 1, 1])
torch.Size([24])
Gradient norm: 3.855264146525271
tensor([1, 1, 1, 0, 0, 1, 1, 0, 0, 1])
torch.Size([24])
Gradient norm: 2.152624408265148
tensor([1, 0, 0, 0, 0, 0, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 1.9020459956944817
tensor([1, 1, 0,

Epoch 6/600 - Training:   0%|          | 0/29 [00:00<?, ?it/s]

tensor([0, 0, 1, 0, 1, 1, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 3.4637735687554994
tensor([1, 0, 0, 0, 0, 0, 1, 1, 1, 1])
torch.Size([24])
Gradient norm: 2.2248738899636984
tensor([1, 0, 1, 0, 0, 0, 1, 1, 0, 0])
torch.Size([24])
Gradient norm: 3.677364011965626
tensor([1, 0, 0, 1, 1, 1, 0, 0, 1, 0])
torch.Size([24])
Gradient norm: 3.253812816562518
tensor([0, 1, 0, 1, 1, 0, 1, 1, 0, 1])
torch.Size([24])
Gradient norm: 1.9966778223227426
tensor([0, 0, 1, 0, 1, 0, 1, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.442796386505935
tensor([1, 0, 0, 0, 1, 0, 0, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.7531996903002405
tensor([0, 1, 1, 0, 0, 1, 0, 1, 0, 0])
torch.Size([24])
Gradient norm: 3.5737955220711437
tensor([1, 0, 0, 1, 0, 0, 0, 1, 1, 1])
torch.Size([24])
Gradient norm: 3.7773794160121876
tensor([1, 1, 1, 0, 0, 1, 1, 0, 0, 1])
torch.Size([24])
Gradient norm: 2.050382511430672
tensor([1, 0, 0, 0, 0, 0, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 1.5941866850497899
tensor([1, 1, 

Epoch 7/600 - Training:   0%|          | 0/29 [00:00<?, ?it/s]

tensor([0, 0, 1, 0, 1, 1, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 3.717913634708909
tensor([1, 0, 0, 0, 0, 0, 1, 1, 1, 1])
torch.Size([24])
Gradient norm: 1.8473953184884615
tensor([1, 0, 1, 0, 0, 0, 1, 1, 0, 0])
torch.Size([24])
Gradient norm: 2.3818556138775864
tensor([1, 0, 0, 1, 1, 1, 0, 0, 1, 0])
torch.Size([24])
Gradient norm: 2.4481304844122813
tensor([0, 1, 0, 1, 1, 0, 1, 1, 0, 1])
torch.Size([24])
Gradient norm: 1.6049590999049328
tensor([0, 0, 1, 0, 1, 0, 1, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.2600077925671695
tensor([1, 0, 0, 0, 1, 0, 0, 1, 1, 0])
torch.Size([24])
Gradient norm: 3.739504699282318
tensor([0, 1, 1, 0, 0, 1, 0, 1, 0, 0])
torch.Size([24])
Gradient norm: 3.2071089509755537
tensor([1, 0, 0, 1, 0, 0, 0, 1, 1, 1])
torch.Size([24])
Gradient norm: 3.03642563494896
tensor([1, 1, 1, 0, 0, 1, 1, 0, 0, 1])
torch.Size([24])
Gradient norm: 1.9297827563316219
tensor([1, 0, 0, 0, 0, 0, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 1.7706877005740806
tensor([1, 1, 

Epoch 8/600 - Training:   0%|          | 0/29 [00:00<?, ?it/s]

tensor([0, 0, 1, 0, 1, 1, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 3.5580794684700137
tensor([1, 0, 0, 0, 0, 0, 1, 1, 1, 1])
torch.Size([24])
Gradient norm: 1.9814499566338633
tensor([1, 0, 1, 0, 0, 0, 1, 1, 0, 0])
torch.Size([24])
Gradient norm: 2.8297439835698324
tensor([1, 0, 0, 1, 1, 1, 0, 0, 1, 0])
torch.Size([24])
Gradient norm: 2.9958549942843082
tensor([0, 1, 0, 1, 1, 0, 1, 1, 0, 1])
torch.Size([24])
Gradient norm: 1.7712375780907623
tensor([0, 0, 1, 0, 1, 0, 1, 1, 1, 0])
torch.Size([24])
Gradient norm: 1.9369612366243867
tensor([1, 0, 0, 0, 1, 0, 0, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.9716353164866236
tensor([0, 1, 1, 0, 0, 1, 0, 1, 0, 0])
torch.Size([24])
Gradient norm: 4.575442853312971
tensor([1, 0, 0, 1, 0, 0, 0, 1, 1, 1])
torch.Size([24])
Gradient norm: 2.988285152959236
tensor([1, 1, 1, 0, 0, 1, 1, 0, 0, 1])
torch.Size([24])
Gradient norm: 1.5441003138846248
tensor([1, 0, 0, 0, 0, 0, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 2.2877110225689195
tensor([1, 1

Epoch 9/600 - Training:   0%|          | 0/29 [00:00<?, ?it/s]

tensor([0, 0, 1, 0, 1, 1, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 3.6637150275015427
tensor([1, 0, 0, 0, 0, 0, 1, 1, 1, 1])
torch.Size([24])
Gradient norm: 1.9831283075130242
tensor([1, 0, 1, 0, 0, 0, 1, 1, 0, 0])
torch.Size([24])
Gradient norm: 3.115515085626554
tensor([1, 0, 0, 1, 1, 1, 0, 0, 1, 0])
torch.Size([24])
Gradient norm: 2.8277533539349875
tensor([0, 1, 0, 1, 1, 0, 1, 1, 0, 1])
torch.Size([24])
Gradient norm: 2.3107346327214424
tensor([0, 0, 1, 0, 1, 0, 1, 1, 1, 0])
torch.Size([24])
Gradient norm: 1.845788450342745
tensor([1, 0, 0, 0, 1, 0, 0, 1, 1, 0])
torch.Size([24])
Gradient norm: 3.1043163773605023
tensor([0, 1, 1, 0, 0, 1, 0, 1, 0, 0])
torch.Size([24])
Gradient norm: 3.09092543552535
tensor([1, 0, 0, 1, 0, 0, 0, 1, 1, 1])
torch.Size([24])
Gradient norm: 3.403231598568128
tensor([1, 1, 1, 0, 0, 1, 1, 0, 0, 1])
torch.Size([24])
Gradient norm: 2.2861407210244518
tensor([1, 0, 0, 0, 0, 0, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 1.9350107485273937
tensor([1, 1, 0

Epoch 10/600 - Training:   0%|          | 0/29 [00:00<?, ?it/s]

tensor([0, 0, 1, 0, 1, 1, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 3.430062545072728
tensor([1, 0, 0, 0, 0, 0, 1, 1, 1, 1])
torch.Size([24])
Gradient norm: 2.703159116461177
tensor([1, 0, 1, 0, 0, 0, 1, 1, 0, 0])
torch.Size([24])
Gradient norm: 2.644775218460461
tensor([1, 0, 0, 1, 1, 1, 0, 0, 1, 0])
torch.Size([24])
Gradient norm: 2.408821966553708
tensor([0, 1, 0, 1, 1, 0, 1, 1, 0, 1])
torch.Size([24])
Gradient norm: 1.7136434717146871
tensor([0, 0, 1, 0, 1, 0, 1, 1, 1, 0])
torch.Size([24])
Gradient norm: 1.643046506606635
tensor([1, 0, 0, 0, 1, 0, 0, 1, 1, 0])
torch.Size([24])
Gradient norm: 3.150102702231245
tensor([0, 1, 1, 0, 0, 1, 0, 1, 0, 0])
torch.Size([24])
Gradient norm: 2.9143246230701223
tensor([1, 0, 0, 1, 0, 0, 0, 1, 1, 1])
torch.Size([24])
Gradient norm: 4.336625710673438
tensor([1, 1, 1, 0, 0, 1, 1, 0, 0, 1])
torch.Size([24])
Gradient norm: 2.2755158655706387
tensor([1, 0, 0, 0, 0, 0, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 1.701233324674942
tensor([1, 1, 0, 0

Epoch 11/600 - Training:   0%|          | 0/29 [00:00<?, ?it/s]

tensor([0, 0, 1, 0, 1, 1, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 3.773706084403166
tensor([1, 0, 0, 0, 0, 0, 1, 1, 1, 1])
torch.Size([24])
Gradient norm: 2.4459957586478653
tensor([1, 0, 1, 0, 0, 0, 1, 1, 0, 0])
torch.Size([24])
Gradient norm: 2.661720491778863
tensor([1, 0, 0, 1, 1, 1, 0, 0, 1, 0])
torch.Size([24])
Gradient norm: 2.6162779032263113
tensor([0, 1, 0, 1, 1, 0, 1, 1, 0, 1])
torch.Size([24])
Gradient norm: 2.1768275138142106
tensor([0, 0, 1, 0, 1, 0, 1, 1, 1, 0])
torch.Size([24])
Gradient norm: 1.8072984286410048
tensor([1, 0, 0, 0, 1, 0, 0, 1, 1, 0])
torch.Size([24])
Gradient norm: 3.26100715157573
tensor([0, 1, 1, 0, 0, 1, 0, 1, 0, 0])
torch.Size([24])
Gradient norm: 2.8874169829985923
tensor([1, 0, 0, 1, 0, 0, 0, 1, 1, 1])
torch.Size([24])
Gradient norm: 3.3468491701025798
tensor([1, 1, 1, 0, 0, 1, 1, 0, 0, 1])
torch.Size([24])
Gradient norm: 1.954518477334799
tensor([1, 0, 0, 0, 0, 0, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 1.8341509240773293
tensor([1, 1, 0

Epoch 12/600 - Training:   0%|          | 0/29 [00:00<?, ?it/s]

tensor([0, 0, 1, 0, 1, 1, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 3.773130822554971
tensor([1, 0, 0, 0, 0, 0, 1, 1, 1, 1])
torch.Size([24])
Gradient norm: 1.9377205267054758
tensor([1, 0, 1, 0, 0, 0, 1, 1, 0, 0])
torch.Size([24])
Gradient norm: 2.373418106723619
tensor([1, 0, 0, 1, 1, 1, 0, 0, 1, 0])
torch.Size([24])
Gradient norm: 3.133478459722369
tensor([0, 1, 0, 1, 1, 0, 1, 1, 0, 1])
torch.Size([24])
Gradient norm: 1.9898227690840244
tensor([0, 0, 1, 0, 1, 0, 1, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.0058067121940755
tensor([1, 0, 0, 0, 1, 0, 0, 1, 1, 0])
torch.Size([24])
Gradient norm: 3.2715360189791913
tensor([0, 1, 1, 0, 0, 1, 0, 1, 0, 0])
torch.Size([24])
Gradient norm: 2.5247673924473166
tensor([1, 0, 0, 1, 0, 0, 0, 1, 1, 1])
torch.Size([24])
Gradient norm: 2.945615585929341
tensor([1, 1, 1, 0, 0, 1, 1, 0, 0, 1])
torch.Size([24])
Gradient norm: 1.6839462029320875
tensor([1, 0, 0, 0, 0, 0, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 2.255825227043806
tensor([1, 1, 0

Epoch 13/600 - Training:   0%|          | 0/29 [00:00<?, ?it/s]

tensor([0, 0, 1, 0, 1, 1, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 3.5861126391104987
tensor([1, 0, 0, 0, 0, 0, 1, 1, 1, 1])
torch.Size([24])
Gradient norm: 2.1657394558536724
tensor([1, 0, 1, 0, 0, 0, 1, 1, 0, 0])
torch.Size([24])
Gradient norm: 2.1508899257323195
tensor([1, 0, 0, 1, 1, 1, 0, 0, 1, 0])
torch.Size([24])
Gradient norm: 2.620354289741479
tensor([0, 1, 0, 1, 1, 0, 1, 1, 0, 1])
torch.Size([24])
Gradient norm: 2.2616109253440437
tensor([0, 0, 1, 0, 1, 0, 1, 1, 1, 0])
torch.Size([24])
Gradient norm: 1.8161632590684216
tensor([1, 0, 0, 0, 1, 0, 0, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.9760399318133612
tensor([0, 1, 1, 0, 0, 1, 0, 1, 0, 0])
torch.Size([24])
Gradient norm: 2.4964212223338773
tensor([1, 0, 0, 1, 0, 0, 0, 1, 1, 1])
torch.Size([24])
Gradient norm: 3.7946030371763526
tensor([1, 1, 1, 0, 0, 1, 1, 0, 0, 1])
torch.Size([24])
Gradient norm: 1.9598776455676603
tensor([1, 0, 0, 0, 0, 0, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 1.4676092697984686
tensor([1, 

Epoch 14/600 - Training:   0%|          | 0/29 [00:00<?, ?it/s]

tensor([0, 0, 1, 0, 1, 1, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 3.497141452493709
tensor([1, 0, 0, 0, 0, 0, 1, 1, 1, 1])
torch.Size([24])
Gradient norm: 1.6426330197124952
tensor([1, 0, 1, 0, 0, 0, 1, 1, 0, 0])
torch.Size([24])
Gradient norm: 2.4735408624539104
tensor([1, 0, 0, 1, 1, 1, 0, 0, 1, 0])
torch.Size([24])
Gradient norm: 2.453448302033317
tensor([0, 1, 0, 1, 1, 0, 1, 1, 0, 1])
torch.Size([24])
Gradient norm: 1.7720547200718835
tensor([0, 0, 1, 0, 1, 0, 1, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.40314100352054
tensor([1, 0, 0, 0, 1, 0, 0, 1, 1, 0])
torch.Size([24])
Gradient norm: 3.142685580964235
tensor([0, 1, 1, 0, 0, 1, 0, 1, 0, 0])
torch.Size([24])
Gradient norm: 3.3493292204092757
tensor([1, 0, 0, 1, 0, 0, 0, 1, 1, 1])
torch.Size([24])
Gradient norm: 3.050535906100354
tensor([1, 1, 1, 0, 0, 1, 1, 0, 0, 1])
torch.Size([24])
Gradient norm: 2.0263570040589305
tensor([1, 0, 0, 0, 0, 0, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 1.832108029667366
tensor([1, 1, 0, 

Epoch 15/600 - Training:   0%|          | 0/29 [00:00<?, ?it/s]

tensor([0, 0, 1, 0, 1, 1, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 3.5890149074884112
tensor([1, 0, 0, 0, 0, 0, 1, 1, 1, 1])
torch.Size([24])
Gradient norm: 2.2488875628697116
tensor([1, 0, 1, 0, 0, 0, 1, 1, 0, 0])
torch.Size([24])
Gradient norm: 3.032957108377738
tensor([1, 0, 0, 1, 1, 1, 0, 0, 1, 0])
torch.Size([24])
Gradient norm: 2.7347844289368526
tensor([0, 1, 0, 1, 1, 0, 1, 1, 0, 1])
torch.Size([24])
Gradient norm: 1.8172955932918342
tensor([0, 0, 1, 0, 1, 0, 1, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.663477059675714
tensor([1, 0, 0, 0, 1, 0, 0, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.988171352316016
tensor([0, 1, 1, 0, 0, 1, 0, 1, 0, 0])
torch.Size([24])
Gradient norm: 3.1800467133842734
tensor([1, 0, 0, 1, 0, 0, 0, 1, 1, 1])
torch.Size([24])
Gradient norm: 2.32081050100769
tensor([1, 1, 1, 0, 0, 1, 1, 0, 0, 1])
torch.Size([24])
Gradient norm: 1.8172909706374776
tensor([1, 0, 0, 0, 0, 0, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 1.9768902491771796
tensor([1, 1, 0

Epoch 16/600 - Training:   0%|          | 0/29 [00:00<?, ?it/s]

tensor([0, 0, 1, 0, 1, 1, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 3.676509353811065
tensor([1, 0, 0, 0, 0, 0, 1, 1, 1, 1])
torch.Size([24])
Gradient norm: 2.0119484925172135
tensor([1, 0, 1, 0, 0, 0, 1, 1, 0, 0])
torch.Size([24])
Gradient norm: 2.171877289928929
tensor([1, 0, 0, 1, 1, 1, 0, 0, 1, 0])
torch.Size([24])
Gradient norm: 2.416933215359125
tensor([0, 1, 0, 1, 1, 0, 1, 1, 0, 1])
torch.Size([24])
Gradient norm: 2.1001416790027863
tensor([0, 0, 1, 0, 1, 0, 1, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.1097998061097756
tensor([1, 0, 0, 0, 1, 0, 0, 1, 1, 0])
torch.Size([24])
Gradient norm: 3.300880652091733
tensor([0, 1, 1, 0, 0, 1, 0, 1, 0, 0])
torch.Size([24])
Gradient norm: 2.4803198455518145
tensor([1, 0, 0, 1, 0, 0, 0, 1, 1, 1])
torch.Size([24])
Gradient norm: 3.3134362239941617
tensor([1, 1, 1, 0, 0, 1, 1, 0, 0, 1])
torch.Size([24])
Gradient norm: 1.9331622193244622
tensor([1, 0, 0, 0, 0, 0, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 1.623373198996787
tensor([1, 1, 0

Epoch 17/600 - Training:   0%|          | 0/29 [00:00<?, ?it/s]

tensor([0, 0, 1, 0, 1, 1, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 3.470948891717648
tensor([1, 0, 0, 0, 0, 0, 1, 1, 1, 1])
torch.Size([24])
Gradient norm: 2.4775304774675107
tensor([1, 0, 1, 0, 0, 0, 1, 1, 0, 0])
torch.Size([24])
Gradient norm: 2.5762238173435232
tensor([1, 0, 0, 1, 1, 1, 0, 0, 1, 0])
torch.Size([24])
Gradient norm: 2.7441790463233318
tensor([0, 1, 0, 1, 1, 0, 1, 1, 0, 1])
torch.Size([24])
Gradient norm: 2.0322947532013407
tensor([0, 0, 1, 0, 1, 0, 1, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.061943498650848
tensor([1, 0, 0, 0, 1, 0, 0, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.192176999628607
tensor([0, 1, 1, 0, 0, 1, 0, 1, 0, 0])
torch.Size([24])
Gradient norm: 2.982539397435115
tensor([1, 0, 0, 1, 0, 0, 0, 1, 1, 1])
torch.Size([24])
Gradient norm: 3.748584154637839
tensor([1, 1, 1, 0, 0, 1, 1, 0, 0, 1])
torch.Size([24])
Gradient norm: 1.850415658371754
tensor([1, 0, 0, 0, 0, 0, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 2.238968219156099
tensor([1, 1, 0, 

Epoch 18/600 - Training:   0%|          | 0/29 [00:00<?, ?it/s]

tensor([0, 0, 1, 0, 1, 1, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 3.886120903749419
tensor([1, 0, 0, 0, 0, 0, 1, 1, 1, 1])
torch.Size([24])
Gradient norm: 1.6985686215378224
tensor([1, 0, 1, 0, 0, 0, 1, 1, 0, 0])
torch.Size([24])
Gradient norm: 2.6687387768786093
tensor([1, 0, 0, 1, 1, 1, 0, 0, 1, 0])
torch.Size([24])
Gradient norm: 1.965525763053191
tensor([0, 1, 0, 1, 1, 0, 1, 1, 0, 1])
torch.Size([24])
Gradient norm: 2.075420080458626
tensor([0, 0, 1, 0, 1, 0, 1, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.6381030267363577
tensor([1, 0, 0, 0, 1, 0, 0, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.166240061586043
tensor([0, 1, 1, 0, 0, 1, 0, 1, 0, 0])
torch.Size([24])
Gradient norm: 3.3751055005776207
tensor([1, 0, 0, 1, 0, 0, 0, 1, 1, 1])
torch.Size([24])
Gradient norm: 3.225284421308061
tensor([1, 1, 1, 0, 0, 1, 1, 0, 0, 1])
torch.Size([24])
Gradient norm: 1.5841817404768739
tensor([1, 0, 0, 0, 0, 0, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 1.6330162915435666
tensor([1, 1, 0

Epoch 19/600 - Training:   0%|          | 0/29 [00:00<?, ?it/s]

tensor([0, 0, 1, 0, 1, 1, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 3.1523385650069313
tensor([1, 0, 0, 0, 0, 0, 1, 1, 1, 1])
torch.Size([24])
Gradient norm: 2.1184447805846505
tensor([1, 0, 1, 0, 0, 0, 1, 1, 0, 0])
torch.Size([24])
Gradient norm: 2.5247916390624363
tensor([1, 0, 0, 1, 1, 1, 0, 0, 1, 0])
torch.Size([24])
Gradient norm: 2.9916788332037414
tensor([0, 1, 0, 1, 1, 0, 1, 1, 0, 1])
torch.Size([24])
Gradient norm: 2.033019423595295
tensor([0, 0, 1, 0, 1, 0, 1, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.1010637686493037
tensor([1, 0, 0, 0, 1, 0, 0, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.4562391684588425
tensor([0, 1, 1, 0, 0, 1, 0, 1, 0, 0])
torch.Size([24])
Gradient norm: 2.976893664290739
tensor([1, 0, 0, 1, 0, 0, 0, 1, 1, 1])
torch.Size([24])
Gradient norm: 3.3565668098175028
tensor([1, 1, 1, 0, 0, 1, 1, 0, 0, 1])
torch.Size([24])
Gradient norm: 1.7197245083890582
tensor([1, 0, 0, 0, 0, 0, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 1.7957310975530933
tensor([1, 1

Epoch 20/600 - Training:   0%|          | 0/29 [00:00<?, ?it/s]

tensor([0, 0, 1, 0, 1, 1, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 2.7998111550962013
tensor([1, 0, 0, 0, 0, 0, 1, 1, 1, 1])
torch.Size([24])
Gradient norm: 1.9573711740581454
tensor([1, 0, 1, 0, 0, 0, 1, 1, 0, 0])
torch.Size([24])
Gradient norm: 2.3719686503049737
tensor([1, 0, 0, 1, 1, 1, 0, 0, 1, 0])
torch.Size([24])
Gradient norm: 2.4287436220350975
tensor([0, 1, 0, 1, 1, 0, 1, 1, 0, 1])
torch.Size([24])
Gradient norm: 1.791359839779702
tensor([0, 0, 1, 0, 1, 0, 1, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.240379252473053
tensor([1, 0, 0, 0, 1, 0, 0, 1, 1, 0])
torch.Size([24])
Gradient norm: 3.309885296066523
tensor([0, 1, 1, 0, 0, 1, 0, 1, 0, 0])
torch.Size([24])
Gradient norm: 3.012469802731037
tensor([1, 0, 0, 1, 0, 0, 0, 1, 1, 1])
torch.Size([24])
Gradient norm: 2.7281743647603394
tensor([1, 1, 1, 0, 0, 1, 1, 0, 0, 1])
torch.Size([24])
Gradient norm: 1.961119077768436
tensor([1, 0, 0, 0, 0, 0, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 1.810333229156844
tensor([1, 1, 0,

Epoch 21/600 - Training:   0%|          | 0/29 [00:00<?, ?it/s]

tensor([0, 0, 1, 0, 1, 1, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 3.0144448951326868
tensor([1, 0, 0, 0, 0, 0, 1, 1, 1, 1])
torch.Size([24])
Gradient norm: 2.5785071316199364
tensor([1, 0, 1, 0, 0, 0, 1, 1, 0, 0])
torch.Size([24])
Gradient norm: 2.6994999776372857
tensor([1, 0, 0, 1, 1, 1, 0, 0, 1, 0])
torch.Size([24])
Gradient norm: 2.5889995886843336
tensor([0, 1, 0, 1, 1, 0, 1, 1, 0, 1])
torch.Size([24])
Gradient norm: 2.0676012383431517
tensor([0, 0, 1, 0, 1, 0, 1, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.7254071587431947
tensor([1, 0, 0, 0, 1, 0, 0, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.1246328728076382
tensor([0, 1, 1, 0, 0, 1, 0, 1, 0, 0])
torch.Size([24])
Gradient norm: 3.14556012456903
tensor([1, 0, 0, 1, 0, 0, 0, 1, 1, 1])
torch.Size([24])
Gradient norm: 3.4761719724028577
tensor([1, 1, 1, 0, 0, 1, 1, 0, 0, 1])
torch.Size([24])
Gradient norm: 1.942970330894821
tensor([1, 0, 0, 0, 0, 0, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 1.2731633876824724
tensor([1, 1,

Epoch 22/600 - Training:   0%|          | 0/29 [00:00<?, ?it/s]

tensor([0, 0, 1, 0, 1, 1, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 3.1597676076345573
tensor([1, 0, 0, 0, 0, 0, 1, 1, 1, 1])
torch.Size([24])
Gradient norm: 2.2561628095607826
tensor([1, 0, 1, 0, 0, 0, 1, 1, 0, 0])
torch.Size([24])
Gradient norm: 2.125990821171471
tensor([1, 0, 0, 1, 1, 1, 0, 0, 1, 0])
torch.Size([24])
Gradient norm: 1.9701128254310294
tensor([0, 1, 0, 1, 1, 0, 1, 1, 0, 1])
torch.Size([24])
Gradient norm: 2.149422180878517
tensor([0, 0, 1, 0, 1, 0, 1, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.7061867675951867
tensor([1, 0, 0, 0, 1, 0, 0, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.8071804050055356
tensor([0, 1, 1, 0, 0, 1, 0, 1, 0, 0])
torch.Size([24])
Gradient norm: 3.665536547687387
tensor([1, 0, 0, 1, 0, 0, 0, 1, 1, 1])
torch.Size([24])
Gradient norm: 3.349352785274727
tensor([1, 1, 1, 0, 0, 1, 1, 0, 0, 1])
torch.Size([24])
Gradient norm: 1.800664870963101
tensor([1, 0, 0, 0, 0, 0, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 1.4499364573483833
tensor([1, 1, 0

Epoch 23/600 - Training:   0%|          | 0/29 [00:00<?, ?it/s]

tensor([0, 0, 1, 0, 1, 1, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 3.407134153843186
tensor([1, 0, 0, 0, 0, 0, 1, 1, 1, 1])
torch.Size([24])
Gradient norm: 2.243904083119287
tensor([1, 0, 1, 0, 0, 0, 1, 1, 0, 0])
torch.Size([24])
Gradient norm: 2.244970141277788
tensor([1, 0, 0, 1, 1, 1, 0, 0, 1, 0])
torch.Size([24])
Gradient norm: 2.117855455724734
tensor([0, 1, 0, 1, 1, 0, 1, 1, 0, 1])
torch.Size([24])
Gradient norm: 2.0688623770765235
tensor([0, 0, 1, 0, 1, 0, 1, 1, 1, 0])
torch.Size([24])
Gradient norm: 1.4860234243966952
tensor([1, 0, 0, 0, 1, 0, 0, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.4302017316291216
tensor([0, 1, 1, 0, 0, 1, 0, 1, 0, 0])
torch.Size([24])
Gradient norm: 3.754517330986737
tensor([1, 0, 0, 1, 0, 0, 0, 1, 1, 1])
torch.Size([24])
Gradient norm: 3.8658544298616264
tensor([1, 1, 1, 0, 0, 1, 1, 0, 0, 1])
torch.Size([24])
Gradient norm: 2.3306965603207717
tensor([1, 0, 0, 0, 0, 0, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 2.6180703348677343
tensor([1, 1, 0

Epoch 24/600 - Training:   0%|          | 0/29 [00:00<?, ?it/s]

tensor([0, 0, 1, 0, 1, 1, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 3.795188150396015
tensor([1, 0, 0, 0, 0, 0, 1, 1, 1, 1])
torch.Size([24])
Gradient norm: 3.1315936056744453
tensor([1, 0, 1, 0, 0, 0, 1, 1, 0, 0])
torch.Size([24])
Gradient norm: 2.113384515815016
tensor([1, 0, 0, 1, 1, 1, 0, 0, 1, 0])
torch.Size([24])
Gradient norm: 1.4391354188454004
tensor([0, 1, 0, 1, 1, 0, 1, 1, 0, 1])
torch.Size([24])
Gradient norm: 1.812799814527853
tensor([0, 0, 1, 0, 1, 0, 1, 1, 1, 0])
torch.Size([24])
Gradient norm: 3.225021767836389
tensor([1, 0, 0, 0, 1, 0, 0, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.5465052075466557
tensor([0, 1, 1, 0, 0, 1, 0, 1, 0, 0])
torch.Size([24])
Gradient norm: 2.1923876915993956
tensor([1, 0, 0, 1, 0, 0, 0, 1, 1, 1])
torch.Size([24])
Gradient norm: 3.179958112854363
tensor([1, 1, 1, 0, 0, 1, 1, 0, 0, 1])
torch.Size([24])
Gradient norm: 2.2219091799865587
tensor([1, 0, 0, 0, 0, 0, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 1.6424388865443866
tensor([1, 1, 0

Epoch 25/600 - Training:   0%|          | 0/29 [00:00<?, ?it/s]

tensor([0, 0, 1, 0, 1, 1, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 3.9360420365746394
tensor([1, 0, 0, 0, 0, 0, 1, 1, 1, 1])
torch.Size([24])
Gradient norm: 1.5121974011789998
tensor([1, 0, 1, 0, 0, 0, 1, 1, 0, 0])
torch.Size([24])
Gradient norm: 2.674016682495729
tensor([1, 0, 0, 1, 1, 1, 0, 0, 1, 0])
torch.Size([24])
Gradient norm: 2.5534967801906108
tensor([0, 1, 0, 1, 1, 0, 1, 1, 0, 1])
torch.Size([24])
Gradient norm: 1.8970660635449466
tensor([0, 0, 1, 0, 1, 0, 1, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.180437361604818
tensor([1, 0, 0, 0, 1, 0, 0, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.5464874025175566
tensor([0, 1, 1, 0, 0, 1, 0, 1, 0, 0])
torch.Size([24])
Gradient norm: 2.7766594496117314
tensor([1, 0, 0, 1, 0, 0, 0, 1, 1, 1])
torch.Size([24])
Gradient norm: 3.5334540347747008
tensor([1, 1, 1, 0, 0, 1, 1, 0, 0, 1])
torch.Size([24])
Gradient norm: 1.919662683040441
tensor([1, 0, 0, 0, 0, 0, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 2.372532902556692
tensor([1, 1, 

Epoch 26/600 - Training:   0%|          | 0/29 [00:00<?, ?it/s]

tensor([0, 0, 1, 0, 1, 1, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 3.4602251906870958
tensor([1, 0, 0, 0, 0, 0, 1, 1, 1, 1])
torch.Size([24])
Gradient norm: 2.10552625959339
tensor([1, 0, 1, 0, 0, 0, 1, 1, 0, 0])
torch.Size([24])
Gradient norm: 1.8343857086106803
tensor([1, 0, 0, 1, 1, 1, 0, 0, 1, 0])
torch.Size([24])
Gradient norm: 2.8415533266335458
tensor([0, 1, 0, 1, 1, 0, 1, 1, 0, 1])
torch.Size([24])
Gradient norm: 1.8014745238195713
tensor([0, 0, 1, 0, 1, 0, 1, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.4159143080899463
tensor([1, 0, 0, 0, 1, 0, 0, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.6300259034309974
tensor([0, 1, 1, 0, 0, 1, 0, 1, 0, 0])
torch.Size([24])
Gradient norm: 2.9176512613868573
tensor([1, 0, 0, 1, 0, 0, 0, 1, 1, 1])
torch.Size([24])
Gradient norm: 3.172625657298315
tensor([1, 1, 1, 0, 0, 1, 1, 0, 0, 1])
torch.Size([24])
Gradient norm: 1.6813753248699093
tensor([1, 0, 0, 0, 0, 0, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 1.643327185651663
tensor([1, 1, 

Epoch 27/600 - Training:   0%|          | 0/29 [00:00<?, ?it/s]

tensor([0, 0, 1, 0, 1, 1, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 3.039519815797807
tensor([1, 0, 0, 0, 0, 0, 1, 1, 1, 1])
torch.Size([24])
Gradient norm: 2.073720207399622
tensor([1, 0, 1, 0, 0, 0, 1, 1, 0, 0])
torch.Size([24])
Gradient norm: 2.669310146455436
tensor([1, 0, 0, 1, 1, 1, 0, 0, 1, 0])
torch.Size([24])
Gradient norm: 1.9102092450102592
tensor([0, 1, 0, 1, 1, 0, 1, 1, 0, 1])
torch.Size([24])
Gradient norm: 1.9658169628434687
tensor([0, 0, 1, 0, 1, 0, 1, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.1840665698286976
tensor([1, 0, 0, 0, 1, 0, 0, 1, 1, 0])
torch.Size([24])
Gradient norm: 3.8656156303736373
tensor([0, 1, 1, 0, 0, 1, 0, 1, 0, 0])
torch.Size([24])
Gradient norm: 2.728213491096514
tensor([1, 0, 0, 1, 0, 0, 0, 1, 1, 1])
torch.Size([24])
Gradient norm: 3.127552734790508
tensor([1, 1, 1, 0, 0, 1, 1, 0, 0, 1])
torch.Size([24])
Gradient norm: 2.333487135308474
tensor([1, 0, 0, 0, 0, 0, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 1.7754270420974179
tensor([1, 1, 0,

Epoch 28/600 - Training:   0%|          | 0/29 [00:00<?, ?it/s]

tensor([0, 0, 1, 0, 1, 1, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 3.85234115328271
tensor([1, 0, 0, 0, 0, 0, 1, 1, 1, 1])
torch.Size([24])
Gradient norm: 2.5264261890910324
tensor([1, 0, 1, 0, 0, 0, 1, 1, 0, 0])
torch.Size([24])
Gradient norm: 3.0548330992357426
tensor([1, 0, 0, 1, 1, 1, 0, 0, 1, 0])
torch.Size([24])
Gradient norm: 3.177383324534779
tensor([0, 1, 0, 1, 1, 0, 1, 1, 0, 1])
torch.Size([24])
Gradient norm: 2.148974668794572
tensor([0, 0, 1, 0, 1, 0, 1, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.2279059026959898
tensor([1, 0, 0, 0, 1, 0, 0, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.853299781173065
tensor([0, 1, 1, 0, 0, 1, 0, 1, 0, 0])
torch.Size([24])
Gradient norm: 3.0567536619734756
tensor([1, 0, 0, 1, 0, 0, 0, 1, 1, 1])
torch.Size([24])
Gradient norm: 3.860376998148754
tensor([1, 1, 1, 0, 0, 1, 1, 0, 0, 1])
torch.Size([24])
Gradient norm: 2.0703760529002033
tensor([1, 0, 0, 0, 0, 0, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 1.5716841345169161
tensor([1, 1, 0,

Epoch 29/600 - Training:   0%|          | 0/29 [00:00<?, ?it/s]

tensor([0, 0, 1, 0, 1, 1, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 3.4246621625583895
tensor([1, 0, 0, 0, 0, 0, 1, 1, 1, 1])
torch.Size([24])
Gradient norm: 2.775782135703986
tensor([1, 0, 1, 0, 0, 0, 1, 1, 0, 0])
torch.Size([24])
Gradient norm: 2.402269412615396
tensor([1, 0, 0, 1, 1, 1, 0, 0, 1, 0])
torch.Size([24])
Gradient norm: 2.7136633967330033
tensor([0, 1, 0, 1, 1, 0, 1, 1, 0, 1])
torch.Size([24])
Gradient norm: 2.146001704671667
tensor([0, 0, 1, 0, 1, 0, 1, 1, 1, 0])
torch.Size([24])
Gradient norm: 1.9021772402392236
tensor([1, 0, 0, 0, 1, 0, 0, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.871118595807411
tensor([0, 1, 1, 0, 0, 1, 0, 1, 0, 0])
torch.Size([24])
Gradient norm: 3.6732172872033884
tensor([1, 0, 0, 1, 0, 0, 0, 1, 1, 1])
torch.Size([24])
Gradient norm: 2.801437821941312
tensor([1, 1, 1, 0, 0, 1, 1, 0, 0, 1])
torch.Size([24])
Gradient norm: 1.5421646544579968
tensor([1, 0, 0, 0, 0, 0, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 1.6839778004418864
tensor([1, 1, 0

Epoch 30/600 - Training:   0%|          | 0/29 [00:00<?, ?it/s]

tensor([0, 0, 1, 0, 1, 1, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 3.324936938129249
tensor([1, 0, 0, 0, 0, 0, 1, 1, 1, 1])
torch.Size([24])
Gradient norm: 1.9950624450863463
tensor([1, 0, 1, 0, 0, 0, 1, 1, 0, 0])
torch.Size([24])
Gradient norm: 1.8680146261178456
tensor([1, 0, 0, 1, 1, 1, 0, 0, 1, 0])
torch.Size([24])
Gradient norm: 1.9680566231130057
tensor([0, 1, 0, 1, 1, 0, 1, 1, 0, 1])
torch.Size([24])
Gradient norm: 2.0438844719040143
tensor([0, 0, 1, 0, 1, 0, 1, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.320413380759448
tensor([1, 0, 0, 0, 1, 0, 0, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.43974957574851
tensor([0, 1, 1, 0, 0, 1, 0, 1, 0, 0])
torch.Size([24])
Gradient norm: 3.0428511890207544
tensor([1, 0, 0, 1, 0, 0, 0, 1, 1, 1])
torch.Size([24])
Gradient norm: 3.5873342418014675
tensor([1, 1, 1, 0, 0, 1, 1, 0, 0, 1])
torch.Size([24])
Gradient norm: 1.5115528028271445
tensor([1, 0, 0, 0, 0, 0, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 1.8890156132033027
tensor([1, 1, 

Epoch 31/600 - Training:   0%|          | 0/29 [00:00<?, ?it/s]

tensor([0, 0, 1, 0, 1, 1, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 3.366971614296661
tensor([1, 0, 0, 0, 0, 0, 1, 1, 1, 1])
torch.Size([24])
Gradient norm: 2.143125092940084
tensor([1, 0, 1, 0, 0, 0, 1, 1, 0, 0])
torch.Size([24])
Gradient norm: 2.779853894493019
tensor([1, 0, 0, 1, 1, 1, 0, 0, 1, 0])
torch.Size([24])
Gradient norm: 2.2712368576280437
tensor([0, 1, 0, 1, 1, 0, 1, 1, 0, 1])
torch.Size([24])
Gradient norm: 1.7318679597137017
tensor([0, 0, 1, 0, 1, 0, 1, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.284474692255054
tensor([1, 0, 0, 0, 1, 0, 0, 1, 1, 0])
torch.Size([24])
Gradient norm: 3.014745868905918
tensor([0, 1, 1, 0, 0, 1, 0, 1, 0, 0])
torch.Size([24])
Gradient norm: 3.8930420382024313
tensor([1, 0, 0, 1, 0, 0, 0, 1, 1, 1])
torch.Size([24])
Gradient norm: 3.193906363271475
tensor([1, 1, 1, 0, 0, 1, 1, 0, 0, 1])
torch.Size([24])
Gradient norm: 2.0860997234735663
tensor([1, 0, 0, 0, 0, 0, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 1.6342699937529135
tensor([1, 1, 0,

Epoch 32/600 - Training:   0%|          | 0/29 [00:00<?, ?it/s]

tensor([0, 0, 1, 0, 1, 1, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 3.2413238114425678
tensor([1, 0, 0, 0, 0, 0, 1, 1, 1, 1])
torch.Size([24])
Gradient norm: 2.068162653460818
tensor([1, 0, 1, 0, 0, 0, 1, 1, 0, 0])
torch.Size([24])
Gradient norm: 2.0330568881196056
tensor([1, 0, 0, 1, 1, 1, 0, 0, 1, 0])
torch.Size([24])
Gradient norm: 2.962099285904063
tensor([0, 1, 0, 1, 1, 0, 1, 1, 0, 1])
torch.Size([24])
Gradient norm: 1.765059307691515
tensor([0, 0, 1, 0, 1, 0, 1, 1, 1, 0])
torch.Size([24])
Gradient norm: 1.943691699774139
tensor([1, 0, 0, 0, 1, 0, 0, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.587274957770449
tensor([0, 1, 1, 0, 0, 1, 0, 1, 0, 0])
torch.Size([24])
Gradient norm: 3.6787399583153744
tensor([1, 0, 0, 1, 0, 0, 0, 1, 1, 1])
torch.Size([24])
Gradient norm: 3.4234264912764365
tensor([1, 1, 1, 0, 0, 1, 1, 0, 0, 1])
torch.Size([24])
Gradient norm: 1.8347561959909704
tensor([1, 0, 0, 0, 0, 0, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 1.756140656608631
tensor([1, 1, 0,

Epoch 33/600 - Training:   0%|          | 0/29 [00:00<?, ?it/s]

tensor([0, 0, 1, 0, 1, 1, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 3.248598439816365
tensor([1, 0, 0, 0, 0, 0, 1, 1, 1, 1])
torch.Size([24])
Gradient norm: 2.1896010468290865
tensor([1, 0, 1, 0, 0, 0, 1, 1, 0, 0])
torch.Size([24])
Gradient norm: 2.9220672815086646
tensor([1, 0, 0, 1, 1, 1, 0, 0, 1, 0])
torch.Size([24])
Gradient norm: 2.0002233108868723
tensor([0, 1, 0, 1, 1, 0, 1, 1, 0, 1])
torch.Size([24])
Gradient norm: 1.8807608930737316
tensor([0, 0, 1, 0, 1, 0, 1, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.5882435976624794
tensor([1, 0, 0, 0, 1, 0, 0, 1, 1, 0])
torch.Size([24])
Gradient norm: 3.0177470495013643
tensor([0, 1, 1, 0, 0, 1, 0, 1, 0, 0])
torch.Size([24])
Gradient norm: 2.440340338128471
tensor([1, 0, 0, 1, 0, 0, 0, 1, 1, 1])
torch.Size([24])
Gradient norm: 2.6888168492835383
tensor([1, 1, 1, 0, 0, 1, 1, 0, 0, 1])
torch.Size([24])
Gradient norm: 1.6577003869724245
tensor([1, 0, 0, 0, 0, 0, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 2.086942100196161
tensor([1, 1,

Epoch 34/600 - Training:   0%|          | 0/29 [00:00<?, ?it/s]

tensor([0, 0, 1, 0, 1, 1, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 3.55872122226071
tensor([1, 0, 0, 0, 0, 0, 1, 1, 1, 1])
torch.Size([24])
Gradient norm: 2.3015533226174494
tensor([1, 0, 1, 0, 0, 0, 1, 1, 0, 0])
torch.Size([24])
Gradient norm: 1.9713354729841357
tensor([1, 0, 0, 1, 1, 1, 0, 0, 1, 0])
torch.Size([24])
Gradient norm: 2.398344532055606
tensor([0, 1, 0, 1, 1, 0, 1, 1, 0, 1])
torch.Size([24])
Gradient norm: 2.2040784939570877
tensor([0, 0, 1, 0, 1, 0, 1, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.5581404565012624
tensor([1, 0, 0, 0, 1, 0, 0, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.4432902260716385
tensor([0, 1, 1, 0, 0, 1, 0, 1, 0, 0])
torch.Size([24])
Gradient norm: 3.6776317564473016
tensor([1, 0, 0, 1, 0, 0, 0, 1, 1, 1])
torch.Size([24])
Gradient norm: 3.7940158168771667
tensor([1, 1, 1, 0, 0, 1, 1, 0, 0, 1])
torch.Size([24])
Gradient norm: 1.7276255653429442
tensor([1, 0, 0, 0, 0, 0, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 2.018839157483793
tensor([1, 1, 

Epoch 35/600 - Training:   0%|          | 0/29 [00:00<?, ?it/s]

tensor([0, 0, 1, 0, 1, 1, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 3.0380624535339718
tensor([1, 0, 0, 0, 0, 0, 1, 1, 1, 1])
torch.Size([24])
Gradient norm: 1.758203568944282
tensor([1, 0, 1, 0, 0, 0, 1, 1, 0, 0])
torch.Size([24])
Gradient norm: 2.7517761952484774
tensor([1, 0, 0, 1, 1, 1, 0, 0, 1, 0])
torch.Size([24])
Gradient norm: 2.089121927577479
tensor([0, 1, 0, 1, 1, 0, 1, 1, 0, 1])
torch.Size([24])
Gradient norm: 1.951957566501406
tensor([0, 0, 1, 0, 1, 0, 1, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.2504612453768384
tensor([1, 0, 0, 0, 1, 0, 0, 1, 1, 0])
torch.Size([24])
Gradient norm: 3.6057538488882708
tensor([0, 1, 1, 0, 0, 1, 0, 1, 0, 0])
torch.Size([24])
Gradient norm: 2.9383901764147677
tensor([1, 0, 0, 1, 0, 0, 0, 1, 1, 1])
torch.Size([24])
Gradient norm: 3.2293161187508743
tensor([1, 1, 1, 0, 0, 1, 1, 0, 0, 1])
torch.Size([24])
Gradient norm: 1.873802239324035
tensor([1, 0, 0, 0, 0, 0, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 1.6064709402124853
tensor([1, 1, 

Epoch 36/600 - Training:   0%|          | 0/29 [00:00<?, ?it/s]

tensor([0, 0, 1, 0, 1, 1, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 3.057803366291214
tensor([1, 0, 0, 0, 0, 0, 1, 1, 1, 1])
torch.Size([24])
Gradient norm: 2.569276717525437
tensor([1, 0, 1, 0, 0, 0, 1, 1, 0, 0])
torch.Size([24])
Gradient norm: 1.7610624706489433
tensor([1, 0, 0, 1, 1, 1, 0, 0, 1, 0])
torch.Size([24])
Gradient norm: 2.100996121321748
tensor([0, 1, 0, 1, 1, 0, 1, 1, 0, 1])
torch.Size([24])
Gradient norm: 1.9840208408369562
tensor([0, 0, 1, 0, 1, 0, 1, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.4313250385568095
tensor([1, 0, 0, 0, 1, 0, 0, 1, 1, 0])
torch.Size([24])
Gradient norm: 3.0196331066151143
tensor([0, 1, 1, 0, 0, 1, 0, 1, 0, 0])
torch.Size([24])
Gradient norm: 3.2786590253115797
tensor([1, 0, 0, 1, 0, 0, 0, 1, 1, 1])
torch.Size([24])
Gradient norm: 3.4187439879497212
tensor([1, 1, 1, 0, 0, 1, 1, 0, 0, 1])
torch.Size([24])
Gradient norm: 1.769031025697438
tensor([1, 0, 0, 0, 0, 0, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 1.9947080454815143
tensor([1, 1, 

Epoch 37/600 - Training:   0%|          | 0/29 [00:00<?, ?it/s]

tensor([0, 0, 1, 0, 1, 1, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 3.680272010911117
tensor([1, 0, 0, 0, 0, 0, 1, 1, 1, 1])
torch.Size([24])
Gradient norm: 2.1955249571120574
tensor([1, 0, 1, 0, 0, 0, 1, 1, 0, 0])
torch.Size([24])
Gradient norm: 2.1530416709494817
tensor([1, 0, 0, 1, 1, 1, 0, 0, 1, 0])
torch.Size([24])
Gradient norm: 3.1461446635560972
tensor([0, 1, 0, 1, 1, 0, 1, 1, 0, 1])
torch.Size([24])
Gradient norm: 2.113139848673726
tensor([0, 0, 1, 0, 1, 0, 1, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.428015836919959
tensor([1, 0, 0, 0, 1, 0, 0, 1, 1, 0])
torch.Size([24])
Gradient norm: 3.19087857814024
tensor([0, 1, 1, 0, 0, 1, 0, 1, 0, 0])
torch.Size([24])
Gradient norm: 3.798512665527861
tensor([1, 0, 0, 1, 0, 0, 0, 1, 1, 1])
torch.Size([24])
Gradient norm: 3.0694356633280466
tensor([1, 1, 1, 0, 0, 1, 1, 0, 0, 1])
torch.Size([24])
Gradient norm: 1.8891117694907922
tensor([1, 0, 0, 0, 0, 0, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 2.1116915418101967
tensor([1, 1, 0,

Epoch 38/600 - Training:   0%|          | 0/29 [00:00<?, ?it/s]

tensor([0, 0, 1, 0, 1, 1, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 3.615664212313958
tensor([1, 0, 0, 0, 0, 0, 1, 1, 1, 1])
torch.Size([24])
Gradient norm: 2.535661248710004
tensor([1, 0, 1, 0, 0, 0, 1, 1, 0, 0])
torch.Size([24])
Gradient norm: 2.3086315037548615
tensor([1, 0, 0, 1, 1, 1, 0, 0, 1, 0])
torch.Size([24])
Gradient norm: 3.0655911867306664
tensor([0, 1, 0, 1, 1, 0, 1, 1, 0, 1])
torch.Size([24])
Gradient norm: 2.083988513926632
tensor([0, 0, 1, 0, 1, 0, 1, 1, 1, 0])
torch.Size([24])
Gradient norm: 1.6302548714662481
tensor([1, 0, 0, 0, 1, 0, 0, 1, 1, 0])
torch.Size([24])
Gradient norm: 1.9151534915499657
tensor([0, 1, 1, 0, 0, 1, 0, 1, 0, 0])
torch.Size([24])
Gradient norm: 3.1584785241709064
tensor([1, 0, 0, 1, 0, 0, 0, 1, 1, 1])
torch.Size([24])
Gradient norm: 3.1507480555216434
tensor([1, 1, 1, 0, 0, 1, 1, 0, 0, 1])
torch.Size([24])
Gradient norm: 1.9004091762354727
tensor([1, 0, 0, 0, 0, 0, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 1.8654899237047526
tensor([1, 1,

Epoch 39/600 - Training:   0%|          | 0/29 [00:00<?, ?it/s]

tensor([0, 0, 1, 0, 1, 1, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 3.600299527761575
tensor([1, 0, 0, 0, 0, 0, 1, 1, 1, 1])
torch.Size([24])
Gradient norm: 2.0321817218201876
tensor([1, 0, 1, 0, 0, 0, 1, 1, 0, 0])
torch.Size([24])
Gradient norm: 2.446622770518764
tensor([1, 0, 0, 1, 1, 1, 0, 0, 1, 0])
torch.Size([24])
Gradient norm: 2.333480621417256
tensor([0, 1, 0, 1, 1, 0, 1, 1, 0, 1])
torch.Size([24])
Gradient norm: 2.4252401515557396
tensor([0, 0, 1, 0, 1, 0, 1, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.1607449525506652
tensor([1, 0, 0, 0, 1, 0, 0, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.690023370967938
tensor([0, 1, 1, 0, 0, 1, 0, 1, 0, 0])
torch.Size([24])
Gradient norm: 2.777442004198422
tensor([1, 0, 0, 1, 0, 0, 0, 1, 1, 1])
torch.Size([24])
Gradient norm: 3.739335063506725
tensor([1, 1, 1, 0, 0, 1, 1, 0, 0, 1])
torch.Size([24])
Gradient norm: 2.037817925185449
tensor([1, 0, 0, 0, 0, 0, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 1.6948552697843402
tensor([1, 1, 0, 

Epoch 40/600 - Training:   0%|          | 0/29 [00:00<?, ?it/s]

tensor([0, 0, 1, 0, 1, 1, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 4.013610462595151
tensor([1, 0, 0, 0, 0, 0, 1, 1, 1, 1])
torch.Size([24])
Gradient norm: 1.990927513973908
tensor([1, 0, 1, 0, 0, 0, 1, 1, 0, 0])
torch.Size([24])
Gradient norm: 3.117114896388379
tensor([1, 0, 0, 1, 1, 1, 0, 0, 1, 0])
torch.Size([24])
Gradient norm: 2.2672327025304813
tensor([0, 1, 0, 1, 1, 0, 1, 1, 0, 1])
torch.Size([24])
Gradient norm: 1.9594101718992674
tensor([0, 0, 1, 0, 1, 0, 1, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.1536139820601465
tensor([1, 0, 0, 0, 1, 0, 0, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.9037163106104398
tensor([0, 1, 1, 0, 0, 1, 0, 1, 0, 0])
torch.Size([24])
Gradient norm: 2.930520579792705
tensor([1, 0, 0, 1, 0, 0, 0, 1, 1, 1])
torch.Size([24])
Gradient norm: 3.3685309502029597
tensor([1, 1, 1, 0, 0, 1, 1, 0, 0, 1])
torch.Size([24])
Gradient norm: 1.7484535365271225
tensor([1, 0, 0, 0, 0, 0, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 1.6484484057730866
tensor([1, 1, 

Epoch 41/600 - Training:   0%|          | 0/29 [00:00<?, ?it/s]

tensor([0, 0, 1, 0, 1, 1, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 3.1582715357925935
tensor([1, 0, 0, 0, 0, 0, 1, 1, 1, 1])
torch.Size([24])
Gradient norm: 2.6030112015961033
tensor([1, 0, 1, 0, 0, 0, 1, 1, 0, 0])
torch.Size([24])
Gradient norm: 2.4757737516675604
tensor([1, 0, 0, 1, 1, 1, 0, 0, 1, 0])
torch.Size([24])
Gradient norm: 1.7882613556901263
tensor([0, 1, 0, 1, 1, 0, 1, 1, 0, 1])
torch.Size([24])
Gradient norm: 2.3918118700635658
tensor([0, 0, 1, 0, 1, 0, 1, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.715336997576869
tensor([1, 0, 0, 0, 1, 0, 0, 1, 1, 0])
torch.Size([24])
Gradient norm: 3.0129941165757828
tensor([0, 1, 1, 0, 0, 1, 0, 1, 0, 0])
torch.Size([24])
Gradient norm: 3.0578843447383575
tensor([1, 0, 0, 1, 0, 0, 0, 1, 1, 1])
torch.Size([24])
Gradient norm: 3.2463946419203453
tensor([1, 1, 1, 0, 0, 1, 1, 0, 0, 1])
torch.Size([24])
Gradient norm: 1.9277463984333243
tensor([1, 0, 0, 0, 0, 0, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 2.346485047525456
tensor([1, 1

Epoch 42/600 - Training:   0%|          | 0/29 [00:00<?, ?it/s]

tensor([0, 0, 1, 0, 1, 1, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 3.3037587207260026
tensor([1, 0, 0, 0, 0, 0, 1, 1, 1, 1])
torch.Size([24])
Gradient norm: 2.1682285272355197
tensor([1, 0, 1, 0, 0, 0, 1, 1, 0, 0])
torch.Size([24])
Gradient norm: 3.243438400973388
tensor([1, 0, 0, 1, 1, 1, 0, 0, 1, 0])
torch.Size([24])
Gradient norm: 3.087921476975353
tensor([0, 1, 0, 1, 1, 0, 1, 1, 0, 1])
torch.Size([24])
Gradient norm: 1.8127661485766078
tensor([0, 0, 1, 0, 1, 0, 1, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.563980931359131
tensor([1, 0, 0, 0, 1, 0, 0, 1, 1, 0])
torch.Size([24])
Gradient norm: 3.275644654118531
tensor([0, 1, 1, 0, 0, 1, 0, 1, 0, 0])
torch.Size([24])
Gradient norm: 2.3194580540546683
tensor([1, 0, 0, 1, 0, 0, 0, 1, 1, 1])
torch.Size([24])
Gradient norm: 4.193320187639894
tensor([1, 1, 1, 0, 0, 1, 1, 0, 0, 1])
torch.Size([24])
Gradient norm: 2.0267146029502863
tensor([1, 0, 0, 0, 0, 0, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 1.8191878654629254
tensor([1, 1, 0

Epoch 43/600 - Training:   0%|          | 0/29 [00:00<?, ?it/s]

tensor([0, 0, 1, 0, 1, 1, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 3.2217297677812406
tensor([1, 0, 0, 0, 0, 0, 1, 1, 1, 1])
torch.Size([24])
Gradient norm: 2.3486653125311565
tensor([1, 0, 1, 0, 0, 0, 1, 1, 0, 0])
torch.Size([24])
Gradient norm: 2.4434199090898447
tensor([1, 0, 0, 1, 1, 1, 0, 0, 1, 0])
torch.Size([24])
Gradient norm: 2.5192395954606415
tensor([0, 1, 0, 1, 1, 0, 1, 1, 0, 1])
torch.Size([24])
Gradient norm: 1.8230305627432244
tensor([0, 0, 1, 0, 1, 0, 1, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.2447711322958637
tensor([1, 0, 0, 0, 1, 0, 0, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.5171337027466523
tensor([0, 1, 1, 0, 0, 1, 0, 1, 0, 0])
torch.Size([24])
Gradient norm: 3.056369240762994
tensor([1, 0, 0, 1, 0, 0, 0, 1, 1, 1])
torch.Size([24])
Gradient norm: 2.636054428932669
tensor([1, 1, 1, 0, 0, 1, 1, 0, 0, 1])
torch.Size([24])
Gradient norm: 2.1987654106519834
tensor([1, 0, 0, 0, 0, 0, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 1.8662661641310858
tensor([1, 1

Epoch 44/600 - Training:   0%|          | 0/29 [00:00<?, ?it/s]

tensor([0, 0, 1, 0, 1, 1, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 2.8020545405070942
tensor([1, 0, 0, 0, 0, 0, 1, 1, 1, 1])
torch.Size([24])
Gradient norm: 1.9429893738113502
tensor([1, 0, 1, 0, 0, 0, 1, 1, 0, 0])
torch.Size([24])
Gradient norm: 2.554447145103864
tensor([1, 0, 0, 1, 1, 1, 0, 0, 1, 0])
torch.Size([24])
Gradient norm: 2.7724083111878404
tensor([0, 1, 0, 1, 1, 0, 1, 1, 0, 1])
torch.Size([24])
Gradient norm: 1.761100329040057
tensor([0, 0, 1, 0, 1, 0, 1, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.152802421215911
tensor([1, 0, 0, 0, 1, 0, 0, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.5292909492089737
tensor([0, 1, 1, 0, 0, 1, 0, 1, 0, 0])
torch.Size([24])
Gradient norm: 3.48255877101686
tensor([1, 0, 0, 1, 0, 0, 0, 1, 1, 1])
torch.Size([24])
Gradient norm: 3.288002381687983
tensor([1, 1, 1, 0, 0, 1, 1, 0, 0, 1])
torch.Size([24])
Gradient norm: 1.868889790046724
tensor([1, 0, 0, 0, 0, 0, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 1.6091083877949988
tensor([1, 1, 0, 

Epoch 45/600 - Training:   0%|          | 0/29 [00:00<?, ?it/s]

tensor([0, 0, 1, 0, 1, 1, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 2.6508706675129967
tensor([1, 0, 0, 0, 0, 0, 1, 1, 1, 1])
torch.Size([24])
Gradient norm: 2.5673185862803076
tensor([1, 0, 1, 0, 0, 0, 1, 1, 0, 0])
torch.Size([24])
Gradient norm: 2.6435493077103964
tensor([1, 0, 0, 1, 1, 1, 0, 0, 1, 0])
torch.Size([24])
Gradient norm: 2.023943784537119
tensor([0, 1, 0, 1, 1, 0, 1, 1, 0, 1])
torch.Size([24])
Gradient norm: 1.7748856594069131
tensor([0, 0, 1, 0, 1, 0, 1, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.639964643126817
tensor([1, 0, 0, 0, 1, 0, 0, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.8498290616999515
tensor([0, 1, 1, 0, 0, 1, 0, 1, 0, 0])
torch.Size([24])
Gradient norm: 3.1801845197463554
tensor([1, 0, 0, 1, 0, 0, 0, 1, 1, 1])
torch.Size([24])
Gradient norm: 3.2567769485384375
tensor([1, 1, 1, 0, 0, 1, 1, 0, 0, 1])
torch.Size([24])
Gradient norm: 1.7715035948654072
tensor([1, 0, 0, 0, 0, 0, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 2.199642101502076
tensor([1, 1,

Epoch 46/600 - Training:   0%|          | 0/29 [00:00<?, ?it/s]

tensor([0, 0, 1, 0, 1, 1, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 2.9168465906912404
tensor([1, 0, 0, 0, 0, 0, 1, 1, 1, 1])
torch.Size([24])
Gradient norm: 2.3892592892005844
tensor([1, 0, 1, 0, 0, 0, 1, 1, 0, 0])
torch.Size([24])
Gradient norm: 2.7758643813272936
tensor([1, 0, 0, 1, 1, 1, 0, 0, 1, 0])
torch.Size([24])
Gradient norm: 2.8917094747348795
tensor([0, 1, 0, 1, 1, 0, 1, 1, 0, 1])
torch.Size([24])
Gradient norm: 2.1765823260638317
tensor([0, 0, 1, 0, 1, 0, 1, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.5990851091581746
tensor([1, 0, 0, 0, 1, 0, 0, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.7315504356588125
tensor([0, 1, 1, 0, 0, 1, 0, 1, 0, 0])
torch.Size([24])
Gradient norm: 3.3394002933244624
tensor([1, 0, 0, 1, 0, 0, 0, 1, 1, 1])
torch.Size([24])
Gradient norm: 3.165975643644488
tensor([1, 1, 1, 0, 0, 1, 1, 0, 0, 1])
torch.Size([24])
Gradient norm: 2.4028546842218237
tensor([1, 0, 0, 0, 0, 0, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 2.3125952428017924
tensor([1, 

Epoch 47/600 - Training:   0%|          | 0/29 [00:00<?, ?it/s]

tensor([0, 0, 1, 0, 1, 1, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 3.2586199280159227
tensor([1, 0, 0, 0, 0, 0, 1, 1, 1, 1])
torch.Size([24])
Gradient norm: 2.10183629137971
tensor([1, 0, 1, 0, 0, 0, 1, 1, 0, 0])
torch.Size([24])
Gradient norm: 2.5248683448533606
tensor([1, 0, 0, 1, 1, 1, 0, 0, 1, 0])
torch.Size([24])
Gradient norm: 3.008755512113848
tensor([0, 1, 0, 1, 1, 0, 1, 1, 0, 1])
torch.Size([24])
Gradient norm: 1.7787161513600358
tensor([0, 0, 1, 0, 1, 0, 1, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.7686901732702225
tensor([1, 0, 0, 0, 1, 0, 0, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.3752898023323556
tensor([0, 1, 1, 0, 0, 1, 0, 1, 0, 0])
torch.Size([24])
Gradient norm: 3.556032252429747
tensor([1, 0, 0, 1, 0, 0, 0, 1, 1, 1])
torch.Size([24])
Gradient norm: 3.388106589147385
tensor([1, 1, 1, 0, 0, 1, 1, 0, 0, 1])
torch.Size([24])
Gradient norm: 2.122567550765857
tensor([1, 0, 0, 0, 0, 0, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 1.9376118051352489
tensor([1, 1, 0,

Epoch 48/600 - Training:   0%|          | 0/29 [00:00<?, ?it/s]

tensor([0, 0, 1, 0, 1, 1, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 3.2153252151768488
tensor([1, 0, 0, 0, 0, 0, 1, 1, 1, 1])
torch.Size([24])
Gradient norm: 1.8718877052307508
tensor([1, 0, 1, 0, 0, 0, 1, 1, 0, 0])
torch.Size([24])
Gradient norm: 2.711085328347691
tensor([1, 0, 0, 1, 1, 1, 0, 0, 1, 0])
torch.Size([24])
Gradient norm: 2.657367499292194
tensor([0, 1, 0, 1, 1, 0, 1, 1, 0, 1])
torch.Size([24])
Gradient norm: 2.4492804137207598
tensor([0, 0, 1, 0, 1, 0, 1, 1, 1, 0])
torch.Size([24])
Gradient norm: 1.9544327196168025
tensor([1, 0, 0, 0, 1, 0, 0, 1, 1, 0])
torch.Size([24])
Gradient norm: 3.6364726503973324
tensor([0, 1, 1, 0, 0, 1, 0, 1, 0, 0])
torch.Size([24])
Gradient norm: 3.6325701770175334
tensor([1, 0, 0, 1, 0, 0, 0, 1, 1, 1])
torch.Size([24])
Gradient norm: 3.4346665489488926
tensor([1, 1, 1, 0, 0, 1, 1, 0, 0, 1])
torch.Size([24])
Gradient norm: 1.9752584555997146
tensor([1, 0, 0, 0, 0, 0, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 2.383727390137121
tensor([1, 1,

Epoch 49/600 - Training:   0%|          | 0/29 [00:00<?, ?it/s]

tensor([0, 0, 1, 0, 1, 1, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 3.107843761236007
tensor([1, 0, 0, 0, 0, 0, 1, 1, 1, 1])
torch.Size([24])
Gradient norm: 2.198402825910425
tensor([1, 0, 1, 0, 0, 0, 1, 1, 0, 0])
torch.Size([24])
Gradient norm: 2.772288282998073
tensor([1, 0, 0, 1, 1, 1, 0, 0, 1, 0])
torch.Size([24])
Gradient norm: 2.406935430686572
tensor([0, 1, 0, 1, 1, 0, 1, 1, 0, 1])
torch.Size([24])
Gradient norm: 1.924421960219529
tensor([0, 0, 1, 0, 1, 0, 1, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.2315373073251186
tensor([1, 0, 0, 0, 1, 0, 0, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.433728636878111
tensor([0, 1, 1, 0, 0, 1, 0, 1, 0, 0])
torch.Size([24])
Gradient norm: 2.814590108432617
tensor([1, 0, 0, 1, 0, 0, 0, 1, 1, 1])
torch.Size([24])
Gradient norm: 3.0705274489105743
tensor([1, 1, 1, 0, 0, 1, 1, 0, 0, 1])
torch.Size([24])
Gradient norm: 1.994015304732661
tensor([1, 0, 0, 0, 0, 0, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 1.69289357853944
tensor([1, 1, 0, 0, 

Epoch 50/600 - Training:   0%|          | 0/29 [00:00<?, ?it/s]

tensor([0, 0, 1, 0, 1, 1, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 3.5154949652654057
tensor([1, 0, 0, 0, 0, 0, 1, 1, 1, 1])
torch.Size([24])
Gradient norm: 2.388744676936022
tensor([1, 0, 1, 0, 0, 0, 1, 1, 0, 0])
torch.Size([24])
Gradient norm: 2.34752679581197
tensor([1, 0, 0, 1, 1, 1, 0, 0, 1, 0])
torch.Size([24])
Gradient norm: 2.1175597054506348
tensor([0, 1, 0, 1, 1, 0, 1, 1, 0, 1])
torch.Size([24])
Gradient norm: 1.7480700128295719
tensor([0, 0, 1, 0, 1, 0, 1, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.2378914716228326
tensor([1, 0, 0, 0, 1, 0, 0, 1, 1, 0])
torch.Size([24])
Gradient norm: 3.007611258951659
tensor([0, 1, 1, 0, 0, 1, 0, 1, 0, 0])
torch.Size([24])
Gradient norm: 2.122733143769073
tensor([1, 0, 0, 1, 0, 0, 0, 1, 1, 1])
torch.Size([24])
Gradient norm: 2.8614286636770503
tensor([1, 1, 1, 0, 0, 1, 1, 0, 0, 1])
torch.Size([24])
Gradient norm: 1.7489986952033125
tensor([1, 0, 0, 0, 0, 0, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 1.8090920257564205
tensor([1, 1, 0

Epoch 51/600 - Training:   0%|          | 0/29 [00:00<?, ?it/s]

tensor([0, 0, 1, 0, 1, 1, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 2.674059258215565
tensor([1, 0, 0, 0, 0, 0, 1, 1, 1, 1])
torch.Size([24])
Gradient norm: 2.080432874232769
tensor([1, 0, 1, 0, 0, 0, 1, 1, 0, 0])
torch.Size([24])
Gradient norm: 1.6049919641198742
tensor([1, 0, 0, 1, 1, 1, 0, 0, 1, 0])
torch.Size([24])
Gradient norm: 2.333616800943229
tensor([0, 1, 0, 1, 1, 0, 1, 1, 0, 1])
torch.Size([24])
Gradient norm: 1.763468969966276
tensor([0, 0, 1, 0, 1, 0, 1, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.15212944791718
tensor([1, 0, 0, 0, 1, 0, 0, 1, 1, 0])
torch.Size([24])
Gradient norm: 3.2152950209294238
tensor([0, 1, 1, 0, 0, 1, 0, 1, 0, 0])
torch.Size([24])
Gradient norm: 2.7935570534950496
tensor([1, 0, 0, 1, 0, 0, 0, 1, 1, 1])
torch.Size([24])
Gradient norm: 3.088555495562174
tensor([1, 1, 1, 0, 0, 1, 1, 0, 0, 1])
torch.Size([24])
Gradient norm: 1.7024534525745365
tensor([1, 0, 0, 0, 0, 0, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 1.7022724150698405
tensor([1, 1, 0, 

Epoch 52/600 - Training:   0%|          | 0/29 [00:00<?, ?it/s]

tensor([0, 0, 1, 0, 1, 1, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 3.23876269417668
tensor([1, 0, 0, 0, 0, 0, 1, 1, 1, 1])
torch.Size([24])
Gradient norm: 2.0454204890690635
tensor([1, 0, 1, 0, 0, 0, 1, 1, 0, 0])
torch.Size([24])
Gradient norm: 2.140115642904398
tensor([1, 0, 0, 1, 1, 1, 0, 0, 1, 0])
torch.Size([24])
Gradient norm: 2.7165032635944293
tensor([0, 1, 0, 1, 1, 0, 1, 1, 0, 1])
torch.Size([24])
Gradient norm: 1.9089326321573141
tensor([0, 0, 1, 0, 1, 0, 1, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.6672288721417194
tensor([1, 0, 0, 0, 1, 0, 0, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.453871440039453
tensor([0, 1, 1, 0, 0, 1, 0, 1, 0, 0])
torch.Size([24])
Gradient norm: 3.4455369469493404
tensor([1, 0, 0, 1, 0, 0, 0, 1, 1, 1])
torch.Size([24])
Gradient norm: 3.003552464993288
tensor([1, 1, 1, 0, 0, 1, 1, 0, 0, 1])
torch.Size([24])
Gradient norm: 2.062065884935629
tensor([1, 0, 0, 0, 0, 0, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 2.1802545121496357
tensor([1, 1, 0,

Epoch 53/600 - Training:   0%|          | 0/29 [00:00<?, ?it/s]

tensor([0, 0, 1, 0, 1, 1, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 3.7296381259631577
tensor([1, 0, 0, 0, 0, 0, 1, 1, 1, 1])
torch.Size([24])
Gradient norm: 3.004515958388143
tensor([1, 0, 1, 0, 0, 0, 1, 1, 0, 0])
torch.Size([24])
Gradient norm: 2.0204967539879553
tensor([1, 0, 0, 1, 1, 1, 0, 0, 1, 0])
torch.Size([24])
Gradient norm: 3.1080196007667737
tensor([0, 1, 0, 1, 1, 0, 1, 1, 0, 1])
torch.Size([24])
Gradient norm: 1.8272437196878921
tensor([0, 0, 1, 0, 1, 0, 1, 1, 1, 0])
torch.Size([24])
Gradient norm: 1.5667446734403627
tensor([1, 0, 0, 0, 1, 0, 0, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.7784871537080313
tensor([0, 1, 1, 0, 0, 1, 0, 1, 0, 0])
torch.Size([24])
Gradient norm: 2.945638939088888
tensor([1, 0, 0, 1, 0, 0, 0, 1, 1, 1])
torch.Size([24])
Gradient norm: 2.703671506178727
tensor([1, 1, 1, 0, 0, 1, 1, 0, 0, 1])
torch.Size([24])
Gradient norm: 2.2743250288127195
tensor([1, 0, 0, 0, 0, 0, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 1.8796784645442552
tensor([1, 1,

Epoch 54/600 - Training:   0%|          | 0/29 [00:00<?, ?it/s]

tensor([0, 0, 1, 0, 1, 1, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 3.9410988487613725
tensor([1, 0, 0, 0, 0, 0, 1, 1, 1, 1])
torch.Size([24])
Gradient norm: 2.7043686638179234
tensor([1, 0, 1, 0, 0, 0, 1, 1, 0, 0])
torch.Size([24])
Gradient norm: 2.2635028982884164
tensor([1, 0, 0, 1, 1, 1, 0, 0, 1, 0])
torch.Size([24])
Gradient norm: 2.853431503847415
tensor([0, 1, 0, 1, 1, 0, 1, 1, 0, 1])
torch.Size([24])
Gradient norm: 2.3170075897293265
tensor([0, 0, 1, 0, 1, 0, 1, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.3880177962195948
tensor([1, 0, 0, 0, 1, 0, 0, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.2723399151315795
tensor([0, 1, 1, 0, 0, 1, 0, 1, 0, 0])
torch.Size([24])
Gradient norm: 2.6576881950230073
tensor([1, 0, 0, 1, 0, 0, 0, 1, 1, 1])
torch.Size([24])
Gradient norm: 4.037733814194459
tensor([1, 1, 1, 0, 0, 1, 1, 0, 0, 1])
torch.Size([24])
Gradient norm: 1.7996826652536275
tensor([1, 0, 0, 0, 0, 0, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 1.501334343142073
tensor([1, 1,

Epoch 55/600 - Training:   0%|          | 0/29 [00:00<?, ?it/s]

tensor([0, 0, 1, 0, 1, 1, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 3.5621809007393965
tensor([1, 0, 0, 0, 0, 0, 1, 1, 1, 1])
torch.Size([24])
Gradient norm: 2.1966725178510993
tensor([1, 0, 1, 0, 0, 0, 1, 1, 0, 0])
torch.Size([24])
Gradient norm: 1.9515702756060012
tensor([1, 0, 0, 1, 1, 1, 0, 0, 1, 0])
torch.Size([24])
Gradient norm: 2.712249667453284
tensor([0, 1, 0, 1, 1, 0, 1, 1, 0, 1])
torch.Size([24])
Gradient norm: 2.1597025236266583
tensor([0, 0, 1, 0, 1, 0, 1, 1, 1, 0])
torch.Size([24])
Gradient norm: 1.8771798388172214
tensor([1, 0, 0, 0, 1, 0, 0, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.9990989017765655
tensor([0, 1, 1, 0, 0, 1, 0, 1, 0, 0])
torch.Size([24])
Gradient norm: 3.211928872054909
tensor([1, 0, 0, 1, 0, 0, 0, 1, 1, 1])
torch.Size([24])
Gradient norm: 3.041014855405087
tensor([1, 1, 1, 0, 0, 1, 1, 0, 0, 1])
torch.Size([24])
Gradient norm: 1.9975031834944366
tensor([1, 0, 0, 0, 0, 0, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 1.8523783832945326
tensor([1, 1,

Epoch 56/600 - Training:   0%|          | 0/29 [00:00<?, ?it/s]

tensor([0, 0, 1, 0, 1, 1, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 3.9946395383401723
tensor([1, 0, 0, 0, 0, 0, 1, 1, 1, 1])
torch.Size([24])
Gradient norm: 2.845769743355907
tensor([1, 0, 1, 0, 0, 0, 1, 1, 0, 0])
torch.Size([24])
Gradient norm: 2.7663380533494433
tensor([1, 0, 0, 1, 1, 1, 0, 0, 1, 0])
torch.Size([24])
Gradient norm: 2.823270904024963
tensor([0, 1, 0, 1, 1, 0, 1, 1, 0, 1])
torch.Size([24])
Gradient norm: 1.9629825025069096
tensor([0, 0, 1, 0, 1, 0, 1, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.1732151725045847
tensor([1, 0, 0, 0, 1, 0, 0, 1, 1, 0])
torch.Size([24])
Gradient norm: 3.393884820169561
tensor([0, 1, 1, 0, 0, 1, 0, 1, 0, 0])
torch.Size([24])
Gradient norm: 3.365698842609533
tensor([1, 0, 0, 1, 0, 0, 0, 1, 1, 1])
torch.Size([24])
Gradient norm: 3.7608396812223535
tensor([1, 1, 1, 0, 0, 1, 1, 0, 0, 1])
torch.Size([24])
Gradient norm: 2.172431196857757
tensor([1, 0, 0, 0, 0, 0, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 1.923846243714809
tensor([1, 1, 0,

Epoch 57/600 - Training:   0%|          | 0/29 [00:00<?, ?it/s]

tensor([0, 0, 1, 0, 1, 1, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 3.711384330531011
tensor([1, 0, 0, 0, 0, 0, 1, 1, 1, 1])
torch.Size([24])
Gradient norm: 1.833963096345343
tensor([1, 0, 1, 0, 0, 0, 1, 1, 0, 0])
torch.Size([24])
Gradient norm: 2.7237976398381223
tensor([1, 0, 0, 1, 1, 1, 0, 0, 1, 0])
torch.Size([24])
Gradient norm: 1.9181368168772879
tensor([0, 1, 0, 1, 1, 0, 1, 1, 0, 1])
torch.Size([24])
Gradient norm: 1.885174603875545
tensor([0, 0, 1, 0, 1, 0, 1, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.124092716151383
tensor([1, 0, 0, 0, 1, 0, 0, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.61057666573186
tensor([0, 1, 1, 0, 0, 1, 0, 1, 0, 0])
torch.Size([24])
Gradient norm: 3.0306909257843198
tensor([1, 0, 0, 1, 0, 0, 0, 1, 1, 1])
torch.Size([24])
Gradient norm: 2.8452158275600934
tensor([1, 1, 1, 0, 0, 1, 1, 0, 0, 1])
torch.Size([24])
Gradient norm: 2.02109454814791
tensor([1, 0, 0, 0, 0, 0, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 1.4584500360664467
tensor([1, 1, 0, 0

Epoch 58/600 - Training:   0%|          | 0/29 [00:00<?, ?it/s]

tensor([0, 0, 1, 0, 1, 1, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 3.7747406879950325
tensor([1, 0, 0, 0, 0, 0, 1, 1, 1, 1])
torch.Size([24])
Gradient norm: 2.4388174345221043
tensor([1, 0, 1, 0, 0, 0, 1, 1, 0, 0])
torch.Size([24])
Gradient norm: 3.068233976351385
tensor([1, 0, 0, 1, 1, 1, 0, 0, 1, 0])
torch.Size([24])
Gradient norm: 2.0027604310369673
tensor([0, 1, 0, 1, 1, 0, 1, 1, 0, 1])
torch.Size([24])
Gradient norm: 2.140009982970388
tensor([0, 0, 1, 0, 1, 0, 1, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.6294147613332624
tensor([1, 0, 0, 0, 1, 0, 0, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.6917668641338923
tensor([0, 1, 1, 0, 0, 1, 0, 1, 0, 0])
torch.Size([24])
Gradient norm: 3.0666064170442473
tensor([1, 0, 0, 1, 0, 0, 0, 1, 1, 1])
torch.Size([24])
Gradient norm: 2.684517489221376
tensor([1, 1, 1, 0, 0, 1, 1, 0, 0, 1])
torch.Size([24])
Gradient norm: 2.146991471622939
tensor([1, 0, 0, 0, 0, 0, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 2.2175783834010696
tensor([1, 1, 

Epoch 59/600 - Training:   0%|          | 0/29 [00:00<?, ?it/s]

tensor([0, 0, 1, 0, 1, 1, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 3.1495764335531162
tensor([1, 0, 0, 0, 0, 0, 1, 1, 1, 1])
torch.Size([24])
Gradient norm: 2.122447520624012
tensor([1, 0, 1, 0, 0, 0, 1, 1, 0, 0])
torch.Size([24])
Gradient norm: 2.583710106539615
tensor([1, 0, 0, 1, 1, 1, 0, 0, 1, 0])
torch.Size([24])
Gradient norm: 2.4463652694951117
tensor([0, 1, 0, 1, 1, 0, 1, 1, 0, 1])
torch.Size([24])
Gradient norm: 1.8768325393374476
tensor([0, 0, 1, 0, 1, 0, 1, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.185104646029735
tensor([1, 0, 0, 0, 1, 0, 0, 1, 1, 0])
torch.Size([24])
Gradient norm: 3.4089041444464794
tensor([0, 1, 1, 0, 0, 1, 0, 1, 0, 0])
torch.Size([24])
Gradient norm: 3.225617131190112
tensor([1, 0, 0, 1, 0, 0, 0, 1, 1, 1])
torch.Size([24])
Gradient norm: 3.7822898068421917
tensor([1, 1, 1, 0, 0, 1, 1, 0, 0, 1])
torch.Size([24])
Gradient norm: 1.6853221873338802
tensor([1, 0, 0, 0, 0, 0, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 1.8816821373693555
tensor([1, 1, 

Epoch 60/600 - Training:   0%|          | 0/29 [00:00<?, ?it/s]

tensor([0, 0, 1, 0, 1, 1, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 2.981772175784159
tensor([1, 0, 0, 0, 0, 0, 1, 1, 1, 1])
torch.Size([24])
Gradient norm: 2.2736151186001385
tensor([1, 0, 1, 0, 0, 0, 1, 1, 0, 0])
torch.Size([24])
Gradient norm: 2.44545945787635
tensor([1, 0, 0, 1, 1, 1, 0, 0, 1, 0])
torch.Size([24])
Gradient norm: 2.4860608455122337
tensor([0, 1, 0, 1, 1, 0, 1, 1, 0, 1])
torch.Size([24])
Gradient norm: 1.8557986514389324
tensor([0, 0, 1, 0, 1, 0, 1, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.1868491135625314
tensor([1, 0, 0, 0, 1, 0, 0, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.521597577664535
tensor([0, 1, 1, 0, 0, 1, 0, 1, 0, 0])
torch.Size([24])
Gradient norm: 3.0466654640432487
tensor([1, 0, 0, 1, 0, 0, 0, 1, 1, 1])
torch.Size([24])
Gradient norm: 3.3904602910828645
tensor([1, 1, 1, 0, 0, 1, 1, 0, 0, 1])
torch.Size([24])
Gradient norm: 1.8401022568272547
tensor([1, 0, 0, 0, 0, 0, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 1.8856744519347635
tensor([1, 1, 

Epoch 61/600 - Training:   0%|          | 0/29 [00:00<?, ?it/s]

tensor([0, 0, 1, 0, 1, 1, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 2.656918211890418
tensor([1, 0, 0, 0, 0, 0, 1, 1, 1, 1])
torch.Size([24])
Gradient norm: 2.4271688695720486
tensor([1, 0, 1, 0, 0, 0, 1, 1, 0, 0])
torch.Size([24])
Gradient norm: 2.0908628403248537
tensor([1, 0, 0, 1, 1, 1, 0, 0, 1, 0])
torch.Size([24])
Gradient norm: 2.2020078123302227
tensor([0, 1, 0, 1, 1, 0, 1, 1, 0, 1])
torch.Size([24])
Gradient norm: 2.067238094609985
tensor([0, 0, 1, 0, 1, 0, 1, 1, 1, 0])
torch.Size([24])
Gradient norm: 1.6393261151593401
tensor([1, 0, 0, 0, 1, 0, 0, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.225859775071871
tensor([0, 1, 1, 0, 0, 1, 0, 1, 0, 0])
torch.Size([24])
Gradient norm: 3.1458891898242727
tensor([1, 0, 0, 1, 0, 0, 0, 1, 1, 1])
torch.Size([24])
Gradient norm: 2.9063804203121726
tensor([1, 1, 1, 0, 0, 1, 1, 0, 0, 1])
torch.Size([24])
Gradient norm: 2.0911691347390113
tensor([1, 0, 0, 0, 0, 0, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 2.0437132022485955
tensor([1, 1,

Epoch 62/600 - Training:   0%|          | 0/29 [00:00<?, ?it/s]

tensor([0, 0, 1, 0, 1, 1, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 3.244453924043934
tensor([1, 0, 0, 0, 0, 0, 1, 1, 1, 1])
torch.Size([24])
Gradient norm: 2.1644736720676714
tensor([1, 0, 1, 0, 0, 0, 1, 1, 0, 0])
torch.Size([24])
Gradient norm: 2.5781216820102992
tensor([1, 0, 0, 1, 1, 1, 0, 0, 1, 0])
torch.Size([24])
Gradient norm: 2.8172683622766295
tensor([0, 1, 0, 1, 1, 0, 1, 1, 0, 1])
torch.Size([24])
Gradient norm: 1.6901632409810075
tensor([0, 0, 1, 0, 1, 0, 1, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.5068196545728805
tensor([1, 0, 0, 0, 1, 0, 0, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.71360721313407
tensor([0, 1, 1, 0, 0, 1, 0, 1, 0, 0])
torch.Size([24])
Gradient norm: 3.3336135049015327
tensor([1, 0, 0, 1, 0, 0, 0, 1, 1, 1])
torch.Size([24])
Gradient norm: 2.947772624253339
tensor([1, 1, 1, 0, 0, 1, 1, 0, 0, 1])
torch.Size([24])
Gradient norm: 1.7202409609650156
tensor([1, 0, 0, 0, 0, 0, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 2.0138335517641877
tensor([1, 1, 

Epoch 63/600 - Training:   0%|          | 0/29 [00:00<?, ?it/s]

tensor([0, 0, 1, 0, 1, 1, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 2.2820017741931866
tensor([1, 0, 0, 0, 0, 0, 1, 1, 1, 1])
torch.Size([24])
Gradient norm: 2.0782694627591303
tensor([1, 0, 1, 0, 0, 0, 1, 1, 0, 0])
torch.Size([24])
Gradient norm: 2.704544893321065
tensor([1, 0, 0, 1, 1, 1, 0, 0, 1, 0])
torch.Size([24])
Gradient norm: 2.935597192733293
tensor([0, 1, 0, 1, 1, 0, 1, 1, 0, 1])
torch.Size([24])
Gradient norm: 1.745436546050009
tensor([0, 0, 1, 0, 1, 0, 1, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.4759248912512106
tensor([1, 0, 0, 0, 1, 0, 0, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.5035189742154276
tensor([0, 1, 1, 0, 0, 1, 0, 1, 0, 0])
torch.Size([24])
Gradient norm: 3.9485200821991544
tensor([1, 0, 0, 1, 0, 0, 0, 1, 1, 1])
torch.Size([24])
Gradient norm: 3.14624483548183
tensor([1, 1, 1, 0, 0, 1, 1, 0, 0, 1])
torch.Size([24])
Gradient norm: 1.911061801337143
tensor([1, 0, 0, 0, 0, 0, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 1.8572735833145955
tensor([1, 1, 0,

Epoch 64/600 - Training:   0%|          | 0/29 [00:00<?, ?it/s]

tensor([0, 0, 1, 0, 1, 1, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 3.9364104440951744
tensor([1, 0, 0, 0, 0, 0, 1, 1, 1, 1])
torch.Size([24])
Gradient norm: 2.5304549660449736
tensor([1, 0, 1, 0, 0, 0, 1, 1, 0, 0])
torch.Size([24])
Gradient norm: 2.309164445509243
tensor([1, 0, 0, 1, 1, 1, 0, 0, 1, 0])
torch.Size([24])
Gradient norm: 2.30817908255759
tensor([0, 1, 0, 1, 1, 0, 1, 1, 0, 1])
torch.Size([24])
Gradient norm: 2.1822446038282797
tensor([0, 0, 1, 0, 1, 0, 1, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.5095002974812353
tensor([1, 0, 0, 0, 1, 0, 0, 1, 1, 0])
torch.Size([24])
Gradient norm: 3.0706678577923765
tensor([0, 1, 1, 0, 0, 1, 0, 1, 0, 0])
torch.Size([24])
Gradient norm: 3.039734048078002
tensor([1, 0, 0, 1, 0, 0, 0, 1, 1, 1])
torch.Size([24])
Gradient norm: 3.225543201330142
tensor([1, 1, 1, 0, 0, 1, 1, 0, 0, 1])
torch.Size([24])
Gradient norm: 1.4761816957677378
tensor([1, 0, 0, 0, 0, 0, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 1.5818764092220459
tensor([1, 1, 0

Epoch 65/600 - Training:   0%|          | 0/29 [00:00<?, ?it/s]

tensor([0, 0, 1, 0, 1, 1, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 3.080413808256941
tensor([1, 0, 0, 0, 0, 0, 1, 1, 1, 1])
torch.Size([24])
Gradient norm: 2.407048224936939
tensor([1, 0, 1, 0, 0, 0, 1, 1, 0, 0])
torch.Size([24])
Gradient norm: 1.8360712906888703
tensor([1, 0, 0, 1, 1, 1, 0, 0, 1, 0])
torch.Size([24])
Gradient norm: 2.220145784304747
tensor([0, 1, 0, 1, 1, 0, 1, 1, 0, 1])
torch.Size([24])
Gradient norm: 1.8995082092450233
tensor([0, 0, 1, 0, 1, 0, 1, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.421874489419953
tensor([1, 0, 0, 0, 1, 0, 0, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.8790734866277012
tensor([0, 1, 1, 0, 0, 1, 0, 1, 0, 0])
torch.Size([24])
Gradient norm: 3.358454639946094
tensor([1, 0, 0, 1, 0, 0, 0, 1, 1, 1])
torch.Size([24])
Gradient norm: 2.9995219555998824
tensor([1, 1, 1, 0, 0, 1, 1, 0, 0, 1])
torch.Size([24])
Gradient norm: 2.5162555804008018
tensor([1, 0, 0, 0, 0, 0, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 2.229294962002274
tensor([1, 1, 0,

Epoch 66/600 - Training:   0%|          | 0/29 [00:00<?, ?it/s]

tensor([0, 0, 1, 0, 1, 1, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 2.9419032439477997
tensor([1, 0, 0, 0, 0, 0, 1, 1, 1, 1])
torch.Size([24])
Gradient norm: 1.8320093718299928
tensor([1, 0, 1, 0, 0, 0, 1, 1, 0, 0])
torch.Size([24])
Gradient norm: 2.333939213710179
tensor([1, 0, 0, 1, 1, 1, 0, 0, 1, 0])
torch.Size([24])
Gradient norm: 1.9730932659914606
tensor([0, 1, 0, 1, 1, 0, 1, 1, 0, 1])
torch.Size([24])
Gradient norm: 1.7448831313200244
tensor([0, 0, 1, 0, 1, 0, 1, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.505174604012028
tensor([1, 0, 0, 0, 1, 0, 0, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.1793487294879936
tensor([0, 1, 1, 0, 0, 1, 0, 1, 0, 0])
torch.Size([24])
Gradient norm: 3.0606276276236466
tensor([1, 0, 0, 1, 0, 0, 0, 1, 1, 1])
torch.Size([24])
Gradient norm: 3.6248529032474694
tensor([1, 1, 1, 0, 0, 1, 1, 0, 0, 1])
torch.Size([24])
Gradient norm: 2.054423249568837
tensor([1, 0, 0, 0, 0, 0, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 2.01100025849505
tensor([1, 1, 0

Epoch 67/600 - Training:   0%|          | 0/29 [00:00<?, ?it/s]

tensor([0, 0, 1, 0, 1, 1, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 3.0524543711348944
tensor([1, 0, 0, 0, 0, 0, 1, 1, 1, 1])
torch.Size([24])
Gradient norm: 1.808400808340734
tensor([1, 0, 1, 0, 0, 0, 1, 1, 0, 0])
torch.Size([24])
Gradient norm: 2.28586201157346
tensor([1, 0, 0, 1, 1, 1, 0, 0, 1, 0])
torch.Size([24])
Gradient norm: 2.6084651283574374
tensor([0, 1, 0, 1, 1, 0, 1, 1, 0, 1])
torch.Size([24])
Gradient norm: 1.7427344485495753
tensor([0, 0, 1, 0, 1, 0, 1, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.4183160105101904
tensor([1, 0, 0, 0, 1, 0, 0, 1, 1, 0])
torch.Size([24])
Gradient norm: 3.107327356864232
tensor([0, 1, 1, 0, 0, 1, 0, 1, 0, 0])
torch.Size([24])
Gradient norm: 2.9389556374503742
tensor([1, 0, 0, 1, 0, 0, 0, 1, 1, 1])
torch.Size([24])
Gradient norm: 3.2539420307152884
tensor([1, 1, 1, 0, 0, 1, 1, 0, 0, 1])
torch.Size([24])
Gradient norm: 1.8978697437085128
tensor([1, 0, 0, 0, 0, 0, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 1.9054704882721405
tensor([1, 1, 

Epoch 68/600 - Training:   0%|          | 0/29 [00:00<?, ?it/s]

tensor([0, 0, 1, 0, 1, 1, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 2.7555915817667955
tensor([1, 0, 0, 0, 0, 0, 1, 1, 1, 1])
torch.Size([24])
Gradient norm: 2.4253747298877024
tensor([1, 0, 1, 0, 0, 0, 1, 1, 0, 0])
torch.Size([24])
Gradient norm: 3.1422205398994225
tensor([1, 0, 0, 1, 1, 1, 0, 0, 1, 0])
torch.Size([24])
Gradient norm: 2.1478185054896417
tensor([0, 1, 0, 1, 1, 0, 1, 1, 0, 1])
torch.Size([24])
Gradient norm: 1.5173395435780526
tensor([0, 0, 1, 0, 1, 0, 1, 1, 1, 0])
torch.Size([24])
Gradient norm: 1.9593649976175829
tensor([1, 0, 0, 0, 1, 0, 0, 1, 1, 0])
torch.Size([24])
Gradient norm: 3.122753864636091
tensor([0, 1, 1, 0, 0, 1, 0, 1, 0, 0])
torch.Size([24])
Gradient norm: 3.169657780606336
tensor([1, 0, 0, 1, 0, 0, 0, 1, 1, 1])
torch.Size([24])
Gradient norm: 3.273428242058031
tensor([1, 1, 1, 0, 0, 1, 1, 0, 0, 1])
torch.Size([24])
Gradient norm: 1.5620652890836983
tensor([1, 0, 0, 0, 0, 0, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 2.3565896291528277
tensor([1, 1,

Epoch 69/600 - Training:   0%|          | 0/29 [00:00<?, ?it/s]

tensor([0, 0, 1, 0, 1, 1, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 3.2174925566655195
tensor([1, 0, 0, 0, 0, 0, 1, 1, 1, 1])
torch.Size([24])
Gradient norm: 2.5314701672587634
tensor([1, 0, 1, 0, 0, 0, 1, 1, 0, 0])
torch.Size([24])
Gradient norm: 2.784595970812737
tensor([1, 0, 0, 1, 1, 1, 0, 0, 1, 0])
torch.Size([24])
Gradient norm: 2.1730255563738576
tensor([0, 1, 0, 1, 1, 0, 1, 1, 0, 1])
torch.Size([24])
Gradient norm: 2.036884495782101
tensor([0, 0, 1, 0, 1, 0, 1, 1, 1, 0])
torch.Size([24])
Gradient norm: 1.9787264997039031
tensor([1, 0, 0, 0, 1, 0, 0, 1, 1, 0])
torch.Size([24])
Gradient norm: 3.2054915555263896
tensor([0, 1, 1, 0, 0, 1, 0, 1, 0, 0])
torch.Size([24])
Gradient norm: 2.617231522926094
tensor([1, 0, 0, 1, 0, 0, 0, 1, 1, 1])
torch.Size([24])
Gradient norm: 3.012214328868378
tensor([1, 1, 1, 0, 0, 1, 1, 0, 0, 1])
torch.Size([24])
Gradient norm: 1.8485498964862208
tensor([1, 0, 0, 0, 0, 0, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 2.4360118224831693
tensor([1, 1, 

Epoch 70/600 - Training:   0%|          | 0/29 [00:00<?, ?it/s]

tensor([0, 0, 1, 0, 1, 1, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 4.459959490803588
tensor([1, 0, 0, 0, 0, 0, 1, 1, 1, 1])
torch.Size([24])
Gradient norm: 1.9215770136750832
tensor([1, 0, 1, 0, 0, 0, 1, 1, 0, 0])
torch.Size([24])
Gradient norm: 1.9219352317851306
tensor([1, 0, 0, 1, 1, 1, 0, 0, 1, 0])
torch.Size([24])
Gradient norm: 1.6882664877962394
tensor([0, 1, 0, 1, 1, 0, 1, 1, 0, 1])
torch.Size([24])
Gradient norm: 1.9413369872281176
tensor([0, 0, 1, 0, 1, 0, 1, 1, 1, 0])
torch.Size([24])
Gradient norm: 1.861510398794385
tensor([1, 0, 0, 0, 1, 0, 0, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.4754882552489628
tensor([0, 1, 1, 0, 0, 1, 0, 1, 0, 0])
torch.Size([24])
Gradient norm: 2.464615664495616
tensor([1, 0, 0, 1, 0, 0, 0, 1, 1, 1])
torch.Size([24])
Gradient norm: 3.3877063377457595
tensor([1, 1, 1, 0, 0, 1, 1, 0, 0, 1])
torch.Size([24])
Gradient norm: 2.1592410333264502
tensor([1, 0, 0, 0, 0, 0, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 1.7578243814239498
tensor([1, 1,

Epoch 71/600 - Training:   0%|          | 0/29 [00:00<?, ?it/s]

tensor([0, 0, 1, 0, 1, 1, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 2.609255166566215
tensor([1, 0, 0, 0, 0, 0, 1, 1, 1, 1])
torch.Size([24])
Gradient norm: 2.3906554131359483
tensor([1, 0, 1, 0, 0, 0, 1, 1, 0, 0])
torch.Size([24])
Gradient norm: 2.7483556811753487
tensor([1, 0, 0, 1, 1, 1, 0, 0, 1, 0])
torch.Size([24])
Gradient norm: 2.395335855182939
tensor([0, 1, 0, 1, 1, 0, 1, 1, 0, 1])
torch.Size([24])
Gradient norm: 1.78708948557517
tensor([0, 0, 1, 0, 1, 0, 1, 1, 1, 0])
torch.Size([24])
Gradient norm: 1.9371265896880936
tensor([1, 0, 0, 0, 1, 0, 0, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.36870155904409
tensor([0, 1, 1, 0, 0, 1, 0, 1, 0, 0])
torch.Size([24])
Gradient norm: 2.2170839369627116
tensor([1, 0, 0, 1, 0, 0, 0, 1, 1, 1])
torch.Size([24])
Gradient norm: 3.060464128437986
tensor([1, 1, 1, 0, 0, 1, 1, 0, 0, 1])
torch.Size([24])
Gradient norm: 1.8486262245213425
tensor([1, 0, 0, 0, 0, 0, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 1.869915206822668
tensor([1, 1, 0, 0

Epoch 72/600 - Training:   0%|          | 0/29 [00:00<?, ?it/s]

tensor([0, 0, 1, 0, 1, 1, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 2.78028103735032
tensor([1, 0, 0, 0, 0, 0, 1, 1, 1, 1])
torch.Size([24])
Gradient norm: 2.237509075434093
tensor([1, 0, 1, 0, 0, 0, 1, 1, 0, 0])
torch.Size([24])
Gradient norm: 2.2451226547025236
tensor([1, 0, 0, 1, 1, 1, 0, 0, 1, 0])
torch.Size([24])
Gradient norm: 2.187881670973267
tensor([0, 1, 0, 1, 1, 0, 1, 1, 0, 1])
torch.Size([24])
Gradient norm: 2.1419461490457548
tensor([0, 0, 1, 0, 1, 0, 1, 1, 1, 0])
torch.Size([24])
Gradient norm: 1.9516552597407237
tensor([1, 0, 0, 0, 1, 0, 0, 1, 1, 0])
torch.Size([24])
Gradient norm: 3.060574255957662
tensor([0, 1, 1, 0, 0, 1, 0, 1, 0, 0])
torch.Size([24])
Gradient norm: 3.403884474604339
tensor([1, 0, 0, 1, 0, 0, 0, 1, 1, 1])
torch.Size([24])
Gradient norm: 2.9867845743995534
tensor([1, 1, 1, 0, 0, 1, 1, 0, 0, 1])
torch.Size([24])
Gradient norm: 2.438180803234395
tensor([1, 0, 0, 0, 0, 0, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 2.006165011896879
tensor([1, 1, 0, 0

Epoch 73/600 - Training:   0%|          | 0/29 [00:00<?, ?it/s]

tensor([0, 0, 1, 0, 1, 1, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 3.168370484714024
tensor([1, 0, 0, 0, 0, 0, 1, 1, 1, 1])
torch.Size([24])
Gradient norm: 2.2058074521962237
tensor([1, 0, 1, 0, 0, 0, 1, 1, 0, 0])
torch.Size([24])
Gradient norm: 2.4315503672854533
tensor([1, 0, 0, 1, 1, 1, 0, 0, 1, 0])
torch.Size([24])
Gradient norm: 2.1756163802188553
tensor([0, 1, 0, 1, 1, 0, 1, 1, 0, 1])
torch.Size([24])
Gradient norm: 2.139575722217224
tensor([0, 0, 1, 0, 1, 0, 1, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.1442719443348017
tensor([1, 0, 0, 0, 1, 0, 0, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.782080590154691
tensor([0, 1, 1, 0, 0, 1, 0, 1, 0, 0])
torch.Size([24])
Gradient norm: 3.5724193264566724
tensor([1, 0, 0, 1, 0, 0, 0, 1, 1, 1])
torch.Size([24])
Gradient norm: 3.4205634439287858
tensor([1, 1, 1, 0, 0, 1, 1, 0, 0, 1])
torch.Size([24])
Gradient norm: 2.091079997776227
tensor([1, 0, 0, 0, 0, 0, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 1.74260245321829
tensor([1, 1, 0,

Epoch 74/600 - Training:   0%|          | 0/29 [00:00<?, ?it/s]

tensor([0, 0, 1, 0, 1, 1, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 2.5315368333996178
tensor([1, 0, 0, 0, 0, 0, 1, 1, 1, 1])
torch.Size([24])
Gradient norm: 2.4338017803612972
tensor([1, 0, 1, 0, 0, 0, 1, 1, 0, 0])
torch.Size([24])
Gradient norm: 2.282226916909429
tensor([1, 0, 0, 1, 1, 1, 0, 0, 1, 0])
torch.Size([24])
Gradient norm: 2.787917836923411
tensor([0, 1, 0, 1, 1, 0, 1, 1, 0, 1])
torch.Size([24])
Gradient norm: 1.9444677889887583
tensor([0, 0, 1, 0, 1, 0, 1, 1, 1, 0])
torch.Size([24])
Gradient norm: 1.7984131213744132
tensor([1, 0, 0, 0, 1, 0, 0, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.613383822450195
tensor([0, 1, 1, 0, 0, 1, 0, 1, 0, 0])
torch.Size([24])
Gradient norm: 3.05381324012622
tensor([1, 0, 0, 1, 0, 0, 0, 1, 1, 1])
torch.Size([24])
Gradient norm: 3.3894912638740107
tensor([1, 1, 1, 0, 0, 1, 1, 0, 0, 1])
torch.Size([24])
Gradient norm: 1.5051038280133913
tensor([1, 0, 0, 0, 0, 0, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 1.6983053157683252
tensor([1, 1, 0

Epoch 75/600 - Training:   0%|          | 0/29 [00:00<?, ?it/s]

tensor([0, 0, 1, 0, 1, 1, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 3.6459907025462797
tensor([1, 0, 0, 0, 0, 0, 1, 1, 1, 1])
torch.Size([24])
Gradient norm: 2.841965611013153
tensor([1, 0, 1, 0, 0, 0, 1, 1, 0, 0])
torch.Size([24])
Gradient norm: 2.9207137815801354
tensor([1, 0, 0, 1, 1, 1, 0, 0, 1, 0])
torch.Size([24])
Gradient norm: 2.519252726874892
tensor([0, 1, 0, 1, 1, 0, 1, 1, 0, 1])
torch.Size([24])
Gradient norm: 1.6552779205163994
tensor([0, 0, 1, 0, 1, 0, 1, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.435904176046259
tensor([1, 0, 0, 0, 1, 0, 0, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.8635490439127014
tensor([0, 1, 1, 0, 0, 1, 0, 1, 0, 0])
torch.Size([24])
Gradient norm: 2.5465536827321427
tensor([1, 0, 0, 1, 0, 0, 0, 1, 1, 1])
torch.Size([24])
Gradient norm: 2.821463851956008
tensor([1, 1, 1, 0, 0, 1, 1, 0, 0, 1])
torch.Size([24])
Gradient norm: 1.8039255068248339
tensor([1, 0, 0, 0, 0, 0, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 2.2608312730603033
tensor([1, 1, 

Epoch 76/600 - Training:   0%|          | 0/29 [00:00<?, ?it/s]

tensor([0, 0, 1, 0, 1, 1, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 3.7505121641097174
tensor([1, 0, 0, 0, 0, 0, 1, 1, 1, 1])
torch.Size([24])
Gradient norm: 2.1384355603791456
tensor([1, 0, 1, 0, 0, 0, 1, 1, 0, 0])
torch.Size([24])
Gradient norm: 3.1185238432247018
tensor([1, 0, 0, 1, 1, 1, 0, 0, 1, 0])
torch.Size([24])
Gradient norm: 2.107201186031196
tensor([0, 1, 0, 1, 1, 0, 1, 1, 0, 1])
torch.Size([24])
Gradient norm: 1.8623113339712585
tensor([0, 0, 1, 0, 1, 0, 1, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.363784223820187
tensor([1, 0, 0, 0, 1, 0, 0, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.3066993373754827
tensor([0, 1, 1, 0, 0, 1, 0, 1, 0, 0])
torch.Size([24])
Gradient norm: 3.1027859774306084
tensor([1, 0, 0, 1, 0, 0, 0, 1, 1, 1])
torch.Size([24])
Gradient norm: 3.2505288061330053
tensor([1, 1, 1, 0, 0, 1, 1, 0, 0, 1])
torch.Size([24])
Gradient norm: 1.9067877221878196
tensor([1, 0, 0, 0, 0, 0, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 1.6300877817624078
tensor([1, 1

Epoch 77/600 - Training:   0%|          | 0/29 [00:00<?, ?it/s]

tensor([0, 0, 1, 0, 1, 1, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 3.7620336872387443
tensor([1, 0, 0, 0, 0, 0, 1, 1, 1, 1])
torch.Size([24])
Gradient norm: 2.347709524600734
tensor([1, 0, 1, 0, 0, 0, 1, 1, 0, 0])
torch.Size([24])
Gradient norm: 2.639445448400229
tensor([1, 0, 0, 1, 1, 1, 0, 0, 1, 0])
torch.Size([24])
Gradient norm: 2.514389790667977
tensor([0, 1, 0, 1, 1, 0, 1, 1, 0, 1])
torch.Size([24])
Gradient norm: 1.8343519339854844
tensor([0, 0, 1, 0, 1, 0, 1, 1, 1, 0])
torch.Size([24])
Gradient norm: 1.7483297347009765
tensor([1, 0, 0, 0, 1, 0, 0, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.7522148225163
tensor([0, 1, 1, 0, 0, 1, 0, 1, 0, 0])
torch.Size([24])
Gradient norm: 3.1280946826239275
tensor([1, 0, 0, 1, 0, 0, 0, 1, 1, 1])
torch.Size([24])
Gradient norm: 3.325617516583019
tensor([1, 1, 1, 0, 0, 1, 1, 0, 0, 1])
torch.Size([24])
Gradient norm: 2.0630818453769515
tensor([1, 0, 0, 0, 0, 0, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 1.485744035526049
tensor([1, 1, 0, 0

Epoch 78/600 - Training:   0%|          | 0/29 [00:00<?, ?it/s]

tensor([0, 0, 1, 0, 1, 1, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 2.9079805258573064
tensor([1, 0, 0, 0, 0, 0, 1, 1, 1, 1])
torch.Size([24])
Gradient norm: 1.8982482590288166
tensor([1, 0, 1, 0, 0, 0, 1, 1, 0, 0])
torch.Size([24])
Gradient norm: 2.4241572331522545
tensor([1, 0, 0, 1, 1, 1, 0, 0, 1, 0])
torch.Size([24])
Gradient norm: 2.3098707435946255
tensor([0, 1, 0, 1, 1, 0, 1, 1, 0, 1])
torch.Size([24])
Gradient norm: 2.1753057914208656
tensor([0, 0, 1, 0, 1, 0, 1, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.612240562039432
tensor([1, 0, 0, 0, 1, 0, 0, 1, 1, 0])
torch.Size([24])
Gradient norm: 3.6455646336833483
tensor([0, 1, 1, 0, 0, 1, 0, 1, 0, 0])
torch.Size([24])
Gradient norm: 2.7712560734064313
tensor([1, 0, 0, 1, 0, 0, 0, 1, 1, 1])
torch.Size([24])
Gradient norm: 2.872328116603554
tensor([1, 1, 1, 0, 0, 1, 1, 0, 0, 1])
torch.Size([24])
Gradient norm: 1.7278071812428668
tensor([1, 0, 0, 0, 0, 0, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 1.7623677958388158
tensor([1, 1

Epoch 79/600 - Training:   0%|          | 0/29 [00:00<?, ?it/s]

tensor([0, 0, 1, 0, 1, 1, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 2.7770622075295215
tensor([1, 0, 0, 0, 0, 0, 1, 1, 1, 1])
torch.Size([24])
Gradient norm: 1.6086983365468135
tensor([1, 0, 1, 0, 0, 0, 1, 1, 0, 0])
torch.Size([24])
Gradient norm: 2.0319381261288267
tensor([1, 0, 0, 1, 1, 1, 0, 0, 1, 0])
torch.Size([24])
Gradient norm: 2.9444466477251914
tensor([0, 1, 0, 1, 1, 0, 1, 1, 0, 1])
torch.Size([24])
Gradient norm: 1.846475660248608
tensor([0, 0, 1, 0, 1, 0, 1, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.1208530100796628
tensor([1, 0, 0, 0, 1, 0, 0, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.4804809123450027
tensor([0, 1, 1, 0, 0, 1, 0, 1, 0, 0])
torch.Size([24])
Gradient norm: 2.802044161135298
tensor([1, 0, 0, 1, 0, 0, 0, 1, 1, 1])
torch.Size([24])
Gradient norm: 2.452607067560883
tensor([1, 1, 1, 0, 0, 1, 1, 0, 0, 1])
torch.Size([24])
Gradient norm: 2.026526492501633
tensor([1, 0, 0, 0, 0, 0, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 2.03662098390247
tensor([1, 1, 0,

Epoch 80/600 - Training:   0%|          | 0/29 [00:00<?, ?it/s]

tensor([0, 0, 1, 0, 1, 1, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 4.177242234722784
tensor([1, 0, 0, 0, 0, 0, 1, 1, 1, 1])
torch.Size([24])
Gradient norm: 2.2703122415895125
tensor([1, 0, 1, 0, 0, 0, 1, 1, 0, 0])
torch.Size([24])
Gradient norm: 2.5716300568558332
tensor([1, 0, 0, 1, 1, 1, 0, 0, 1, 0])
torch.Size([24])
Gradient norm: 2.5045655917776926
tensor([0, 1, 0, 1, 1, 0, 1, 1, 0, 1])
torch.Size([24])
Gradient norm: 2.0003941013985562
tensor([0, 0, 1, 0, 1, 0, 1, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.6956454237348018
tensor([1, 0, 0, 0, 1, 0, 0, 1, 1, 0])
torch.Size([24])
Gradient norm: 3.2044838205912236
tensor([0, 1, 1, 0, 0, 1, 0, 1, 0, 0])
torch.Size([24])
Gradient norm: 3.4106586366666276
tensor([1, 0, 0, 1, 0, 0, 0, 1, 1, 1])
torch.Size([24])
Gradient norm: 3.2888520388450337
tensor([1, 1, 1, 0, 0, 1, 1, 0, 0, 1])
torch.Size([24])
Gradient norm: 1.7216743784967512
tensor([1, 0, 0, 0, 0, 0, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 1.5390334906031493
tensor([1, 

Epoch 81/600 - Training:   0%|          | 0/29 [00:00<?, ?it/s]

tensor([0, 0, 1, 0, 1, 1, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 3.634102182537095
tensor([1, 0, 0, 0, 0, 0, 1, 1, 1, 1])
torch.Size([24])
Gradient norm: 1.961437528188739
tensor([1, 0, 1, 0, 0, 0, 1, 1, 0, 0])
torch.Size([24])
Gradient norm: 2.53940905543244
tensor([1, 0, 0, 1, 1, 1, 0, 0, 1, 0])
torch.Size([24])
Gradient norm: 2.552695460706371
tensor([0, 1, 0, 1, 1, 0, 1, 1, 0, 1])
torch.Size([24])
Gradient norm: 2.0717836197060713
tensor([0, 0, 1, 0, 1, 0, 1, 1, 1, 0])
torch.Size([24])
Gradient norm: 1.4755217444047621
tensor([1, 0, 0, 0, 1, 0, 0, 1, 1, 0])
torch.Size([24])
Gradient norm: 3.023792610469776
tensor([0, 1, 1, 0, 0, 1, 0, 1, 0, 0])
torch.Size([24])
Gradient norm: 3.2738209726041982
tensor([1, 0, 0, 1, 0, 0, 0, 1, 1, 1])
torch.Size([24])
Gradient norm: 3.328417540632099
tensor([1, 1, 1, 0, 0, 1, 1, 0, 0, 1])
torch.Size([24])
Gradient norm: 1.7520774965162804
tensor([1, 0, 0, 0, 0, 0, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 1.6130828321326236
tensor([1, 1, 0, 

Epoch 82/600 - Training:   0%|          | 0/29 [00:00<?, ?it/s]

tensor([0, 0, 1, 0, 1, 1, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 3.6171516248453193
tensor([1, 0, 0, 0, 0, 0, 1, 1, 1, 1])
torch.Size([24])
Gradient norm: 1.9792042966854189
tensor([1, 0, 1, 0, 0, 0, 1, 1, 0, 0])
torch.Size([24])
Gradient norm: 2.0753927795351648
tensor([1, 0, 0, 1, 1, 1, 0, 0, 1, 0])
torch.Size([24])
Gradient norm: 2.578213912415931
tensor([0, 1, 0, 1, 1, 0, 1, 1, 0, 1])
torch.Size([24])
Gradient norm: 1.8255432092742125
tensor([0, 0, 1, 0, 1, 0, 1, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.5097848977177324
tensor([1, 0, 0, 0, 1, 0, 0, 1, 1, 0])
torch.Size([24])
Gradient norm: 3.465419478216703
tensor([0, 1, 1, 0, 0, 1, 0, 1, 0, 0])
torch.Size([24])
Gradient norm: 2.8452374756051166
tensor([1, 0, 0, 1, 0, 0, 0, 1, 1, 1])
torch.Size([24])
Gradient norm: 2.8792508406333277
tensor([1, 1, 1, 0, 0, 1, 1, 0, 0, 1])
torch.Size([24])
Gradient norm: 1.9339527546050017
tensor([1, 0, 0, 0, 0, 0, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 1.7843114410151302
tensor([1, 1

Epoch 83/600 - Training:   0%|          | 0/29 [00:00<?, ?it/s]

tensor([0, 0, 1, 0, 1, 1, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 3.160034424239738
tensor([1, 0, 0, 0, 0, 0, 1, 1, 1, 1])
torch.Size([24])
Gradient norm: 3.0493702322943665
tensor([1, 0, 1, 0, 0, 0, 1, 1, 0, 0])
torch.Size([24])
Gradient norm: 2.196084391105607
tensor([1, 0, 0, 1, 1, 1, 0, 0, 1, 0])
torch.Size([24])
Gradient norm: 3.1877542628988667
tensor([0, 1, 0, 1, 1, 0, 1, 1, 0, 1])
torch.Size([24])
Gradient norm: 2.575344441980858
tensor([0, 0, 1, 0, 1, 0, 1, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.6856246696273196
tensor([1, 0, 0, 0, 1, 0, 0, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.3769302283994866
tensor([0, 1, 1, 0, 0, 1, 0, 1, 0, 0])
torch.Size([24])
Gradient norm: 2.348227697109382
tensor([1, 0, 0, 1, 0, 0, 0, 1, 1, 1])
torch.Size([24])
Gradient norm: 3.0767905514456033
tensor([1, 1, 1, 0, 0, 1, 1, 0, 0, 1])
torch.Size([24])
Gradient norm: 1.707576762937184
tensor([1, 0, 0, 0, 0, 0, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 1.9700843311699217
tensor([1, 1, 0

Epoch 84/600 - Training:   0%|          | 0/29 [00:00<?, ?it/s]

tensor([0, 0, 1, 0, 1, 1, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 3.8244504146570204
tensor([1, 0, 0, 0, 0, 0, 1, 1, 1, 1])
torch.Size([24])
Gradient norm: 2.1780194010600367
tensor([1, 0, 1, 0, 0, 0, 1, 1, 0, 0])
torch.Size([24])
Gradient norm: 2.3901840249760733
tensor([1, 0, 0, 1, 1, 1, 0, 0, 1, 0])
torch.Size([24])
Gradient norm: 2.39622237186885
tensor([0, 1, 0, 1, 1, 0, 1, 1, 0, 1])
torch.Size([24])
Gradient norm: 1.7713024488255782
tensor([0, 0, 1, 0, 1, 0, 1, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.057532137703693
tensor([1, 0, 0, 0, 1, 0, 0, 1, 1, 0])
torch.Size([24])
Gradient norm: 3.4793024297030133
tensor([0, 1, 1, 0, 0, 1, 0, 1, 0, 0])
torch.Size([24])
Gradient norm: 3.5360547572181744
tensor([1, 0, 0, 1, 0, 0, 0, 1, 1, 1])
torch.Size([24])
Gradient norm: 3.152176893258194
tensor([1, 1, 1, 0, 0, 1, 1, 0, 0, 1])
torch.Size([24])
Gradient norm: 1.6710146286152123
tensor([1, 0, 0, 0, 0, 0, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 1.4622964930161357
tensor([1, 1, 

Epoch 85/600 - Training:   0%|          | 0/29 [00:00<?, ?it/s]

tensor([0, 0, 1, 0, 1, 1, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 4.306244899124678
tensor([1, 0, 0, 0, 0, 0, 1, 1, 1, 1])
torch.Size([24])
Gradient norm: 2.407367139739607
tensor([1, 0, 1, 0, 0, 0, 1, 1, 0, 0])
torch.Size([24])
Gradient norm: 2.1143016699836004
tensor([1, 0, 0, 1, 1, 1, 0, 0, 1, 0])
torch.Size([24])
Gradient norm: 2.0665567672080583
tensor([0, 1, 0, 1, 1, 0, 1, 1, 0, 1])
torch.Size([24])
Gradient norm: 2.5816177177977395
tensor([0, 0, 1, 0, 1, 0, 1, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.081818861506716
tensor([1, 0, 0, 0, 1, 0, 0, 1, 1, 0])
torch.Size([24])
Gradient norm: 1.99687710346112
tensor([0, 1, 1, 0, 0, 1, 0, 1, 0, 0])
torch.Size([24])
Gradient norm: 4.025625173773188
tensor([1, 0, 0, 1, 0, 0, 0, 1, 1, 1])
torch.Size([24])
Gradient norm: 2.926490740783695
tensor([1, 1, 1, 0, 0, 1, 1, 0, 0, 1])
torch.Size([24])
Gradient norm: 1.7030496896156098
tensor([1, 0, 0, 0, 0, 0, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 1.9551948220245778
tensor([1, 1, 0, 

Epoch 86/600 - Training:   0%|          | 0/29 [00:00<?, ?it/s]

tensor([0, 0, 1, 0, 1, 1, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 3.255783864620006
tensor([1, 0, 0, 0, 0, 0, 1, 1, 1, 1])
torch.Size([24])
Gradient norm: 1.4486062516257632
tensor([1, 0, 1, 0, 0, 0, 1, 1, 0, 0])
torch.Size([24])
Gradient norm: 2.418646227508533
tensor([1, 0, 0, 1, 1, 1, 0, 0, 1, 0])
torch.Size([24])
Gradient norm: 1.9696807922326607
tensor([0, 1, 0, 1, 1, 0, 1, 1, 0, 1])
torch.Size([24])
Gradient norm: 1.7889092936576507
tensor([0, 0, 1, 0, 1, 0, 1, 1, 1, 0])
torch.Size([24])
Gradient norm: 3.250600172770887
tensor([1, 0, 0, 0, 1, 0, 0, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.552088938080908
tensor([0, 1, 1, 0, 0, 1, 0, 1, 0, 0])
torch.Size([24])
Gradient norm: 3.0334008741597933
tensor([1, 0, 0, 1, 0, 0, 0, 1, 1, 1])
torch.Size([24])
Gradient norm: 3.8114314654624377
tensor([1, 1, 1, 0, 0, 1, 1, 0, 0, 1])
torch.Size([24])
Gradient norm: 2.0979881052190223
tensor([1, 0, 0, 0, 0, 0, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 1.5176505926149033
tensor([1, 1, 

Epoch 87/600 - Training:   0%|          | 0/29 [00:00<?, ?it/s]

tensor([0, 0, 1, 0, 1, 1, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 3.1537263295362012
tensor([1, 0, 0, 0, 0, 0, 1, 1, 1, 1])
torch.Size([24])
Gradient norm: 2.234121635213933
tensor([1, 0, 1, 0, 0, 0, 1, 1, 0, 0])
torch.Size([24])
Gradient norm: 1.9688275322232252
tensor([1, 0, 0, 1, 1, 1, 0, 0, 1, 0])
torch.Size([24])
Gradient norm: 2.15399394858959
tensor([0, 1, 0, 1, 1, 0, 1, 1, 0, 1])
torch.Size([24])
Gradient norm: 1.946081539647979
tensor([0, 0, 1, 0, 1, 0, 1, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.3774022862926674
tensor([1, 0, 0, 0, 1, 0, 0, 1, 1, 0])
torch.Size([24])
Gradient norm: 3.370500558040863
tensor([0, 1, 1, 0, 0, 1, 0, 1, 0, 0])
torch.Size([24])
Gradient norm: 2.699344363693957
tensor([1, 0, 0, 1, 0, 0, 0, 1, 1, 1])
torch.Size([24])
Gradient norm: 2.3986317000777397
tensor([1, 1, 1, 0, 0, 1, 1, 0, 0, 1])
torch.Size([24])
Gradient norm: 1.7908070459162453
tensor([1, 0, 0, 0, 0, 0, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 1.9764450508655917
tensor([1, 1, 0,

Epoch 88/600 - Training:   0%|          | 0/29 [00:00<?, ?it/s]

tensor([0, 0, 1, 0, 1, 1, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 3.7789625290169706
tensor([1, 0, 0, 0, 0, 0, 1, 1, 1, 1])
torch.Size([24])
Gradient norm: 2.399861744394914
tensor([1, 0, 1, 0, 0, 0, 1, 1, 0, 0])
torch.Size([24])
Gradient norm: 2.3595138822781094
tensor([1, 0, 0, 1, 1, 1, 0, 0, 1, 0])
torch.Size([24])
Gradient norm: 3.3301035202499447
tensor([0, 1, 0, 1, 1, 0, 1, 1, 0, 1])
torch.Size([24])
Gradient norm: 1.804854726653945
tensor([0, 0, 1, 0, 1, 0, 1, 1, 1, 0])
torch.Size([24])
Gradient norm: 1.648624723016681
tensor([1, 0, 0, 0, 1, 0, 0, 1, 1, 0])
torch.Size([24])
Gradient norm: 3.1344948141695435
tensor([0, 1, 1, 0, 0, 1, 0, 1, 0, 0])
torch.Size([24])
Gradient norm: 2.955283215656248
tensor([1, 0, 0, 1, 0, 0, 0, 1, 1, 1])
torch.Size([24])
Gradient norm: 3.67619679596041
tensor([1, 1, 1, 0, 0, 1, 1, 0, 0, 1])
torch.Size([24])
Gradient norm: 1.83336962099581
tensor([1, 0, 0, 0, 0, 0, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 1.9267698732103589
tensor([1, 1, 0, 0

Epoch 89/600 - Training:   0%|          | 0/29 [00:00<?, ?it/s]

tensor([0, 0, 1, 0, 1, 1, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 3.3158063114705554
tensor([1, 0, 0, 0, 0, 0, 1, 1, 1, 1])
torch.Size([24])
Gradient norm: 2.0958323378914927
tensor([1, 0, 1, 0, 0, 0, 1, 1, 0, 0])
torch.Size([24])
Gradient norm: 2.3406284080535658
tensor([1, 0, 0, 1, 1, 1, 0, 0, 1, 0])
torch.Size([24])
Gradient norm: 2.617771680962146
tensor([0, 1, 0, 1, 1, 0, 1, 1, 0, 1])
torch.Size([24])
Gradient norm: 1.840987048452801
tensor([0, 0, 1, 0, 1, 0, 1, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.204014907431899
tensor([1, 0, 0, 0, 1, 0, 0, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.4999056836981812
tensor([0, 1, 1, 0, 0, 1, 0, 1, 0, 0])
torch.Size([24])
Gradient norm: 2.8073802807238235
tensor([1, 0, 0, 1, 0, 0, 0, 1, 1, 1])
torch.Size([24])
Gradient norm: 2.955333945239721
tensor([1, 1, 1, 0, 0, 1, 1, 0, 0, 1])
torch.Size([24])
Gradient norm: 2.084249159557749
tensor([1, 0, 0, 0, 0, 0, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 2.042419819954804
tensor([1, 1, 0,

Epoch 90/600 - Training:   0%|          | 0/29 [00:00<?, ?it/s]

tensor([0, 0, 1, 0, 1, 1, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 3.2073177563346733
tensor([1, 0, 0, 0, 0, 0, 1, 1, 1, 1])
torch.Size([24])
Gradient norm: 1.877222696611998
tensor([1, 0, 1, 0, 0, 0, 1, 1, 0, 0])
torch.Size([24])
Gradient norm: 1.9401866785462378
tensor([1, 0, 0, 1, 1, 1, 0, 0, 1, 0])
torch.Size([24])
Gradient norm: 2.8048838851623508
tensor([0, 1, 0, 1, 1, 0, 1, 1, 0, 1])
torch.Size([24])
Gradient norm: 1.9263958800657734
tensor([0, 0, 1, 0, 1, 0, 1, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.867767445227301
tensor([1, 0, 0, 0, 1, 0, 0, 1, 1, 0])
torch.Size([24])
Gradient norm: 3.165405416005981
tensor([0, 1, 1, 0, 0, 1, 0, 1, 0, 0])
torch.Size([24])
Gradient norm: 2.843083328475205
tensor([1, 0, 0, 1, 0, 0, 0, 1, 1, 1])
torch.Size([24])
Gradient norm: 3.416078344023948
tensor([1, 1, 1, 0, 0, 1, 1, 0, 0, 1])
torch.Size([24])
Gradient norm: 2.029772838784024
tensor([1, 0, 0, 0, 0, 0, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 2.470534221048382
tensor([1, 1, 0, 

Epoch 91/600 - Training:   0%|          | 0/29 [00:00<?, ?it/s]

tensor([0, 0, 1, 0, 1, 1, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 3.041305909704824
tensor([1, 0, 0, 0, 0, 0, 1, 1, 1, 1])
torch.Size([24])
Gradient norm: 1.9120810956463186
tensor([1, 0, 1, 0, 0, 0, 1, 1, 0, 0])
torch.Size([24])
Gradient norm: 2.6368606751374553
tensor([1, 0, 0, 1, 1, 1, 0, 0, 1, 0])
torch.Size([24])
Gradient norm: 2.165156146991236
tensor([0, 1, 0, 1, 1, 0, 1, 1, 0, 1])
torch.Size([24])
Gradient norm: 1.818920674997954
tensor([0, 0, 1, 0, 1, 0, 1, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.5631784455856708
tensor([1, 0, 0, 0, 1, 0, 0, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.629831837431556
tensor([0, 1, 1, 0, 0, 1, 0, 1, 0, 0])
torch.Size([24])
Gradient norm: 2.6181737742965194
tensor([1, 0, 0, 1, 0, 0, 0, 1, 1, 1])
torch.Size([24])
Gradient norm: 3.529119960269946
tensor([1, 1, 1, 0, 0, 1, 1, 0, 0, 1])
torch.Size([24])
Gradient norm: 1.862598218404187
tensor([1, 0, 0, 0, 0, 0, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 1.5570256358364032
tensor([1, 1, 0,

Epoch 92/600 - Training:   0%|          | 0/29 [00:00<?, ?it/s]

tensor([0, 0, 1, 0, 1, 1, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 2.708815583510597
tensor([1, 0, 0, 0, 0, 0, 1, 1, 1, 1])
torch.Size([24])
Gradient norm: 2.0384863947261223
tensor([1, 0, 1, 0, 0, 0, 1, 1, 0, 0])
torch.Size([24])
Gradient norm: 1.821881271431866
tensor([1, 0, 0, 1, 1, 1, 0, 0, 1, 0])
torch.Size([24])
Gradient norm: 2.5601734545140107
tensor([0, 1, 0, 1, 1, 0, 1, 1, 0, 1])
torch.Size([24])
Gradient norm: 1.6385000604821465
tensor([0, 0, 1, 0, 1, 0, 1, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.361924883821291
tensor([1, 0, 0, 0, 1, 0, 0, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.742877784643088
tensor([0, 1, 1, 0, 0, 1, 0, 1, 0, 0])
torch.Size([24])
Gradient norm: 2.1333661191792843
tensor([1, 0, 0, 1, 0, 0, 0, 1, 1, 1])
torch.Size([24])
Gradient norm: 3.3484882096362347
tensor([1, 1, 1, 0, 0, 1, 1, 0, 0, 1])
torch.Size([24])
Gradient norm: 1.6975503639758667
tensor([1, 0, 0, 0, 0, 0, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 1.5469899632589232
tensor([1, 1, 

Epoch 93/600 - Training:   0%|          | 0/29 [00:00<?, ?it/s]

tensor([0, 0, 1, 0, 1, 1, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 3.909399338511846
tensor([1, 0, 0, 0, 0, 0, 1, 1, 1, 1])
torch.Size([24])
Gradient norm: 1.993149062725791
tensor([1, 0, 1, 0, 0, 0, 1, 1, 0, 0])
torch.Size([24])
Gradient norm: 2.505234835329686
tensor([1, 0, 0, 1, 1, 1, 0, 0, 1, 0])
torch.Size([24])
Gradient norm: 2.8908545478951346
tensor([0, 1, 0, 1, 1, 0, 1, 1, 0, 1])
torch.Size([24])
Gradient norm: 1.9539631666686366
tensor([0, 0, 1, 0, 1, 0, 1, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.9134465596398074
tensor([1, 0, 0, 0, 1, 0, 0, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.3251270291833857
tensor([0, 1, 1, 0, 0, 1, 0, 1, 0, 0])
torch.Size([24])
Gradient norm: 3.2408077595580678
tensor([1, 0, 0, 1, 0, 0, 0, 1, 1, 1])
torch.Size([24])
Gradient norm: 3.385288633303156
tensor([1, 1, 1, 0, 0, 1, 1, 0, 0, 1])
torch.Size([24])
Gradient norm: 1.7499653191008442
tensor([1, 0, 0, 0, 0, 0, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 1.6437607143059312
tensor([1, 1, 

Epoch 94/600 - Training:   0%|          | 0/29 [00:00<?, ?it/s]

tensor([0, 0, 1, 0, 1, 1, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 3.5474835269972202
tensor([1, 0, 0, 0, 0, 0, 1, 1, 1, 1])
torch.Size([24])
Gradient norm: 2.182918708082755
tensor([1, 0, 1, 0, 0, 0, 1, 1, 0, 0])
torch.Size([24])
Gradient norm: 2.649950575418174
tensor([1, 0, 0, 1, 1, 1, 0, 0, 1, 0])
torch.Size([24])
Gradient norm: 1.9315635797369306
tensor([0, 1, 0, 1, 1, 0, 1, 1, 0, 1])
torch.Size([24])
Gradient norm: 1.9604669966781918
tensor([0, 0, 1, 0, 1, 0, 1, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.9604761730590847
tensor([1, 0, 0, 0, 1, 0, 0, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.708842727765792
tensor([0, 1, 1, 0, 0, 1, 0, 1, 0, 0])
torch.Size([24])
Gradient norm: 3.2616616298950047
tensor([1, 0, 0, 1, 0, 0, 0, 1, 1, 1])
torch.Size([24])
Gradient norm: 3.2506710028300594
tensor([1, 1, 1, 0, 0, 1, 1, 0, 0, 1])
torch.Size([24])
Gradient norm: 2.0038116668935197
tensor([1, 0, 0, 0, 0, 0, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 2.0376693499127874
tensor([1, 1,

Epoch 95/600 - Training:   0%|          | 0/29 [00:00<?, ?it/s]

tensor([0, 0, 1, 0, 1, 1, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 4.1839056691744165
tensor([1, 0, 0, 0, 0, 0, 1, 1, 1, 1])
torch.Size([24])
Gradient norm: 2.203611920745407
tensor([1, 0, 1, 0, 0, 0, 1, 1, 0, 0])
torch.Size([24])
Gradient norm: 2.410202280810439
tensor([1, 0, 0, 1, 1, 1, 0, 0, 1, 0])
torch.Size([24])
Gradient norm: 3.0459767982680463
tensor([0, 1, 0, 1, 1, 0, 1, 1, 0, 1])
torch.Size([24])
Gradient norm: 2.499978756292081
tensor([0, 0, 1, 0, 1, 0, 1, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.5752816850984632
tensor([1, 0, 0, 0, 1, 0, 0, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.788006583499858
tensor([0, 1, 1, 0, 0, 1, 0, 1, 0, 0])
torch.Size([24])
Gradient norm: 2.7113951781408185
tensor([1, 0, 0, 1, 0, 0, 0, 1, 1, 1])
torch.Size([24])
Gradient norm: 3.0426482714057372
tensor([1, 1, 1, 0, 0, 1, 1, 0, 0, 1])
torch.Size([24])
Gradient norm: 1.8115021013839683
tensor([1, 0, 0, 0, 0, 0, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 2.0026461556124935
tensor([1, 1, 

Epoch 96/600 - Training:   0%|          | 0/29 [00:00<?, ?it/s]

tensor([0, 0, 1, 0, 1, 1, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 3.9913409660879764
tensor([1, 0, 0, 0, 0, 0, 1, 1, 1, 1])
torch.Size([24])
Gradient norm: 2.6988181818290053
tensor([1, 0, 1, 0, 0, 0, 1, 1, 0, 0])
torch.Size([24])
Gradient norm: 2.924474428719381
tensor([1, 0, 0, 1, 1, 1, 0, 0, 1, 0])
torch.Size([24])
Gradient norm: 2.871510704351382
tensor([0, 1, 0, 1, 1, 0, 1, 1, 0, 1])
torch.Size([24])
Gradient norm: 1.818876236808741
tensor([0, 0, 1, 0, 1, 0, 1, 1, 1, 0])
torch.Size([24])
Gradient norm: 1.7352758861176436
tensor([1, 0, 0, 0, 1, 0, 0, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.506080421083628
tensor([0, 1, 1, 0, 0, 1, 0, 1, 0, 0])
torch.Size([24])
Gradient norm: 2.8602002615585436
tensor([1, 0, 0, 1, 0, 0, 0, 1, 1, 1])
torch.Size([24])
Gradient norm: 3.484555510665211
tensor([1, 1, 1, 0, 0, 1, 1, 0, 0, 1])
torch.Size([24])
Gradient norm: 1.9225675536971663
tensor([1, 0, 0, 0, 0, 0, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 1.7809300697590398
tensor([1, 1, 0

Epoch 97/600 - Training:   0%|          | 0/29 [00:00<?, ?it/s]

tensor([0, 0, 1, 0, 1, 1, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 3.2273222984936734
tensor([1, 0, 0, 0, 0, 0, 1, 1, 1, 1])
torch.Size([24])
Gradient norm: 3.033430622335919
tensor([1, 0, 1, 0, 0, 0, 1, 1, 0, 0])
torch.Size([24])
Gradient norm: 2.362851175726227
tensor([1, 0, 0, 1, 1, 1, 0, 0, 1, 0])
torch.Size([24])
Gradient norm: 2.9563604512127113
tensor([0, 1, 0, 1, 1, 0, 1, 1, 0, 1])
torch.Size([24])
Gradient norm: 2.0101273863081497
tensor([0, 0, 1, 0, 1, 0, 1, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.148726698339876
tensor([1, 0, 0, 0, 1, 0, 0, 1, 1, 0])
torch.Size([24])
Gradient norm: 3.23555085532073
tensor([0, 1, 1, 0, 0, 1, 0, 1, 0, 0])
torch.Size([24])
Gradient norm: 3.5685060210190347
tensor([1, 0, 0, 1, 0, 0, 0, 1, 1, 1])
torch.Size([24])
Gradient norm: 3.1388602441600013
tensor([1, 1, 1, 0, 0, 1, 1, 0, 0, 1])
torch.Size([24])
Gradient norm: 2.059070664254082
tensor([1, 0, 0, 0, 0, 0, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 1.885574710516436
tensor([1, 1, 0, 

Epoch 98/600 - Training:   0%|          | 0/29 [00:00<?, ?it/s]

tensor([0, 0, 1, 0, 1, 1, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 2.8647534885095207
tensor([1, 0, 0, 0, 0, 0, 1, 1, 1, 1])
torch.Size([24])
Gradient norm: 2.22595803272818
tensor([1, 0, 1, 0, 0, 0, 1, 1, 0, 0])
torch.Size([24])
Gradient norm: 2.3294242350230627
tensor([1, 0, 0, 1, 1, 1, 0, 0, 1, 0])
torch.Size([24])
Gradient norm: 1.911738341767191
tensor([0, 1, 0, 1, 1, 0, 1, 1, 0, 1])
torch.Size([24])
Gradient norm: 1.9380588629454336
tensor([0, 0, 1, 0, 1, 0, 1, 1, 1, 0])
torch.Size([24])
Gradient norm: 1.5872900564949763
tensor([1, 0, 0, 0, 1, 0, 0, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.148579379176286
tensor([0, 1, 1, 0, 0, 1, 0, 1, 0, 0])
torch.Size([24])
Gradient norm: 3.1677552795598127
tensor([1, 0, 0, 1, 0, 0, 0, 1, 1, 1])
torch.Size([24])
Gradient norm: 3.3036461820927543
tensor([1, 1, 1, 0, 0, 1, 1, 0, 0, 1])
torch.Size([24])
Gradient norm: 2.359926095516958
tensor([1, 0, 0, 0, 0, 0, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 1.5671556906688269
tensor([1, 1, 0

Epoch 99/600 - Training:   0%|          | 0/29 [00:00<?, ?it/s]

tensor([0, 0, 1, 0, 1, 1, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 3.698795221609417
tensor([1, 0, 0, 0, 0, 0, 1, 1, 1, 1])
torch.Size([24])
Gradient norm: 2.0140339839557306
tensor([1, 0, 1, 0, 0, 0, 1, 1, 0, 0])
torch.Size([24])
Gradient norm: 2.742910008724234
tensor([1, 0, 0, 1, 1, 1, 0, 0, 1, 0])
torch.Size([24])
Gradient norm: 2.394957787296626
tensor([0, 1, 0, 1, 1, 0, 1, 1, 0, 1])
torch.Size([24])
Gradient norm: 2.2844723788914303
tensor([0, 0, 1, 0, 1, 0, 1, 1, 1, 0])
torch.Size([24])
Gradient norm: 1.8555153159565634
tensor([1, 0, 0, 0, 1, 0, 0, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.8173486718757457
tensor([0, 1, 1, 0, 0, 1, 0, 1, 0, 0])
torch.Size([24])
Gradient norm: 3.418830662773299
tensor([1, 0, 0, 1, 0, 0, 0, 1, 1, 1])
torch.Size([24])
Gradient norm: 2.8981112679930057
tensor([1, 1, 1, 0, 0, 1, 1, 0, 0, 1])
torch.Size([24])
Gradient norm: 2.051909770500435
tensor([1, 0, 0, 0, 0, 0, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 1.7974331152168732
tensor([1, 1, 0

Epoch 100/600 - Training:   0%|          | 0/29 [00:00<?, ?it/s]

tensor([0, 0, 1, 0, 1, 1, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 3.648148042940732
tensor([1, 0, 0, 0, 0, 0, 1, 1, 1, 1])
torch.Size([24])
Gradient norm: 2.3956450399089912
tensor([1, 0, 1, 0, 0, 0, 1, 1, 0, 0])
torch.Size([24])
Gradient norm: 2.362129340935541
tensor([1, 0, 0, 1, 1, 1, 0, 0, 1, 0])
torch.Size([24])
Gradient norm: 2.924308236702771
tensor([0, 1, 0, 1, 1, 0, 1, 1, 0, 1])
torch.Size([24])
Gradient norm: 1.6390500438098052
tensor([0, 0, 1, 0, 1, 0, 1, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.735897927051288
tensor([1, 0, 0, 0, 1, 0, 0, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.915929177583816
tensor([0, 1, 1, 0, 0, 1, 0, 1, 0, 0])
torch.Size([24])
Gradient norm: 3.089800347506742
tensor([1, 0, 0, 1, 0, 0, 0, 1, 1, 1])
torch.Size([24])
Gradient norm: 3.6379890624544626
tensor([1, 1, 1, 0, 0, 1, 1, 0, 0, 1])
torch.Size([24])
Gradient norm: 2.025954783677839
tensor([1, 0, 0, 0, 0, 0, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 1.7312686233657393
tensor([1, 1, 0, 

Epoch 101/600 - Training:   0%|          | 0/29 [00:00<?, ?it/s]

tensor([0, 0, 1, 0, 1, 1, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 3.1161887451844947
tensor([1, 0, 0, 0, 0, 0, 1, 1, 1, 1])
torch.Size([24])
Gradient norm: 2.782222619846953
tensor([1, 0, 1, 0, 0, 0, 1, 1, 0, 0])
torch.Size([24])
Gradient norm: 1.9179491909493476
tensor([1, 0, 0, 1, 1, 1, 0, 0, 1, 0])
torch.Size([24])
Gradient norm: 3.2433577050122615
tensor([0, 1, 0, 1, 1, 0, 1, 1, 0, 1])
torch.Size([24])
Gradient norm: 1.8875663480386906
tensor([0, 0, 1, 0, 1, 0, 1, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.941306707619961
tensor([1, 0, 0, 0, 1, 0, 0, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.314463090983223
tensor([0, 1, 1, 0, 0, 1, 0, 1, 0, 0])
torch.Size([24])
Gradient norm: 3.593734412003748
tensor([1, 0, 0, 1, 0, 0, 0, 1, 1, 1])
torch.Size([24])
Gradient norm: 2.971134359922701
tensor([1, 1, 1, 0, 0, 1, 1, 0, 0, 1])
torch.Size([24])
Gradient norm: 1.8268293400341071
tensor([1, 0, 0, 0, 0, 0, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 1.9114096465442674
tensor([1, 1, 0

Epoch 102/600 - Training:   0%|          | 0/29 [00:00<?, ?it/s]

tensor([0, 0, 1, 0, 1, 1, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 3.047211072894134
tensor([1, 0, 0, 0, 0, 0, 1, 1, 1, 1])
torch.Size([24])
Gradient norm: 2.5985233504578584
tensor([1, 0, 1, 0, 0, 0, 1, 1, 0, 0])
torch.Size([24])
Gradient norm: 2.573006237930568
tensor([1, 0, 0, 1, 1, 1, 0, 0, 1, 0])
torch.Size([24])
Gradient norm: 2.9419695400532064
tensor([0, 1, 0, 1, 1, 0, 1, 1, 0, 1])
torch.Size([24])
Gradient norm: 2.5655780398198247
tensor([0, 0, 1, 0, 1, 0, 1, 1, 1, 0])
torch.Size([24])
Gradient norm: 1.8649501074451018
tensor([1, 0, 0, 0, 1, 0, 0, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.97116324755458
tensor([0, 1, 1, 0, 0, 1, 0, 1, 0, 0])
torch.Size([24])
Gradient norm: 3.1770954443148867
tensor([1, 0, 0, 1, 0, 0, 0, 1, 1, 1])
torch.Size([24])
Gradient norm: 3.2740033012974648
tensor([1, 1, 1, 0, 0, 1, 1, 0, 0, 1])
torch.Size([24])
Gradient norm: 1.9352544977379733
tensor([1, 0, 0, 0, 0, 0, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 1.9533521827365983
tensor([1, 1, 

Epoch 103/600 - Training:   0%|          | 0/29 [00:00<?, ?it/s]

tensor([0, 0, 1, 0, 1, 1, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 4.23326664979527
tensor([1, 0, 0, 0, 0, 0, 1, 1, 1, 1])
torch.Size([24])
Gradient norm: 3.0580954333806343
tensor([1, 0, 1, 0, 0, 0, 1, 1, 0, 0])
torch.Size([24])
Gradient norm: 2.354661134354172
tensor([1, 0, 0, 1, 1, 1, 0, 0, 1, 0])
torch.Size([24])
Gradient norm: 2.3499594505979977
tensor([0, 1, 0, 1, 1, 0, 1, 1, 0, 1])
torch.Size([24])
Gradient norm: 1.8039542890999887
tensor([0, 0, 1, 0, 1, 0, 1, 1, 1, 0])
torch.Size([24])
Gradient norm: 1.5983397642413872
tensor([1, 0, 0, 0, 1, 0, 0, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.51222639674439
tensor([0, 1, 1, 0, 0, 1, 0, 1, 0, 0])
torch.Size([24])
Gradient norm: 3.402991318905312
tensor([1, 0, 0, 1, 0, 0, 0, 1, 1, 1])
torch.Size([24])
Gradient norm: 3.296785231104254
tensor([1, 1, 1, 0, 0, 1, 1, 0, 0, 1])
torch.Size([24])
Gradient norm: 2.044890315056925
tensor([1, 0, 0, 0, 0, 0, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 1.6289752563091762
tensor([1, 1, 0, 0

Epoch 104/600 - Training:   0%|          | 0/29 [00:00<?, ?it/s]

tensor([0, 0, 1, 0, 1, 1, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 3.174405506343939
tensor([1, 0, 0, 0, 0, 0, 1, 1, 1, 1])
torch.Size([24])
Gradient norm: 2.830082080230788
tensor([1, 0, 1, 0, 0, 0, 1, 1, 0, 0])
torch.Size([24])
Gradient norm: 2.67556268626546
tensor([1, 0, 0, 1, 1, 1, 0, 0, 1, 0])
torch.Size([24])
Gradient norm: 2.8473176418246675
tensor([0, 1, 0, 1, 1, 0, 1, 1, 0, 1])
torch.Size([24])
Gradient norm: 1.839037820016136
tensor([0, 0, 1, 0, 1, 0, 1, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.7083031811390286
tensor([1, 0, 0, 0, 1, 0, 0, 1, 1, 0])
torch.Size([24])
Gradient norm: 3.1543476826452244
tensor([0, 1, 1, 0, 0, 1, 0, 1, 0, 0])
torch.Size([24])
Gradient norm: 3.0616670809208006
tensor([1, 0, 0, 1, 0, 0, 0, 1, 1, 1])
torch.Size([24])
Gradient norm: 3.54271116829228
tensor([1, 1, 1, 0, 0, 1, 1, 0, 0, 1])
torch.Size([24])
Gradient norm: 1.837638040110364
tensor([1, 0, 0, 0, 0, 0, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 1.75564397382708
tensor([1, 1, 0, 0, 

Epoch 105/600 - Training:   0%|          | 0/29 [00:00<?, ?it/s]

tensor([0, 0, 1, 0, 1, 1, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 3.755553308678344
tensor([1, 0, 0, 0, 0, 0, 1, 1, 1, 1])
torch.Size([24])
Gradient norm: 2.0153828806512504
tensor([1, 0, 1, 0, 0, 0, 1, 1, 0, 0])
torch.Size([24])
Gradient norm: 2.155643895253913
tensor([1, 0, 0, 1, 1, 1, 0, 0, 1, 0])
torch.Size([24])
Gradient norm: 2.2895665879682374
tensor([0, 1, 0, 1, 1, 0, 1, 1, 0, 1])
torch.Size([24])
Gradient norm: 1.7661654930175497
tensor([0, 0, 1, 0, 1, 0, 1, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.5373668785550025
tensor([1, 0, 0, 0, 1, 0, 0, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.706802897491687
tensor([0, 1, 1, 0, 0, 1, 0, 1, 0, 0])
torch.Size([24])
Gradient norm: 3.173411417568895
tensor([1, 0, 0, 1, 0, 0, 0, 1, 1, 1])
torch.Size([24])
Gradient norm: 2.9596272329831192
tensor([1, 1, 1, 0, 0, 1, 1, 0, 0, 1])
torch.Size([24])
Gradient norm: 1.7603512924867764
tensor([1, 0, 0, 0, 0, 0, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 1.84284772077911
tensor([1, 1, 0,

Epoch 106/600 - Training:   0%|          | 0/29 [00:00<?, ?it/s]

tensor([0, 0, 1, 0, 1, 1, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 3.8749962975467684
tensor([1, 0, 0, 0, 0, 0, 1, 1, 1, 1])
torch.Size([24])
Gradient norm: 1.918259225432245
tensor([1, 0, 1, 0, 0, 0, 1, 1, 0, 0])
torch.Size([24])
Gradient norm: 2.5859642260808076
tensor([1, 0, 0, 1, 1, 1, 0, 0, 1, 0])
torch.Size([24])
Gradient norm: 2.646711310806301
tensor([0, 1, 0, 1, 1, 0, 1, 1, 0, 1])
torch.Size([24])
Gradient norm: 2.2011768797864493
tensor([0, 0, 1, 0, 1, 0, 1, 1, 1, 0])
torch.Size([24])
Gradient norm: 1.8962408254877143
tensor([1, 0, 0, 0, 1, 0, 0, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.3001670366215676
tensor([0, 1, 1, 0, 0, 1, 0, 1, 0, 0])
torch.Size([24])
Gradient norm: 3.542669958009274
tensor([1, 0, 0, 1, 0, 0, 0, 1, 1, 1])
torch.Size([24])
Gradient norm: 2.5302395111005085
tensor([1, 1, 1, 0, 0, 1, 1, 0, 0, 1])
torch.Size([24])
Gradient norm: 2.0224887275357335
tensor([1, 0, 0, 0, 0, 0, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 1.8382827171911935
tensor([1, 1,

Epoch 107/600 - Training:   0%|          | 0/29 [00:00<?, ?it/s]

tensor([0, 0, 1, 0, 1, 1, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 3.286630648518894
tensor([1, 0, 0, 0, 0, 0, 1, 1, 1, 1])
torch.Size([24])
Gradient norm: 1.656307917932792
tensor([1, 0, 1, 0, 0, 0, 1, 1, 0, 0])
torch.Size([24])
Gradient norm: 2.4115098919661424
tensor([1, 0, 0, 1, 1, 1, 0, 0, 1, 0])
torch.Size([24])
Gradient norm: 3.592769486358712
tensor([0, 1, 0, 1, 1, 0, 1, 1, 0, 1])
torch.Size([24])
Gradient norm: 2.198182238055235
tensor([0, 0, 1, 0, 1, 0, 1, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.632052357110366
tensor([1, 0, 0, 0, 1, 0, 0, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.404520182760735
tensor([0, 1, 1, 0, 0, 1, 0, 1, 0, 0])
torch.Size([24])
Gradient norm: 2.9378275442394344
tensor([1, 0, 0, 1, 0, 0, 0, 1, 1, 1])
torch.Size([24])
Gradient norm: 3.386650065982839
tensor([1, 1, 1, 0, 0, 1, 1, 0, 0, 1])
torch.Size([24])
Gradient norm: 2.129361783590159
tensor([1, 0, 0, 0, 0, 0, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 2.0898051797013433
tensor([1, 1, 0, 0

Epoch 108/600 - Training:   0%|          | 0/29 [00:00<?, ?it/s]

tensor([0, 0, 1, 0, 1, 1, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 2.888238893691242
tensor([1, 0, 0, 0, 0, 0, 1, 1, 1, 1])
torch.Size([24])
Gradient norm: 1.7674020711513547
tensor([1, 0, 1, 0, 0, 0, 1, 1, 0, 0])
torch.Size([24])
Gradient norm: 2.14639698818817
tensor([1, 0, 0, 1, 1, 1, 0, 0, 1, 0])
torch.Size([24])
Gradient norm: 2.022387279862213
tensor([0, 1, 0, 1, 1, 0, 1, 1, 0, 1])
torch.Size([24])
Gradient norm: 1.9652056075418687
tensor([0, 0, 1, 0, 1, 0, 1, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.35918891487079
tensor([1, 0, 0, 0, 1, 0, 0, 1, 1, 0])
torch.Size([24])
Gradient norm: 3.380445915497901
tensor([0, 1, 1, 0, 0, 1, 0, 1, 0, 0])
torch.Size([24])
Gradient norm: 2.4222317602801304
tensor([1, 0, 0, 1, 0, 0, 0, 1, 1, 1])
torch.Size([24])
Gradient norm: 3.2868966199602894
tensor([1, 1, 1, 0, 0, 1, 1, 0, 0, 1])
torch.Size([24])
Gradient norm: 2.137043867504852
tensor([1, 0, 0, 0, 0, 0, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 1.7456002198858067
tensor([1, 1, 0, 0

Epoch 109/600 - Training:   0%|          | 0/29 [00:00<?, ?it/s]

tensor([0, 0, 1, 0, 1, 1, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 2.5024110023101858
tensor([1, 0, 0, 0, 0, 0, 1, 1, 1, 1])
torch.Size([24])
Gradient norm: 2.5441465300994883
tensor([1, 0, 1, 0, 0, 0, 1, 1, 0, 0])
torch.Size([24])
Gradient norm: 1.8950041795986403
tensor([1, 0, 0, 1, 1, 1, 0, 0, 1, 0])
torch.Size([24])
Gradient norm: 2.235620108658297
tensor([0, 1, 0, 1, 1, 0, 1, 1, 0, 1])
torch.Size([24])
Gradient norm: 2.204451708937528
tensor([0, 0, 1, 0, 1, 0, 1, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.223445502622023
tensor([1, 0, 0, 0, 1, 0, 0, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.6695320441136947
tensor([0, 1, 1, 0, 0, 1, 0, 1, 0, 0])
torch.Size([24])
Gradient norm: 3.3079965034503553
tensor([1, 0, 0, 1, 0, 0, 0, 1, 1, 1])
torch.Size([24])
Gradient norm: 3.3279166793993356
tensor([1, 1, 1, 0, 0, 1, 1, 0, 0, 1])
torch.Size([24])
Gradient norm: 1.84930234833414
tensor([1, 0, 0, 0, 0, 0, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 2.016540106948475
tensor([1, 1, 0,

Epoch 110/600 - Training:   0%|          | 0/29 [00:00<?, ?it/s]

tensor([0, 0, 1, 0, 1, 1, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 3.218797925138296
tensor([1, 0, 0, 0, 0, 0, 1, 1, 1, 1])
torch.Size([24])
Gradient norm: 2.2009854095445216
tensor([1, 0, 1, 0, 0, 0, 1, 1, 0, 0])
torch.Size([24])
Gradient norm: 2.182784371311589
tensor([1, 0, 0, 1, 1, 1, 0, 0, 1, 0])
torch.Size([24])
Gradient norm: 2.4422219399989458
tensor([0, 1, 0, 1, 1, 0, 1, 1, 0, 1])
torch.Size([24])
Gradient norm: 2.2454680325531866
tensor([0, 0, 1, 0, 1, 0, 1, 1, 1, 0])
torch.Size([24])
Gradient norm: 1.6953459001300761
tensor([1, 0, 0, 0, 1, 0, 0, 1, 1, 0])
torch.Size([24])
Gradient norm: 3.516901083177857
tensor([0, 1, 1, 0, 0, 1, 0, 1, 0, 0])
torch.Size([24])
Gradient norm: 2.999693884717901
tensor([1, 0, 0, 1, 0, 0, 0, 1, 1, 1])
torch.Size([24])
Gradient norm: 3.1893389085709325
tensor([1, 1, 1, 0, 0, 1, 1, 0, 0, 1])
torch.Size([24])
Gradient norm: 1.863700835498794
tensor([1, 0, 0, 0, 0, 0, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 1.7160349150267296
tensor([1, 1, 0

Epoch 111/600 - Training:   0%|          | 0/29 [00:00<?, ?it/s]

tensor([0, 0, 1, 0, 1, 1, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 3.985478336604017
tensor([1, 0, 0, 0, 0, 0, 1, 1, 1, 1])
torch.Size([24])
Gradient norm: 3.179418086967585
tensor([1, 0, 1, 0, 0, 0, 1, 1, 0, 0])
torch.Size([24])
Gradient norm: 2.789095934298216
tensor([1, 0, 0, 1, 1, 1, 0, 0, 1, 0])
torch.Size([24])
Gradient norm: 2.6438265275255333
tensor([0, 1, 0, 1, 1, 0, 1, 1, 0, 1])
torch.Size([24])
Gradient norm: 2.470457971066187
tensor([0, 0, 1, 0, 1, 0, 1, 1, 1, 0])
torch.Size([24])
Gradient norm: 1.5949855957032268
tensor([1, 0, 0, 0, 1, 0, 0, 1, 1, 0])
torch.Size([24])
Gradient norm: 3.2297687689514043
tensor([0, 1, 1, 0, 0, 1, 0, 1, 0, 0])
torch.Size([24])
Gradient norm: 3.0182072520819534
tensor([1, 0, 0, 1, 0, 0, 0, 1, 1, 1])
torch.Size([24])
Gradient norm: 3.7456750058132804
tensor([1, 1, 1, 0, 0, 1, 1, 0, 0, 1])
torch.Size([24])
Gradient norm: 2.01047080563611
tensor([1, 0, 0, 0, 0, 0, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 2.238350693886376
tensor([1, 1, 0, 

Epoch 112/600 - Training:   0%|          | 0/29 [00:00<?, ?it/s]

tensor([0, 0, 1, 0, 1, 1, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 3.304759388428578
tensor([1, 0, 0, 0, 0, 0, 1, 1, 1, 1])
torch.Size([24])
Gradient norm: 1.745591977666983
tensor([1, 0, 1, 0, 0, 0, 1, 1, 0, 0])
torch.Size([24])
Gradient norm: 2.8123738307396966
tensor([1, 0, 0, 1, 1, 1, 0, 0, 1, 0])
torch.Size([24])
Gradient norm: 2.6689841599892095
tensor([0, 1, 0, 1, 1, 0, 1, 1, 0, 1])
torch.Size([24])
Gradient norm: 2.2455393933335372
tensor([0, 0, 1, 0, 1, 0, 1, 1, 1, 0])
torch.Size([24])
Gradient norm: 1.9209182531430937
tensor([1, 0, 0, 0, 1, 0, 0, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.7337327849937343
tensor([0, 1, 1, 0, 0, 1, 0, 1, 0, 0])
torch.Size([24])
Gradient norm: 2.8360758444252383
tensor([1, 0, 0, 1, 0, 0, 0, 1, 1, 1])
torch.Size([24])
Gradient norm: 4.09332127173527
tensor([1, 1, 1, 0, 0, 1, 1, 0, 0, 1])
torch.Size([24])
Gradient norm: 2.0323255227041397
tensor([1, 0, 0, 0, 0, 0, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 1.7921695931234622
tensor([1, 1, 

Epoch 113/600 - Training:   0%|          | 0/29 [00:00<?, ?it/s]

tensor([0, 0, 1, 0, 1, 1, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 3.2541275064895245
tensor([1, 0, 0, 0, 0, 0, 1, 1, 1, 1])
torch.Size([24])
Gradient norm: 2.3045683787865117
tensor([1, 0, 1, 0, 0, 0, 1, 1, 0, 0])
torch.Size([24])
Gradient norm: 2.6246759962924298
tensor([1, 0, 0, 1, 1, 1, 0, 0, 1, 0])
torch.Size([24])
Gradient norm: 2.993629175864933
tensor([0, 1, 0, 1, 1, 0, 1, 1, 0, 1])
torch.Size([24])
Gradient norm: 1.975227777137368
tensor([0, 0, 1, 0, 1, 0, 1, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.3727500824323156
tensor([1, 0, 0, 0, 1, 0, 0, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.717546274014503
tensor([0, 1, 1, 0, 0, 1, 0, 1, 0, 0])
torch.Size([24])
Gradient norm: 3.0720388382434094
tensor([1, 0, 0, 1, 0, 0, 0, 1, 1, 1])
torch.Size([24])
Gradient norm: 3.0676366813378797
tensor([1, 1, 1, 0, 0, 1, 1, 0, 0, 1])
torch.Size([24])
Gradient norm: 1.8711499892484658
tensor([1, 0, 0, 0, 0, 0, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 2.5362005965457968
tensor([1, 1,

Epoch 114/600 - Training:   0%|          | 0/29 [00:00<?, ?it/s]

tensor([0, 0, 1, 0, 1, 1, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 3.9201714470132285
tensor([1, 0, 0, 0, 0, 0, 1, 1, 1, 1])
torch.Size([24])
Gradient norm: 2.4040451275096464
tensor([1, 0, 1, 0, 0, 0, 1, 1, 0, 0])
torch.Size([24])
Gradient norm: 2.328381383077763
tensor([1, 0, 0, 1, 1, 1, 0, 0, 1, 0])
torch.Size([24])
Gradient norm: 2.3560041171005524
tensor([0, 1, 0, 1, 1, 0, 1, 1, 0, 1])
torch.Size([24])
Gradient norm: 1.9724946000056671
tensor([0, 0, 1, 0, 1, 0, 1, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.156175877753094
tensor([1, 0, 0, 0, 1, 0, 0, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.9177907323313383
tensor([0, 1, 1, 0, 0, 1, 0, 1, 0, 0])
torch.Size([24])
Gradient norm: 3.1913774807684443
tensor([1, 0, 0, 1, 0, 0, 0, 1, 1, 1])
torch.Size([24])
Gradient norm: 2.8474780264179462
tensor([1, 1, 1, 0, 0, 1, 1, 0, 0, 1])
torch.Size([24])
Gradient norm: 2.0328268498945263
tensor([1, 0, 0, 0, 0, 0, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 1.5298950069934922
tensor([1, 1

Epoch 115/600 - Training:   0%|          | 0/29 [00:00<?, ?it/s]

tensor([0, 0, 1, 0, 1, 1, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 3.4638047341437206
tensor([1, 0, 0, 0, 0, 0, 1, 1, 1, 1])
torch.Size([24])
Gradient norm: 2.1972546513632194
tensor([1, 0, 1, 0, 0, 0, 1, 1, 0, 0])
torch.Size([24])
Gradient norm: 2.2026074018644395
tensor([1, 0, 0, 1, 1, 1, 0, 0, 1, 0])
torch.Size([24])
Gradient norm: 2.494121097116615
tensor([0, 1, 0, 1, 1, 0, 1, 1, 0, 1])
torch.Size([24])
Gradient norm: 2.0834017823862356
tensor([0, 0, 1, 0, 1, 0, 1, 1, 1, 0])
torch.Size([24])
Gradient norm: 1.8485039788847781
tensor([1, 0, 0, 0, 1, 0, 0, 1, 1, 0])
torch.Size([24])
Gradient norm: 3.2456408670817973
tensor([0, 1, 1, 0, 0, 1, 0, 1, 0, 0])
torch.Size([24])
Gradient norm: 3.7759681993843404
tensor([1, 0, 0, 1, 0, 0, 0, 1, 1, 1])
torch.Size([24])
Gradient norm: 2.9608154558685813
tensor([1, 1, 1, 0, 0, 1, 1, 0, 0, 1])
torch.Size([24])
Gradient norm: 1.908934600072828
tensor([1, 0, 0, 0, 0, 0, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 2.2095450416080764
tensor([1, 1

Epoch 116/600 - Training:   0%|          | 0/29 [00:00<?, ?it/s]

tensor([0, 0, 1, 0, 1, 1, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 3.6203661921307964
tensor([1, 0, 0, 0, 0, 0, 1, 1, 1, 1])
torch.Size([24])
Gradient norm: 2.513538861384199
tensor([1, 0, 1, 0, 0, 0, 1, 1, 0, 0])
torch.Size([24])
Gradient norm: 2.101265146763846
tensor([1, 0, 0, 1, 1, 1, 0, 0, 1, 0])
torch.Size([24])
Gradient norm: 2.834536112231149
tensor([0, 1, 0, 1, 1, 0, 1, 1, 0, 1])
torch.Size([24])
Gradient norm: 2.543840000962321
tensor([0, 0, 1, 0, 1, 0, 1, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.3860020711868777
tensor([1, 0, 0, 0, 1, 0, 0, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.292063644047221
tensor([0, 1, 1, 0, 0, 1, 0, 1, 0, 0])
torch.Size([24])
Gradient norm: 2.922310486620326
tensor([1, 0, 0, 1, 0, 0, 0, 1, 1, 1])
torch.Size([24])
Gradient norm: 3.2816030349065564
tensor([1, 1, 1, 0, 0, 1, 1, 0, 0, 1])
torch.Size([24])
Gradient norm: 2.2431768294787857
tensor([1, 0, 0, 0, 0, 0, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 2.3193287607197033
tensor([1, 1, 0,

Epoch 117/600 - Training:   0%|          | 0/29 [00:00<?, ?it/s]

tensor([0, 0, 1, 0, 1, 1, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 3.5474772278892974
tensor([1, 0, 0, 0, 0, 0, 1, 1, 1, 1])
torch.Size([24])
Gradient norm: 1.4979422866985366
tensor([1, 0, 1, 0, 0, 0, 1, 1, 0, 0])
torch.Size([24])
Gradient norm: 2.8112287647970122
tensor([1, 0, 0, 1, 1, 1, 0, 0, 1, 0])
torch.Size([24])
Gradient norm: 2.322354385412113
tensor([0, 1, 0, 1, 1, 0, 1, 1, 0, 1])
torch.Size([24])
Gradient norm: 1.7882718118023138
tensor([0, 0, 1, 0, 1, 0, 1, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.209862557103205
tensor([1, 0, 0, 0, 1, 0, 0, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.5303998370968426
tensor([0, 1, 1, 0, 0, 1, 0, 1, 0, 0])
torch.Size([24])
Gradient norm: 3.106798598127229
tensor([1, 0, 0, 1, 0, 0, 0, 1, 1, 1])
torch.Size([24])
Gradient norm: 3.2144813621007544
tensor([1, 1, 1, 0, 0, 1, 1, 0, 0, 1])
torch.Size([24])
Gradient norm: 1.7692330297156889
tensor([1, 0, 0, 0, 0, 0, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 1.906054398385086
tensor([1, 1, 

Epoch 118/600 - Training:   0%|          | 0/29 [00:00<?, ?it/s]

tensor([0, 0, 1, 0, 1, 1, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 3.3737631744127183
tensor([1, 0, 0, 0, 0, 0, 1, 1, 1, 1])
torch.Size([24])
Gradient norm: 2.120485243640975
tensor([1, 0, 1, 0, 0, 0, 1, 1, 0, 0])
torch.Size([24])
Gradient norm: 2.8797955209687003
tensor([1, 0, 0, 1, 1, 1, 0, 0, 1, 0])
torch.Size([24])
Gradient norm: 2.2509958031289887
tensor([0, 1, 0, 1, 1, 0, 1, 1, 0, 1])
torch.Size([24])
Gradient norm: 1.9358777268196934
tensor([0, 0, 1, 0, 1, 0, 1, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.776967000812163
tensor([1, 0, 0, 0, 1, 0, 0, 1, 1, 0])
torch.Size([24])
Gradient norm: 3.6304579456147765
tensor([0, 1, 1, 0, 0, 1, 0, 1, 0, 0])
torch.Size([24])
Gradient norm: 2.903205903635769
tensor([1, 0, 0, 1, 0, 0, 0, 1, 1, 1])
torch.Size([24])
Gradient norm: 3.4032006171257336
tensor([1, 1, 1, 0, 0, 1, 1, 0, 0, 1])
torch.Size([24])
Gradient norm: 1.7916353824185263
tensor([1, 0, 0, 0, 0, 0, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 1.9111920230875317
tensor([1, 1,

Epoch 119/600 - Training:   0%|          | 0/29 [00:00<?, ?it/s]

tensor([0, 0, 1, 0, 1, 1, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 3.751015588016793
tensor([1, 0, 0, 0, 0, 0, 1, 1, 1, 1])
torch.Size([24])
Gradient norm: 2.3402031532352954
tensor([1, 0, 1, 0, 0, 0, 1, 1, 0, 0])
torch.Size([24])
Gradient norm: 2.53783125046556
tensor([1, 0, 0, 1, 1, 1, 0, 0, 1, 0])
torch.Size([24])
Gradient norm: 3.188281634497464
tensor([0, 1, 0, 1, 1, 0, 1, 1, 0, 1])
torch.Size([24])
Gradient norm: 1.824581202176291
tensor([0, 0, 1, 0, 1, 0, 1, 1, 1, 0])
torch.Size([24])
Gradient norm: 1.9997160657960422
tensor([1, 0, 0, 0, 1, 0, 0, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.3664134578757277
tensor([0, 1, 1, 0, 0, 1, 0, 1, 0, 0])
torch.Size([24])
Gradient norm: 3.121104415804543
tensor([1, 0, 0, 1, 0, 0, 0, 1, 1, 1])
torch.Size([24])
Gradient norm: 2.676482866282802
tensor([1, 1, 1, 0, 0, 1, 1, 0, 0, 1])
torch.Size([24])
Gradient norm: 2.257264187569751
tensor([1, 0, 0, 0, 0, 0, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 1.5179716379153
tensor([1, 1, 0, 0, 1

Epoch 120/600 - Training:   0%|          | 0/29 [00:00<?, ?it/s]

tensor([0, 0, 1, 0, 1, 1, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 3.1223656060592635
tensor([1, 0, 0, 0, 0, 0, 1, 1, 1, 1])
torch.Size([24])
Gradient norm: 2.2184016728793576
tensor([1, 0, 1, 0, 0, 0, 1, 1, 0, 0])
torch.Size([24])
Gradient norm: 2.4490942697327744
tensor([1, 0, 0, 1, 1, 1, 0, 0, 1, 0])
torch.Size([24])
Gradient norm: 2.7576659280352467
tensor([0, 1, 0, 1, 1, 0, 1, 1, 0, 1])
torch.Size([24])
Gradient norm: 1.808288654960036
tensor([0, 0, 1, 0, 1, 0, 1, 1, 1, 0])
torch.Size([24])
Gradient norm: 1.708170657224241
tensor([1, 0, 0, 0, 1, 0, 0, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.968285518407894
tensor([0, 1, 1, 0, 0, 1, 0, 1, 0, 0])
torch.Size([24])
Gradient norm: 3.32796021037526
tensor([1, 0, 0, 1, 0, 0, 0, 1, 1, 1])
torch.Size([24])
Gradient norm: 3.2500504003099766
tensor([1, 1, 1, 0, 0, 1, 1, 0, 0, 1])
torch.Size([24])
Gradient norm: 1.9453401517457163
tensor([1, 0, 0, 0, 0, 0, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 1.8551318781862673
tensor([1, 1, 0

Epoch 121/600 - Training:   0%|          | 0/29 [00:00<?, ?it/s]

tensor([0, 0, 1, 0, 1, 1, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 3.680673356420996
tensor([1, 0, 0, 0, 0, 0, 1, 1, 1, 1])
torch.Size([24])
Gradient norm: 2.6033339692706012
tensor([1, 0, 1, 0, 0, 0, 1, 1, 0, 0])
torch.Size([24])
Gradient norm: 1.8731194881634494
tensor([1, 0, 0, 1, 1, 1, 0, 0, 1, 0])
torch.Size([24])
Gradient norm: 1.9395557924470554
tensor([0, 1, 0, 1, 1, 0, 1, 1, 0, 1])
torch.Size([24])
Gradient norm: 2.257634117794732
tensor([0, 0, 1, 0, 1, 0, 1, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.981126729642528
tensor([1, 0, 0, 0, 1, 0, 0, 1, 1, 0])
torch.Size([24])
Gradient norm: 3.288781196599917
tensor([0, 1, 1, 0, 0, 1, 0, 1, 0, 0])
torch.Size([24])
Gradient norm: 3.2341041034149303
tensor([1, 0, 0, 1, 0, 0, 0, 1, 1, 1])
torch.Size([24])
Gradient norm: 2.807550222413029
tensor([1, 1, 1, 0, 0, 1, 1, 0, 0, 1])
torch.Size([24])
Gradient norm: 1.8615213435480213
tensor([1, 0, 0, 0, 0, 0, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 1.8958342034365359
tensor([1, 1, 0

Epoch 122/600 - Training:   0%|          | 0/29 [00:00<?, ?it/s]

tensor([0, 0, 1, 0, 1, 1, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 3.865332976667241
tensor([1, 0, 0, 0, 0, 0, 1, 1, 1, 1])
torch.Size([24])
Gradient norm: 2.1121273356778323
tensor([1, 0, 1, 0, 0, 0, 1, 1, 0, 0])
torch.Size([24])
Gradient norm: 1.9759856860504597
tensor([1, 0, 0, 1, 1, 1, 0, 0, 1, 0])
torch.Size([24])
Gradient norm: 2.513509476708051
tensor([0, 1, 0, 1, 1, 0, 1, 1, 0, 1])
torch.Size([24])
Gradient norm: 1.5271731733123166
tensor([0, 0, 1, 0, 1, 0, 1, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.278443966108628
tensor([1, 0, 0, 0, 1, 0, 0, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.932508806300306
tensor([0, 1, 1, 0, 0, 1, 0, 1, 0, 0])
torch.Size([24])
Gradient norm: 3.0154152140227333
tensor([1, 0, 0, 1, 0, 0, 0, 1, 1, 1])
torch.Size([24])
Gradient norm: 3.532324225655679
tensor([1, 1, 1, 0, 0, 1, 1, 0, 0, 1])
torch.Size([24])
Gradient norm: 2.2893873923545005
tensor([1, 0, 0, 0, 0, 0, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 2.028201501389709
tensor([1, 1, 0,

Epoch 123/600 - Training:   0%|          | 0/29 [00:00<?, ?it/s]

tensor([0, 0, 1, 0, 1, 1, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 3.3576055633402344
tensor([1, 0, 0, 0, 0, 0, 1, 1, 1, 1])
torch.Size([24])
Gradient norm: 2.185544893051302
tensor([1, 0, 1, 0, 0, 0, 1, 1, 0, 0])
torch.Size([24])
Gradient norm: 1.6602841598452542
tensor([1, 0, 0, 1, 1, 1, 0, 0, 1, 0])
torch.Size([24])
Gradient norm: 2.4438772204739063
tensor([0, 1, 0, 1, 1, 0, 1, 1, 0, 1])
torch.Size([24])
Gradient norm: 1.8606262033635181
tensor([0, 0, 1, 0, 1, 0, 1, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.389551192681619
tensor([1, 0, 0, 0, 1, 0, 0, 1, 1, 0])
torch.Size([24])
Gradient norm: 3.1397943554638443
tensor([0, 1, 1, 0, 0, 1, 0, 1, 0, 0])
torch.Size([24])
Gradient norm: 3.7563630933215055
tensor([1, 0, 0, 1, 0, 0, 0, 1, 1, 1])
torch.Size([24])
Gradient norm: 2.926133586947767
tensor([1, 1, 1, 0, 0, 1, 1, 0, 0, 1])
torch.Size([24])
Gradient norm: 2.004739420644946
tensor([1, 0, 0, 0, 0, 0, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 1.6360609462391154
tensor([1, 1, 

Epoch 124/600 - Training:   0%|          | 0/29 [00:00<?, ?it/s]

tensor([0, 0, 1, 0, 1, 1, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 3.434500427027027
tensor([1, 0, 0, 0, 0, 0, 1, 1, 1, 1])
torch.Size([24])
Gradient norm: 2.279108193179224
tensor([1, 0, 1, 0, 0, 0, 1, 1, 0, 0])
torch.Size([24])
Gradient norm: 2.129005438726691
tensor([1, 0, 0, 1, 1, 1, 0, 0, 1, 0])
torch.Size([24])
Gradient norm: 2.5628979499269233
tensor([0, 1, 0, 1, 1, 0, 1, 1, 0, 1])
torch.Size([24])
Gradient norm: 2.3331218503171924
tensor([0, 0, 1, 0, 1, 0, 1, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.158938005661629
tensor([1, 0, 0, 0, 1, 0, 0, 1, 1, 0])
torch.Size([24])
Gradient norm: 3.156660880274932
tensor([0, 1, 1, 0, 0, 1, 0, 1, 0, 0])
torch.Size([24])
Gradient norm: 3.6461055287135986
tensor([1, 0, 0, 1, 0, 0, 0, 1, 1, 1])
torch.Size([24])
Gradient norm: 3.2783650973501715
tensor([1, 1, 1, 0, 0, 1, 1, 0, 0, 1])
torch.Size([24])
Gradient norm: 1.8015603599961336
tensor([1, 0, 0, 0, 0, 0, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 1.910331043715321
tensor([1, 1, 0,

Epoch 125/600 - Training:   0%|          | 0/29 [00:00<?, ?it/s]

tensor([0, 0, 1, 0, 1, 1, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 4.5961387332869235
tensor([1, 0, 0, 0, 0, 0, 1, 1, 1, 1])
torch.Size([24])
Gradient norm: 2.2106652629464203
tensor([1, 0, 1, 0, 0, 0, 1, 1, 0, 0])
torch.Size([24])
Gradient norm: 2.7109205147635977
tensor([1, 0, 0, 1, 1, 1, 0, 0, 1, 0])
torch.Size([24])
Gradient norm: 3.692948424127261
tensor([0, 1, 0, 1, 1, 0, 1, 1, 0, 1])
torch.Size([24])
Gradient norm: 2.2167659139219307
tensor([0, 0, 1, 0, 1, 0, 1, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.3607259276097117
tensor([1, 0, 0, 0, 1, 0, 0, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.0698316649901205
tensor([0, 1, 1, 0, 0, 1, 0, 1, 0, 0])
torch.Size([24])
Gradient norm: 3.4301357599519315
tensor([1, 0, 0, 1, 0, 0, 0, 1, 1, 1])
torch.Size([24])
Gradient norm: 2.922947476998261
tensor([1, 1, 1, 0, 0, 1, 1, 0, 0, 1])
torch.Size([24])
Gradient norm: 1.708972094618347
tensor([1, 0, 0, 0, 0, 0, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 1.8568089273598225
tensor([1, 1,

Epoch 126/600 - Training:   0%|          | 0/29 [00:00<?, ?it/s]

tensor([0, 0, 1, 0, 1, 1, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 3.2772257209043723
tensor([1, 0, 0, 0, 0, 0, 1, 1, 1, 1])
torch.Size([24])
Gradient norm: 1.9872308395373972
tensor([1, 0, 1, 0, 0, 0, 1, 1, 0, 0])
torch.Size([24])
Gradient norm: 3.0880611050217226
tensor([1, 0, 0, 1, 1, 1, 0, 0, 1, 0])
torch.Size([24])
Gradient norm: 2.608459146127892
tensor([0, 1, 0, 1, 1, 0, 1, 1, 0, 1])
torch.Size([24])
Gradient norm: 2.3103808393442664
tensor([0, 0, 1, 0, 1, 0, 1, 1, 1, 0])
torch.Size([24])
Gradient norm: 1.9721950099393597
tensor([1, 0, 0, 0, 1, 0, 0, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.1967754532888426
tensor([0, 1, 1, 0, 0, 1, 0, 1, 0, 0])
torch.Size([24])
Gradient norm: 3.1570907253943723
tensor([1, 0, 0, 1, 0, 0, 0, 1, 1, 1])
torch.Size([24])
Gradient norm: 2.765202183960697
tensor([1, 1, 1, 0, 0, 1, 1, 0, 0, 1])
torch.Size([24])
Gradient norm: 1.941353373936931
tensor([1, 0, 0, 0, 0, 0, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 1.8598019984703125
tensor([1, 1,

Epoch 127/600 - Training:   0%|          | 0/29 [00:00<?, ?it/s]

tensor([0, 0, 1, 0, 1, 1, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 3.1107998672426818
tensor([1, 0, 0, 0, 0, 0, 1, 1, 1, 1])
torch.Size([24])
Gradient norm: 1.6509841075580087
tensor([1, 0, 1, 0, 0, 0, 1, 1, 0, 0])
torch.Size([24])
Gradient norm: 2.076511021915514
tensor([1, 0, 0, 1, 1, 1, 0, 0, 1, 0])
torch.Size([24])
Gradient norm: 2.4779764886137916
tensor([0, 1, 0, 1, 1, 0, 1, 1, 0, 1])
torch.Size([24])
Gradient norm: 1.9264239265425072
tensor([0, 0, 1, 0, 1, 0, 1, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.030934261740933
tensor([1, 0, 0, 0, 1, 0, 0, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.7394983934723056
tensor([0, 1, 1, 0, 0, 1, 0, 1, 0, 0])
torch.Size([24])
Gradient norm: 3.7569149609495858
tensor([1, 0, 0, 1, 0, 0, 0, 1, 1, 1])
torch.Size([24])
Gradient norm: 4.392684118861592
tensor([1, 1, 1, 0, 0, 1, 1, 0, 0, 1])
torch.Size([24])
Gradient norm: 2.0432178647436374
tensor([1, 0, 0, 0, 0, 0, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 1.9073774209340901
tensor([1, 1,

Epoch 128/600 - Training:   0%|          | 0/29 [00:00<?, ?it/s]

tensor([0, 0, 1, 0, 1, 1, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 4.13843952002624
tensor([1, 0, 0, 0, 0, 0, 1, 1, 1, 1])
torch.Size([24])
Gradient norm: 2.6264545930125367
tensor([1, 0, 1, 0, 0, 0, 1, 1, 0, 0])
torch.Size([24])
Gradient norm: 2.112022309530754
tensor([1, 0, 0, 1, 1, 1, 0, 0, 1, 0])
torch.Size([24])
Gradient norm: 1.9946945674481302
tensor([0, 1, 0, 1, 1, 0, 1, 1, 0, 1])
torch.Size([24])
Gradient norm: 2.1174309252552126
tensor([0, 0, 1, 0, 1, 0, 1, 1, 1, 0])
torch.Size([24])
Gradient norm: 1.8472714451901233
tensor([1, 0, 0, 0, 1, 0, 0, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.4855079793963464
tensor([0, 1, 1, 0, 0, 1, 0, 1, 0, 0])
torch.Size([24])
Gradient norm: 2.66384789698207
tensor([1, 0, 0, 1, 0, 0, 0, 1, 1, 1])
torch.Size([24])
Gradient norm: 3.0194169958210026
tensor([1, 1, 1, 0, 0, 1, 1, 0, 0, 1])
torch.Size([24])
Gradient norm: 1.7334690722147854
tensor([1, 0, 0, 0, 0, 0, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 2.4134895549661763
tensor([1, 1, 0

Epoch 129/600 - Training:   0%|          | 0/29 [00:00<?, ?it/s]

tensor([0, 0, 1, 0, 1, 1, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 3.301259806721287
tensor([1, 0, 0, 0, 0, 0, 1, 1, 1, 1])
torch.Size([24])
Gradient norm: 2.429278695092431
tensor([1, 0, 1, 0, 0, 0, 1, 1, 0, 0])
torch.Size([24])
Gradient norm: 2.0297950668835103
tensor([1, 0, 0, 1, 1, 1, 0, 0, 1, 0])
torch.Size([24])
Gradient norm: 2.903463956841161
tensor([0, 1, 0, 1, 1, 0, 1, 1, 0, 1])
torch.Size([24])
Gradient norm: 2.3457466672394705
tensor([0, 0, 1, 0, 1, 0, 1, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.394336940100084
tensor([1, 0, 0, 0, 1, 0, 0, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.6356729265485845
tensor([0, 1, 1, 0, 0, 1, 0, 1, 0, 0])
torch.Size([24])
Gradient norm: 3.2603743042532383
tensor([1, 0, 0, 1, 0, 0, 0, 1, 1, 1])
torch.Size([24])
Gradient norm: 3.003826126144992
tensor([1, 1, 1, 0, 0, 1, 1, 0, 0, 1])
torch.Size([24])
Gradient norm: 1.8148230864664332
tensor([1, 0, 0, 0, 0, 0, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 1.7741489128117358
tensor([1, 1, 0

Epoch 130/600 - Training:   0%|          | 0/29 [00:00<?, ?it/s]

tensor([0, 0, 1, 0, 1, 1, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 3.9817305795477718
tensor([1, 0, 0, 0, 0, 0, 1, 1, 1, 1])
torch.Size([24])
Gradient norm: 1.6951312773747718
tensor([1, 0, 1, 0, 0, 0, 1, 1, 0, 0])
torch.Size([24])
Gradient norm: 2.0098631640534195
tensor([1, 0, 0, 1, 1, 1, 0, 0, 1, 0])
torch.Size([24])
Gradient norm: 2.5434497876539077
tensor([0, 1, 0, 1, 1, 0, 1, 1, 0, 1])
torch.Size([24])
Gradient norm: 2.462195726071351
tensor([0, 0, 1, 0, 1, 0, 1, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.599230975354604
tensor([1, 0, 0, 0, 1, 0, 0, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.9547586762737694
tensor([0, 1, 1, 0, 0, 1, 0, 1, 0, 0])
torch.Size([24])
Gradient norm: 2.9273085338669156
tensor([1, 0, 0, 1, 0, 0, 0, 1, 1, 1])
torch.Size([24])
Gradient norm: 2.934051358592718
tensor([1, 1, 1, 0, 0, 1, 1, 0, 0, 1])
torch.Size([24])
Gradient norm: 2.1733412162722203
tensor([1, 0, 0, 0, 0, 0, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 1.7494069012539448
tensor([1, 1,

Epoch 131/600 - Training:   0%|          | 0/29 [00:00<?, ?it/s]

tensor([0, 0, 1, 0, 1, 1, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 3.4760348252394504
tensor([1, 0, 0, 0, 0, 0, 1, 1, 1, 1])
torch.Size([24])
Gradient norm: 2.144874105479058
tensor([1, 0, 1, 0, 0, 0, 1, 1, 0, 0])
torch.Size([24])
Gradient norm: 2.3468265228250735
tensor([1, 0, 0, 1, 1, 1, 0, 0, 1, 0])
torch.Size([24])
Gradient norm: 2.900820041375265
tensor([0, 1, 0, 1, 1, 0, 1, 1, 0, 1])
torch.Size([24])
Gradient norm: 1.9067189630783339
tensor([0, 0, 1, 0, 1, 0, 1, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.0586205360086525
tensor([1, 0, 0, 0, 1, 0, 0, 1, 1, 0])
torch.Size([24])
Gradient norm: 3.403861038120214
tensor([0, 1, 1, 0, 0, 1, 0, 1, 0, 0])
torch.Size([24])
Gradient norm: 2.8824421057360743
tensor([1, 0, 0, 1, 0, 0, 0, 1, 1, 1])
torch.Size([24])
Gradient norm: 3.0850833934492092
tensor([1, 1, 1, 0, 0, 1, 1, 0, 0, 1])
torch.Size([24])
Gradient norm: 1.8238732031292413
tensor([1, 0, 0, 0, 0, 0, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 2.1185548993372794
tensor([1, 1,

Epoch 132/600 - Training:   0%|          | 0/29 [00:00<?, ?it/s]

tensor([0, 0, 1, 0, 1, 1, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 3.688494216187304
tensor([1, 0, 0, 0, 0, 0, 1, 1, 1, 1])
torch.Size([24])
Gradient norm: 1.8526813700996907
tensor([1, 0, 1, 0, 0, 0, 1, 1, 0, 0])
torch.Size([24])
Gradient norm: 2.4798275586935112
tensor([1, 0, 0, 1, 1, 1, 0, 0, 1, 0])
torch.Size([24])
Gradient norm: 2.950388584516864
tensor([0, 1, 0, 1, 1, 0, 1, 1, 0, 1])
torch.Size([24])
Gradient norm: 2.1650306876053502
tensor([0, 0, 1, 0, 1, 0, 1, 1, 1, 0])
torch.Size([24])
Gradient norm: 1.9498493906709409
tensor([1, 0, 0, 0, 1, 0, 0, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.7864604342222963
tensor([0, 1, 1, 0, 0, 1, 0, 1, 0, 0])
torch.Size([24])
Gradient norm: 3.278622369040102
tensor([1, 0, 0, 1, 0, 0, 0, 1, 1, 1])
torch.Size([24])
Gradient norm: 3.4515702267078585
tensor([1, 1, 1, 0, 0, 1, 1, 0, 0, 1])
torch.Size([24])
Gradient norm: 2.2246226859017404
tensor([1, 0, 0, 0, 0, 0, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 2.65623601694682
tensor([1, 1, 0

Epoch 133/600 - Training:   0%|          | 0/29 [00:00<?, ?it/s]

tensor([0, 0, 1, 0, 1, 1, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 3.7198840865280065
tensor([1, 0, 0, 0, 0, 0, 1, 1, 1, 1])
torch.Size([24])
Gradient norm: 1.8671144663258998
tensor([1, 0, 1, 0, 0, 0, 1, 1, 0, 0])
torch.Size([24])
Gradient norm: 2.263204624763939
tensor([1, 0, 0, 1, 1, 1, 0, 0, 1, 0])
torch.Size([24])
Gradient norm: 3.120970650268376
tensor([0, 1, 0, 1, 1, 0, 1, 1, 0, 1])
torch.Size([24])
Gradient norm: 2.393924081715059
tensor([0, 0, 1, 0, 1, 0, 1, 1, 1, 0])
torch.Size([24])
Gradient norm: 1.950918189775255
tensor([1, 0, 0, 0, 1, 0, 0, 1, 1, 0])
torch.Size([24])
Gradient norm: 3.831808958282036
tensor([0, 1, 1, 0, 0, 1, 0, 1, 0, 0])
torch.Size([24])
Gradient norm: 2.8386187911352083
tensor([1, 0, 0, 1, 0, 0, 0, 1, 1, 1])
torch.Size([24])
Gradient norm: 3.230147522159472
tensor([1, 1, 1, 0, 0, 1, 1, 0, 0, 1])
torch.Size([24])
Gradient norm: 1.7363364967032238
tensor([1, 0, 0, 0, 0, 0, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 1.8223309382789337
tensor([1, 1, 0,

Epoch 134/600 - Training:   0%|          | 0/29 [00:00<?, ?it/s]

tensor([0, 0, 1, 0, 1, 1, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 3.7757045897471686
tensor([1, 0, 0, 0, 0, 0, 1, 1, 1, 1])
torch.Size([24])
Gradient norm: 2.832974259456337
tensor([1, 0, 1, 0, 0, 0, 1, 1, 0, 0])
torch.Size([24])
Gradient norm: 2.3173048378207812
tensor([1, 0, 0, 1, 1, 1, 0, 0, 1, 0])
torch.Size([24])
Gradient norm: 1.774125558545846
tensor([0, 1, 0, 1, 1, 0, 1, 1, 0, 1])
torch.Size([24])
Gradient norm: 1.5676100515837914
tensor([0, 0, 1, 0, 1, 0, 1, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.463223446779473
tensor([1, 0, 0, 0, 1, 0, 0, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.5622541995755426
tensor([0, 1, 1, 0, 0, 1, 0, 1, 0, 0])
torch.Size([24])
Gradient norm: 3.437395441224373
tensor([1, 0, 0, 1, 0, 0, 0, 1, 1, 1])
torch.Size([24])
Gradient norm: 2.7725751719406437
tensor([1, 1, 1, 0, 0, 1, 1, 0, 0, 1])
torch.Size([24])
Gradient norm: 1.9077686809270524
tensor([1, 0, 0, 0, 0, 0, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 1.7688510549447614
tensor([1, 1, 

Epoch 135/600 - Training:   0%|          | 0/29 [00:00<?, ?it/s]

tensor([0, 0, 1, 0, 1, 1, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 3.2439233815079223
tensor([1, 0, 0, 0, 0, 0, 1, 1, 1, 1])
torch.Size([24])
Gradient norm: 2.094589993740499
tensor([1, 0, 1, 0, 0, 0, 1, 1, 0, 0])
torch.Size([24])
Gradient norm: 2.3824795868245228
tensor([1, 0, 0, 1, 1, 1, 0, 0, 1, 0])
torch.Size([24])
Gradient norm: 3.122109037886002
tensor([0, 1, 0, 1, 1, 0, 1, 1, 0, 1])
torch.Size([24])
Gradient norm: 2.003149189473878
tensor([0, 0, 1, 0, 1, 0, 1, 1, 1, 0])
torch.Size([24])
Gradient norm: 1.75306378476268
tensor([1, 0, 0, 0, 1, 0, 0, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.3368971232161986
tensor([0, 1, 1, 0, 0, 1, 0, 1, 0, 0])
torch.Size([24])
Gradient norm: 3.4840013616830725
tensor([1, 0, 0, 1, 0, 0, 0, 1, 1, 1])
torch.Size([24])
Gradient norm: 2.953271958249895
tensor([1, 1, 1, 0, 0, 1, 1, 0, 0, 1])
torch.Size([24])
Gradient norm: 2.370882430556153
tensor([1, 0, 0, 0, 0, 0, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 1.8804377204811893
tensor([1, 1, 0, 

Epoch 136/600 - Training:   0%|          | 0/29 [00:00<?, ?it/s]

tensor([0, 0, 1, 0, 1, 1, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 2.945588316164512
tensor([1, 0, 0, 0, 0, 0, 1, 1, 1, 1])
torch.Size([24])
Gradient norm: 2.2186870421446288
tensor([1, 0, 1, 0, 0, 0, 1, 1, 0, 0])
torch.Size([24])
Gradient norm: 2.678417515779921
tensor([1, 0, 0, 1, 1, 1, 0, 0, 1, 0])
torch.Size([24])
Gradient norm: 2.675946633135915
tensor([0, 1, 0, 1, 1, 0, 1, 1, 0, 1])
torch.Size([24])
Gradient norm: 2.104473223778595
tensor([0, 0, 1, 0, 1, 0, 1, 1, 1, 0])
torch.Size([24])
Gradient norm: 1.5221225987224063
tensor([1, 0, 0, 0, 1, 0, 0, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.5021492161211576
tensor([0, 1, 1, 0, 0, 1, 0, 1, 0, 0])
torch.Size([24])
Gradient norm: 3.3079757392898523
tensor([1, 0, 0, 1, 0, 0, 0, 1, 1, 1])
torch.Size([24])
Gradient norm: 3.214153433949214
tensor([1, 1, 1, 0, 0, 1, 1, 0, 0, 1])
torch.Size([24])
Gradient norm: 2.4999681743792848
tensor([1, 0, 0, 0, 0, 0, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 1.6815485656752975
tensor([1, 1, 0

Epoch 137/600 - Training:   0%|          | 0/29 [00:00<?, ?it/s]

tensor([0, 0, 1, 0, 1, 1, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 4.352791721066914
tensor([1, 0, 0, 0, 0, 0, 1, 1, 1, 1])
torch.Size([24])
Gradient norm: 2.0938673429459405
tensor([1, 0, 1, 0, 0, 0, 1, 1, 0, 0])
torch.Size([24])
Gradient norm: 1.9111905206118471
tensor([1, 0, 0, 1, 1, 1, 0, 0, 1, 0])
torch.Size([24])
Gradient norm: 2.4016002137488917
tensor([0, 1, 0, 1, 1, 0, 1, 1, 0, 1])
torch.Size([24])
Gradient norm: 2.7290139506347115
tensor([0, 0, 1, 0, 1, 0, 1, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.7288592783283425
tensor([1, 0, 0, 0, 1, 0, 0, 1, 1, 0])
torch.Size([24])
Gradient norm: 3.2193176428868706
tensor([0, 1, 1, 0, 0, 1, 0, 1, 0, 0])
torch.Size([24])
Gradient norm: 3.174757269418081
tensor([1, 0, 0, 1, 0, 0, 0, 1, 1, 1])
torch.Size([24])
Gradient norm: 2.82031838405745
tensor([1, 1, 1, 0, 0, 1, 1, 0, 0, 1])
torch.Size([24])
Gradient norm: 1.734173714910852
tensor([1, 0, 0, 0, 0, 0, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 1.6779111438883307
tensor([1, 1, 0

Epoch 138/600 - Training:   0%|          | 0/29 [00:00<?, ?it/s]

tensor([0, 0, 1, 0, 1, 1, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 4.2159067421452345
tensor([1, 0, 0, 0, 0, 0, 1, 1, 1, 1])
torch.Size([24])
Gradient norm: 2.487319359760143
tensor([1, 0, 1, 0, 0, 0, 1, 1, 0, 0])
torch.Size([24])
Gradient norm: 2.333180566115493
tensor([1, 0, 0, 1, 1, 1, 0, 0, 1, 0])
torch.Size([24])
Gradient norm: 2.149268572267128
tensor([0, 1, 0, 1, 1, 0, 1, 1, 0, 1])
torch.Size([24])
Gradient norm: 1.788578131790322
tensor([0, 0, 1, 0, 1, 0, 1, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.4651025651625185
tensor([1, 0, 0, 0, 1, 0, 0, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.8280278852739316
tensor([0, 1, 1, 0, 0, 1, 0, 1, 0, 0])
torch.Size([24])
Gradient norm: 2.9326914611128956
tensor([1, 0, 0, 1, 0, 0, 0, 1, 1, 1])
torch.Size([24])
Gradient norm: 2.562578852010652
tensor([1, 1, 1, 0, 0, 1, 1, 0, 0, 1])
torch.Size([24])
Gradient norm: 1.6869664946818497
tensor([1, 0, 0, 0, 0, 0, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 1.6117649286746019
tensor([1, 1, 0

Epoch 139/600 - Training:   0%|          | 0/29 [00:00<?, ?it/s]

tensor([0, 0, 1, 0, 1, 1, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 3.237415851647271
tensor([1, 0, 0, 0, 0, 0, 1, 1, 1, 1])
torch.Size([24])
Gradient norm: 2.425649260000869
tensor([1, 0, 1, 0, 0, 0, 1, 1, 0, 0])
torch.Size([24])
Gradient norm: 2.8045971333456534
tensor([1, 0, 0, 1, 1, 1, 0, 0, 1, 0])
torch.Size([24])
Gradient norm: 2.254862236582254
tensor([0, 1, 0, 1, 1, 0, 1, 1, 0, 1])
torch.Size([24])
Gradient norm: 1.7738271885686308
tensor([0, 0, 1, 0, 1, 0, 1, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.0783090572082994
tensor([1, 0, 0, 0, 1, 0, 0, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.965599783775757
tensor([0, 1, 1, 0, 0, 1, 0, 1, 0, 0])
torch.Size([24])
Gradient norm: 3.1152889460090245
tensor([1, 0, 0, 1, 0, 0, 0, 1, 1, 1])
torch.Size([24])
Gradient norm: 3.2829831212074656
tensor([1, 1, 1, 0, 0, 1, 1, 0, 0, 1])
torch.Size([24])
Gradient norm: 1.748311774011874
tensor([1, 0, 0, 0, 0, 0, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 1.6034798591078232
tensor([1, 1, 0

Epoch 140/600 - Training:   0%|          | 0/29 [00:00<?, ?it/s]

tensor([0, 0, 1, 0, 1, 1, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 3.838359203527825
tensor([1, 0, 0, 0, 0, 0, 1, 1, 1, 1])
torch.Size([24])
Gradient norm: 2.4799838924967923
tensor([1, 0, 1, 0, 0, 0, 1, 1, 0, 0])
torch.Size([24])
Gradient norm: 2.5189363141993066
tensor([1, 0, 0, 1, 1, 1, 0, 0, 1, 0])
torch.Size([24])
Gradient norm: 2.567287730925996
tensor([0, 1, 0, 1, 1, 0, 1, 1, 0, 1])
torch.Size([24])
Gradient norm: 1.732836865878868
tensor([0, 0, 1, 0, 1, 0, 1, 1, 1, 0])
torch.Size([24])
Gradient norm: 1.6077098710190032
tensor([1, 0, 0, 0, 1, 0, 0, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.969430611270718
tensor([0, 1, 1, 0, 0, 1, 0, 1, 0, 0])
torch.Size([24])
Gradient norm: 2.7711692713981617
tensor([1, 0, 0, 1, 0, 0, 0, 1, 1, 1])
torch.Size([24])
Gradient norm: 2.8546474205532473
tensor([1, 1, 1, 0, 0, 1, 1, 0, 0, 1])
torch.Size([24])
Gradient norm: 1.8529189214455244
tensor([1, 0, 0, 0, 0, 0, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 1.6387935718095454
tensor([1, 1, 

Epoch 141/600 - Training:   0%|          | 0/29 [00:00<?, ?it/s]

tensor([0, 0, 1, 0, 1, 1, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 2.827762902901278
tensor([1, 0, 0, 0, 0, 0, 1, 1, 1, 1])
torch.Size([24])
Gradient norm: 2.371524044217299
tensor([1, 0, 1, 0, 0, 0, 1, 1, 0, 0])
torch.Size([24])
Gradient norm: 2.449605001283828
tensor([1, 0, 0, 1, 1, 1, 0, 0, 1, 0])
torch.Size([24])
Gradient norm: 2.179373899782876
tensor([0, 1, 0, 1, 1, 0, 1, 1, 0, 1])
torch.Size([24])
Gradient norm: 1.895822396522707
tensor([0, 0, 1, 0, 1, 0, 1, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.668603879470338
tensor([1, 0, 0, 0, 1, 0, 0, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.4829421997590417
tensor([0, 1, 1, 0, 0, 1, 0, 1, 0, 0])
torch.Size([24])
Gradient norm: 2.923031290795042
tensor([1, 0, 0, 1, 0, 0, 0, 1, 1, 1])
torch.Size([24])
Gradient norm: 3.6431419883327165
tensor([1, 1, 1, 0, 0, 1, 1, 0, 0, 1])
torch.Size([24])
Gradient norm: 1.8790480058392
tensor([1, 0, 0, 0, 0, 0, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 1.9239726319332642
tensor([1, 1, 0, 0, 

Epoch 142/600 - Training:   0%|          | 0/29 [00:00<?, ?it/s]

tensor([0, 0, 1, 0, 1, 1, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 3.358185607153003
tensor([1, 0, 0, 0, 0, 0, 1, 1, 1, 1])
torch.Size([24])
Gradient norm: 2.428411076282446
tensor([1, 0, 1, 0, 0, 0, 1, 1, 0, 0])
torch.Size([24])
Gradient norm: 1.7776588247719207
tensor([1, 0, 0, 1, 1, 1, 0, 0, 1, 0])
torch.Size([24])
Gradient norm: 2.27367383350342
tensor([0, 1, 0, 1, 1, 0, 1, 1, 0, 1])
torch.Size([24])
Gradient norm: 1.5469717615359986
tensor([0, 0, 1, 0, 1, 0, 1, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.3238006184898725
tensor([1, 0, 0, 0, 1, 0, 0, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.3598854357324397
tensor([0, 1, 1, 0, 0, 1, 0, 1, 0, 0])
torch.Size([24])
Gradient norm: 2.695766004598534
tensor([1, 0, 0, 1, 0, 0, 0, 1, 1, 1])
torch.Size([24])
Gradient norm: 2.992021528908618
tensor([1, 1, 1, 0, 0, 1, 1, 0, 0, 1])
torch.Size([24])
Gradient norm: 1.7994804013709906
tensor([1, 0, 0, 0, 0, 0, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 1.6177663605833947
tensor([1, 1, 0,

Epoch 143/600 - Training:   0%|          | 0/29 [00:00<?, ?it/s]

tensor([0, 0, 1, 0, 1, 1, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 3.208238373446434
tensor([1, 0, 0, 0, 0, 0, 1, 1, 1, 1])
torch.Size([24])
Gradient norm: 1.5687547830149742
tensor([1, 0, 1, 0, 0, 0, 1, 1, 0, 0])
torch.Size([24])
Gradient norm: 2.031633224466543
tensor([1, 0, 0, 1, 1, 1, 0, 0, 1, 0])
torch.Size([24])
Gradient norm: 2.9714623481709013
tensor([0, 1, 0, 1, 1, 0, 1, 1, 0, 1])
torch.Size([24])
Gradient norm: 1.6518109359073843
tensor([0, 0, 1, 0, 1, 0, 1, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.319001615900978
tensor([1, 0, 0, 0, 1, 0, 0, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.1110691473779615
tensor([0, 1, 1, 0, 0, 1, 0, 1, 0, 0])
torch.Size([24])
Gradient norm: 2.7300868856934275
tensor([1, 0, 0, 1, 0, 0, 0, 1, 1, 1])
torch.Size([24])
Gradient norm: 2.9552381066528133
tensor([1, 1, 1, 0, 0, 1, 1, 0, 0, 1])
torch.Size([24])
Gradient norm: 1.9607448424183997
tensor([1, 0, 0, 0, 0, 0, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 2.3614229191485476
tensor([1, 1,

Epoch 144/600 - Training:   0%|          | 0/29 [00:00<?, ?it/s]

tensor([0, 0, 1, 0, 1, 1, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 3.9558739024311462
tensor([1, 0, 0, 0, 0, 0, 1, 1, 1, 1])
torch.Size([24])
Gradient norm: 1.7944051237134362
tensor([1, 0, 1, 0, 0, 0, 1, 1, 0, 0])
torch.Size([24])
Gradient norm: 2.7207440599457238
tensor([1, 0, 0, 1, 1, 1, 0, 0, 1, 0])
torch.Size([24])
Gradient norm: 3.0353678633573025
tensor([0, 1, 0, 1, 1, 0, 1, 1, 0, 1])
torch.Size([24])
Gradient norm: 1.8802930132785933
tensor([0, 0, 1, 0, 1, 0, 1, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.4448772322549757
tensor([1, 0, 0, 0, 1, 0, 0, 1, 1, 0])
torch.Size([24])
Gradient norm: 3.071994760678448
tensor([0, 1, 1, 0, 0, 1, 0, 1, 0, 0])
torch.Size([24])
Gradient norm: 3.7225677188152
tensor([1, 0, 0, 1, 0, 0, 0, 1, 1, 1])
torch.Size([24])
Gradient norm: 2.9543789076680667
tensor([1, 1, 1, 0, 0, 1, 1, 0, 0, 1])
torch.Size([24])
Gradient norm: 1.748524423024724
tensor([1, 0, 0, 0, 0, 0, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 2.1292321288828506
tensor([1, 1, 0

Epoch 145/600 - Training:   0%|          | 0/29 [00:00<?, ?it/s]

tensor([0, 0, 1, 0, 1, 1, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 3.6411520362223935
tensor([1, 0, 0, 0, 0, 0, 1, 1, 1, 1])
torch.Size([24])
Gradient norm: 2.0344871481638513
tensor([1, 0, 1, 0, 0, 0, 1, 1, 0, 0])
torch.Size([24])
Gradient norm: 2.5302233391154187
tensor([1, 0, 0, 1, 1, 1, 0, 0, 1, 0])
torch.Size([24])
Gradient norm: 2.2633439701619396
tensor([0, 1, 0, 1, 1, 0, 1, 1, 0, 1])
torch.Size([24])
Gradient norm: 2.3276653434801147
tensor([0, 0, 1, 0, 1, 0, 1, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.9168643503653557
tensor([1, 0, 0, 0, 1, 0, 0, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.8732839620578696
tensor([0, 1, 1, 0, 0, 1, 0, 1, 0, 0])
torch.Size([24])
Gradient norm: 3.7062207912209146
tensor([1, 0, 0, 1, 0, 0, 0, 1, 1, 1])
torch.Size([24])
Gradient norm: 3.2910523284793065
tensor([1, 1, 1, 0, 0, 1, 1, 0, 0, 1])
torch.Size([24])
Gradient norm: 1.8454288252517086
tensor([1, 0, 0, 0, 0, 0, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 1.7736494586960383
tensor([1,

Epoch 146/600 - Training:   0%|          | 0/29 [00:00<?, ?it/s]

tensor([0, 0, 1, 0, 1, 1, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 3.845038181888332
tensor([1, 0, 0, 0, 0, 0, 1, 1, 1, 1])
torch.Size([24])
Gradient norm: 2.863344053937579
tensor([1, 0, 1, 0, 0, 0, 1, 1, 0, 0])
torch.Size([24])
Gradient norm: 1.7722544617173253
tensor([1, 0, 0, 1, 1, 1, 0, 0, 1, 0])
torch.Size([24])
Gradient norm: 2.3286301507493308
tensor([0, 1, 0, 1, 1, 0, 1, 1, 0, 1])
torch.Size([24])
Gradient norm: 2.490583243726334
tensor([0, 0, 1, 0, 1, 0, 1, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.6632138683283864
tensor([1, 0, 0, 0, 1, 0, 0, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.4407324581125365
tensor([0, 1, 1, 0, 0, 1, 0, 1, 0, 0])
torch.Size([24])
Gradient norm: 3.2834237031082365
tensor([1, 0, 0, 1, 0, 0, 0, 1, 1, 1])
torch.Size([24])
Gradient norm: 3.0358322925249284
tensor([1, 1, 1, 0, 0, 1, 1, 0, 0, 1])
torch.Size([24])
Gradient norm: 1.6769537121637668
tensor([1, 0, 0, 0, 0, 0, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 1.7943610916826171
tensor([1, 1,

Epoch 147/600 - Training:   0%|          | 0/29 [00:00<?, ?it/s]

tensor([0, 0, 1, 0, 1, 1, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 3.3677542231802167
tensor([1, 0, 0, 0, 0, 0, 1, 1, 1, 1])
torch.Size([24])
Gradient norm: 2.062299606461698
tensor([1, 0, 1, 0, 0, 0, 1, 1, 0, 0])
torch.Size([24])
Gradient norm: 1.8891313363989126
tensor([1, 0, 0, 1, 1, 1, 0, 0, 1, 0])
torch.Size([24])
Gradient norm: 3.248269126102438
tensor([0, 1, 0, 1, 1, 0, 1, 1, 0, 1])
torch.Size([24])
Gradient norm: 1.8011523471726176
tensor([0, 0, 1, 0, 1, 0, 1, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.5558889104613334
tensor([1, 0, 0, 0, 1, 0, 0, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.9264800663350874
tensor([0, 1, 1, 0, 0, 1, 0, 1, 0, 0])
torch.Size([24])
Gradient norm: 3.0967754475327167
tensor([1, 0, 0, 1, 0, 0, 0, 1, 1, 1])
torch.Size([24])
Gradient norm: 3.0881185041027774
tensor([1, 1, 1, 0, 0, 1, 1, 0, 0, 1])
torch.Size([24])
Gradient norm: 1.6850489703065348
tensor([1, 0, 0, 0, 0, 0, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 1.776904642204141
tensor([1, 1,

Epoch 148/600 - Training:   0%|          | 0/29 [00:00<?, ?it/s]

tensor([0, 0, 1, 0, 1, 1, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 3.0951663948996444
tensor([1, 0, 0, 0, 0, 0, 1, 1, 1, 1])
torch.Size([24])
Gradient norm: 2.0277998472194088
tensor([1, 0, 1, 0, 0, 0, 1, 1, 0, 0])
torch.Size([24])
Gradient norm: 2.4642513792074383
tensor([1, 0, 0, 1, 1, 1, 0, 0, 1, 0])
torch.Size([24])
Gradient norm: 2.727083141967511
tensor([0, 1, 0, 1, 1, 0, 1, 1, 0, 1])
torch.Size([24])
Gradient norm: 2.0412298943351095
tensor([0, 0, 1, 0, 1, 0, 1, 1, 1, 0])
torch.Size([24])
Gradient norm: 1.8579850733616619
tensor([1, 0, 0, 0, 1, 0, 0, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.758212676693502
tensor([0, 1, 1, 0, 0, 1, 0, 1, 0, 0])
torch.Size([24])
Gradient norm: 2.541426318057585
tensor([1, 0, 0, 1, 0, 0, 0, 1, 1, 1])
torch.Size([24])
Gradient norm: 2.9103401490287255
tensor([1, 1, 1, 0, 0, 1, 1, 0, 0, 1])
torch.Size([24])
Gradient norm: 1.9597449594438845
tensor([1, 0, 0, 0, 0, 0, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 1.9066576028727318
tensor([1, 1,

Epoch 149/600 - Training:   0%|          | 0/29 [00:00<?, ?it/s]

tensor([0, 0, 1, 0, 1, 1, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 3.5659358395762166
tensor([1, 0, 0, 0, 0, 0, 1, 1, 1, 1])
torch.Size([24])
Gradient norm: 1.7367718069610358
tensor([1, 0, 1, 0, 0, 0, 1, 1, 0, 0])
torch.Size([24])
Gradient norm: 1.7579406988847182
tensor([1, 0, 0, 1, 1, 1, 0, 0, 1, 0])
torch.Size([24])
Gradient norm: 2.873187749523952
tensor([0, 1, 0, 1, 1, 0, 1, 1, 0, 1])
torch.Size([24])
Gradient norm: 2.1530258868741967
tensor([0, 0, 1, 0, 1, 0, 1, 1, 1, 0])
torch.Size([24])
Gradient norm: 1.8688751870206106
tensor([1, 0, 0, 0, 1, 0, 0, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.9948905528821963
tensor([0, 1, 1, 0, 0, 1, 0, 1, 0, 0])
torch.Size([24])
Gradient norm: 2.794921741383538
tensor([1, 0, 0, 1, 0, 0, 0, 1, 1, 1])
torch.Size([24])
Gradient norm: 3.3082836625496266
tensor([1, 1, 1, 0, 0, 1, 1, 0, 0, 1])
torch.Size([24])
Gradient norm: 1.888450345470622
tensor([1, 0, 0, 0, 0, 0, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 1.6505097387414727
tensor([1, 1,

Epoch 150/600 - Training:   0%|          | 0/29 [00:00<?, ?it/s]

tensor([0, 0, 1, 0, 1, 1, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 3.8220989875955533
tensor([1, 0, 0, 0, 0, 0, 1, 1, 1, 1])
torch.Size([24])
Gradient norm: 2.324795906072509
tensor([1, 0, 1, 0, 0, 0, 1, 1, 0, 0])
torch.Size([24])
Gradient norm: 2.491956380899589
tensor([1, 0, 0, 1, 1, 1, 0, 0, 1, 0])
torch.Size([24])
Gradient norm: 2.7005611914372207
tensor([0, 1, 0, 1, 1, 0, 1, 1, 0, 1])
torch.Size([24])
Gradient norm: 1.8080396037284612
tensor([0, 0, 1, 0, 1, 0, 1, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.3619557665353432
tensor([1, 0, 0, 0, 1, 0, 0, 1, 1, 0])
torch.Size([24])
Gradient norm: 1.6088083019857937
tensor([0, 1, 1, 0, 0, 1, 0, 1, 0, 0])
torch.Size([24])
Gradient norm: 2.4165259626589455
tensor([1, 0, 0, 1, 0, 0, 0, 1, 1, 1])
torch.Size([24])
Gradient norm: 2.462253956277647
tensor([1, 1, 1, 0, 0, 1, 1, 0, 0, 1])
torch.Size([24])
Gradient norm: 2.039564826886598
tensor([1, 0, 0, 0, 0, 0, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 1.6783221449867582
tensor([1, 1, 

Epoch 151/600 - Training:   0%|          | 0/29 [00:00<?, ?it/s]

tensor([0, 0, 1, 0, 1, 1, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 3.290023562827458
tensor([1, 0, 0, 0, 0, 0, 1, 1, 1, 1])
torch.Size([24])
Gradient norm: 1.7076220549047076
tensor([1, 0, 1, 0, 0, 0, 1, 1, 0, 0])
torch.Size([24])
Gradient norm: 2.2098633930870126
tensor([1, 0, 0, 1, 1, 1, 0, 0, 1, 0])
torch.Size([24])
Gradient norm: 2.23760499602943
tensor([0, 1, 0, 1, 1, 0, 1, 1, 0, 1])
torch.Size([24])
Gradient norm: 1.9968024615733642
tensor([0, 0, 1, 0, 1, 0, 1, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.8168360860052255
tensor([1, 0, 0, 0, 1, 0, 0, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.070519148978174
tensor([0, 1, 1, 0, 0, 1, 0, 1, 0, 0])
torch.Size([24])
Gradient norm: 3.6531605652279096
tensor([1, 0, 0, 1, 0, 0, 0, 1, 1, 1])
torch.Size([24])
Gradient norm: 3.186674832920321
tensor([1, 1, 1, 0, 0, 1, 1, 0, 0, 1])
torch.Size([24])
Gradient norm: 1.8815286799899655
tensor([1, 0, 0, 0, 0, 0, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 1.4173016826121103
tensor([1, 1, 0

Epoch 152/600 - Training:   0%|          | 0/29 [00:00<?, ?it/s]

tensor([0, 0, 1, 0, 1, 1, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 4.460451417991003
tensor([1, 0, 0, 0, 0, 0, 1, 1, 1, 1])
torch.Size([24])
Gradient norm: 1.580153021213635
tensor([1, 0, 1, 0, 0, 0, 1, 1, 0, 0])
torch.Size([24])
Gradient norm: 2.6774170362786975
tensor([1, 0, 0, 1, 1, 1, 0, 0, 1, 0])
torch.Size([24])
Gradient norm: 2.1801722928035794
tensor([0, 1, 0, 1, 1, 0, 1, 1, 0, 1])
torch.Size([24])
Gradient norm: 2.366134826347006
tensor([0, 0, 1, 0, 1, 0, 1, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.4536354496171944
tensor([1, 0, 0, 0, 1, 0, 0, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.862262977392711
tensor([0, 1, 1, 0, 0, 1, 0, 1, 0, 0])
torch.Size([24])
Gradient norm: 2.7272256195378137
tensor([1, 0, 0, 1, 0, 0, 0, 1, 1, 1])
torch.Size([24])
Gradient norm: 3.3578665731740585
tensor([1, 1, 1, 0, 0, 1, 1, 0, 0, 1])
torch.Size([24])
Gradient norm: 1.55381786208406
tensor([1, 0, 0, 0, 0, 0, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 1.7556488161733779
tensor([1, 1, 0,

Epoch 153/600 - Training:   0%|          | 0/29 [00:00<?, ?it/s]

tensor([0, 0, 1, 0, 1, 1, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 3.6930032975924165
tensor([1, 0, 0, 0, 0, 0, 1, 1, 1, 1])
torch.Size([24])
Gradient norm: 3.271368356159772
tensor([1, 0, 1, 0, 0, 0, 1, 1, 0, 0])
torch.Size([24])
Gradient norm: 2.844458484405113
tensor([1, 0, 0, 1, 1, 1, 0, 0, 1, 0])
torch.Size([24])
Gradient norm: 2.0658862818299193
tensor([0, 1, 0, 1, 1, 0, 1, 1, 0, 1])
torch.Size([24])
Gradient norm: 2.0410958169962257
tensor([0, 0, 1, 0, 1, 0, 1, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.5179383608263
tensor([1, 0, 0, 0, 1, 0, 0, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.582182952355605
tensor([0, 1, 1, 0, 0, 1, 0, 1, 0, 0])
torch.Size([24])
Gradient norm: 3.0979288835187178
tensor([1, 0, 0, 1, 0, 0, 0, 1, 1, 1])
torch.Size([24])
Gradient norm: 3.5393317271900138
tensor([1, 1, 1, 0, 0, 1, 1, 0, 0, 1])
torch.Size([24])
Gradient norm: 2.0522017730962343
tensor([1, 0, 0, 0, 0, 0, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 1.9869204852144091
tensor([1, 1, 0,

Epoch 154/600 - Training:   0%|          | 0/29 [00:00<?, ?it/s]

tensor([0, 0, 1, 0, 1, 1, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 3.514324055937111
tensor([1, 0, 0, 0, 0, 0, 1, 1, 1, 1])
torch.Size([24])
Gradient norm: 2.0233788336837577
tensor([1, 0, 1, 0, 0, 0, 1, 1, 0, 0])
torch.Size([24])
Gradient norm: 2.6900165071312503
tensor([1, 0, 0, 1, 1, 1, 0, 0, 1, 0])
torch.Size([24])
Gradient norm: 3.503539570276072
tensor([0, 1, 0, 1, 1, 0, 1, 1, 0, 1])
torch.Size([24])
Gradient norm: 1.6064539769945358
tensor([0, 0, 1, 0, 1, 0, 1, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.550854535897354
tensor([1, 0, 0, 0, 1, 0, 0, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.680925303086182
tensor([0, 1, 1, 0, 0, 1, 0, 1, 0, 0])
torch.Size([24])
Gradient norm: 2.8548506034431425
tensor([1, 0, 0, 1, 0, 0, 0, 1, 1, 1])
torch.Size([24])
Gradient norm: 3.502659885474841
tensor([1, 1, 1, 0, 0, 1, 1, 0, 0, 1])
torch.Size([24])
Gradient norm: 2.0053156826470095
tensor([1, 0, 0, 0, 0, 0, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 2.026427827512121
tensor([1, 1, 0,

Epoch 155/600 - Training:   0%|          | 0/29 [00:00<?, ?it/s]

tensor([0, 0, 1, 0, 1, 1, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 3.6115716460320777
tensor([1, 0, 0, 0, 0, 0, 1, 1, 1, 1])
torch.Size([24])
Gradient norm: 2.8859474537047753
tensor([1, 0, 1, 0, 0, 0, 1, 1, 0, 0])
torch.Size([24])
Gradient norm: 2.3032704787735185
tensor([1, 0, 0, 1, 1, 1, 0, 0, 1, 0])
torch.Size([24])
Gradient norm: 2.3226655209295517
tensor([0, 1, 0, 1, 1, 0, 1, 1, 0, 1])
torch.Size([24])
Gradient norm: 1.9707272777760045
tensor([0, 0, 1, 0, 1, 0, 1, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.0726597211287445
tensor([1, 0, 0, 0, 1, 0, 0, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.8624521968215606
tensor([0, 1, 1, 0, 0, 1, 0, 1, 0, 0])
torch.Size([24])
Gradient norm: 2.7558251017457986
tensor([1, 0, 0, 1, 0, 0, 0, 1, 1, 1])
torch.Size([24])
Gradient norm: 2.5063061946116867
tensor([1, 1, 1, 0, 0, 1, 1, 0, 0, 1])
torch.Size([24])
Gradient norm: 2.186444441970411
tensor([1, 0, 0, 0, 0, 0, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 2.1937719416409056
tensor([1, 

Epoch 156/600 - Training:   0%|          | 0/29 [00:00<?, ?it/s]

tensor([0, 0, 1, 0, 1, 1, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 3.2832278816891654
tensor([1, 0, 0, 0, 0, 0, 1, 1, 1, 1])
torch.Size([24])
Gradient norm: 2.103318431052123
tensor([1, 0, 1, 0, 0, 0, 1, 1, 0, 0])
torch.Size([24])
Gradient norm: 1.7106541666520878
tensor([1, 0, 0, 1, 1, 1, 0, 0, 1, 0])
torch.Size([24])
Gradient norm: 1.9496649861450532
tensor([0, 1, 0, 1, 1, 0, 1, 1, 0, 1])
torch.Size([24])
Gradient norm: 1.9616686790902547
tensor([0, 0, 1, 0, 1, 0, 1, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.0639478682442243
tensor([1, 0, 0, 0, 1, 0, 0, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.882649554391474
tensor([0, 1, 1, 0, 0, 1, 0, 1, 0, 0])
torch.Size([24])
Gradient norm: 4.044825364671599
tensor([1, 0, 0, 1, 0, 0, 0, 1, 1, 1])
torch.Size([24])
Gradient norm: 3.077418249283576
tensor([1, 1, 1, 0, 0, 1, 1, 0, 0, 1])
torch.Size([24])
Gradient norm: 1.7106440597611672
tensor([1, 0, 0, 0, 0, 0, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 1.7821830009290651
tensor([1, 1, 

Epoch 157/600 - Training:   0%|          | 0/29 [00:00<?, ?it/s]

tensor([0, 0, 1, 0, 1, 1, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 2.7910115159225186
tensor([1, 0, 0, 0, 0, 0, 1, 1, 1, 1])
torch.Size([24])
Gradient norm: 2.349583829330107
tensor([1, 0, 1, 0, 0, 0, 1, 1, 0, 0])
torch.Size([24])
Gradient norm: 2.2661301289440625
tensor([1, 0, 0, 1, 1, 1, 0, 0, 1, 0])
torch.Size([24])
Gradient norm: 3.110292157251505
tensor([0, 1, 0, 1, 1, 0, 1, 1, 0, 1])
torch.Size([24])
Gradient norm: 2.398286614239293
tensor([0, 0, 1, 0, 1, 0, 1, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.2565404310739328
tensor([1, 0, 0, 0, 1, 0, 0, 1, 1, 0])
torch.Size([24])
Gradient norm: 3.4299008828504256
tensor([0, 1, 1, 0, 0, 1, 0, 1, 0, 0])
torch.Size([24])
Gradient norm: 4.291621443194777
tensor([1, 0, 0, 1, 0, 0, 0, 1, 1, 1])
torch.Size([24])
Gradient norm: 2.5985487428583816
tensor([1, 1, 1, 0, 0, 1, 1, 0, 0, 1])
torch.Size([24])
Gradient norm: 1.818089135215854
tensor([1, 0, 0, 0, 0, 0, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 1.9860093742304172
tensor([1, 1, 0

Epoch 158/600 - Training:   0%|          | 0/29 [00:00<?, ?it/s]

tensor([0, 0, 1, 0, 1, 1, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 3.979398247793191
tensor([1, 0, 0, 0, 0, 0, 1, 1, 1, 1])
torch.Size([24])
Gradient norm: 2.0147871696338244
tensor([1, 0, 1, 0, 0, 0, 1, 1, 0, 0])
torch.Size([24])
Gradient norm: 2.2819880623695146
tensor([1, 0, 0, 1, 1, 1, 0, 0, 1, 0])
torch.Size([24])
Gradient norm: 2.52203300380709
tensor([0, 1, 0, 1, 1, 0, 1, 1, 0, 1])
torch.Size([24])
Gradient norm: 1.7454347382527549
tensor([0, 0, 1, 0, 1, 0, 1, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.2018331424153583
tensor([1, 0, 0, 0, 1, 0, 0, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.092038990807694
tensor([0, 1, 1, 0, 0, 1, 0, 1, 0, 0])
torch.Size([24])
Gradient norm: 3.3252381059822
tensor([1, 0, 0, 1, 0, 0, 0, 1, 1, 1])
torch.Size([24])
Gradient norm: 2.3991400973518764
tensor([1, 1, 1, 0, 0, 1, 1, 0, 0, 1])
torch.Size([24])
Gradient norm: 1.8766756837716192
tensor([1, 0, 0, 0, 0, 0, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 2.347414839641252
tensor([1, 1, 0, 0

Epoch 159/600 - Training:   0%|          | 0/29 [00:00<?, ?it/s]

tensor([0, 0, 1, 0, 1, 1, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 3.424062264065128
tensor([1, 0, 0, 0, 0, 0, 1, 1, 1, 1])
torch.Size([24])
Gradient norm: 1.9524849862589588
tensor([1, 0, 1, 0, 0, 0, 1, 1, 0, 0])
torch.Size([24])
Gradient norm: 2.1866877850252164
tensor([1, 0, 0, 1, 1, 1, 0, 0, 1, 0])
torch.Size([24])
Gradient norm: 2.874119102209864
tensor([0, 1, 0, 1, 1, 0, 1, 1, 0, 1])
torch.Size([24])
Gradient norm: 1.8048557546198591
tensor([0, 0, 1, 0, 1, 0, 1, 1, 1, 0])
torch.Size([24])
Gradient norm: 1.9697795001208616
tensor([1, 0, 0, 0, 1, 0, 0, 1, 1, 0])
torch.Size([24])
Gradient norm: 1.976058409891641
tensor([0, 1, 1, 0, 0, 1, 0, 1, 0, 0])
torch.Size([24])
Gradient norm: 3.4501481865048036
tensor([1, 0, 0, 1, 0, 0, 0, 1, 1, 1])
torch.Size([24])
Gradient norm: 2.600331420329003
tensor([1, 1, 1, 0, 0, 1, 1, 0, 0, 1])
torch.Size([24])
Gradient norm: 1.6711750149040838
tensor([1, 0, 0, 0, 0, 0, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 1.6524433092832431
tensor([1, 1, 

Epoch 160/600 - Training:   0%|          | 0/29 [00:00<?, ?it/s]

tensor([0, 0, 1, 0, 1, 1, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 3.2499604203678945
tensor([1, 0, 0, 0, 0, 0, 1, 1, 1, 1])
torch.Size([24])
Gradient norm: 2.707573947784626
tensor([1, 0, 1, 0, 0, 0, 1, 1, 0, 0])
torch.Size([24])
Gradient norm: 2.3078268035110443
tensor([1, 0, 0, 1, 1, 1, 0, 0, 1, 0])
torch.Size([24])
Gradient norm: 3.0443611481228436
tensor([0, 1, 0, 1, 1, 0, 1, 1, 0, 1])
torch.Size([24])
Gradient norm: 1.8768620681588828
tensor([0, 0, 1, 0, 1, 0, 1, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.292615399585406
tensor([1, 0, 0, 0, 1, 0, 0, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.787019679637706
tensor([0, 1, 1, 0, 0, 1, 0, 1, 0, 0])
torch.Size([24])
Gradient norm: 3.791480728726259
tensor([1, 0, 0, 1, 0, 0, 0, 1, 1, 1])
torch.Size([24])
Gradient norm: 2.8379151263042792
tensor([1, 1, 1, 0, 0, 1, 1, 0, 0, 1])
torch.Size([24])
Gradient norm: 1.716431830082536
tensor([1, 0, 0, 0, 0, 0, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 2.5653873398042855
tensor([1, 1, 0

Epoch 161/600 - Training:   0%|          | 0/29 [00:00<?, ?it/s]

tensor([0, 0, 1, 0, 1, 1, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 2.870852578229442
tensor([1, 0, 0, 0, 0, 0, 1, 1, 1, 1])
torch.Size([24])
Gradient norm: 1.8305424044324639
tensor([1, 0, 1, 0, 0, 0, 1, 1, 0, 0])
torch.Size([24])
Gradient norm: 2.6011179424393616
tensor([1, 0, 0, 1, 1, 1, 0, 0, 1, 0])
torch.Size([24])
Gradient norm: 3.008583863380443
tensor([0, 1, 0, 1, 1, 0, 1, 1, 0, 1])
torch.Size([24])
Gradient norm: 1.772450307010697
tensor([0, 0, 1, 0, 1, 0, 1, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.7020301929972015
tensor([1, 0, 0, 0, 1, 0, 0, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.9126844654552237
tensor([0, 1, 1, 0, 0, 1, 0, 1, 0, 0])
torch.Size([24])
Gradient norm: 2.8055942717158313
tensor([1, 0, 0, 1, 0, 0, 0, 1, 1, 1])
torch.Size([24])
Gradient norm: 2.363280863830938
tensor([1, 1, 1, 0, 0, 1, 1, 0, 0, 1])
torch.Size([24])
Gradient norm: 1.920630026068497
tensor([1, 0, 0, 0, 0, 0, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 1.8235894103416135
tensor([1, 1, 0

Epoch 162/600 - Training:   0%|          | 0/29 [00:00<?, ?it/s]

tensor([0, 0, 1, 0, 1, 1, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 3.601604897467011
tensor([1, 0, 0, 0, 0, 0, 1, 1, 1, 1])
torch.Size([24])
Gradient norm: 2.446672877687913
tensor([1, 0, 1, 0, 0, 0, 1, 1, 0, 0])
torch.Size([24])
Gradient norm: 2.0684608625563587
tensor([1, 0, 0, 1, 1, 1, 0, 0, 1, 0])
torch.Size([24])
Gradient norm: 3.33274675691173
tensor([0, 1, 0, 1, 1, 0, 1, 1, 0, 1])
torch.Size([24])
Gradient norm: 1.7003869398303852
tensor([0, 0, 1, 0, 1, 0, 1, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.4048244272468993
tensor([1, 0, 0, 0, 1, 0, 0, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.5311143735921657
tensor([0, 1, 1, 0, 0, 1, 0, 1, 0, 0])
torch.Size([24])
Gradient norm: 3.586063653789277
tensor([1, 0, 0, 1, 0, 0, 0, 1, 1, 1])
torch.Size([24])
Gradient norm: 3.2964495994319796
tensor([1, 1, 1, 0, 0, 1, 1, 0, 0, 1])
torch.Size([24])
Gradient norm: 1.7778751174013325
tensor([1, 0, 0, 0, 0, 0, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 1.978470136301332
tensor([1, 1, 0,

Epoch 163/600 - Training:   0%|          | 0/29 [00:00<?, ?it/s]

tensor([0, 0, 1, 0, 1, 1, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 2.5697291815306462
tensor([1, 0, 0, 0, 0, 0, 1, 1, 1, 1])
torch.Size([24])
Gradient norm: 2.1093968156013387
tensor([1, 0, 1, 0, 0, 0, 1, 1, 0, 0])
torch.Size([24])
Gradient norm: 2.2821769131750096
tensor([1, 0, 0, 1, 1, 1, 0, 0, 1, 0])
torch.Size([24])
Gradient norm: 2.572507142006665
tensor([0, 1, 0, 1, 1, 0, 1, 1, 0, 1])
torch.Size([24])
Gradient norm: 2.207664857275268
tensor([0, 0, 1, 0, 1, 0, 1, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.466548584729658
tensor([1, 0, 0, 0, 1, 0, 0, 1, 1, 0])
torch.Size([24])
Gradient norm: 3.640992050016115
tensor([0, 1, 1, 0, 0, 1, 0, 1, 0, 0])
torch.Size([24])
Gradient norm: 2.689373464870376
tensor([1, 0, 0, 1, 0, 0, 0, 1, 1, 1])
torch.Size([24])
Gradient norm: 3.266298958962738
tensor([1, 1, 1, 0, 0, 1, 1, 0, 0, 1])
torch.Size([24])
Gradient norm: 1.6228247606380752
tensor([1, 0, 0, 0, 0, 0, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 1.7056422390077386
tensor([1, 1, 0,

Epoch 164/600 - Training:   0%|          | 0/29 [00:00<?, ?it/s]

tensor([0, 0, 1, 0, 1, 1, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 3.9031176509036722
tensor([1, 0, 0, 0, 0, 0, 1, 1, 1, 1])
torch.Size([24])
Gradient norm: 1.9965533431059086
tensor([1, 0, 1, 0, 0, 0, 1, 1, 0, 0])
torch.Size([24])
Gradient norm: 1.9106622813585747
tensor([1, 0, 0, 1, 1, 1, 0, 0, 1, 0])
torch.Size([24])
Gradient norm: 3.6226164531631935
tensor([0, 1, 0, 1, 1, 0, 1, 1, 0, 1])
torch.Size([24])
Gradient norm: 1.614072333723526
tensor([0, 0, 1, 0, 1, 0, 1, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.6426965714033757
tensor([1, 0, 0, 0, 1, 0, 0, 1, 1, 0])
torch.Size([24])
Gradient norm: 3.1938566347882342
tensor([0, 1, 1, 0, 0, 1, 0, 1, 0, 0])
torch.Size([24])
Gradient norm: 2.6950693207706125
tensor([1, 0, 0, 1, 0, 0, 0, 1, 1, 1])
torch.Size([24])
Gradient norm: 3.304996876904087
tensor([1, 1, 1, 0, 0, 1, 1, 0, 0, 1])
torch.Size([24])
Gradient norm: 1.830729385497344
tensor([1, 0, 0, 0, 0, 0, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 1.771497625009849
tensor([1, 1, 

Epoch 165/600 - Training:   0%|          | 0/29 [00:00<?, ?it/s]

tensor([0, 0, 1, 0, 1, 1, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 3.9730952760065685
tensor([1, 0, 0, 0, 0, 0, 1, 1, 1, 1])
torch.Size([24])
Gradient norm: 2.2767349590548784
tensor([1, 0, 1, 0, 0, 0, 1, 1, 0, 0])
torch.Size([24])
Gradient norm: 2.6693983457506953
tensor([1, 0, 0, 1, 1, 1, 0, 0, 1, 0])
torch.Size([24])
Gradient norm: 3.5270946131228746
tensor([0, 1, 0, 1, 1, 0, 1, 1, 0, 1])
torch.Size([24])
Gradient norm: 1.585755372532505
tensor([0, 0, 1, 0, 1, 0, 1, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.6503747007060072
tensor([1, 0, 0, 0, 1, 0, 0, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.90118562312768
tensor([0, 1, 1, 0, 0, 1, 0, 1, 0, 0])
torch.Size([24])
Gradient norm: 2.772983277315315
tensor([1, 0, 0, 1, 0, 0, 0, 1, 1, 1])
torch.Size([24])
Gradient norm: 2.741949624753588
tensor([1, 1, 1, 0, 0, 1, 1, 0, 0, 1])
torch.Size([24])
Gradient norm: 1.8389144714673293
tensor([1, 0, 0, 0, 0, 0, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 1.731256771651027
tensor([1, 1, 0,

Epoch 166/600 - Training:   0%|          | 0/29 [00:00<?, ?it/s]

tensor([0, 0, 1, 0, 1, 1, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 3.410167103413322
tensor([1, 0, 0, 0, 0, 0, 1, 1, 1, 1])
torch.Size([24])
Gradient norm: 2.291378840655542
tensor([1, 0, 1, 0, 0, 0, 1, 1, 0, 0])
torch.Size([24])
Gradient norm: 2.1065473055760826
tensor([1, 0, 0, 1, 1, 1, 0, 0, 1, 0])
torch.Size([24])
Gradient norm: 2.624794859442133
tensor([0, 1, 0, 1, 1, 0, 1, 1, 0, 1])
torch.Size([24])
Gradient norm: 2.0786531796090615
tensor([0, 0, 1, 0, 1, 0, 1, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.0490376393675995
tensor([1, 0, 0, 0, 1, 0, 0, 1, 1, 0])
torch.Size([24])
Gradient norm: 3.2099537636036928
tensor([0, 1, 1, 0, 0, 1, 0, 1, 0, 0])
torch.Size([24])
Gradient norm: 3.0371143576097976
tensor([1, 0, 0, 1, 0, 0, 0, 1, 1, 1])
torch.Size([24])
Gradient norm: 2.8264192565821653
tensor([1, 1, 1, 0, 0, 1, 1, 0, 0, 1])
torch.Size([24])
Gradient norm: 2.3456138196823093
tensor([1, 0, 0, 0, 0, 0, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 1.84353277736025
tensor([1, 1, 0

Epoch 167/600 - Training:   0%|          | 0/29 [00:00<?, ?it/s]

tensor([0, 0, 1, 0, 1, 1, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 4.089944003875216
tensor([1, 0, 0, 0, 0, 0, 1, 1, 1, 1])
torch.Size([24])
Gradient norm: 2.200587976552858
tensor([1, 0, 1, 0, 0, 0, 1, 1, 0, 0])
torch.Size([24])
Gradient norm: 2.6825709968227915
tensor([1, 0, 0, 1, 1, 1, 0, 0, 1, 0])
torch.Size([24])
Gradient norm: 2.719705311744801
tensor([0, 1, 0, 1, 1, 0, 1, 1, 0, 1])
torch.Size([24])
Gradient norm: 1.9324202903417542
tensor([0, 0, 1, 0, 1, 0, 1, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.3875482534222185
tensor([1, 0, 0, 0, 1, 0, 0, 1, 1, 0])
torch.Size([24])
Gradient norm: 3.8591071825475383
tensor([0, 1, 1, 0, 0, 1, 0, 1, 0, 0])
torch.Size([24])
Gradient norm: 2.868345614900056
tensor([1, 0, 0, 1, 0, 0, 0, 1, 1, 1])
torch.Size([24])
Gradient norm: 4.079781654736966
tensor([1, 1, 1, 0, 0, 1, 1, 0, 0, 1])
torch.Size([24])
Gradient norm: 1.6757936115804142
tensor([1, 0, 0, 0, 0, 0, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 2.013744564294347
tensor([1, 1, 0,

Epoch 168/600 - Training:   0%|          | 0/29 [00:00<?, ?it/s]

tensor([0, 0, 1, 0, 1, 1, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 3.078895131942314
tensor([1, 0, 0, 0, 0, 0, 1, 1, 1, 1])
torch.Size([24])
Gradient norm: 2.5110814319488095
tensor([1, 0, 1, 0, 0, 0, 1, 1, 0, 0])
torch.Size([24])
Gradient norm: 2.6242404090283
tensor([1, 0, 0, 1, 1, 1, 0, 0, 1, 0])
torch.Size([24])
Gradient norm: 2.71894532530138
tensor([0, 1, 0, 1, 1, 0, 1, 1, 0, 1])
torch.Size([24])
Gradient norm: 2.3297078918468825
tensor([0, 0, 1, 0, 1, 0, 1, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.840125795736999
tensor([1, 0, 0, 0, 1, 0, 0, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.7957171811776185
tensor([0, 1, 1, 0, 0, 1, 0, 1, 0, 0])
torch.Size([24])
Gradient norm: 3.7039951110410487
tensor([1, 0, 0, 1, 0, 0, 0, 1, 1, 1])
torch.Size([24])
Gradient norm: 3.3541727116811693
tensor([1, 1, 1, 0, 0, 1, 1, 0, 0, 1])
torch.Size([24])
Gradient norm: 2.025804675867137
tensor([1, 0, 0, 0, 0, 0, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 1.733338997528465
tensor([1, 1, 0, 0,

Epoch 169/600 - Training:   0%|          | 0/29 [00:00<?, ?it/s]

tensor([0, 0, 1, 0, 1, 1, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 3.1868751943595965
tensor([1, 0, 0, 0, 0, 0, 1, 1, 1, 1])
torch.Size([24])
Gradient norm: 2.2539762407810153
tensor([1, 0, 1, 0, 0, 0, 1, 1, 0, 0])
torch.Size([24])
Gradient norm: 2.292429135100924
tensor([1, 0, 0, 1, 1, 1, 0, 0, 1, 0])
torch.Size([24])
Gradient norm: 2.375481904424644
tensor([0, 1, 0, 1, 1, 0, 1, 1, 0, 1])
torch.Size([24])
Gradient norm: 2.334168172348054
tensor([0, 0, 1, 0, 1, 0, 1, 1, 1, 0])
torch.Size([24])
Gradient norm: 1.8233498084975952
tensor([1, 0, 0, 0, 1, 0, 0, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.678969346363531
tensor([0, 1, 1, 0, 0, 1, 0, 1, 0, 0])
torch.Size([24])
Gradient norm: 2.9984848711161964
tensor([1, 0, 0, 1, 0, 0, 0, 1, 1, 1])
torch.Size([24])
Gradient norm: 2.3247259389821457
tensor([1, 1, 1, 0, 0, 1, 1, 0, 0, 1])
torch.Size([24])
Gradient norm: 2.113745556409947
tensor([1, 0, 0, 0, 0, 0, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 1.4847035922301672
tensor([1, 1, 0

Epoch 170/600 - Training:   0%|          | 0/29 [00:00<?, ?it/s]

tensor([0, 0, 1, 0, 1, 1, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 4.285139626547539
tensor([1, 0, 0, 0, 0, 0, 1, 1, 1, 1])
torch.Size([24])
Gradient norm: 1.7842002095260907
tensor([1, 0, 1, 0, 0, 0, 1, 1, 0, 0])
torch.Size([24])
Gradient norm: 2.665408833279439
tensor([1, 0, 0, 1, 1, 1, 0, 0, 1, 0])
torch.Size([24])
Gradient norm: 3.4114655012832005
tensor([0, 1, 0, 1, 1, 0, 1, 1, 0, 1])
torch.Size([24])
Gradient norm: 2.175798839006549
tensor([0, 0, 1, 0, 1, 0, 1, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.5194604939644942
tensor([1, 0, 0, 0, 1, 0, 0, 1, 1, 0])
torch.Size([24])
Gradient norm: 3.452073118236563
tensor([0, 1, 1, 0, 0, 1, 0, 1, 0, 0])
torch.Size([24])
Gradient norm: 2.25129592164658
tensor([1, 0, 0, 1, 0, 0, 0, 1, 1, 1])
torch.Size([24])
Gradient norm: 2.6375739161499543
tensor([1, 1, 1, 0, 0, 1, 1, 0, 0, 1])
torch.Size([24])
Gradient norm: 1.9315764245661673
tensor([1, 0, 0, 0, 0, 0, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 1.5610223024278644
tensor([1, 1, 0,

Epoch 171/600 - Training:   0%|          | 0/29 [00:00<?, ?it/s]

tensor([0, 0, 1, 0, 1, 1, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 3.9885832264116914
tensor([1, 0, 0, 0, 0, 0, 1, 1, 1, 1])
torch.Size([24])
Gradient norm: 2.648796902483185
tensor([1, 0, 1, 0, 0, 0, 1, 1, 0, 0])
torch.Size([24])
Gradient norm: 2.339575927466236
tensor([1, 0, 0, 1, 1, 1, 0, 0, 1, 0])
torch.Size([24])
Gradient norm: 3.0144597456491127
tensor([0, 1, 0, 1, 1, 0, 1, 1, 0, 1])
torch.Size([24])
Gradient norm: 1.9233192737783706
tensor([0, 0, 1, 0, 1, 0, 1, 1, 1, 0])
torch.Size([24])
Gradient norm: 1.8390875263623827
tensor([1, 0, 0, 0, 1, 0, 0, 1, 1, 0])
torch.Size([24])
Gradient norm: 3.97643238385244
tensor([0, 1, 1, 0, 0, 1, 0, 1, 0, 0])
torch.Size([24])
Gradient norm: 2.9121635233525067
tensor([1, 0, 0, 1, 0, 0, 0, 1, 1, 1])
torch.Size([24])
Gradient norm: 2.488120452520552
tensor([1, 1, 1, 0, 0, 1, 1, 0, 0, 1])
torch.Size([24])
Gradient norm: 1.83180853193168
tensor([1, 0, 0, 0, 0, 0, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 1.6671523037684688
tensor([1, 1, 0, 

Epoch 172/600 - Training:   0%|          | 0/29 [00:00<?, ?it/s]

tensor([0, 0, 1, 0, 1, 1, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 3.271677853399367
tensor([1, 0, 0, 0, 0, 0, 1, 1, 1, 1])
torch.Size([24])
Gradient norm: 2.1608218990306285
tensor([1, 0, 1, 0, 0, 0, 1, 1, 0, 0])
torch.Size([24])
Gradient norm: 1.7682272362642926
tensor([1, 0, 0, 1, 1, 1, 0, 0, 1, 0])
torch.Size([24])
Gradient norm: 3.117014916184215
tensor([0, 1, 0, 1, 1, 0, 1, 1, 0, 1])
torch.Size([24])
Gradient norm: 2.3416564446341916
tensor([0, 0, 1, 0, 1, 0, 1, 1, 1, 0])
torch.Size([24])
Gradient norm: 1.9166314648547722
tensor([1, 0, 0, 0, 1, 0, 0, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.685285617060388
tensor([0, 1, 1, 0, 0, 1, 0, 1, 0, 0])
torch.Size([24])
Gradient norm: 3.4737866145700096
tensor([1, 0, 0, 1, 0, 0, 0, 1, 1, 1])
torch.Size([24])
Gradient norm: 3.8797675121293542
tensor([1, 1, 1, 0, 0, 1, 1, 0, 0, 1])
torch.Size([24])
Gradient norm: 1.7213365195232486
tensor([1, 0, 0, 0, 0, 0, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 1.9246744637446886
tensor([1, 1,

Epoch 173/600 - Training:   0%|          | 0/29 [00:00<?, ?it/s]

tensor([0, 0, 1, 0, 1, 1, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 3.1580838292399744
tensor([1, 0, 0, 0, 0, 0, 1, 1, 1, 1])
torch.Size([24])
Gradient norm: 2.545390665775055
tensor([1, 0, 1, 0, 0, 0, 1, 1, 0, 0])
torch.Size([24])
Gradient norm: 2.0963234335186076
tensor([1, 0, 0, 1, 1, 1, 0, 0, 1, 0])
torch.Size([24])
Gradient norm: 2.50599316976767
tensor([0, 1, 0, 1, 1, 0, 1, 1, 0, 1])
torch.Size([24])
Gradient norm: 1.9249827151495906
tensor([0, 0, 1, 0, 1, 0, 1, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.5296266416666655
tensor([1, 0, 0, 0, 1, 0, 0, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.643139761294009
tensor([0, 1, 1, 0, 0, 1, 0, 1, 0, 0])
torch.Size([24])
Gradient norm: 2.544577415807604
tensor([1, 0, 0, 1, 0, 0, 0, 1, 1, 1])
torch.Size([24])
Gradient norm: 2.7280580232313847
tensor([1, 1, 1, 0, 0, 1, 1, 0, 0, 1])
torch.Size([24])
Gradient norm: 2.3747671239914716
tensor([1, 0, 0, 0, 0, 0, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 1.82995745775084
tensor([1, 1, 0, 

Epoch 174/600 - Training:   0%|          | 0/29 [00:00<?, ?it/s]

tensor([0, 0, 1, 0, 1, 1, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 2.81467733252999
tensor([1, 0, 0, 0, 0, 0, 1, 1, 1, 1])
torch.Size([24])
Gradient norm: 2.2822192324132993
tensor([1, 0, 1, 0, 0, 0, 1, 1, 0, 0])
torch.Size([24])
Gradient norm: 2.4026915756821454
tensor([1, 0, 0, 1, 1, 1, 0, 0, 1, 0])
torch.Size([24])
Gradient norm: 2.7689466962098965
tensor([0, 1, 0, 1, 1, 0, 1, 1, 0, 1])
torch.Size([24])
Gradient norm: 2.4799010540345625
tensor([0, 0, 1, 0, 1, 0, 1, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.8589118336922716
tensor([1, 0, 0, 0, 1, 0, 0, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.3302581082862655
tensor([0, 1, 1, 0, 0, 1, 0, 1, 0, 0])
torch.Size([24])
Gradient norm: 3.417677695859325
tensor([1, 0, 0, 1, 0, 0, 0, 1, 1, 1])
torch.Size([24])
Gradient norm: 3.226539493796798
tensor([1, 1, 1, 0, 0, 1, 1, 0, 0, 1])
torch.Size([24])
Gradient norm: 1.824982402230488
tensor([1, 0, 0, 0, 0, 0, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 2.3107411934077424
tensor([1, 1, 0

Epoch 175/600 - Training:   0%|          | 0/29 [00:00<?, ?it/s]

tensor([0, 0, 1, 0, 1, 1, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 3.386882944262737
tensor([1, 0, 0, 0, 0, 0, 1, 1, 1, 1])
torch.Size([24])
Gradient norm: 1.564663402340187
tensor([1, 0, 1, 0, 0, 0, 1, 1, 0, 0])
torch.Size([24])
Gradient norm: 2.610065272133503
tensor([1, 0, 0, 1, 1, 1, 0, 0, 1, 0])
torch.Size([24])
Gradient norm: 1.918351592251057
tensor([0, 1, 0, 1, 1, 0, 1, 1, 0, 1])
torch.Size([24])
Gradient norm: 2.122647338267998
tensor([0, 0, 1, 0, 1, 0, 1, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.3894706289212424
tensor([1, 0, 0, 0, 1, 0, 0, 1, 1, 0])
torch.Size([24])
Gradient norm: 3.0469462455627516
tensor([0, 1, 1, 0, 0, 1, 0, 1, 0, 0])
torch.Size([24])
Gradient norm: 3.2611842309359047
tensor([1, 0, 0, 1, 0, 0, 0, 1, 1, 1])
torch.Size([24])
Gradient norm: 3.2933123328285934
tensor([1, 1, 1, 0, 0, 1, 1, 0, 0, 1])
torch.Size([24])
Gradient norm: 1.7614122673462143
tensor([1, 0, 0, 0, 0, 0, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 1.7434660673432865
tensor([1, 1, 0

Epoch 176/600 - Training:   0%|          | 0/29 [00:00<?, ?it/s]

tensor([0, 0, 1, 0, 1, 1, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 3.0428623329105893
tensor([1, 0, 0, 0, 0, 0, 1, 1, 1, 1])
torch.Size([24])
Gradient norm: 2.266692663810644
tensor([1, 0, 1, 0, 0, 0, 1, 1, 0, 0])
torch.Size([24])
Gradient norm: 2.3002879397560836
tensor([1, 0, 0, 1, 1, 1, 0, 0, 1, 0])
torch.Size([24])
Gradient norm: 2.505325803547292
tensor([0, 1, 0, 1, 1, 0, 1, 1, 0, 1])
torch.Size([24])
Gradient norm: 2.5817192557836495
tensor([0, 0, 1, 0, 1, 0, 1, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.1045509776124263
tensor([1, 0, 0, 0, 1, 0, 0, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.4440537921247967
tensor([0, 1, 1, 0, 0, 1, 0, 1, 0, 0])
torch.Size([24])
Gradient norm: 3.0756518691789827
tensor([1, 0, 0, 1, 0, 0, 0, 1, 1, 1])
torch.Size([24])
Gradient norm: 2.828499737894914
tensor([1, 1, 1, 0, 0, 1, 1, 0, 0, 1])
torch.Size([24])
Gradient norm: 1.9619078238528496
tensor([1, 0, 0, 0, 0, 0, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 2.1165086334340217
tensor([1, 1,

Epoch 177/600 - Training:   0%|          | 0/29 [00:00<?, ?it/s]

tensor([0, 0, 1, 0, 1, 1, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 3.4618672057912807
tensor([1, 0, 0, 0, 0, 0, 1, 1, 1, 1])
torch.Size([24])
Gradient norm: 2.205852178695642
tensor([1, 0, 1, 0, 0, 0, 1, 1, 0, 0])
torch.Size([24])
Gradient norm: 2.4739941727372177
tensor([1, 0, 0, 1, 1, 1, 0, 0, 1, 0])
torch.Size([24])
Gradient norm: 2.713246384613339
tensor([0, 1, 0, 1, 1, 0, 1, 1, 0, 1])
torch.Size([24])
Gradient norm: 2.4121717690681663
tensor([0, 0, 1, 0, 1, 0, 1, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.602964126174073
tensor([1, 0, 0, 0, 1, 0, 0, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.8727429526676658
tensor([0, 1, 1, 0, 0, 1, 0, 1, 0, 0])
torch.Size([24])
Gradient norm: 3.4504843614807723
tensor([1, 0, 0, 1, 0, 0, 0, 1, 1, 1])
torch.Size([24])
Gradient norm: 3.7329169876991113
tensor([1, 1, 1, 0, 0, 1, 1, 0, 0, 1])
torch.Size([24])
Gradient norm: 2.0455536900029014
tensor([1, 0, 0, 0, 0, 0, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 1.7734073590326622
tensor([1, 1,

Epoch 178/600 - Training:   0%|          | 0/29 [00:00<?, ?it/s]

tensor([0, 0, 1, 0, 1, 1, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 3.86600231796381
tensor([1, 0, 0, 0, 0, 0, 1, 1, 1, 1])
torch.Size([24])
Gradient norm: 2.5196667075717385
tensor([1, 0, 1, 0, 0, 0, 1, 1, 0, 0])
torch.Size([24])
Gradient norm: 2.129984546665741
tensor([1, 0, 0, 1, 1, 1, 0, 0, 1, 0])
torch.Size([24])
Gradient norm: 2.8790020402795014
tensor([0, 1, 0, 1, 1, 0, 1, 1, 0, 1])
torch.Size([24])
Gradient norm: 2.1214380237804216
tensor([0, 0, 1, 0, 1, 0, 1, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.0969481617033026
tensor([1, 0, 0, 0, 1, 0, 0, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.7410086295578697
tensor([0, 1, 1, 0, 0, 1, 0, 1, 0, 0])
torch.Size([24])
Gradient norm: 2.9918931224441847
tensor([1, 0, 0, 1, 0, 0, 0, 1, 1, 1])
torch.Size([24])
Gradient norm: 2.830573994056073
tensor([1, 1, 1, 0, 0, 1, 1, 0, 0, 1])
torch.Size([24])
Gradient norm: 2.5181413526668615
tensor([1, 0, 0, 0, 0, 0, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 2.027923292433164
tensor([1, 1, 0

Epoch 179/600 - Training:   0%|          | 0/29 [00:00<?, ?it/s]

tensor([0, 0, 1, 0, 1, 1, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 3.4592485192395728
tensor([1, 0, 0, 0, 0, 0, 1, 1, 1, 1])
torch.Size([24])
Gradient norm: 2.498271863500578
tensor([1, 0, 1, 0, 0, 0, 1, 1, 0, 0])
torch.Size([24])
Gradient norm: 2.2714856795104876
tensor([1, 0, 0, 1, 1, 1, 0, 0, 1, 0])
torch.Size([24])
Gradient norm: 2.5708927810249813
tensor([0, 1, 0, 1, 1, 0, 1, 1, 0, 1])
torch.Size([24])
Gradient norm: 2.052915078119941
tensor([0, 0, 1, 0, 1, 0, 1, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.1919050390786463
tensor([1, 0, 0, 0, 1, 0, 0, 1, 1, 0])
torch.Size([24])
Gradient norm: 3.271479429614846
tensor([0, 1, 1, 0, 0, 1, 0, 1, 0, 0])
torch.Size([24])
Gradient norm: 2.934139818109379
tensor([1, 0, 0, 1, 0, 0, 0, 1, 1, 1])
torch.Size([24])
Gradient norm: 3.0291364302149884
tensor([1, 1, 1, 0, 0, 1, 1, 0, 0, 1])
torch.Size([24])
Gradient norm: 1.8909343760840307
tensor([1, 0, 0, 0, 0, 0, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 1.6368343873949014
tensor([1, 1, 

Epoch 180/600 - Training:   0%|          | 0/29 [00:00<?, ?it/s]

tensor([0, 0, 1, 0, 1, 1, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 3.973708384111472
tensor([1, 0, 0, 0, 0, 0, 1, 1, 1, 1])
torch.Size([24])
Gradient norm: 1.833304673348054
tensor([1, 0, 1, 0, 0, 0, 1, 1, 0, 0])
torch.Size([24])
Gradient norm: 2.148295454465351
tensor([1, 0, 0, 1, 1, 1, 0, 0, 1, 0])
torch.Size([24])
Gradient norm: 2.084759792077463
tensor([0, 1, 0, 1, 1, 0, 1, 1, 0, 1])
torch.Size([24])
Gradient norm: 1.7776793604015162
tensor([0, 0, 1, 0, 1, 0, 1, 1, 1, 0])
torch.Size([24])
Gradient norm: 1.965924731765127
tensor([1, 0, 0, 0, 1, 0, 0, 1, 1, 0])
torch.Size([24])
Gradient norm: 3.2925143995764286
tensor([0, 1, 1, 0, 0, 1, 0, 1, 0, 0])
torch.Size([24])
Gradient norm: 2.2340852275016565
tensor([1, 0, 0, 1, 0, 0, 0, 1, 1, 1])
torch.Size([24])
Gradient norm: 3.232354626131717
tensor([1, 1, 1, 0, 0, 1, 1, 0, 0, 1])
torch.Size([24])
Gradient norm: 1.8157408613697685
tensor([1, 0, 0, 0, 0, 0, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 1.7273447289984862
tensor([1, 1, 0,

Epoch 181/600 - Training:   0%|          | 0/29 [00:00<?, ?it/s]

tensor([0, 0, 1, 0, 1, 1, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 3.136679538859917
tensor([1, 0, 0, 0, 0, 0, 1, 1, 1, 1])
torch.Size([24])
Gradient norm: 1.970431927326917
tensor([1, 0, 1, 0, 0, 0, 1, 1, 0, 0])
torch.Size([24])
Gradient norm: 2.7318353256449734
tensor([1, 0, 0, 1, 1, 1, 0, 0, 1, 0])
torch.Size([24])
Gradient norm: 2.343444561714664
tensor([0, 1, 0, 1, 1, 0, 1, 1, 0, 1])
torch.Size([24])
Gradient norm: 1.940901598196316
tensor([0, 0, 1, 0, 1, 0, 1, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.3241234825131487
tensor([1, 0, 0, 0, 1, 0, 0, 1, 1, 0])
torch.Size([24])
Gradient norm: 3.065954151467627
tensor([0, 1, 1, 0, 0, 1, 0, 1, 0, 0])
torch.Size([24])
Gradient norm: 3.374112322268496
tensor([1, 0, 0, 1, 0, 0, 0, 1, 1, 1])
torch.Size([24])
Gradient norm: 2.649020921530893
tensor([1, 1, 1, 0, 0, 1, 1, 0, 0, 1])
torch.Size([24])
Gradient norm: 1.7612761838645654
tensor([1, 0, 0, 0, 0, 0, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 2.217593412622364
tensor([1, 1, 0, 0

Epoch 182/600 - Training:   0%|          | 0/29 [00:00<?, ?it/s]

tensor([0, 0, 1, 0, 1, 1, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 3.252229487543954
tensor([1, 0, 0, 0, 0, 0, 1, 1, 1, 1])
torch.Size([24])
Gradient norm: 1.8771970895234982
tensor([1, 0, 1, 0, 0, 0, 1, 1, 0, 0])
torch.Size([24])
Gradient norm: 2.2984274312624096
tensor([1, 0, 0, 1, 1, 1, 0, 0, 1, 0])
torch.Size([24])
Gradient norm: 2.2118935713259473
tensor([0, 1, 0, 1, 1, 0, 1, 1, 0, 1])
torch.Size([24])
Gradient norm: 1.6333721386737605
tensor([0, 0, 1, 0, 1, 0, 1, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.122022327435988
tensor([1, 0, 0, 0, 1, 0, 0, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.7657757502949276
tensor([0, 1, 1, 0, 0, 1, 0, 1, 0, 0])
torch.Size([24])
Gradient norm: 2.898991712738066
tensor([1, 0, 0, 1, 0, 0, 0, 1, 1, 1])
torch.Size([24])
Gradient norm: 3.0097466820219183
tensor([1, 1, 1, 0, 0, 1, 1, 0, 0, 1])
torch.Size([24])
Gradient norm: 1.6981057506345618
tensor([1, 0, 0, 0, 0, 0, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 1.8303659675904596
tensor([1, 1,

Epoch 183/600 - Training:   0%|          | 0/29 [00:00<?, ?it/s]

tensor([0, 0, 1, 0, 1, 1, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 3.4452043543543436
tensor([1, 0, 0, 0, 0, 0, 1, 1, 1, 1])
torch.Size([24])
Gradient norm: 1.8247413682979177
tensor([1, 0, 1, 0, 0, 0, 1, 1, 0, 0])
torch.Size([24])
Gradient norm: 2.1966017556069786
tensor([1, 0, 0, 1, 1, 1, 0, 0, 1, 0])
torch.Size([24])
Gradient norm: 2.842780986416235
tensor([0, 1, 0, 1, 1, 0, 1, 1, 0, 1])
torch.Size([24])
Gradient norm: 1.7994277609298954
tensor([0, 0, 1, 0, 1, 0, 1, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.1654325361699627
tensor([1, 0, 0, 0, 1, 0, 0, 1, 1, 0])
torch.Size([24])
Gradient norm: 3.030740507448111
tensor([0, 1, 1, 0, 0, 1, 0, 1, 0, 0])
torch.Size([24])
Gradient norm: 2.6868103999542474
tensor([1, 0, 0, 1, 0, 0, 0, 1, 1, 1])
torch.Size([24])
Gradient norm: 2.488991549364059
tensor([1, 1, 1, 0, 0, 1, 1, 0, 0, 1])
torch.Size([24])
Gradient norm: 2.152509215672574
tensor([1, 0, 0, 0, 0, 0, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 2.0056560984778167
tensor([1, 1, 

Epoch 184/600 - Training:   0%|          | 0/29 [00:00<?, ?it/s]

tensor([0, 0, 1, 0, 1, 1, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 2.8900824645664094
tensor([1, 0, 0, 0, 0, 0, 1, 1, 1, 1])
torch.Size([24])
Gradient norm: 2.5654091215844885
tensor([1, 0, 1, 0, 0, 0, 1, 1, 0, 0])
torch.Size([24])
Gradient norm: 2.6812744742340704
tensor([1, 0, 0, 1, 1, 1, 0, 0, 1, 0])
torch.Size([24])
Gradient norm: 2.588178214052848
tensor([0, 1, 0, 1, 1, 0, 1, 1, 0, 1])
torch.Size([24])
Gradient norm: 2.4274735167425825
tensor([0, 0, 1, 0, 1, 0, 1, 1, 1, 0])
torch.Size([24])
Gradient norm: 1.731841587039324
tensor([1, 0, 0, 0, 1, 0, 0, 1, 1, 0])
torch.Size([24])
Gradient norm: 3.316477985848002
tensor([0, 1, 1, 0, 0, 1, 0, 1, 0, 0])
torch.Size([24])
Gradient norm: 3.5940146436091913
tensor([1, 0, 0, 1, 0, 0, 0, 1, 1, 1])
torch.Size([24])
Gradient norm: 2.3978830378383473
tensor([1, 1, 1, 0, 0, 1, 1, 0, 0, 1])
torch.Size([24])
Gradient norm: 2.0694584018234337
tensor([1, 0, 0, 0, 0, 0, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 1.752027299449584
tensor([1, 1, 

Epoch 185/600 - Training:   0%|          | 0/29 [00:00<?, ?it/s]

tensor([0, 0, 1, 0, 1, 1, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 2.609452136255174
tensor([1, 0, 0, 0, 0, 0, 1, 1, 1, 1])
torch.Size([24])
Gradient norm: 2.8732989689154604
tensor([1, 0, 1, 0, 0, 0, 1, 1, 0, 0])
torch.Size([24])
Gradient norm: 2.303110349080814
tensor([1, 0, 0, 1, 1, 1, 0, 0, 1, 0])
torch.Size([24])
Gradient norm: 2.714609434467585
tensor([0, 1, 0, 1, 1, 0, 1, 1, 0, 1])
torch.Size([24])
Gradient norm: 2.2763205371840036
tensor([0, 0, 1, 0, 1, 0, 1, 1, 1, 0])
torch.Size([24])
Gradient norm: 1.602156869878547
tensor([1, 0, 0, 0, 1, 0, 0, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.8874633333895665
tensor([0, 1, 1, 0, 0, 1, 0, 1, 0, 0])
torch.Size([24])
Gradient norm: 3.1815090915162156
tensor([1, 0, 0, 1, 0, 0, 0, 1, 1, 1])
torch.Size([24])
Gradient norm: 2.8845154160273583
tensor([1, 1, 1, 0, 0, 1, 1, 0, 0, 1])
torch.Size([24])
Gradient norm: 2.023173261022802
tensor([1, 0, 0, 0, 0, 0, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 1.9716033671067181
tensor([1, 1, 0

Epoch 186/600 - Training:   0%|          | 0/29 [00:00<?, ?it/s]

tensor([0, 0, 1, 0, 1, 1, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 3.1425229310246197
tensor([1, 0, 0, 0, 0, 0, 1, 1, 1, 1])
torch.Size([24])
Gradient norm: 1.6364872597784004
tensor([1, 0, 1, 0, 0, 0, 1, 1, 0, 0])
torch.Size([24])
Gradient norm: 2.778708461835169
tensor([1, 0, 0, 1, 1, 1, 0, 0, 1, 0])
torch.Size([24])
Gradient norm: 2.3160288305215904
tensor([0, 1, 0, 1, 1, 0, 1, 1, 0, 1])
torch.Size([24])
Gradient norm: 2.1391658845663124
tensor([0, 0, 1, 0, 1, 0, 1, 1, 1, 0])
torch.Size([24])
Gradient norm: 1.8841716381824791
tensor([1, 0, 0, 0, 1, 0, 0, 1, 1, 0])
torch.Size([24])
Gradient norm: 3.0718516141387147
tensor([0, 1, 1, 0, 0, 1, 0, 1, 0, 0])
torch.Size([24])
Gradient norm: 2.951115742626595
tensor([1, 0, 0, 1, 0, 0, 0, 1, 1, 1])
torch.Size([24])
Gradient norm: 2.7547936598374556
tensor([1, 1, 1, 0, 0, 1, 1, 0, 0, 1])
torch.Size([24])
Gradient norm: 1.6477991790074966
tensor([1, 0, 0, 0, 0, 0, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 1.3970389879694955
tensor([1, 1

Epoch 187/600 - Training:   0%|          | 0/29 [00:00<?, ?it/s]

tensor([0, 0, 1, 0, 1, 1, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 3.9707223602421804
tensor([1, 0, 0, 0, 0, 0, 1, 1, 1, 1])
torch.Size([24])
Gradient norm: 2.1086942813491016
tensor([1, 0, 1, 0, 0, 0, 1, 1, 0, 0])
torch.Size([24])
Gradient norm: 3.0180589769635717
tensor([1, 0, 0, 1, 1, 1, 0, 0, 1, 0])
torch.Size([24])
Gradient norm: 2.6688730794266045
tensor([0, 1, 0, 1, 1, 0, 1, 1, 0, 1])
torch.Size([24])
Gradient norm: 1.8861190252240392
tensor([0, 0, 1, 0, 1, 0, 1, 1, 1, 0])
torch.Size([24])
Gradient norm: 1.8389403060268779
tensor([1, 0, 0, 0, 1, 0, 0, 1, 1, 0])
torch.Size([24])
Gradient norm: 3.1544511704822575
tensor([0, 1, 1, 0, 0, 1, 0, 1, 0, 0])
torch.Size([24])
Gradient norm: 3.9203534780044356
tensor([1, 0, 0, 1, 0, 0, 0, 1, 1, 1])
torch.Size([24])
Gradient norm: 2.74263474115629
tensor([1, 1, 1, 0, 0, 1, 1, 0, 0, 1])
torch.Size([24])
Gradient norm: 1.9194146754243708
tensor([1, 0, 0, 0, 0, 0, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 1.5046823964740634
tensor([1, 1

Epoch 188/600 - Training:   0%|          | 0/29 [00:00<?, ?it/s]

tensor([0, 0, 1, 0, 1, 1, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 3.9232404540000334
tensor([1, 0, 0, 0, 0, 0, 1, 1, 1, 1])
torch.Size([24])
Gradient norm: 1.895848920764302
tensor([1, 0, 1, 0, 0, 0, 1, 1, 0, 0])
torch.Size([24])
Gradient norm: 2.3041061907647324
tensor([1, 0, 0, 1, 1, 1, 0, 0, 1, 0])
torch.Size([24])
Gradient norm: 2.485076242846613
tensor([0, 1, 0, 1, 1, 0, 1, 1, 0, 1])
torch.Size([24])
Gradient norm: 1.9655025488276918
tensor([0, 0, 1, 0, 1, 0, 1, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.383518485373059
tensor([1, 0, 0, 0, 1, 0, 0, 1, 1, 0])
torch.Size([24])
Gradient norm: 3.5265101207868144
tensor([0, 1, 1, 0, 0, 1, 0, 1, 0, 0])
torch.Size([24])
Gradient norm: 2.836683910035288
tensor([1, 0, 0, 1, 0, 0, 0, 1, 1, 1])
torch.Size([24])
Gradient norm: 2.8597496627571304
tensor([1, 1, 1, 0, 0, 1, 1, 0, 0, 1])
torch.Size([24])
Gradient norm: 1.9268303325161666
tensor([1, 0, 0, 0, 0, 0, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 2.21612651381488
tensor([1, 1, 0,

Epoch 189/600 - Training:   0%|          | 0/29 [00:00<?, ?it/s]

tensor([0, 0, 1, 0, 1, 1, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 2.921938621684576
tensor([1, 0, 0, 0, 0, 0, 1, 1, 1, 1])
torch.Size([24])
Gradient norm: 3.055022269008027
tensor([1, 0, 1, 0, 0, 0, 1, 1, 0, 0])
torch.Size([24])
Gradient norm: 2.1430750420541034
tensor([1, 0, 0, 1, 1, 1, 0, 0, 1, 0])
torch.Size([24])
Gradient norm: 2.50952314556907
tensor([0, 1, 0, 1, 1, 0, 1, 1, 0, 1])
torch.Size([24])
Gradient norm: 2.4741769954629533
tensor([0, 0, 1, 0, 1, 0, 1, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.0516641809529177
tensor([1, 0, 0, 0, 1, 0, 0, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.724981370402215
tensor([0, 1, 1, 0, 0, 1, 0, 1, 0, 0])
torch.Size([24])
Gradient norm: 2.606985266941795
tensor([1, 0, 0, 1, 0, 0, 0, 1, 1, 1])
torch.Size([24])
Gradient norm: 3.202767516562787
tensor([1, 1, 1, 0, 0, 1, 1, 0, 0, 1])
torch.Size([24])
Gradient norm: 1.657674059602998
tensor([1, 0, 0, 0, 0, 0, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 2.3075859802277696
tensor([1, 1, 0, 0

Epoch 190/600 - Training:   0%|          | 0/29 [00:00<?, ?it/s]

tensor([0, 0, 1, 0, 1, 1, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 3.677094623252074
tensor([1, 0, 0, 0, 0, 0, 1, 1, 1, 1])
torch.Size([24])
Gradient norm: 2.1201797966420624
tensor([1, 0, 1, 0, 0, 0, 1, 1, 0, 0])
torch.Size([24])
Gradient norm: 2.2925177774457905
tensor([1, 0, 0, 1, 1, 1, 0, 0, 1, 0])
torch.Size([24])
Gradient norm: 2.5963233205034775
tensor([0, 1, 0, 1, 1, 0, 1, 1, 0, 1])
torch.Size([24])
Gradient norm: 2.433967295144567
tensor([0, 0, 1, 0, 1, 0, 1, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.25026691770101
tensor([1, 0, 0, 0, 1, 0, 0, 1, 1, 0])
torch.Size([24])
Gradient norm: 3.355173654271347
tensor([0, 1, 1, 0, 0, 1, 0, 1, 0, 0])
torch.Size([24])
Gradient norm: 2.5693849212755104
tensor([1, 0, 0, 1, 0, 0, 0, 1, 1, 1])
torch.Size([24])
Gradient norm: 2.8666267430578527
tensor([1, 1, 1, 0, 0, 1, 1, 0, 0, 1])
torch.Size([24])
Gradient norm: 2.1298476619448463
tensor([1, 0, 0, 0, 0, 0, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 1.8637434388772118
tensor([1, 1, 0

Epoch 191/600 - Training:   0%|          | 0/29 [00:00<?, ?it/s]

tensor([0, 0, 1, 0, 1, 1, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 4.421837125847739
tensor([1, 0, 0, 0, 0, 0, 1, 1, 1, 1])
torch.Size([24])
Gradient norm: 1.9493056074815034
tensor([1, 0, 1, 0, 0, 0, 1, 1, 0, 0])
torch.Size([24])
Gradient norm: 2.656155236772683
tensor([1, 0, 0, 1, 1, 1, 0, 0, 1, 0])
torch.Size([24])
Gradient norm: 2.4220422791793106
tensor([0, 1, 0, 1, 1, 0, 1, 1, 0, 1])
torch.Size([24])
Gradient norm: 1.8880891001049103
tensor([0, 0, 1, 0, 1, 0, 1, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.266058175443976
tensor([1, 0, 0, 0, 1, 0, 0, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.9630387284490536
tensor([0, 1, 1, 0, 0, 1, 0, 1, 0, 0])
torch.Size([24])
Gradient norm: 3.4473032909496397
tensor([1, 0, 0, 1, 0, 0, 0, 1, 1, 1])
torch.Size([24])
Gradient norm: 2.8707456071845945
tensor([1, 1, 1, 0, 0, 1, 1, 0, 0, 1])
torch.Size([24])
Gradient norm: 1.8711245335005573
tensor([1, 0, 0, 0, 0, 0, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 1.649874452642867
tensor([1, 1, 

Epoch 192/600 - Training:   0%|          | 0/29 [00:00<?, ?it/s]

tensor([0, 0, 1, 0, 1, 1, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 3.0497397844509555
tensor([1, 0, 0, 0, 0, 0, 1, 1, 1, 1])
torch.Size([24])
Gradient norm: 2.307448986306852
tensor([1, 0, 1, 0, 0, 0, 1, 1, 0, 0])
torch.Size([24])
Gradient norm: 2.4983325223993207
tensor([1, 0, 0, 1, 1, 1, 0, 0, 1, 0])
torch.Size([24])
Gradient norm: 2.0892882273724815
tensor([0, 1, 0, 1, 1, 0, 1, 1, 0, 1])
torch.Size([24])
Gradient norm: 1.899035985774031
tensor([0, 0, 1, 0, 1, 0, 1, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.0800561523587655
tensor([1, 0, 0, 0, 1, 0, 0, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.4961689494135046
tensor([0, 1, 1, 0, 0, 1, 0, 1, 0, 0])
torch.Size([24])
Gradient norm: 3.156958429092681
tensor([1, 0, 0, 1, 0, 0, 0, 1, 1, 1])
torch.Size([24])
Gradient norm: 3.2879486494032326
tensor([1, 1, 1, 0, 0, 1, 1, 0, 0, 1])
torch.Size([24])
Gradient norm: 1.8769634311687982
tensor([1, 0, 0, 0, 0, 0, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 1.9434044241361013
tensor([1, 1,

Epoch 193/600 - Training:   0%|          | 0/29 [00:00<?, ?it/s]

tensor([0, 0, 1, 0, 1, 1, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 3.793893847828202
tensor([1, 0, 0, 0, 0, 0, 1, 1, 1, 1])
torch.Size([24])
Gradient norm: 1.8128920326108924
tensor([1, 0, 1, 0, 0, 0, 1, 1, 0, 0])
torch.Size([24])
Gradient norm: 2.4784266595088305
tensor([1, 0, 0, 1, 1, 1, 0, 0, 1, 0])
torch.Size([24])
Gradient norm: 2.3726248918378325
tensor([0, 1, 0, 1, 1, 0, 1, 1, 0, 1])
torch.Size([24])
Gradient norm: 3.0072901748153282
tensor([0, 0, 1, 0, 1, 0, 1, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.087893922534262
tensor([1, 0, 0, 0, 1, 0, 0, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.9974070046955386
tensor([0, 1, 1, 0, 0, 1, 0, 1, 0, 0])
torch.Size([24])
Gradient norm: 3.085642113949769
tensor([1, 0, 0, 1, 0, 0, 0, 1, 1, 1])
torch.Size([24])
Gradient norm: 3.010195993073057
tensor([1, 1, 1, 0, 0, 1, 1, 0, 0, 1])
torch.Size([24])
Gradient norm: 1.8087671748848584
tensor([1, 0, 0, 0, 0, 0, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 1.888824939991657
tensor([1, 1, 0

Epoch 194/600 - Training:   0%|          | 0/29 [00:00<?, ?it/s]

tensor([0, 0, 1, 0, 1, 1, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 3.71844683752831
tensor([1, 0, 0, 0, 0, 0, 1, 1, 1, 1])
torch.Size([24])
Gradient norm: 2.452031629639444
tensor([1, 0, 1, 0, 0, 0, 1, 1, 0, 0])
torch.Size([24])
Gradient norm: 2.2544248245937384
tensor([1, 0, 0, 1, 1, 1, 0, 0, 1, 0])
torch.Size([24])
Gradient norm: 2.8523332066697504
tensor([0, 1, 0, 1, 1, 0, 1, 1, 0, 1])
torch.Size([24])
Gradient norm: 1.9403235078393042
tensor([0, 0, 1, 0, 1, 0, 1, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.302085017661826
tensor([1, 0, 0, 0, 1, 0, 0, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.8331439927023667
tensor([0, 1, 1, 0, 0, 1, 0, 1, 0, 0])
torch.Size([24])
Gradient norm: 2.7343503433860423
tensor([1, 0, 0, 1, 0, 0, 0, 1, 1, 1])
torch.Size([24])
Gradient norm: 2.6309336168032504
tensor([1, 1, 1, 0, 0, 1, 1, 0, 0, 1])
torch.Size([24])
Gradient norm: 1.5647242377421975
tensor([1, 0, 0, 0, 0, 0, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 1.698824559239111
tensor([1, 1, 0

Epoch 195/600 - Training:   0%|          | 0/29 [00:00<?, ?it/s]

tensor([0, 0, 1, 0, 1, 1, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 3.1723432900845308
tensor([1, 0, 0, 0, 0, 0, 1, 1, 1, 1])
torch.Size([24])
Gradient norm: 1.834471550893534
tensor([1, 0, 1, 0, 0, 0, 1, 1, 0, 0])
torch.Size([24])
Gradient norm: 2.733848494191549
tensor([1, 0, 0, 1, 1, 1, 0, 0, 1, 0])
torch.Size([24])
Gradient norm: 2.8892038491819423
tensor([0, 1, 0, 1, 1, 0, 1, 1, 0, 1])
torch.Size([24])
Gradient norm: 1.7103170435600341
tensor([0, 0, 1, 0, 1, 0, 1, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.91788349807368
tensor([1, 0, 0, 0, 1, 0, 0, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.5378438324372956
tensor([0, 1, 1, 0, 0, 1, 0, 1, 0, 0])
torch.Size([24])
Gradient norm: 2.5604502035294203
tensor([1, 0, 0, 1, 0, 0, 0, 1, 1, 1])
torch.Size([24])
Gradient norm: 2.7565866674814643
tensor([1, 1, 1, 0, 0, 1, 1, 0, 0, 1])
torch.Size([24])
Gradient norm: 1.472697910988385
tensor([1, 0, 0, 0, 0, 0, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 1.6442766089928933
tensor([1, 1, 0

Epoch 196/600 - Training:   0%|          | 0/29 [00:00<?, ?it/s]

tensor([0, 0, 1, 0, 1, 1, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 3.420402447838827
tensor([1, 0, 0, 0, 0, 0, 1, 1, 1, 1])
torch.Size([24])
Gradient norm: 2.1220532533830045
tensor([1, 0, 1, 0, 0, 0, 1, 1, 0, 0])
torch.Size([24])
Gradient norm: 2.279590386674937
tensor([1, 0, 0, 1, 1, 1, 0, 0, 1, 0])
torch.Size([24])
Gradient norm: 2.3752723310691484
tensor([0, 1, 0, 1, 1, 0, 1, 1, 0, 1])
torch.Size([24])
Gradient norm: 1.8882958613338785
tensor([0, 0, 1, 0, 1, 0, 1, 1, 1, 0])
torch.Size([24])
Gradient norm: 1.6453085349328649
tensor([1, 0, 0, 0, 1, 0, 0, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.055529244499561
tensor([0, 1, 1, 0, 0, 1, 0, 1, 0, 0])
torch.Size([24])
Gradient norm: 2.7144528660093266
tensor([1, 0, 0, 1, 0, 0, 0, 1, 1, 1])
torch.Size([24])
Gradient norm: 2.9850346343866305
tensor([1, 1, 1, 0, 0, 1, 1, 0, 0, 1])
torch.Size([24])
Gradient norm: 2.2349128603943154
tensor([1, 0, 0, 0, 0, 0, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 1.8591622077409664
tensor([1, 1,

Epoch 197/600 - Training:   0%|          | 0/29 [00:00<?, ?it/s]

tensor([0, 0, 1, 0, 1, 1, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 3.1237346087345705
tensor([1, 0, 0, 0, 0, 0, 1, 1, 1, 1])
torch.Size([24])
Gradient norm: 2.0807141075131907
tensor([1, 0, 1, 0, 0, 0, 1, 1, 0, 0])
torch.Size([24])
Gradient norm: 2.6612208154416677
tensor([1, 0, 0, 1, 1, 1, 0, 0, 1, 0])
torch.Size([24])
Gradient norm: 2.139942863804034
tensor([0, 1, 0, 1, 1, 0, 1, 1, 0, 1])
torch.Size([24])
Gradient norm: 2.1272550265794306
tensor([0, 0, 1, 0, 1, 0, 1, 1, 1, 0])
torch.Size([24])
Gradient norm: 1.8879099848617404
tensor([1, 0, 0, 0, 1, 0, 0, 1, 1, 0])
torch.Size([24])
Gradient norm: 3.102715857908761
tensor([0, 1, 1, 0, 0, 1, 0, 1, 0, 0])
torch.Size([24])
Gradient norm: 3.3074260723612867
tensor([1, 0, 0, 1, 0, 0, 0, 1, 1, 1])
torch.Size([24])
Gradient norm: 3.2212469193409228
tensor([1, 1, 1, 0, 0, 1, 1, 0, 0, 1])
torch.Size([24])
Gradient norm: 1.6519354389201353
tensor([1, 0, 0, 0, 0, 0, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 2.194627728490993
tensor([1, 1,

Epoch 198/600 - Training:   0%|          | 0/29 [00:00<?, ?it/s]

tensor([0, 0, 1, 0, 1, 1, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 3.7842266838467356
tensor([1, 0, 0, 0, 0, 0, 1, 1, 1, 1])
torch.Size([24])
Gradient norm: 1.9046979281576009
tensor([1, 0, 1, 0, 0, 0, 1, 1, 0, 0])
torch.Size([24])
Gradient norm: 2.341613764407584
tensor([1, 0, 0, 1, 1, 1, 0, 0, 1, 0])
torch.Size([24])
Gradient norm: 2.9744670982297943
tensor([0, 1, 0, 1, 1, 0, 1, 1, 0, 1])
torch.Size([24])
Gradient norm: 2.3608397832341566
tensor([0, 0, 1, 0, 1, 0, 1, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.3655499448762094
tensor([1, 0, 0, 0, 1, 0, 0, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.641005854751219
tensor([0, 1, 1, 0, 0, 1, 0, 1, 0, 0])
torch.Size([24])
Gradient norm: 3.9859493861019475
tensor([1, 0, 0, 1, 0, 0, 0, 1, 1, 1])
torch.Size([24])
Gradient norm: 2.706394616082981
tensor([1, 1, 1, 0, 0, 1, 1, 0, 0, 1])
torch.Size([24])
Gradient norm: 1.826494315864816
tensor([1, 0, 0, 0, 0, 0, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 1.9915928237342038
tensor([1, 1, 

Epoch 199/600 - Training:   0%|          | 0/29 [00:00<?, ?it/s]

tensor([0, 0, 1, 0, 1, 1, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 3.6691030816275054
tensor([1, 0, 0, 0, 0, 0, 1, 1, 1, 1])
torch.Size([24])
Gradient norm: 2.1934387516013567
tensor([1, 0, 1, 0, 0, 0, 1, 1, 0, 0])
torch.Size([24])
Gradient norm: 2.250323751989357
tensor([1, 0, 0, 1, 1, 1, 0, 0, 1, 0])
torch.Size([24])
Gradient norm: 2.4222706763702893
tensor([0, 1, 0, 1, 1, 0, 1, 1, 0, 1])
torch.Size([24])
Gradient norm: 2.023490783254333
tensor([0, 0, 1, 0, 1, 0, 1, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.204586486955629
tensor([1, 0, 0, 0, 1, 0, 0, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.9835020729631885
tensor([0, 1, 1, 0, 0, 1, 0, 1, 0, 0])
torch.Size([24])
Gradient norm: 3.516434535573036
tensor([1, 0, 0, 1, 0, 0, 0, 1, 1, 1])
torch.Size([24])
Gradient norm: 2.7197719587050604
tensor([1, 1, 1, 0, 0, 1, 1, 0, 0, 1])
torch.Size([24])
Gradient norm: 1.8454305726276736
tensor([1, 0, 0, 0, 0, 0, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 1.6710737655624057
tensor([1, 1, 

Epoch 200/600 - Training:   0%|          | 0/29 [00:00<?, ?it/s]

tensor([0, 0, 1, 0, 1, 1, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 3.7449007274712085
tensor([1, 0, 0, 0, 0, 0, 1, 1, 1, 1])
torch.Size([24])
Gradient norm: 2.401546564335909
tensor([1, 0, 1, 0, 0, 0, 1, 1, 0, 0])
torch.Size([24])
Gradient norm: 1.969524218070508
tensor([1, 0, 0, 1, 1, 1, 0, 0, 1, 0])
torch.Size([24])
Gradient norm: 1.9780451049869758
tensor([0, 1, 0, 1, 1, 0, 1, 1, 0, 1])
torch.Size([24])
Gradient norm: 2.0392836408666586
tensor([0, 0, 1, 0, 1, 0, 1, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.3306886499414863
tensor([1, 0, 0, 0, 1, 0, 0, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.7308492551543173
tensor([0, 1, 1, 0, 0, 1, 0, 1, 0, 0])
torch.Size([24])
Gradient norm: 2.8659345443311746
tensor([1, 0, 0, 1, 0, 0, 0, 1, 1, 1])
torch.Size([24])
Gradient norm: 3.032633986214915
tensor([1, 1, 1, 0, 0, 1, 1, 0, 0, 1])
torch.Size([24])
Gradient norm: 2.0727546015398306
tensor([1, 0, 0, 0, 0, 0, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 2.1959519066317954
tensor([1, 1,

Epoch 201/600 - Training:   0%|          | 0/29 [00:00<?, ?it/s]

tensor([0, 0, 1, 0, 1, 1, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 3.5034290370397683
tensor([1, 0, 0, 0, 0, 0, 1, 1, 1, 1])
torch.Size([24])
Gradient norm: 1.7800643185093057
tensor([1, 0, 1, 0, 0, 0, 1, 1, 0, 0])
torch.Size([24])
Gradient norm: 2.1892441409141137
tensor([1, 0, 0, 1, 1, 1, 0, 0, 1, 0])
torch.Size([24])
Gradient norm: 2.9023354468110085
tensor([0, 1, 0, 1, 1, 0, 1, 1, 0, 1])
torch.Size([24])
Gradient norm: 2.062206343843165
tensor([0, 0, 1, 0, 1, 0, 1, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.1823136236034486
tensor([1, 0, 0, 0, 1, 0, 0, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.7049993937855112
tensor([0, 1, 1, 0, 0, 1, 0, 1, 0, 0])
torch.Size([24])
Gradient norm: 2.4682952117526065
tensor([1, 0, 0, 1, 0, 0, 0, 1, 1, 1])
torch.Size([24])
Gradient norm: 2.8875487145749603
tensor([1, 1, 1, 0, 0, 1, 1, 0, 0, 1])
torch.Size([24])
Gradient norm: 1.819206562828228
tensor([1, 0, 0, 0, 0, 0, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 1.6984638305182171
tensor([1, 1

Epoch 202/600 - Training:   0%|          | 0/29 [00:00<?, ?it/s]

tensor([0, 0, 1, 0, 1, 1, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 2.817209002022787
tensor([1, 0, 0, 0, 0, 0, 1, 1, 1, 1])
torch.Size([24])
Gradient norm: 2.2912470903820825
tensor([1, 0, 1, 0, 0, 0, 1, 1, 0, 0])
torch.Size([24])
Gradient norm: 2.3037559344156624
tensor([1, 0, 0, 1, 1, 1, 0, 0, 1, 0])
torch.Size([24])
Gradient norm: 2.7038786055844337
tensor([0, 1, 0, 1, 1, 0, 1, 1, 0, 1])
torch.Size([24])
Gradient norm: 2.142157490962679
tensor([0, 0, 1, 0, 1, 0, 1, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.4904335650359974
tensor([1, 0, 0, 0, 1, 0, 0, 1, 1, 0])
torch.Size([24])
Gradient norm: 3.8323855147515675
tensor([0, 1, 1, 0, 0, 1, 0, 1, 0, 0])
torch.Size([24])
Gradient norm: 2.627506718372372
tensor([1, 0, 0, 1, 0, 0, 0, 1, 1, 1])
torch.Size([24])
Gradient norm: 2.4281811731112435
tensor([1, 1, 1, 0, 0, 1, 1, 0, 0, 1])
torch.Size([24])
Gradient norm: 1.8700058914324562
tensor([1, 0, 0, 0, 0, 0, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 1.8598888017760848
tensor([1, 1,

Epoch 203/600 - Training:   0%|          | 0/29 [00:00<?, ?it/s]

tensor([0, 0, 1, 0, 1, 1, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 3.27224545368891
tensor([1, 0, 0, 0, 0, 0, 1, 1, 1, 1])
torch.Size([24])
Gradient norm: 1.7741888013249656
tensor([1, 0, 1, 0, 0, 0, 1, 1, 0, 0])
torch.Size([24])
Gradient norm: 2.705966007779321
tensor([1, 0, 0, 1, 1, 1, 0, 0, 1, 0])
torch.Size([24])
Gradient norm: 2.5473791557457623
tensor([0, 1, 0, 1, 1, 0, 1, 1, 0, 1])
torch.Size([24])
Gradient norm: 1.5996286369692978
tensor([0, 0, 1, 0, 1, 0, 1, 1, 1, 0])
torch.Size([24])
Gradient norm: 1.7278920916877378
tensor([1, 0, 0, 0, 1, 0, 0, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.726095679575304
tensor([0, 1, 1, 0, 0, 1, 0, 1, 0, 0])
torch.Size([24])
Gradient norm: 3.2362287310230964
tensor([1, 0, 0, 1, 0, 0, 0, 1, 1, 1])
torch.Size([24])
Gradient norm: 2.6695421718921963
tensor([1, 1, 1, 0, 0, 1, 1, 0, 0, 1])
torch.Size([24])
Gradient norm: 1.6664049593823576
tensor([1, 0, 0, 0, 0, 0, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 1.6872732753275088
tensor([1, 1, 

Epoch 204/600 - Training:   0%|          | 0/29 [00:00<?, ?it/s]

tensor([0, 0, 1, 0, 1, 1, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 4.043156569800387
tensor([1, 0, 0, 0, 0, 0, 1, 1, 1, 1])
torch.Size([24])
Gradient norm: 1.9302110662364724
tensor([1, 0, 1, 0, 0, 0, 1, 1, 0, 0])
torch.Size([24])
Gradient norm: 2.199399191822066
tensor([1, 0, 0, 1, 1, 1, 0, 0, 1, 0])
torch.Size([24])
Gradient norm: 2.5091182794797344
tensor([0, 1, 0, 1, 1, 0, 1, 1, 0, 1])
torch.Size([24])
Gradient norm: 2.220784442734636
tensor([0, 0, 1, 0, 1, 0, 1, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.050854960115767
tensor([1, 0, 0, 0, 1, 0, 0, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.3217440328177354
tensor([0, 1, 1, 0, 0, 1, 0, 1, 0, 0])
torch.Size([24])
Gradient norm: 2.973594187796088
tensor([1, 0, 0, 1, 0, 0, 0, 1, 1, 1])
torch.Size([24])
Gradient norm: 3.3020192602816554
tensor([1, 1, 1, 0, 0, 1, 1, 0, 0, 1])
torch.Size([24])
Gradient norm: 1.5899463343938904
tensor([1, 0, 0, 0, 0, 0, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 2.2593567748216503
tensor([1, 1, 0

Epoch 205/600 - Training:   0%|          | 0/29 [00:00<?, ?it/s]

tensor([0, 0, 1, 0, 1, 1, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 3.4171650321888785
tensor([1, 0, 0, 0, 0, 0, 1, 1, 1, 1])
torch.Size([24])
Gradient norm: 2.2371159162873626
tensor([1, 0, 1, 0, 0, 0, 1, 1, 0, 0])
torch.Size([24])
Gradient norm: 2.0315962052188805
tensor([1, 0, 0, 1, 1, 1, 0, 0, 1, 0])
torch.Size([24])
Gradient norm: 2.5696094353384855
tensor([0, 1, 0, 1, 1, 0, 1, 1, 0, 1])
torch.Size([24])
Gradient norm: 2.1625919683713977
tensor([0, 0, 1, 0, 1, 0, 1, 1, 1, 0])
torch.Size([24])
Gradient norm: 1.9406919580967632
tensor([1, 0, 0, 0, 1, 0, 0, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.51290062596508
tensor([0, 1, 1, 0, 0, 1, 0, 1, 0, 0])
torch.Size([24])
Gradient norm: 2.709554618630448
tensor([1, 0, 0, 1, 0, 0, 0, 1, 1, 1])
torch.Size([24])
Gradient norm: 2.6419594517070917
tensor([1, 1, 1, 0, 0, 1, 1, 0, 0, 1])
torch.Size([24])
Gradient norm: 1.657730482292048
tensor([1, 0, 0, 0, 0, 0, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 2.0500391322968206
tensor([1, 1, 

Epoch 206/600 - Training:   0%|          | 0/29 [00:00<?, ?it/s]

tensor([0, 0, 1, 0, 1, 1, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 3.665663176563413
tensor([1, 0, 0, 0, 0, 0, 1, 1, 1, 1])
torch.Size([24])
Gradient norm: 2.0880068674698533
tensor([1, 0, 1, 0, 0, 0, 1, 1, 0, 0])
torch.Size([24])
Gradient norm: 2.0940959155420487
tensor([1, 0, 0, 1, 1, 1, 0, 0, 1, 0])
torch.Size([24])
Gradient norm: 2.8573564815665966
tensor([0, 1, 0, 1, 1, 0, 1, 1, 0, 1])
torch.Size([24])
Gradient norm: 1.5364884851726996
tensor([0, 0, 1, 0, 1, 0, 1, 1, 1, 0])
torch.Size([24])
Gradient norm: 1.9020012635229662
tensor([1, 0, 0, 0, 1, 0, 0, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.8461709193159193
tensor([0, 1, 1, 0, 0, 1, 0, 1, 0, 0])
torch.Size([24])
Gradient norm: 3.916718799518268
tensor([1, 0, 0, 1, 0, 0, 0, 1, 1, 1])
torch.Size([24])
Gradient norm: 3.4469531712056733
tensor([1, 1, 1, 0, 0, 1, 1, 0, 0, 1])
torch.Size([24])
Gradient norm: 1.734174953700523
tensor([1, 0, 0, 0, 0, 0, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 2.1280561438910293
tensor([1, 1,

Epoch 207/600 - Training:   0%|          | 0/29 [00:00<?, ?it/s]

tensor([0, 0, 1, 0, 1, 1, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 3.5218564302385533
tensor([1, 0, 0, 0, 0, 0, 1, 1, 1, 1])
torch.Size([24])
Gradient norm: 2.3045620801824453
tensor([1, 0, 1, 0, 0, 0, 1, 1, 0, 0])
torch.Size([24])
Gradient norm: 2.5690856141089853
tensor([1, 0, 0, 1, 1, 1, 0, 0, 1, 0])
torch.Size([24])
Gradient norm: 2.528725829194538
tensor([0, 1, 0, 1, 1, 0, 1, 1, 0, 1])
torch.Size([24])
Gradient norm: 1.8544017122240606
tensor([0, 0, 1, 0, 1, 0, 1, 1, 1, 0])
torch.Size([24])
Gradient norm: 1.7773075527215356
tensor([1, 0, 0, 0, 1, 0, 0, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.4321279725588103
tensor([0, 1, 1, 0, 0, 1, 0, 1, 0, 0])
torch.Size([24])
Gradient norm: 3.0346460307202214
tensor([1, 0, 0, 1, 0, 0, 0, 1, 1, 1])
torch.Size([24])
Gradient norm: 2.841500670453851
tensor([1, 1, 1, 0, 0, 1, 1, 0, 0, 1])
torch.Size([24])
Gradient norm: 1.8543841111692243
tensor([1, 0, 0, 0, 0, 0, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 1.8296685082852804
tensor([1, 1

Epoch 208/600 - Training:   0%|          | 0/29 [00:00<?, ?it/s]

tensor([0, 0, 1, 0, 1, 1, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 3.424641330189079
tensor([1, 0, 0, 0, 0, 0, 1, 1, 1, 1])
torch.Size([24])
Gradient norm: 2.3254555020152976
tensor([1, 0, 1, 0, 0, 0, 1, 1, 0, 0])
torch.Size([24])
Gradient norm: 1.6554245763825592
tensor([1, 0, 0, 1, 1, 1, 0, 0, 1, 0])
torch.Size([24])
Gradient norm: 2.781610910960449
tensor([0, 1, 0, 1, 1, 0, 1, 1, 0, 1])
torch.Size([24])
Gradient norm: 1.910794698639064
tensor([0, 0, 1, 0, 1, 0, 1, 1, 1, 0])
torch.Size([24])
Gradient norm: 1.5928472376668665
tensor([1, 0, 0, 0, 1, 0, 0, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.702991781070948
tensor([0, 1, 1, 0, 0, 1, 0, 1, 0, 0])
torch.Size([24])
Gradient norm: 3.399338832979244
tensor([1, 0, 0, 1, 0, 0, 0, 1, 1, 1])
torch.Size([24])
Gradient norm: 2.785033110230482
tensor([1, 1, 1, 0, 0, 1, 1, 0, 0, 1])
torch.Size([24])
Gradient norm: 1.8073256492142897
tensor([1, 0, 0, 0, 0, 0, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 1.8929409552259011
tensor([1, 1, 0,

Epoch 209/600 - Training:   0%|          | 0/29 [00:00<?, ?it/s]

tensor([0, 0, 1, 0, 1, 1, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 3.078374637705316
tensor([1, 0, 0, 0, 0, 0, 1, 1, 1, 1])
torch.Size([24])
Gradient norm: 2.743969818233871
tensor([1, 0, 1, 0, 0, 0, 1, 1, 0, 0])
torch.Size([24])
Gradient norm: 1.9938548624238206
tensor([1, 0, 0, 1, 1, 1, 0, 0, 1, 0])
torch.Size([24])
Gradient norm: 2.4917058529343508
tensor([0, 1, 0, 1, 1, 0, 1, 1, 0, 1])
torch.Size([24])
Gradient norm: 1.6209689034947554
tensor([0, 0, 1, 0, 1, 0, 1, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.1379478537486323
tensor([1, 0, 0, 0, 1, 0, 0, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.3680089591769438
tensor([0, 1, 1, 0, 0, 1, 0, 1, 0, 0])
torch.Size([24])
Gradient norm: 2.7557423252603575
tensor([1, 0, 0, 1, 0, 0, 0, 1, 1, 1])
torch.Size([24])
Gradient norm: 3.467433583989819
tensor([1, 1, 1, 0, 0, 1, 1, 0, 0, 1])
torch.Size([24])
Gradient norm: 1.862919868076542
tensor([1, 0, 0, 0, 0, 0, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 1.8014353905278293
tensor([1, 1, 

Epoch 210/600 - Training:   0%|          | 0/29 [00:00<?, ?it/s]

tensor([0, 0, 1, 0, 1, 1, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 3.793180863010573
tensor([1, 0, 0, 0, 0, 0, 1, 1, 1, 1])
torch.Size([24])
Gradient norm: 2.25234198792951
tensor([1, 0, 1, 0, 0, 0, 1, 1, 0, 0])
torch.Size([24])
Gradient norm: 2.4510934302392084
tensor([1, 0, 0, 1, 1, 1, 0, 0, 1, 0])
torch.Size([24])
Gradient norm: 2.3401635607868005
tensor([0, 1, 0, 1, 1, 0, 1, 1, 0, 1])
torch.Size([24])
Gradient norm: 1.7793664100148046
tensor([0, 0, 1, 0, 1, 0, 1, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.1409062127963905
tensor([1, 0, 0, 0, 1, 0, 0, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.9279829385690057
tensor([0, 1, 1, 0, 0, 1, 0, 1, 0, 0])
torch.Size([24])
Gradient norm: 2.6330091242014255
tensor([1, 0, 0, 1, 0, 0, 0, 1, 1, 1])
torch.Size([24])
Gradient norm: 2.736565628311093
tensor([1, 1, 1, 0, 0, 1, 1, 0, 0, 1])
torch.Size([24])
Gradient norm: 2.374238851269658
tensor([1, 0, 0, 0, 0, 0, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 1.7206468049347208
tensor([1, 1, 0

Epoch 211/600 - Training:   0%|          | 0/29 [00:00<?, ?it/s]

tensor([0, 0, 1, 0, 1, 1, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 3.865143988742161
tensor([1, 0, 0, 0, 0, 0, 1, 1, 1, 1])
torch.Size([24])
Gradient norm: 2.030321295321414
tensor([1, 0, 1, 0, 0, 0, 1, 1, 0, 0])
torch.Size([24])
Gradient norm: 2.1763787361251867
tensor([1, 0, 0, 1, 1, 1, 0, 0, 1, 0])
torch.Size([24])
Gradient norm: 1.8300561042275305
tensor([0, 1, 0, 1, 1, 0, 1, 1, 0, 1])
torch.Size([24])
Gradient norm: 1.8562552000590256
tensor([0, 0, 1, 0, 1, 0, 1, 1, 1, 0])
torch.Size([24])
Gradient norm: 1.5589911377461931
tensor([1, 0, 0, 0, 1, 0, 0, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.57376077487764
tensor([0, 1, 1, 0, 0, 1, 0, 1, 0, 0])
torch.Size([24])
Gradient norm: 2.8637630761270385
tensor([1, 0, 0, 1, 0, 0, 0, 1, 1, 1])
torch.Size([24])
Gradient norm: 3.180899425016832
tensor([1, 1, 1, 0, 0, 1, 1, 0, 0, 1])
torch.Size([24])
Gradient norm: 1.8513421047629843
tensor([1, 0, 0, 0, 0, 0, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 2.264936242862058
tensor([1, 1, 0,

Epoch 212/600 - Training:   0%|          | 0/29 [00:00<?, ?it/s]

tensor([0, 0, 1, 0, 1, 1, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 3.799758313786826
tensor([1, 0, 0, 0, 0, 0, 1, 1, 1, 1])
torch.Size([24])
Gradient norm: 2.67746734983773
tensor([1, 0, 1, 0, 0, 0, 1, 1, 0, 0])
torch.Size([24])
Gradient norm: 3.2950252995798004
tensor([1, 0, 0, 1, 1, 1, 0, 0, 1, 0])
torch.Size([24])
Gradient norm: 2.6177088483401674
tensor([0, 1, 0, 1, 1, 0, 1, 1, 0, 1])
torch.Size([24])
Gradient norm: 2.5511238870011304
tensor([0, 0, 1, 0, 1, 0, 1, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.3489942166640234
tensor([1, 0, 0, 0, 1, 0, 0, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.8363090934059008
tensor([0, 1, 1, 0, 0, 1, 0, 1, 0, 0])
torch.Size([24])
Gradient norm: 2.907725128952713
tensor([1, 0, 0, 1, 0, 0, 0, 1, 1, 1])
torch.Size([24])
Gradient norm: 2.9726303187352703
tensor([1, 1, 1, 0, 0, 1, 1, 0, 0, 1])
torch.Size([24])
Gradient norm: 2.0044445473896273
tensor([1, 0, 0, 0, 0, 0, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 2.412033130185228
tensor([1, 1, 0

Epoch 213/600 - Training:   0%|          | 0/29 [00:00<?, ?it/s]

tensor([0, 0, 1, 0, 1, 1, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 3.518172603582712
tensor([1, 0, 0, 0, 0, 0, 1, 1, 1, 1])
torch.Size([24])
Gradient norm: 2.183470549842633
tensor([1, 0, 1, 0, 0, 0, 1, 1, 0, 0])
torch.Size([24])
Gradient norm: 1.886481999951131
tensor([1, 0, 0, 1, 1, 1, 0, 0, 1, 0])
torch.Size([24])
Gradient norm: 3.362092434851877
tensor([0, 1, 0, 1, 1, 0, 1, 1, 0, 1])
torch.Size([24])
Gradient norm: 2.0055963084933532
tensor([0, 0, 1, 0, 1, 0, 1, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.1624124427081854
tensor([1, 0, 0, 0, 1, 0, 0, 1, 1, 0])
torch.Size([24])
Gradient norm: 3.089998056634242
tensor([0, 1, 1, 0, 0, 1, 0, 1, 0, 0])
torch.Size([24])
Gradient norm: 2.795663110801101
tensor([1, 0, 0, 1, 0, 0, 0, 1, 1, 1])
torch.Size([24])
Gradient norm: 2.7541670605291615
tensor([1, 1, 1, 0, 0, 1, 1, 0, 0, 1])
torch.Size([24])
Gradient norm: 2.136531064589682
tensor([1, 0, 0, 0, 0, 0, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 1.9919723838202112
tensor([1, 1, 0, 

Epoch 214/600 - Training:   0%|          | 0/29 [00:00<?, ?it/s]

tensor([0, 0, 1, 0, 1, 1, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 3.6047597223716905
tensor([1, 0, 0, 0, 0, 0, 1, 1, 1, 1])
torch.Size([24])
Gradient norm: 2.495288856792037
tensor([1, 0, 1, 0, 0, 0, 1, 1, 0, 0])
torch.Size([24])
Gradient norm: 3.1526591263777304
tensor([1, 0, 0, 1, 1, 1, 0, 0, 1, 0])
torch.Size([24])
Gradient norm: 2.5982959964865313
tensor([0, 1, 0, 1, 1, 0, 1, 1, 0, 1])
torch.Size([24])
Gradient norm: 2.5372820211353537
tensor([0, 0, 1, 0, 1, 0, 1, 1, 1, 0])
torch.Size([24])
Gradient norm: 1.6656096384751016
tensor([1, 0, 0, 0, 1, 0, 0, 1, 1, 0])
torch.Size([24])
Gradient norm: 3.547423431838311
tensor([0, 1, 1, 0, 0, 1, 0, 1, 0, 0])
torch.Size([24])
Gradient norm: 3.000189519299897
tensor([1, 0, 0, 1, 0, 0, 0, 1, 1, 1])
torch.Size([24])
Gradient norm: 3.6951068133644394
tensor([1, 1, 1, 0, 0, 1, 1, 0, 0, 1])
torch.Size([24])
Gradient norm: 1.8448587609652747
tensor([1, 0, 0, 0, 0, 0, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 1.6152264804388217
tensor([1, 1,

Epoch 215/600 - Training:   0%|          | 0/29 [00:00<?, ?it/s]

tensor([0, 0, 1, 0, 1, 1, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 4.247049538491802
tensor([1, 0, 0, 0, 0, 0, 1, 1, 1, 1])
torch.Size([24])
Gradient norm: 2.560916604838933
tensor([1, 0, 1, 0, 0, 0, 1, 1, 0, 0])
torch.Size([24])
Gradient norm: 2.2150893073072027
tensor([1, 0, 0, 1, 1, 1, 0, 0, 1, 0])
torch.Size([24])
Gradient norm: 2.1694016052789813
tensor([0, 1, 0, 1, 1, 0, 1, 1, 0, 1])
torch.Size([24])
Gradient norm: 1.6025916619501033
tensor([0, 0, 1, 0, 1, 0, 1, 1, 1, 0])
torch.Size([24])
Gradient norm: 1.872769074874176
tensor([1, 0, 0, 0, 1, 0, 0, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.9447556000045028
tensor([0, 1, 1, 0, 0, 1, 0, 1, 0, 0])
torch.Size([24])
Gradient norm: 2.5733639006267817
tensor([1, 0, 0, 1, 0, 0, 0, 1, 1, 1])
torch.Size([24])
Gradient norm: 2.269164732403337
tensor([1, 1, 1, 0, 0, 1, 1, 0, 0, 1])
torch.Size([24])
Gradient norm: 2.148442836008316
tensor([1, 0, 0, 0, 0, 0, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 2.3578771780960452
tensor([1, 1, 0

Epoch 216/600 - Training:   0%|          | 0/29 [00:00<?, ?it/s]

tensor([0, 0, 1, 0, 1, 1, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 3.5579205208846725
tensor([1, 0, 0, 0, 0, 0, 1, 1, 1, 1])
torch.Size([24])
Gradient norm: 2.0502203698048866
tensor([1, 0, 1, 0, 0, 0, 1, 1, 0, 0])
torch.Size([24])
Gradient norm: 2.3193975700957545
tensor([1, 0, 0, 1, 1, 1, 0, 0, 1, 0])
torch.Size([24])
Gradient norm: 2.7305493127945857
tensor([0, 1, 0, 1, 1, 0, 1, 1, 0, 1])
torch.Size([24])
Gradient norm: 1.7550763397320763
tensor([0, 0, 1, 0, 1, 0, 1, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.044650383134523
tensor([1, 0, 0, 0, 1, 0, 0, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.9193029543381703
tensor([0, 1, 1, 0, 0, 1, 0, 1, 0, 0])
torch.Size([24])
Gradient norm: 3.3739433823863685
tensor([1, 0, 0, 1, 0, 0, 0, 1, 1, 1])
torch.Size([24])
Gradient norm: 3.4482722117509312
tensor([1, 1, 1, 0, 0, 1, 1, 0, 0, 1])
torch.Size([24])
Gradient norm: 1.888038978047879
tensor([1, 0, 0, 0, 0, 0, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 1.5253240469331637
tensor([1, 1

Epoch 217/600 - Training:   0%|          | 0/29 [00:00<?, ?it/s]

tensor([0, 0, 1, 0, 1, 1, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 3.711361735329444
tensor([1, 0, 0, 0, 0, 0, 1, 1, 1, 1])
torch.Size([24])
Gradient norm: 2.0365912651462406
tensor([1, 0, 1, 0, 0, 0, 1, 1, 0, 0])
torch.Size([24])
Gradient norm: 2.551250063527567
tensor([1, 0, 0, 1, 1, 1, 0, 0, 1, 0])
torch.Size([24])
Gradient norm: 2.357216071364888
tensor([0, 1, 0, 1, 1, 0, 1, 1, 0, 1])
torch.Size([24])
Gradient norm: 1.7697513821844164
tensor([0, 0, 1, 0, 1, 0, 1, 1, 1, 0])
torch.Size([24])
Gradient norm: 1.9986426354297202
tensor([1, 0, 0, 0, 1, 0, 0, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.9118666418731722
tensor([0, 1, 1, 0, 0, 1, 0, 1, 0, 0])
torch.Size([24])
Gradient norm: 3.6070265788963125
tensor([1, 0, 0, 1, 0, 0, 0, 1, 1, 1])
torch.Size([24])
Gradient norm: 2.3668038306591153
tensor([1, 1, 1, 0, 0, 1, 1, 0, 0, 1])
torch.Size([24])
Gradient norm: 1.9214290685991904
tensor([1, 0, 0, 0, 0, 0, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 2.2866820455910046
tensor([1, 1,

Epoch 218/600 - Training:   0%|          | 0/29 [00:00<?, ?it/s]

tensor([0, 0, 1, 0, 1, 1, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 3.4082158903494806
tensor([1, 0, 0, 0, 0, 0, 1, 1, 1, 1])
torch.Size([24])
Gradient norm: 2.2171574859039698
tensor([1, 0, 1, 0, 0, 0, 1, 1, 0, 0])
torch.Size([24])
Gradient norm: 2.7781069443301143
tensor([1, 0, 0, 1, 1, 1, 0, 0, 1, 0])
torch.Size([24])
Gradient norm: 2.629347695349144
tensor([0, 1, 0, 1, 1, 0, 1, 1, 0, 1])
torch.Size([24])
Gradient norm: 1.945478819067449
tensor([0, 0, 1, 0, 1, 0, 1, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.172075320433823
tensor([1, 0, 0, 0, 1, 0, 0, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.938082773894008
tensor([0, 1, 1, 0, 0, 1, 0, 1, 0, 0])
torch.Size([24])
Gradient norm: 2.5442530416225035
tensor([1, 0, 0, 1, 0, 0, 0, 1, 1, 1])
torch.Size([24])
Gradient norm: 2.9190778113952964
tensor([1, 1, 1, 0, 0, 1, 1, 0, 0, 1])
torch.Size([24])
Gradient norm: 1.6963713880393971
tensor([1, 0, 0, 0, 0, 0, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 2.062478443776969
tensor([1, 1, 0

Epoch 219/600 - Training:   0%|          | 0/29 [00:00<?, ?it/s]

tensor([0, 0, 1, 0, 1, 1, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 2.0265372753454893
tensor([1, 0, 0, 0, 0, 0, 1, 1, 1, 1])
torch.Size([24])
Gradient norm: 2.3087955268158455
tensor([1, 0, 1, 0, 0, 0, 1, 1, 0, 0])
torch.Size([24])
Gradient norm: 2.0753739501526347
tensor([1, 0, 0, 1, 1, 1, 0, 0, 1, 0])
torch.Size([24])
Gradient norm: 3.0081016136222787
tensor([0, 1, 0, 1, 1, 0, 1, 1, 0, 1])
torch.Size([24])
Gradient norm: 1.865553486469666
tensor([0, 0, 1, 0, 1, 0, 1, 1, 1, 0])
torch.Size([24])
Gradient norm: 1.727417292735899
tensor([1, 0, 0, 0, 1, 0, 0, 1, 1, 0])
torch.Size([24])
Gradient norm: 3.0887465580860294
tensor([0, 1, 1, 0, 0, 1, 0, 1, 0, 0])
torch.Size([24])
Gradient norm: 3.36534415373058
tensor([1, 0, 0, 1, 0, 0, 0, 1, 1, 1])
torch.Size([24])
Gradient norm: 2.5595571354044164
tensor([1, 1, 1, 0, 0, 1, 1, 0, 0, 1])
torch.Size([24])
Gradient norm: 1.9093193964269812
tensor([1, 0, 0, 0, 0, 0, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 1.5857968303363676
tensor([1, 1, 

Epoch 220/600 - Training:   0%|          | 0/29 [00:00<?, ?it/s]

tensor([0, 0, 1, 0, 1, 1, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 3.136143850968621
tensor([1, 0, 0, 0, 0, 0, 1, 1, 1, 1])
torch.Size([24])
Gradient norm: 2.1144544343181972
tensor([1, 0, 1, 0, 0, 0, 1, 1, 0, 0])
torch.Size([24])
Gradient norm: 2.3303990580601353
tensor([1, 0, 0, 1, 1, 1, 0, 0, 1, 0])
torch.Size([24])
Gradient norm: 3.1029364893219817
tensor([0, 1, 0, 1, 1, 0, 1, 1, 0, 1])
torch.Size([24])
Gradient norm: 1.5703289103148712
tensor([0, 0, 1, 0, 1, 0, 1, 1, 1, 0])
torch.Size([24])
Gradient norm: 1.9706653299421504
tensor([1, 0, 0, 0, 1, 0, 0, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.320375529452902
tensor([0, 1, 1, 0, 0, 1, 0, 1, 0, 0])
torch.Size([24])
Gradient norm: 3.469327069829997
tensor([1, 0, 0, 1, 0, 0, 0, 1, 1, 1])
torch.Size([24])
Gradient norm: 3.3883182661969014
tensor([1, 1, 1, 0, 0, 1, 1, 0, 0, 1])
torch.Size([24])
Gradient norm: 1.5861363778409832
tensor([1, 0, 0, 0, 0, 0, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 2.1313181169604576
tensor([1, 1,

Epoch 221/600 - Training:   0%|          | 0/29 [00:00<?, ?it/s]

tensor([0, 0, 1, 0, 1, 1, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 3.854147622150124
tensor([1, 0, 0, 0, 0, 0, 1, 1, 1, 1])
torch.Size([24])
Gradient norm: 1.5762148702153826
tensor([1, 0, 1, 0, 0, 0, 1, 1, 0, 0])
torch.Size([24])
Gradient norm: 1.9658906090982902
tensor([1, 0, 0, 1, 1, 1, 0, 0, 1, 0])
torch.Size([24])
Gradient norm: 2.3955691106263495
tensor([0, 1, 0, 1, 1, 0, 1, 1, 0, 1])
torch.Size([24])
Gradient norm: 1.9140021600132913
tensor([0, 0, 1, 0, 1, 0, 1, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.0529012758945386
tensor([1, 0, 0, 0, 1, 0, 0, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.6155021567554932
tensor([0, 1, 1, 0, 0, 1, 0, 1, 0, 0])
torch.Size([24])
Gradient norm: 3.2714976772052826
tensor([1, 0, 0, 1, 0, 0, 0, 1, 1, 1])
torch.Size([24])
Gradient norm: 3.2567594657427694
tensor([1, 1, 1, 0, 0, 1, 1, 0, 0, 1])
torch.Size([24])
Gradient norm: 1.7224720056476657
tensor([1, 0, 0, 0, 0, 0, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 1.5286026769152297
tensor([1, 

Epoch 222/600 - Training:   0%|          | 0/29 [00:00<?, ?it/s]

tensor([0, 0, 1, 0, 1, 1, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 4.395188512184164
tensor([1, 0, 0, 0, 0, 0, 1, 1, 1, 1])
torch.Size([24])
Gradient norm: 2.184115497343703
tensor([1, 0, 1, 0, 0, 0, 1, 1, 0, 0])
torch.Size([24])
Gradient norm: 2.033317341320322
tensor([1, 0, 0, 1, 1, 1, 0, 0, 1, 0])
torch.Size([24])
Gradient norm: 2.2874768013650058
tensor([0, 1, 0, 1, 1, 0, 1, 1, 0, 1])
torch.Size([24])
Gradient norm: 2.071898658968438
tensor([0, 0, 1, 0, 1, 0, 1, 1, 1, 0])
torch.Size([24])
Gradient norm: 1.8016759718614952
tensor([1, 0, 0, 0, 1, 0, 0, 1, 1, 0])
torch.Size([24])
Gradient norm: 3.0818259622144897
tensor([0, 1, 1, 0, 0, 1, 0, 1, 0, 0])
torch.Size([24])
Gradient norm: 2.85749096574763
tensor([1, 0, 0, 1, 0, 0, 0, 1, 1, 1])
torch.Size([24])
Gradient norm: 3.5509307675811783
tensor([1, 1, 1, 0, 0, 1, 1, 0, 0, 1])
torch.Size([24])
Gradient norm: 1.9927754660604329
tensor([1, 0, 0, 0, 0, 0, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 1.7981150721528316
tensor([1, 1, 0,

Epoch 223/600 - Training:   0%|          | 0/29 [00:00<?, ?it/s]

tensor([0, 0, 1, 0, 1, 1, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 3.2453835500959407
tensor([1, 0, 0, 0, 0, 0, 1, 1, 1, 1])
torch.Size([24])
Gradient norm: 2.07292544704878
tensor([1, 0, 1, 0, 0, 0, 1, 1, 0, 0])
torch.Size([24])
Gradient norm: 2.073485251791879
tensor([1, 0, 0, 1, 1, 1, 0, 0, 1, 0])
torch.Size([24])
Gradient norm: 1.9795737832402094
tensor([0, 1, 0, 1, 1, 0, 1, 1, 0, 1])
torch.Size([24])
Gradient norm: 2.0759459088130994
tensor([0, 0, 1, 0, 1, 0, 1, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.724753303924687
tensor([1, 0, 0, 0, 1, 0, 0, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.6463270174772404
tensor([0, 1, 1, 0, 0, 1, 0, 1, 0, 0])
torch.Size([24])
Gradient norm: 2.7367195247756158
tensor([1, 0, 0, 1, 0, 0, 0, 1, 1, 1])
torch.Size([24])
Gradient norm: 3.6566329681267344
tensor([1, 1, 1, 0, 0, 1, 1, 0, 0, 1])
torch.Size([24])
Gradient norm: 1.8138775939377934
tensor([1, 0, 0, 0, 0, 0, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 1.6055490521983649
tensor([1, 1, 

Epoch 224/600 - Training:   0%|          | 0/29 [00:00<?, ?it/s]

tensor([0, 0, 1, 0, 1, 1, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 3.119715768410211
tensor([1, 0, 0, 0, 0, 0, 1, 1, 1, 1])
torch.Size([24])
Gradient norm: 2.038708010574044
tensor([1, 0, 1, 0, 0, 0, 1, 1, 0, 0])
torch.Size([24])
Gradient norm: 2.7101387145493647
tensor([1, 0, 0, 1, 1, 1, 0, 0, 1, 0])
torch.Size([24])
Gradient norm: 2.3193550514163146
tensor([0, 1, 0, 1, 1, 0, 1, 1, 0, 1])
torch.Size([24])
Gradient norm: 1.9003829377387012
tensor([0, 0, 1, 0, 1, 0, 1, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.299977185203835
tensor([1, 0, 0, 0, 1, 0, 0, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.7839634232759236
tensor([0, 1, 1, 0, 0, 1, 0, 1, 0, 0])
torch.Size([24])
Gradient norm: 2.9373700725375675
tensor([1, 0, 0, 1, 0, 0, 0, 1, 1, 1])
torch.Size([24])
Gradient norm: 2.3574427242815355
tensor([1, 1, 1, 0, 0, 1, 1, 0, 0, 1])
torch.Size([24])
Gradient norm: 1.9881157542492076
tensor([1, 0, 0, 0, 0, 0, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 1.9631834243951298
tensor([1, 1,

Epoch 225/600 - Training:   0%|          | 0/29 [00:00<?, ?it/s]

tensor([0, 0, 1, 0, 1, 1, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 3.186140804977997
tensor([1, 0, 0, 0, 0, 0, 1, 1, 1, 1])
torch.Size([24])
Gradient norm: 2.3466223384303246
tensor([1, 0, 1, 0, 0, 0, 1, 1, 0, 0])
torch.Size([24])
Gradient norm: 2.4741304365437546
tensor([1, 0, 0, 1, 1, 1, 0, 0, 1, 0])
torch.Size([24])
Gradient norm: 2.1913145313432403
tensor([0, 1, 0, 1, 1, 0, 1, 1, 0, 1])
torch.Size([24])
Gradient norm: 1.8806565088486014
tensor([0, 0, 1, 0, 1, 0, 1, 1, 1, 0])
torch.Size([24])
Gradient norm: 1.9392021850529386
tensor([1, 0, 0, 0, 1, 0, 0, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.953032939660669
tensor([0, 1, 1, 0, 0, 1, 0, 1, 0, 0])
torch.Size([24])
Gradient norm: 3.403491767699842
tensor([1, 0, 0, 1, 0, 0, 0, 1, 1, 1])
torch.Size([24])
Gradient norm: 3.081792005473748
tensor([1, 1, 1, 0, 0, 1, 1, 0, 0, 1])
torch.Size([24])
Gradient norm: 2.0202288597147366
tensor([1, 0, 0, 0, 0, 0, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 1.7364585399094732
tensor([1, 1, 

Epoch 226/600 - Training:   0%|          | 0/29 [00:00<?, ?it/s]

tensor([0, 0, 1, 0, 1, 1, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 3.436121037975425
tensor([1, 0, 0, 0, 0, 0, 1, 1, 1, 1])
torch.Size([24])
Gradient norm: 1.9232590552576578
tensor([1, 0, 1, 0, 0, 0, 1, 1, 0, 0])
torch.Size([24])
Gradient norm: 2.4621601858610873
tensor([1, 0, 0, 1, 1, 1, 0, 0, 1, 0])
torch.Size([24])
Gradient norm: 2.396938505525653
tensor([0, 1, 0, 1, 1, 0, 1, 1, 0, 1])
torch.Size([24])
Gradient norm: 1.8613956087935968
tensor([0, 0, 1, 0, 1, 0, 1, 1, 1, 0])
torch.Size([24])
Gradient norm: 1.9733234584533272
tensor([1, 0, 0, 0, 1, 0, 0, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.154045288650126
tensor([0, 1, 1, 0, 0, 1, 0, 1, 0, 0])
torch.Size([24])
Gradient norm: 3.207604000216128
tensor([1, 0, 0, 1, 0, 0, 0, 1, 1, 1])
torch.Size([24])
Gradient norm: 2.6423301459155266
tensor([1, 1, 1, 0, 0, 1, 1, 0, 0, 1])
torch.Size([24])
Gradient norm: 1.6328809808867921
tensor([1, 0, 0, 0, 0, 0, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 1.9411319242083676
tensor([1, 1, 

Epoch 227/600 - Training:   0%|          | 0/29 [00:00<?, ?it/s]

tensor([0, 0, 1, 0, 1, 1, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 3.174729561881511
tensor([1, 0, 0, 0, 0, 0, 1, 1, 1, 1])
torch.Size([24])
Gradient norm: 2.0868153943697543
tensor([1, 0, 1, 0, 0, 0, 1, 1, 0, 0])
torch.Size([24])
Gradient norm: 2.5595481694714968
tensor([1, 0, 0, 1, 1, 1, 0, 0, 1, 0])
torch.Size([24])
Gradient norm: 2.4248952138569755
tensor([0, 1, 0, 1, 1, 0, 1, 1, 0, 1])
torch.Size([24])
Gradient norm: 2.056021076197301
tensor([0, 0, 1, 0, 1, 0, 1, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.2846048009000977
tensor([1, 0, 0, 0, 1, 0, 0, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.640242948849754
tensor([0, 1, 1, 0, 0, 1, 0, 1, 0, 0])
torch.Size([24])
Gradient norm: 2.724160579659468
tensor([1, 0, 0, 1, 0, 0, 0, 1, 1, 1])
torch.Size([24])
Gradient norm: 2.435169026432609
tensor([1, 1, 1, 0, 0, 1, 1, 0, 0, 1])
torch.Size([24])
Gradient norm: 1.900584017079027
tensor([1, 0, 0, 0, 0, 0, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 2.1689604208731508
tensor([1, 1, 0,

Epoch 228/600 - Training:   0%|          | 0/29 [00:00<?, ?it/s]

tensor([0, 0, 1, 0, 1, 1, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 3.217713654196594
tensor([1, 0, 0, 0, 0, 0, 1, 1, 1, 1])
torch.Size([24])
Gradient norm: 2.1394991751167707
tensor([1, 0, 1, 0, 0, 0, 1, 1, 0, 0])
torch.Size([24])
Gradient norm: 2.210264672477296
tensor([1, 0, 0, 1, 1, 1, 0, 0, 1, 0])
torch.Size([24])
Gradient norm: 2.3610336369504834
tensor([0, 1, 0, 1, 1, 0, 1, 1, 0, 1])
torch.Size([24])
Gradient norm: 2.1504547959693774
tensor([0, 0, 1, 0, 1, 0, 1, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.2762062410895223
tensor([1, 0, 0, 0, 1, 0, 0, 1, 1, 0])
torch.Size([24])
Gradient norm: 3.1204791624541692
tensor([0, 1, 1, 0, 0, 1, 0, 1, 0, 0])
torch.Size([24])
Gradient norm: 3.0272140592078274
tensor([1, 0, 0, 1, 0, 0, 0, 1, 1, 1])
torch.Size([24])
Gradient norm: 2.6039076264453223
tensor([1, 1, 1, 0, 0, 1, 1, 0, 0, 1])
torch.Size([24])
Gradient norm: 2.0097335337550852
tensor([1, 0, 0, 0, 0, 0, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 1.9158938032705743
tensor([1, 1

Epoch 229/600 - Training:   0%|          | 0/29 [00:00<?, ?it/s]

tensor([0, 0, 1, 0, 1, 1, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 3.4917263839404344
tensor([1, 0, 0, 0, 0, 0, 1, 1, 1, 1])
torch.Size([24])
Gradient norm: 2.434795462501798
tensor([1, 0, 1, 0, 0, 0, 1, 1, 0, 0])
torch.Size([24])
Gradient norm: 1.9558160401910019
tensor([1, 0, 0, 1, 1, 1, 0, 0, 1, 0])
torch.Size([24])
Gradient norm: 2.6513932050396316
tensor([0, 1, 0, 1, 1, 0, 1, 1, 0, 1])
torch.Size([24])
Gradient norm: 1.6479153520485406
tensor([0, 0, 1, 0, 1, 0, 1, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.210259952429091
tensor([1, 0, 0, 0, 1, 0, 0, 1, 1, 0])
torch.Size([24])
Gradient norm: 3.2028401145672096
tensor([0, 1, 1, 0, 0, 1, 0, 1, 0, 0])
torch.Size([24])
Gradient norm: 4.172325325260342
tensor([1, 0, 0, 1, 0, 0, 0, 1, 1, 1])
torch.Size([24])
Gradient norm: 3.1822938924140365
tensor([1, 1, 1, 0, 0, 1, 1, 0, 0, 1])
torch.Size([24])
Gradient norm: 1.694800417214213
tensor([1, 0, 0, 0, 0, 0, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 1.6753010830691164
tensor([1, 1, 

Epoch 230/600 - Training:   0%|          | 0/29 [00:00<?, ?it/s]

tensor([0, 0, 1, 0, 1, 1, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 3.795723645186186
tensor([1, 0, 0, 0, 0, 0, 1, 1, 1, 1])
torch.Size([24])
Gradient norm: 1.9047460527550883
tensor([1, 0, 1, 0, 0, 0, 1, 1, 0, 0])
torch.Size([24])
Gradient norm: 2.5719674587641737
tensor([1, 0, 0, 1, 1, 1, 0, 0, 1, 0])
torch.Size([24])
Gradient norm: 2.9651365144619968
tensor([0, 1, 0, 1, 1, 0, 1, 1, 0, 1])
torch.Size([24])
Gradient norm: 2.0953415513867237
tensor([0, 0, 1, 0, 1, 0, 1, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.325287190640294
tensor([1, 0, 0, 0, 1, 0, 0, 1, 1, 0])
torch.Size([24])
Gradient norm: 3.132661218115186
tensor([0, 1, 1, 0, 0, 1, 0, 1, 0, 0])
torch.Size([24])
Gradient norm: 2.5597513571248376
tensor([1, 0, 0, 1, 0, 0, 0, 1, 1, 1])
torch.Size([24])
Gradient norm: 2.7446701404956255
tensor([1, 1, 1, 0, 0, 1, 1, 0, 0, 1])
torch.Size([24])
Gradient norm: 2.3669478301623377
tensor([1, 0, 0, 0, 0, 0, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 1.8099801671745135
tensor([1, 1,

Epoch 231/600 - Training:   0%|          | 0/29 [00:00<?, ?it/s]

tensor([0, 0, 1, 0, 1, 1, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 3.4040803860775632
tensor([1, 0, 0, 0, 0, 0, 1, 1, 1, 1])
torch.Size([24])
Gradient norm: 2.043915549192165
tensor([1, 0, 1, 0, 0, 0, 1, 1, 0, 0])
torch.Size([24])
Gradient norm: 2.6097230101727917
tensor([1, 0, 0, 1, 1, 1, 0, 0, 1, 0])
torch.Size([24])
Gradient norm: 2.388359863611331
tensor([0, 1, 0, 1, 1, 0, 1, 1, 0, 1])
torch.Size([24])
Gradient norm: 2.6301558239088134
tensor([0, 0, 1, 0, 1, 0, 1, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.330746077244375
tensor([1, 0, 0, 0, 1, 0, 0, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.721043412371567
tensor([0, 1, 1, 0, 0, 1, 0, 1, 0, 0])
torch.Size([24])
Gradient norm: 2.9238018642641976
tensor([1, 0, 0, 1, 0, 0, 0, 1, 1, 1])
torch.Size([24])
Gradient norm: 2.8218885800762
tensor([1, 1, 1, 0, 0, 1, 1, 0, 0, 1])
torch.Size([24])
Gradient norm: 1.6064940999981971
tensor([1, 0, 0, 0, 0, 0, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 1.9815411583475682
tensor([1, 1, 0, 

Epoch 232/600 - Training:   0%|          | 0/29 [00:00<?, ?it/s]

tensor([0, 0, 1, 0, 1, 1, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 2.953338514848922
tensor([1, 0, 0, 0, 0, 0, 1, 1, 1, 1])
torch.Size([24])
Gradient norm: 2.070522984141711
tensor([1, 0, 1, 0, 0, 0, 1, 1, 0, 0])
torch.Size([24])
Gradient norm: 2.06079369391656
tensor([1, 0, 0, 1, 1, 1, 0, 0, 1, 0])
torch.Size([24])
Gradient norm: 2.5773613719720636
tensor([0, 1, 0, 1, 1, 0, 1, 1, 0, 1])
torch.Size([24])
Gradient norm: 1.5143691938953456
tensor([0, 0, 1, 0, 1, 0, 1, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.013291904952491
tensor([1, 0, 0, 0, 1, 0, 0, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.1820592978531526
tensor([0, 1, 1, 0, 0, 1, 0, 1, 0, 0])
torch.Size([24])
Gradient norm: 3.1957962104149296
tensor([1, 0, 0, 1, 0, 0, 0, 1, 1, 1])
torch.Size([24])
Gradient norm: 3.235142077159392
tensor([1, 1, 1, 0, 0, 1, 1, 0, 0, 1])
torch.Size([24])
Gradient norm: 1.893561641769789
tensor([1, 0, 0, 0, 0, 0, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 2.2632151390805495
tensor([1, 1, 0, 

Epoch 233/600 - Training:   0%|          | 0/29 [00:00<?, ?it/s]

tensor([0, 0, 1, 0, 1, 1, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 4.159748883629868
tensor([1, 0, 0, 0, 0, 0, 1, 1, 1, 1])
torch.Size([24])
Gradient norm: 2.1099630719523867
tensor([1, 0, 1, 0, 0, 0, 1, 1, 0, 0])
torch.Size([24])
Gradient norm: 2.549781235327601
tensor([1, 0, 0, 1, 1, 1, 0, 0, 1, 0])
torch.Size([24])
Gradient norm: 2.4446481747357414
tensor([0, 1, 0, 1, 1, 0, 1, 1, 0, 1])
torch.Size([24])
Gradient norm: 2.1701513348874415
tensor([0, 0, 1, 0, 1, 0, 1, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.1242420465862963
tensor([1, 0, 0, 0, 1, 0, 0, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.1916778619692505
tensor([0, 1, 1, 0, 0, 1, 0, 1, 0, 0])
torch.Size([24])
Gradient norm: 2.940125051609914
tensor([1, 0, 0, 1, 0, 0, 0, 1, 1, 1])
torch.Size([24])
Gradient norm: 2.5485155275737563
tensor([1, 1, 1, 0, 0, 1, 1, 0, 0, 1])
torch.Size([24])
Gradient norm: 1.8363112406422517
tensor([1, 0, 0, 0, 0, 0, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 1.6915288985966355
tensor([1, 1,

Epoch 234/600 - Training:   0%|          | 0/29 [00:00<?, ?it/s]

tensor([0, 0, 1, 0, 1, 1, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 3.5181712402648424
tensor([1, 0, 0, 0, 0, 0, 1, 1, 1, 1])
torch.Size([24])
Gradient norm: 2.1673582026044214
tensor([1, 0, 1, 0, 0, 0, 1, 1, 0, 0])
torch.Size([24])
Gradient norm: 2.257398464239389
tensor([1, 0, 0, 1, 1, 1, 0, 0, 1, 0])
torch.Size([24])
Gradient norm: 2.7460893332706995
tensor([0, 1, 0, 1, 1, 0, 1, 1, 0, 1])
torch.Size([24])
Gradient norm: 2.1224208575440064
tensor([0, 0, 1, 0, 1, 0, 1, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.0787655567172703
tensor([1, 0, 0, 0, 1, 0, 0, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.964856905657017
tensor([0, 1, 1, 0, 0, 1, 0, 1, 0, 0])
torch.Size([24])
Gradient norm: 2.964473631868341
tensor([1, 0, 0, 1, 0, 0, 0, 1, 1, 1])
torch.Size([24])
Gradient norm: 2.843974358574922
tensor([1, 1, 1, 0, 0, 1, 1, 0, 0, 1])
torch.Size([24])
Gradient norm: 2.126285315247097
tensor([1, 0, 0, 0, 0, 0, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 2.4484420413455084
tensor([1, 1, 0

Epoch 235/600 - Training:   0%|          | 0/29 [00:00<?, ?it/s]

tensor([0, 0, 1, 0, 1, 1, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 3.911475593382331
tensor([1, 0, 0, 0, 0, 0, 1, 1, 1, 1])
torch.Size([24])
Gradient norm: 2.157989120841046
tensor([1, 0, 1, 0, 0, 0, 1, 1, 0, 0])
torch.Size([24])
Gradient norm: 2.654526817022313
tensor([1, 0, 0, 1, 1, 1, 0, 0, 1, 0])
torch.Size([24])
Gradient norm: 2.465921919719623
tensor([0, 1, 0, 1, 1, 0, 1, 1, 0, 1])
torch.Size([24])
Gradient norm: 1.768862788080311
tensor([0, 0, 1, 0, 1, 0, 1, 1, 1, 0])
torch.Size([24])
Gradient norm: 1.9232119565036296
tensor([1, 0, 0, 0, 1, 0, 0, 1, 1, 0])
torch.Size([24])
Gradient norm: 1.9976348932694474
tensor([0, 1, 1, 0, 0, 1, 0, 1, 0, 0])
torch.Size([24])
Gradient norm: 3.2517533946568817
tensor([1, 0, 0, 1, 0, 0, 0, 1, 1, 1])
torch.Size([24])
Gradient norm: 3.414266241744635
tensor([1, 1, 1, 0, 0, 1, 1, 0, 0, 1])
torch.Size([24])
Gradient norm: 1.9071540453180216
tensor([1, 0, 0, 0, 0, 0, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 2.108651365648429
tensor([1, 1, 0, 

Epoch 236/600 - Training:   0%|          | 0/29 [00:00<?, ?it/s]

tensor([0, 0, 1, 0, 1, 1, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 3.974755466744004
tensor([1, 0, 0, 0, 0, 0, 1, 1, 1, 1])
torch.Size([24])
Gradient norm: 2.698129300993264
tensor([1, 0, 1, 0, 0, 0, 1, 1, 0, 0])
torch.Size([24])
Gradient norm: 2.1405575235250236
tensor([1, 0, 0, 1, 1, 1, 0, 0, 1, 0])
torch.Size([24])
Gradient norm: 2.985420365261217
tensor([0, 1, 0, 1, 1, 0, 1, 1, 0, 1])
torch.Size([24])
Gradient norm: 2.1013637603739466
tensor([0, 0, 1, 0, 1, 0, 1, 1, 1, 0])
torch.Size([24])
Gradient norm: 1.5997835692722269
tensor([1, 0, 0, 0, 1, 0, 0, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.7314646270367073
tensor([0, 1, 1, 0, 0, 1, 0, 1, 0, 0])
torch.Size([24])
Gradient norm: 3.2298097904723675
tensor([1, 0, 0, 1, 0, 0, 0, 1, 1, 1])
torch.Size([24])
Gradient norm: 3.476703075787467
tensor([1, 1, 1, 0, 0, 1, 1, 0, 0, 1])
torch.Size([24])
Gradient norm: 1.766317898823681
tensor([1, 0, 0, 0, 0, 0, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 1.5634370937598192
tensor([1, 1, 0

Epoch 237/600 - Training:   0%|          | 0/29 [00:00<?, ?it/s]

tensor([0, 0, 1, 0, 1, 1, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 3.611710235220226
tensor([1, 0, 0, 0, 0, 0, 1, 1, 1, 1])
torch.Size([24])
Gradient norm: 2.379813953310636
tensor([1, 0, 1, 0, 0, 0, 1, 1, 0, 0])
torch.Size([24])
Gradient norm: 1.7910293049832058
tensor([1, 0, 0, 1, 1, 1, 0, 0, 1, 0])
torch.Size([24])
Gradient norm: 2.2526807135276266
tensor([0, 1, 0, 1, 1, 0, 1, 1, 0, 1])
torch.Size([24])
Gradient norm: 1.8571160900153358
tensor([0, 0, 1, 0, 1, 0, 1, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.2408188057328635
tensor([1, 0, 0, 0, 1, 0, 0, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.9796177276667097
tensor([0, 1, 1, 0, 0, 1, 0, 1, 0, 0])
torch.Size([24])
Gradient norm: 3.0456985269063503
tensor([1, 0, 0, 1, 0, 0, 0, 1, 1, 1])
torch.Size([24])
Gradient norm: 3.448227871838942
tensor([1, 1, 1, 0, 0, 1, 1, 0, 0, 1])
torch.Size([24])
Gradient norm: 1.9434839826615866
tensor([1, 0, 0, 0, 0, 0, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 1.4268856930619607
tensor([1, 1,

Epoch 238/600 - Training:   0%|          | 0/29 [00:00<?, ?it/s]

tensor([0, 0, 1, 0, 1, 1, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 3.6331233971724815
tensor([1, 0, 0, 0, 0, 0, 1, 1, 1, 1])
torch.Size([24])
Gradient norm: 1.7720327755097054
tensor([1, 0, 1, 0, 0, 0, 1, 1, 0, 0])
torch.Size([24])
Gradient norm: 2.6856736240620003
tensor([1, 0, 0, 1, 1, 1, 0, 0, 1, 0])
torch.Size([24])
Gradient norm: 3.1982697315372004
tensor([0, 1, 0, 1, 1, 0, 1, 1, 0, 1])
torch.Size([24])
Gradient norm: 1.8537015501006833
tensor([0, 0, 1, 0, 1, 0, 1, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.0915404149524353
tensor([1, 0, 0, 0, 1, 0, 0, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.6060577900477946
tensor([0, 1, 1, 0, 0, 1, 0, 1, 0, 0])
torch.Size([24])
Gradient norm: 3.4256697524862716
tensor([1, 0, 0, 1, 0, 0, 0, 1, 1, 1])
torch.Size([24])
Gradient norm: 2.4953893026370695
tensor([1, 1, 1, 0, 0, 1, 1, 0, 0, 1])
torch.Size([24])
Gradient norm: 1.8798114105885142
tensor([1, 0, 0, 0, 0, 0, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 1.6922262153840288
tensor([1,

Epoch 239/600 - Training:   0%|          | 0/29 [00:00<?, ?it/s]

tensor([0, 0, 1, 0, 1, 1, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 3.3493432216076364
tensor([1, 0, 0, 0, 0, 0, 1, 1, 1, 1])
torch.Size([24])
Gradient norm: 2.2657616342887152
tensor([1, 0, 1, 0, 0, 0, 1, 1, 0, 0])
torch.Size([24])
Gradient norm: 2.025056544448994
tensor([1, 0, 0, 1, 1, 1, 0, 0, 1, 0])
torch.Size([24])
Gradient norm: 2.445354570388132
tensor([0, 1, 0, 1, 1, 0, 1, 1, 0, 1])
torch.Size([24])
Gradient norm: 1.8093145637787689
tensor([0, 0, 1, 0, 1, 0, 1, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.1086647605689066
tensor([1, 0, 0, 0, 1, 0, 0, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.9887877930422087
tensor([0, 1, 1, 0, 0, 1, 0, 1, 0, 0])
torch.Size([24])
Gradient norm: 3.383235396552305
tensor([1, 0, 0, 1, 0, 0, 0, 1, 1, 1])
torch.Size([24])
Gradient norm: 3.446195904878604
tensor([1, 1, 1, 0, 0, 1, 1, 0, 0, 1])
torch.Size([24])
Gradient norm: 2.386234375426207
tensor([1, 0, 0, 0, 0, 0, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 1.9542647892893827
tensor([1, 1, 0

Epoch 240/600 - Training:   0%|          | 0/29 [00:00<?, ?it/s]

tensor([0, 0, 1, 0, 1, 1, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 3.7837585277422443
tensor([1, 0, 0, 0, 0, 0, 1, 1, 1, 1])
torch.Size([24])
Gradient norm: 2.1338133683589873
tensor([1, 0, 1, 0, 0, 0, 1, 1, 0, 0])
torch.Size([24])
Gradient norm: 2.076910756946929
tensor([1, 0, 0, 1, 1, 1, 0, 0, 1, 0])
torch.Size([24])
Gradient norm: 2.992108364614848
tensor([0, 1, 0, 1, 1, 0, 1, 1, 0, 1])
torch.Size([24])
Gradient norm: 1.4971762749800948
tensor([0, 0, 1, 0, 1, 0, 1, 1, 1, 0])
torch.Size([24])
Gradient norm: 1.9431155837021064
tensor([1, 0, 0, 0, 1, 0, 0, 1, 1, 0])
torch.Size([24])
Gradient norm: 3.022335742353915
tensor([0, 1, 1, 0, 0, 1, 0, 1, 0, 0])
torch.Size([24])
Gradient norm: 2.88692550692656
tensor([1, 0, 0, 1, 0, 0, 0, 1, 1, 1])
torch.Size([24])
Gradient norm: 3.2725226583246125
tensor([1, 1, 1, 0, 0, 1, 1, 0, 0, 1])
torch.Size([24])
Gradient norm: 2.1315436483189942
tensor([1, 0, 0, 0, 0, 0, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 1.8919475253718951
tensor([1, 1, 0

Epoch 241/600 - Training:   0%|          | 0/29 [00:00<?, ?it/s]

tensor([0, 0, 1, 0, 1, 1, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 3.809959473007419
tensor([1, 0, 0, 0, 0, 0, 1, 1, 1, 1])
torch.Size([24])
Gradient norm: 2.7266657377324695
tensor([1, 0, 1, 0, 0, 0, 1, 1, 0, 0])
torch.Size([24])
Gradient norm: 2.468541935326795
tensor([1, 0, 0, 1, 1, 1, 0, 0, 1, 0])
torch.Size([24])
Gradient norm: 2.617647202771821
tensor([0, 1, 0, 1, 1, 0, 1, 1, 0, 1])
torch.Size([24])
Gradient norm: 2.110166500073069
tensor([0, 0, 1, 0, 1, 0, 1, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.4427270546668822
tensor([1, 0, 0, 0, 1, 0, 0, 1, 1, 0])
torch.Size([24])
Gradient norm: 3.94563717537429
tensor([0, 1, 1, 0, 0, 1, 0, 1, 0, 0])
torch.Size([24])
Gradient norm: 2.9557160021278777
tensor([1, 0, 0, 1, 0, 0, 0, 1, 1, 1])
torch.Size([24])
Gradient norm: 2.619466522409029
tensor([1, 1, 1, 0, 0, 1, 1, 0, 0, 1])
torch.Size([24])
Gradient norm: 1.9699806860797606
tensor([1, 0, 0, 0, 0, 0, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 1.9141309765592487
tensor([1, 1, 0, 

Epoch 242/600 - Training:   0%|          | 0/29 [00:00<?, ?it/s]

tensor([0, 0, 1, 0, 1, 1, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 4.342726989140909
tensor([1, 0, 0, 0, 0, 0, 1, 1, 1, 1])
torch.Size([24])
Gradient norm: 2.1958104037262616
tensor([1, 0, 1, 0, 0, 0, 1, 1, 0, 0])
torch.Size([24])
Gradient norm: 2.373621591210646
tensor([1, 0, 0, 1, 1, 1, 0, 0, 1, 0])
torch.Size([24])
Gradient norm: 2.594940796193468
tensor([0, 1, 0, 1, 1, 0, 1, 1, 0, 1])
torch.Size([24])
Gradient norm: 1.936352002354166
tensor([0, 0, 1, 0, 1, 0, 1, 1, 1, 0])
torch.Size([24])
Gradient norm: 1.8881727783582756
tensor([1, 0, 0, 0, 1, 0, 0, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.88435404786208
tensor([0, 1, 1, 0, 0, 1, 0, 1, 0, 0])
torch.Size([24])
Gradient norm: 3.365758349061971
tensor([1, 0, 0, 1, 0, 0, 0, 1, 1, 1])
torch.Size([24])
Gradient norm: 2.8535889112284063
tensor([1, 1, 1, 0, 0, 1, 1, 0, 0, 1])
torch.Size([24])
Gradient norm: 2.29822302854725
tensor([1, 0, 0, 0, 0, 0, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 1.7156818989201335
tensor([1, 1, 0, 0,

Epoch 243/600 - Training:   0%|          | 0/29 [00:00<?, ?it/s]

tensor([0, 0, 1, 0, 1, 1, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 3.634263546162099
tensor([1, 0, 0, 0, 0, 0, 1, 1, 1, 1])
torch.Size([24])
Gradient norm: 2.237236230726231
tensor([1, 0, 1, 0, 0, 0, 1, 1, 0, 0])
torch.Size([24])
Gradient norm: 2.290201561450854
tensor([1, 0, 0, 1, 1, 1, 0, 0, 1, 0])
torch.Size([24])
Gradient norm: 2.976712464009222
tensor([0, 1, 0, 1, 1, 0, 1, 1, 0, 1])
torch.Size([24])
Gradient norm: 2.2134554612247372
tensor([0, 0, 1, 0, 1, 0, 1, 1, 1, 0])
torch.Size([24])
Gradient norm: 1.8129900194073427
tensor([1, 0, 0, 0, 1, 0, 0, 1, 1, 0])
torch.Size([24])
Gradient norm: 3.3511999178088097
tensor([0, 1, 1, 0, 0, 1, 0, 1, 0, 0])
torch.Size([24])
Gradient norm: 2.439556358621364
tensor([1, 0, 0, 1, 0, 0, 0, 1, 1, 1])
torch.Size([24])
Gradient norm: 3.081628813033891
tensor([1, 1, 1, 0, 0, 1, 1, 0, 0, 1])
torch.Size([24])
Gradient norm: 1.5738441056913268
tensor([1, 0, 0, 0, 0, 0, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 1.6896695223272462
tensor([1, 1, 0,

Epoch 244/600 - Training:   0%|          | 0/29 [00:00<?, ?it/s]

tensor([0, 0, 1, 0, 1, 1, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 3.2281888328205466
tensor([1, 0, 0, 0, 0, 0, 1, 1, 1, 1])
torch.Size([24])
Gradient norm: 2.945256949336622
tensor([1, 0, 1, 0, 0, 0, 1, 1, 0, 0])
torch.Size([24])
Gradient norm: 2.300723132019144
tensor([1, 0, 0, 1, 1, 1, 0, 0, 1, 0])
torch.Size([24])
Gradient norm: 2.5719517693736864
tensor([0, 1, 0, 1, 1, 0, 1, 1, 0, 1])
torch.Size([24])
Gradient norm: 1.8466318344501635
tensor([0, 0, 1, 0, 1, 0, 1, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.7984381746160474
tensor([1, 0, 0, 0, 1, 0, 0, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.9202684453422854
tensor([0, 1, 1, 0, 0, 1, 0, 1, 0, 0])
torch.Size([24])
Gradient norm: 3.474552677343695
tensor([1, 0, 0, 1, 0, 0, 0, 1, 1, 1])
torch.Size([24])
Gradient norm: 3.192213556708966
tensor([1, 1, 1, 0, 0, 1, 1, 0, 0, 1])
torch.Size([24])
Gradient norm: 1.75521386122781
tensor([1, 0, 0, 0, 0, 0, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 1.9952346876048392
tensor([1, 1, 0,

Epoch 245/600 - Training:   0%|          | 0/29 [00:00<?, ?it/s]

tensor([0, 0, 1, 0, 1, 1, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 3.9432768395721527
tensor([1, 0, 0, 0, 0, 0, 1, 1, 1, 1])
torch.Size([24])
Gradient norm: 3.1210018258013066
tensor([1, 0, 1, 0, 0, 0, 1, 1, 0, 0])
torch.Size([24])
Gradient norm: 2.1714412284313744
tensor([1, 0, 0, 1, 1, 1, 0, 0, 1, 0])
torch.Size([24])
Gradient norm: 1.8824231191949619
tensor([0, 1, 0, 1, 1, 0, 1, 1, 0, 1])
torch.Size([24])
Gradient norm: 2.345948326709986
tensor([0, 0, 1, 0, 1, 0, 1, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.2825761242853764
tensor([1, 0, 0, 0, 1, 0, 0, 1, 1, 0])
torch.Size([24])
Gradient norm: 3.120906200704236
tensor([0, 1, 1, 0, 0, 1, 0, 1, 0, 0])
torch.Size([24])
Gradient norm: 3.036661613176491
tensor([1, 0, 0, 1, 0, 0, 0, 1, 1, 1])
torch.Size([24])
Gradient norm: 2.4156862522340985
tensor([1, 1, 1, 0, 0, 1, 1, 0, 0, 1])
torch.Size([24])
Gradient norm: 1.8599898335622342
tensor([1, 0, 0, 0, 0, 0, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 1.5032077353631839
tensor([1, 1,

Epoch 246/600 - Training:   0%|          | 0/29 [00:00<?, ?it/s]

tensor([0, 0, 1, 0, 1, 1, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 4.111957649282714
tensor([1, 0, 0, 0, 0, 0, 1, 1, 1, 1])
torch.Size([24])
Gradient norm: 2.515201949445295
tensor([1, 0, 1, 0, 0, 0, 1, 1, 0, 0])
torch.Size([24])
Gradient norm: 1.8008469781625154
tensor([1, 0, 0, 1, 1, 1, 0, 0, 1, 0])
torch.Size([24])
Gradient norm: 2.1732159947660548
tensor([0, 1, 0, 1, 1, 0, 1, 1, 0, 1])
torch.Size([24])
Gradient norm: 1.8757431659264758
tensor([0, 0, 1, 0, 1, 0, 1, 1, 1, 0])
torch.Size([24])
Gradient norm: 1.6974767151055488
tensor([1, 0, 0, 0, 1, 0, 0, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.688661429259611
tensor([0, 1, 1, 0, 0, 1, 0, 1, 0, 0])
torch.Size([24])
Gradient norm: 3.2520366134314
tensor([1, 0, 0, 1, 0, 0, 0, 1, 1, 1])
torch.Size([24])
Gradient norm: 2.6674046941090936
tensor([1, 1, 1, 0, 0, 1, 1, 0, 0, 1])
torch.Size([24])
Gradient norm: 1.7373617211885939
tensor([1, 0, 0, 0, 0, 0, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 1.4150976493428216
tensor([1, 1, 0,

Epoch 247/600 - Training:   0%|          | 0/29 [00:00<?, ?it/s]

tensor([0, 0, 1, 0, 1, 1, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 2.86230052818999
tensor([1, 0, 0, 0, 0, 0, 1, 1, 1, 1])
torch.Size([24])
Gradient norm: 2.3417797626453036
tensor([1, 0, 1, 0, 0, 0, 1, 1, 0, 0])
torch.Size([24])
Gradient norm: 2.24598004254997
tensor([1, 0, 0, 1, 1, 1, 0, 0, 1, 0])
torch.Size([24])
Gradient norm: 3.002139063750771
tensor([0, 1, 0, 1, 1, 0, 1, 1, 0, 1])
torch.Size([24])
Gradient norm: 1.467479367976068
tensor([0, 0, 1, 0, 1, 0, 1, 1, 1, 0])
torch.Size([24])
Gradient norm: 1.9923006522069477
tensor([1, 0, 0, 0, 1, 0, 0, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.528540147286275
tensor([0, 1, 1, 0, 0, 1, 0, 1, 0, 0])
torch.Size([24])
Gradient norm: 3.0020231070828283
tensor([1, 0, 0, 1, 0, 0, 0, 1, 1, 1])
torch.Size([24])
Gradient norm: 2.590449277501688
tensor([1, 1, 1, 0, 0, 1, 1, 0, 0, 1])
torch.Size([24])
Gradient norm: 1.7544172806307903
tensor([1, 0, 0, 0, 0, 0, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 2.3887037966879587
tensor([1, 1, 0, 0

Epoch 248/600 - Training:   0%|          | 0/29 [00:00<?, ?it/s]

tensor([0, 0, 1, 0, 1, 1, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 3.923853863103471
tensor([1, 0, 0, 0, 0, 0, 1, 1, 1, 1])
torch.Size([24])
Gradient norm: 2.4460628130887914
tensor([1, 0, 1, 0, 0, 0, 1, 1, 0, 0])
torch.Size([24])
Gradient norm: 2.2938536468907103
tensor([1, 0, 0, 1, 1, 1, 0, 0, 1, 0])
torch.Size([24])
Gradient norm: 2.557452098843412
tensor([0, 1, 0, 1, 1, 0, 1, 1, 0, 1])
torch.Size([24])
Gradient norm: 1.7558858781902744
tensor([0, 0, 1, 0, 1, 0, 1, 1, 1, 0])
torch.Size([24])
Gradient norm: 1.829814390829191
tensor([1, 0, 0, 0, 1, 0, 0, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.598701825810354
tensor([0, 1, 1, 0, 0, 1, 0, 1, 0, 0])
torch.Size([24])
Gradient norm: 3.3059214172444835
tensor([1, 0, 0, 1, 0, 0, 0, 1, 1, 1])
torch.Size([24])
Gradient norm: 3.071406804595466
tensor([1, 1, 1, 0, 0, 1, 1, 0, 0, 1])
torch.Size([24])
Gradient norm: 1.8256404532840476
tensor([1, 0, 0, 0, 0, 0, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 1.8376534709240424
tensor([1, 1, 0

Epoch 249/600 - Training:   0%|          | 0/29 [00:00<?, ?it/s]

tensor([0, 0, 1, 0, 1, 1, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 3.648686942254699
tensor([1, 0, 0, 0, 0, 0, 1, 1, 1, 1])
torch.Size([24])
Gradient norm: 2.1214631148493033
tensor([1, 0, 1, 0, 0, 0, 1, 1, 0, 0])
torch.Size([24])
Gradient norm: 2.736298158544859
tensor([1, 0, 0, 1, 1, 1, 0, 0, 1, 0])
torch.Size([24])
Gradient norm: 3.0510123788127985
tensor([0, 1, 0, 1, 1, 0, 1, 1, 0, 1])
torch.Size([24])
Gradient norm: 1.9159223703254058
tensor([0, 0, 1, 0, 1, 0, 1, 1, 1, 0])
torch.Size([24])
Gradient norm: 1.9749061304023094
tensor([1, 0, 0, 0, 1, 0, 0, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.863152381503202
tensor([0, 1, 1, 0, 0, 1, 0, 1, 0, 0])
torch.Size([24])
Gradient norm: 3.207535626109146
tensor([1, 0, 0, 1, 0, 0, 0, 1, 1, 1])
torch.Size([24])
Gradient norm: 2.9253835104115185
tensor([1, 1, 1, 0, 0, 1, 1, 0, 0, 1])
torch.Size([24])
Gradient norm: 1.651408921098871
tensor([1, 0, 0, 0, 0, 0, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 1.7160742827278732
tensor([1, 1, 0

Epoch 250/600 - Training:   0%|          | 0/29 [00:00<?, ?it/s]

tensor([0, 0, 1, 0, 1, 1, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 3.0159625398830037
tensor([1, 0, 0, 0, 0, 0, 1, 1, 1, 1])
torch.Size([24])
Gradient norm: 2.150579439628885
tensor([1, 0, 1, 0, 0, 0, 1, 1, 0, 0])
torch.Size([24])
Gradient norm: 2.4979926327476285
tensor([1, 0, 0, 1, 1, 1, 0, 0, 1, 0])
torch.Size([24])
Gradient norm: 2.694614350647438
tensor([0, 1, 0, 1, 1, 0, 1, 1, 0, 1])
torch.Size([24])
Gradient norm: 2.295407478292865
tensor([0, 0, 1, 0, 1, 0, 1, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.8551041114511593
tensor([1, 0, 0, 0, 1, 0, 0, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.5231854313402824
tensor([0, 1, 1, 0, 0, 1, 0, 1, 0, 0])
torch.Size([24])
Gradient norm: 3.3575334689541694
tensor([1, 0, 0, 1, 0, 0, 0, 1, 1, 1])
torch.Size([24])
Gradient norm: 3.2150911188047555
tensor([1, 1, 1, 0, 0, 1, 1, 0, 0, 1])
torch.Size([24])
Gradient norm: 1.8475059849171755
tensor([1, 0, 0, 0, 0, 0, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 1.4563715353622784
tensor([1, 1,

Epoch 251/600 - Training:   0%|          | 0/29 [00:00<?, ?it/s]

tensor([0, 0, 1, 0, 1, 1, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 3.480628366965864
tensor([1, 0, 0, 0, 0, 0, 1, 1, 1, 1])
torch.Size([24])
Gradient norm: 2.3836350048187724
tensor([1, 0, 1, 0, 0, 0, 1, 1, 0, 0])
torch.Size([24])
Gradient norm: 2.709522310557067
tensor([1, 0, 0, 1, 1, 1, 0, 0, 1, 0])
torch.Size([24])
Gradient norm: 2.546645639438005
tensor([0, 1, 0, 1, 1, 0, 1, 1, 0, 1])
torch.Size([24])
Gradient norm: 1.8503763198473473
tensor([0, 0, 1, 0, 1, 0, 1, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.0797905309432925
tensor([1, 0, 0, 0, 1, 0, 0, 1, 1, 0])
torch.Size([24])
Gradient norm: 3.1943400765176917
tensor([0, 1, 1, 0, 0, 1, 0, 1, 0, 0])
torch.Size([24])
Gradient norm: 3.0054382000279865
tensor([1, 0, 0, 1, 0, 0, 0, 1, 1, 1])
torch.Size([24])
Gradient norm: 3.4784102108488555
tensor([1, 1, 1, 0, 0, 1, 1, 0, 0, 1])
torch.Size([24])
Gradient norm: 2.042045907996871
tensor([1, 0, 0, 0, 0, 0, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 1.922744246312487
tensor([1, 1, 0

Epoch 252/600 - Training:   0%|          | 0/29 [00:00<?, ?it/s]

tensor([0, 0, 1, 0, 1, 1, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 3.898665132810572
tensor([1, 0, 0, 0, 0, 0, 1, 1, 1, 1])
torch.Size([24])
Gradient norm: 1.9689821033373425
tensor([1, 0, 1, 0, 0, 0, 1, 1, 0, 0])
torch.Size([24])
Gradient norm: 2.2808166574736415
tensor([1, 0, 0, 1, 1, 1, 0, 0, 1, 0])
torch.Size([24])
Gradient norm: 3.061687255439039
tensor([0, 1, 0, 1, 1, 0, 1, 1, 0, 1])
torch.Size([24])
Gradient norm: 1.4941761574743098
tensor([0, 0, 1, 0, 1, 0, 1, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.15910693796631
tensor([1, 0, 0, 0, 1, 0, 0, 1, 1, 0])
torch.Size([24])
Gradient norm: 3.4100098559053458
tensor([0, 1, 1, 0, 0, 1, 0, 1, 0, 0])
torch.Size([24])
Gradient norm: 3.8650670441702877
tensor([1, 0, 0, 1, 0, 0, 0, 1, 1, 1])
torch.Size([24])
Gradient norm: 3.102113737692168
tensor([1, 1, 1, 0, 0, 1, 1, 0, 0, 1])
torch.Size([24])
Gradient norm: 1.7532703170715103
tensor([1, 0, 0, 0, 0, 0, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 2.083534350755187
tensor([1, 1, 0,

Epoch 253/600 - Training:   0%|          | 0/29 [00:00<?, ?it/s]

tensor([0, 0, 1, 0, 1, 1, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 3.6919689126979174
tensor([1, 0, 0, 0, 0, 0, 1, 1, 1, 1])
torch.Size([24])
Gradient norm: 2.1487838726608883
tensor([1, 0, 1, 0, 0, 0, 1, 1, 0, 0])
torch.Size([24])
Gradient norm: 2.6914770769607514
tensor([1, 0, 0, 1, 1, 1, 0, 0, 1, 0])
torch.Size([24])
Gradient norm: 3.1217336386946193
tensor([0, 1, 0, 1, 1, 0, 1, 1, 0, 1])
torch.Size([24])
Gradient norm: 2.123044924659161
tensor([0, 0, 1, 0, 1, 0, 1, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.4330151189069866
tensor([1, 0, 0, 0, 1, 0, 0, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.791681348364177
tensor([0, 1, 1, 0, 0, 1, 0, 1, 0, 0])
torch.Size([24])
Gradient norm: 2.9068832446901935
tensor([1, 0, 0, 1, 0, 0, 0, 1, 1, 1])
torch.Size([24])
Gradient norm: 3.200092943039457
tensor([1, 1, 1, 0, 0, 1, 1, 0, 0, 1])
torch.Size([24])
Gradient norm: 2.2367090600400537
tensor([1, 0, 0, 0, 0, 0, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 1.4498459227148985
tensor([1, 1,

Epoch 254/600 - Training:   0%|          | 0/29 [00:00<?, ?it/s]

tensor([0, 0, 1, 0, 1, 1, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 3.8016495527692404
tensor([1, 0, 0, 0, 0, 0, 1, 1, 1, 1])
torch.Size([24])
Gradient norm: 2.078098585494261
tensor([1, 0, 1, 0, 0, 0, 1, 1, 0, 0])
torch.Size([24])
Gradient norm: 2.9683187375850646
tensor([1, 0, 0, 1, 1, 1, 0, 0, 1, 0])
torch.Size([24])
Gradient norm: 2.567185704710369
tensor([0, 1, 0, 1, 1, 0, 1, 1, 0, 1])
torch.Size([24])
Gradient norm: 1.8128175781836382
tensor([0, 0, 1, 0, 1, 0, 1, 1, 1, 0])
torch.Size([24])
Gradient norm: 1.9391869692486952
tensor([1, 0, 0, 0, 1, 0, 0, 1, 1, 0])
torch.Size([24])
Gradient norm: 3.6875734729807013
tensor([0, 1, 1, 0, 0, 1, 0, 1, 0, 0])
torch.Size([24])
Gradient norm: 2.3935872554712163
tensor([1, 0, 0, 1, 0, 0, 0, 1, 1, 1])
torch.Size([24])
Gradient norm: 2.3684701765401788
tensor([1, 1, 1, 0, 0, 1, 1, 0, 0, 1])
torch.Size([24])
Gradient norm: 2.469243750841792
tensor([1, 0, 0, 0, 0, 0, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 1.3998643115245815
tensor([1, 1,

Epoch 255/600 - Training:   0%|          | 0/29 [00:00<?, ?it/s]

tensor([0, 0, 1, 0, 1, 1, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 3.3169204603840474
tensor([1, 0, 0, 0, 0, 0, 1, 1, 1, 1])
torch.Size([24])
Gradient norm: 2.586681949996853
tensor([1, 0, 1, 0, 0, 0, 1, 1, 0, 0])
torch.Size([24])
Gradient norm: 2.6953642333429326
tensor([1, 0, 0, 1, 1, 1, 0, 0, 1, 0])
torch.Size([24])
Gradient norm: 3.059168765074816
tensor([0, 1, 0, 1, 1, 0, 1, 1, 0, 1])
torch.Size([24])
Gradient norm: 2.3666267552118616
tensor([0, 0, 1, 0, 1, 0, 1, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.795867711165941
tensor([1, 0, 0, 0, 1, 0, 0, 1, 1, 0])
torch.Size([24])
Gradient norm: 3.2641843504426213
tensor([0, 1, 1, 0, 0, 1, 0, 1, 0, 0])
torch.Size([24])
Gradient norm: 2.5541000505450686
tensor([1, 0, 0, 1, 0, 0, 0, 1, 1, 1])
torch.Size([24])
Gradient norm: 3.4770442785109252
tensor([1, 1, 1, 0, 0, 1, 1, 0, 0, 1])
torch.Size([24])
Gradient norm: 1.8187274146981058
tensor([1, 0, 0, 0, 0, 0, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 2.0750582970646625
tensor([1, 1,

Epoch 256/600 - Training:   0%|          | 0/29 [00:00<?, ?it/s]

tensor([0, 0, 1, 0, 1, 1, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 3.492489313987749
tensor([1, 0, 0, 0, 0, 0, 1, 1, 1, 1])
torch.Size([24])
Gradient norm: 1.8527581211790427
tensor([1, 0, 1, 0, 0, 0, 1, 1, 0, 0])
torch.Size([24])
Gradient norm: 2.4278102021835606
tensor([1, 0, 0, 1, 1, 1, 0, 0, 1, 0])
torch.Size([24])
Gradient norm: 2.754258522137486
tensor([0, 1, 0, 1, 1, 0, 1, 1, 0, 1])
torch.Size([24])
Gradient norm: 2.062580095372222
tensor([0, 0, 1, 0, 1, 0, 1, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.3731731073537934
tensor([1, 0, 0, 0, 1, 0, 0, 1, 1, 0])
torch.Size([24])
Gradient norm: 3.086127231883597
tensor([0, 1, 1, 0, 0, 1, 0, 1, 0, 0])
torch.Size([24])
Gradient norm: 3.492382390103211
tensor([1, 0, 0, 1, 0, 0, 0, 1, 1, 1])
torch.Size([24])
Gradient norm: 2.798899961860871
tensor([1, 1, 1, 0, 0, 1, 1, 0, 0, 1])
torch.Size([24])
Gradient norm: 1.9882347282927886
tensor([1, 0, 0, 0, 0, 0, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 2.1862767444662468
tensor([1, 1, 0,

Epoch 257/600 - Training:   0%|          | 0/29 [00:00<?, ?it/s]

tensor([0, 0, 1, 0, 1, 1, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 3.9902102061211706
tensor([1, 0, 0, 0, 0, 0, 1, 1, 1, 1])
torch.Size([24])
Gradient norm: 2.7128993925671194
tensor([1, 0, 1, 0, 0, 0, 1, 1, 0, 0])
torch.Size([24])
Gradient norm: 2.624946697364877
tensor([1, 0, 0, 1, 1, 1, 0, 0, 1, 0])
torch.Size([24])
Gradient norm: 2.7034335429622613
tensor([0, 1, 0, 1, 1, 0, 1, 1, 0, 1])
torch.Size([24])
Gradient norm: 1.7885879333912738
tensor([0, 0, 1, 0, 1, 0, 1, 1, 1, 0])
torch.Size([24])
Gradient norm: 1.8187247876842683
tensor([1, 0, 0, 0, 1, 0, 0, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.8957357349003012
tensor([0, 1, 1, 0, 0, 1, 0, 1, 0, 0])
torch.Size([24])
Gradient norm: 3.021284420436359
tensor([1, 0, 0, 1, 0, 0, 0, 1, 1, 1])
torch.Size([24])
Gradient norm: 2.757640770491325
tensor([1, 1, 1, 0, 0, 1, 1, 0, 0, 1])
torch.Size([24])
Gradient norm: 1.5850105129060574
tensor([1, 0, 0, 0, 0, 0, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 2.045958862077721
tensor([1, 1, 

Epoch 258/600 - Training:   0%|          | 0/29 [00:00<?, ?it/s]

tensor([0, 0, 1, 0, 1, 1, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 3.5303870348872346
tensor([1, 0, 0, 0, 0, 0, 1, 1, 1, 1])
torch.Size([24])
Gradient norm: 1.8337617676102798
tensor([1, 0, 1, 0, 0, 0, 1, 1, 0, 0])
torch.Size([24])
Gradient norm: 2.404034851329057
tensor([1, 0, 0, 1, 1, 1, 0, 0, 1, 0])
torch.Size([24])
Gradient norm: 2.318087122204914
tensor([0, 1, 0, 1, 1, 0, 1, 1, 0, 1])
torch.Size([24])
Gradient norm: 2.6073110997604068
tensor([0, 0, 1, 0, 1, 0, 1, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.0692661443140916
tensor([1, 0, 0, 0, 1, 0, 0, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.3855115152478676
tensor([0, 1, 1, 0, 0, 1, 0, 1, 0, 0])
torch.Size([24])
Gradient norm: 3.312800971045815
tensor([1, 0, 0, 1, 0, 0, 0, 1, 1, 1])
torch.Size([24])
Gradient norm: 3.234224414413569
tensor([1, 1, 1, 0, 0, 1, 1, 0, 0, 1])
torch.Size([24])
Gradient norm: 1.8124987088539242
tensor([1, 0, 0, 0, 0, 0, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 1.7728377291058734
tensor([1, 1, 

Epoch 259/600 - Training:   0%|          | 0/29 [00:00<?, ?it/s]

tensor([0, 0, 1, 0, 1, 1, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 2.7068649357749437
tensor([1, 0, 0, 0, 0, 0, 1, 1, 1, 1])
torch.Size([24])
Gradient norm: 2.2862634579710446
tensor([1, 0, 1, 0, 0, 0, 1, 1, 0, 0])
torch.Size([24])
Gradient norm: 1.8468230159316832
tensor([1, 0, 0, 1, 1, 1, 0, 0, 1, 0])
torch.Size([24])
Gradient norm: 2.979677682621678
tensor([0, 1, 0, 1, 1, 0, 1, 1, 0, 1])
torch.Size([24])
Gradient norm: 2.2770981877970065
tensor([0, 0, 1, 0, 1, 0, 1, 1, 1, 0])
torch.Size([24])
Gradient norm: 1.9149222647293092
tensor([1, 0, 0, 0, 1, 0, 0, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.989584063569293
tensor([0, 1, 1, 0, 0, 1, 0, 1, 0, 0])
torch.Size([24])
Gradient norm: 2.278091731765987
tensor([1, 0, 0, 1, 0, 0, 0, 1, 1, 1])
torch.Size([24])
Gradient norm: 2.9006141415483326
tensor([1, 1, 1, 0, 0, 1, 1, 0, 0, 1])
torch.Size([24])
Gradient norm: 1.93833106109607
tensor([1, 0, 0, 0, 0, 0, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 1.8271251453224315
tensor([1, 1, 0

Epoch 260/600 - Training:   0%|          | 0/29 [00:00<?, ?it/s]

tensor([0, 0, 1, 0, 1, 1, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 3.1407932446954154
tensor([1, 0, 0, 0, 0, 0, 1, 1, 1, 1])
torch.Size([24])
Gradient norm: 1.9515041446936072
tensor([1, 0, 1, 0, 0, 0, 1, 1, 0, 0])
torch.Size([24])
Gradient norm: 2.246163800825325
tensor([1, 0, 0, 1, 1, 1, 0, 0, 1, 0])
torch.Size([24])
Gradient norm: 1.8956084291956006
tensor([0, 1, 0, 1, 1, 0, 1, 1, 0, 1])
torch.Size([24])
Gradient norm: 2.315004888552711
tensor([0, 0, 1, 0, 1, 0, 1, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.075438594569121
tensor([1, 0, 0, 0, 1, 0, 0, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.699734419411314
tensor([0, 1, 1, 0, 0, 1, 0, 1, 0, 0])
torch.Size([24])
Gradient norm: 3.377608824023528
tensor([1, 0, 0, 1, 0, 0, 0, 1, 1, 1])
torch.Size([24])
Gradient norm: 2.6700249083963263
tensor([1, 1, 1, 0, 0, 1, 1, 0, 0, 1])
torch.Size([24])
Gradient norm: 1.743733469213415
tensor([1, 0, 0, 0, 0, 0, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 1.6477593900407355
tensor([1, 1, 0,

Epoch 261/600 - Training:   0%|          | 0/29 [00:00<?, ?it/s]

tensor([0, 0, 1, 0, 1, 1, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 3.229230955277745
tensor([1, 0, 0, 0, 0, 0, 1, 1, 1, 1])
torch.Size([24])
Gradient norm: 1.610120726106286
tensor([1, 0, 1, 0, 0, 0, 1, 1, 0, 0])
torch.Size([24])
Gradient norm: 2.579392307286035
tensor([1, 0, 0, 1, 1, 1, 0, 0, 1, 0])
torch.Size([24])
Gradient norm: 2.491723297174352
tensor([0, 1, 0, 1, 1, 0, 1, 1, 0, 1])
torch.Size([24])
Gradient norm: 2.0521481652853213
tensor([0, 0, 1, 0, 1, 0, 1, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.5001259809302905
tensor([1, 0, 0, 0, 1, 0, 0, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.9402580745254947
tensor([0, 1, 1, 0, 0, 1, 0, 1, 0, 0])
torch.Size([24])
Gradient norm: 3.8346270810647596
tensor([1, 0, 0, 1, 0, 0, 0, 1, 1, 1])
torch.Size([24])
Gradient norm: 2.881831274082958
tensor([1, 1, 1, 0, 0, 1, 1, 0, 0, 1])
torch.Size([24])
Gradient norm: 2.1101756712640314
tensor([1, 0, 0, 0, 0, 0, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 1.9236140212043051
tensor([1, 1, 0

Epoch 262/600 - Training:   0%|          | 0/29 [00:00<?, ?it/s]

tensor([0, 0, 1, 0, 1, 1, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 3.173296780729474
tensor([1, 0, 0, 0, 0, 0, 1, 1, 1, 1])
torch.Size([24])
Gradient norm: 1.945268276077504
tensor([1, 0, 1, 0, 0, 0, 1, 1, 0, 0])
torch.Size([24])
Gradient norm: 2.3696594527653145
tensor([1, 0, 0, 1, 1, 1, 0, 0, 1, 0])
torch.Size([24])
Gradient norm: 1.737527473371141
tensor([0, 1, 0, 1, 1, 0, 1, 1, 0, 1])
torch.Size([24])
Gradient norm: 2.0601901057969494
tensor([0, 0, 1, 0, 1, 0, 1, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.1467665286667144
tensor([1, 0, 0, 0, 1, 0, 0, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.99091656279966
tensor([0, 1, 1, 0, 0, 1, 0, 1, 0, 0])
torch.Size([24])
Gradient norm: 2.6280879319813786
tensor([1, 0, 0, 1, 0, 0, 0, 1, 1, 1])
torch.Size([24])
Gradient norm: 2.9546765285156225
tensor([1, 1, 1, 0, 0, 1, 1, 0, 0, 1])
torch.Size([24])
Gradient norm: 2.032237999234495
tensor([1, 0, 0, 0, 0, 0, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 1.678657654106683
tensor([1, 1, 0, 

Epoch 263/600 - Training:   0%|          | 0/29 [00:00<?, ?it/s]

tensor([0, 0, 1, 0, 1, 1, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 3.8066166740357654
tensor([1, 0, 0, 0, 0, 0, 1, 1, 1, 1])
torch.Size([24])
Gradient norm: 2.110338802318911
tensor([1, 0, 1, 0, 0, 0, 1, 1, 0, 0])
torch.Size([24])
Gradient norm: 2.728355790406957
tensor([1, 0, 0, 1, 1, 1, 0, 0, 1, 0])
torch.Size([24])
Gradient norm: 3.0387465065214845
tensor([0, 1, 0, 1, 1, 0, 1, 1, 0, 1])
torch.Size([24])
Gradient norm: 1.9904757795705614
tensor([0, 0, 1, 0, 1, 0, 1, 1, 1, 0])
torch.Size([24])
Gradient norm: 1.6669156802015417
tensor([1, 0, 0, 0, 1, 0, 0, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.404661749859796
tensor([0, 1, 1, 0, 0, 1, 0, 1, 0, 0])
torch.Size([24])
Gradient norm: 4.11232489379957
tensor([1, 0, 0, 1, 0, 0, 0, 1, 1, 1])
torch.Size([24])
Gradient norm: 2.9820313648203562
tensor([1, 1, 1, 0, 0, 1, 1, 0, 0, 1])
torch.Size([24])
Gradient norm: 2.2926956998849914
tensor([1, 0, 0, 0, 0, 0, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 1.8851295758348656
tensor([1, 1, 0

Epoch 264/600 - Training:   0%|          | 0/29 [00:00<?, ?it/s]

tensor([0, 0, 1, 0, 1, 1, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 3.727650300918903
tensor([1, 0, 0, 0, 0, 0, 1, 1, 1, 1])
torch.Size([24])
Gradient norm: 2.063008461924481
tensor([1, 0, 1, 0, 0, 0, 1, 1, 0, 0])
torch.Size([24])
Gradient norm: 1.9097225661473518
tensor([1, 0, 0, 1, 1, 1, 0, 0, 1, 0])
torch.Size([24])
Gradient norm: 3.328873503402481
tensor([0, 1, 0, 1, 1, 0, 1, 1, 0, 1])
torch.Size([24])
Gradient norm: 1.807522807962557
tensor([0, 0, 1, 0, 1, 0, 1, 1, 1, 0])
torch.Size([24])
Gradient norm: 1.8922109387736998
tensor([1, 0, 0, 0, 1, 0, 0, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.8945490465669046
tensor([0, 1, 1, 0, 0, 1, 0, 1, 0, 0])
torch.Size([24])
Gradient norm: 3.276208487802742
tensor([1, 0, 0, 1, 0, 0, 0, 1, 1, 1])
torch.Size([24])
Gradient norm: 3.139690001241405
tensor([1, 1, 1, 0, 0, 1, 1, 0, 0, 1])
torch.Size([24])
Gradient norm: 1.9671104445453413
tensor([1, 0, 0, 0, 0, 0, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 1.647302868833066
tensor([1, 1, 0, 

Epoch 265/600 - Training:   0%|          | 0/29 [00:00<?, ?it/s]

tensor([0, 0, 1, 0, 1, 1, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 3.2218119497103617
tensor([1, 0, 0, 0, 0, 0, 1, 1, 1, 1])
torch.Size([24])
Gradient norm: 1.8877528523930134
tensor([1, 0, 1, 0, 0, 0, 1, 1, 0, 0])
torch.Size([24])
Gradient norm: 2.427014465002546
tensor([1, 0, 0, 1, 1, 1, 0, 0, 1, 0])
torch.Size([24])
Gradient norm: 2.370653293227246
tensor([0, 1, 0, 1, 1, 0, 1, 1, 0, 1])
torch.Size([24])
Gradient norm: 1.7329273284078262
tensor([0, 0, 1, 0, 1, 0, 1, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.47353226799543
tensor([1, 0, 0, 0, 1, 0, 0, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.449114482336621
tensor([0, 1, 1, 0, 0, 1, 0, 1, 0, 0])
torch.Size([24])
Gradient norm: 2.533038254084879
tensor([1, 0, 0, 1, 0, 0, 0, 1, 1, 1])
torch.Size([24])
Gradient norm: 2.7552230003947566
tensor([1, 1, 1, 0, 0, 1, 1, 0, 0, 1])
torch.Size([24])
Gradient norm: 2.0572736632966206
tensor([1, 0, 0, 0, 0, 0, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 2.744639920676878
tensor([1, 1, 0, 

Epoch 266/600 - Training:   0%|          | 0/29 [00:00<?, ?it/s]

tensor([0, 0, 1, 0, 1, 1, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 3.411063861348029
tensor([1, 0, 0, 0, 0, 0, 1, 1, 1, 1])
torch.Size([24])
Gradient norm: 1.4610613429701615
tensor([1, 0, 1, 0, 0, 0, 1, 1, 0, 0])
torch.Size([24])
Gradient norm: 2.027591170383536
tensor([1, 0, 0, 1, 1, 1, 0, 0, 1, 0])
torch.Size([24])
Gradient norm: 2.8937569231060354
tensor([0, 1, 0, 1, 1, 0, 1, 1, 0, 1])
torch.Size([24])
Gradient norm: 1.8268252070503794
tensor([0, 0, 1, 0, 1, 0, 1, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.2392819246133255
tensor([1, 0, 0, 0, 1, 0, 0, 1, 1, 0])
torch.Size([24])
Gradient norm: 3.3437991157547495
tensor([0, 1, 1, 0, 0, 1, 0, 1, 0, 0])
torch.Size([24])
Gradient norm: 3.0288961137396417
tensor([1, 0, 0, 1, 0, 0, 0, 1, 1, 1])
torch.Size([24])
Gradient norm: 2.8878487890233178
tensor([1, 1, 1, 0, 0, 1, 1, 0, 0, 1])
torch.Size([24])
Gradient norm: 1.5535184751788507
tensor([1, 0, 0, 0, 0, 0, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 1.7913870906268072
tensor([1, 1

Epoch 267/600 - Training:   0%|          | 0/29 [00:00<?, ?it/s]

tensor([0, 0, 1, 0, 1, 1, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 3.1167644243959156
tensor([1, 0, 0, 0, 0, 0, 1, 1, 1, 1])
torch.Size([24])
Gradient norm: 2.5597533053595534
tensor([1, 0, 1, 0, 0, 0, 1, 1, 0, 0])
torch.Size([24])
Gradient norm: 2.7495986806668906
tensor([1, 0, 0, 1, 1, 1, 0, 0, 1, 0])
torch.Size([24])
Gradient norm: 2.378828992287546
tensor([0, 1, 0, 1, 1, 0, 1, 1, 0, 1])
torch.Size([24])
Gradient norm: 1.9466244107434298
tensor([0, 0, 1, 0, 1, 0, 1, 1, 1, 0])
torch.Size([24])
Gradient norm: 1.992038084661814
tensor([1, 0, 0, 0, 1, 0, 0, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.829019588743727
tensor([0, 1, 1, 0, 0, 1, 0, 1, 0, 0])
torch.Size([24])
Gradient norm: 3.023552264895301
tensor([1, 0, 0, 1, 0, 0, 0, 1, 1, 1])
torch.Size([24])
Gradient norm: 2.770714474078947
tensor([1, 1, 1, 0, 0, 1, 1, 0, 0, 1])
torch.Size([24])
Gradient norm: 1.9168251337957773
tensor([1, 0, 0, 0, 0, 0, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 1.664572348325184
tensor([1, 1, 0,

Epoch 268/600 - Training:   0%|          | 0/29 [00:00<?, ?it/s]

tensor([0, 0, 1, 0, 1, 1, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 3.9629574262769585
tensor([1, 0, 0, 0, 0, 0, 1, 1, 1, 1])
torch.Size([24])
Gradient norm: 2.075335773473594
tensor([1, 0, 1, 0, 0, 0, 1, 1, 0, 0])
torch.Size([24])
Gradient norm: 2.4747384816094837
tensor([1, 0, 0, 1, 1, 1, 0, 0, 1, 0])
torch.Size([24])
Gradient norm: 2.687331053542423
tensor([0, 1, 0, 1, 1, 0, 1, 1, 0, 1])
torch.Size([24])
Gradient norm: 2.5754810246602435
tensor([0, 0, 1, 0, 1, 0, 1, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.0637921089452207
tensor([1, 0, 0, 0, 1, 0, 0, 1, 1, 0])
torch.Size([24])
Gradient norm: 3.2438569214005732
tensor([0, 1, 1, 0, 0, 1, 0, 1, 0, 0])
torch.Size([24])
Gradient norm: 3.1482193272130847
tensor([1, 0, 0, 1, 0, 0, 0, 1, 1, 1])
torch.Size([24])
Gradient norm: 2.754861050425978
tensor([1, 1, 1, 0, 0, 1, 1, 0, 0, 1])
torch.Size([24])
Gradient norm: 1.74524802939305
tensor([1, 0, 0, 0, 0, 0, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 1.94937428430054
tensor([1, 1, 0, 

Epoch 269/600 - Training:   0%|          | 0/29 [00:00<?, ?it/s]

tensor([0, 0, 1, 0, 1, 1, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 3.7002066961624593
tensor([1, 0, 0, 0, 0, 0, 1, 1, 1, 1])
torch.Size([24])
Gradient norm: 2.167056939053611
tensor([1, 0, 1, 0, 0, 0, 1, 1, 0, 0])
torch.Size([24])
Gradient norm: 2.193325076310024
tensor([1, 0, 0, 1, 1, 1, 0, 0, 1, 0])
torch.Size([24])
Gradient norm: 2.949277346346511
tensor([0, 1, 0, 1, 1, 0, 1, 1, 0, 1])
torch.Size([24])
Gradient norm: 2.6154733786981765
tensor([0, 0, 1, 0, 1, 0, 1, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.0776483949685023
tensor([1, 0, 0, 0, 1, 0, 0, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.9345308533601595
tensor([0, 1, 1, 0, 0, 1, 0, 1, 0, 0])
torch.Size([24])
Gradient norm: 3.1983136904848553
tensor([1, 0, 0, 1, 0, 0, 0, 1, 1, 1])
torch.Size([24])
Gradient norm: 2.4991924595233614
tensor([1, 1, 1, 0, 0, 1, 1, 0, 0, 1])
torch.Size([24])
Gradient norm: 1.8707202114970978
tensor([1, 0, 0, 0, 0, 0, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 1.971278242989269
tensor([1, 1, 

Epoch 270/600 - Training:   0%|          | 0/29 [00:00<?, ?it/s]

tensor([0, 0, 1, 0, 1, 1, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 4.441804166339925
tensor([1, 0, 0, 0, 0, 0, 1, 1, 1, 1])
torch.Size([24])
Gradient norm: 2.0995456146305598
tensor([1, 0, 1, 0, 0, 0, 1, 1, 0, 0])
torch.Size([24])
Gradient norm: 2.111362235066452
tensor([1, 0, 0, 1, 1, 1, 0, 0, 1, 0])
torch.Size([24])
Gradient norm: 3.132836451743488
tensor([0, 1, 0, 1, 1, 0, 1, 1, 0, 1])
torch.Size([24])
Gradient norm: 2.26737273259034
tensor([0, 0, 1, 0, 1, 0, 1, 1, 1, 0])
torch.Size([24])
Gradient norm: 1.8637583530177895
tensor([1, 0, 0, 0, 1, 0, 0, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.3888789011240603
tensor([0, 1, 1, 0, 0, 1, 0, 1, 0, 0])
torch.Size([24])
Gradient norm: 3.1378292443227327
tensor([1, 0, 0, 1, 0, 0, 0, 1, 1, 1])
torch.Size([24])
Gradient norm: 3.5712008237419077
tensor([1, 1, 1, 0, 0, 1, 1, 0, 0, 1])
torch.Size([24])
Gradient norm: 1.8483733641285889
tensor([1, 0, 0, 0, 0, 0, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 1.93413257436259
tensor([1, 1, 0, 

Epoch 271/600 - Training:   0%|          | 0/29 [00:00<?, ?it/s]

tensor([0, 0, 1, 0, 1, 1, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 3.683280212074799
tensor([1, 0, 0, 0, 0, 0, 1, 1, 1, 1])
torch.Size([24])
Gradient norm: 1.8747266316415734
tensor([1, 0, 1, 0, 0, 0, 1, 1, 0, 0])
torch.Size([24])
Gradient norm: 2.91728658490051
tensor([1, 0, 0, 1, 1, 1, 0, 0, 1, 0])
torch.Size([24])
Gradient norm: 2.7873478713792488
tensor([0, 1, 0, 1, 1, 0, 1, 1, 0, 1])
torch.Size([24])
Gradient norm: 1.889243734640508
tensor([0, 0, 1, 0, 1, 0, 1, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.568980592228848
tensor([1, 0, 0, 0, 1, 0, 0, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.96906177216561
tensor([0, 1, 1, 0, 0, 1, 0, 1, 0, 0])
torch.Size([24])
Gradient norm: 2.8340587472412353
tensor([1, 0, 0, 1, 0, 0, 0, 1, 1, 1])
torch.Size([24])
Gradient norm: 2.755310525792716
tensor([1, 1, 1, 0, 0, 1, 1, 0, 0, 1])
torch.Size([24])
Gradient norm: 1.9529783551123752
tensor([1, 0, 0, 0, 0, 0, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 2.414000406565555
tensor([1, 1, 0, 0,

Epoch 272/600 - Training:   0%|          | 0/29 [00:00<?, ?it/s]

tensor([0, 0, 1, 0, 1, 1, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 3.487001234661165
tensor([1, 0, 0, 0, 0, 0, 1, 1, 1, 1])
torch.Size([24])
Gradient norm: 2.1422190270146073
tensor([1, 0, 1, 0, 0, 0, 1, 1, 0, 0])
torch.Size([24])
Gradient norm: 2.135022136228942
tensor([1, 0, 0, 1, 1, 1, 0, 0, 1, 0])
torch.Size([24])
Gradient norm: 2.9777764899467747
tensor([0, 1, 0, 1, 1, 0, 1, 1, 0, 1])
torch.Size([24])
Gradient norm: 2.1434020822300734
tensor([0, 0, 1, 0, 1, 0, 1, 1, 1, 0])
torch.Size([24])
Gradient norm: 1.9465177131186409
tensor([1, 0, 0, 0, 1, 0, 0, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.955699203040862
tensor([0, 1, 1, 0, 0, 1, 0, 1, 0, 0])
torch.Size([24])
Gradient norm: 2.8750853448264926
tensor([1, 0, 0, 1, 0, 0, 0, 1, 1, 1])
torch.Size([24])
Gradient norm: 3.1359373552537964
tensor([1, 1, 1, 0, 0, 1, 1, 0, 0, 1])
torch.Size([24])
Gradient norm: 1.808095740622104
tensor([1, 0, 0, 0, 0, 0, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 1.910793980514682
tensor([1, 1, 0

Epoch 273/600 - Training:   0%|          | 0/29 [00:00<?, ?it/s]

tensor([0, 0, 1, 0, 1, 1, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 3.9997904569592007
tensor([1, 0, 0, 0, 0, 0, 1, 1, 1, 1])
torch.Size([24])
Gradient norm: 2.0594935873374167
tensor([1, 0, 1, 0, 0, 0, 1, 1, 0, 0])
torch.Size([24])
Gradient norm: 2.6401746553287286
tensor([1, 0, 0, 1, 1, 1, 0, 0, 1, 0])
torch.Size([24])
Gradient norm: 3.384750044472958
tensor([0, 1, 0, 1, 1, 0, 1, 1, 0, 1])
torch.Size([24])
Gradient norm: 2.042355501803718
tensor([0, 0, 1, 0, 1, 0, 1, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.4424599428488567
tensor([1, 0, 0, 0, 1, 0, 0, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.5795358101688386
tensor([0, 1, 1, 0, 0, 1, 0, 1, 0, 0])
torch.Size([24])
Gradient norm: 3.648309548366393
tensor([1, 0, 0, 1, 0, 0, 0, 1, 1, 1])
torch.Size([24])
Gradient norm: 3.641242761101587
tensor([1, 1, 1, 0, 0, 1, 1, 0, 0, 1])
torch.Size([24])
Gradient norm: 1.6557882506194948
tensor([1, 0, 0, 0, 0, 0, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 1.8781621946439728
tensor([1, 1, 

Epoch 274/600 - Training:   0%|          | 0/29 [00:00<?, ?it/s]

tensor([0, 0, 1, 0, 1, 1, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 3.5585438223389363
tensor([1, 0, 0, 0, 0, 0, 1, 1, 1, 1])
torch.Size([24])
Gradient norm: 1.9212074740771532
tensor([1, 0, 1, 0, 0, 0, 1, 1, 0, 0])
torch.Size([24])
Gradient norm: 1.6101143990984106
tensor([1, 0, 0, 1, 1, 1, 0, 0, 1, 0])
torch.Size([24])
Gradient norm: 2.3332507116531738
tensor([0, 1, 0, 1, 1, 0, 1, 1, 0, 1])
torch.Size([24])
Gradient norm: 2.2163533879423287
tensor([0, 0, 1, 0, 1, 0, 1, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.2625562498951033
tensor([1, 0, 0, 0, 1, 0, 0, 1, 1, 0])
torch.Size([24])
Gradient norm: 3.310320320222743
tensor([0, 1, 1, 0, 0, 1, 0, 1, 0, 0])
torch.Size([24])
Gradient norm: 3.2683055838553705
tensor([1, 0, 0, 1, 0, 0, 0, 1, 1, 1])
torch.Size([24])
Gradient norm: 3.297971866110519
tensor([1, 1, 1, 0, 0, 1, 1, 0, 0, 1])
torch.Size([24])
Gradient norm: 1.7041134917532956
tensor([1, 0, 0, 0, 0, 0, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 1.5725786959502928
tensor([1, 1

Epoch 275/600 - Training:   0%|          | 0/29 [00:00<?, ?it/s]

tensor([0, 0, 1, 0, 1, 1, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 3.5795609029257984
tensor([1, 0, 0, 0, 0, 0, 1, 1, 1, 1])
torch.Size([24])
Gradient norm: 2.226533384954697
tensor([1, 0, 1, 0, 0, 0, 1, 1, 0, 0])
torch.Size([24])
Gradient norm: 1.7008712771707648
tensor([1, 0, 0, 1, 1, 1, 0, 0, 1, 0])
torch.Size([24])
Gradient norm: 2.9824416320625518
tensor([0, 1, 0, 1, 1, 0, 1, 1, 0, 1])
torch.Size([24])
Gradient norm: 1.8277522690656978
tensor([0, 0, 1, 0, 1, 0, 1, 1, 1, 0])
torch.Size([24])
Gradient norm: 3.2381644068849913
tensor([1, 0, 0, 0, 1, 0, 0, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.425868792131262
tensor([0, 1, 1, 0, 0, 1, 0, 1, 0, 0])
torch.Size([24])
Gradient norm: 3.5499874476073203
tensor([1, 0, 0, 1, 0, 0, 0, 1, 1, 1])
torch.Size([24])
Gradient norm: 3.234263128812899
tensor([1, 1, 1, 0, 0, 1, 1, 0, 0, 1])
torch.Size([24])
Gradient norm: 1.7243798030455546
tensor([1, 0, 0, 0, 0, 0, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 1.4662729854231977
tensor([1, 1,

Epoch 276/600 - Training:   0%|          | 0/29 [00:00<?, ?it/s]

tensor([0, 0, 1, 0, 1, 1, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 3.9079882857665904
tensor([1, 0, 0, 0, 0, 0, 1, 1, 1, 1])
torch.Size([24])
Gradient norm: 2.115916197023545
tensor([1, 0, 1, 0, 0, 0, 1, 1, 0, 0])
torch.Size([24])
Gradient norm: 2.534629887343919
tensor([1, 0, 0, 1, 1, 1, 0, 0, 1, 0])
torch.Size([24])
Gradient norm: 2.8056327852718645
tensor([0, 1, 0, 1, 1, 0, 1, 1, 0, 1])
torch.Size([24])
Gradient norm: 2.0320810105171385
tensor([0, 0, 1, 0, 1, 0, 1, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.4692399952608373
tensor([1, 0, 0, 0, 1, 0, 0, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.5196565281024705
tensor([0, 1, 1, 0, 0, 1, 0, 1, 0, 0])
torch.Size([24])
Gradient norm: 3.4460270676658693
tensor([1, 0, 0, 1, 0, 0, 0, 1, 1, 1])
torch.Size([24])
Gradient norm: 2.845072321615086
tensor([1, 1, 1, 0, 0, 1, 1, 0, 0, 1])
torch.Size([24])
Gradient norm: 1.8449917932838498
tensor([1, 0, 0, 0, 0, 0, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 2.073847328060873
tensor([1, 1, 

Epoch 277/600 - Training:   0%|          | 0/29 [00:00<?, ?it/s]

tensor([0, 0, 1, 0, 1, 1, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 3.8055152337668883
tensor([1, 0, 0, 0, 0, 0, 1, 1, 1, 1])
torch.Size([24])
Gradient norm: 2.143473883681347
tensor([1, 0, 1, 0, 0, 0, 1, 1, 0, 0])
torch.Size([24])
Gradient norm: 2.3904916951532655
tensor([1, 0, 0, 1, 1, 1, 0, 0, 1, 0])
torch.Size([24])
Gradient norm: 2.7212080838643633
tensor([0, 1, 0, 1, 1, 0, 1, 1, 0, 1])
torch.Size([24])
Gradient norm: 1.6227777776283872
tensor([0, 0, 1, 0, 1, 0, 1, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.4189925157107552
tensor([1, 0, 0, 0, 1, 0, 0, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.7125517433059425
tensor([0, 1, 1, 0, 0, 1, 0, 1, 0, 0])
torch.Size([24])
Gradient norm: 2.3959995698743346
tensor([1, 0, 0, 1, 0, 0, 0, 1, 1, 1])
torch.Size([24])
Gradient norm: 2.999802811548866
tensor([1, 1, 1, 0, 0, 1, 1, 0, 0, 1])
torch.Size([24])
Gradient norm: 1.736294587406176
tensor([1, 0, 0, 0, 0, 0, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 1.8990212596473273
tensor([1, 1,

Epoch 278/600 - Training:   0%|          | 0/29 [00:00<?, ?it/s]

tensor([0, 0, 1, 0, 1, 1, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 3.387950273769016
tensor([1, 0, 0, 0, 0, 0, 1, 1, 1, 1])
torch.Size([24])
Gradient norm: 1.6945382338729729
tensor([1, 0, 1, 0, 0, 0, 1, 1, 0, 0])
torch.Size([24])
Gradient norm: 2.5503397461487607
tensor([1, 0, 0, 1, 1, 1, 0, 0, 1, 0])
torch.Size([24])
Gradient norm: 2.5507746287562005
tensor([0, 1, 0, 1, 1, 0, 1, 1, 0, 1])
torch.Size([24])
Gradient norm: 1.8281104894627627
tensor([0, 0, 1, 0, 1, 0, 1, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.316294195420911
tensor([1, 0, 0, 0, 1, 0, 0, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.9581924107632664
tensor([0, 1, 1, 0, 0, 1, 0, 1, 0, 0])
torch.Size([24])
Gradient norm: 3.0734422730200026
tensor([1, 0, 0, 1, 0, 0, 0, 1, 1, 1])
torch.Size([24])
Gradient norm: 2.7446980128051854
tensor([1, 1, 1, 0, 0, 1, 1, 0, 0, 1])
torch.Size([24])
Gradient norm: 1.9062977149478635
tensor([1, 0, 0, 0, 0, 0, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 2.8273763258460782
tensor([1, 1

Epoch 279/600 - Training:   0%|          | 0/29 [00:00<?, ?it/s]

tensor([0, 0, 1, 0, 1, 1, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 3.0783608282705424
tensor([1, 0, 0, 0, 0, 0, 1, 1, 1, 1])
torch.Size([24])
Gradient norm: 1.9770104261017794
tensor([1, 0, 1, 0, 0, 0, 1, 1, 0, 0])
torch.Size([24])
Gradient norm: 2.4449102035649934
tensor([1, 0, 0, 1, 1, 1, 0, 0, 1, 0])
torch.Size([24])
Gradient norm: 2.4470113946280336
tensor([0, 1, 0, 1, 1, 0, 1, 1, 0, 1])
torch.Size([24])
Gradient norm: 1.6540463721301188
tensor([0, 0, 1, 0, 1, 0, 1, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.4390993903199276
tensor([1, 0, 0, 0, 1, 0, 0, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.8517975458558253
tensor([0, 1, 1, 0, 0, 1, 0, 1, 0, 0])
torch.Size([24])
Gradient norm: 3.090293184135618
tensor([1, 0, 0, 1, 0, 0, 0, 1, 1, 1])
torch.Size([24])
Gradient norm: 3.208324571226341
tensor([1, 1, 1, 0, 0, 1, 1, 0, 0, 1])
torch.Size([24])
Gradient norm: 2.168909191811326
tensor([1, 0, 0, 0, 0, 0, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 1.9333327711259498
tensor([1, 1,

Epoch 280/600 - Training:   0%|          | 0/29 [00:00<?, ?it/s]

tensor([0, 0, 1, 0, 1, 1, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 3.6602744359626893
tensor([1, 0, 0, 0, 0, 0, 1, 1, 1, 1])
torch.Size([24])
Gradient norm: 2.6113963329044707
tensor([1, 0, 1, 0, 0, 0, 1, 1, 0, 0])
torch.Size([24])
Gradient norm: 2.8718561300097183
tensor([1, 0, 0, 1, 1, 1, 0, 0, 1, 0])
torch.Size([24])
Gradient norm: 2.2975391448796345
tensor([0, 1, 0, 1, 1, 0, 1, 1, 0, 1])
torch.Size([24])
Gradient norm: 2.1509441648201197
tensor([0, 0, 1, 0, 1, 0, 1, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.278430216852176
tensor([1, 0, 0, 0, 1, 0, 0, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.6273810865667655
tensor([0, 1, 1, 0, 0, 1, 0, 1, 0, 0])
torch.Size([24])
Gradient norm: 3.4747107876168135
tensor([1, 0, 0, 1, 0, 0, 0, 1, 1, 1])
torch.Size([24])
Gradient norm: 2.9928696883850345
tensor([1, 1, 1, 0, 0, 1, 1, 0, 0, 1])
torch.Size([24])
Gradient norm: 1.829461920370483
tensor([1, 0, 0, 0, 0, 0, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 1.864919454651989
tensor([1, 1,

Epoch 281/600 - Training:   0%|          | 0/29 [00:00<?, ?it/s]

tensor([0, 0, 1, 0, 1, 1, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 3.109317275022385
tensor([1, 0, 0, 0, 0, 0, 1, 1, 1, 1])
torch.Size([24])
Gradient norm: 2.0294312567290467
tensor([1, 0, 1, 0, 0, 0, 1, 1, 0, 0])
torch.Size([24])
Gradient norm: 2.279240523331649
tensor([1, 0, 0, 1, 1, 1, 0, 0, 1, 0])
torch.Size([24])
Gradient norm: 3.1415777571096593
tensor([0, 1, 0, 1, 1, 0, 1, 1, 0, 1])
torch.Size([24])
Gradient norm: 1.760626870295747
tensor([0, 0, 1, 0, 1, 0, 1, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.0606205058995637
tensor([1, 0, 0, 0, 1, 0, 0, 1, 1, 0])
torch.Size([24])
Gradient norm: 3.1414353484318727
tensor([0, 1, 1, 0, 0, 1, 0, 1, 0, 0])
torch.Size([24])
Gradient norm: 2.398329697374233
tensor([1, 0, 0, 1, 0, 0, 0, 1, 1, 1])
torch.Size([24])
Gradient norm: 2.8913149065901256
tensor([1, 1, 1, 0, 0, 1, 1, 0, 0, 1])
torch.Size([24])
Gradient norm: 1.894426629538326
tensor([1, 0, 0, 0, 0, 0, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 1.7383854258771199
tensor([1, 1, 0

Epoch 282/600 - Training:   0%|          | 0/29 [00:00<?, ?it/s]

tensor([0, 0, 1, 0, 1, 1, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 2.8060448379001164
tensor([1, 0, 0, 0, 0, 0, 1, 1, 1, 1])
torch.Size([24])
Gradient norm: 2.3031984642582586
tensor([1, 0, 1, 0, 0, 0, 1, 1, 0, 0])
torch.Size([24])
Gradient norm: 1.8757028636142021
tensor([1, 0, 0, 1, 1, 1, 0, 0, 1, 0])
torch.Size([24])
Gradient norm: 2.5747911343059546
tensor([0, 1, 0, 1, 1, 0, 1, 1, 0, 1])
torch.Size([24])
Gradient norm: 2.2463532501275916
tensor([0, 0, 1, 0, 1, 0, 1, 1, 1, 0])
torch.Size([24])
Gradient norm: 1.9998965595179956
tensor([1, 0, 0, 0, 1, 0, 0, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.881596227994193
tensor([0, 1, 1, 0, 0, 1, 0, 1, 0, 0])
torch.Size([24])
Gradient norm: 3.5287615858270014
tensor([1, 0, 0, 1, 0, 0, 0, 1, 1, 1])
torch.Size([24])
Gradient norm: 2.8593523384483888
tensor([1, 1, 1, 0, 0, 1, 1, 0, 0, 1])
torch.Size([24])
Gradient norm: 1.7061706221687813
tensor([1, 0, 0, 0, 0, 0, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 2.0574182185573853
tensor([1, 

Epoch 283/600 - Training:   0%|          | 0/29 [00:00<?, ?it/s]

tensor([0, 0, 1, 0, 1, 1, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 3.385043719449372
tensor([1, 0, 0, 0, 0, 0, 1, 1, 1, 1])
torch.Size([24])
Gradient norm: 2.530323512556674
tensor([1, 0, 1, 0, 0, 0, 1, 1, 0, 0])
torch.Size([24])
Gradient norm: 2.088988085072328
tensor([1, 0, 0, 1, 1, 1, 0, 0, 1, 0])
torch.Size([24])
Gradient norm: 1.9411346152312963
tensor([0, 1, 0, 1, 1, 0, 1, 1, 0, 1])
torch.Size([24])
Gradient norm: 2.062845412771944
tensor([0, 0, 1, 0, 1, 0, 1, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.72077653623768
tensor([1, 0, 0, 0, 1, 0, 0, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.8014819491035095
tensor([0, 1, 1, 0, 0, 1, 0, 1, 0, 0])
torch.Size([24])
Gradient norm: 2.8235745538477826
tensor([1, 0, 0, 1, 0, 0, 0, 1, 1, 1])
torch.Size([24])
Gradient norm: 3.1409294285429614
tensor([1, 1, 1, 0, 0, 1, 1, 0, 0, 1])
torch.Size([24])
Gradient norm: 1.5872997855537678
tensor([1, 0, 0, 0, 0, 0, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 2.09130995832379
tensor([1, 1, 0, 0

Epoch 284/600 - Training:   0%|          | 0/29 [00:00<?, ?it/s]

tensor([0, 0, 1, 0, 1, 1, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 4.084532632300884
tensor([1, 0, 0, 0, 0, 0, 1, 1, 1, 1])
torch.Size([24])
Gradient norm: 1.7053360655203738
tensor([1, 0, 1, 0, 0, 0, 1, 1, 0, 0])
torch.Size([24])
Gradient norm: 1.789936024828522
tensor([1, 0, 0, 1, 1, 1, 0, 0, 1, 0])
torch.Size([24])
Gradient norm: 2.8085153637196605
tensor([0, 1, 0, 1, 1, 0, 1, 1, 0, 1])
torch.Size([24])
Gradient norm: 2.110280167815118
tensor([0, 0, 1, 0, 1, 0, 1, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.4957125526222987
tensor([1, 0, 0, 0, 1, 0, 0, 1, 1, 0])
torch.Size([24])
Gradient norm: 3.365758338529779
tensor([0, 1, 1, 0, 0, 1, 0, 1, 0, 0])
torch.Size([24])
Gradient norm: 3.220057045351339
tensor([1, 0, 0, 1, 0, 0, 0, 1, 1, 1])
torch.Size([24])
Gradient norm: 2.7549823304210013
tensor([1, 1, 1, 0, 0, 1, 1, 0, 0, 1])
torch.Size([24])
Gradient norm: 1.5608028731790418
tensor([1, 0, 0, 0, 0, 0, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 1.9021170025708898
tensor([1, 1, 0

Epoch 285/600 - Training:   0%|          | 0/29 [00:00<?, ?it/s]

tensor([0, 0, 1, 0, 1, 1, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 3.173341133186348
tensor([1, 0, 0, 0, 0, 0, 1, 1, 1, 1])
torch.Size([24])
Gradient norm: 2.225468079245238
tensor([1, 0, 1, 0, 0, 0, 1, 1, 0, 0])
torch.Size([24])
Gradient norm: 2.214279104556168
tensor([1, 0, 0, 1, 1, 1, 0, 0, 1, 0])
torch.Size([24])
Gradient norm: 2.4752934610034525
tensor([0, 1, 0, 1, 1, 0, 1, 1, 0, 1])
torch.Size([24])
Gradient norm: 1.9310097966918323
tensor([0, 0, 1, 0, 1, 0, 1, 1, 1, 0])
torch.Size([24])
Gradient norm: 3.0854114046294536
tensor([1, 0, 0, 0, 1, 0, 0, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.220268859028967
tensor([0, 1, 1, 0, 0, 1, 0, 1, 0, 0])
torch.Size([24])
Gradient norm: 3.1417745832416646
tensor([1, 0, 0, 1, 0, 0, 0, 1, 1, 1])
torch.Size([24])
Gradient norm: 3.6086189278513383
tensor([1, 1, 1, 0, 0, 1, 1, 0, 0, 1])
torch.Size([24])
Gradient norm: 1.9485058362516037
tensor([1, 0, 0, 0, 0, 0, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 1.8470436103673136
tensor([1, 1, 

Epoch 286/600 - Training:   0%|          | 0/29 [00:00<?, ?it/s]

tensor([0, 0, 1, 0, 1, 1, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 3.475876914526535
tensor([1, 0, 0, 0, 0, 0, 1, 1, 1, 1])
torch.Size([24])
Gradient norm: 2.090089953947111
tensor([1, 0, 1, 0, 0, 0, 1, 1, 0, 0])
torch.Size([24])
Gradient norm: 2.1332427523829787
tensor([1, 0, 0, 1, 1, 1, 0, 0, 1, 0])
torch.Size([24])
Gradient norm: 2.5306300566602093
tensor([0, 1, 0, 1, 1, 0, 1, 1, 0, 1])
torch.Size([24])
Gradient norm: 1.9655426439981176
tensor([0, 0, 1, 0, 1, 0, 1, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.3827415764047433
tensor([1, 0, 0, 0, 1, 0, 0, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.9643516932640948
tensor([0, 1, 1, 0, 0, 1, 0, 1, 0, 0])
torch.Size([24])
Gradient norm: 3.1779999854032246
tensor([1, 0, 0, 1, 0, 0, 0, 1, 1, 1])
torch.Size([24])
Gradient norm: 2.49295505470366
tensor([1, 1, 1, 0, 0, 1, 1, 0, 0, 1])
torch.Size([24])
Gradient norm: 1.8708473009741424
tensor([1, 0, 0, 0, 0, 0, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 2.0635286084157904
tensor([1, 1, 

Epoch 287/600 - Training:   0%|          | 0/29 [00:00<?, ?it/s]

tensor([0, 0, 1, 0, 1, 1, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 3.694138310980823
tensor([1, 0, 0, 0, 0, 0, 1, 1, 1, 1])
torch.Size([24])
Gradient norm: 1.9443777780898692
tensor([1, 0, 1, 0, 0, 0, 1, 1, 0, 0])
torch.Size([24])
Gradient norm: 2.021304456715377
tensor([1, 0, 0, 1, 1, 1, 0, 0, 1, 0])
torch.Size([24])
Gradient norm: 3.090357252150958
tensor([0, 1, 0, 1, 1, 0, 1, 1, 0, 1])
torch.Size([24])
Gradient norm: 1.8152155561136676
tensor([0, 0, 1, 0, 1, 0, 1, 1, 1, 0])
torch.Size([24])
Gradient norm: 1.6213320068216737
tensor([1, 0, 0, 0, 1, 0, 0, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.6372874944202693
tensor([0, 1, 1, 0, 0, 1, 0, 1, 0, 0])
torch.Size([24])
Gradient norm: 3.7400500099068243
tensor([1, 0, 0, 1, 0, 0, 0, 1, 1, 1])
torch.Size([24])
Gradient norm: 2.8649705884484677
tensor([1, 1, 1, 0, 0, 1, 1, 0, 0, 1])
torch.Size([24])
Gradient norm: 2.0445970026160314
tensor([1, 0, 0, 0, 0, 0, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 2.4076515140327244
tensor([1, 1,

Epoch 288/600 - Training:   0%|          | 0/29 [00:00<?, ?it/s]

tensor([0, 0, 1, 0, 1, 1, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 3.1316941419591378
tensor([1, 0, 0, 0, 0, 0, 1, 1, 1, 1])
torch.Size([24])
Gradient norm: 1.9080634608119071
tensor([1, 0, 1, 0, 0, 0, 1, 1, 0, 0])
torch.Size([24])
Gradient norm: 2.1249202400811624
tensor([1, 0, 0, 1, 1, 1, 0, 0, 1, 0])
torch.Size([24])
Gradient norm: 2.991878809527038
tensor([0, 1, 0, 1, 1, 0, 1, 1, 0, 1])
torch.Size([24])
Gradient norm: 2.240711909583378
tensor([0, 0, 1, 0, 1, 0, 1, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.9141004239816577
tensor([1, 0, 0, 0, 1, 0, 0, 1, 1, 0])
torch.Size([24])
Gradient norm: 3.257219577335543
tensor([0, 1, 1, 0, 0, 1, 0, 1, 0, 0])
torch.Size([24])
Gradient norm: 3.3195080742018273
tensor([1, 0, 0, 1, 0, 0, 0, 1, 1, 1])
torch.Size([24])
Gradient norm: 3.076175053863093
tensor([1, 1, 1, 0, 0, 1, 1, 0, 0, 1])
torch.Size([24])
Gradient norm: 2.0324515155022773
tensor([1, 0, 0, 0, 0, 0, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 1.5225820704739825
tensor([1, 1, 

Epoch 289/600 - Training:   0%|          | 0/29 [00:00<?, ?it/s]

tensor([0, 0, 1, 0, 1, 1, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 4.362367494776071
tensor([1, 0, 0, 0, 0, 0, 1, 1, 1, 1])
torch.Size([24])
Gradient norm: 2.1290778352479847
tensor([1, 0, 1, 0, 0, 0, 1, 1, 0, 0])
torch.Size([24])
Gradient norm: 2.363081936081229
tensor([1, 0, 0, 1, 1, 1, 0, 0, 1, 0])
torch.Size([24])
Gradient norm: 2.351903363972921
tensor([0, 1, 0, 1, 1, 0, 1, 1, 0, 1])
torch.Size([24])
Gradient norm: 2.1429150654334923
tensor([0, 0, 1, 0, 1, 0, 1, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.4216166833185904
tensor([1, 0, 0, 0, 1, 0, 0, 1, 1, 0])
torch.Size([24])
Gradient norm: 3.0522453483137624
tensor([0, 1, 1, 0, 0, 1, 0, 1, 0, 0])
torch.Size([24])
Gradient norm: 3.2992423652463754
tensor([1, 0, 0, 1, 0, 0, 0, 1, 1, 1])
torch.Size([24])
Gradient norm: 3.1922745323123287
tensor([1, 1, 1, 0, 0, 1, 1, 0, 0, 1])
torch.Size([24])
Gradient norm: 1.8048355008047856
tensor([1, 0, 0, 0, 0, 0, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 1.6822713165333902
tensor([1, 1,

Epoch 290/600 - Training:   0%|          | 0/29 [00:00<?, ?it/s]

tensor([0, 0, 1, 0, 1, 1, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 3.234710958261876
tensor([1, 0, 0, 0, 0, 0, 1, 1, 1, 1])
torch.Size([24])
Gradient norm: 2.563533790283073
tensor([1, 0, 1, 0, 0, 0, 1, 1, 0, 0])
torch.Size([24])
Gradient norm: 2.2472576236610977
tensor([1, 0, 0, 1, 1, 1, 0, 0, 1, 0])
torch.Size([24])
Gradient norm: 3.4485129530841565
tensor([0, 1, 0, 1, 1, 0, 1, 1, 0, 1])
torch.Size([24])
Gradient norm: 2.1005917087430497
tensor([0, 0, 1, 0, 1, 0, 1, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.342956820914946
tensor([1, 0, 0, 0, 1, 0, 0, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.451131547835271
tensor([0, 1, 1, 0, 0, 1, 0, 1, 0, 0])
torch.Size([24])
Gradient norm: 2.506401787611565
tensor([1, 0, 0, 1, 0, 0, 0, 1, 1, 1])
torch.Size([24])
Gradient norm: 2.6055939899896674
tensor([1, 1, 1, 0, 0, 1, 1, 0, 0, 1])
torch.Size([24])
Gradient norm: 1.8574341458747805
tensor([1, 0, 0, 0, 0, 0, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 1.8123189795332117
tensor([1, 1, 0

Epoch 291/600 - Training:   0%|          | 0/29 [00:00<?, ?it/s]

tensor([0, 0, 1, 0, 1, 1, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 3.6680269610763716
tensor([1, 0, 0, 0, 0, 0, 1, 1, 1, 1])
torch.Size([24])
Gradient norm: 2.2649513540026787
tensor([1, 0, 1, 0, 0, 0, 1, 1, 0, 0])
torch.Size([24])
Gradient norm: 2.065635516222017
tensor([1, 0, 0, 1, 1, 1, 0, 0, 1, 0])
torch.Size([24])
Gradient norm: 2.8038633354883946
tensor([0, 1, 0, 1, 1, 0, 1, 1, 0, 1])
torch.Size([24])
Gradient norm: 2.3116171007694435
tensor([0, 0, 1, 0, 1, 0, 1, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.167147969135548
tensor([1, 0, 0, 0, 1, 0, 0, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.4755166261135977
tensor([0, 1, 1, 0, 0, 1, 0, 1, 0, 0])
torch.Size([24])
Gradient norm: 3.1864621261964574
tensor([1, 0, 0, 1, 0, 0, 0, 1, 1, 1])
torch.Size([24])
Gradient norm: 2.957849846351686
tensor([1, 1, 1, 0, 0, 1, 1, 0, 0, 1])
torch.Size([24])
Gradient norm: 1.7852216683766438
tensor([1, 0, 0, 0, 0, 0, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 2.1010148526330648
tensor([1, 1,

Epoch 292/600 - Training:   0%|          | 0/29 [00:00<?, ?it/s]

tensor([0, 0, 1, 0, 1, 1, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 3.3097974468743185
tensor([1, 0, 0, 0, 0, 0, 1, 1, 1, 1])
torch.Size([24])
Gradient norm: 2.507222994705415
tensor([1, 0, 1, 0, 0, 0, 1, 1, 0, 0])
torch.Size([24])
Gradient norm: 2.369416849742638
tensor([1, 0, 0, 1, 1, 1, 0, 0, 1, 0])
torch.Size([24])
Gradient norm: 2.653652989381901
tensor([0, 1, 0, 1, 1, 0, 1, 1, 0, 1])
torch.Size([24])
Gradient norm: 2.088734429407561
tensor([0, 0, 1, 0, 1, 0, 1, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.3541741886112666
tensor([1, 0, 0, 0, 1, 0, 0, 1, 1, 0])
torch.Size([24])
Gradient norm: 3.5480974546433615
tensor([0, 1, 1, 0, 0, 1, 0, 1, 0, 0])
torch.Size([24])
Gradient norm: 3.11252901140557
tensor([1, 0, 0, 1, 0, 0, 0, 1, 1, 1])
torch.Size([24])
Gradient norm: 3.389535426272821
tensor([1, 1, 1, 0, 0, 1, 1, 0, 0, 1])
torch.Size([24])
Gradient norm: 1.9242694322140126
tensor([1, 0, 0, 0, 0, 0, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 1.9049032431919164
tensor([1, 1, 0, 

Epoch 293/600 - Training:   0%|          | 0/29 [00:00<?, ?it/s]

tensor([0, 0, 1, 0, 1, 1, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 3.9589984394354607
tensor([1, 0, 0, 0, 0, 0, 1, 1, 1, 1])
torch.Size([24])
Gradient norm: 2.072208632051096
tensor([1, 0, 1, 0, 0, 0, 1, 1, 0, 0])
torch.Size([24])
Gradient norm: 1.985130614202553
tensor([1, 0, 0, 1, 1, 1, 0, 0, 1, 0])
torch.Size([24])
Gradient norm: 2.7375745335834307
tensor([0, 1, 0, 1, 1, 0, 1, 1, 0, 1])
torch.Size([24])
Gradient norm: 1.7839527991274997
tensor([0, 0, 1, 0, 1, 0, 1, 1, 1, 0])
torch.Size([24])
Gradient norm: 1.808168790404988
tensor([1, 0, 0, 0, 1, 0, 0, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.8327160868808443
tensor([0, 1, 1, 0, 0, 1, 0, 1, 0, 0])
torch.Size([24])
Gradient norm: 2.868860477628259
tensor([1, 0, 0, 1, 0, 0, 0, 1, 1, 1])
torch.Size([24])
Gradient norm: 2.688000200217453
tensor([1, 1, 1, 0, 0, 1, 1, 0, 0, 1])
torch.Size([24])
Gradient norm: 1.673553456957054
tensor([1, 0, 0, 0, 0, 0, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 1.873267289254698
tensor([1, 1, 0, 

Epoch 294/600 - Training:   0%|          | 0/29 [00:00<?, ?it/s]

tensor([0, 0, 1, 0, 1, 1, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 3.0022809021539025
tensor([1, 0, 0, 0, 0, 0, 1, 1, 1, 1])
torch.Size([24])
Gradient norm: 2.6550708181628426
tensor([1, 0, 1, 0, 0, 0, 1, 1, 0, 0])
torch.Size([24])
Gradient norm: 2.512544147162087
tensor([1, 0, 0, 1, 1, 1, 0, 0, 1, 0])
torch.Size([24])
Gradient norm: 2.1536656223389867
tensor([0, 1, 0, 1, 1, 0, 1, 1, 0, 1])
torch.Size([24])
Gradient norm: 1.7700280388048806
tensor([0, 0, 1, 0, 1, 0, 1, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.333644632439708
tensor([1, 0, 0, 0, 1, 0, 0, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.292088847587207
tensor([0, 1, 1, 0, 0, 1, 0, 1, 0, 0])
torch.Size([24])
Gradient norm: 3.742406368985983
tensor([1, 0, 0, 1, 0, 0, 0, 1, 1, 1])
torch.Size([24])
Gradient norm: 2.7622090211841552
tensor([1, 1, 1, 0, 0, 1, 1, 0, 0, 1])
torch.Size([24])
Gradient norm: 1.7432689290104335
tensor([1, 0, 0, 0, 0, 0, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 1.9692127180155374
tensor([1, 1, 

Epoch 295/600 - Training:   0%|          | 0/29 [00:00<?, ?it/s]

tensor([0, 0, 1, 0, 1, 1, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 3.479800445767652
tensor([1, 0, 0, 0, 0, 0, 1, 1, 1, 1])
torch.Size([24])
Gradient norm: 1.9873442164798798
tensor([1, 0, 1, 0, 0, 0, 1, 1, 0, 0])
torch.Size([24])
Gradient norm: 3.018891476362979
tensor([1, 0, 0, 1, 1, 1, 0, 0, 1, 0])
torch.Size([24])
Gradient norm: 1.8500039138412498
tensor([0, 1, 0, 1, 1, 0, 1, 1, 0, 1])
torch.Size([24])
Gradient norm: 1.709633264664258
tensor([0, 0, 1, 0, 1, 0, 1, 1, 1, 0])
torch.Size([24])
Gradient norm: 1.8945309213173258
tensor([1, 0, 0, 0, 1, 0, 0, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.987789905398829
tensor([0, 1, 1, 0, 0, 1, 0, 1, 0, 0])
torch.Size([24])
Gradient norm: 2.3975548712124812
tensor([1, 0, 0, 1, 0, 0, 0, 1, 1, 1])
torch.Size([24])
Gradient norm: 2.634944835830833
tensor([1, 1, 1, 0, 0, 1, 1, 0, 0, 1])
torch.Size([24])
Gradient norm: 1.7419723573887327
tensor([1, 0, 0, 0, 0, 0, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 1.801782025731684
tensor([1, 1, 0,

Epoch 296/600 - Training:   0%|          | 0/29 [00:00<?, ?it/s]

tensor([0, 0, 1, 0, 1, 1, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 3.815184212965873
tensor([1, 0, 0, 0, 0, 0, 1, 1, 1, 1])
torch.Size([24])
Gradient norm: 2.3676665528990033
tensor([1, 0, 1, 0, 0, 0, 1, 1, 0, 0])
torch.Size([24])
Gradient norm: 2.2990450810792775
tensor([1, 0, 0, 1, 1, 1, 0, 0, 1, 0])
torch.Size([24])
Gradient norm: 2.883488860254955
tensor([0, 1, 0, 1, 1, 0, 1, 1, 0, 1])
torch.Size([24])
Gradient norm: 1.9325808357211003
tensor([0, 0, 1, 0, 1, 0, 1, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.2615004931496983
tensor([1, 0, 0, 0, 1, 0, 0, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.8019137006549344
tensor([0, 1, 1, 0, 0, 1, 0, 1, 0, 0])
torch.Size([24])
Gradient norm: 3.6341450044448766
tensor([1, 0, 0, 1, 0, 0, 0, 1, 1, 1])
torch.Size([24])
Gradient norm: 2.8937471183185624
tensor([1, 1, 1, 0, 0, 1, 1, 0, 0, 1])
torch.Size([24])
Gradient norm: 1.8608023477617022
tensor([1, 0, 0, 0, 0, 0, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 1.6716648644541214
tensor([1, 1

Epoch 297/600 - Training:   0%|          | 0/29 [00:00<?, ?it/s]

tensor([0, 0, 1, 0, 1, 1, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 2.963477027928518
tensor([1, 0, 0, 0, 0, 0, 1, 1, 1, 1])
torch.Size([24])
Gradient norm: 1.8132170787558337
tensor([1, 0, 1, 0, 0, 0, 1, 1, 0, 0])
torch.Size([24])
Gradient norm: 2.1794084255700064
tensor([1, 0, 0, 1, 1, 1, 0, 0, 1, 0])
torch.Size([24])
Gradient norm: 2.3612262813293383
tensor([0, 1, 0, 1, 1, 0, 1, 1, 0, 1])
torch.Size([24])
Gradient norm: 1.7179532437375102
tensor([0, 0, 1, 0, 1, 0, 1, 1, 1, 0])
torch.Size([24])
Gradient norm: 1.8023688924643815
tensor([1, 0, 0, 0, 1, 0, 0, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.229191072779492
tensor([0, 1, 1, 0, 0, 1, 0, 1, 0, 0])
torch.Size([24])
Gradient norm: 2.8889386364577185
tensor([1, 0, 0, 1, 0, 0, 0, 1, 1, 1])
torch.Size([24])
Gradient norm: 3.0234308135905694
tensor([1, 1, 1, 0, 0, 1, 1, 0, 0, 1])
torch.Size([24])
Gradient norm: 1.6676258290970356
tensor([1, 0, 0, 0, 0, 0, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 1.4678558594647362
tensor([1, 1

Epoch 298/600 - Training:   0%|          | 0/29 [00:00<?, ?it/s]

tensor([0, 0, 1, 0, 1, 1, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 3.3779867036966933
tensor([1, 0, 0, 0, 0, 0, 1, 1, 1, 1])
torch.Size([24])
Gradient norm: 2.4164841036921905
tensor([1, 0, 1, 0, 0, 0, 1, 1, 0, 0])
torch.Size([24])
Gradient norm: 2.7161572082188252
tensor([1, 0, 0, 1, 1, 1, 0, 0, 1, 0])
torch.Size([24])
Gradient norm: 2.7101330926446185
tensor([0, 1, 0, 1, 1, 0, 1, 1, 0, 1])
torch.Size([24])
Gradient norm: 2.314846990880939
tensor([0, 0, 1, 0, 1, 0, 1, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.536352778765256
tensor([1, 0, 0, 0, 1, 0, 0, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.6527719722835306
tensor([0, 1, 1, 0, 0, 1, 0, 1, 0, 0])
torch.Size([24])
Gradient norm: 3.2959467164621166
tensor([1, 0, 0, 1, 0, 0, 0, 1, 1, 1])
torch.Size([24])
Gradient norm: 3.1086141867255757
tensor([1, 1, 1, 0, 0, 1, 1, 0, 0, 1])
torch.Size([24])
Gradient norm: 1.829920622284007
tensor([1, 0, 0, 0, 0, 0, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 1.9250785173822589
tensor([1, 1,

Epoch 299/600 - Training:   0%|          | 0/29 [00:00<?, ?it/s]

tensor([0, 0, 1, 0, 1, 1, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 2.8867279958478997
tensor([1, 0, 0, 0, 0, 0, 1, 1, 1, 1])
torch.Size([24])
Gradient norm: 1.954998110657811
tensor([1, 0, 1, 0, 0, 0, 1, 1, 0, 0])
torch.Size([24])
Gradient norm: 2.4001327994598523
tensor([1, 0, 0, 1, 1, 1, 0, 0, 1, 0])
torch.Size([24])
Gradient norm: 2.7617700878178746
tensor([0, 1, 0, 1, 1, 0, 1, 1, 0, 1])
torch.Size([24])
Gradient norm: 1.6996695948421754
tensor([0, 0, 1, 0, 1, 0, 1, 1, 1, 0])
torch.Size([24])
Gradient norm: 1.9042429920778514
tensor([1, 0, 0, 0, 1, 0, 0, 1, 1, 0])
torch.Size([24])
Gradient norm: 3.457035317156635
tensor([0, 1, 1, 0, 0, 1, 0, 1, 0, 0])
torch.Size([24])
Gradient norm: 3.1651722966636977
tensor([1, 0, 0, 1, 0, 0, 0, 1, 1, 1])
torch.Size([24])
Gradient norm: 3.759646207200849
tensor([1, 1, 1, 0, 0, 1, 1, 0, 0, 1])
torch.Size([24])
Gradient norm: 1.6076602033694296
tensor([1, 0, 0, 0, 0, 0, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 1.703609227032015
tensor([1, 1, 

Epoch 300/600 - Training:   0%|          | 0/29 [00:00<?, ?it/s]

tensor([0, 0, 1, 0, 1, 1, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 2.474961752404977
tensor([1, 0, 0, 0, 0, 0, 1, 1, 1, 1])
torch.Size([24])
Gradient norm: 2.0020667642585988
tensor([1, 0, 1, 0, 0, 0, 1, 1, 0, 0])
torch.Size([24])
Gradient norm: 2.2090782650000156
tensor([1, 0, 0, 1, 1, 1, 0, 0, 1, 0])
torch.Size([24])
Gradient norm: 2.9901243649539206
tensor([0, 1, 0, 1, 1, 0, 1, 1, 0, 1])
torch.Size([24])
Gradient norm: 2.5123167621399416
tensor([0, 0, 1, 0, 1, 0, 1, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.847788486228184
tensor([1, 0, 0, 0, 1, 0, 0, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.6315411472070624
tensor([0, 1, 1, 0, 0, 1, 0, 1, 0, 0])
torch.Size([24])
Gradient norm: 2.824816580825829
tensor([1, 0, 0, 1, 0, 0, 0, 1, 1, 1])
torch.Size([24])
Gradient norm: 2.8096178390307998
tensor([1, 1, 1, 0, 0, 1, 1, 0, 0, 1])
torch.Size([24])
Gradient norm: 1.7026966590406314
tensor([1, 0, 0, 0, 0, 0, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 2.106933508452102
tensor([1, 1, 

Epoch 301/600 - Training:   0%|          | 0/29 [00:00<?, ?it/s]

tensor([0, 0, 1, 0, 1, 1, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 2.969531984219756
tensor([1, 0, 0, 0, 0, 0, 1, 1, 1, 1])
torch.Size([24])
Gradient norm: 2.2619718428961595
tensor([1, 0, 1, 0, 0, 0, 1, 1, 0, 0])
torch.Size([24])
Gradient norm: 2.1657221742412776
tensor([1, 0, 0, 1, 1, 1, 0, 0, 1, 0])
torch.Size([24])
Gradient norm: 3.1014776242310815
tensor([0, 1, 0, 1, 1, 0, 1, 1, 0, 1])
torch.Size([24])
Gradient norm: 1.9593187121466045
tensor([0, 0, 1, 0, 1, 0, 1, 1, 1, 0])
torch.Size([24])
Gradient norm: 1.8938978551473946
tensor([1, 0, 0, 0, 1, 0, 0, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.629703212873099
tensor([0, 1, 1, 0, 0, 1, 0, 1, 0, 0])
torch.Size([24])
Gradient norm: 2.914836961326945
tensor([1, 0, 0, 1, 0, 0, 0, 1, 1, 1])
torch.Size([24])
Gradient norm: 3.4360956487527115
tensor([1, 1, 1, 0, 0, 1, 1, 0, 0, 1])
torch.Size([24])
Gradient norm: 2.017716994656093
tensor([1, 0, 0, 0, 0, 0, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 2.28814182738072
tensor([1, 1, 0,

Epoch 302/600 - Training:   0%|          | 0/29 [00:00<?, ?it/s]

tensor([0, 0, 1, 0, 1, 1, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 3.8635844055197746
tensor([1, 0, 0, 0, 0, 0, 1, 1, 1, 1])
torch.Size([24])
Gradient norm: 2.2250728439399596
tensor([1, 0, 1, 0, 0, 0, 1, 1, 0, 0])
torch.Size([24])
Gradient norm: 1.5125329445422384
tensor([1, 0, 0, 1, 1, 1, 0, 0, 1, 0])
torch.Size([24])
Gradient norm: 2.3657228347203643
tensor([0, 1, 0, 1, 1, 0, 1, 1, 0, 1])
torch.Size([24])
Gradient norm: 1.82924739524061
tensor([0, 0, 1, 0, 1, 0, 1, 1, 1, 0])
torch.Size([24])
Gradient norm: 1.9591795297643948
tensor([1, 0, 0, 0, 1, 0, 0, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.7687409986104456
tensor([0, 1, 1, 0, 0, 1, 0, 1, 0, 0])
torch.Size([24])
Gradient norm: 3.3624738022957557
tensor([1, 0, 0, 1, 0, 0, 0, 1, 1, 1])
torch.Size([24])
Gradient norm: 2.746770590433353
tensor([1, 1, 1, 0, 0, 1, 1, 0, 0, 1])
torch.Size([24])
Gradient norm: 1.986914961086067
tensor([1, 0, 0, 0, 0, 0, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 1.9209639075855394
tensor([1, 1, 

Epoch 303/600 - Training:   0%|          | 0/29 [00:00<?, ?it/s]

tensor([0, 0, 1, 0, 1, 1, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 3.5747700171060766
tensor([1, 0, 0, 0, 0, 0, 1, 1, 1, 1])
torch.Size([24])
Gradient norm: 1.8298215814868144
tensor([1, 0, 1, 0, 0, 0, 1, 1, 0, 0])
torch.Size([24])
Gradient norm: 2.7402385232496753
tensor([1, 0, 0, 1, 1, 1, 0, 0, 1, 0])
torch.Size([24])
Gradient norm: 2.639385461043995
tensor([0, 1, 0, 1, 1, 0, 1, 1, 0, 1])
torch.Size([24])
Gradient norm: 2.2787899206796944
tensor([0, 0, 1, 0, 1, 0, 1, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.025349865374802
tensor([1, 0, 0, 0, 1, 0, 0, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.5807093304683937
tensor([0, 1, 1, 0, 0, 1, 0, 1, 0, 0])
torch.Size([24])
Gradient norm: 2.866002451722336
tensor([1, 0, 0, 1, 0, 0, 0, 1, 1, 1])
torch.Size([24])
Gradient norm: 3.7797546905894266
tensor([1, 1, 1, 0, 0, 1, 1, 0, 0, 1])
torch.Size([24])
Gradient norm: 1.8434763017478175
tensor([1, 0, 0, 0, 0, 0, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 1.9056125078149238
tensor([1, 1,

Epoch 304/600 - Training:   0%|          | 0/29 [00:00<?, ?it/s]

tensor([0, 0, 1, 0, 1, 1, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 3.5805459801923036
tensor([1, 0, 0, 0, 0, 0, 1, 1, 1, 1])
torch.Size([24])
Gradient norm: 2.0655453235872896
tensor([1, 0, 1, 0, 0, 0, 1, 1, 0, 0])
torch.Size([24])
Gradient norm: 2.268583178999827
tensor([1, 0, 0, 1, 1, 1, 0, 0, 1, 0])
torch.Size([24])
Gradient norm: 2.0386490758711595
tensor([0, 1, 0, 1, 1, 0, 1, 1, 0, 1])
torch.Size([24])
Gradient norm: 2.262200572318946
tensor([0, 0, 1, 0, 1, 0, 1, 1, 1, 0])
torch.Size([24])
Gradient norm: 1.5763780764583115
tensor([1, 0, 0, 0, 1, 0, 0, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.9415661160985853
tensor([0, 1, 1, 0, 0, 1, 0, 1, 0, 0])
torch.Size([24])
Gradient norm: 2.8624067712588097
tensor([1, 0, 0, 1, 0, 0, 0, 1, 1, 1])
torch.Size([24])
Gradient norm: 3.0217885436906684
tensor([1, 1, 1, 0, 0, 1, 1, 0, 0, 1])
torch.Size([24])
Gradient norm: 1.7998177714442045
tensor([1, 0, 0, 0, 0, 0, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 1.9562725817501083
tensor([1, 1

Epoch 305/600 - Training:   0%|          | 0/29 [00:00<?, ?it/s]

tensor([0, 0, 1, 0, 1, 1, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 3.5046790121025566
tensor([1, 0, 0, 0, 0, 0, 1, 1, 1, 1])
torch.Size([24])
Gradient norm: 2.944389435031557
tensor([1, 0, 1, 0, 0, 0, 1, 1, 0, 0])
torch.Size([24])
Gradient norm: 2.4761059876775167
tensor([1, 0, 0, 1, 1, 1, 0, 0, 1, 0])
torch.Size([24])
Gradient norm: 3.2791767923951842
tensor([0, 1, 0, 1, 1, 0, 1, 1, 0, 1])
torch.Size([24])
Gradient norm: 1.8695350921896359
tensor([0, 0, 1, 0, 1, 0, 1, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.0357533633927893
tensor([1, 0, 0, 0, 1, 0, 0, 1, 1, 0])
torch.Size([24])
Gradient norm: 3.5281720636439817
tensor([0, 1, 1, 0, 0, 1, 0, 1, 0, 0])
torch.Size([24])
Gradient norm: 3.0878268916425977
tensor([1, 0, 0, 1, 0, 0, 0, 1, 1, 1])
torch.Size([24])
Gradient norm: 2.9270428615601647
tensor([1, 1, 1, 0, 0, 1, 1, 0, 0, 1])
torch.Size([24])
Gradient norm: 2.187676698746934
tensor([1, 0, 0, 0, 0, 0, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 1.6541451218231302
tensor([1, 1

Epoch 306/600 - Training:   0%|          | 0/29 [00:00<?, ?it/s]

tensor([0, 0, 1, 0, 1, 1, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 3.591594060277833
tensor([1, 0, 0, 0, 0, 0, 1, 1, 1, 1])
torch.Size([24])
Gradient norm: 2.0326409276460766
tensor([1, 0, 1, 0, 0, 0, 1, 1, 0, 0])
torch.Size([24])
Gradient norm: 2.312502261016422
tensor([1, 0, 0, 1, 1, 1, 0, 0, 1, 0])
torch.Size([24])
Gradient norm: 2.918319455599564
tensor([0, 1, 0, 1, 1, 0, 1, 1, 0, 1])
torch.Size([24])
Gradient norm: 1.7995276582900364
tensor([0, 0, 1, 0, 1, 0, 1, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.341399647080406
tensor([1, 0, 0, 0, 1, 0, 0, 1, 1, 0])
torch.Size([24])
Gradient norm: 3.2190880625070593
tensor([0, 1, 1, 0, 0, 1, 0, 1, 0, 0])
torch.Size([24])
Gradient norm: 2.576730212962961
tensor([1, 0, 0, 1, 0, 0, 0, 1, 1, 1])
torch.Size([24])
Gradient norm: 3.2126327587602295
tensor([1, 1, 1, 0, 0, 1, 1, 0, 0, 1])
torch.Size([24])
Gradient norm: 2.1311330055173454
tensor([1, 0, 0, 0, 0, 0, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 1.4524271540481988
tensor([1, 1, 0

Epoch 307/600 - Training:   0%|          | 0/29 [00:00<?, ?it/s]

tensor([0, 0, 1, 0, 1, 1, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 2.8726761840083292
tensor([1, 0, 0, 0, 0, 0, 1, 1, 1, 1])
torch.Size([24])
Gradient norm: 2.4897310106351824
tensor([1, 0, 1, 0, 0, 0, 1, 1, 0, 0])
torch.Size([24])
Gradient norm: 2.6414165406739847
tensor([1, 0, 0, 1, 1, 1, 0, 0, 1, 0])
torch.Size([24])
Gradient norm: 3.3042816637975037
tensor([0, 1, 0, 1, 1, 0, 1, 1, 0, 1])
torch.Size([24])
Gradient norm: 1.7458372561159075
tensor([0, 0, 1, 0, 1, 0, 1, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.0339509689043713
tensor([1, 0, 0, 0, 1, 0, 0, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.5794815190907947
tensor([0, 1, 1, 0, 0, 1, 0, 1, 0, 0])
torch.Size([24])
Gradient norm: 3.2424618721567877
tensor([1, 0, 0, 1, 0, 0, 0, 1, 1, 1])
torch.Size([24])
Gradient norm: 2.780294753861349
tensor([1, 1, 1, 0, 0, 1, 1, 0, 0, 1])
torch.Size([24])
Gradient norm: 1.9058539097452953
tensor([1, 0, 0, 0, 0, 0, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 1.8740883299184257
tensor([1, 

Epoch 308/600 - Training:   0%|          | 0/29 [00:00<?, ?it/s]

tensor([0, 0, 1, 0, 1, 1, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 4.017289863141033
tensor([1, 0, 0, 0, 0, 0, 1, 1, 1, 1])
torch.Size([24])
Gradient norm: 1.7279399302339942
tensor([1, 0, 1, 0, 0, 0, 1, 1, 0, 0])
torch.Size([24])
Gradient norm: 1.974004692926818
tensor([1, 0, 0, 1, 1, 1, 0, 0, 1, 0])
torch.Size([24])
Gradient norm: 2.6017603988333518
tensor([0, 1, 0, 1, 1, 0, 1, 1, 0, 1])
torch.Size([24])
Gradient norm: 1.830724759501209
tensor([0, 0, 1, 0, 1, 0, 1, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.1678719319190147
tensor([1, 0, 0, 0, 1, 0, 0, 1, 1, 0])
torch.Size([24])
Gradient norm: 3.2818646530143276
tensor([0, 1, 1, 0, 0, 1, 0, 1, 0, 0])
torch.Size([24])
Gradient norm: 3.3949247700614418
tensor([1, 0, 0, 1, 0, 0, 0, 1, 1, 1])
torch.Size([24])
Gradient norm: 2.710644401451736
tensor([1, 1, 1, 0, 0, 1, 1, 0, 0, 1])
torch.Size([24])
Gradient norm: 1.833804796182945
tensor([1, 0, 0, 0, 0, 0, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 1.9525179209034742
tensor([1, 1, 0

Epoch 309/600 - Training:   0%|          | 0/29 [00:00<?, ?it/s]

tensor([0, 0, 1, 0, 1, 1, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 3.4677778618115167
tensor([1, 0, 0, 0, 0, 0, 1, 1, 1, 1])
torch.Size([24])
Gradient norm: 2.343359216190766
tensor([1, 0, 1, 0, 0, 0, 1, 1, 0, 0])
torch.Size([24])
Gradient norm: 2.727867838274343
tensor([1, 0, 0, 1, 1, 1, 0, 0, 1, 0])
torch.Size([24])
Gradient norm: 2.251458354290033
tensor([0, 1, 0, 1, 1, 0, 1, 1, 0, 1])
torch.Size([24])
Gradient norm: 1.7978392422189124
tensor([0, 0, 1, 0, 1, 0, 1, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.456835939712954
tensor([1, 0, 0, 0, 1, 0, 0, 1, 1, 0])
torch.Size([24])
Gradient norm: 3.1213595114096373
tensor([0, 1, 1, 0, 0, 1, 0, 1, 0, 0])
torch.Size([24])
Gradient norm: 3.0343935811499794
tensor([1, 0, 0, 1, 0, 0, 0, 1, 1, 1])
torch.Size([24])
Gradient norm: 3.8450342115417424
tensor([1, 1, 1, 0, 0, 1, 1, 0, 0, 1])
torch.Size([24])
Gradient norm: 2.1939592869824995
tensor([1, 0, 0, 0, 0, 0, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 2.3061157837723893
tensor([1, 1, 

Epoch 310/600 - Training:   0%|          | 0/29 [00:00<?, ?it/s]

tensor([0, 0, 1, 0, 1, 1, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 3.244736266917493
tensor([1, 0, 0, 0, 0, 0, 1, 1, 1, 1])
torch.Size([24])
Gradient norm: 2.524697551273812
tensor([1, 0, 1, 0, 0, 0, 1, 1, 0, 0])
torch.Size([24])
Gradient norm: 2.264008700925379
tensor([1, 0, 0, 1, 1, 1, 0, 0, 1, 0])
torch.Size([24])
Gradient norm: 2.57489697614777
tensor([0, 1, 0, 1, 1, 0, 1, 1, 0, 1])
torch.Size([24])
Gradient norm: 2.0053760327644747
tensor([0, 0, 1, 0, 1, 0, 1, 1, 1, 0])
torch.Size([24])
Gradient norm: 1.6738213333525092
tensor([1, 0, 0, 0, 1, 0, 0, 1, 1, 0])
torch.Size([24])
Gradient norm: 3.244842817987475
tensor([0, 1, 1, 0, 0, 1, 0, 1, 0, 0])
torch.Size([24])
Gradient norm: 3.837243547990063
tensor([1, 0, 0, 1, 0, 0, 0, 1, 1, 1])
torch.Size([24])
Gradient norm: 3.31203989368168
tensor([1, 1, 1, 0, 0, 1, 1, 0, 0, 1])
torch.Size([24])
Gradient norm: 1.6056445195014302
tensor([1, 0, 0, 0, 0, 0, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 1.5821084205004892
tensor([1, 1, 0, 0,

Epoch 311/600 - Training:   0%|          | 0/29 [00:00<?, ?it/s]

tensor([0, 0, 1, 0, 1, 1, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 3.7567872978783026
tensor([1, 0, 0, 0, 0, 0, 1, 1, 1, 1])
torch.Size([24])
Gradient norm: 2.533022015717911
tensor([1, 0, 1, 0, 0, 0, 1, 1, 0, 0])
torch.Size([24])
Gradient norm: 2.5534183372651196
tensor([1, 0, 0, 1, 1, 1, 0, 0, 1, 0])
torch.Size([24])
Gradient norm: 2.144975126612261
tensor([0, 1, 0, 1, 1, 0, 1, 1, 0, 1])
torch.Size([24])
Gradient norm: 2.1412533910485747
tensor([0, 0, 1, 0, 1, 0, 1, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.497585164118975
tensor([1, 0, 0, 0, 1, 0, 0, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.8901587266495405
tensor([0, 1, 1, 0, 0, 1, 0, 1, 0, 0])
torch.Size([24])
Gradient norm: 2.592074977232994
tensor([1, 0, 0, 1, 0, 0, 0, 1, 1, 1])
torch.Size([24])
Gradient norm: 2.6404796098926404
tensor([1, 1, 1, 0, 0, 1, 1, 0, 0, 1])
torch.Size([24])
Gradient norm: 2.0847990420888074
tensor([1, 0, 0, 0, 0, 0, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 1.8414032640606315
tensor([1, 1, 

Epoch 312/600 - Training:   0%|          | 0/29 [00:00<?, ?it/s]

tensor([0, 0, 1, 0, 1, 1, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 3.329995645626754
tensor([1, 0, 0, 0, 0, 0, 1, 1, 1, 1])
torch.Size([24])
Gradient norm: 2.7591923585146194
tensor([1, 0, 1, 0, 0, 0, 1, 1, 0, 0])
torch.Size([24])
Gradient norm: 2.1453896461604867
tensor([1, 0, 0, 1, 1, 1, 0, 0, 1, 0])
torch.Size([24])
Gradient norm: 2.4199912207227743
tensor([0, 1, 0, 1, 1, 0, 1, 1, 0, 1])
torch.Size([24])
Gradient norm: 2.4995911784396383
tensor([0, 0, 1, 0, 1, 0, 1, 1, 1, 0])
torch.Size([24])
Gradient norm: 1.9824250189536003
tensor([1, 0, 0, 0, 1, 0, 0, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.870581633557558
tensor([0, 1, 1, 0, 0, 1, 0, 1, 0, 0])
torch.Size([24])
Gradient norm: 3.407296524201151
tensor([1, 0, 0, 1, 0, 0, 0, 1, 1, 1])
torch.Size([24])
Gradient norm: 2.8395391710017783
tensor([1, 1, 1, 0, 0, 1, 1, 0, 0, 1])
torch.Size([24])
Gradient norm: 1.6788073038785891
tensor([1, 0, 0, 0, 0, 0, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 1.5169869450092346
tensor([1, 1,

Epoch 313/600 - Training:   0%|          | 0/29 [00:00<?, ?it/s]

tensor([0, 0, 1, 0, 1, 1, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 3.9013536860562894
tensor([1, 0, 0, 0, 0, 0, 1, 1, 1, 1])
torch.Size([24])
Gradient norm: 1.6548455489105074
tensor([1, 0, 1, 0, 0, 0, 1, 1, 0, 0])
torch.Size([24])
Gradient norm: 2.1752110143260106
tensor([1, 0, 0, 1, 1, 1, 0, 0, 1, 0])
torch.Size([24])
Gradient norm: 3.115904632656332
tensor([0, 1, 0, 1, 1, 0, 1, 1, 0, 1])
torch.Size([24])
Gradient norm: 2.138905768410803
tensor([0, 0, 1, 0, 1, 0, 1, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.058506781978908
tensor([1, 0, 0, 0, 1, 0, 0, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.4486870283419058
tensor([0, 1, 1, 0, 0, 1, 0, 1, 0, 0])
torch.Size([24])
Gradient norm: 2.8104659646430963
tensor([1, 0, 0, 1, 0, 0, 0, 1, 1, 1])
torch.Size([24])
Gradient norm: 2.532368144688218
tensor([1, 1, 1, 0, 0, 1, 1, 0, 0, 1])
torch.Size([24])
Gradient norm: 2.1381813949668818
tensor([1, 0, 0, 0, 0, 0, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 1.3408596330967895
tensor([1, 1, 

Epoch 314/600 - Training:   0%|          | 0/29 [00:00<?, ?it/s]

tensor([0, 0, 1, 0, 1, 1, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 3.5485534811101127
tensor([1, 0, 0, 0, 0, 0, 1, 1, 1, 1])
torch.Size([24])
Gradient norm: 2.0130979892861687
tensor([1, 0, 1, 0, 0, 0, 1, 1, 0, 0])
torch.Size([24])
Gradient norm: 2.6223658282809046
tensor([1, 0, 0, 1, 1, 1, 0, 0, 1, 0])
torch.Size([24])
Gradient norm: 2.722267453196486
tensor([0, 1, 0, 1, 1, 0, 1, 1, 0, 1])
torch.Size([24])
Gradient norm: 2.021966769525092
tensor([0, 0, 1, 0, 1, 0, 1, 1, 1, 0])
torch.Size([24])
Gradient norm: 1.7762924790719654
tensor([1, 0, 0, 0, 1, 0, 0, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.8054528696992125
tensor([0, 1, 1, 0, 0, 1, 0, 1, 0, 0])
torch.Size([24])
Gradient norm: 2.922389390518148
tensor([1, 0, 0, 1, 0, 0, 0, 1, 1, 1])
torch.Size([24])
Gradient norm: 2.827622179519265
tensor([1, 1, 1, 0, 0, 1, 1, 0, 0, 1])
torch.Size([24])
Gradient norm: 1.9887474544411148
tensor([1, 0, 0, 0, 0, 0, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 1.6072022715302066
tensor([1, 1, 

Epoch 315/600 - Training:   0%|          | 0/29 [00:00<?, ?it/s]

tensor([0, 0, 1, 0, 1, 1, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 2.9377771687137946
tensor([1, 0, 0, 0, 0, 0, 1, 1, 1, 1])
torch.Size([24])
Gradient norm: 1.9114949493886206
tensor([1, 0, 1, 0, 0, 0, 1, 1, 0, 0])
torch.Size([24])
Gradient norm: 2.286203144897887
tensor([1, 0, 0, 1, 1, 1, 0, 0, 1, 0])
torch.Size([24])
Gradient norm: 2.3289631632219123
tensor([0, 1, 0, 1, 1, 0, 1, 1, 0, 1])
torch.Size([24])
Gradient norm: 2.04236370543889
tensor([0, 0, 1, 0, 1, 0, 1, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.269972787829544
tensor([1, 0, 0, 0, 1, 0, 0, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.2794522156129355
tensor([0, 1, 1, 0, 0, 1, 0, 1, 0, 0])
torch.Size([24])
Gradient norm: 3.459479014887502
tensor([1, 0, 0, 1, 0, 0, 0, 1, 1, 1])
torch.Size([24])
Gradient norm: 3.112955595740177
tensor([1, 1, 1, 0, 0, 1, 1, 0, 0, 1])
torch.Size([24])
Gradient norm: 2.0128835747772036
tensor([1, 0, 0, 0, 0, 0, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 1.9759444335181096
tensor([1, 1, 0,

Epoch 316/600 - Training:   0%|          | 0/29 [00:00<?, ?it/s]

tensor([0, 0, 1, 0, 1, 1, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 2.453974244761211
tensor([1, 0, 0, 0, 0, 0, 1, 1, 1, 1])
torch.Size([24])
Gradient norm: 1.9227085895459293
tensor([1, 0, 1, 0, 0, 0, 1, 1, 0, 0])
torch.Size([24])
Gradient norm: 1.9916760812037924
tensor([1, 0, 0, 1, 1, 1, 0, 0, 1, 0])
torch.Size([24])
Gradient norm: 2.6580611338907585
tensor([0, 1, 0, 1, 1, 0, 1, 1, 0, 1])
torch.Size([24])
Gradient norm: 2.2201940459515304
tensor([0, 0, 1, 0, 1, 0, 1, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.378430429619156
tensor([1, 0, 0, 0, 1, 0, 0, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.876764667474418
tensor([0, 1, 1, 0, 0, 1, 0, 1, 0, 0])
torch.Size([24])
Gradient norm: 2.9435081558385576
tensor([1, 0, 0, 1, 0, 0, 0, 1, 1, 1])
torch.Size([24])
Gradient norm: 2.5507516056787836
tensor([1, 1, 1, 0, 0, 1, 1, 0, 0, 1])
torch.Size([24])
Gradient norm: 1.744782076800046
tensor([1, 0, 0, 0, 0, 0, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 1.789232828568579
tensor([1, 1, 0

Epoch 317/600 - Training:   0%|          | 0/29 [00:00<?, ?it/s]

tensor([0, 0, 1, 0, 1, 1, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 3.596396746902578
tensor([1, 0, 0, 0, 0, 0, 1, 1, 1, 1])
torch.Size([24])
Gradient norm: 1.6371747941524901
tensor([1, 0, 1, 0, 0, 0, 1, 1, 0, 0])
torch.Size([24])
Gradient norm: 2.1497251686453995
tensor([1, 0, 0, 1, 1, 1, 0, 0, 1, 0])
torch.Size([24])
Gradient norm: 2.8251136987678285
tensor([0, 1, 0, 1, 1, 0, 1, 1, 0, 1])
torch.Size([24])
Gradient norm: 1.8426943596003562
tensor([0, 0, 1, 0, 1, 0, 1, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.94278427438033
tensor([1, 0, 0, 0, 1, 0, 0, 1, 1, 0])
torch.Size([24])
Gradient norm: 3.170836345976518
tensor([0, 1, 1, 0, 0, 1, 0, 1, 0, 0])
torch.Size([24])
Gradient norm: 2.5092284431463985
tensor([1, 0, 0, 1, 0, 0, 0, 1, 1, 1])
torch.Size([24])
Gradient norm: 2.8109581850233933
tensor([1, 1, 1, 0, 0, 1, 1, 0, 0, 1])
torch.Size([24])
Gradient norm: 1.7784509936849169
tensor([1, 0, 0, 0, 0, 0, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 2.2233888327981424
tensor([1, 1, 

Epoch 318/600 - Training:   0%|          | 0/29 [00:00<?, ?it/s]

tensor([0, 0, 1, 0, 1, 1, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 3.527083077807044
tensor([1, 0, 0, 0, 0, 0, 1, 1, 1, 1])
torch.Size([24])
Gradient norm: 2.031352534193649
tensor([1, 0, 1, 0, 0, 0, 1, 1, 0, 0])
torch.Size([24])
Gradient norm: 1.8918513692511076
tensor([1, 0, 0, 1, 1, 1, 0, 0, 1, 0])
torch.Size([24])
Gradient norm: 2.168009740008754
tensor([0, 1, 0, 1, 1, 0, 1, 1, 0, 1])
torch.Size([24])
Gradient norm: 1.7485588982479614
tensor([0, 0, 1, 0, 1, 0, 1, 1, 1, 0])
torch.Size([24])
Gradient norm: 1.9930880612798096
tensor([1, 0, 0, 0, 1, 0, 0, 1, 1, 0])
torch.Size([24])
Gradient norm: 3.1177777693397073
tensor([0, 1, 1, 0, 0, 1, 0, 1, 0, 0])
torch.Size([24])
Gradient norm: 3.322414108541908
tensor([1, 0, 0, 1, 0, 0, 0, 1, 1, 1])
torch.Size([24])
Gradient norm: 2.6107931352636387
tensor([1, 1, 1, 0, 0, 1, 1, 0, 0, 1])
torch.Size([24])
Gradient norm: 1.6764792776773283
tensor([1, 0, 0, 0, 0, 0, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 1.9330318333657213
tensor([1, 1, 

Epoch 319/600 - Training:   0%|          | 0/29 [00:00<?, ?it/s]

tensor([0, 0, 1, 0, 1, 1, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 3.580323209523583
tensor([1, 0, 0, 0, 0, 0, 1, 1, 1, 1])
torch.Size([24])
Gradient norm: 2.063821394761185
tensor([1, 0, 1, 0, 0, 0, 1, 1, 0, 0])
torch.Size([24])
Gradient norm: 2.543967782423252
tensor([1, 0, 0, 1, 1, 1, 0, 0, 1, 0])
torch.Size([24])
Gradient norm: 2.744383381587881
tensor([0, 1, 0, 1, 1, 0, 1, 1, 0, 1])
torch.Size([24])
Gradient norm: 2.162685305629772
tensor([0, 0, 1, 0, 1, 0, 1, 1, 1, 0])
torch.Size([24])
Gradient norm: 3.0564377078228944
tensor([1, 0, 0, 0, 1, 0, 0, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.436157079657015
tensor([0, 1, 1, 0, 0, 1, 0, 1, 0, 0])
torch.Size([24])
Gradient norm: 3.3118400922378957
tensor([1, 0, 0, 1, 0, 0, 0, 1, 1, 1])
torch.Size([24])
Gradient norm: 2.496183763240393
tensor([1, 1, 1, 0, 0, 1, 1, 0, 0, 1])
torch.Size([24])
Gradient norm: 2.1424898169248943
tensor([1, 0, 0, 0, 0, 0, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 1.4603564343151725
tensor([1, 1, 0, 

Epoch 320/600 - Training:   0%|          | 0/29 [00:00<?, ?it/s]

tensor([0, 0, 1, 0, 1, 1, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 2.8453218287431086
tensor([1, 0, 0, 0, 0, 0, 1, 1, 1, 1])
torch.Size([24])
Gradient norm: 2.0089237432712825
tensor([1, 0, 1, 0, 0, 0, 1, 1, 0, 0])
torch.Size([24])
Gradient norm: 2.3092120459778793
tensor([1, 0, 0, 1, 1, 1, 0, 0, 1, 0])
torch.Size([24])
Gradient norm: 3.0742147282642143
tensor([0, 1, 0, 1, 1, 0, 1, 1, 0, 1])
torch.Size([24])
Gradient norm: 1.946079294517723
tensor([0, 0, 1, 0, 1, 0, 1, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.300289564868058
tensor([1, 0, 0, 0, 1, 0, 0, 1, 1, 0])
torch.Size([24])
Gradient norm: 3.175445049263524
tensor([0, 1, 1, 0, 0, 1, 0, 1, 0, 0])
torch.Size([24])
Gradient norm: 3.693097085127274
tensor([1, 0, 0, 1, 0, 0, 0, 1, 1, 1])
torch.Size([24])
Gradient norm: 2.737475302472837
tensor([1, 1, 1, 0, 0, 1, 1, 0, 0, 1])
torch.Size([24])
Gradient norm: 2.207853519241775
tensor([1, 0, 0, 0, 0, 0, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 1.9814597986034337
tensor([1, 1, 0,

Epoch 321/600 - Training:   0%|          | 0/29 [00:00<?, ?it/s]

tensor([0, 0, 1, 0, 1, 1, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 3.334073539826824
tensor([1, 0, 0, 0, 0, 0, 1, 1, 1, 1])
torch.Size([24])
Gradient norm: 2.574944078068065
tensor([1, 0, 1, 0, 0, 0, 1, 1, 0, 0])
torch.Size([24])
Gradient norm: 2.4472858461533615
tensor([1, 0, 0, 1, 1, 1, 0, 0, 1, 0])
torch.Size([24])
Gradient norm: 2.5980336969195297
tensor([0, 1, 0, 1, 1, 0, 1, 1, 0, 1])
torch.Size([24])
Gradient norm: 1.770448374345649
tensor([0, 0, 1, 0, 1, 0, 1, 1, 1, 0])
torch.Size([24])
Gradient norm: 1.6871527555265213
tensor([1, 0, 0, 0, 1, 0, 0, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.535389044093388
tensor([0, 1, 1, 0, 0, 1, 0, 1, 0, 0])
torch.Size([24])
Gradient norm: 2.9718467417559995
tensor([1, 0, 0, 1, 0, 0, 0, 1, 1, 1])
torch.Size([24])
Gradient norm: 2.8381136537247533
tensor([1, 1, 1, 0, 0, 1, 1, 0, 0, 1])
torch.Size([24])
Gradient norm: 1.5910051757222456
tensor([1, 0, 0, 0, 0, 0, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 2.559180865465104
tensor([1, 1, 0

Epoch 322/600 - Training:   0%|          | 0/29 [00:00<?, ?it/s]

tensor([0, 0, 1, 0, 1, 1, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 3.354655151462184
tensor([1, 0, 0, 0, 0, 0, 1, 1, 1, 1])
torch.Size([24])
Gradient norm: 2.7406442562393125
tensor([1, 0, 1, 0, 0, 0, 1, 1, 0, 0])
torch.Size([24])
Gradient norm: 2.536918106663295
tensor([1, 0, 0, 1, 1, 1, 0, 0, 1, 0])
torch.Size([24])
Gradient norm: 2.92309366497105
tensor([0, 1, 0, 1, 1, 0, 1, 1, 0, 1])
torch.Size([24])
Gradient norm: 1.7508046875726797
tensor([0, 0, 1, 0, 1, 0, 1, 1, 1, 0])
torch.Size([24])
Gradient norm: 1.963220241276929
tensor([1, 0, 0, 0, 1, 0, 0, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.4848539058528036
tensor([0, 1, 1, 0, 0, 1, 0, 1, 0, 0])
torch.Size([24])
Gradient norm: 3.2604380048960118
tensor([1, 0, 0, 1, 0, 0, 0, 1, 1, 1])
torch.Size([24])
Gradient norm: 2.527985942852691
tensor([1, 1, 1, 0, 0, 1, 1, 0, 0, 1])
torch.Size([24])
Gradient norm: 2.0997478080286727
tensor([1, 0, 0, 0, 0, 0, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 2.0000847643868203
tensor([1, 1, 0,

Epoch 323/600 - Training:   0%|          | 0/29 [00:00<?, ?it/s]

tensor([0, 0, 1, 0, 1, 1, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 3.1290525722436353
tensor([1, 0, 0, 0, 0, 0, 1, 1, 1, 1])
torch.Size([24])
Gradient norm: 1.7643807835282193
tensor([1, 0, 1, 0, 0, 0, 1, 1, 0, 0])
torch.Size([24])
Gradient norm: 2.3408022770088603
tensor([1, 0, 0, 1, 1, 1, 0, 0, 1, 0])
torch.Size([24])
Gradient norm: 2.6154620629108454
tensor([0, 1, 0, 1, 1, 0, 1, 1, 0, 1])
torch.Size([24])
Gradient norm: 2.072397550075262
tensor([0, 0, 1, 0, 1, 0, 1, 1, 1, 0])
torch.Size([24])
Gradient norm: 1.857614769271744
tensor([1, 0, 0, 0, 1, 0, 0, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.5379788908054013
tensor([0, 1, 1, 0, 0, 1, 0, 1, 0, 0])
torch.Size([24])
Gradient norm: 3.123652353480603
tensor([1, 0, 0, 1, 0, 0, 0, 1, 1, 1])
torch.Size([24])
Gradient norm: 3.0162106177637944
tensor([1, 1, 1, 0, 0, 1, 1, 0, 0, 1])
torch.Size([24])
Gradient norm: 1.914138559999912
tensor([1, 0, 0, 0, 0, 0, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 1.4899863903734554
tensor([1, 1, 

Epoch 324/600 - Training:   0%|          | 0/29 [00:00<?, ?it/s]

tensor([0, 0, 1, 0, 1, 1, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 3.770304488023305
tensor([1, 0, 0, 0, 0, 0, 1, 1, 1, 1])
torch.Size([24])
Gradient norm: 2.401872004705938
tensor([1, 0, 1, 0, 0, 0, 1, 1, 0, 0])
torch.Size([24])
Gradient norm: 2.739698898200988
tensor([1, 0, 0, 1, 1, 1, 0, 0, 1, 0])
torch.Size([24])
Gradient norm: 2.2253963225868687
tensor([0, 1, 0, 1, 1, 0, 1, 1, 0, 1])
torch.Size([24])
Gradient norm: 2.085860404426314
tensor([0, 0, 1, 0, 1, 0, 1, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.413922873500696
tensor([1, 0, 0, 0, 1, 0, 0, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.964590020729415
tensor([0, 1, 1, 0, 0, 1, 0, 1, 0, 0])
torch.Size([24])
Gradient norm: 3.384530796164575
tensor([1, 0, 0, 1, 0, 0, 0, 1, 1, 1])
torch.Size([24])
Gradient norm: 2.9523949812311043
tensor([1, 1, 1, 0, 0, 1, 1, 0, 0, 1])
torch.Size([24])
Gradient norm: 1.7824032165846455
tensor([1, 0, 0, 0, 0, 0, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 1.9429775383390107
tensor([1, 1, 0, 

Epoch 325/600 - Training:   0%|          | 0/29 [00:00<?, ?it/s]

tensor([0, 0, 1, 0, 1, 1, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 3.2533397829057256
tensor([1, 0, 0, 0, 0, 0, 1, 1, 1, 1])
torch.Size([24])
Gradient norm: 2.3403066473860057
tensor([1, 0, 1, 0, 0, 0, 1, 1, 0, 0])
torch.Size([24])
Gradient norm: 2.209365495579345
tensor([1, 0, 0, 1, 1, 1, 0, 0, 1, 0])
torch.Size([24])
Gradient norm: 2.9650281413060475
tensor([0, 1, 0, 1, 1, 0, 1, 1, 0, 1])
torch.Size([24])
Gradient norm: 1.888447208732656
tensor([0, 0, 1, 0, 1, 0, 1, 1, 1, 0])
torch.Size([24])
Gradient norm: 1.9181492845652526
tensor([1, 0, 0, 0, 1, 0, 0, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.396548326753746
tensor([0, 1, 1, 0, 0, 1, 0, 1, 0, 0])
torch.Size([24])
Gradient norm: 2.961089983803964
tensor([1, 0, 0, 1, 0, 0, 0, 1, 1, 1])
torch.Size([24])
Gradient norm: 3.1918679644225354
tensor([1, 1, 1, 0, 0, 1, 1, 0, 0, 1])
torch.Size([24])
Gradient norm: 1.8241124115075475
tensor([1, 0, 0, 0, 0, 0, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 1.7862665448303778
tensor([1, 1, 

Epoch 326/600 - Training:   0%|          | 0/29 [00:00<?, ?it/s]

tensor([0, 0, 1, 0, 1, 1, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 3.402065339041681
tensor([1, 0, 0, 0, 0, 0, 1, 1, 1, 1])
torch.Size([24])
Gradient norm: 1.9434993499575308
tensor([1, 0, 1, 0, 0, 0, 1, 1, 0, 0])
torch.Size([24])
Gradient norm: 2.4114527160966777
tensor([1, 0, 0, 1, 1, 1, 0, 0, 1, 0])
torch.Size([24])
Gradient norm: 2.6286170858091986
tensor([0, 1, 0, 1, 1, 0, 1, 1, 0, 1])
torch.Size([24])
Gradient norm: 2.155111784847524
tensor([0, 0, 1, 0, 1, 0, 1, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.5043224609948362
tensor([1, 0, 0, 0, 1, 0, 0, 1, 1, 0])
torch.Size([24])
Gradient norm: 3.3149103490825333
tensor([0, 1, 1, 0, 0, 1, 0, 1, 0, 0])
torch.Size([24])
Gradient norm: 3.3714613659181487
tensor([1, 0, 0, 1, 0, 0, 0, 1, 1, 1])
torch.Size([24])
Gradient norm: 3.0912681095461734
tensor([1, 1, 1, 0, 0, 1, 1, 0, 0, 1])
torch.Size([24])
Gradient norm: 1.885496551153297
tensor([1, 0, 0, 0, 0, 0, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 2.2479400994313705
tensor([1, 1,

Epoch 327/600 - Training:   0%|          | 0/29 [00:00<?, ?it/s]

tensor([0, 0, 1, 0, 1, 1, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 3.635220248346274
tensor([1, 0, 0, 0, 0, 0, 1, 1, 1, 1])
torch.Size([24])
Gradient norm: 2.031599109000992
tensor([1, 0, 1, 0, 0, 0, 1, 1, 0, 0])
torch.Size([24])
Gradient norm: 1.6549201598162215
tensor([1, 0, 0, 1, 1, 1, 0, 0, 1, 0])
torch.Size([24])
Gradient norm: 3.327354530021765
tensor([0, 1, 0, 1, 1, 0, 1, 1, 0, 1])
torch.Size([24])
Gradient norm: 2.0694649063329598
tensor([0, 0, 1, 0, 1, 0, 1, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.6908753320395715
tensor([1, 0, 0, 0, 1, 0, 0, 1, 1, 0])
torch.Size([24])
Gradient norm: 3.40106091670909
tensor([0, 1, 1, 0, 0, 1, 0, 1, 0, 0])
torch.Size([24])
Gradient norm: 2.75043582898586
tensor([1, 0, 0, 1, 0, 0, 0, 1, 1, 1])
torch.Size([24])
Gradient norm: 2.7164421971079036
tensor([1, 1, 1, 0, 0, 1, 1, 0, 0, 1])
torch.Size([24])
Gradient norm: 2.067955796867806
tensor([1, 0, 0, 0, 0, 0, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 1.6470787694099809
tensor([1, 1, 0, 0

Epoch 328/600 - Training:   0%|          | 0/29 [00:00<?, ?it/s]

tensor([0, 0, 1, 0, 1, 1, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 3.750916641084226
tensor([1, 0, 0, 0, 0, 0, 1, 1, 1, 1])
torch.Size([24])
Gradient norm: 2.69231758581608
tensor([1, 0, 1, 0, 0, 0, 1, 1, 0, 0])
torch.Size([24])
Gradient norm: 2.6544528515271937
tensor([1, 0, 0, 1, 1, 1, 0, 0, 1, 0])
torch.Size([24])
Gradient norm: 2.465821236093112
tensor([0, 1, 0, 1, 1, 0, 1, 1, 0, 1])
torch.Size([24])
Gradient norm: 2.3991529219467114
tensor([0, 0, 1, 0, 1, 0, 1, 1, 1, 0])
torch.Size([24])
Gradient norm: 1.7602149650290477
tensor([1, 0, 0, 0, 1, 0, 0, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.4645324868898886
tensor([0, 1, 1, 0, 0, 1, 0, 1, 0, 0])
torch.Size([24])
Gradient norm: 3.2395553444722855
tensor([1, 0, 0, 1, 0, 0, 0, 1, 1, 1])
torch.Size([24])
Gradient norm: 2.9921510509180025
tensor([1, 1, 1, 0, 0, 1, 1, 0, 0, 1])
torch.Size([24])
Gradient norm: 1.5928387739141021
tensor([1, 0, 0, 0, 0, 0, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 1.9333692657532553
tensor([1, 1, 

Epoch 329/600 - Training:   0%|          | 0/29 [00:00<?, ?it/s]

tensor([0, 0, 1, 0, 1, 1, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 3.242229503272489
tensor([1, 0, 0, 0, 0, 0, 1, 1, 1, 1])
torch.Size([24])
Gradient norm: 2.1800333433580583
tensor([1, 0, 1, 0, 0, 0, 1, 1, 0, 0])
torch.Size([24])
Gradient norm: 2.2417936283282263
tensor([1, 0, 0, 1, 1, 1, 0, 0, 1, 0])
torch.Size([24])
Gradient norm: 2.44266404131543
tensor([0, 1, 0, 1, 1, 0, 1, 1, 0, 1])
torch.Size([24])
Gradient norm: 1.8118582511336425
tensor([0, 0, 1, 0, 1, 0, 1, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.697655276890542
tensor([1, 0, 0, 0, 1, 0, 0, 1, 1, 0])
torch.Size([24])
Gradient norm: 3.250306122386291
tensor([0, 1, 1, 0, 0, 1, 0, 1, 0, 0])
torch.Size([24])
Gradient norm: 2.903141918382137
tensor([1, 0, 0, 1, 0, 0, 0, 1, 1, 1])
torch.Size([24])
Gradient norm: 3.5490197483251817
tensor([1, 1, 1, 0, 0, 1, 1, 0, 0, 1])
torch.Size([24])
Gradient norm: 1.7425289426149386
tensor([1, 0, 0, 0, 0, 0, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 1.848962241450003
tensor([1, 1, 0, 

Epoch 330/600 - Training:   0%|          | 0/29 [00:00<?, ?it/s]

tensor([0, 0, 1, 0, 1, 1, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 3.6797448584989887
tensor([1, 0, 0, 0, 0, 0, 1, 1, 1, 1])
torch.Size([24])
Gradient norm: 2.033517736076265
tensor([1, 0, 1, 0, 0, 0, 1, 1, 0, 0])
torch.Size([24])
Gradient norm: 1.7553502875309244
tensor([1, 0, 0, 1, 1, 1, 0, 0, 1, 0])
torch.Size([24])
Gradient norm: 2.6170646788131178
tensor([0, 1, 0, 1, 1, 0, 1, 1, 0, 1])
torch.Size([24])
Gradient norm: 1.677952167342093
tensor([0, 0, 1, 0, 1, 0, 1, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.0000773324973493
tensor([1, 0, 0, 0, 1, 0, 0, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.5386328592173815
tensor([0, 1, 1, 0, 0, 1, 0, 1, 0, 0])
torch.Size([24])
Gradient norm: 2.501288645624485
tensor([1, 0, 0, 1, 0, 0, 0, 1, 1, 1])
torch.Size([24])
Gradient norm: 2.972682402213169
tensor([1, 1, 1, 0, 0, 1, 1, 0, 0, 1])
torch.Size([24])
Gradient norm: 2.0025037513169055
tensor([1, 0, 0, 0, 0, 0, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 1.7668708714838819
tensor([1, 1, 

Epoch 331/600 - Training:   0%|          | 0/29 [00:00<?, ?it/s]

tensor([0, 0, 1, 0, 1, 1, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 3.737550428577967
tensor([1, 0, 0, 0, 0, 0, 1, 1, 1, 1])
torch.Size([24])
Gradient norm: 1.7588123926951613
tensor([1, 0, 1, 0, 0, 0, 1, 1, 0, 0])
torch.Size([24])
Gradient norm: 2.6412458406116106
tensor([1, 0, 0, 1, 1, 1, 0, 0, 1, 0])
torch.Size([24])
Gradient norm: 3.0848078420271867
tensor([0, 1, 0, 1, 1, 0, 1, 1, 0, 1])
torch.Size([24])
Gradient norm: 1.7732276322365261
tensor([0, 0, 1, 0, 1, 0, 1, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.4386511902268997
tensor([1, 0, 0, 0, 1, 0, 0, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.8166990039822477
tensor([0, 1, 1, 0, 0, 1, 0, 1, 0, 0])
torch.Size([24])
Gradient norm: 3.17451932390509
tensor([1, 0, 0, 1, 0, 0, 0, 1, 1, 1])
torch.Size([24])
Gradient norm: 2.969169267558554
tensor([1, 1, 1, 0, 0, 1, 1, 0, 0, 1])
torch.Size([24])
Gradient norm: 1.7599226716938812
tensor([1, 0, 0, 0, 0, 0, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 1.7522606691182856
tensor([1, 1, 

Epoch 332/600 - Training:   0%|          | 0/29 [00:00<?, ?it/s]

tensor([0, 0, 1, 0, 1, 1, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 3.29908288977614
tensor([1, 0, 0, 0, 0, 0, 1, 1, 1, 1])
torch.Size([24])
Gradient norm: 1.9962358821072
tensor([1, 0, 1, 0, 0, 0, 1, 1, 0, 0])
torch.Size([24])
Gradient norm: 2.390984842999909
tensor([1, 0, 0, 1, 1, 1, 0, 0, 1, 0])
torch.Size([24])
Gradient norm: 2.3189544564440387
tensor([0, 1, 0, 1, 1, 0, 1, 1, 0, 1])
torch.Size([24])
Gradient norm: 1.5457338578503732
tensor([0, 0, 1, 0, 1, 0, 1, 1, 1, 0])
torch.Size([24])
Gradient norm: 1.8317601173712617
tensor([1, 0, 0, 0, 1, 0, 0, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.923900159388533
tensor([0, 1, 1, 0, 0, 1, 0, 1, 0, 0])
torch.Size([24])
Gradient norm: 2.1072384581616435
tensor([1, 0, 0, 1, 0, 0, 0, 1, 1, 1])
torch.Size([24])
Gradient norm: 3.052307611136776
tensor([1, 1, 1, 0, 0, 1, 1, 0, 0, 1])
torch.Size([24])
Gradient norm: 1.8626275265968395
tensor([1, 0, 0, 0, 0, 0, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 2.5011459820922153
tensor([1, 1, 0, 0

Epoch 333/600 - Training:   0%|          | 0/29 [00:00<?, ?it/s]

tensor([0, 0, 1, 0, 1, 1, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 3.5394871935601278
tensor([1, 0, 0, 0, 0, 0, 1, 1, 1, 1])
torch.Size([24])
Gradient norm: 2.015035515077379
tensor([1, 0, 1, 0, 0, 0, 1, 1, 0, 0])
torch.Size([24])
Gradient norm: 1.9948327404748356
tensor([1, 0, 0, 1, 1, 1, 0, 0, 1, 0])
torch.Size([24])
Gradient norm: 2.12593970746929
tensor([0, 1, 0, 1, 1, 0, 1, 1, 0, 1])
torch.Size([24])
Gradient norm: 2.0661851782119984
tensor([0, 0, 1, 0, 1, 0, 1, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.0019639065623624
tensor([1, 0, 0, 0, 1, 0, 0, 1, 1, 0])
torch.Size([24])
Gradient norm: 3.3851039781958665
tensor([0, 1, 1, 0, 0, 1, 0, 1, 0, 0])
torch.Size([24])
Gradient norm: 3.3201527625425418
tensor([1, 0, 0, 1, 0, 0, 0, 1, 1, 1])
torch.Size([24])
Gradient norm: 3.1474541787847135
tensor([1, 1, 1, 0, 0, 1, 1, 0, 0, 1])
torch.Size([24])
Gradient norm: 1.8968846623017708
tensor([1, 0, 0, 0, 0, 0, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 2.4166957658327646
tensor([1, 1,

Epoch 334/600 - Training:   0%|          | 0/29 [00:00<?, ?it/s]

tensor([0, 0, 1, 0, 1, 1, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 3.0160364562915642
tensor([1, 0, 0, 0, 0, 0, 1, 1, 1, 1])
torch.Size([24])
Gradient norm: 2.2731426598579465
tensor([1, 0, 1, 0, 0, 0, 1, 1, 0, 0])
torch.Size([24])
Gradient norm: 2.365253183323435
tensor([1, 0, 0, 1, 1, 1, 0, 0, 1, 0])
torch.Size([24])
Gradient norm: 2.2740058697388847
tensor([0, 1, 0, 1, 1, 0, 1, 1, 0, 1])
torch.Size([24])
Gradient norm: 1.9414902190442995
tensor([0, 0, 1, 0, 1, 0, 1, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.357400532613402
tensor([1, 0, 0, 0, 1, 0, 0, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.8363628606884914
tensor([0, 1, 1, 0, 0, 1, 0, 1, 0, 0])
torch.Size([24])
Gradient norm: 3.057975290265363
tensor([1, 0, 0, 1, 0, 0, 0, 1, 1, 1])
torch.Size([24])
Gradient norm: 2.4734431336222618
tensor([1, 1, 1, 0, 0, 1, 1, 0, 0, 1])
torch.Size([24])
Gradient norm: 1.9562787552642553
tensor([1, 0, 0, 0, 0, 0, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 1.9151886153223427
tensor([1, 1,

Epoch 335/600 - Training:   0%|          | 0/29 [00:00<?, ?it/s]

tensor([0, 0, 1, 0, 1, 1, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 3.7702388499731168
tensor([1, 0, 0, 0, 0, 0, 1, 1, 1, 1])
torch.Size([24])
Gradient norm: 2.9949607926190684
tensor([1, 0, 1, 0, 0, 0, 1, 1, 0, 0])
torch.Size([24])
Gradient norm: 2.2031189683842807
tensor([1, 0, 0, 1, 1, 1, 0, 0, 1, 0])
torch.Size([24])
Gradient norm: 3.578032296915251
tensor([0, 1, 0, 1, 1, 0, 1, 1, 0, 1])
torch.Size([24])
Gradient norm: 1.7353151198219037
tensor([0, 0, 1, 0, 1, 0, 1, 1, 1, 0])
torch.Size([24])
Gradient norm: 1.94657893969038
tensor([1, 0, 0, 0, 1, 0, 0, 1, 1, 0])
torch.Size([24])
Gradient norm: 3.2101053866508464
tensor([0, 1, 1, 0, 0, 1, 0, 1, 0, 0])
torch.Size([24])
Gradient norm: 3.0386408866848718
tensor([1, 0, 0, 1, 0, 0, 0, 1, 1, 1])
torch.Size([24])
Gradient norm: 3.247787333184297
tensor([1, 1, 1, 0, 0, 1, 1, 0, 0, 1])
torch.Size([24])
Gradient norm: 1.8785584049068063
tensor([1, 0, 0, 0, 0, 0, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 1.8905937236334616
tensor([1, 1, 

Epoch 336/600 - Training:   0%|          | 0/29 [00:00<?, ?it/s]

tensor([0, 0, 1, 0, 1, 1, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 3.7700046723071945
tensor([1, 0, 0, 0, 0, 0, 1, 1, 1, 1])
torch.Size([24])
Gradient norm: 2.7349904579136166
tensor([1, 0, 1, 0, 0, 0, 1, 1, 0, 0])
torch.Size([24])
Gradient norm: 2.4023933694700466
tensor([1, 0, 0, 1, 1, 1, 0, 0, 1, 0])
torch.Size([24])
Gradient norm: 3.0649516262820815
tensor([0, 1, 0, 1, 1, 0, 1, 1, 0, 1])
torch.Size([24])
Gradient norm: 2.22515337527833
tensor([0, 0, 1, 0, 1, 0, 1, 1, 1, 0])
torch.Size([24])
Gradient norm: 1.9251507000554748
tensor([1, 0, 0, 0, 1, 0, 0, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.924080715562254
tensor([0, 1, 1, 0, 0, 1, 0, 1, 0, 0])
torch.Size([24])
Gradient norm: 3.0338356721484465
tensor([1, 0, 0, 1, 0, 0, 0, 1, 1, 1])
torch.Size([24])
Gradient norm: 2.754878680632719
tensor([1, 1, 1, 0, 0, 1, 1, 0, 0, 1])
torch.Size([24])
Gradient norm: 1.6109870017996903
tensor([1, 0, 0, 0, 0, 0, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 1.5861200715875452
tensor([1, 1, 

Epoch 337/600 - Training:   0%|          | 0/29 [00:00<?, ?it/s]

tensor([0, 0, 1, 0, 1, 1, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 3.3330578646773374
tensor([1, 0, 0, 0, 0, 0, 1, 1, 1, 1])
torch.Size([24])
Gradient norm: 2.256336313734606
tensor([1, 0, 1, 0, 0, 0, 1, 1, 0, 0])
torch.Size([24])
Gradient norm: 2.942550545215506
tensor([1, 0, 0, 1, 1, 1, 0, 0, 1, 0])
torch.Size([24])
Gradient norm: 2.434086268943291
tensor([0, 1, 0, 1, 1, 0, 1, 1, 0, 1])
torch.Size([24])
Gradient norm: 1.6794342959387505
tensor([0, 0, 1, 0, 1, 0, 1, 1, 1, 0])
torch.Size([24])
Gradient norm: 1.6557576889669132
tensor([1, 0, 0, 0, 1, 0, 0, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.9531548592023324
tensor([0, 1, 1, 0, 0, 1, 0, 1, 0, 0])
torch.Size([24])
Gradient norm: 3.452987627452906
tensor([1, 0, 0, 1, 0, 0, 0, 1, 1, 1])
torch.Size([24])
Gradient norm: 3.0974585533933925
tensor([1, 1, 1, 0, 0, 1, 1, 0, 0, 1])
torch.Size([24])
Gradient norm: 1.7172699157607598
tensor([1, 0, 0, 0, 0, 0, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 1.8230866116455935
tensor([1, 1, 

Epoch 338/600 - Training:   0%|          | 0/29 [00:00<?, ?it/s]

tensor([0, 0, 1, 0, 1, 1, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 3.689316005139605
tensor([1, 0, 0, 0, 0, 0, 1, 1, 1, 1])
torch.Size([24])
Gradient norm: 1.8548478905272403
tensor([1, 0, 1, 0, 0, 0, 1, 1, 0, 0])
torch.Size([24])
Gradient norm: 2.6418221661375254
tensor([1, 0, 0, 1, 1, 1, 0, 0, 1, 0])
torch.Size([24])
Gradient norm: 2.317507208114661
tensor([0, 1, 0, 1, 1, 0, 1, 1, 0, 1])
torch.Size([24])
Gradient norm: 2.0851502969823876
tensor([0, 0, 1, 0, 1, 0, 1, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.9566538650678367
tensor([1, 0, 0, 0, 1, 0, 0, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.960145173579286
tensor([0, 1, 1, 0, 0, 1, 0, 1, 0, 0])
torch.Size([24])
Gradient norm: 3.090855845584244
tensor([1, 0, 0, 1, 0, 0, 0, 1, 1, 1])
torch.Size([24])
Gradient norm: 2.621515595158364
tensor([1, 1, 1, 0, 0, 1, 1, 0, 0, 1])
torch.Size([24])
Gradient norm: 2.0869447589193864
tensor([1, 0, 0, 0, 0, 0, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 2.0550909054041275
tensor([1, 1, 0

Epoch 339/600 - Training:   0%|          | 0/29 [00:00<?, ?it/s]

tensor([0, 0, 1, 0, 1, 1, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 3.2854376590024805
tensor([1, 0, 0, 0, 0, 0, 1, 1, 1, 1])
torch.Size([24])
Gradient norm: 1.7170641337968766
tensor([1, 0, 1, 0, 0, 0, 1, 1, 0, 0])
torch.Size([24])
Gradient norm: 2.318259863266146
tensor([1, 0, 0, 1, 1, 1, 0, 0, 1, 0])
torch.Size([24])
Gradient norm: 2.769647275280278
tensor([0, 1, 0, 1, 1, 0, 1, 1, 0, 1])
torch.Size([24])
Gradient norm: 2.0676592468083315
tensor([0, 0, 1, 0, 1, 0, 1, 1, 1, 0])
torch.Size([24])
Gradient norm: 1.824771031795242
tensor([1, 0, 0, 0, 1, 0, 0, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.8626093672097603
tensor([0, 1, 1, 0, 0, 1, 0, 1, 0, 0])
torch.Size([24])
Gradient norm: 2.480519086640522
tensor([1, 0, 0, 1, 0, 0, 0, 1, 1, 1])
torch.Size([24])
Gradient norm: 3.2294766639837484
tensor([1, 1, 1, 0, 0, 1, 1, 0, 0, 1])
torch.Size([24])
Gradient norm: 1.7619727191942627
tensor([1, 0, 0, 0, 0, 0, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 2.0065548896185335
tensor([1, 1, 

Epoch 340/600 - Training:   0%|          | 0/29 [00:00<?, ?it/s]

tensor([0, 0, 1, 0, 1, 1, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 3.5980864800533747
tensor([1, 0, 0, 0, 0, 0, 1, 1, 1, 1])
torch.Size([24])
Gradient norm: 2.263848760278053
tensor([1, 0, 1, 0, 0, 0, 1, 1, 0, 0])
torch.Size([24])
Gradient norm: 2.249941471916011
tensor([1, 0, 0, 1, 1, 1, 0, 0, 1, 0])
torch.Size([24])
Gradient norm: 3.0638018524555357
tensor([0, 1, 0, 1, 1, 0, 1, 1, 0, 1])
torch.Size([24])
Gradient norm: 1.9825353299694206
tensor([0, 0, 1, 0, 1, 0, 1, 1, 1, 0])
torch.Size([24])
Gradient norm: 1.9303168137676616
tensor([1, 0, 0, 0, 1, 0, 0, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.4988734072089596
tensor([0, 1, 1, 0, 0, 1, 0, 1, 0, 0])
torch.Size([24])
Gradient norm: 3.8244062414603026
tensor([1, 0, 0, 1, 0, 0, 0, 1, 1, 1])
torch.Size([24])
Gradient norm: 3.2205526648554472
tensor([1, 1, 1, 0, 0, 1, 1, 0, 0, 1])
torch.Size([24])
Gradient norm: 2.241052373713629
tensor([1, 0, 0, 0, 0, 0, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 2.2130790758107803
tensor([1, 1,

Epoch 341/600 - Training:   0%|          | 0/29 [00:00<?, ?it/s]

tensor([0, 0, 1, 0, 1, 1, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 3.1278314969032204
tensor([1, 0, 0, 0, 0, 0, 1, 1, 1, 1])
torch.Size([24])
Gradient norm: 2.2380922522583244
tensor([1, 0, 1, 0, 0, 0, 1, 1, 0, 0])
torch.Size([24])
Gradient norm: 2.3412852448636188
tensor([1, 0, 0, 1, 1, 1, 0, 0, 1, 0])
torch.Size([24])
Gradient norm: 2.387752756431354
tensor([0, 1, 0, 1, 1, 0, 1, 1, 0, 1])
torch.Size([24])
Gradient norm: 1.907916459792323
tensor([0, 0, 1, 0, 1, 0, 1, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.4527485331284433
tensor([1, 0, 0, 0, 1, 0, 0, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.280776464144924
tensor([0, 1, 1, 0, 0, 1, 0, 1, 0, 0])
torch.Size([24])
Gradient norm: 3.039681451343086
tensor([1, 0, 0, 1, 0, 0, 0, 1, 1, 1])
torch.Size([24])
Gradient norm: 3.082583240912696
tensor([1, 1, 1, 0, 0, 1, 1, 0, 0, 1])
torch.Size([24])
Gradient norm: 1.8822326928557136
tensor([1, 0, 0, 0, 0, 0, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 1.7234501550263734
tensor([1, 1, 0

Epoch 342/600 - Training:   0%|          | 0/29 [00:00<?, ?it/s]

tensor([0, 0, 1, 0, 1, 1, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 3.031342687172325
tensor([1, 0, 0, 0, 0, 0, 1, 1, 1, 1])
torch.Size([24])
Gradient norm: 2.1319256990650333
tensor([1, 0, 1, 0, 0, 0, 1, 1, 0, 0])
torch.Size([24])
Gradient norm: 2.2983705282976015
tensor([1, 0, 0, 1, 1, 1, 0, 0, 1, 0])
torch.Size([24])
Gradient norm: 2.1915939436759024
tensor([0, 1, 0, 1, 1, 0, 1, 1, 0, 1])
torch.Size([24])
Gradient norm: 1.5589184920586425
tensor([0, 0, 1, 0, 1, 0, 1, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.1958149139384355
tensor([1, 0, 0, 0, 1, 0, 0, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.8178886988748677
tensor([0, 1, 1, 0, 0, 1, 0, 1, 0, 0])
torch.Size([24])
Gradient norm: 3.1792787820760777
tensor([1, 0, 0, 1, 0, 0, 0, 1, 1, 1])
torch.Size([24])
Gradient norm: 3.2137156032799212
tensor([1, 1, 1, 0, 0, 1, 1, 0, 0, 1])
torch.Size([24])
Gradient norm: 2.184612857380824
tensor([1, 0, 0, 0, 0, 0, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 2.1543076591352324
tensor([1, 1

Epoch 343/600 - Training:   0%|          | 0/29 [00:00<?, ?it/s]

tensor([0, 0, 1, 0, 1, 1, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 4.365496180666443
tensor([1, 0, 0, 0, 0, 0, 1, 1, 1, 1])
torch.Size([24])
Gradient norm: 1.668648611399074
tensor([1, 0, 1, 0, 0, 0, 1, 1, 0, 0])
torch.Size([24])
Gradient norm: 2.5182911584900483
tensor([1, 0, 0, 1, 1, 1, 0, 0, 1, 0])
torch.Size([24])
Gradient norm: 2.0436256853793133
tensor([0, 1, 0, 1, 1, 0, 1, 1, 0, 1])
torch.Size([24])
Gradient norm: 1.5732502164082887
tensor([0, 0, 1, 0, 1, 0, 1, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.0287358454590505
tensor([1, 0, 0, 0, 1, 0, 0, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.7008436574710277
tensor([0, 1, 1, 0, 0, 1, 0, 1, 0, 0])
torch.Size([24])
Gradient norm: 3.329519407377031
tensor([1, 0, 0, 1, 0, 0, 0, 1, 1, 1])
torch.Size([24])
Gradient norm: 2.858633174257152
tensor([1, 1, 1, 0, 0, 1, 1, 0, 0, 1])
torch.Size([24])
Gradient norm: 1.7209139991668194
tensor([1, 0, 0, 0, 0, 0, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 2.195295532993357
tensor([1, 1, 0

Epoch 344/600 - Training:   0%|          | 0/29 [00:00<?, ?it/s]

tensor([0, 0, 1, 0, 1, 1, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 3.280819591655489
tensor([1, 0, 0, 0, 0, 0, 1, 1, 1, 1])
torch.Size([24])
Gradient norm: 2.129267239864917
tensor([1, 0, 1, 0, 0, 0, 1, 1, 0, 0])
torch.Size([24])
Gradient norm: 2.5080788563802368
tensor([1, 0, 0, 1, 1, 1, 0, 0, 1, 0])
torch.Size([24])
Gradient norm: 2.4456883435145165
tensor([0, 1, 0, 1, 1, 0, 1, 1, 0, 1])
torch.Size([24])
Gradient norm: 2.2296386017534338
tensor([0, 0, 1, 0, 1, 0, 1, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.1244568096089584
tensor([1, 0, 0, 0, 1, 0, 0, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.901374568697704
tensor([0, 1, 1, 0, 0, 1, 0, 1, 0, 0])
torch.Size([24])
Gradient norm: 3.275655290694267
tensor([1, 0, 0, 1, 0, 0, 0, 1, 1, 1])
torch.Size([24])
Gradient norm: 2.8480990842745886
tensor([1, 1, 1, 0, 0, 1, 1, 0, 0, 1])
torch.Size([24])
Gradient norm: 1.7982838519822388
tensor([1, 0, 0, 0, 0, 0, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 1.3901308400993937
tensor([1, 1, 

Epoch 345/600 - Training:   0%|          | 0/29 [00:00<?, ?it/s]

tensor([0, 0, 1, 0, 1, 1, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 3.399231568256843
tensor([1, 0, 0, 0, 0, 0, 1, 1, 1, 1])
torch.Size([24])
Gradient norm: 2.198870217413819
tensor([1, 0, 1, 0, 0, 0, 1, 1, 0, 0])
torch.Size([24])
Gradient norm: 2.555844523156973
tensor([1, 0, 0, 1, 1, 1, 0, 0, 1, 0])
torch.Size([24])
Gradient norm: 2.6413178510620408
tensor([0, 1, 0, 1, 1, 0, 1, 1, 0, 1])
torch.Size([24])
Gradient norm: 1.8419465073156815
tensor([0, 0, 1, 0, 1, 0, 1, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.3985604196764196
tensor([1, 0, 0, 0, 1, 0, 0, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.4436347846182462
tensor([0, 1, 1, 0, 0, 1, 0, 1, 0, 0])
torch.Size([24])
Gradient norm: 3.203782582010204
tensor([1, 0, 0, 1, 0, 0, 0, 1, 1, 1])
torch.Size([24])
Gradient norm: 3.649235918661018
tensor([1, 1, 1, 0, 0, 1, 1, 0, 0, 1])
torch.Size([24])
Gradient norm: 2.0074936533877956
tensor([1, 0, 0, 0, 0, 0, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 1.852127305942738
tensor([1, 1, 0,

Epoch 346/600 - Training:   0%|          | 0/29 [00:00<?, ?it/s]

tensor([0, 0, 1, 0, 1, 1, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 3.4728069247315894
tensor([1, 0, 0, 0, 0, 0, 1, 1, 1, 1])
torch.Size([24])
Gradient norm: 1.9376109431705884
tensor([1, 0, 1, 0, 0, 0, 1, 1, 0, 0])
torch.Size([24])
Gradient norm: 2.346188375494916
tensor([1, 0, 0, 1, 1, 1, 0, 0, 1, 0])
torch.Size([24])
Gradient norm: 2.4281765574606147
tensor([0, 1, 0, 1, 1, 0, 1, 1, 0, 1])
torch.Size([24])
Gradient norm: 2.0948867838291108
tensor([0, 0, 1, 0, 1, 0, 1, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.066217515720152
tensor([1, 0, 0, 0, 1, 0, 0, 1, 1, 0])
torch.Size([24])
Gradient norm: 3.121190615919609
tensor([0, 1, 1, 0, 0, 1, 0, 1, 0, 0])
torch.Size([24])
Gradient norm: 3.46294902032582
tensor([1, 0, 0, 1, 0, 0, 0, 1, 1, 1])
torch.Size([24])
Gradient norm: 3.419756044910397
tensor([1, 1, 1, 0, 0, 1, 1, 0, 0, 1])
torch.Size([24])
Gradient norm: 1.5353241119210415
tensor([1, 0, 0, 0, 0, 0, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 2.51045090200281
tensor([1, 1, 0, 0

Epoch 347/600 - Training:   0%|          | 0/29 [00:00<?, ?it/s]

tensor([0, 0, 1, 0, 1, 1, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 3.477029120038407
tensor([1, 0, 0, 0, 0, 0, 1, 1, 1, 1])
torch.Size([24])
Gradient norm: 2.3996251944709743
tensor([1, 0, 1, 0, 0, 0, 1, 1, 0, 0])
torch.Size([24])
Gradient norm: 2.0571650561831225
tensor([1, 0, 0, 1, 1, 1, 0, 0, 1, 0])
torch.Size([24])
Gradient norm: 2.8604253447754497
tensor([0, 1, 0, 1, 1, 0, 1, 1, 0, 1])
torch.Size([24])
Gradient norm: 2.015911432025496
tensor([0, 0, 1, 0, 1, 0, 1, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.019864805798212
tensor([1, 0, 0, 0, 1, 0, 0, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.7613598446551944
tensor([0, 1, 1, 0, 0, 1, 0, 1, 0, 0])
torch.Size([24])
Gradient norm: 3.0046098006390585
tensor([1, 0, 0, 1, 0, 0, 0, 1, 1, 1])
torch.Size([24])
Gradient norm: 3.3366046097969795
tensor([1, 1, 1, 0, 0, 1, 1, 0, 0, 1])
torch.Size([24])
Gradient norm: 1.8062881799123172
tensor([1, 0, 0, 0, 0, 0, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 1.6611090017293912
tensor([1, 1,

Epoch 348/600 - Training:   0%|          | 0/29 [00:00<?, ?it/s]

tensor([0, 0, 1, 0, 1, 1, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 2.9465026372219087
tensor([1, 0, 0, 0, 0, 0, 1, 1, 1, 1])
torch.Size([24])
Gradient norm: 2.511275663776401
tensor([1, 0, 1, 0, 0, 0, 1, 1, 0, 0])
torch.Size([24])
Gradient norm: 2.1406475505911957
tensor([1, 0, 0, 1, 1, 1, 0, 0, 1, 0])
torch.Size([24])
Gradient norm: 2.033850065647759
tensor([0, 1, 0, 1, 1, 0, 1, 1, 0, 1])
torch.Size([24])
Gradient norm: 1.9589467644696106
tensor([0, 0, 1, 0, 1, 0, 1, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.3492663901536934
tensor([1, 0, 0, 0, 1, 0, 0, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.5470482885786976
tensor([0, 1, 1, 0, 0, 1, 0, 1, 0, 0])
torch.Size([24])
Gradient norm: 3.5734443052262836
tensor([1, 0, 0, 1, 0, 0, 0, 1, 1, 1])
torch.Size([24])
Gradient norm: 3.4792377027767545
tensor([1, 1, 1, 0, 0, 1, 1, 0, 0, 1])
torch.Size([24])
Gradient norm: 1.8264840257126418
tensor([1, 0, 0, 0, 0, 0, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 1.882250955199892
tensor([1, 1,

Epoch 349/600 - Training:   0%|          | 0/29 [00:00<?, ?it/s]

tensor([0, 0, 1, 0, 1, 1, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 2.926997370901591
tensor([1, 0, 0, 0, 0, 0, 1, 1, 1, 1])
torch.Size([24])
Gradient norm: 1.8792584845956974
tensor([1, 0, 1, 0, 0, 0, 1, 1, 0, 0])
torch.Size([24])
Gradient norm: 2.216576706476286
tensor([1, 0, 0, 1, 1, 1, 0, 0, 1, 0])
torch.Size([24])
Gradient norm: 2.7178483161261267
tensor([0, 1, 0, 1, 1, 0, 1, 1, 0, 1])
torch.Size([24])
Gradient norm: 1.8168185257454896
tensor([0, 0, 1, 0, 1, 0, 1, 1, 1, 0])
torch.Size([24])
Gradient norm: 1.8464387552712989
tensor([1, 0, 0, 0, 1, 0, 0, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.6874112801301027
tensor([0, 1, 1, 0, 0, 1, 0, 1, 0, 0])
torch.Size([24])
Gradient norm: 3.061297992260363
tensor([1, 0, 0, 1, 0, 0, 0, 1, 1, 1])
torch.Size([24])
Gradient norm: 2.774072731155817
tensor([1, 1, 1, 0, 0, 1, 1, 0, 0, 1])
torch.Size([24])
Gradient norm: 2.1284623735575043
tensor([1, 0, 0, 0, 0, 0, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 1.5140763721608066
tensor([1, 1, 

Epoch 350/600 - Training:   0%|          | 0/29 [00:00<?, ?it/s]

tensor([0, 0, 1, 0, 1, 1, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 3.385098825767677
tensor([1, 0, 0, 0, 0, 0, 1, 1, 1, 1])
torch.Size([24])
Gradient norm: 1.9415573440863851
tensor([1, 0, 1, 0, 0, 0, 1, 1, 0, 0])
torch.Size([24])
Gradient norm: 2.0087947540169395
tensor([1, 0, 0, 1, 1, 1, 0, 0, 1, 0])
torch.Size([24])
Gradient norm: 2.9253426960918127
tensor([0, 1, 0, 1, 1, 0, 1, 1, 0, 1])
torch.Size([24])
Gradient norm: 1.9665387695605545
tensor([0, 0, 1, 0, 1, 0, 1, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.208451198406782
tensor([1, 0, 0, 0, 1, 0, 0, 1, 1, 0])
torch.Size([24])
Gradient norm: 3.3781129816851316
tensor([0, 1, 1, 0, 0, 1, 0, 1, 0, 0])
torch.Size([24])
Gradient norm: 3.225562590900344
tensor([1, 0, 0, 1, 0, 0, 0, 1, 1, 1])
torch.Size([24])
Gradient norm: 2.8894637323191925
tensor([1, 1, 1, 0, 0, 1, 1, 0, 0, 1])
torch.Size([24])
Gradient norm: 1.9398797056006327
tensor([1, 0, 0, 0, 0, 0, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 2.126424235529703
tensor([1, 1, 

Epoch 351/600 - Training:   0%|          | 0/29 [00:00<?, ?it/s]

tensor([0, 0, 1, 0, 1, 1, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 3.5878819469069696
tensor([1, 0, 0, 0, 0, 0, 1, 1, 1, 1])
torch.Size([24])
Gradient norm: 2.3812902876888993
tensor([1, 0, 1, 0, 0, 0, 1, 1, 0, 0])
torch.Size([24])
Gradient norm: 2.4464690926914865
tensor([1, 0, 0, 1, 1, 1, 0, 0, 1, 0])
torch.Size([24])
Gradient norm: 2.567690519293924
tensor([0, 1, 0, 1, 1, 0, 1, 1, 0, 1])
torch.Size([24])
Gradient norm: 2.2492942288856397
tensor([0, 0, 1, 0, 1, 0, 1, 1, 1, 0])
torch.Size([24])
Gradient norm: 1.992089772053922
tensor([1, 0, 0, 0, 1, 0, 0, 1, 1, 0])
torch.Size([24])
Gradient norm: 3.278564430247898
tensor([0, 1, 1, 0, 0, 1, 0, 1, 0, 0])
torch.Size([24])
Gradient norm: 2.721478061268248
tensor([1, 0, 0, 1, 0, 0, 0, 1, 1, 1])
torch.Size([24])
Gradient norm: 3.1151179574866266
tensor([1, 1, 1, 0, 0, 1, 1, 0, 0, 1])
torch.Size([24])
Gradient norm: 1.6895371827031425
tensor([1, 0, 0, 0, 0, 0, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 1.6321593884982268
tensor([1, 1, 

Epoch 352/600 - Training:   0%|          | 0/29 [00:00<?, ?it/s]

tensor([0, 0, 1, 0, 1, 1, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 2.970576961740229
tensor([1, 0, 0, 0, 0, 0, 1, 1, 1, 1])
torch.Size([24])
Gradient norm: 2.14164696121125
tensor([1, 0, 1, 0, 0, 0, 1, 1, 0, 0])
torch.Size([24])
Gradient norm: 2.2222108902445394
tensor([1, 0, 0, 1, 1, 1, 0, 0, 1, 0])
torch.Size([24])
Gradient norm: 2.1510371502890626
tensor([0, 1, 0, 1, 1, 0, 1, 1, 0, 1])
torch.Size([24])
Gradient norm: 1.7557487894265753
tensor([0, 0, 1, 0, 1, 0, 1, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.313246954611776
tensor([1, 0, 0, 0, 1, 0, 0, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.382479638243262
tensor([0, 1, 1, 0, 0, 1, 0, 1, 0, 0])
torch.Size([24])
Gradient norm: 3.1159090532146383
tensor([1, 0, 0, 1, 0, 0, 0, 1, 1, 1])
torch.Size([24])
Gradient norm: 2.969436048532696
tensor([1, 1, 1, 0, 0, 1, 1, 0, 0, 1])
torch.Size([24])
Gradient norm: 1.9247968653184193
tensor([1, 0, 0, 0, 0, 0, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 1.9021471042107316
tensor([1, 1, 0,

Epoch 353/600 - Training:   0%|          | 0/29 [00:00<?, ?it/s]

tensor([0, 0, 1, 0, 1, 1, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 3.0401190651432684
tensor([1, 0, 0, 0, 0, 0, 1, 1, 1, 1])
torch.Size([24])
Gradient norm: 2.20825083967149
tensor([1, 0, 1, 0, 0, 0, 1, 1, 0, 0])
torch.Size([24])
Gradient norm: 2.372348064161743
tensor([1, 0, 0, 1, 1, 1, 0, 0, 1, 0])
torch.Size([24])
Gradient norm: 2.8055324404935664
tensor([0, 1, 0, 1, 1, 0, 1, 1, 0, 1])
torch.Size([24])
Gradient norm: 1.5837273057681607
tensor([0, 0, 1, 0, 1, 0, 1, 1, 1, 0])
torch.Size([24])
Gradient norm: 1.9279583598582297
tensor([1, 0, 0, 0, 1, 0, 0, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.8830387402424864
tensor([0, 1, 1, 0, 0, 1, 0, 1, 0, 0])
torch.Size([24])
Gradient norm: 2.88320766277749
tensor([1, 0, 0, 1, 0, 0, 0, 1, 1, 1])
torch.Size([24])
Gradient norm: 3.0942786411854972
tensor([1, 1, 1, 0, 0, 1, 1, 0, 0, 1])
torch.Size([24])
Gradient norm: 1.7673509453469252
tensor([1, 0, 0, 0, 0, 0, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 1.5458982728805404
tensor([1, 1, 0

Epoch 354/600 - Training:   0%|          | 0/29 [00:00<?, ?it/s]

tensor([0, 0, 1, 0, 1, 1, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 3.647623774046893
tensor([1, 0, 0, 0, 0, 0, 1, 1, 1, 1])
torch.Size([24])
Gradient norm: 2.23605708074928
tensor([1, 0, 1, 0, 0, 0, 1, 1, 0, 0])
torch.Size([24])
Gradient norm: 2.167339377939137
tensor([1, 0, 0, 1, 1, 1, 0, 0, 1, 0])
torch.Size([24])
Gradient norm: 1.7632329679404795
tensor([0, 1, 0, 1, 1, 0, 1, 1, 0, 1])
torch.Size([24])
Gradient norm: 1.601935057190776
tensor([0, 0, 1, 0, 1, 0, 1, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.760308439403534
tensor([1, 0, 0, 0, 1, 0, 0, 1, 1, 0])
torch.Size([24])
Gradient norm: 3.0808549821787685
tensor([0, 1, 1, 0, 0, 1, 0, 1, 0, 0])
torch.Size([24])
Gradient norm: 2.9013603499354703
tensor([1, 0, 0, 1, 0, 0, 0, 1, 1, 1])
torch.Size([24])
Gradient norm: 3.1746065452460237
tensor([1, 1, 1, 0, 0, 1, 1, 0, 0, 1])
torch.Size([24])
Gradient norm: 2.064737973749014
tensor([1, 0, 0, 0, 0, 0, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 1.8551259534088957
tensor([1, 1, 0, 

Epoch 355/600 - Training:   0%|          | 0/29 [00:00<?, ?it/s]

tensor([0, 0, 1, 0, 1, 1, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 2.9791734710566415
tensor([1, 0, 0, 0, 0, 0, 1, 1, 1, 1])
torch.Size([24])
Gradient norm: 1.66708044798631
tensor([1, 0, 1, 0, 0, 0, 1, 1, 0, 0])
torch.Size([24])
Gradient norm: 1.9846603765632045
tensor([1, 0, 0, 1, 1, 1, 0, 0, 1, 0])
torch.Size([24])
Gradient norm: 2.4622513584312626
tensor([0, 1, 0, 1, 1, 0, 1, 1, 0, 1])
torch.Size([24])
Gradient norm: 1.839455801430808
tensor([0, 0, 1, 0, 1, 0, 1, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.3857439253208423
tensor([1, 0, 0, 0, 1, 0, 0, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.7252478967646874
tensor([0, 1, 1, 0, 0, 1, 0, 1, 0, 0])
torch.Size([24])
Gradient norm: 3.267840096147
tensor([1, 0, 0, 1, 0, 0, 0, 1, 1, 1])
torch.Size([24])
Gradient norm: 2.5642057569144256
tensor([1, 1, 1, 0, 0, 1, 1, 0, 0, 1])
torch.Size([24])
Gradient norm: 2.4070791619547705
tensor([1, 0, 0, 0, 0, 0, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 2.0077861459796833
tensor([1, 1, 0, 

Epoch 356/600 - Training:   0%|          | 0/29 [00:00<?, ?it/s]

tensor([0, 0, 1, 0, 1, 1, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 3.420032668642404
tensor([1, 0, 0, 0, 0, 0, 1, 1, 1, 1])
torch.Size([24])
Gradient norm: 1.9655094085012985
tensor([1, 0, 1, 0, 0, 0, 1, 1, 0, 0])
torch.Size([24])
Gradient norm: 2.5666766448680867
tensor([1, 0, 0, 1, 1, 1, 0, 0, 1, 0])
torch.Size([24])
Gradient norm: 2.7194581180786495
tensor([0, 1, 0, 1, 1, 0, 1, 1, 0, 1])
torch.Size([24])
Gradient norm: 2.5749894558193116
tensor([0, 0, 1, 0, 1, 0, 1, 1, 1, 0])
torch.Size([24])
Gradient norm: 1.7420101369840137
tensor([1, 0, 0, 0, 1, 0, 0, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.561811329119102
tensor([0, 1, 1, 0, 0, 1, 0, 1, 0, 0])
torch.Size([24])
Gradient norm: 2.702810027047634
tensor([1, 0, 0, 1, 0, 0, 0, 1, 1, 1])
torch.Size([24])
Gradient norm: 2.7146129337321976
tensor([1, 1, 1, 0, 0, 1, 1, 0, 0, 1])
torch.Size([24])
Gradient norm: 1.572981109080209
tensor([1, 0, 0, 0, 0, 0, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 1.686902643660735
tensor([1, 1, 0

Epoch 357/600 - Training:   0%|          | 0/29 [00:00<?, ?it/s]

tensor([0, 0, 1, 0, 1, 1, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 3.4912640035016342
tensor([1, 0, 0, 0, 0, 0, 1, 1, 1, 1])
torch.Size([24])
Gradient norm: 2.3500267028669013
tensor([1, 0, 1, 0, 0, 0, 1, 1, 0, 0])
torch.Size([24])
Gradient norm: 2.0522442929900833
tensor([1, 0, 0, 1, 1, 1, 0, 0, 1, 0])
torch.Size([24])
Gradient norm: 2.6602691630359114
tensor([0, 1, 0, 1, 1, 0, 1, 1, 0, 1])
torch.Size([24])
Gradient norm: 1.8523056720135445
tensor([0, 0, 1, 0, 1, 0, 1, 1, 1, 0])
torch.Size([24])
Gradient norm: 1.739867029468656
tensor([1, 0, 0, 0, 1, 0, 0, 1, 1, 0])
torch.Size([24])
Gradient norm: 3.899851827752264
tensor([0, 1, 1, 0, 0, 1, 0, 1, 0, 0])
torch.Size([24])
Gradient norm: 3.1252771298330937
tensor([1, 0, 0, 1, 0, 0, 0, 1, 1, 1])
torch.Size([24])
Gradient norm: 3.017255098827024
tensor([1, 1, 1, 0, 0, 1, 1, 0, 0, 1])
torch.Size([24])
Gradient norm: 2.088645405750834
tensor([1, 0, 0, 0, 0, 0, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 2.1602074754910916
tensor([1, 1, 

Epoch 358/600 - Training:   0%|          | 0/29 [00:00<?, ?it/s]

tensor([0, 0, 1, 0, 1, 1, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 3.107861880915677
tensor([1, 0, 0, 0, 0, 0, 1, 1, 1, 1])
torch.Size([24])
Gradient norm: 2.0443618435955373
tensor([1, 0, 1, 0, 0, 0, 1, 1, 0, 0])
torch.Size([24])
Gradient norm: 2.254855245876173
tensor([1, 0, 0, 1, 1, 1, 0, 0, 1, 0])
torch.Size([24])
Gradient norm: 2.515712998124894
tensor([0, 1, 0, 1, 1, 0, 1, 1, 0, 1])
torch.Size([24])
Gradient norm: 1.7106616385099902
tensor([0, 0, 1, 0, 1, 0, 1, 1, 1, 0])
torch.Size([24])
Gradient norm: 1.7387831282575823
tensor([1, 0, 0, 0, 1, 0, 0, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.1130482988134793
tensor([0, 1, 1, 0, 0, 1, 0, 1, 0, 0])
torch.Size([24])
Gradient norm: 3.1154068915179547
tensor([1, 0, 0, 1, 0, 0, 0, 1, 1, 1])
torch.Size([24])
Gradient norm: 2.4013260076875076
tensor([1, 1, 1, 0, 0, 1, 1, 0, 0, 1])
torch.Size([24])
Gradient norm: 1.66915700584418
tensor([1, 0, 0, 0, 0, 0, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 1.9648557637312294
tensor([1, 1, 0

Epoch 359/600 - Training:   0%|          | 0/29 [00:00<?, ?it/s]

tensor([0, 0, 1, 0, 1, 1, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 3.2507407401644106
tensor([1, 0, 0, 0, 0, 0, 1, 1, 1, 1])
torch.Size([24])
Gradient norm: 2.09188608561057
tensor([1, 0, 1, 0, 0, 0, 1, 1, 0, 0])
torch.Size([24])
Gradient norm: 2.203208042181023
tensor([1, 0, 0, 1, 1, 1, 0, 0, 1, 0])
torch.Size([24])
Gradient norm: 3.895448018019913
tensor([0, 1, 0, 1, 1, 0, 1, 1, 0, 1])
torch.Size([24])
Gradient norm: 1.807092111727457
tensor([0, 0, 1, 0, 1, 0, 1, 1, 1, 0])
torch.Size([24])
Gradient norm: 3.008098871518383
tensor([1, 0, 0, 0, 1, 0, 0, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.798769670760846
tensor([0, 1, 1, 0, 0, 1, 0, 1, 0, 0])
torch.Size([24])
Gradient norm: 3.3347877552547467
tensor([1, 0, 0, 1, 0, 0, 0, 1, 1, 1])
torch.Size([24])
Gradient norm: 2.683057487590026
tensor([1, 1, 1, 0, 0, 1, 1, 0, 0, 1])
torch.Size([24])
Gradient norm: 1.676857922763058
tensor([1, 0, 0, 0, 0, 0, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 1.6692856861311858
tensor([1, 1, 0, 0,

Epoch 360/600 - Training:   0%|          | 0/29 [00:00<?, ?it/s]

tensor([0, 0, 1, 0, 1, 1, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 3.3335458182384854
tensor([1, 0, 0, 0, 0, 0, 1, 1, 1, 1])
torch.Size([24])
Gradient norm: 1.9266270362068063
tensor([1, 0, 1, 0, 0, 0, 1, 1, 0, 0])
torch.Size([24])
Gradient norm: 2.7369330573430704
tensor([1, 0, 0, 1, 1, 1, 0, 0, 1, 0])
torch.Size([24])
Gradient norm: 2.7733011458865966
tensor([0, 1, 0, 1, 1, 0, 1, 1, 0, 1])
torch.Size([24])
Gradient norm: 2.026814405020924
tensor([0, 0, 1, 0, 1, 0, 1, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.1704967745963164
tensor([1, 0, 0, 0, 1, 0, 0, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.4747674220921985
tensor([0, 1, 1, 0, 0, 1, 0, 1, 0, 0])
torch.Size([24])
Gradient norm: 3.6979572864508734
tensor([1, 0, 0, 1, 0, 0, 0, 1, 1, 1])
torch.Size([24])
Gradient norm: 3.2878707034519774
tensor([1, 1, 1, 0, 0, 1, 1, 0, 0, 1])
torch.Size([24])
Gradient norm: 1.821411193276397
tensor([1, 0, 0, 0, 0, 0, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 1.639707984145671
tensor([1, 1,

Epoch 361/600 - Training:   0%|          | 0/29 [00:00<?, ?it/s]

tensor([0, 0, 1, 0, 1, 1, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 2.848544903496237
tensor([1, 0, 0, 0, 0, 0, 1, 1, 1, 1])
torch.Size([24])
Gradient norm: 2.2651429797043434
tensor([1, 0, 1, 0, 0, 0, 1, 1, 0, 0])
torch.Size([24])
Gradient norm: 2.3640947825873804
tensor([1, 0, 0, 1, 1, 1, 0, 0, 1, 0])
torch.Size([24])
Gradient norm: 2.302087244878414
tensor([0, 1, 0, 1, 1, 0, 1, 1, 0, 1])
torch.Size([24])
Gradient norm: 2.0866870553961308
tensor([0, 0, 1, 0, 1, 0, 1, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.6067631350345324
tensor([1, 0, 0, 0, 1, 0, 0, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.3651156519892824
tensor([0, 1, 1, 0, 0, 1, 0, 1, 0, 0])
torch.Size([24])
Gradient norm: 2.90741878838293
tensor([1, 0, 0, 1, 0, 0, 0, 1, 1, 1])
torch.Size([24])
Gradient norm: 3.11302393185794
tensor([1, 1, 1, 0, 0, 1, 1, 0, 0, 1])
torch.Size([24])
Gradient norm: 1.6034523881236336
tensor([1, 0, 0, 0, 0, 0, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 1.9719842592011136
tensor([1, 1, 0,

Epoch 362/600 - Training:   0%|          | 0/29 [00:00<?, ?it/s]

tensor([0, 0, 1, 0, 1, 1, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 3.117527010989102
tensor([1, 0, 0, 0, 0, 0, 1, 1, 1, 1])
torch.Size([24])
Gradient norm: 2.219857028094151
tensor([1, 0, 1, 0, 0, 0, 1, 1, 0, 0])
torch.Size([24])
Gradient norm: 2.1022018753116507
tensor([1, 0, 0, 1, 1, 1, 0, 0, 1, 0])
torch.Size([24])
Gradient norm: 2.092682084369135
tensor([0, 1, 0, 1, 1, 0, 1, 1, 0, 1])
torch.Size([24])
Gradient norm: 1.8571816215630865
tensor([0, 0, 1, 0, 1, 0, 1, 1, 1, 0])
torch.Size([24])
Gradient norm: 1.9846967029563347
tensor([1, 0, 0, 0, 1, 0, 0, 1, 1, 0])
torch.Size([24])
Gradient norm: 3.1186380846557737
tensor([0, 1, 1, 0, 0, 1, 0, 1, 0, 0])
torch.Size([24])
Gradient norm: 3.003550671492344
tensor([1, 0, 0, 1, 0, 0, 0, 1, 1, 1])
torch.Size([24])
Gradient norm: 2.9188031793011056
tensor([1, 1, 1, 0, 0, 1, 1, 0, 0, 1])
torch.Size([24])
Gradient norm: 1.9284388463487263
tensor([1, 0, 0, 0, 0, 0, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 2.065182341150028
tensor([1, 1, 0

Epoch 363/600 - Training:   0%|          | 0/29 [00:00<?, ?it/s]

tensor([0, 0, 1, 0, 1, 1, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 2.7420418395319057
tensor([1, 0, 0, 0, 0, 0, 1, 1, 1, 1])
torch.Size([24])
Gradient norm: 1.97821532151859
tensor([1, 0, 1, 0, 0, 0, 1, 1, 0, 0])
torch.Size([24])
Gradient norm: 2.1607936876704588
tensor([1, 0, 0, 1, 1, 1, 0, 0, 1, 0])
torch.Size([24])
Gradient norm: 3.077177347509048
tensor([0, 1, 0, 1, 1, 0, 1, 1, 0, 1])
torch.Size([24])
Gradient norm: 1.6899768672025564
tensor([0, 0, 1, 0, 1, 0, 1, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.0449084931641806
tensor([1, 0, 0, 0, 1, 0, 0, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.836257472387352
tensor([0, 1, 1, 0, 0, 1, 0, 1, 0, 0])
torch.Size([24])
Gradient norm: 2.865041682557587
tensor([1, 0, 0, 1, 0, 0, 0, 1, 1, 1])
torch.Size([24])
Gradient norm: 2.5101979612116807
tensor([1, 1, 1, 0, 0, 1, 1, 0, 0, 1])
torch.Size([24])
Gradient norm: 1.9750938770109072
tensor([1, 0, 0, 0, 0, 0, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 1.3973264361150122
tensor([1, 1, 0

Epoch 364/600 - Training:   0%|          | 0/29 [00:00<?, ?it/s]

tensor([0, 0, 1, 0, 1, 1, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 3.548669625039916
tensor([1, 0, 0, 0, 0, 0, 1, 1, 1, 1])
torch.Size([24])
Gradient norm: 2.501569347327628
tensor([1, 0, 1, 0, 0, 0, 1, 1, 0, 0])
torch.Size([24])
Gradient norm: 2.429398654872374
tensor([1, 0, 0, 1, 1, 1, 0, 0, 1, 0])
torch.Size([24])
Gradient norm: 2.7205976517212505
tensor([0, 1, 0, 1, 1, 0, 1, 1, 0, 1])
torch.Size([24])
Gradient norm: 1.8591604259135683
tensor([0, 0, 1, 0, 1, 0, 1, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.0470222946833108
tensor([1, 0, 0, 0, 1, 0, 0, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.729712599104805
tensor([0, 1, 1, 0, 0, 1, 0, 1, 0, 0])
torch.Size([24])
Gradient norm: 3.2403254206075687
tensor([1, 0, 0, 1, 0, 0, 0, 1, 1, 1])
torch.Size([24])
Gradient norm: 3.0947352928246343
tensor([1, 1, 1, 0, 0, 1, 1, 0, 0, 1])
torch.Size([24])
Gradient norm: 1.7823892569960318
tensor([1, 0, 0, 0, 0, 0, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 2.7637961787430756
tensor([1, 1, 

Epoch 365/600 - Training:   0%|          | 0/29 [00:00<?, ?it/s]

tensor([0, 0, 1, 0, 1, 1, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 2.528514925821686
tensor([1, 0, 0, 0, 0, 0, 1, 1, 1, 1])
torch.Size([24])
Gradient norm: 2.151578193194207
tensor([1, 0, 1, 0, 0, 0, 1, 1, 0, 0])
torch.Size([24])
Gradient norm: 2.459232046643663
tensor([1, 0, 0, 1, 1, 1, 0, 0, 1, 0])
torch.Size([24])
Gradient norm: 2.683414473980273
tensor([0, 1, 0, 1, 1, 0, 1, 1, 0, 1])
torch.Size([24])
Gradient norm: 1.9248690651973421
tensor([0, 0, 1, 0, 1, 0, 1, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.0212125638511593
tensor([1, 0, 0, 0, 1, 0, 0, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.1897346813024865
tensor([0, 1, 1, 0, 0, 1, 0, 1, 0, 0])
torch.Size([24])
Gradient norm: 2.385695358698605
tensor([1, 0, 0, 1, 0, 0, 0, 1, 1, 1])
torch.Size([24])
Gradient norm: 3.2722214004155967
tensor([1, 1, 1, 0, 0, 1, 1, 0, 0, 1])
torch.Size([24])
Gradient norm: 1.816666151920347
tensor([1, 0, 0, 0, 0, 0, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 2.31690075617631
tensor([1, 1, 0, 0

Epoch 366/600 - Training:   0%|          | 0/29 [00:00<?, ?it/s]

tensor([0, 0, 1, 0, 1, 1, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 3.555443624344896
tensor([1, 0, 0, 0, 0, 0, 1, 1, 1, 1])
torch.Size([24])
Gradient norm: 2.139640770017587
tensor([1, 0, 1, 0, 0, 0, 1, 1, 0, 0])
torch.Size([24])
Gradient norm: 2.6657502970534095
tensor([1, 0, 0, 1, 1, 1, 0, 0, 1, 0])
torch.Size([24])
Gradient norm: 2.504977544986024
tensor([0, 1, 0, 1, 1, 0, 1, 1, 0, 1])
torch.Size([24])
Gradient norm: 1.6778031901677817
tensor([0, 0, 1, 0, 1, 0, 1, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.532239683936956
tensor([1, 0, 0, 0, 1, 0, 0, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.706378575373076
tensor([0, 1, 1, 0, 0, 1, 0, 1, 0, 0])
torch.Size([24])
Gradient norm: 2.8946321911795323
tensor([1, 0, 0, 1, 0, 0, 0, 1, 1, 1])
torch.Size([24])
Gradient norm: 3.190315595094329
tensor([1, 1, 1, 0, 0, 1, 1, 0, 0, 1])
torch.Size([24])
Gradient norm: 1.761561571114095
tensor([1, 0, 0, 0, 0, 0, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 1.8250092667291422
tensor([1, 1, 0, 

Epoch 367/600 - Training:   0%|          | 0/29 [00:00<?, ?it/s]

tensor([0, 0, 1, 0, 1, 1, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 3.5915322129447538
tensor([1, 0, 0, 0, 0, 0, 1, 1, 1, 1])
torch.Size([24])
Gradient norm: 2.3448616856051396
tensor([1, 0, 1, 0, 0, 0, 1, 1, 0, 0])
torch.Size([24])
Gradient norm: 2.6874800734726643
tensor([1, 0, 0, 1, 1, 1, 0, 0, 1, 0])
torch.Size([24])
Gradient norm: 2.879618281559055
tensor([0, 1, 0, 1, 1, 0, 1, 1, 0, 1])
torch.Size([24])
Gradient norm: 2.204652885320574
tensor([0, 0, 1, 0, 1, 0, 1, 1, 1, 0])
torch.Size([24])
Gradient norm: 1.823504366705813
tensor([1, 0, 0, 0, 1, 0, 0, 1, 1, 0])
torch.Size([24])
Gradient norm: 3.0681463236499726
tensor([0, 1, 1, 0, 0, 1, 0, 1, 0, 0])
torch.Size([24])
Gradient norm: 3.6203118506350913
tensor([1, 0, 0, 1, 0, 0, 0, 1, 1, 1])
torch.Size([24])
Gradient norm: 3.240404181502586
tensor([1, 1, 1, 0, 0, 1, 1, 0, 0, 1])
torch.Size([24])
Gradient norm: 1.9909923727794143
tensor([1, 0, 0, 0, 0, 0, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 1.5110921770849453
tensor([1, 1, 

Epoch 368/600 - Training:   0%|          | 0/29 [00:00<?, ?it/s]

tensor([0, 0, 1, 0, 1, 1, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 3.415012865320835
tensor([1, 0, 0, 0, 0, 0, 1, 1, 1, 1])
torch.Size([24])
Gradient norm: 2.1607318384814187
tensor([1, 0, 1, 0, 0, 0, 1, 1, 0, 0])
torch.Size([24])
Gradient norm: 1.800403458961595
tensor([1, 0, 0, 1, 1, 1, 0, 0, 1, 0])
torch.Size([24])
Gradient norm: 2.4272586433106453
tensor([0, 1, 0, 1, 1, 0, 1, 1, 0, 1])
torch.Size([24])
Gradient norm: 2.166179757509403
tensor([0, 0, 1, 0, 1, 0, 1, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.5911681860965246
tensor([1, 0, 0, 0, 1, 0, 0, 1, 1, 0])
torch.Size([24])
Gradient norm: 3.103932248876332
tensor([0, 1, 1, 0, 0, 1, 0, 1, 0, 0])
torch.Size([24])
Gradient norm: 3.1818986449323776
tensor([1, 0, 0, 1, 0, 0, 0, 1, 1, 1])
torch.Size([24])
Gradient norm: 2.526687860064057
tensor([1, 1, 1, 0, 0, 1, 1, 0, 0, 1])
torch.Size([24])
Gradient norm: 2.242825526570045
tensor([1, 0, 0, 0, 0, 0, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 2.006127418387805
tensor([1, 1, 0, 

Epoch 369/600 - Training:   0%|          | 0/29 [00:00<?, ?it/s]

tensor([0, 0, 1, 0, 1, 1, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 2.658153922686332
tensor([1, 0, 0, 0, 0, 0, 1, 1, 1, 1])
torch.Size([24])
Gradient norm: 1.6650374537906119
tensor([1, 0, 1, 0, 0, 0, 1, 1, 0, 0])
torch.Size([24])
Gradient norm: 2.4707825568299095
tensor([1, 0, 0, 1, 1, 1, 0, 0, 1, 0])
torch.Size([24])
Gradient norm: 2.814087929003036
tensor([0, 1, 0, 1, 1, 0, 1, 1, 0, 1])
torch.Size([24])
Gradient norm: 1.6411628712026576
tensor([0, 0, 1, 0, 1, 0, 1, 1, 1, 0])
torch.Size([24])
Gradient norm: 1.9903646384029874
tensor([1, 0, 0, 0, 1, 0, 0, 1, 1, 0])
torch.Size([24])
Gradient norm: 1.9852152177972793
tensor([0, 1, 1, 0, 0, 1, 0, 1, 0, 0])
torch.Size([24])
Gradient norm: 2.565132642973986
tensor([1, 0, 0, 1, 0, 0, 0, 1, 1, 1])
torch.Size([24])
Gradient norm: 2.7151686804996387
tensor([1, 1, 1, 0, 0, 1, 1, 0, 0, 1])
torch.Size([24])
Gradient norm: 1.9951656403816522
tensor([1, 0, 0, 0, 0, 0, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 2.129033962716189
tensor([1, 1, 

Epoch 370/600 - Training:   0%|          | 0/29 [00:00<?, ?it/s]

tensor([0, 0, 1, 0, 1, 1, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 3.2427922016545607
tensor([1, 0, 0, 0, 0, 0, 1, 1, 1, 1])
torch.Size([24])
Gradient norm: 1.6410998548815539
tensor([1, 0, 1, 0, 0, 0, 1, 1, 0, 0])
torch.Size([24])
Gradient norm: 2.2880689144717694
tensor([1, 0, 0, 1, 1, 1, 0, 0, 1, 0])
torch.Size([24])
Gradient norm: 2.3785621030214337
tensor([0, 1, 0, 1, 1, 0, 1, 1, 0, 1])
torch.Size([24])
Gradient norm: 2.1351766452428467
tensor([0, 0, 1, 0, 1, 0, 1, 1, 1, 0])
torch.Size([24])
Gradient norm: 1.737747131713923
tensor([1, 0, 0, 0, 1, 0, 0, 1, 1, 0])
torch.Size([24])
Gradient norm: 3.646770974978106
tensor([0, 1, 1, 0, 0, 1, 0, 1, 0, 0])
torch.Size([24])
Gradient norm: 2.6721368584614402
tensor([1, 0, 0, 1, 0, 0, 0, 1, 1, 1])
torch.Size([24])
Gradient norm: 2.665744036997736
tensor([1, 1, 1, 0, 0, 1, 1, 0, 0, 1])
torch.Size([24])
Gradient norm: 1.7206327470626066
tensor([1, 0, 0, 0, 0, 0, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 2.1833135727635864
tensor([1, 1,

Epoch 371/600 - Training:   0%|          | 0/29 [00:00<?, ?it/s]

tensor([0, 0, 1, 0, 1, 1, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 3.580376026265853
tensor([1, 0, 0, 0, 0, 0, 1, 1, 1, 1])
torch.Size([24])
Gradient norm: 2.1395543295715966
tensor([1, 0, 1, 0, 0, 0, 1, 1, 0, 0])
torch.Size([24])
Gradient norm: 2.287368549067491
tensor([1, 0, 0, 1, 1, 1, 0, 0, 1, 0])
torch.Size([24])
Gradient norm: 2.8998442596981016
tensor([0, 1, 0, 1, 1, 0, 1, 1, 0, 1])
torch.Size([24])
Gradient norm: 1.7545895908816287
tensor([0, 0, 1, 0, 1, 0, 1, 1, 1, 0])
torch.Size([24])
Gradient norm: 1.9666372164443584
tensor([1, 0, 0, 0, 1, 0, 0, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.588811791587888
tensor([0, 1, 1, 0, 0, 1, 0, 1, 0, 0])
torch.Size([24])
Gradient norm: 2.822364027343213
tensor([1, 0, 0, 1, 0, 0, 0, 1, 1, 1])
torch.Size([24])
Gradient norm: 2.795028478526191
tensor([1, 1, 1, 0, 0, 1, 1, 0, 0, 1])
torch.Size([24])
Gradient norm: 1.9481940798345723
tensor([1, 0, 0, 0, 0, 0, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 2.105657622416179
tensor([1, 1, 0,

Epoch 372/600 - Training:   0%|          | 0/29 [00:00<?, ?it/s]

tensor([0, 0, 1, 0, 1, 1, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 2.8786862395774695
tensor([1, 0, 0, 0, 0, 0, 1, 1, 1, 1])
torch.Size([24])
Gradient norm: 2.0658488146622513
tensor([1, 0, 1, 0, 0, 0, 1, 1, 0, 0])
torch.Size([24])
Gradient norm: 2.3436739121413215
tensor([1, 0, 0, 1, 1, 1, 0, 0, 1, 0])
torch.Size([24])
Gradient norm: 2.1785214339628123
tensor([0, 1, 0, 1, 1, 0, 1, 1, 0, 1])
torch.Size([24])
Gradient norm: 1.9795711971325949
tensor([0, 0, 1, 0, 1, 0, 1, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.7706128417014866
tensor([1, 0, 0, 0, 1, 0, 0, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.727655574945674
tensor([0, 1, 1, 0, 0, 1, 0, 1, 0, 0])
torch.Size([24])
Gradient norm: 3.1963345980442934
tensor([1, 0, 0, 1, 0, 0, 0, 1, 1, 1])
torch.Size([24])
Gradient norm: 2.9930850872905044
tensor([1, 1, 1, 0, 0, 1, 1, 0, 0, 1])
torch.Size([24])
Gradient norm: 1.8273983352994678
tensor([1, 0, 0, 0, 0, 0, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 1.9140052974657054
tensor([1, 

Epoch 373/600 - Training:   0%|          | 0/29 [00:00<?, ?it/s]

tensor([0, 0, 1, 0, 1, 1, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 4.167292319593816
tensor([1, 0, 0, 0, 0, 0, 1, 1, 1, 1])
torch.Size([24])
Gradient norm: 2.7897291407707145
tensor([1, 0, 1, 0, 0, 0, 1, 1, 0, 0])
torch.Size([24])
Gradient norm: 1.8934116537303587
tensor([1, 0, 0, 1, 1, 1, 0, 0, 1, 0])
torch.Size([24])
Gradient norm: 2.111728799029773
tensor([0, 1, 0, 1, 1, 0, 1, 1, 0, 1])
torch.Size([24])
Gradient norm: 1.4989755270528387
tensor([0, 0, 1, 0, 1, 0, 1, 1, 1, 0])
torch.Size([24])
Gradient norm: 1.8781003847664657
tensor([1, 0, 0, 0, 1, 0, 0, 1, 1, 0])
torch.Size([24])
Gradient norm: 3.9373716746763683
tensor([0, 1, 1, 0, 0, 1, 0, 1, 0, 0])
torch.Size([24])
Gradient norm: 2.889788616027187
tensor([1, 0, 0, 1, 0, 0, 0, 1, 1, 1])
torch.Size([24])
Gradient norm: 2.9797742566783016
tensor([1, 1, 1, 0, 0, 1, 1, 0, 0, 1])
torch.Size([24])
Gradient norm: 1.6568648103091277
tensor([1, 0, 0, 0, 0, 0, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 2.801862448919145
tensor([1, 1, 

Epoch 374/600 - Training:   0%|          | 0/29 [00:00<?, ?it/s]

tensor([0, 0, 1, 0, 1, 1, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 3.5500461667959096
tensor([1, 0, 0, 0, 0, 0, 1, 1, 1, 1])
torch.Size([24])
Gradient norm: 2.141728533369271
tensor([1, 0, 1, 0, 0, 0, 1, 1, 0, 0])
torch.Size([24])
Gradient norm: 2.4455891020717884
tensor([1, 0, 0, 1, 1, 1, 0, 0, 1, 0])
torch.Size([24])
Gradient norm: 2.273648683448653
tensor([0, 1, 0, 1, 1, 0, 1, 1, 0, 1])
torch.Size([24])
Gradient norm: 1.890169055986774
tensor([0, 0, 1, 0, 1, 0, 1, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.5077250747767037
tensor([1, 0, 0, 0, 1, 0, 0, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.7703897565919107
tensor([0, 1, 1, 0, 0, 1, 0, 1, 0, 0])
torch.Size([24])
Gradient norm: 2.701186747088724
tensor([1, 0, 0, 1, 0, 0, 0, 1, 1, 1])
torch.Size([24])
Gradient norm: 2.7872297833850874
tensor([1, 1, 1, 0, 0, 1, 1, 0, 0, 1])
torch.Size([24])
Gradient norm: 1.8317862570471188
tensor([1, 0, 0, 0, 0, 0, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 1.4783620083557798
tensor([1, 1, 

Epoch 375/600 - Training:   0%|          | 0/29 [00:00<?, ?it/s]

tensor([0, 0, 1, 0, 1, 1, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 3.2504757335514842
tensor([1, 0, 0, 0, 0, 0, 1, 1, 1, 1])
torch.Size([24])
Gradient norm: 2.2356767692368598
tensor([1, 0, 1, 0, 0, 0, 1, 1, 0, 0])
torch.Size([24])
Gradient norm: 2.952987687778116
tensor([1, 0, 0, 1, 1, 1, 0, 0, 1, 0])
torch.Size([24])
Gradient norm: 2.7943169398429326
tensor([0, 1, 0, 1, 1, 0, 1, 1, 0, 1])
torch.Size([24])
Gradient norm: 2.014856634610143
tensor([0, 0, 1, 0, 1, 0, 1, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.6251473103484546
tensor([1, 0, 0, 0, 1, 0, 0, 1, 1, 0])
torch.Size([24])
Gradient norm: 3.443706278643401
tensor([0, 1, 1, 0, 0, 1, 0, 1, 0, 0])
torch.Size([24])
Gradient norm: 2.8504097516982956
tensor([1, 0, 0, 1, 0, 0, 0, 1, 1, 1])
torch.Size([24])
Gradient norm: 2.6613387651820295
tensor([1, 1, 1, 0, 0, 1, 1, 0, 0, 1])
torch.Size([24])
Gradient norm: 1.7583059813898074
tensor([1, 0, 0, 0, 0, 0, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 2.1334062879362543
tensor([1, 1,

Epoch 376/600 - Training:   0%|          | 0/29 [00:00<?, ?it/s]

tensor([0, 0, 1, 0, 1, 1, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 3.5842747546686917
tensor([1, 0, 0, 0, 0, 0, 1, 1, 1, 1])
torch.Size([24])
Gradient norm: 2.089365790122059
tensor([1, 0, 1, 0, 0, 0, 1, 1, 0, 0])
torch.Size([24])
Gradient norm: 2.681248828030278
tensor([1, 0, 0, 1, 1, 1, 0, 0, 1, 0])
torch.Size([24])
Gradient norm: 2.0415699164308307
tensor([0, 1, 0, 1, 1, 0, 1, 1, 0, 1])
torch.Size([24])
Gradient norm: 2.03424037899981
tensor([0, 0, 1, 0, 1, 0, 1, 1, 1, 0])
torch.Size([24])
Gradient norm: 1.756519543443219
tensor([1, 0, 0, 0, 1, 0, 0, 1, 1, 0])
torch.Size([24])
Gradient norm: 3.0949924206942487
tensor([0, 1, 1, 0, 0, 1, 0, 1, 0, 0])
torch.Size([24])
Gradient norm: 3.0808657299387203
tensor([1, 0, 0, 1, 0, 0, 0, 1, 1, 1])
torch.Size([24])
Gradient norm: 3.171313643458408
tensor([1, 1, 1, 0, 0, 1, 1, 0, 0, 1])
torch.Size([24])
Gradient norm: 2.2332516637538777
tensor([1, 0, 0, 0, 0, 0, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 1.6646639898426157
tensor([1, 1, 0,

Epoch 377/600 - Training:   0%|          | 0/29 [00:00<?, ?it/s]

tensor([0, 0, 1, 0, 1, 1, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 3.906881329686948
tensor([1, 0, 0, 0, 0, 0, 1, 1, 1, 1])
torch.Size([24])
Gradient norm: 2.155680028653618
tensor([1, 0, 1, 0, 0, 0, 1, 1, 0, 0])
torch.Size([24])
Gradient norm: 2.447666612981147
tensor([1, 0, 0, 1, 1, 1, 0, 0, 1, 0])
torch.Size([24])
Gradient norm: 2.1327911622900406
tensor([0, 1, 0, 1, 1, 0, 1, 1, 0, 1])
torch.Size([24])
Gradient norm: 1.7169541566830544
tensor([0, 0, 1, 0, 1, 0, 1, 1, 1, 0])
torch.Size([24])
Gradient norm: 1.8960151847521822
tensor([1, 0, 0, 0, 1, 0, 0, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.495788429152596
tensor([0, 1, 1, 0, 0, 1, 0, 1, 0, 0])
torch.Size([24])
Gradient norm: 3.350319819037219
tensor([1, 0, 0, 1, 0, 0, 0, 1, 1, 1])
torch.Size([24])
Gradient norm: 3.4968533784368834
tensor([1, 1, 1, 0, 0, 1, 1, 0, 0, 1])
torch.Size([24])
Gradient norm: 2.1732266385064403
tensor([1, 0, 0, 0, 0, 0, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 2.103633233384602
tensor([1, 1, 0,

Epoch 378/600 - Training:   0%|          | 0/29 [00:00<?, ?it/s]

tensor([0, 0, 1, 0, 1, 1, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 3.4996841624493986
tensor([1, 0, 0, 0, 0, 0, 1, 1, 1, 1])
torch.Size([24])
Gradient norm: 2.5154278426605416
tensor([1, 0, 1, 0, 0, 0, 1, 1, 0, 0])
torch.Size([24])
Gradient norm: 2.2773733842967583
tensor([1, 0, 0, 1, 1, 1, 0, 0, 1, 0])
torch.Size([24])
Gradient norm: 3.3457243595869444
tensor([0, 1, 0, 1, 1, 0, 1, 1, 0, 1])
torch.Size([24])
Gradient norm: 1.9741539585202244
tensor([0, 0, 1, 0, 1, 0, 1, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.0264479299493017
tensor([1, 0, 0, 0, 1, 0, 0, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.649697763665389
tensor([0, 1, 1, 0, 0, 1, 0, 1, 0, 0])
torch.Size([24])
Gradient norm: 3.105553823041457
tensor([1, 0, 0, 1, 0, 0, 0, 1, 1, 1])
torch.Size([24])
Gradient norm: 2.531103694687193
tensor([1, 1, 1, 0, 0, 1, 1, 0, 0, 1])
torch.Size([24])
Gradient norm: 2.0203498013702883
tensor([1, 0, 0, 0, 0, 0, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 2.1759962602441116
tensor([1, 1,

Epoch 379/600 - Training:   0%|          | 0/29 [00:00<?, ?it/s]

tensor([0, 0, 1, 0, 1, 1, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 3.985423921308677
tensor([1, 0, 0, 0, 0, 0, 1, 1, 1, 1])
torch.Size([24])
Gradient norm: 2.369670815802129
tensor([1, 0, 1, 0, 0, 0, 1, 1, 0, 0])
torch.Size([24])
Gradient norm: 1.9753531382066487
tensor([1, 0, 0, 1, 1, 1, 0, 0, 1, 0])
torch.Size([24])
Gradient norm: 2.27193972133101
tensor([0, 1, 0, 1, 1, 0, 1, 1, 0, 1])
torch.Size([24])
Gradient norm: 2.1452998570986206
tensor([0, 0, 1, 0, 1, 0, 1, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.62308410982193
tensor([1, 0, 0, 0, 1, 0, 0, 1, 1, 0])
torch.Size([24])
Gradient norm: 3.1407444921639076
tensor([0, 1, 1, 0, 0, 1, 0, 1, 0, 0])
torch.Size([24])
Gradient norm: 2.7067285053969634
tensor([1, 0, 0, 1, 0, 0, 0, 1, 1, 1])
torch.Size([24])
Gradient norm: 3.0584266064602845
tensor([1, 1, 1, 0, 0, 1, 1, 0, 0, 1])
torch.Size([24])
Gradient norm: 1.6752598720328375
tensor([1, 0, 0, 0, 0, 0, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 1.9560108776173135
tensor([1, 1, 0,

Epoch 380/600 - Training:   0%|          | 0/29 [00:00<?, ?it/s]

tensor([0, 0, 1, 0, 1, 1, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 3.2018770895485327
tensor([1, 0, 0, 0, 0, 0, 1, 1, 1, 1])
torch.Size([24])
Gradient norm: 2.521141808273229
tensor([1, 0, 1, 0, 0, 0, 1, 1, 0, 0])
torch.Size([24])
Gradient norm: 1.928018830906784
tensor([1, 0, 0, 1, 1, 1, 0, 0, 1, 0])
torch.Size([24])
Gradient norm: 2.5779005803150166
tensor([0, 1, 0, 1, 1, 0, 1, 1, 0, 1])
torch.Size([24])
Gradient norm: 2.5740444857181806
tensor([0, 0, 1, 0, 1, 0, 1, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.1863402425055516
tensor([1, 0, 0, 0, 1, 0, 0, 1, 1, 0])
torch.Size([24])
Gradient norm: 3.5429642137212154
tensor([0, 1, 1, 0, 0, 1, 0, 1, 0, 0])
torch.Size([24])
Gradient norm: 3.181246760157411
tensor([1, 0, 0, 1, 0, 0, 0, 1, 1, 1])
torch.Size([24])
Gradient norm: 2.696945873915745
tensor([1, 1, 1, 0, 0, 1, 1, 0, 0, 1])
torch.Size([24])
Gradient norm: 1.932026233662737
tensor([1, 0, 0, 0, 0, 0, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 1.975838947811389
tensor([1, 1, 0,

Epoch 381/600 - Training:   0%|          | 0/29 [00:00<?, ?it/s]

tensor([0, 0, 1, 0, 1, 1, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 3.5518788996085218
tensor([1, 0, 0, 0, 0, 0, 1, 1, 1, 1])
torch.Size([24])
Gradient norm: 2.166841879802156
tensor([1, 0, 1, 0, 0, 0, 1, 1, 0, 0])
torch.Size([24])
Gradient norm: 3.1847306471887347
tensor([1, 0, 0, 1, 1, 1, 0, 0, 1, 0])
torch.Size([24])
Gradient norm: 3.412427452962223
tensor([0, 1, 0, 1, 1, 0, 1, 1, 0, 1])
torch.Size([24])
Gradient norm: 2.117385386906015
tensor([0, 0, 1, 0, 1, 0, 1, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.0885860880777845
tensor([1, 0, 0, 0, 1, 0, 0, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.7106297963323893
tensor([0, 1, 1, 0, 0, 1, 0, 1, 0, 0])
torch.Size([24])
Gradient norm: 3.3496358159268147
tensor([1, 0, 0, 1, 0, 0, 0, 1, 1, 1])
torch.Size([24])
Gradient norm: 2.9516536813783025
tensor([1, 1, 1, 0, 0, 1, 1, 0, 0, 1])
torch.Size([24])
Gradient norm: 1.621879393155161
tensor([1, 0, 0, 0, 0, 0, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 1.8094928998873623
tensor([1, 1, 

Epoch 382/600 - Training:   0%|          | 0/29 [00:00<?, ?it/s]

tensor([0, 0, 1, 0, 1, 1, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 3.563250705228694
tensor([1, 0, 0, 0, 0, 0, 1, 1, 1, 1])
torch.Size([24])
Gradient norm: 1.9248664741534134
tensor([1, 0, 1, 0, 0, 0, 1, 1, 0, 0])
torch.Size([24])
Gradient norm: 2.483640181163534
tensor([1, 0, 0, 1, 1, 1, 0, 0, 1, 0])
torch.Size([24])
Gradient norm: 2.8255077326614293
tensor([0, 1, 0, 1, 1, 0, 1, 1, 0, 1])
torch.Size([24])
Gradient norm: 1.733346036398095
tensor([0, 0, 1, 0, 1, 0, 1, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.073920590878104
tensor([1, 0, 0, 0, 1, 0, 0, 1, 1, 0])
torch.Size([24])
Gradient norm: 3.1799212513188317
tensor([0, 1, 1, 0, 0, 1, 0, 1, 0, 0])
torch.Size([24])
Gradient norm: 2.983927273572987
tensor([1, 0, 0, 1, 0, 0, 0, 1, 1, 1])
torch.Size([24])
Gradient norm: 3.0677128511770375
tensor([1, 1, 1, 0, 0, 1, 1, 0, 0, 1])
torch.Size([24])
Gradient norm: 1.764565922793354
tensor([1, 0, 0, 0, 0, 0, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 2.2014551943961065
tensor([1, 1, 0,

Epoch 383/600 - Training:   0%|          | 0/29 [00:00<?, ?it/s]

tensor([0, 0, 1, 0, 1, 1, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 3.098640600492034
tensor([1, 0, 0, 0, 0, 0, 1, 1, 1, 1])
torch.Size([24])
Gradient norm: 2.1125945749272126
tensor([1, 0, 1, 0, 0, 0, 1, 1, 0, 0])
torch.Size([24])
Gradient norm: 2.077765645162476
tensor([1, 0, 0, 1, 1, 1, 0, 0, 1, 0])
torch.Size([24])
Gradient norm: 2.1290975905249683
tensor([0, 1, 0, 1, 1, 0, 1, 1, 0, 1])
torch.Size([24])
Gradient norm: 1.8326461607782079
tensor([0, 0, 1, 0, 1, 0, 1, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.294661913991413
tensor([1, 0, 0, 0, 1, 0, 0, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.8606704377500987
tensor([0, 1, 1, 0, 0, 1, 0, 1, 0, 0])
torch.Size([24])
Gradient norm: 3.396814290942529
tensor([1, 0, 0, 1, 0, 0, 0, 1, 1, 1])
torch.Size([24])
Gradient norm: 2.694331551888708
tensor([1, 1, 1, 0, 0, 1, 1, 0, 0, 1])
torch.Size([24])
Gradient norm: 2.2893221334535294
tensor([1, 0, 0, 0, 0, 0, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 1.9349606704332973
tensor([1, 1, 0

Epoch 384/600 - Training:   0%|          | 0/29 [00:00<?, ?it/s]

tensor([0, 0, 1, 0, 1, 1, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 2.720473988374531
tensor([1, 0, 0, 0, 0, 0, 1, 1, 1, 1])
torch.Size([24])
Gradient norm: 2.1007075073378987
tensor([1, 0, 1, 0, 0, 0, 1, 1, 0, 0])
torch.Size([24])
Gradient norm: 1.823892226623655
tensor([1, 0, 0, 1, 1, 1, 0, 0, 1, 0])
torch.Size([24])
Gradient norm: 3.0794517783246254
tensor([0, 1, 0, 1, 1, 0, 1, 1, 0, 1])
torch.Size([24])
Gradient norm: 1.8322681219591836
tensor([0, 0, 1, 0, 1, 0, 1, 1, 1, 0])
torch.Size([24])
Gradient norm: 1.7946010368946503
tensor([1, 0, 0, 0, 1, 0, 0, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.3812340750085284
tensor([0, 1, 1, 0, 0, 1, 0, 1, 0, 0])
torch.Size([24])
Gradient norm: 2.5782090002485964
tensor([1, 0, 0, 1, 0, 0, 0, 1, 1, 1])
torch.Size([24])
Gradient norm: 2.7568286213770743
tensor([1, 1, 1, 0, 0, 1, 1, 0, 0, 1])
torch.Size([24])
Gradient norm: 2.3238354377513812
tensor([1, 0, 0, 0, 0, 0, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 1.8195145500047056
tensor([1, 1

Epoch 385/600 - Training:   0%|          | 0/29 [00:00<?, ?it/s]

tensor([0, 0, 1, 0, 1, 1, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 4.01971992088458
tensor([1, 0, 0, 0, 0, 0, 1, 1, 1, 1])
torch.Size([24])
Gradient norm: 2.7448143582137283
tensor([1, 0, 1, 0, 0, 0, 1, 1, 0, 0])
torch.Size([24])
Gradient norm: 2.035622704912829
tensor([1, 0, 0, 1, 1, 1, 0, 0, 1, 0])
torch.Size([24])
Gradient norm: 2.6971466227569816
tensor([0, 1, 0, 1, 1, 0, 1, 1, 0, 1])
torch.Size([24])
Gradient norm: 2.178131292542207
tensor([0, 0, 1, 0, 1, 0, 1, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.404061514718364
tensor([1, 0, 0, 0, 1, 0, 0, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.8615529150834886
tensor([0, 1, 1, 0, 0, 1, 0, 1, 0, 0])
torch.Size([24])
Gradient norm: 3.2785474484054253
tensor([1, 0, 0, 1, 0, 0, 0, 1, 1, 1])
torch.Size([24])
Gradient norm: 3.1948322999430934
tensor([1, 1, 1, 0, 0, 1, 1, 0, 0, 1])
torch.Size([24])
Gradient norm: 1.5186188246645203
tensor([1, 0, 0, 0, 0, 0, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 1.586980486847707
tensor([1, 1, 0,

Epoch 386/600 - Training:   0%|          | 0/29 [00:00<?, ?it/s]

tensor([0, 0, 1, 0, 1, 1, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 3.3102874327898144
tensor([1, 0, 0, 0, 0, 0, 1, 1, 1, 1])
torch.Size([24])
Gradient norm: 2.774654489872268
tensor([1, 0, 1, 0, 0, 0, 1, 1, 0, 0])
torch.Size([24])
Gradient norm: 1.9793966907612677
tensor([1, 0, 0, 1, 1, 1, 0, 0, 1, 0])
torch.Size([24])
Gradient norm: 2.23432404223224
tensor([0, 1, 0, 1, 1, 0, 1, 1, 0, 1])
torch.Size([24])
Gradient norm: 2.065949187262913
tensor([0, 0, 1, 0, 1, 0, 1, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.5357579440059097
tensor([1, 0, 0, 0, 1, 0, 0, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.2463545694371776
tensor([0, 1, 1, 0, 0, 1, 0, 1, 0, 0])
torch.Size([24])
Gradient norm: 2.821840932182299
tensor([1, 0, 0, 1, 0, 0, 0, 1, 1, 1])
torch.Size([24])
Gradient norm: 3.1419453414865446
tensor([1, 1, 1, 0, 0, 1, 1, 0, 0, 1])
torch.Size([24])
Gradient norm: 1.932200733352891
tensor([1, 0, 0, 0, 0, 0, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 1.9684490763394737
tensor([1, 1, 0,

Epoch 387/600 - Training:   0%|          | 0/29 [00:00<?, ?it/s]

tensor([0, 0, 1, 0, 1, 1, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 3.5655970006218776
tensor([1, 0, 0, 0, 0, 0, 1, 1, 1, 1])
torch.Size([24])
Gradient norm: 2.3896060351369135
tensor([1, 0, 1, 0, 0, 0, 1, 1, 0, 0])
torch.Size([24])
Gradient norm: 2.1759425577656164
tensor([1, 0, 0, 1, 1, 1, 0, 0, 1, 0])
torch.Size([24])
Gradient norm: 3.0255130398796206
tensor([0, 1, 0, 1, 1, 0, 1, 1, 0, 1])
torch.Size([24])
Gradient norm: 1.8461738412790119
tensor([0, 0, 1, 0, 1, 0, 1, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.2506589509066437
tensor([1, 0, 0, 0, 1, 0, 0, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.7478427992763903
tensor([0, 1, 1, 0, 0, 1, 0, 1, 0, 0])
torch.Size([24])
Gradient norm: 3.09638694622028
tensor([1, 0, 0, 1, 0, 0, 0, 1, 1, 1])
torch.Size([24])
Gradient norm: 3.1725928408958706
tensor([1, 1, 1, 0, 0, 1, 1, 0, 0, 1])
torch.Size([24])
Gradient norm: 1.8727159892570306
tensor([1, 0, 0, 0, 0, 0, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 1.6664896048917601
tensor([1, 1

Epoch 388/600 - Training:   0%|          | 0/29 [00:00<?, ?it/s]

tensor([0, 0, 1, 0, 1, 1, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 3.274528369194085
tensor([1, 0, 0, 0, 0, 0, 1, 1, 1, 1])
torch.Size([24])
Gradient norm: 1.8017742022463223
tensor([1, 0, 1, 0, 0, 0, 1, 1, 0, 0])
torch.Size([24])
Gradient norm: 2.527891378276322
tensor([1, 0, 0, 1, 1, 1, 0, 0, 1, 0])
torch.Size([24])
Gradient norm: 3.1484106582237823
tensor([0, 1, 0, 1, 1, 0, 1, 1, 0, 1])
torch.Size([24])
Gradient norm: 1.712036980527004
tensor([0, 0, 1, 0, 1, 0, 1, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.168509048655883
tensor([1, 0, 0, 0, 1, 0, 0, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.1374977620249114
tensor([0, 1, 1, 0, 0, 1, 0, 1, 0, 0])
torch.Size([24])
Gradient norm: 2.352597144670712
tensor([1, 0, 0, 1, 0, 0, 0, 1, 1, 1])
torch.Size([24])
Gradient norm: 2.931533955277226
tensor([1, 1, 1, 0, 0, 1, 1, 0, 0, 1])
torch.Size([24])
Gradient norm: 1.7229747257537344
tensor([1, 0, 0, 0, 0, 0, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 1.7528335460914632
tensor([1, 1, 0,

Epoch 389/600 - Training:   0%|          | 0/29 [00:00<?, ?it/s]

tensor([0, 0, 1, 0, 1, 1, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 3.2889314222078094
tensor([1, 0, 0, 0, 0, 0, 1, 1, 1, 1])
torch.Size([24])
Gradient norm: 2.496378336421634
tensor([1, 0, 1, 0, 0, 0, 1, 1, 0, 0])
torch.Size([24])
Gradient norm: 2.330441082080148
tensor([1, 0, 0, 1, 1, 1, 0, 0, 1, 0])
torch.Size([24])
Gradient norm: 2.7175101041893934
tensor([0, 1, 0, 1, 1, 0, 1, 1, 0, 1])
torch.Size([24])
Gradient norm: 1.691092072424932
tensor([0, 0, 1, 0, 1, 0, 1, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.030972578718638
tensor([1, 0, 0, 0, 1, 0, 0, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.571635083583608
tensor([0, 1, 1, 0, 0, 1, 0, 1, 0, 0])
torch.Size([24])
Gradient norm: 3.126663460163403
tensor([1, 0, 0, 1, 0, 0, 0, 1, 1, 1])
torch.Size([24])
Gradient norm: 3.119067785883323
tensor([1, 1, 1, 0, 0, 1, 1, 0, 0, 1])
torch.Size([24])
Gradient norm: 1.9090165143537456
tensor([1, 0, 0, 0, 0, 0, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 1.7085794236007696
tensor([1, 1, 0, 

Epoch 390/600 - Training:   0%|          | 0/29 [00:00<?, ?it/s]

tensor([0, 0, 1, 0, 1, 1, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 2.891137059559998
tensor([1, 0, 0, 0, 0, 0, 1, 1, 1, 1])
torch.Size([24])
Gradient norm: 2.4683522823346293
tensor([1, 0, 1, 0, 0, 0, 1, 1, 0, 0])
torch.Size([24])
Gradient norm: 2.5608734599583345
tensor([1, 0, 0, 1, 1, 1, 0, 0, 1, 0])
torch.Size([24])
Gradient norm: 2.9534919665558403
tensor([0, 1, 0, 1, 1, 0, 1, 1, 0, 1])
torch.Size([24])
Gradient norm: 1.941405687135487
tensor([0, 0, 1, 0, 1, 0, 1, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.037473851424752
tensor([1, 0, 0, 0, 1, 0, 0, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.5890881477284453
tensor([0, 1, 1, 0, 0, 1, 0, 1, 0, 0])
torch.Size([24])
Gradient norm: 3.0886080212792737
tensor([1, 0, 0, 1, 0, 0, 0, 1, 1, 1])
torch.Size([24])
Gradient norm: 2.2763331119766774
tensor([1, 1, 1, 0, 0, 1, 1, 0, 0, 1])
torch.Size([24])
Gradient norm: 1.73370401491465
tensor([1, 0, 0, 0, 0, 0, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 2.113230628719355
tensor([1, 1, 0,

Epoch 391/600 - Training:   0%|          | 0/29 [00:00<?, ?it/s]

tensor([0, 0, 1, 0, 1, 1, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 3.859780959146092
tensor([1, 0, 0, 0, 0, 0, 1, 1, 1, 1])
torch.Size([24])
Gradient norm: 2.376937507232543
tensor([1, 0, 1, 0, 0, 0, 1, 1, 0, 0])
torch.Size([24])
Gradient norm: 2.821983684522047
tensor([1, 0, 0, 1, 1, 1, 0, 0, 1, 0])
torch.Size([24])
Gradient norm: 2.77618432783297
tensor([0, 1, 0, 1, 1, 0, 1, 1, 0, 1])
torch.Size([24])
Gradient norm: 2.592585760687538
tensor([0, 0, 1, 0, 1, 0, 1, 1, 1, 0])
torch.Size([24])
Gradient norm: 1.9276851252804863
tensor([1, 0, 0, 0, 1, 0, 0, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.9300170705371515
tensor([0, 1, 1, 0, 0, 1, 0, 1, 0, 0])
torch.Size([24])
Gradient norm: 3.755927337486718
tensor([1, 0, 0, 1, 0, 0, 0, 1, 1, 1])
torch.Size([24])
Gradient norm: 3.026187575788179
tensor([1, 1, 1, 0, 0, 1, 1, 0, 0, 1])
torch.Size([24])
Gradient norm: 1.6314453053037314
tensor([1, 0, 0, 0, 0, 0, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 2.1969499670785377
tensor([1, 1, 0, 0

Epoch 392/600 - Training:   0%|          | 0/29 [00:00<?, ?it/s]

tensor([0, 0, 1, 0, 1, 1, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 2.7498432925585408
tensor([1, 0, 0, 0, 0, 0, 1, 1, 1, 1])
torch.Size([24])
Gradient norm: 2.194091911103297
tensor([1, 0, 1, 0, 0, 0, 1, 1, 0, 0])
torch.Size([24])
Gradient norm: 2.581447918446099
tensor([1, 0, 0, 1, 1, 1, 0, 0, 1, 0])
torch.Size([24])
Gradient norm: 3.0293389288296835
tensor([0, 1, 0, 1, 1, 0, 1, 1, 0, 1])
torch.Size([24])
Gradient norm: 2.003416101431643
tensor([0, 0, 1, 0, 1, 0, 1, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.0572922045529953
tensor([1, 0, 0, 0, 1, 0, 0, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.3887710420708985
tensor([0, 1, 1, 0, 0, 1, 0, 1, 0, 0])
torch.Size([24])
Gradient norm: 3.2666199927926507
tensor([1, 0, 0, 1, 0, 0, 0, 1, 1, 1])
torch.Size([24])
Gradient norm: 3.0723653927601684
tensor([1, 1, 1, 0, 0, 1, 1, 0, 0, 1])
torch.Size([24])
Gradient norm: 1.9766450594840272
tensor([1, 0, 0, 0, 0, 0, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 1.6347714281443446
tensor([1, 1,

Epoch 393/600 - Training:   0%|          | 0/29 [00:00<?, ?it/s]

tensor([0, 0, 1, 0, 1, 1, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 3.357860635124154
tensor([1, 0, 0, 0, 0, 0, 1, 1, 1, 1])
torch.Size([24])
Gradient norm: 2.2224408617866342
tensor([1, 0, 1, 0, 0, 0, 1, 1, 0, 0])
torch.Size([24])
Gradient norm: 2.04631759046969
tensor([1, 0, 0, 1, 1, 1, 0, 0, 1, 0])
torch.Size([24])
Gradient norm: 2.9219295243006296
tensor([0, 1, 0, 1, 1, 0, 1, 1, 0, 1])
torch.Size([24])
Gradient norm: 1.8944326703615517
tensor([0, 0, 1, 0, 1, 0, 1, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.0782271709236646
tensor([1, 0, 0, 0, 1, 0, 0, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.619843126811512
tensor([0, 1, 1, 0, 0, 1, 0, 1, 0, 0])
torch.Size([24])
Gradient norm: 2.950529760695889
tensor([1, 0, 0, 1, 0, 0, 0, 1, 1, 1])
torch.Size([24])
Gradient norm: 2.8592537491132552
tensor([1, 1, 1, 0, 0, 1, 1, 0, 0, 1])
torch.Size([24])
Gradient norm: 1.926737593276691
tensor([1, 0, 0, 0, 0, 0, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 1.9380978104539839
tensor([1, 1, 0,

Epoch 394/600 - Training:   0%|          | 0/29 [00:00<?, ?it/s]

tensor([0, 0, 1, 0, 1, 1, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 3.79425444792726
tensor([1, 0, 0, 0, 0, 0, 1, 1, 1, 1])
torch.Size([24])
Gradient norm: 1.903201377723257
tensor([1, 0, 1, 0, 0, 0, 1, 1, 0, 0])
torch.Size([24])
Gradient norm: 1.74040301926922
tensor([1, 0, 0, 1, 1, 1, 0, 0, 1, 0])
torch.Size([24])
Gradient norm: 3.1579232991642687
tensor([0, 1, 0, 1, 1, 0, 1, 1, 0, 1])
torch.Size([24])
Gradient norm: 2.2417266912898377
tensor([0, 0, 1, 0, 1, 0, 1, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.068840784519879
tensor([1, 0, 0, 0, 1, 0, 0, 1, 1, 0])
torch.Size([24])
Gradient norm: 3.012304255107277
tensor([0, 1, 1, 0, 0, 1, 0, 1, 0, 0])
torch.Size([24])
Gradient norm: 3.490826086980292
tensor([1, 0, 0, 1, 0, 0, 0, 1, 1, 1])
torch.Size([24])
Gradient norm: 2.858388756332794
tensor([1, 1, 1, 0, 0, 1, 1, 0, 0, 1])
torch.Size([24])
Gradient norm: 1.8486778846595422
tensor([1, 0, 0, 0, 0, 0, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 1.7073565899027416
tensor([1, 1, 0, 0,

Epoch 395/600 - Training:   0%|          | 0/29 [00:00<?, ?it/s]

tensor([0, 0, 1, 0, 1, 1, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 2.9112549259378437
tensor([1, 0, 0, 0, 0, 0, 1, 1, 1, 1])
torch.Size([24])
Gradient norm: 2.457097232609304
tensor([1, 0, 1, 0, 0, 0, 1, 1, 0, 0])
torch.Size([24])
Gradient norm: 2.2100889828548587
tensor([1, 0, 0, 1, 1, 1, 0, 0, 1, 0])
torch.Size([24])
Gradient norm: 2.5561274104041183
tensor([0, 1, 0, 1, 1, 0, 1, 1, 0, 1])
torch.Size([24])
Gradient norm: 1.6753112029140707
tensor([0, 0, 1, 0, 1, 0, 1, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.326776916063847
tensor([1, 0, 0, 0, 1, 0, 0, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.2935357793655853
tensor([0, 1, 1, 0, 0, 1, 0, 1, 0, 0])
torch.Size([24])
Gradient norm: 2.8813256813994266
tensor([1, 0, 0, 1, 0, 0, 0, 1, 1, 1])
torch.Size([24])
Gradient norm: 2.6176003974443565
tensor([1, 1, 1, 0, 0, 1, 1, 0, 0, 1])
torch.Size([24])
Gradient norm: 1.793859691866389
tensor([1, 0, 0, 0, 0, 0, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 1.5668540665308917
tensor([1, 1,

Epoch 396/600 - Training:   0%|          | 0/29 [00:00<?, ?it/s]

tensor([0, 0, 1, 0, 1, 1, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 3.2641841812509362
tensor([1, 0, 0, 0, 0, 0, 1, 1, 1, 1])
torch.Size([24])
Gradient norm: 2.1957934666051195
tensor([1, 0, 1, 0, 0, 0, 1, 1, 0, 0])
torch.Size([24])
Gradient norm: 2.736101490611018
tensor([1, 0, 0, 1, 1, 1, 0, 0, 1, 0])
torch.Size([24])
Gradient norm: 3.2777577921369807
tensor([0, 1, 0, 1, 1, 0, 1, 1, 0, 1])
torch.Size([24])
Gradient norm: 1.5519446813666153
tensor([0, 0, 1, 0, 1, 0, 1, 1, 1, 0])
torch.Size([24])
Gradient norm: 1.9941110177457995
tensor([1, 0, 0, 0, 1, 0, 0, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.68252324149764
tensor([0, 1, 1, 0, 0, 1, 0, 1, 0, 0])
torch.Size([24])
Gradient norm: 2.970964337104354
tensor([1, 0, 0, 1, 0, 0, 0, 1, 1, 1])
torch.Size([24])
Gradient norm: 2.859050189924356
tensor([1, 1, 1, 0, 0, 1, 1, 0, 0, 1])
torch.Size([24])
Gradient norm: 1.9342019382376914
tensor([1, 0, 0, 0, 0, 0, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 1.9338798036319496
tensor([1, 1, 0

Epoch 397/600 - Training:   0%|          | 0/29 [00:00<?, ?it/s]

tensor([0, 0, 1, 0, 1, 1, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 3.487106518844485
tensor([1, 0, 0, 0, 0, 0, 1, 1, 1, 1])
torch.Size([24])
Gradient norm: 2.057720665895108
tensor([1, 0, 1, 0, 0, 0, 1, 1, 0, 0])
torch.Size([24])
Gradient norm: 2.4106972812357226
tensor([1, 0, 0, 1, 1, 1, 0, 0, 1, 0])
torch.Size([24])
Gradient norm: 2.534142246281062
tensor([0, 1, 0, 1, 1, 0, 1, 1, 0, 1])
torch.Size([24])
Gradient norm: 1.922785851142716
tensor([0, 0, 1, 0, 1, 0, 1, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.564221467268997
tensor([1, 0, 0, 0, 1, 0, 0, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.824336913540594
tensor([0, 1, 1, 0, 0, 1, 0, 1, 0, 0])
torch.Size([24])
Gradient norm: 3.310266506871198
tensor([1, 0, 0, 1, 0, 0, 0, 1, 1, 1])
torch.Size([24])
Gradient norm: 2.8074973255527746
tensor([1, 1, 1, 0, 0, 1, 1, 0, 0, 1])
torch.Size([24])
Gradient norm: 2.1124441627211
tensor([1, 0, 0, 0, 0, 0, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 1.739217702931551
tensor([1, 1, 0, 0, 1

Epoch 398/600 - Training:   0%|          | 0/29 [00:00<?, ?it/s]

tensor([0, 0, 1, 0, 1, 1, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 4.063042993275267
tensor([1, 0, 0, 0, 0, 0, 1, 1, 1, 1])
torch.Size([24])
Gradient norm: 2.0495620488081783
tensor([1, 0, 1, 0, 0, 0, 1, 1, 0, 0])
torch.Size([24])
Gradient norm: 2.29681193051056
tensor([1, 0, 0, 1, 1, 1, 0, 0, 1, 0])
torch.Size([24])
Gradient norm: 2.5700480380716333
tensor([0, 1, 0, 1, 1, 0, 1, 1, 0, 1])
torch.Size([24])
Gradient norm: 1.594970774442401
tensor([0, 0, 1, 0, 1, 0, 1, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.071342163763472
tensor([1, 0, 0, 0, 1, 0, 0, 1, 1, 0])
torch.Size([24])
Gradient norm: 3.0584106961898097
tensor([0, 1, 1, 0, 0, 1, 0, 1, 0, 0])
torch.Size([24])
Gradient norm: 3.903970465524219
tensor([1, 0, 0, 1, 0, 0, 0, 1, 1, 1])
torch.Size([24])
Gradient norm: 3.1940225360057943
tensor([1, 1, 1, 0, 0, 1, 1, 0, 0, 1])
torch.Size([24])
Gradient norm: 1.9258270858031767
tensor([1, 0, 0, 0, 0, 0, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 1.8691519832963424
tensor([1, 1, 0,

Epoch 399/600 - Training:   0%|          | 0/29 [00:00<?, ?it/s]

tensor([0, 0, 1, 0, 1, 1, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 3.5972075355405146
tensor([1, 0, 0, 0, 0, 0, 1, 1, 1, 1])
torch.Size([24])
Gradient norm: 2.2245496440263457
tensor([1, 0, 1, 0, 0, 0, 1, 1, 0, 0])
torch.Size([24])
Gradient norm: 1.6467966827440614
tensor([1, 0, 0, 1, 1, 1, 0, 0, 1, 0])
torch.Size([24])
Gradient norm: 2.9426515296135736
tensor([0, 1, 0, 1, 1, 0, 1, 1, 0, 1])
torch.Size([24])
Gradient norm: 1.9556381291976521
tensor([0, 0, 1, 0, 1, 0, 1, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.1575358028523386
tensor([1, 0, 0, 0, 1, 0, 0, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.8312539353145656
tensor([0, 1, 1, 0, 0, 1, 0, 1, 0, 0])
torch.Size([24])
Gradient norm: 2.6765217022546315
tensor([1, 0, 0, 1, 0, 0, 0, 1, 1, 1])
torch.Size([24])
Gradient norm: 3.379724491251734
tensor([1, 1, 1, 0, 0, 1, 1, 0, 0, 1])
torch.Size([24])
Gradient norm: 2.2110392938555354
tensor([1, 0, 0, 0, 0, 0, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 1.594814131766637
tensor([1, 1

Epoch 400/600 - Training:   0%|          | 0/29 [00:00<?, ?it/s]

tensor([0, 0, 1, 0, 1, 1, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 3.2825665334708023
tensor([1, 0, 0, 0, 0, 0, 1, 1, 1, 1])
torch.Size([24])
Gradient norm: 1.9241264646182374
tensor([1, 0, 1, 0, 0, 0, 1, 1, 0, 0])
torch.Size([24])
Gradient norm: 1.8526839326446056
tensor([1, 0, 0, 1, 1, 1, 0, 0, 1, 0])
torch.Size([24])
Gradient norm: 2.4948900686972206
tensor([0, 1, 0, 1, 1, 0, 1, 1, 0, 1])
torch.Size([24])
Gradient norm: 2.08473524641608
tensor([0, 0, 1, 0, 1, 0, 1, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.6040628911251824
tensor([1, 0, 0, 0, 1, 0, 0, 1, 1, 0])
torch.Size([24])
Gradient norm: 3.2568666491387193
tensor([0, 1, 1, 0, 0, 1, 0, 1, 0, 0])
torch.Size([24])
Gradient norm: 3.035474837394073
tensor([1, 0, 0, 1, 0, 0, 0, 1, 1, 1])
torch.Size([24])
Gradient norm: 2.3855551664176677
tensor([1, 1, 1, 0, 0, 1, 1, 0, 0, 1])
torch.Size([24])
Gradient norm: 1.6858836479617594
tensor([1, 0, 0, 0, 0, 0, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 1.491435611489911
tensor([1, 1, 

Epoch 401/600 - Training:   0%|          | 0/29 [00:00<?, ?it/s]

tensor([0, 0, 1, 0, 1, 1, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 3.4248569907218416
tensor([1, 0, 0, 0, 0, 0, 1, 1, 1, 1])
torch.Size([24])
Gradient norm: 2.051789036366052
tensor([1, 0, 1, 0, 0, 0, 1, 1, 0, 0])
torch.Size([24])
Gradient norm: 2.527438688084756
tensor([1, 0, 0, 1, 1, 1, 0, 0, 1, 0])
torch.Size([24])
Gradient norm: 3.22599276634
tensor([0, 1, 0, 1, 1, 0, 1, 1, 0, 1])
torch.Size([24])
Gradient norm: 1.7098756163347781
tensor([0, 0, 1, 0, 1, 0, 1, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.315141919752399
tensor([1, 0, 0, 0, 1, 0, 0, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.4349583040269693
tensor([0, 1, 1, 0, 0, 1, 0, 1, 0, 0])
torch.Size([24])
Gradient norm: 2.4917894619194234
tensor([1, 0, 0, 1, 0, 0, 0, 1, 1, 1])
torch.Size([24])
Gradient norm: 3.3725626156098745
tensor([1, 1, 1, 0, 0, 1, 1, 0, 0, 1])
torch.Size([24])
Gradient norm: 1.7850387246738075
tensor([1, 0, 0, 0, 0, 0, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 1.8387040254386005
tensor([1, 1, 0, 0

Epoch 402/600 - Training:   0%|          | 0/29 [00:00<?, ?it/s]

tensor([0, 0, 1, 0, 1, 1, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 3.4308686050778747
tensor([1, 0, 0, 0, 0, 0, 1, 1, 1, 1])
torch.Size([24])
Gradient norm: 2.2523466053336154
tensor([1, 0, 1, 0, 0, 0, 1, 1, 0, 0])
torch.Size([24])
Gradient norm: 2.5059681735868597
tensor([1, 0, 0, 1, 1, 1, 0, 0, 1, 0])
torch.Size([24])
Gradient norm: 2.974836061295943
tensor([0, 1, 0, 1, 1, 0, 1, 1, 0, 1])
torch.Size([24])
Gradient norm: 1.8353764191474693
tensor([0, 0, 1, 0, 1, 0, 1, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.4633082706197635
tensor([1, 0, 0, 0, 1, 0, 0, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.4298011242155564
tensor([0, 1, 1, 0, 0, 1, 0, 1, 0, 0])
torch.Size([24])
Gradient norm: 3.17388535039195
tensor([1, 0, 0, 1, 0, 0, 0, 1, 1, 1])
torch.Size([24])
Gradient norm: 3.1828341550185186
tensor([1, 1, 1, 0, 0, 1, 1, 0, 0, 1])
torch.Size([24])
Gradient norm: 1.7817680482878586
tensor([1, 0, 0, 0, 0, 0, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 1.5195549194516311
tensor([1, 1,

Epoch 403/600 - Training:   0%|          | 0/29 [00:00<?, ?it/s]

tensor([0, 0, 1, 0, 1, 1, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 3.3081299704752714
tensor([1, 0, 0, 0, 0, 0, 1, 1, 1, 1])
torch.Size([24])
Gradient norm: 2.4489207665580555
tensor([1, 0, 1, 0, 0, 0, 1, 1, 0, 0])
torch.Size([24])
Gradient norm: 2.5168553673726617
tensor([1, 0, 0, 1, 1, 1, 0, 0, 1, 0])
torch.Size([24])
Gradient norm: 2.780241672038472
tensor([0, 1, 0, 1, 1, 0, 1, 1, 0, 1])
torch.Size([24])
Gradient norm: 2.0086054199806003
tensor([0, 0, 1, 0, 1, 0, 1, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.4998744987999526
tensor([1, 0, 0, 0, 1, 0, 0, 1, 1, 0])
torch.Size([24])
Gradient norm: 3.0556576424278212
tensor([0, 1, 1, 0, 0, 1, 0, 1, 0, 0])
torch.Size([24])
Gradient norm: 3.700512160717578
tensor([1, 0, 0, 1, 0, 0, 0, 1, 1, 1])
torch.Size([24])
Gradient norm: 3.5895145171141287
tensor([1, 1, 1, 0, 0, 1, 1, 0, 0, 1])
torch.Size([24])
Gradient norm: 1.9492071382096643
tensor([1, 0, 0, 0, 0, 0, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 2.261031834735156
tensor([1, 1,

Epoch 404/600 - Training:   0%|          | 0/29 [00:00<?, ?it/s]

tensor([0, 0, 1, 0, 1, 1, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 4.128041716282324
tensor([1, 0, 0, 0, 0, 0, 1, 1, 1, 1])
torch.Size([24])
Gradient norm: 2.308874860673388
tensor([1, 0, 1, 0, 0, 0, 1, 1, 0, 0])
torch.Size([24])
Gradient norm: 2.073073776793991
tensor([1, 0, 0, 1, 1, 1, 0, 0, 1, 0])
torch.Size([24])
Gradient norm: 2.902053566916435
tensor([0, 1, 0, 1, 1, 0, 1, 1, 0, 1])
torch.Size([24])
Gradient norm: 1.7465781338317687
tensor([0, 0, 1, 0, 1, 0, 1, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.285128199704033
tensor([1, 0, 0, 0, 1, 0, 0, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.9404507596060103
tensor([0, 1, 1, 0, 0, 1, 0, 1, 0, 0])
torch.Size([24])
Gradient norm: 2.861741575562756
tensor([1, 0, 0, 1, 0, 0, 0, 1, 1, 1])
torch.Size([24])
Gradient norm: 2.652568198069062
tensor([1, 1, 1, 0, 0, 1, 1, 0, 0, 1])
torch.Size([24])
Gradient norm: 2.0380501037784744
tensor([1, 0, 0, 0, 0, 0, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 1.9103498719599645
tensor([1, 1, 0, 

Epoch 405/600 - Training:   0%|          | 0/29 [00:00<?, ?it/s]

tensor([0, 0, 1, 0, 1, 1, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 3.3181269971521217
tensor([1, 0, 0, 0, 0, 0, 1, 1, 1, 1])
torch.Size([24])
Gradient norm: 2.0576811148118086
tensor([1, 0, 1, 0, 0, 0, 1, 1, 0, 0])
torch.Size([24])
Gradient norm: 2.389370982165887
tensor([1, 0, 0, 1, 1, 1, 0, 0, 1, 0])
torch.Size([24])
Gradient norm: 2.441595821799401
tensor([0, 1, 0, 1, 1, 0, 1, 1, 0, 1])
torch.Size([24])
Gradient norm: 1.7113217544758745
tensor([0, 0, 1, 0, 1, 0, 1, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.456081518738263
tensor([1, 0, 0, 0, 1, 0, 0, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.6437978143439063
tensor([0, 1, 1, 0, 0, 1, 0, 1, 0, 0])
torch.Size([24])
Gradient norm: 2.7768026560572907
tensor([1, 0, 0, 1, 0, 0, 0, 1, 1, 1])
torch.Size([24])
Gradient norm: 2.8498909692498273
tensor([1, 1, 1, 0, 0, 1, 1, 0, 0, 1])
torch.Size([24])
Gradient norm: 1.7374672764042494
tensor([1, 0, 0, 0, 0, 0, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 1.4514994019484093
tensor([1, 1,

Epoch 406/600 - Training:   0%|          | 0/29 [00:00<?, ?it/s]

tensor([0, 0, 1, 0, 1, 1, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 3.6408937898226013
tensor([1, 0, 0, 0, 0, 0, 1, 1, 1, 1])
torch.Size([24])
Gradient norm: 1.8657886054756563
tensor([1, 0, 1, 0, 0, 0, 1, 1, 0, 0])
torch.Size([24])
Gradient norm: 2.437680258988858
tensor([1, 0, 0, 1, 1, 1, 0, 0, 1, 0])
torch.Size([24])
Gradient norm: 2.7717638633264747
tensor([0, 1, 0, 1, 1, 0, 1, 1, 0, 1])
torch.Size([24])
Gradient norm: 1.7595294821845127
tensor([0, 0, 1, 0, 1, 0, 1, 1, 1, 0])
torch.Size([24])
Gradient norm: 1.9386731875483136
tensor([1, 0, 0, 0, 1, 0, 0, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.2024350428143356
tensor([0, 1, 1, 0, 0, 1, 0, 1, 0, 0])
torch.Size([24])
Gradient norm: 3.197228385860592
tensor([1, 0, 0, 1, 0, 0, 0, 1, 1, 1])
torch.Size([24])
Gradient norm: 3.8124735527285205
tensor([1, 1, 1, 0, 0, 1, 1, 0, 0, 1])
torch.Size([24])
Gradient norm: 1.776537159987257
tensor([1, 0, 0, 0, 0, 0, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 1.691412312494536
tensor([1, 1, 

Epoch 407/600 - Training:   0%|          | 0/29 [00:00<?, ?it/s]

tensor([0, 0, 1, 0, 1, 1, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 3.8866482349625833
tensor([1, 0, 0, 0, 0, 0, 1, 1, 1, 1])
torch.Size([24])
Gradient norm: 2.1554947835410343
tensor([1, 0, 1, 0, 0, 0, 1, 1, 0, 0])
torch.Size([24])
Gradient norm: 1.8698576698479168
tensor([1, 0, 0, 1, 1, 1, 0, 0, 1, 0])
torch.Size([24])
Gradient norm: 2.029637353258408
tensor([0, 1, 0, 1, 1, 0, 1, 1, 0, 1])
torch.Size([24])
Gradient norm: 1.6109759970573723
tensor([0, 0, 1, 0, 1, 0, 1, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.065325559021968
tensor([1, 0, 0, 0, 1, 0, 0, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.6829572220641196
tensor([0, 1, 1, 0, 0, 1, 0, 1, 0, 0])
torch.Size([24])
Gradient norm: 2.6527906675334623
tensor([1, 0, 0, 1, 0, 0, 0, 1, 1, 1])
torch.Size([24])
Gradient norm: 2.48662332036166
tensor([1, 1, 1, 0, 0, 1, 1, 0, 0, 1])
torch.Size([24])
Gradient norm: 2.1191557542828914
tensor([1, 0, 0, 0, 0, 0, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 2.078536282295986
tensor([1, 1, 0

Epoch 408/600 - Training:   0%|          | 0/29 [00:00<?, ?it/s]

tensor([0, 0, 1, 0, 1, 1, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 4.415546202832103
tensor([1, 0, 0, 0, 0, 0, 1, 1, 1, 1])
torch.Size([24])
Gradient norm: 1.6611177244741444
tensor([1, 0, 1, 0, 0, 0, 1, 1, 0, 0])
torch.Size([24])
Gradient norm: 2.327153538524154
tensor([1, 0, 0, 1, 1, 1, 0, 0, 1, 0])
torch.Size([24])
Gradient norm: 2.6332891160030956
tensor([0, 1, 0, 1, 1, 0, 1, 1, 0, 1])
torch.Size([24])
Gradient norm: 1.9934373176740385
tensor([0, 0, 1, 0, 1, 0, 1, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.131420360470685
tensor([1, 0, 0, 0, 1, 0, 0, 1, 1, 0])
torch.Size([24])
Gradient norm: 3.1193868510100664
tensor([0, 1, 1, 0, 0, 1, 0, 1, 0, 0])
torch.Size([24])
Gradient norm: 3.4062632963842976
tensor([1, 0, 0, 1, 0, 0, 0, 1, 1, 1])
torch.Size([24])
Gradient norm: 2.432303784407373
tensor([1, 1, 1, 0, 0, 1, 1, 0, 0, 1])
torch.Size([24])
Gradient norm: 2.1262254721835103
tensor([1, 0, 0, 0, 0, 0, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 2.056918157234395
tensor([1, 1, 0

Epoch 409/600 - Training:   0%|          | 0/29 [00:00<?, ?it/s]

tensor([0, 0, 1, 0, 1, 1, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 2.683128629718264
tensor([1, 0, 0, 0, 0, 0, 1, 1, 1, 1])
torch.Size([24])
Gradient norm: 1.7357648762570652
tensor([1, 0, 1, 0, 0, 0, 1, 1, 0, 0])
torch.Size([24])
Gradient norm: 2.722578849074826
tensor([1, 0, 0, 1, 1, 1, 0, 0, 1, 0])
torch.Size([24])
Gradient norm: 2.5821682439284706
tensor([0, 1, 0, 1, 1, 0, 1, 1, 0, 1])
torch.Size([24])
Gradient norm: 2.2067923964210356
tensor([0, 0, 1, 0, 1, 0, 1, 1, 1, 0])
torch.Size([24])
Gradient norm: 1.9361138504746513
tensor([1, 0, 0, 0, 1, 0, 0, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.8982399690614407
tensor([0, 1, 1, 0, 0, 1, 0, 1, 0, 0])
torch.Size([24])
Gradient norm: 2.7425554681691664
tensor([1, 0, 0, 1, 0, 0, 0, 1, 1, 1])
torch.Size([24])
Gradient norm: 2.4705913256622956
tensor([1, 1, 1, 0, 0, 1, 1, 0, 0, 1])
torch.Size([24])
Gradient norm: 1.9797081749955348
tensor([1, 0, 0, 0, 0, 0, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 1.9798723559586904
tensor([1, 1

Epoch 410/600 - Training:   0%|          | 0/29 [00:00<?, ?it/s]

tensor([0, 0, 1, 0, 1, 1, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 4.184887825186216
tensor([1, 0, 0, 0, 0, 0, 1, 1, 1, 1])
torch.Size([24])
Gradient norm: 2.229553774358387
tensor([1, 0, 1, 0, 0, 0, 1, 1, 0, 0])
torch.Size([24])
Gradient norm: 2.672271560235722
tensor([1, 0, 0, 1, 1, 1, 0, 0, 1, 0])
torch.Size([24])
Gradient norm: 2.9596719172951227
tensor([0, 1, 0, 1, 1, 0, 1, 1, 0, 1])
torch.Size([24])
Gradient norm: 1.6100379872735453
tensor([0, 0, 1, 0, 1, 0, 1, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.8293711716540737
tensor([1, 0, 0, 0, 1, 0, 0, 1, 1, 0])
torch.Size([24])
Gradient norm: 3.003296599459798
tensor([0, 1, 1, 0, 0, 1, 0, 1, 0, 0])
torch.Size([24])
Gradient norm: 3.496770531579031
tensor([1, 0, 0, 1, 0, 0, 0, 1, 1, 1])
torch.Size([24])
Gradient norm: 2.892640929691789
tensor([1, 1, 1, 0, 0, 1, 1, 0, 0, 1])
torch.Size([24])
Gradient norm: 2.214598224311754
tensor([1, 0, 0, 0, 0, 0, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 1.5582261444336494
tensor([1, 1, 0, 

Epoch 411/600 - Training:   0%|          | 0/29 [00:00<?, ?it/s]

tensor([0, 0, 1, 0, 1, 1, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 3.2462158206005802
tensor([1, 0, 0, 0, 0, 0, 1, 1, 1, 1])
torch.Size([24])
Gradient norm: 2.186077887492841
tensor([1, 0, 1, 0, 0, 0, 1, 1, 0, 0])
torch.Size([24])
Gradient norm: 2.036532792799791
tensor([1, 0, 0, 1, 1, 1, 0, 0, 1, 0])
torch.Size([24])
Gradient norm: 2.886229896929124
tensor([0, 1, 0, 1, 1, 0, 1, 1, 0, 1])
torch.Size([24])
Gradient norm: 1.777009054104421
tensor([0, 0, 1, 0, 1, 0, 1, 1, 1, 0])
torch.Size([24])
Gradient norm: 1.8955304744115586
tensor([1, 0, 0, 0, 1, 0, 0, 1, 1, 0])
torch.Size([24])
Gradient norm: 3.3940974039988943
tensor([0, 1, 1, 0, 0, 1, 0, 1, 0, 0])
torch.Size([24])
Gradient norm: 3.562526131257382
tensor([1, 0, 0, 1, 0, 0, 0, 1, 1, 1])
torch.Size([24])
Gradient norm: 2.720430689874199
tensor([1, 1, 1, 0, 0, 1, 1, 0, 0, 1])
torch.Size([24])
Gradient norm: 1.9398461233890236
tensor([1, 0, 0, 0, 0, 0, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 1.8620471420311673
tensor([1, 1, 0,

Epoch 412/600 - Training:   0%|          | 0/29 [00:00<?, ?it/s]

tensor([0, 0, 1, 0, 1, 1, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 3.1803164202224035
tensor([1, 0, 0, 0, 0, 0, 1, 1, 1, 1])
torch.Size([24])
Gradient norm: 2.037066188306406
tensor([1, 0, 1, 0, 0, 0, 1, 1, 0, 0])
torch.Size([24])
Gradient norm: 2.220009741563895
tensor([1, 0, 0, 1, 1, 1, 0, 0, 1, 0])
torch.Size([24])
Gradient norm: 2.936164473052561
tensor([0, 1, 0, 1, 1, 0, 1, 1, 0, 1])
torch.Size([24])
Gradient norm: 1.7976165420897043
tensor([0, 0, 1, 0, 1, 0, 1, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.202291732684048
tensor([1, 0, 0, 0, 1, 0, 0, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.949252506051282
tensor([0, 1, 1, 0, 0, 1, 0, 1, 0, 0])
torch.Size([24])
Gradient norm: 2.4389798940883147
tensor([1, 0, 0, 1, 0, 0, 0, 1, 1, 1])
torch.Size([24])
Gradient norm: 2.8260930117380676
tensor([1, 1, 1, 0, 0, 1, 1, 0, 0, 1])
torch.Size([24])
Gradient norm: 1.8854653599480593
tensor([1, 0, 0, 0, 0, 0, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 1.6266584024099713
tensor([1, 1, 0

Epoch 413/600 - Training:   0%|          | 0/29 [00:00<?, ?it/s]

tensor([0, 0, 1, 0, 1, 1, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 3.5894893087461894
tensor([1, 0, 0, 0, 0, 0, 1, 1, 1, 1])
torch.Size([24])
Gradient norm: 1.8817242010773692
tensor([1, 0, 1, 0, 0, 0, 1, 1, 0, 0])
torch.Size([24])
Gradient norm: 2.3507437104989686
tensor([1, 0, 0, 1, 1, 1, 0, 0, 1, 0])
torch.Size([24])
Gradient norm: 2.3793661459804696
tensor([0, 1, 0, 1, 1, 0, 1, 1, 0, 1])
torch.Size([24])
Gradient norm: 2.2127847929762945
tensor([0, 0, 1, 0, 1, 0, 1, 1, 1, 0])
torch.Size([24])
Gradient norm: 1.5825226559421537
tensor([1, 0, 0, 0, 1, 0, 0, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.8013112394517683
tensor([0, 1, 1, 0, 0, 1, 0, 1, 0, 0])
torch.Size([24])
Gradient norm: 3.5786391314041732
tensor([1, 0, 0, 1, 0, 0, 0, 1, 1, 1])
torch.Size([24])
Gradient norm: 2.9985141646764815
tensor([1, 1, 1, 0, 0, 1, 1, 0, 0, 1])
torch.Size([24])
Gradient norm: 1.81053115164993
tensor([1, 0, 0, 0, 0, 0, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 2.0878504250197683
tensor([1, 1

Epoch 414/600 - Training:   0%|          | 0/29 [00:00<?, ?it/s]

tensor([0, 0, 1, 0, 1, 1, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 2.735150228250152
tensor([1, 0, 0, 0, 0, 0, 1, 1, 1, 1])
torch.Size([24])
Gradient norm: 2.152040735794565
tensor([1, 0, 1, 0, 0, 0, 1, 1, 0, 0])
torch.Size([24])
Gradient norm: 2.3341662884535697
tensor([1, 0, 0, 1, 1, 1, 0, 0, 1, 0])
torch.Size([24])
Gradient norm: 2.5636951299059323
tensor([0, 1, 0, 1, 1, 0, 1, 1, 0, 1])
torch.Size([24])
Gradient norm: 2.018377514639456
tensor([0, 0, 1, 0, 1, 0, 1, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.0512733819624693
tensor([1, 0, 0, 0, 1, 0, 0, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.3250363493513486
tensor([0, 1, 1, 0, 0, 1, 0, 1, 0, 0])
torch.Size([24])
Gradient norm: 2.53124934663852
tensor([1, 0, 0, 1, 0, 0, 0, 1, 1, 1])
torch.Size([24])
Gradient norm: 2.8653233850328985
tensor([1, 1, 1, 0, 0, 1, 1, 0, 0, 1])
torch.Size([24])
Gradient norm: 1.8835154854226497
tensor([1, 0, 0, 0, 0, 0, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 1.602785264104435
tensor([1, 1, 0,

Epoch 415/600 - Training:   0%|          | 0/29 [00:00<?, ?it/s]

tensor([0, 0, 1, 0, 1, 1, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 3.3559557644097397
tensor([1, 0, 0, 0, 0, 0, 1, 1, 1, 1])
torch.Size([24])
Gradient norm: 2.4646587873086174
tensor([1, 0, 1, 0, 0, 0, 1, 1, 0, 0])
torch.Size([24])
Gradient norm: 2.352528817557835
tensor([1, 0, 0, 1, 1, 1, 0, 0, 1, 0])
torch.Size([24])
Gradient norm: 3.3301685266401493
tensor([0, 1, 0, 1, 1, 0, 1, 1, 0, 1])
torch.Size([24])
Gradient norm: 1.7347557100270312
tensor([0, 0, 1, 0, 1, 0, 1, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.2812339339782737
tensor([1, 0, 0, 0, 1, 0, 0, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.7940239041100474
tensor([0, 1, 1, 0, 0, 1, 0, 1, 0, 0])
torch.Size([24])
Gradient norm: 2.6815896126881364
tensor([1, 0, 0, 1, 0, 0, 0, 1, 1, 1])
torch.Size([24])
Gradient norm: 2.969617334383376
tensor([1, 1, 1, 0, 0, 1, 1, 0, 0, 1])
torch.Size([24])
Gradient norm: 1.890044772196458
tensor([1, 0, 0, 0, 0, 0, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 1.9847734939872497
tensor([1, 1,

Epoch 416/600 - Training:   0%|          | 0/29 [00:00<?, ?it/s]

tensor([0, 0, 1, 0, 1, 1, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 3.6438760287821896
tensor([1, 0, 0, 0, 0, 0, 1, 1, 1, 1])
torch.Size([24])
Gradient norm: 2.077614079824652
tensor([1, 0, 1, 0, 0, 0, 1, 1, 0, 0])
torch.Size([24])
Gradient norm: 2.5431352533804663
tensor([1, 0, 0, 1, 1, 1, 0, 0, 1, 0])
torch.Size([24])
Gradient norm: 2.827092359099832
tensor([0, 1, 0, 1, 1, 0, 1, 1, 0, 1])
torch.Size([24])
Gradient norm: 1.9933415588284469
tensor([0, 0, 1, 0, 1, 0, 1, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.239412139872092
tensor([1, 0, 0, 0, 1, 0, 0, 1, 1, 0])
torch.Size([24])
Gradient norm: 3.266629400639148
tensor([0, 1, 1, 0, 0, 1, 0, 1, 0, 0])
torch.Size([24])
Gradient norm: 3.457571779654061
tensor([1, 0, 0, 1, 0, 0, 0, 1, 1, 1])
torch.Size([24])
Gradient norm: 2.913304283796385
tensor([1, 1, 1, 0, 0, 1, 1, 0, 0, 1])
torch.Size([24])
Gradient norm: 2.0627639944475913
tensor([1, 0, 0, 0, 0, 0, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 1.738818215286626
tensor([1, 1, 0, 

Epoch 417/600 - Training:   0%|          | 0/29 [00:00<?, ?it/s]

tensor([0, 0, 1, 0, 1, 1, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 3.0332582366329954
tensor([1, 0, 0, 0, 0, 0, 1, 1, 1, 1])
torch.Size([24])
Gradient norm: 1.977913390164059
tensor([1, 0, 1, 0, 0, 0, 1, 1, 0, 0])
torch.Size([24])
Gradient norm: 2.383232764044822
tensor([1, 0, 0, 1, 1, 1, 0, 0, 1, 0])
torch.Size([24])
Gradient norm: 2.959942165374089
tensor([0, 1, 0, 1, 1, 0, 1, 1, 0, 1])
torch.Size([24])
Gradient norm: 1.9443753092829499
tensor([0, 0, 1, 0, 1, 0, 1, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.41052874426816
tensor([1, 0, 0, 0, 1, 0, 0, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.4103409665151663
tensor([0, 1, 1, 0, 0, 1, 0, 1, 0, 0])
torch.Size([24])
Gradient norm: 3.0468670550047676
tensor([1, 0, 0, 1, 0, 0, 0, 1, 1, 1])
torch.Size([24])
Gradient norm: 3.3314431515799914
tensor([1, 1, 1, 0, 0, 1, 1, 0, 0, 1])
torch.Size([24])
Gradient norm: 1.7405630743604399
tensor([1, 0, 0, 0, 0, 0, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 1.7766064723380472
tensor([1, 1, 0

Epoch 418/600 - Training:   0%|          | 0/29 [00:00<?, ?it/s]

tensor([0, 0, 1, 0, 1, 1, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 3.393474499195402
tensor([1, 0, 0, 0, 0, 0, 1, 1, 1, 1])
torch.Size([24])
Gradient norm: 2.0934469503170465
tensor([1, 0, 1, 0, 0, 0, 1, 1, 0, 0])
torch.Size([24])
Gradient norm: 1.8687372971054939
tensor([1, 0, 0, 1, 1, 1, 0, 0, 1, 0])
torch.Size([24])
Gradient norm: 2.631762472072024
tensor([0, 1, 0, 1, 1, 0, 1, 1, 0, 1])
torch.Size([24])
Gradient norm: 2.2412283580699897
tensor([0, 0, 1, 0, 1, 0, 1, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.3459445158133514
tensor([1, 0, 0, 0, 1, 0, 0, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.525726612122683
tensor([0, 1, 1, 0, 0, 1, 0, 1, 0, 0])
torch.Size([24])
Gradient norm: 2.550634009971298
tensor([1, 0, 0, 1, 0, 0, 0, 1, 1, 1])
torch.Size([24])
Gradient norm: 2.754242814288292
tensor([1, 1, 1, 0, 0, 1, 1, 0, 0, 1])
torch.Size([24])
Gradient norm: 1.6562072372659145
tensor([1, 0, 0, 0, 0, 0, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 2.0419695395435937
tensor([1, 1, 0

Epoch 419/600 - Training:   0%|          | 0/29 [00:00<?, ?it/s]

tensor([0, 0, 1, 0, 1, 1, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 3.8451160457231484
tensor([1, 0, 0, 0, 0, 0, 1, 1, 1, 1])
torch.Size([24])
Gradient norm: 2.1388014601009124
tensor([1, 0, 1, 0, 0, 0, 1, 1, 0, 0])
torch.Size([24])
Gradient norm: 1.9075725711917006
tensor([1, 0, 0, 1, 1, 1, 0, 0, 1, 0])
torch.Size([24])
Gradient norm: 2.5080873905635155
tensor([0, 1, 0, 1, 1, 0, 1, 1, 0, 1])
torch.Size([24])
Gradient norm: 2.1056268909266467
tensor([0, 0, 1, 0, 1, 0, 1, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.365752789710148
tensor([1, 0, 0, 0, 1, 0, 0, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.523697305108459
tensor([0, 1, 1, 0, 0, 1, 0, 1, 0, 0])
torch.Size([24])
Gradient norm: 2.6470658054393414
tensor([1, 0, 0, 1, 0, 0, 0, 1, 1, 1])
torch.Size([24])
Gradient norm: 2.8290023714114154
tensor([1, 1, 1, 0, 0, 1, 1, 0, 0, 1])
torch.Size([24])
Gradient norm: 1.7868146679440862
tensor([1, 0, 0, 0, 0, 0, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 2.2605028475211633
tensor([1, 1

Epoch 420/600 - Training:   0%|          | 0/29 [00:00<?, ?it/s]

tensor([0, 0, 1, 0, 1, 1, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 3.2050288939786666
tensor([1, 0, 0, 0, 0, 0, 1, 1, 1, 1])
torch.Size([24])
Gradient norm: 1.996891724412502
tensor([1, 0, 1, 0, 0, 0, 1, 1, 0, 0])
torch.Size([24])
Gradient norm: 2.1084545040060982
tensor([1, 0, 0, 1, 1, 1, 0, 0, 1, 0])
torch.Size([24])
Gradient norm: 3.129620939695525
tensor([0, 1, 0, 1, 1, 0, 1, 1, 0, 1])
torch.Size([24])
Gradient norm: 2.159471856206541
tensor([0, 0, 1, 0, 1, 0, 1, 1, 1, 0])
torch.Size([24])
Gradient norm: 1.7456923529263249
tensor([1, 0, 0, 0, 1, 0, 0, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.4993208669344553
tensor([0, 1, 1, 0, 0, 1, 0, 1, 0, 0])
torch.Size([24])
Gradient norm: 2.7973501892319637
tensor([1, 0, 0, 1, 0, 0, 0, 1, 1, 1])
torch.Size([24])
Gradient norm: 3.4350299971498965
tensor([1, 1, 1, 0, 0, 1, 1, 0, 0, 1])
torch.Size([24])
Gradient norm: 1.7744294689493538
tensor([1, 0, 0, 0, 0, 0, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 2.3606068265124533
tensor([1, 1,

Epoch 421/600 - Training:   0%|          | 0/29 [00:00<?, ?it/s]

tensor([0, 0, 1, 0, 1, 1, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 4.18377960454962
tensor([1, 0, 0, 0, 0, 0, 1, 1, 1, 1])
torch.Size([24])
Gradient norm: 2.704769976632118
tensor([1, 0, 1, 0, 0, 0, 1, 1, 0, 0])
torch.Size([24])
Gradient norm: 2.270754074185715
tensor([1, 0, 0, 1, 1, 1, 0, 0, 1, 0])
torch.Size([24])
Gradient norm: 2.775976707502594
tensor([0, 1, 0, 1, 1, 0, 1, 1, 0, 1])
torch.Size([24])
Gradient norm: 1.9399497047203815
tensor([0, 0, 1, 0, 1, 0, 1, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.902568206236096
tensor([1, 0, 0, 0, 1, 0, 0, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.549213572641888
tensor([0, 1, 1, 0, 0, 1, 0, 1, 0, 0])
torch.Size([24])
Gradient norm: 2.662626905620484
tensor([1, 0, 0, 1, 0, 0, 0, 1, 1, 1])
torch.Size([24])
Gradient norm: 3.0588878707145093
tensor([1, 1, 1, 0, 0, 1, 1, 0, 0, 1])
torch.Size([24])
Gradient norm: 1.905397161263813
tensor([1, 0, 0, 0, 0, 0, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 1.7831269395763443
tensor([1, 1, 0, 0,

Epoch 422/600 - Training:   0%|          | 0/29 [00:00<?, ?it/s]

tensor([0, 0, 1, 0, 1, 1, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 3.8906619520483456
tensor([1, 0, 0, 0, 0, 0, 1, 1, 1, 1])
torch.Size([24])
Gradient norm: 2.0981146133894835
tensor([1, 0, 1, 0, 0, 0, 1, 1, 0, 0])
torch.Size([24])
Gradient norm: 2.091791055483178
tensor([1, 0, 0, 1, 1, 1, 0, 0, 1, 0])
torch.Size([24])
Gradient norm: 2.772176651657571
tensor([0, 1, 0, 1, 1, 0, 1, 1, 0, 1])
torch.Size([24])
Gradient norm: 1.7301043857667135
tensor([0, 0, 1, 0, 1, 0, 1, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.179015261191451
tensor([1, 0, 0, 0, 1, 0, 0, 1, 1, 0])
torch.Size([24])
Gradient norm: 3.163779389131825
tensor([0, 1, 1, 0, 0, 1, 0, 1, 0, 0])
torch.Size([24])
Gradient norm: 3.2635720752225676
tensor([1, 0, 0, 1, 0, 0, 0, 1, 1, 1])
torch.Size([24])
Gradient norm: 2.876137123580752
tensor([1, 1, 1, 0, 0, 1, 1, 0, 0, 1])
torch.Size([24])
Gradient norm: 1.710730037305334
tensor([1, 0, 0, 0, 0, 0, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 1.859064254631203
tensor([1, 1, 0, 

Epoch 423/600 - Training:   0%|          | 0/29 [00:00<?, ?it/s]

tensor([0, 0, 1, 0, 1, 1, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 3.0062068877835766
tensor([1, 0, 0, 0, 0, 0, 1, 1, 1, 1])
torch.Size([24])
Gradient norm: 2.1322105287196353
tensor([1, 0, 1, 0, 0, 0, 1, 1, 0, 0])
torch.Size([24])
Gradient norm: 2.599341068826391
tensor([1, 0, 0, 1, 1, 1, 0, 0, 1, 0])
torch.Size([24])
Gradient norm: 3.0471436831055865
tensor([0, 1, 0, 1, 1, 0, 1, 1, 0, 1])
torch.Size([24])
Gradient norm: 1.539061856230882
tensor([0, 0, 1, 0, 1, 0, 1, 1, 1, 0])
torch.Size([24])
Gradient norm: 1.9752425058937146
tensor([1, 0, 0, 0, 1, 0, 0, 1, 1, 0])
torch.Size([24])
Gradient norm: 3.0495490063116995
tensor([0, 1, 1, 0, 0, 1, 0, 1, 0, 0])
torch.Size([24])
Gradient norm: 2.887987140923933
tensor([1, 0, 0, 1, 0, 0, 0, 1, 1, 1])
torch.Size([24])
Gradient norm: 2.3999405673711767
tensor([1, 1, 1, 0, 0, 1, 1, 0, 0, 1])
torch.Size([24])
Gradient norm: 1.868552891856976
tensor([1, 0, 0, 0, 0, 0, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 1.5396191774036279
tensor([1, 1, 

Epoch 424/600 - Training:   0%|          | 0/29 [00:00<?, ?it/s]

tensor([0, 0, 1, 0, 1, 1, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 3.5856302832960143
tensor([1, 0, 0, 0, 0, 0, 1, 1, 1, 1])
torch.Size([24])
Gradient norm: 1.6664326717804545
tensor([1, 0, 1, 0, 0, 0, 1, 1, 0, 0])
torch.Size([24])
Gradient norm: 1.891291878918513
tensor([1, 0, 0, 1, 1, 1, 0, 0, 1, 0])
torch.Size([24])
Gradient norm: 3.39975709777704
tensor([0, 1, 0, 1, 1, 0, 1, 1, 0, 1])
torch.Size([24])
Gradient norm: 2.110078701552127
tensor([0, 0, 1, 0, 1, 0, 1, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.4924163137871154
tensor([1, 0, 0, 0, 1, 0, 0, 1, 1, 0])
torch.Size([24])
Gradient norm: 3.3451972454708034
tensor([0, 1, 1, 0, 0, 1, 0, 1, 0, 0])
torch.Size([24])
Gradient norm: 2.530567713880837
tensor([1, 0, 0, 1, 0, 0, 0, 1, 1, 1])
torch.Size([24])
Gradient norm: 1.933519742873151
tensor([1, 1, 1, 0, 0, 1, 1, 0, 0, 1])
torch.Size([24])
Gradient norm: 1.881962336266896
tensor([1, 0, 0, 0, 0, 0, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 2.414721051928609
tensor([1, 1, 0, 0

Epoch 425/600 - Training:   0%|          | 0/29 [00:00<?, ?it/s]

tensor([0, 0, 1, 0, 1, 1, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 3.194465121758671
tensor([1, 0, 0, 0, 0, 0, 1, 1, 1, 1])
torch.Size([24])
Gradient norm: 2.32432987153261
tensor([1, 0, 1, 0, 0, 0, 1, 1, 0, 0])
torch.Size([24])
Gradient norm: 2.001134885665903
tensor([1, 0, 0, 1, 1, 1, 0, 0, 1, 0])
torch.Size([24])
Gradient norm: 2.906901643775037
tensor([0, 1, 0, 1, 1, 0, 1, 1, 0, 1])
torch.Size([24])
Gradient norm: 1.7409517533485226
tensor([0, 0, 1, 0, 1, 0, 1, 1, 1, 0])
torch.Size([24])
Gradient norm: 1.7981589059445964
tensor([1, 0, 0, 0, 1, 0, 0, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.7170318401066935
tensor([0, 1, 1, 0, 0, 1, 0, 1, 0, 0])
torch.Size([24])
Gradient norm: 3.48466194798801
tensor([1, 0, 0, 1, 0, 0, 0, 1, 1, 1])
torch.Size([24])
Gradient norm: 2.914441922248295
tensor([1, 1, 1, 0, 0, 1, 1, 0, 0, 1])
torch.Size([24])
Gradient norm: 1.8055763055376568
tensor([1, 0, 0, 0, 0, 0, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 1.7022070262258293
tensor([1, 1, 0, 0

Epoch 426/600 - Training:   0%|          | 0/29 [00:00<?, ?it/s]

tensor([0, 0, 1, 0, 1, 1, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 3.3684433987073152
tensor([1, 0, 0, 0, 0, 0, 1, 1, 1, 1])
torch.Size([24])
Gradient norm: 1.9439061018179586
tensor([1, 0, 1, 0, 0, 0, 1, 1, 0, 0])
torch.Size([24])
Gradient norm: 1.913890984737465
tensor([1, 0, 0, 1, 1, 1, 0, 0, 1, 0])
torch.Size([24])
Gradient norm: 1.948792108213714
tensor([0, 1, 0, 1, 1, 0, 1, 1, 0, 1])
torch.Size([24])
Gradient norm: 1.7822884217195818
tensor([0, 0, 1, 0, 1, 0, 1, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.3139120469194623
tensor([1, 0, 0, 0, 1, 0, 0, 1, 1, 0])
torch.Size([24])
Gradient norm: 3.1183216037762316
tensor([0, 1, 1, 0, 0, 1, 0, 1, 0, 0])
torch.Size([24])
Gradient norm: 3.541673687991499
tensor([1, 0, 0, 1, 0, 0, 0, 1, 1, 1])
torch.Size([24])
Gradient norm: 3.2186328263026023
tensor([1, 1, 1, 0, 0, 1, 1, 0, 0, 1])
torch.Size([24])
Gradient norm: 1.855553271264372
tensor([1, 0, 0, 0, 0, 0, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 1.7861836097097323
tensor([1, 1, 

Epoch 427/600 - Training:   0%|          | 0/29 [00:00<?, ?it/s]

tensor([0, 0, 1, 0, 1, 1, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 3.1830697997016806
tensor([1, 0, 0, 0, 0, 0, 1, 1, 1, 1])
torch.Size([24])
Gradient norm: 2.113245694911936
tensor([1, 0, 1, 0, 0, 0, 1, 1, 0, 0])
torch.Size([24])
Gradient norm: 2.9312288922196217
tensor([1, 0, 0, 1, 1, 1, 0, 0, 1, 0])
torch.Size([24])
Gradient norm: 2.8730403973540017
tensor([0, 1, 0, 1, 1, 0, 1, 1, 0, 1])
torch.Size([24])
Gradient norm: 1.572193693926537
tensor([0, 0, 1, 0, 1, 0, 1, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.1323082414165704
tensor([1, 0, 0, 0, 1, 0, 0, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.8404128077444826
tensor([0, 1, 1, 0, 0, 1, 0, 1, 0, 0])
torch.Size([24])
Gradient norm: 3.429290726371036
tensor([1, 0, 0, 1, 0, 0, 0, 1, 1, 1])
torch.Size([24])
Gradient norm: 2.665713723725161
tensor([1, 1, 1, 0, 0, 1, 1, 0, 0, 1])
torch.Size([24])
Gradient norm: 1.9356628691033324
tensor([1, 0, 0, 0, 0, 0, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 1.9254516307867833
tensor([1, 1, 

Epoch 428/600 - Training:   0%|          | 0/29 [00:00<?, ?it/s]

tensor([0, 0, 1, 0, 1, 1, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 3.478350512384801
tensor([1, 0, 0, 0, 0, 0, 1, 1, 1, 1])
torch.Size([24])
Gradient norm: 1.9088075219202134
tensor([1, 0, 1, 0, 0, 0, 1, 1, 0, 0])
torch.Size([24])
Gradient norm: 2.362616725134222
tensor([1, 0, 0, 1, 1, 1, 0, 0, 1, 0])
torch.Size([24])
Gradient norm: 2.495204737650982
tensor([0, 1, 0, 1, 1, 0, 1, 1, 0, 1])
torch.Size([24])
Gradient norm: 1.6643680911037029
tensor([0, 0, 1, 0, 1, 0, 1, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.0436033509059555
tensor([1, 0, 0, 0, 1, 0, 0, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.7526611762554305
tensor([0, 1, 1, 0, 0, 1, 0, 1, 0, 0])
torch.Size([24])
Gradient norm: 3.4346055292933655
tensor([1, 0, 0, 1, 0, 0, 0, 1, 1, 1])
torch.Size([24])
Gradient norm: 2.7818091072909072
tensor([1, 1, 1, 0, 0, 1, 1, 0, 0, 1])
torch.Size([24])
Gradient norm: 1.9236423260982307
tensor([1, 0, 0, 0, 0, 0, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 1.584509168872639
tensor([1, 1, 

Epoch 429/600 - Training:   0%|          | 0/29 [00:00<?, ?it/s]

tensor([0, 0, 1, 0, 1, 1, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 3.5637117319557703
tensor([1, 0, 0, 0, 0, 0, 1, 1, 1, 1])
torch.Size([24])
Gradient norm: 1.8704135570067786
tensor([1, 0, 1, 0, 0, 0, 1, 1, 0, 0])
torch.Size([24])
Gradient norm: 2.4213861853490366
tensor([1, 0, 0, 1, 1, 1, 0, 0, 1, 0])
torch.Size([24])
Gradient norm: 2.9389946703028773
tensor([0, 1, 0, 1, 1, 0, 1, 1, 0, 1])
torch.Size([24])
Gradient norm: 1.9936047289557481
tensor([0, 0, 1, 0, 1, 0, 1, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.313042214306938
tensor([1, 0, 0, 0, 1, 0, 0, 1, 1, 0])
torch.Size([24])
Gradient norm: 3.2572528791088273
tensor([0, 1, 1, 0, 0, 1, 0, 1, 0, 0])
torch.Size([24])
Gradient norm: 2.814936513381685
tensor([1, 0, 0, 1, 0, 0, 0, 1, 1, 1])
torch.Size([24])
Gradient norm: 3.3606796333601476
tensor([1, 1, 1, 0, 0, 1, 1, 0, 0, 1])
torch.Size([24])
Gradient norm: 1.7765268762579502
tensor([1, 0, 0, 0, 0, 0, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 1.8721422205181586
tensor([1, 1

Epoch 430/600 - Training:   0%|          | 0/29 [00:00<?, ?it/s]

tensor([0, 0, 1, 0, 1, 1, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 3.0006996646916364
tensor([1, 0, 0, 0, 0, 0, 1, 1, 1, 1])
torch.Size([24])
Gradient norm: 2.1037506370776606
tensor([1, 0, 1, 0, 0, 0, 1, 1, 0, 0])
torch.Size([24])
Gradient norm: 2.368665834346747
tensor([1, 0, 0, 1, 1, 1, 0, 0, 1, 0])
torch.Size([24])
Gradient norm: 2.791476065465698
tensor([0, 1, 0, 1, 1, 0, 1, 1, 0, 1])
torch.Size([24])
Gradient norm: 1.748715898817443
tensor([0, 0, 1, 0, 1, 0, 1, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.075518323131986
tensor([1, 0, 0, 0, 1, 0, 0, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.4790193990849088
tensor([0, 1, 1, 0, 0, 1, 0, 1, 0, 0])
torch.Size([24])
Gradient norm: 3.111856190446495
tensor([1, 0, 0, 1, 0, 0, 0, 1, 1, 1])
torch.Size([24])
Gradient norm: 3.4709361665459424
tensor([1, 1, 1, 0, 0, 1, 1, 0, 0, 1])
torch.Size([24])
Gradient norm: 1.8650794933832018
tensor([1, 0, 0, 0, 0, 0, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 1.9745616305670595
tensor([1, 1, 0

Epoch 431/600 - Training:   0%|          | 0/29 [00:00<?, ?it/s]

tensor([0, 0, 1, 0, 1, 1, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 3.5412580521654715
tensor([1, 0, 0, 0, 0, 0, 1, 1, 1, 1])
torch.Size([24])
Gradient norm: 2.1071764161583992
tensor([1, 0, 1, 0, 0, 0, 1, 1, 0, 0])
torch.Size([24])
Gradient norm: 1.606664875226423
tensor([1, 0, 0, 1, 1, 1, 0, 0, 1, 0])
torch.Size([24])
Gradient norm: 2.9789905270018986
tensor([0, 1, 0, 1, 1, 0, 1, 1, 0, 1])
torch.Size([24])
Gradient norm: 2.320381752843906
tensor([0, 0, 1, 0, 1, 0, 1, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.394430634754902
tensor([1, 0, 0, 0, 1, 0, 0, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.3824187228372113
tensor([0, 1, 1, 0, 0, 1, 0, 1, 0, 0])
torch.Size([24])
Gradient norm: 3.4618830476750495
tensor([1, 0, 0, 1, 0, 0, 0, 1, 1, 1])
torch.Size([24])
Gradient norm: 2.718044791672637
tensor([1, 1, 1, 0, 0, 1, 1, 0, 0, 1])
torch.Size([24])
Gradient norm: 2.133645784727132
tensor([1, 0, 0, 0, 0, 0, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 1.5810012037860461
tensor([1, 1, 0

Epoch 432/600 - Training:   0%|          | 0/29 [00:00<?, ?it/s]

tensor([0, 0, 1, 0, 1, 1, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 3.8469448834216515
tensor([1, 0, 0, 0, 0, 0, 1, 1, 1, 1])
torch.Size([24])
Gradient norm: 2.314384938514043
tensor([1, 0, 1, 0, 0, 0, 1, 1, 0, 0])
torch.Size([24])
Gradient norm: 2.4583665885185235
tensor([1, 0, 0, 1, 1, 1, 0, 0, 1, 0])
torch.Size([24])
Gradient norm: 2.833686813857887
tensor([0, 1, 0, 1, 1, 0, 1, 1, 0, 1])
torch.Size([24])
Gradient norm: 2.047055436192451
tensor([0, 0, 1, 0, 1, 0, 1, 1, 1, 0])
torch.Size([24])
Gradient norm: 1.8867927114094651
tensor([1, 0, 0, 0, 1, 0, 0, 1, 1, 0])
torch.Size([24])
Gradient norm: 3.139933511461728
tensor([0, 1, 1, 0, 0, 1, 0, 1, 0, 0])
torch.Size([24])
Gradient norm: 2.445978532394583
tensor([1, 0, 0, 1, 0, 0, 0, 1, 1, 1])
torch.Size([24])
Gradient norm: 2.749034871773357
tensor([1, 1, 1, 0, 0, 1, 1, 0, 0, 1])
torch.Size([24])
Gradient norm: 1.8115335709154636
tensor([1, 0, 0, 0, 0, 0, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 1.9718955462226886
tensor([1, 1, 0,

Epoch 433/600 - Training:   0%|          | 0/29 [00:00<?, ?it/s]

tensor([0, 0, 1, 0, 1, 1, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 3.2865240308650794
tensor([1, 0, 0, 0, 0, 0, 1, 1, 1, 1])
torch.Size([24])
Gradient norm: 1.8802080847369236
tensor([1, 0, 1, 0, 0, 0, 1, 1, 0, 0])
torch.Size([24])
Gradient norm: 2.473276249901343
tensor([1, 0, 0, 1, 1, 1, 0, 0, 1, 0])
torch.Size([24])
Gradient norm: 2.8524188019113605
tensor([0, 1, 0, 1, 1, 0, 1, 1, 0, 1])
torch.Size([24])
Gradient norm: 1.7151272726860987
tensor([0, 0, 1, 0, 1, 0, 1, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.0484271875774933
tensor([1, 0, 0, 0, 1, 0, 0, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.9582722177901504
tensor([0, 1, 1, 0, 0, 1, 0, 1, 0, 0])
torch.Size([24])
Gradient norm: 2.9524885870955817
tensor([1, 0, 0, 1, 0, 0, 0, 1, 1, 1])
torch.Size([24])
Gradient norm: 2.684614397944782
tensor([1, 1, 1, 0, 0, 1, 1, 0, 0, 1])
torch.Size([24])
Gradient norm: 1.823898859699582
tensor([1, 0, 0, 0, 0, 0, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 1.5962709737523215
tensor([1, 1,

Epoch 434/600 - Training:   0%|          | 0/29 [00:00<?, ?it/s]

tensor([0, 0, 1, 0, 1, 1, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 3.7972146738220616
tensor([1, 0, 0, 0, 0, 0, 1, 1, 1, 1])
torch.Size([24])
Gradient norm: 2.4524419373618906
tensor([1, 0, 1, 0, 0, 0, 1, 1, 0, 0])
torch.Size([24])
Gradient norm: 1.9142888600666499
tensor([1, 0, 0, 1, 1, 1, 0, 0, 1, 0])
torch.Size([24])
Gradient norm: 2.3309024788641266
tensor([0, 1, 0, 1, 1, 0, 1, 1, 0, 1])
torch.Size([24])
Gradient norm: 1.8877369881219426
tensor([0, 0, 1, 0, 1, 0, 1, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.398432057968526
tensor([1, 0, 0, 0, 1, 0, 0, 1, 1, 0])
torch.Size([24])
Gradient norm: 3.211828464450895
tensor([0, 1, 1, 0, 0, 1, 0, 1, 0, 0])
torch.Size([24])
Gradient norm: 2.6238422257943155
tensor([1, 0, 0, 1, 0, 0, 0, 1, 1, 1])
torch.Size([24])
Gradient norm: 3.1914244961708222
tensor([1, 1, 1, 0, 0, 1, 1, 0, 0, 1])
torch.Size([24])
Gradient norm: 2.066333168240759
tensor([1, 0, 0, 0, 0, 0, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 1.7855629251996783
tensor([1, 1,

Epoch 435/600 - Training:   0%|          | 0/29 [00:00<?, ?it/s]

tensor([0, 0, 1, 0, 1, 1, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 2.842262215794936
tensor([1, 0, 0, 0, 0, 0, 1, 1, 1, 1])
torch.Size([24])
Gradient norm: 2.0396536952760194
tensor([1, 0, 1, 0, 0, 0, 1, 1, 0, 0])
torch.Size([24])
Gradient norm: 2.1004268458409787
tensor([1, 0, 0, 1, 1, 1, 0, 0, 1, 0])
torch.Size([24])
Gradient norm: 2.104537488114358
tensor([0, 1, 0, 1, 1, 0, 1, 1, 0, 1])
torch.Size([24])
Gradient norm: 2.455366373749842
tensor([0, 0, 1, 0, 1, 0, 1, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.644886704758715
tensor([1, 0, 0, 0, 1, 0, 0, 1, 1, 0])
torch.Size([24])
Gradient norm: 3.22252631291502
tensor([0, 1, 1, 0, 0, 1, 0, 1, 0, 0])
torch.Size([24])
Gradient norm: 2.8408567047714772
tensor([1, 0, 0, 1, 0, 0, 0, 1, 1, 1])
torch.Size([24])
Gradient norm: 3.2251601295708916
tensor([1, 1, 1, 0, 0, 1, 1, 0, 0, 1])
torch.Size([24])
Gradient norm: 1.7321051653424488
tensor([1, 0, 0, 0, 0, 0, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 1.5635306561982076
tensor([1, 1, 0,

Epoch 436/600 - Training:   0%|          | 0/29 [00:00<?, ?it/s]

tensor([0, 0, 1, 0, 1, 1, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 3.584715497738236
tensor([1, 0, 0, 0, 0, 0, 1, 1, 1, 1])
torch.Size([24])
Gradient norm: 1.7607759484011676
tensor([1, 0, 1, 0, 0, 0, 1, 1, 0, 0])
torch.Size([24])
Gradient norm: 3.0057413534103437
tensor([1, 0, 0, 1, 1, 1, 0, 0, 1, 0])
torch.Size([24])
Gradient norm: 2.7020936445900965
tensor([0, 1, 0, 1, 1, 0, 1, 1, 0, 1])
torch.Size([24])
Gradient norm: 2.0072712953219702
tensor([0, 0, 1, 0, 1, 0, 1, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.3330738378659834
tensor([1, 0, 0, 0, 1, 0, 0, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.955777809767856
tensor([0, 1, 1, 0, 0, 1, 0, 1, 0, 0])
torch.Size([24])
Gradient norm: 2.9904534515892722
tensor([1, 0, 0, 1, 0, 0, 0, 1, 1, 1])
torch.Size([24])
Gradient norm: 2.9882248247576237
tensor([1, 1, 1, 0, 0, 1, 1, 0, 0, 1])
torch.Size([24])
Gradient norm: 1.9995821833765917
tensor([1, 0, 0, 0, 0, 0, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 1.4803957620384387
tensor([1, 1

Epoch 437/600 - Training:   0%|          | 0/29 [00:00<?, ?it/s]

tensor([0, 0, 1, 0, 1, 1, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 3.7322779385532328
tensor([1, 0, 0, 0, 0, 0, 1, 1, 1, 1])
torch.Size([24])
Gradient norm: 2.1239813662299354
tensor([1, 0, 1, 0, 0, 0, 1, 1, 0, 0])
torch.Size([24])
Gradient norm: 2.13091139034368
tensor([1, 0, 0, 1, 1, 1, 0, 0, 1, 0])
torch.Size([24])
Gradient norm: 2.3789025448366843
tensor([0, 1, 0, 1, 1, 0, 1, 1, 0, 1])
torch.Size([24])
Gradient norm: 2.1335847349081005
tensor([0, 0, 1, 0, 1, 0, 1, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.16698364639323
tensor([1, 0, 0, 0, 1, 0, 0, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.1385215345088415
tensor([0, 1, 1, 0, 0, 1, 0, 1, 0, 0])
torch.Size([24])
Gradient norm: 3.1367844344978613
tensor([1, 0, 0, 1, 0, 0, 0, 1, 1, 1])
torch.Size([24])
Gradient norm: 2.9660505188868136
tensor([1, 1, 1, 0, 0, 1, 1, 0, 0, 1])
torch.Size([24])
Gradient norm: 2.1364698433621023
tensor([1, 0, 0, 0, 0, 0, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 1.4891911329554615
tensor([1, 1, 

Epoch 438/600 - Training:   0%|          | 0/29 [00:00<?, ?it/s]

tensor([0, 0, 1, 0, 1, 1, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 3.5250580383633783
tensor([1, 0, 0, 0, 0, 0, 1, 1, 1, 1])
torch.Size([24])
Gradient norm: 1.9440088837826857
tensor([1, 0, 1, 0, 0, 0, 1, 1, 0, 0])
torch.Size([24])
Gradient norm: 1.956683505865022
tensor([1, 0, 0, 1, 1, 1, 0, 0, 1, 0])
torch.Size([24])
Gradient norm: 3.087556479908802
tensor([0, 1, 0, 1, 1, 0, 1, 1, 0, 1])
torch.Size([24])
Gradient norm: 1.903662458417883
tensor([0, 0, 1, 0, 1, 0, 1, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.071011214547416
tensor([1, 0, 0, 0, 1, 0, 0, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.8778417493805515
tensor([0, 1, 1, 0, 0, 1, 0, 1, 0, 0])
torch.Size([24])
Gradient norm: 3.0747989815366448
tensor([1, 0, 0, 1, 0, 0, 0, 1, 1, 1])
torch.Size([24])
Gradient norm: 3.206704658302148
tensor([1, 1, 1, 0, 0, 1, 1, 0, 0, 1])
torch.Size([24])
Gradient norm: 2.0224137856779763
tensor([1, 0, 0, 0, 0, 0, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 2.5363419503337035
tensor([1, 1, 0

Epoch 439/600 - Training:   0%|          | 0/29 [00:00<?, ?it/s]

tensor([0, 0, 1, 0, 1, 1, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 3.6114215498288598
tensor([1, 0, 0, 0, 0, 0, 1, 1, 1, 1])
torch.Size([24])
Gradient norm: 2.166265507322434
tensor([1, 0, 1, 0, 0, 0, 1, 1, 0, 0])
torch.Size([24])
Gradient norm: 2.3646393106440975
tensor([1, 0, 0, 1, 1, 1, 0, 0, 1, 0])
torch.Size([24])
Gradient norm: 2.757897965855027
tensor([0, 1, 0, 1, 1, 0, 1, 1, 0, 1])
torch.Size([24])
Gradient norm: 2.1831454944265705
tensor([0, 0, 1, 0, 1, 0, 1, 1, 1, 0])
torch.Size([24])
Gradient norm: 1.7817752799252515
tensor([1, 0, 0, 0, 1, 0, 0, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.7668362816952894
tensor([0, 1, 1, 0, 0, 1, 0, 1, 0, 0])
torch.Size([24])
Gradient norm: 3.160834017130646
tensor([1, 0, 0, 1, 0, 0, 0, 1, 1, 1])
torch.Size([24])
Gradient norm: 2.930125692305086
tensor([1, 1, 1, 0, 0, 1, 1, 0, 0, 1])
torch.Size([24])
Gradient norm: 1.661050209862244
tensor([1, 0, 0, 0, 0, 0, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 1.9084911593956961
tensor([1, 1, 0

Epoch 440/600 - Training:   0%|          | 0/29 [00:00<?, ?it/s]

tensor([0, 0, 1, 0, 1, 1, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 2.888206213111749
tensor([1, 0, 0, 0, 0, 0, 1, 1, 1, 1])
torch.Size([24])
Gradient norm: 2.2136077371096294
tensor([1, 0, 1, 0, 0, 0, 1, 1, 0, 0])
torch.Size([24])
Gradient norm: 2.451852546603062
tensor([1, 0, 0, 1, 1, 1, 0, 0, 1, 0])
torch.Size([24])
Gradient norm: 3.139856089698473
tensor([0, 1, 0, 1, 1, 0, 1, 1, 0, 1])
torch.Size([24])
Gradient norm: 1.7178434609158169
tensor([0, 0, 1, 0, 1, 0, 1, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.2377591405805157
tensor([1, 0, 0, 0, 1, 0, 0, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.683449778947504
tensor([0, 1, 1, 0, 0, 1, 0, 1, 0, 0])
torch.Size([24])
Gradient norm: 2.5899907272919425
tensor([1, 0, 0, 1, 0, 0, 0, 1, 1, 1])
torch.Size([24])
Gradient norm: 3.0196452595384513
tensor([1, 1, 1, 0, 0, 1, 1, 0, 0, 1])
torch.Size([24])
Gradient norm: 1.7964828602191012
tensor([1, 0, 0, 0, 0, 0, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 1.872115276470982
tensor([1, 1, 0

Epoch 441/600 - Training:   0%|          | 0/29 [00:00<?, ?it/s]

tensor([0, 0, 1, 0, 1, 1, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 3.308834090660088
tensor([1, 0, 0, 0, 0, 0, 1, 1, 1, 1])
torch.Size([24])
Gradient norm: 2.229839204459359
tensor([1, 0, 1, 0, 0, 0, 1, 1, 0, 0])
torch.Size([24])
Gradient norm: 2.5551188667240363
tensor([1, 0, 0, 1, 1, 1, 0, 0, 1, 0])
torch.Size([24])
Gradient norm: 3.0190291404777247
tensor([0, 1, 0, 1, 1, 0, 1, 1, 0, 1])
torch.Size([24])
Gradient norm: 2.0324174679826226
tensor([0, 0, 1, 0, 1, 0, 1, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.455102888750115
tensor([1, 0, 0, 0, 1, 0, 0, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.7875493612665654
tensor([0, 1, 1, 0, 0, 1, 0, 1, 0, 0])
torch.Size([24])
Gradient norm: 2.8194765237864092
tensor([1, 0, 0, 1, 0, 0, 0, 1, 1, 1])
torch.Size([24])
Gradient norm: 2.6422606389606407
tensor([1, 1, 1, 0, 0, 1, 1, 0, 0, 1])
torch.Size([24])
Gradient norm: 2.1015050868529572
tensor([1, 0, 0, 0, 0, 0, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 1.9675831403422148
tensor([1, 1,

Epoch 442/600 - Training:   0%|          | 0/29 [00:00<?, ?it/s]

tensor([0, 0, 1, 0, 1, 1, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 2.996530853970477
tensor([1, 0, 0, 0, 0, 0, 1, 1, 1, 1])
torch.Size([24])
Gradient norm: 1.6540213678813827
tensor([1, 0, 1, 0, 0, 0, 1, 1, 0, 0])
torch.Size([24])
Gradient norm: 2.411049911432442
tensor([1, 0, 0, 1, 1, 1, 0, 0, 1, 0])
torch.Size([24])
Gradient norm: 2.392621579355363
tensor([0, 1, 0, 1, 1, 0, 1, 1, 0, 1])
torch.Size([24])
Gradient norm: 1.7962642669393978
tensor([0, 0, 1, 0, 1, 0, 1, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.2718159384079786
tensor([1, 0, 0, 0, 1, 0, 0, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.694760667067273
tensor([0, 1, 1, 0, 0, 1, 0, 1, 0, 0])
torch.Size([24])
Gradient norm: 3.2939304468562716
tensor([1, 0, 0, 1, 0, 0, 0, 1, 1, 1])
torch.Size([24])
Gradient norm: 3.1801141305535006
tensor([1, 1, 1, 0, 0, 1, 1, 0, 0, 1])
torch.Size([24])
Gradient norm: 2.11282502540745
tensor([1, 0, 0, 0, 0, 0, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 1.9536579584457534
tensor([1, 1, 0,

Epoch 443/600 - Training:   0%|          | 0/29 [00:00<?, ?it/s]

tensor([0, 0, 1, 0, 1, 1, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 3.095299895250359
tensor([1, 0, 0, 0, 0, 0, 1, 1, 1, 1])
torch.Size([24])
Gradient norm: 1.7384312154167498
tensor([1, 0, 1, 0, 0, 0, 1, 1, 0, 0])
torch.Size([24])
Gradient norm: 2.5206595411177357
tensor([1, 0, 0, 1, 1, 1, 0, 0, 1, 0])
torch.Size([24])
Gradient norm: 2.0796021897511814
tensor([0, 1, 0, 1, 1, 0, 1, 1, 0, 1])
torch.Size([24])
Gradient norm: 1.8319886256223146
tensor([0, 0, 1, 0, 1, 0, 1, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.4045322000980804
tensor([1, 0, 0, 0, 1, 0, 0, 1, 1, 0])
torch.Size([24])
Gradient norm: 3.113032438030678
tensor([0, 1, 1, 0, 0, 1, 0, 1, 0, 0])
torch.Size([24])
Gradient norm: 3.226567601752783
tensor([1, 0, 0, 1, 0, 0, 0, 1, 1, 1])
torch.Size([24])
Gradient norm: 2.500011496376612
tensor([1, 1, 1, 0, 0, 1, 1, 0, 0, 1])
torch.Size([24])
Gradient norm: 1.885297667931621
tensor([1, 0, 0, 0, 0, 0, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 1.5433577008393995
tensor([1, 1, 0

Epoch 444/600 - Training:   0%|          | 0/29 [00:00<?, ?it/s]

tensor([0, 0, 1, 0, 1, 1, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 3.42667009052641
tensor([1, 0, 0, 0, 0, 0, 1, 1, 1, 1])
torch.Size([24])
Gradient norm: 2.939759366235967
tensor([1, 0, 1, 0, 0, 0, 1, 1, 0, 0])
torch.Size([24])
Gradient norm: 2.2448645299827215
tensor([1, 0, 0, 1, 1, 1, 0, 0, 1, 0])
torch.Size([24])
Gradient norm: 2.535116383202975
tensor([0, 1, 0, 1, 1, 0, 1, 1, 0, 1])
torch.Size([24])
Gradient norm: 2.0982391351925114
tensor([0, 0, 1, 0, 1, 0, 1, 1, 1, 0])
torch.Size([24])
Gradient norm: 1.9786140034859192
tensor([1, 0, 0, 0, 1, 0, 0, 1, 1, 0])
torch.Size([24])
Gradient norm: 3.130578265844531
tensor([0, 1, 1, 0, 0, 1, 0, 1, 0, 0])
torch.Size([24])
Gradient norm: 3.4888572855724234
tensor([1, 0, 0, 1, 0, 0, 0, 1, 1, 1])
torch.Size([24])
Gradient norm: 3.1134300439568134
tensor([1, 1, 1, 0, 0, 1, 1, 0, 0, 1])
torch.Size([24])
Gradient norm: 1.8842783030812857
tensor([1, 0, 0, 0, 0, 0, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 1.9677884242704067
tensor([1, 1, 0

Epoch 445/600 - Training:   0%|          | 0/29 [00:00<?, ?it/s]

tensor([0, 0, 1, 0, 1, 1, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 2.73871770165944
tensor([1, 0, 0, 0, 0, 0, 1, 1, 1, 1])
torch.Size([24])
Gradient norm: 2.1845183242044
tensor([1, 0, 1, 0, 0, 0, 1, 1, 0, 0])
torch.Size([24])
Gradient norm: 2.4027256287638887
tensor([1, 0, 0, 1, 1, 1, 0, 0, 1, 0])
torch.Size([24])
Gradient norm: 2.3877226640524243
tensor([0, 1, 0, 1, 1, 0, 1, 1, 0, 1])
torch.Size([24])
Gradient norm: 2.857654978033668
tensor([0, 0, 1, 0, 1, 0, 1, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.1332036207785308
tensor([1, 0, 0, 0, 1, 0, 0, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.855343048437879
tensor([0, 1, 1, 0, 0, 1, 0, 1, 0, 0])
torch.Size([24])
Gradient norm: 3.282550328613532
tensor([1, 0, 0, 1, 0, 0, 0, 1, 1, 1])
torch.Size([24])
Gradient norm: 2.8804407909298377
tensor([1, 1, 1, 0, 0, 1, 1, 0, 0, 1])
torch.Size([24])
Gradient norm: 2.116227117904045
tensor([1, 0, 0, 0, 0, 0, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 1.9713317535520765
tensor([1, 1, 0, 0,

Epoch 446/600 - Training:   0%|          | 0/29 [00:00<?, ?it/s]

tensor([0, 0, 1, 0, 1, 1, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 3.7172116553198937
tensor([1, 0, 0, 0, 0, 0, 1, 1, 1, 1])
torch.Size([24])
Gradient norm: 1.7635891338856347
tensor([1, 0, 1, 0, 0, 0, 1, 1, 0, 0])
torch.Size([24])
Gradient norm: 2.8779679712830353
tensor([1, 0, 0, 1, 1, 1, 0, 0, 1, 0])
torch.Size([24])
Gradient norm: 2.001139799513717
tensor([0, 1, 0, 1, 1, 0, 1, 1, 0, 1])
torch.Size([24])
Gradient norm: 1.8783357434389718
tensor([0, 0, 1, 0, 1, 0, 1, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.245023197982162
tensor([1, 0, 0, 0, 1, 0, 0, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.380471213243916
tensor([0, 1, 1, 0, 0, 1, 0, 1, 0, 0])
torch.Size([24])
Gradient norm: 3.117443145902337
tensor([1, 0, 0, 1, 0, 0, 0, 1, 1, 1])
torch.Size([24])
Gradient norm: 2.7263722771677554
tensor([1, 1, 1, 0, 0, 1, 1, 0, 0, 1])
torch.Size([24])
Gradient norm: 1.8399070034431138
tensor([1, 0, 0, 0, 0, 0, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 1.6088770933518246
tensor([1, 1, 

Epoch 447/600 - Training:   0%|          | 0/29 [00:00<?, ?it/s]

tensor([0, 0, 1, 0, 1, 1, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 3.7131855212174063
tensor([1, 0, 0, 0, 0, 0, 1, 1, 1, 1])
torch.Size([24])
Gradient norm: 1.7560013782096633
tensor([1, 0, 1, 0, 0, 0, 1, 1, 0, 0])
torch.Size([24])
Gradient norm: 1.8487939084941034
tensor([1, 0, 0, 1, 1, 1, 0, 0, 1, 0])
torch.Size([24])
Gradient norm: 2.6841428426962692
tensor([0, 1, 0, 1, 1, 0, 1, 1, 0, 1])
torch.Size([24])
Gradient norm: 1.4012960652669209
tensor([0, 0, 1, 0, 1, 0, 1, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.1198879610853143
tensor([1, 0, 0, 0, 1, 0, 0, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.794802433494312
tensor([0, 1, 1, 0, 0, 1, 0, 1, 0, 0])
torch.Size([24])
Gradient norm: 3.498866517220316
tensor([1, 0, 0, 1, 0, 0, 0, 1, 1, 1])
torch.Size([24])
Gradient norm: 2.795430744655267
tensor([1, 1, 1, 0, 0, 1, 1, 0, 0, 1])
torch.Size([24])
Gradient norm: 1.732317370609092
tensor([1, 0, 0, 0, 0, 0, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 2.063700731555232
tensor([1, 1, 0

Epoch 448/600 - Training:   0%|          | 0/29 [00:00<?, ?it/s]

tensor([0, 0, 1, 0, 1, 1, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 3.784814685883431
tensor([1, 0, 0, 0, 0, 0, 1, 1, 1, 1])
torch.Size([24])
Gradient norm: 2.184007729832542
tensor([1, 0, 1, 0, 0, 0, 1, 1, 0, 0])
torch.Size([24])
Gradient norm: 2.176915277173181
tensor([1, 0, 0, 1, 1, 1, 0, 0, 1, 0])
torch.Size([24])
Gradient norm: 1.9559893071908272
tensor([0, 1, 0, 1, 1, 0, 1, 1, 0, 1])
torch.Size([24])
Gradient norm: 1.9399562007880689
tensor([0, 0, 1, 0, 1, 0, 1, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.4620062676496124
tensor([1, 0, 0, 0, 1, 0, 0, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.2309803147052305
tensor([0, 1, 1, 0, 0, 1, 0, 1, 0, 0])
torch.Size([24])
Gradient norm: 2.9632101723293425
tensor([1, 0, 0, 1, 0, 0, 0, 1, 1, 1])
torch.Size([24])
Gradient norm: 2.364202127593209
tensor([1, 1, 1, 0, 0, 1, 1, 0, 0, 1])
torch.Size([24])
Gradient norm: 1.9310850485247062
tensor([1, 0, 0, 0, 0, 0, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 1.6517293651680034
tensor([1, 1, 

Epoch 449/600 - Training:   0%|          | 0/29 [00:00<?, ?it/s]

tensor([0, 0, 1, 0, 1, 1, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 3.819710595531089
tensor([1, 0, 0, 0, 0, 0, 1, 1, 1, 1])
torch.Size([24])
Gradient norm: 1.9990724719685211
tensor([1, 0, 1, 0, 0, 0, 1, 1, 0, 0])
torch.Size([24])
Gradient norm: 2.4005634480831444
tensor([1, 0, 0, 1, 1, 1, 0, 0, 1, 0])
torch.Size([24])
Gradient norm: 2.877993488379423
tensor([0, 1, 0, 1, 1, 0, 1, 1, 0, 1])
torch.Size([24])
Gradient norm: 1.8921309160279942
tensor([0, 0, 1, 0, 1, 0, 1, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.676246244135653
tensor([1, 0, 0, 0, 1, 0, 0, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.8433124807834576
tensor([0, 1, 1, 0, 0, 1, 0, 1, 0, 0])
torch.Size([24])
Gradient norm: 3.1242551126457796
tensor([1, 0, 0, 1, 0, 0, 0, 1, 1, 1])
torch.Size([24])
Gradient norm: 2.480097269136026
tensor([1, 1, 1, 0, 0, 1, 1, 0, 0, 1])
torch.Size([24])
Gradient norm: 1.838357349970559
tensor([1, 0, 0, 0, 0, 0, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 2.067530859209255
tensor([1, 1, 0,

Epoch 450/600 - Training:   0%|          | 0/29 [00:00<?, ?it/s]

tensor([0, 0, 1, 0, 1, 1, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 2.934047497885601
tensor([1, 0, 0, 0, 0, 0, 1, 1, 1, 1])
torch.Size([24])
Gradient norm: 1.6886736661656632
tensor([1, 0, 1, 0, 0, 0, 1, 1, 0, 0])
torch.Size([24])
Gradient norm: 2.294415800781146
tensor([1, 0, 0, 1, 1, 1, 0, 0, 1, 0])
torch.Size([24])
Gradient norm: 2.1935404005322017
tensor([0, 1, 0, 1, 1, 0, 1, 1, 0, 1])
torch.Size([24])
Gradient norm: 1.8763042871091982
tensor([0, 0, 1, 0, 1, 0, 1, 1, 1, 0])
torch.Size([24])
Gradient norm: 1.9317956877555973
tensor([1, 0, 0, 0, 1, 0, 0, 1, 1, 0])
torch.Size([24])
Gradient norm: 3.2007384933057477
tensor([0, 1, 1, 0, 0, 1, 0, 1, 0, 0])
torch.Size([24])
Gradient norm: 3.7244036226474675
tensor([1, 0, 0, 1, 0, 0, 0, 1, 1, 1])
torch.Size([24])
Gradient norm: 2.7532570091845265
tensor([1, 1, 1, 0, 0, 1, 1, 0, 0, 1])
torch.Size([24])
Gradient norm: 2.505428810597955
tensor([1, 0, 0, 0, 0, 0, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 1.717117528358724
tensor([1, 1, 

Epoch 451/600 - Training:   0%|          | 0/29 [00:00<?, ?it/s]

tensor([0, 0, 1, 0, 1, 1, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 3.405344863453856
tensor([1, 0, 0, 0, 0, 0, 1, 1, 1, 1])
torch.Size([24])
Gradient norm: 1.9875139289389114
tensor([1, 0, 1, 0, 0, 0, 1, 1, 0, 0])
torch.Size([24])
Gradient norm: 2.2271866419083013
tensor([1, 0, 0, 1, 1, 1, 0, 0, 1, 0])
torch.Size([24])
Gradient norm: 3.500659724632626
tensor([0, 1, 0, 1, 1, 0, 1, 1, 0, 1])
torch.Size([24])
Gradient norm: 1.9308627608373832
tensor([0, 0, 1, 0, 1, 0, 1, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.011698095540957
tensor([1, 0, 0, 0, 1, 0, 0, 1, 1, 0])
torch.Size([24])
Gradient norm: 3.2022776206928514
tensor([0, 1, 1, 0, 0, 1, 0, 1, 0, 0])
torch.Size([24])
Gradient norm: 2.4292690783903383
tensor([1, 0, 0, 1, 0, 0, 0, 1, 1, 1])
torch.Size([24])
Gradient norm: 2.6605203599241514
tensor([1, 1, 1, 0, 0, 1, 1, 0, 0, 1])
torch.Size([24])
Gradient norm: 1.8036453580058485
tensor([1, 0, 0, 0, 0, 0, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 1.6607272209380408
tensor([1, 1,

Epoch 452/600 - Training:   0%|          | 0/29 [00:00<?, ?it/s]

tensor([0, 0, 1, 0, 1, 1, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 3.440778715910319
tensor([1, 0, 0, 0, 0, 0, 1, 1, 1, 1])
torch.Size([24])
Gradient norm: 2.0352995404295227
tensor([1, 0, 1, 0, 0, 0, 1, 1, 0, 0])
torch.Size([24])
Gradient norm: 2.236033592759617
tensor([1, 0, 0, 1, 1, 1, 0, 0, 1, 0])
torch.Size([24])
Gradient norm: 2.434765949276137
tensor([0, 1, 0, 1, 1, 0, 1, 1, 0, 1])
torch.Size([24])
Gradient norm: 1.6828529624075408
tensor([0, 0, 1, 0, 1, 0, 1, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.2177009122648315
tensor([1, 0, 0, 0, 1, 0, 0, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.3916993005044627
tensor([0, 1, 1, 0, 0, 1, 0, 1, 0, 0])
torch.Size([24])
Gradient norm: 2.321434805699509
tensor([1, 0, 0, 1, 0, 0, 0, 1, 1, 1])
torch.Size([24])
Gradient norm: 2.926475289931134
tensor([1, 1, 1, 0, 0, 1, 1, 0, 0, 1])
torch.Size([24])
Gradient norm: 1.5713198838342348
tensor([1, 0, 0, 0, 0, 0, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 1.999468900145224
tensor([1, 1, 0,

Epoch 453/600 - Training:   0%|          | 0/29 [00:00<?, ?it/s]

tensor([0, 0, 1, 0, 1, 1, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 4.700071617867685
tensor([1, 0, 0, 0, 0, 0, 1, 1, 1, 1])
torch.Size([24])
Gradient norm: 2.1452773819998145
tensor([1, 0, 1, 0, 0, 0, 1, 1, 0, 0])
torch.Size([24])
Gradient norm: 2.079006728660376
tensor([1, 0, 0, 1, 1, 1, 0, 0, 1, 0])
torch.Size([24])
Gradient norm: 2.361044708365069
tensor([0, 1, 0, 1, 1, 0, 1, 1, 0, 1])
torch.Size([24])
Gradient norm: 1.6712674097909677
tensor([0, 0, 1, 0, 1, 0, 1, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.2532640394614005
tensor([1, 0, 0, 0, 1, 0, 0, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.6770338472616126
tensor([0, 1, 1, 0, 0, 1, 0, 1, 0, 0])
torch.Size([24])
Gradient norm: 2.791657984259707
tensor([1, 0, 0, 1, 0, 0, 0, 1, 1, 1])
torch.Size([24])
Gradient norm: 2.7208008514729105
tensor([1, 1, 1, 0, 0, 1, 1, 0, 0, 1])
torch.Size([24])
Gradient norm: 1.8773522131265068
tensor([1, 0, 0, 0, 0, 0, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 1.544840013591418
tensor([1, 1, 0

Epoch 454/600 - Training:   0%|          | 0/29 [00:00<?, ?it/s]

tensor([0, 0, 1, 0, 1, 1, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 4.027317116966604
tensor([1, 0, 0, 0, 0, 0, 1, 1, 1, 1])
torch.Size([24])
Gradient norm: 1.9736766654132083
tensor([1, 0, 1, 0, 0, 0, 1, 1, 0, 0])
torch.Size([24])
Gradient norm: 2.474052354858494
tensor([1, 0, 0, 1, 1, 1, 0, 0, 1, 0])
torch.Size([24])
Gradient norm: 2.6922265513052848
tensor([0, 1, 0, 1, 1, 0, 1, 1, 0, 1])
torch.Size([24])
Gradient norm: 1.753600349910397
tensor([0, 0, 1, 0, 1, 0, 1, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.3575598903886763
tensor([1, 0, 0, 0, 1, 0, 0, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.707509207108789
tensor([0, 1, 1, 0, 0, 1, 0, 1, 0, 0])
torch.Size([24])
Gradient norm: 3.2816240842403954
tensor([1, 0, 0, 1, 0, 0, 0, 1, 1, 1])
torch.Size([24])
Gradient norm: 2.6456620803751036
tensor([1, 1, 1, 0, 0, 1, 1, 0, 0, 1])
torch.Size([24])
Gradient norm: 2.428880479127627
tensor([1, 0, 0, 0, 0, 0, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 1.5584521877634185
tensor([1, 1, 0

Epoch 455/600 - Training:   0%|          | 0/29 [00:00<?, ?it/s]

tensor([0, 0, 1, 0, 1, 1, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 3.3027275536738405
tensor([1, 0, 0, 0, 0, 0, 1, 1, 1, 1])
torch.Size([24])
Gradient norm: 1.9699890542437766
tensor([1, 0, 1, 0, 0, 0, 1, 1, 0, 0])
torch.Size([24])
Gradient norm: 2.1271690038334263
tensor([1, 0, 0, 1, 1, 1, 0, 0, 1, 0])
torch.Size([24])
Gradient norm: 2.5046563664930863
tensor([0, 1, 0, 1, 1, 0, 1, 1, 0, 1])
torch.Size([24])
Gradient norm: 1.9804581028952357
tensor([0, 0, 1, 0, 1, 0, 1, 1, 1, 0])
torch.Size([24])
Gradient norm: 1.9673664222566418
tensor([1, 0, 0, 0, 1, 0, 0, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.37344093285184
tensor([0, 1, 1, 0, 0, 1, 0, 1, 0, 0])
torch.Size([24])
Gradient norm: 3.0772984861492176
tensor([1, 0, 0, 1, 0, 0, 0, 1, 1, 1])
torch.Size([24])
Gradient norm: 2.848800383624942
tensor([1, 1, 1, 0, 0, 1, 1, 0, 0, 1])
torch.Size([24])
Gradient norm: 1.668388687117393
tensor([1, 0, 0, 0, 0, 0, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 2.0383857877038682
tensor([1, 1, 

Epoch 456/600 - Training:   0%|          | 0/29 [00:00<?, ?it/s]

tensor([0, 0, 1, 0, 1, 1, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 3.1842397883173943
tensor([1, 0, 0, 0, 0, 0, 1, 1, 1, 1])
torch.Size([24])
Gradient norm: 2.050745758111251
tensor([1, 0, 1, 0, 0, 0, 1, 1, 0, 0])
torch.Size([24])
Gradient norm: 1.9787966248753288
tensor([1, 0, 0, 1, 1, 1, 0, 0, 1, 0])
torch.Size([24])
Gradient norm: 2.658115453943274
tensor([0, 1, 0, 1, 1, 0, 1, 1, 0, 1])
torch.Size([24])
Gradient norm: 1.9684656503895268
tensor([0, 0, 1, 0, 1, 0, 1, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.285008967706484
tensor([1, 0, 0, 0, 1, 0, 0, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.3771469031619157
tensor([0, 1, 1, 0, 0, 1, 0, 1, 0, 0])
torch.Size([24])
Gradient norm: 2.43343085240074
tensor([1, 0, 0, 1, 0, 0, 0, 1, 1, 1])
torch.Size([24])
Gradient norm: 2.8616488479953848
tensor([1, 1, 1, 0, 0, 1, 1, 0, 0, 1])
torch.Size([24])
Gradient norm: 1.9529754914656925
tensor([1, 0, 0, 0, 0, 0, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 1.4884547786919713
tensor([1, 1, 0

Epoch 457/600 - Training:   0%|          | 0/29 [00:00<?, ?it/s]

tensor([0, 0, 1, 0, 1, 1, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 3.1272674783841725
tensor([1, 0, 0, 0, 0, 0, 1, 1, 1, 1])
torch.Size([24])
Gradient norm: 2.131518182363835
tensor([1, 0, 1, 0, 0, 0, 1, 1, 0, 0])
torch.Size([24])
Gradient norm: 2.2841126053361758
tensor([1, 0, 0, 1, 1, 1, 0, 0, 1, 0])
torch.Size([24])
Gradient norm: 1.9847328110000484
tensor([0, 1, 0, 1, 1, 0, 1, 1, 0, 1])
torch.Size([24])
Gradient norm: 1.6163815599311782
tensor([0, 0, 1, 0, 1, 0, 1, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.19245693856449
tensor([1, 0, 0, 0, 1, 0, 0, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.474858733358976
tensor([0, 1, 1, 0, 0, 1, 0, 1, 0, 0])
torch.Size([24])
Gradient norm: 3.1511676124075905
tensor([1, 0, 0, 1, 0, 0, 0, 1, 1, 1])
torch.Size([24])
Gradient norm: 2.9923835731573667
tensor([1, 1, 1, 0, 0, 1, 1, 0, 0, 1])
torch.Size([24])
Gradient norm: 1.7751620881555965
tensor([1, 0, 0, 0, 0, 0, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 2.3284669262662043
tensor([1, 1, 

Epoch 458/600 - Training:   0%|          | 0/29 [00:00<?, ?it/s]

tensor([0, 0, 1, 0, 1, 1, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 3.5745656627155045
tensor([1, 0, 0, 0, 0, 0, 1, 1, 1, 1])
torch.Size([24])
Gradient norm: 2.194842332556317
tensor([1, 0, 1, 0, 0, 0, 1, 1, 0, 0])
torch.Size([24])
Gradient norm: 2.1422478696273317
tensor([1, 0, 0, 1, 1, 1, 0, 0, 1, 0])
torch.Size([24])
Gradient norm: 2.673019989486576
tensor([0, 1, 0, 1, 1, 0, 1, 1, 0, 1])
torch.Size([24])
Gradient norm: 2.3478716717692203
tensor([0, 0, 1, 0, 1, 0, 1, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.99344760582294
tensor([1, 0, 0, 0, 1, 0, 0, 1, 1, 0])
torch.Size([24])
Gradient norm: 3.0636914917949403
tensor([0, 1, 1, 0, 0, 1, 0, 1, 0, 0])
torch.Size([24])
Gradient norm: 2.7045468199517613
tensor([1, 0, 0, 1, 0, 0, 0, 1, 1, 1])
torch.Size([24])
Gradient norm: 2.607453595476675
tensor([1, 1, 1, 0, 0, 1, 1, 0, 0, 1])
torch.Size([24])
Gradient norm: 1.8180587423471124
tensor([1, 0, 0, 0, 0, 0, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 1.957017019256808
tensor([1, 1, 0,

Epoch 459/600 - Training:   0%|          | 0/29 [00:00<?, ?it/s]

tensor([0, 0, 1, 0, 1, 1, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 3.698380091429004
tensor([1, 0, 0, 0, 0, 0, 1, 1, 1, 1])
torch.Size([24])
Gradient norm: 2.2850756118249236
tensor([1, 0, 1, 0, 0, 0, 1, 1, 0, 0])
torch.Size([24])
Gradient norm: 2.3362202320914327
tensor([1, 0, 0, 1, 1, 1, 0, 0, 1, 0])
torch.Size([24])
Gradient norm: 2.553024657014217
tensor([0, 1, 0, 1, 1, 0, 1, 1, 0, 1])
torch.Size([24])
Gradient norm: 1.7440445271268086
tensor([0, 0, 1, 0, 1, 0, 1, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.1396207843956736
tensor([1, 0, 0, 0, 1, 0, 0, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.6871619449053172
tensor([0, 1, 1, 0, 0, 1, 0, 1, 0, 0])
torch.Size([24])
Gradient norm: 2.873351890263085
tensor([1, 0, 0, 1, 0, 0, 0, 1, 1, 1])
torch.Size([24])
Gradient norm: 3.2106877123089066
tensor([1, 1, 1, 0, 0, 1, 1, 0, 0, 1])
torch.Size([24])
Gradient norm: 2.139613685385917
tensor([1, 0, 0, 0, 0, 0, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 1.7800682520491662
tensor([1, 1, 

Epoch 460/600 - Training:   0%|          | 0/29 [00:00<?, ?it/s]

tensor([0, 0, 1, 0, 1, 1, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 3.549007959058738
tensor([1, 0, 0, 0, 0, 0, 1, 1, 1, 1])
torch.Size([24])
Gradient norm: 1.8971452264225457
tensor([1, 0, 1, 0, 0, 0, 1, 1, 0, 0])
torch.Size([24])
Gradient norm: 1.8753935985849874
tensor([1, 0, 0, 1, 1, 1, 0, 0, 1, 0])
torch.Size([24])
Gradient norm: 2.538312492520258
tensor([0, 1, 0, 1, 1, 0, 1, 1, 0, 1])
torch.Size([24])
Gradient norm: 1.5699256598021403
tensor([0, 0, 1, 0, 1, 0, 1, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.092063719683401
tensor([1, 0, 0, 0, 1, 0, 0, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.8540461987373025
tensor([0, 1, 1, 0, 0, 1, 0, 1, 0, 0])
torch.Size([24])
Gradient norm: 3.0355464688561367
tensor([1, 0, 0, 1, 0, 0, 0, 1, 1, 1])
torch.Size([24])
Gradient norm: 2.9936747053680945
tensor([1, 1, 1, 0, 0, 1, 1, 0, 0, 1])
torch.Size([24])
Gradient norm: 2.1047247067856034
tensor([1, 0, 0, 0, 0, 0, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 1.9307812757008098
tensor([1, 1,

Epoch 461/600 - Training:   0%|          | 0/29 [00:00<?, ?it/s]

tensor([0, 0, 1, 0, 1, 1, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 3.687096728323503
tensor([1, 0, 0, 0, 0, 0, 1, 1, 1, 1])
torch.Size([24])
Gradient norm: 2.63028392152005
tensor([1, 0, 1, 0, 0, 0, 1, 1, 0, 0])
torch.Size([24])
Gradient norm: 2.529013153335338
tensor([1, 0, 0, 1, 1, 1, 0, 0, 1, 0])
torch.Size([24])
Gradient norm: 2.828217840075193
tensor([0, 1, 0, 1, 1, 0, 1, 1, 0, 1])
torch.Size([24])
Gradient norm: 2.174944449017231
tensor([0, 0, 1, 0, 1, 0, 1, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.2075789579855174
tensor([1, 0, 0, 0, 1, 0, 0, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.316485411644711
tensor([0, 1, 1, 0, 0, 1, 0, 1, 0, 0])
torch.Size([24])
Gradient norm: 3.435674332012106
tensor([1, 0, 0, 1, 0, 0, 0, 1, 1, 1])
torch.Size([24])
Gradient norm: 3.6261029814965813
tensor([1, 1, 1, 0, 0, 1, 1, 0, 0, 1])
torch.Size([24])
Gradient norm: 1.954385057506648
tensor([1, 0, 0, 0, 0, 0, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 2.1084515618598276
tensor([1, 1, 0, 0,

Epoch 462/600 - Training:   0%|          | 0/29 [00:00<?, ?it/s]

tensor([0, 0, 1, 0, 1, 1, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 3.372638170107656
tensor([1, 0, 0, 0, 0, 0, 1, 1, 1, 1])
torch.Size([24])
Gradient norm: 1.8709335819055473
tensor([1, 0, 1, 0, 0, 0, 1, 1, 0, 0])
torch.Size([24])
Gradient norm: 2.2839196025844046
tensor([1, 0, 0, 1, 1, 1, 0, 0, 1, 0])
torch.Size([24])
Gradient norm: 2.768706540210892
tensor([0, 1, 0, 1, 1, 0, 1, 1, 0, 1])
torch.Size([24])
Gradient norm: 1.7701690205150609
tensor([0, 0, 1, 0, 1, 0, 1, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.3548936015697466
tensor([1, 0, 0, 0, 1, 0, 0, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.7910915958889047
tensor([0, 1, 1, 0, 0, 1, 0, 1, 0, 0])
torch.Size([24])
Gradient norm: 2.8116200701332654
tensor([1, 0, 0, 1, 0, 0, 0, 1, 1, 1])
torch.Size([24])
Gradient norm: 2.9161154902850677
tensor([1, 1, 1, 0, 0, 1, 1, 0, 0, 1])
torch.Size([24])
Gradient norm: 1.7343383559003644
tensor([1, 0, 0, 0, 0, 0, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 1.5838781286274386
tensor([1, 1

Epoch 463/600 - Training:   0%|          | 0/29 [00:00<?, ?it/s]

tensor([0, 0, 1, 0, 1, 1, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 4.047933289028109
tensor([1, 0, 0, 0, 0, 0, 1, 1, 1, 1])
torch.Size([24])
Gradient norm: 2.0241689651088675
tensor([1, 0, 1, 0, 0, 0, 1, 1, 0, 0])
torch.Size([24])
Gradient norm: 2.3360241072292287
tensor([1, 0, 0, 1, 1, 1, 0, 0, 1, 0])
torch.Size([24])
Gradient norm: 2.230921842782024
tensor([0, 1, 0, 1, 1, 0, 1, 1, 0, 1])
torch.Size([24])
Gradient norm: 2.0764784791689443
tensor([0, 0, 1, 0, 1, 0, 1, 1, 1, 0])
torch.Size([24])
Gradient norm: 1.8529200125505645
tensor([1, 0, 0, 0, 1, 0, 0, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.587436482338617
tensor([0, 1, 1, 0, 0, 1, 0, 1, 0, 0])
torch.Size([24])
Gradient norm: 3.0861514706714117
tensor([1, 0, 0, 1, 0, 0, 0, 1, 1, 1])
torch.Size([24])
Gradient norm: 2.7949065033274345
tensor([1, 1, 1, 0, 0, 1, 1, 0, 0, 1])
torch.Size([24])
Gradient norm: 2.024346315229635
tensor([1, 0, 0, 0, 0, 0, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 2.025976648640158
tensor([1, 1, 0

Epoch 464/600 - Training:   0%|          | 0/29 [00:00<?, ?it/s]

tensor([0, 0, 1, 0, 1, 1, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 3.408461599431447
tensor([1, 0, 0, 0, 0, 0, 1, 1, 1, 1])
torch.Size([24])
Gradient norm: 1.7282148388416325
tensor([1, 0, 1, 0, 0, 0, 1, 1, 0, 0])
torch.Size([24])
Gradient norm: 2.041482580265314
tensor([1, 0, 0, 1, 1, 1, 0, 0, 1, 0])
torch.Size([24])
Gradient norm: 2.975334805740282
tensor([0, 1, 0, 1, 1, 0, 1, 1, 0, 1])
torch.Size([24])
Gradient norm: 1.703187482086828
tensor([0, 0, 1, 0, 1, 0, 1, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.456789909906513
tensor([1, 0, 0, 0, 1, 0, 0, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.6193268096187174
tensor([0, 1, 1, 0, 0, 1, 0, 1, 0, 0])
torch.Size([24])
Gradient norm: 2.4457608990503634
tensor([1, 0, 0, 1, 0, 0, 0, 1, 1, 1])
torch.Size([24])
Gradient norm: 2.802847031065321
tensor([1, 1, 1, 0, 0, 1, 1, 0, 0, 1])
torch.Size([24])
Gradient norm: 1.7091809086297713
tensor([1, 0, 0, 0, 0, 0, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 1.5156616371477523
tensor([1, 1, 0,

Epoch 465/600 - Training:   0%|          | 0/29 [00:00<?, ?it/s]

tensor([0, 0, 1, 0, 1, 1, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 3.327292380905686
tensor([1, 0, 0, 0, 0, 0, 1, 1, 1, 1])
torch.Size([24])
Gradient norm: 2.36202167034607
tensor([1, 0, 1, 0, 0, 0, 1, 1, 0, 0])
torch.Size([24])
Gradient norm: 2.2419593299016074
tensor([1, 0, 0, 1, 1, 1, 0, 0, 1, 0])
torch.Size([24])
Gradient norm: 2.3870202580920443
tensor([0, 1, 0, 1, 1, 0, 1, 1, 0, 1])
torch.Size([24])
Gradient norm: 1.770371928545021
tensor([0, 0, 1, 0, 1, 0, 1, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.1255939963046733
tensor([1, 0, 0, 0, 1, 0, 0, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.7613380274530193
tensor([0, 1, 1, 0, 0, 1, 0, 1, 0, 0])
torch.Size([24])
Gradient norm: 2.999553782295616
tensor([1, 0, 0, 1, 0, 0, 0, 1, 1, 1])
torch.Size([24])
Gradient norm: 2.8053830281345715
tensor([1, 1, 1, 0, 0, 1, 1, 0, 0, 1])
torch.Size([24])
Gradient norm: 1.8242073904348877
tensor([1, 0, 0, 0, 0, 0, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 2.08031884916114
tensor([1, 1, 0, 

Epoch 466/600 - Training:   0%|          | 0/29 [00:00<?, ?it/s]

tensor([0, 0, 1, 0, 1, 1, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 2.8721972685450905
tensor([1, 0, 0, 0, 0, 0, 1, 1, 1, 1])
torch.Size([24])
Gradient norm: 2.0475367496471097
tensor([1, 0, 1, 0, 0, 0, 1, 1, 0, 0])
torch.Size([24])
Gradient norm: 2.2821489551428122
tensor([1, 0, 0, 1, 1, 1, 0, 0, 1, 0])
torch.Size([24])
Gradient norm: 2.1063511834097284
tensor([0, 1, 0, 1, 1, 0, 1, 1, 0, 1])
torch.Size([24])
Gradient norm: 2.192503320827902
tensor([0, 0, 1, 0, 1, 0, 1, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.0211137333244054
tensor([1, 0, 0, 0, 1, 0, 0, 1, 1, 0])
torch.Size([24])
Gradient norm: 3.155946978096244
tensor([0, 1, 1, 0, 0, 1, 0, 1, 0, 0])
torch.Size([24])
Gradient norm: 3.164573220884374
tensor([1, 0, 0, 1, 0, 0, 0, 1, 1, 1])
torch.Size([24])
Gradient norm: 2.9036504725811545
tensor([1, 1, 1, 0, 0, 1, 1, 0, 0, 1])
torch.Size([24])
Gradient norm: 1.5704762807692423
tensor([1, 0, 0, 0, 0, 0, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 1.8452678420837314
tensor([1, 1,

Epoch 467/600 - Training:   0%|          | 0/29 [00:00<?, ?it/s]

tensor([0, 0, 1, 0, 1, 1, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 3.821830491103008
tensor([1, 0, 0, 0, 0, 0, 1, 1, 1, 1])
torch.Size([24])
Gradient norm: 2.3238599690828425
tensor([1, 0, 1, 0, 0, 0, 1, 1, 0, 0])
torch.Size([24])
Gradient norm: 1.5104934281571587
tensor([1, 0, 0, 1, 1, 1, 0, 0, 1, 0])
torch.Size([24])
Gradient norm: 2.212394011137378
tensor([0, 1, 0, 1, 1, 0, 1, 1, 0, 1])
torch.Size([24])
Gradient norm: 2.431328457969714
tensor([0, 0, 1, 0, 1, 0, 1, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.405032088088059
tensor([1, 0, 0, 0, 1, 0, 0, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.3534223618920618
tensor([0, 1, 1, 0, 0, 1, 0, 1, 0, 0])
torch.Size([24])
Gradient norm: 3.1833496401873678
tensor([1, 0, 0, 1, 0, 0, 0, 1, 1, 1])
torch.Size([24])
Gradient norm: 3.115262207556503
tensor([1, 1, 1, 0, 0, 1, 1, 0, 0, 1])
torch.Size([24])
Gradient norm: 1.9512766091966591
tensor([1, 0, 0, 0, 0, 0, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 1.6060821475932
tensor([1, 1, 0, 0

Epoch 468/600 - Training:   0%|          | 0/29 [00:00<?, ?it/s]

tensor([0, 0, 1, 0, 1, 1, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 3.2765708204186708
tensor([1, 0, 0, 0, 0, 0, 1, 1, 1, 1])
torch.Size([24])
Gradient norm: 1.7302241009992514
tensor([1, 0, 1, 0, 0, 0, 1, 1, 0, 0])
torch.Size([24])
Gradient norm: 2.4665489915124756
tensor([1, 0, 0, 1, 1, 1, 0, 0, 1, 0])
torch.Size([24])
Gradient norm: 2.86734242898633
tensor([0, 1, 0, 1, 1, 0, 1, 1, 0, 1])
torch.Size([24])
Gradient norm: 2.0100649173389247
tensor([0, 0, 1, 0, 1, 0, 1, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.3584251470613324
tensor([1, 0, 0, 0, 1, 0, 0, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.3922117899786355
tensor([0, 1, 1, 0, 0, 1, 0, 1, 0, 0])
torch.Size([24])
Gradient norm: 2.5293285960493277
tensor([1, 0, 0, 1, 0, 0, 0, 1, 1, 1])
torch.Size([24])
Gradient norm: 3.3181100591151904
tensor([1, 1, 1, 0, 0, 1, 1, 0, 0, 1])
torch.Size([24])
Gradient norm: 2.1367843151353503
tensor([1, 0, 0, 0, 0, 0, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 1.5680672278384018
tensor([1, 1

Epoch 469/600 - Training:   0%|          | 0/29 [00:00<?, ?it/s]

tensor([0, 0, 1, 0, 1, 1, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 3.5240908999497322
tensor([1, 0, 0, 0, 0, 0, 1, 1, 1, 1])
torch.Size([24])
Gradient norm: 2.167582847146685
tensor([1, 0, 1, 0, 0, 0, 1, 1, 0, 0])
torch.Size([24])
Gradient norm: 2.426173554594271
tensor([1, 0, 0, 1, 1, 1, 0, 0, 1, 0])
torch.Size([24])
Gradient norm: 2.4608430089038826
tensor([0, 1, 0, 1, 1, 0, 1, 1, 0, 1])
torch.Size([24])
Gradient norm: 1.6914982496785762
tensor([0, 0, 1, 0, 1, 0, 1, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.1167710247109377
tensor([1, 0, 0, 0, 1, 0, 0, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.348035294448554
tensor([0, 1, 1, 0, 0, 1, 0, 1, 0, 0])
torch.Size([24])
Gradient norm: 3.118911890780507
tensor([1, 0, 0, 1, 0, 0, 0, 1, 1, 1])
torch.Size([24])
Gradient norm: 3.3077677875083538
tensor([1, 1, 1, 0, 0, 1, 1, 0, 0, 1])
torch.Size([24])
Gradient norm: 1.8757313986124555
tensor([1, 0, 0, 0, 0, 0, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 1.7706331704918365
tensor([1, 1, 

Epoch 470/600 - Training:   0%|          | 0/29 [00:00<?, ?it/s]

tensor([0, 0, 1, 0, 1, 1, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 4.004500992091071
tensor([1, 0, 0, 0, 0, 0, 1, 1, 1, 1])
torch.Size([24])
Gradient norm: 1.756930962923077
tensor([1, 0, 1, 0, 0, 0, 1, 1, 0, 0])
torch.Size([24])
Gradient norm: 2.7338162187075628
tensor([1, 0, 0, 1, 1, 1, 0, 0, 1, 0])
torch.Size([24])
Gradient norm: 2.9308898295283683
tensor([0, 1, 0, 1, 1, 0, 1, 1, 0, 1])
torch.Size([24])
Gradient norm: 1.851220557443815
tensor([0, 0, 1, 0, 1, 0, 1, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.503722838667803
tensor([1, 0, 0, 0, 1, 0, 0, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.471926733051211
tensor([0, 1, 1, 0, 0, 1, 0, 1, 0, 0])
torch.Size([24])
Gradient norm: 3.5930232612758686
tensor([1, 0, 0, 1, 0, 0, 0, 1, 1, 1])
torch.Size([24])
Gradient norm: 3.0064779202414496
tensor([1, 1, 1, 0, 0, 1, 1, 0, 0, 1])
torch.Size([24])
Gradient norm: 1.878336637758553
tensor([1, 0, 0, 0, 0, 0, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 1.913382157673838
tensor([1, 1, 0, 

Epoch 471/600 - Training:   0%|          | 0/29 [00:00<?, ?it/s]

tensor([0, 0, 1, 0, 1, 1, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 3.885663511886246
tensor([1, 0, 0, 0, 0, 0, 1, 1, 1, 1])
torch.Size([24])
Gradient norm: 2.4091039291337006
tensor([1, 0, 1, 0, 0, 0, 1, 1, 0, 0])
torch.Size([24])
Gradient norm: 1.7440609783643841
tensor([1, 0, 0, 1, 1, 1, 0, 0, 1, 0])
torch.Size([24])
Gradient norm: 2.4955950664314366
tensor([0, 1, 0, 1, 1, 0, 1, 1, 0, 1])
torch.Size([24])
Gradient norm: 1.807986582178759
tensor([0, 0, 1, 0, 1, 0, 1, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.3471483583412787
tensor([1, 0, 0, 0, 1, 0, 0, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.853053709428596
tensor([0, 1, 1, 0, 0, 1, 0, 1, 0, 0])
torch.Size([24])
Gradient norm: 2.817846953949032
tensor([1, 0, 0, 1, 0, 0, 0, 1, 1, 1])
torch.Size([24])
Gradient norm: 2.927826552567013
tensor([1, 1, 1, 0, 0, 1, 1, 0, 0, 1])
torch.Size([24])
Gradient norm: 1.733144871182405
tensor([1, 0, 0, 0, 0, 0, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 1.7519656461792183
tensor([1, 1, 0,

Epoch 472/600 - Training:   0%|          | 0/29 [00:00<?, ?it/s]

tensor([0, 0, 1, 0, 1, 1, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 3.478981312132611
tensor([1, 0, 0, 0, 0, 0, 1, 1, 1, 1])
torch.Size([24])
Gradient norm: 1.9946822834116325
tensor([1, 0, 1, 0, 0, 0, 1, 1, 0, 0])
torch.Size([24])
Gradient norm: 2.239744200377555
tensor([1, 0, 0, 1, 1, 1, 0, 0, 1, 0])
torch.Size([24])
Gradient norm: 2.4363137582611945
tensor([0, 1, 0, 1, 1, 0, 1, 1, 0, 1])
torch.Size([24])
Gradient norm: 1.7377406303223222
tensor([0, 0, 1, 0, 1, 0, 1, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.8532552908936126
tensor([1, 0, 0, 0, 1, 0, 0, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.6550803594904573
tensor([0, 1, 1, 0, 0, 1, 0, 1, 0, 0])
torch.Size([24])
Gradient norm: 2.753924596462585
tensor([1, 0, 0, 1, 0, 0, 0, 1, 1, 1])
torch.Size([24])
Gradient norm: 2.8839483118622633
tensor([1, 1, 1, 0, 0, 1, 1, 0, 0, 1])
torch.Size([24])
Gradient norm: 2.0299101799125325
tensor([1, 0, 0, 0, 0, 0, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 1.995107477174833
tensor([1, 1, 

Epoch 473/600 - Training:   0%|          | 0/29 [00:00<?, ?it/s]

tensor([0, 0, 1, 0, 1, 1, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 3.371478478824498
tensor([1, 0, 0, 0, 0, 0, 1, 1, 1, 1])
torch.Size([24])
Gradient norm: 2.013005399840357
tensor([1, 0, 1, 0, 0, 0, 1, 1, 0, 0])
torch.Size([24])
Gradient norm: 2.218568446148878
tensor([1, 0, 0, 1, 1, 1, 0, 0, 1, 0])
torch.Size([24])
Gradient norm: 2.830694506333143
tensor([0, 1, 0, 1, 1, 0, 1, 1, 0, 1])
torch.Size([24])
Gradient norm: 2.0875643406982247
tensor([0, 0, 1, 0, 1, 0, 1, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.0499781155905255
tensor([1, 0, 0, 0, 1, 0, 0, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.442227227519522
tensor([0, 1, 1, 0, 0, 1, 0, 1, 0, 0])
torch.Size([24])
Gradient norm: 3.09300097882163
tensor([1, 0, 0, 1, 0, 0, 0, 1, 1, 1])
torch.Size([24])
Gradient norm: 2.7458979178212792
tensor([1, 1, 1, 0, 0, 1, 1, 0, 0, 1])
torch.Size([24])
Gradient norm: 2.007979410975915
tensor([1, 0, 0, 0, 0, 0, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 1.5605431696361602
tensor([1, 1, 0, 0

Epoch 474/600 - Training:   0%|          | 0/29 [00:00<?, ?it/s]

tensor([0, 0, 1, 0, 1, 1, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 3.3053646899588895
tensor([1, 0, 0, 0, 0, 0, 1, 1, 1, 1])
torch.Size([24])
Gradient norm: 1.712456637088564
tensor([1, 0, 1, 0, 0, 0, 1, 1, 0, 0])
torch.Size([24])
Gradient norm: 2.203196808174375
tensor([1, 0, 0, 1, 1, 1, 0, 0, 1, 0])
torch.Size([24])
Gradient norm: 3.0056929757489574
tensor([0, 1, 0, 1, 1, 0, 1, 1, 0, 1])
torch.Size([24])
Gradient norm: 1.9112787260806514
tensor([0, 0, 1, 0, 1, 0, 1, 1, 1, 0])
torch.Size([24])
Gradient norm: 1.8175024925403318
tensor([1, 0, 0, 0, 1, 0, 0, 1, 1, 0])
torch.Size([24])
Gradient norm: 3.3662849612492938
tensor([0, 1, 1, 0, 0, 1, 0, 1, 0, 0])
torch.Size([24])
Gradient norm: 3.3388629984850966
tensor([1, 0, 0, 1, 0, 0, 0, 1, 1, 1])
torch.Size([24])
Gradient norm: 3.4034651653565002
tensor([1, 1, 1, 0, 0, 1, 1, 0, 0, 1])
torch.Size([24])
Gradient norm: 1.6227953116349279
tensor([1, 0, 0, 0, 0, 0, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 1.7309961647557712
tensor([1, 1

Epoch 475/600 - Training:   0%|          | 0/29 [00:00<?, ?it/s]

tensor([0, 0, 1, 0, 1, 1, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 2.983672635852649
tensor([1, 0, 0, 0, 0, 0, 1, 1, 1, 1])
torch.Size([24])
Gradient norm: 2.011055928918614
tensor([1, 0, 1, 0, 0, 0, 1, 1, 0, 0])
torch.Size([24])
Gradient norm: 2.6683599735831067
tensor([1, 0, 0, 1, 1, 1, 0, 0, 1, 0])
torch.Size([24])
Gradient norm: 3.109129973527233
tensor([0, 1, 0, 1, 1, 0, 1, 1, 0, 1])
torch.Size([24])
Gradient norm: 1.9109017889808277
tensor([0, 0, 1, 0, 1, 0, 1, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.016480906503568
tensor([1, 0, 0, 0, 1, 0, 0, 1, 1, 0])
torch.Size([24])
Gradient norm: 3.0985409089376845
tensor([0, 1, 1, 0, 0, 1, 0, 1, 0, 0])
torch.Size([24])
Gradient norm: 3.4628945074688544
tensor([1, 0, 0, 1, 0, 0, 0, 1, 1, 1])
torch.Size([24])
Gradient norm: 2.7518196540571376
tensor([1, 1, 1, 0, 0, 1, 1, 0, 0, 1])
torch.Size([24])
Gradient norm: 1.7601497859228092
tensor([1, 0, 0, 0, 0, 0, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 1.7384208549602527
tensor([1, 1, 

Epoch 476/600 - Training:   0%|          | 0/29 [00:00<?, ?it/s]

tensor([0, 0, 1, 0, 1, 1, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 2.9243095794715686
tensor([1, 0, 0, 0, 0, 0, 1, 1, 1, 1])
torch.Size([24])
Gradient norm: 2.5678433728873786
tensor([1, 0, 1, 0, 0, 0, 1, 1, 0, 0])
torch.Size([24])
Gradient norm: 2.2883924122309587
tensor([1, 0, 0, 1, 1, 1, 0, 0, 1, 0])
torch.Size([24])
Gradient norm: 2.66239031152354
tensor([0, 1, 0, 1, 1, 0, 1, 1, 0, 1])
torch.Size([24])
Gradient norm: 1.830429361124182
tensor([0, 0, 1, 0, 1, 0, 1, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.322881177553114
tensor([1, 0, 0, 0, 1, 0, 0, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.6644625158580966
tensor([0, 1, 1, 0, 0, 1, 0, 1, 0, 0])
torch.Size([24])
Gradient norm: 2.8496708752595894
tensor([1, 0, 0, 1, 0, 0, 0, 1, 1, 1])
torch.Size([24])
Gradient norm: 3.00171643825622
tensor([1, 1, 1, 0, 0, 1, 1, 0, 0, 1])
torch.Size([24])
Gradient norm: 2.2987474813300013
tensor([1, 0, 0, 0, 0, 0, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 2.6490990855061147
tensor([1, 1, 0,

Epoch 477/600 - Training:   0%|          | 0/29 [00:00<?, ?it/s]

tensor([0, 0, 1, 0, 1, 1, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 2.646087586218087
tensor([1, 0, 0, 0, 0, 0, 1, 1, 1, 1])
torch.Size([24])
Gradient norm: 2.3578168230492302
tensor([1, 0, 1, 0, 0, 0, 1, 1, 0, 0])
torch.Size([24])
Gradient norm: 3.014787747383025
tensor([1, 0, 0, 1, 1, 1, 0, 0, 1, 0])
torch.Size([24])
Gradient norm: 2.868036082223799
tensor([0, 1, 0, 1, 1, 0, 1, 1, 0, 1])
torch.Size([24])
Gradient norm: 2.0881171411886825
tensor([0, 0, 1, 0, 1, 0, 1, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.2302488759413754
tensor([1, 0, 0, 0, 1, 0, 0, 1, 1, 0])
torch.Size([24])
Gradient norm: 3.332814633374302
tensor([0, 1, 1, 0, 0, 1, 0, 1, 0, 0])
torch.Size([24])
Gradient norm: 2.854461251438107
tensor([1, 0, 0, 1, 0, 0, 0, 1, 1, 1])
torch.Size([24])
Gradient norm: 3.3902701835991116
tensor([1, 1, 1, 0, 0, 1, 1, 0, 0, 1])
torch.Size([24])
Gradient norm: 1.4260165676732464
tensor([1, 0, 0, 0, 0, 0, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 1.7946282507713323
tensor([1, 1, 0

Epoch 478/600 - Training:   0%|          | 0/29 [00:00<?, ?it/s]

tensor([0, 0, 1, 0, 1, 1, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 3.412764452772107
tensor([1, 0, 0, 0, 0, 0, 1, 1, 1, 1])
torch.Size([24])
Gradient norm: 2.1082030923035275
tensor([1, 0, 1, 0, 0, 0, 1, 1, 0, 0])
torch.Size([24])
Gradient norm: 2.7746170098060423
tensor([1, 0, 0, 1, 1, 1, 0, 0, 1, 0])
torch.Size([24])
Gradient norm: 2.477367190070051
tensor([0, 1, 0, 1, 1, 0, 1, 1, 0, 1])
torch.Size([24])
Gradient norm: 1.9017006346709833
tensor([0, 0, 1, 0, 1, 0, 1, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.7371327917198234
tensor([1, 0, 0, 0, 1, 0, 0, 1, 1, 0])
torch.Size([24])
Gradient norm: 3.019734876866766
tensor([0, 1, 1, 0, 0, 1, 0, 1, 0, 0])
torch.Size([24])
Gradient norm: 2.4251944774120098
tensor([1, 0, 0, 1, 0, 0, 0, 1, 1, 1])
torch.Size([24])
Gradient norm: 2.7703259883010807
tensor([1, 1, 1, 0, 0, 1, 1, 0, 0, 1])
torch.Size([24])
Gradient norm: 1.893738768165199
tensor([1, 0, 0, 0, 0, 0, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 1.6611189923084104
tensor([1, 1, 

Epoch 479/600 - Training:   0%|          | 0/29 [00:00<?, ?it/s]

tensor([0, 0, 1, 0, 1, 1, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 3.312055910435844
tensor([1, 0, 0, 0, 0, 0, 1, 1, 1, 1])
torch.Size([24])
Gradient norm: 1.8331712254224348
tensor([1, 0, 1, 0, 0, 0, 1, 1, 0, 0])
torch.Size([24])
Gradient norm: 2.363141330872195
tensor([1, 0, 0, 1, 1, 1, 0, 0, 1, 0])
torch.Size([24])
Gradient norm: 2.0338490956009916
tensor([0, 1, 0, 1, 1, 0, 1, 1, 0, 1])
torch.Size([24])
Gradient norm: 1.8459882614454268
tensor([0, 0, 1, 0, 1, 0, 1, 1, 1, 0])
torch.Size([24])
Gradient norm: 1.9994302672514617
tensor([1, 0, 0, 0, 1, 0, 0, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.4511688674553445
tensor([0, 1, 1, 0, 0, 1, 0, 1, 0, 0])
torch.Size([24])
Gradient norm: 2.8046210419867856
tensor([1, 0, 0, 1, 0, 0, 0, 1, 1, 1])
torch.Size([24])
Gradient norm: 3.379745540468468
tensor([1, 1, 1, 0, 0, 1, 1, 0, 0, 1])
torch.Size([24])
Gradient norm: 1.9234650068627408
tensor([1, 0, 0, 0, 0, 0, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 1.8424385678575301
tensor([1, 1,

Epoch 480/600 - Training:   0%|          | 0/29 [00:00<?, ?it/s]

tensor([0, 0, 1, 0, 1, 1, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 3.5402528248930047
tensor([1, 0, 0, 0, 0, 0, 1, 1, 1, 1])
torch.Size([24])
Gradient norm: 2.593838816063722
tensor([1, 0, 1, 0, 0, 0, 1, 1, 0, 0])
torch.Size([24])
Gradient norm: 1.8086641224025886
tensor([1, 0, 0, 1, 1, 1, 0, 0, 1, 0])
torch.Size([24])
Gradient norm: 2.7193878542307464
tensor([0, 1, 0, 1, 1, 0, 1, 1, 0, 1])
torch.Size([24])
Gradient norm: 2.332794962654506
tensor([0, 0, 1, 0, 1, 0, 1, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.5536911541938343
tensor([1, 0, 0, 0, 1, 0, 0, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.873957268970474
tensor([0, 1, 1, 0, 0, 1, 0, 1, 0, 0])
torch.Size([24])
Gradient norm: 3.1445949640066684
tensor([1, 0, 0, 1, 0, 0, 0, 1, 1, 1])
torch.Size([24])
Gradient norm: 2.6548722036361423
tensor([1, 1, 1, 0, 0, 1, 1, 0, 0, 1])
torch.Size([24])
Gradient norm: 2.099753251562598
tensor([1, 0, 0, 0, 0, 0, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 1.654363098223013
tensor([1, 1, 0

Epoch 481/600 - Training:   0%|          | 0/29 [00:00<?, ?it/s]

tensor([0, 0, 1, 0, 1, 1, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 3.4572700773684453
tensor([1, 0, 0, 0, 0, 0, 1, 1, 1, 1])
torch.Size([24])
Gradient norm: 1.8320943846095452
tensor([1, 0, 1, 0, 0, 0, 1, 1, 0, 0])
torch.Size([24])
Gradient norm: 1.8221391317615916
tensor([1, 0, 0, 1, 1, 1, 0, 0, 1, 0])
torch.Size([24])
Gradient norm: 2.5305839431131636
tensor([0, 1, 0, 1, 1, 0, 1, 1, 0, 1])
torch.Size([24])
Gradient norm: 2.0067373186466484
tensor([0, 0, 1, 0, 1, 0, 1, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.1868495726183177
tensor([1, 0, 0, 0, 1, 0, 0, 1, 1, 0])
torch.Size([24])
Gradient norm: 3.4214984606146412
tensor([0, 1, 1, 0, 0, 1, 0, 1, 0, 0])
torch.Size([24])
Gradient norm: 3.2253724821367387
tensor([1, 0, 0, 1, 0, 0, 0, 1, 1, 1])
torch.Size([24])
Gradient norm: 3.059606229855848
tensor([1, 1, 1, 0, 0, 1, 1, 0, 0, 1])
torch.Size([24])
Gradient norm: 1.7352881933433286
tensor([1, 0, 0, 0, 0, 0, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 1.5747406034219864
tensor([1, 

Epoch 482/600 - Training:   0%|          | 0/29 [00:00<?, ?it/s]

tensor([0, 0, 1, 0, 1, 1, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 3.5062514080027323
tensor([1, 0, 0, 0, 0, 0, 1, 1, 1, 1])
torch.Size([24])
Gradient norm: 1.9652513850148263
tensor([1, 0, 1, 0, 0, 0, 1, 1, 0, 0])
torch.Size([24])
Gradient norm: 2.153231557604211
tensor([1, 0, 0, 1, 1, 1, 0, 0, 1, 0])
torch.Size([24])
Gradient norm: 3.006018860494078
tensor([0, 1, 0, 1, 1, 0, 1, 1, 0, 1])
torch.Size([24])
Gradient norm: 2.079421350309202
tensor([0, 0, 1, 0, 1, 0, 1, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.0783668724957045
tensor([1, 0, 0, 0, 1, 0, 0, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.8385283324162067
tensor([0, 1, 1, 0, 0, 1, 0, 1, 0, 0])
torch.Size([24])
Gradient norm: 2.7988030868311053
tensor([1, 0, 0, 1, 0, 0, 0, 1, 1, 1])
torch.Size([24])
Gradient norm: 2.8879799116515477
tensor([1, 1, 1, 0, 0, 1, 1, 0, 0, 1])
torch.Size([24])
Gradient norm: 1.917746295986023
tensor([1, 0, 0, 0, 0, 0, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 1.949387296687692
tensor([1, 1, 0

Epoch 483/600 - Training:   0%|          | 0/29 [00:00<?, ?it/s]

tensor([0, 0, 1, 0, 1, 1, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 3.593299287394946
tensor([1, 0, 0, 0, 0, 0, 1, 1, 1, 1])
torch.Size([24])
Gradient norm: 2.440543233782588
tensor([1, 0, 1, 0, 0, 0, 1, 1, 0, 0])
torch.Size([24])
Gradient norm: 2.34053396189887
tensor([1, 0, 0, 1, 1, 1, 0, 0, 1, 0])
torch.Size([24])
Gradient norm: 2.2097220030944986
tensor([0, 1, 0, 1, 1, 0, 1, 1, 0, 1])
torch.Size([24])
Gradient norm: 1.7397351923698121
tensor([0, 0, 1, 0, 1, 0, 1, 1, 1, 0])
torch.Size([24])
Gradient norm: 1.9379972273206307
tensor([1, 0, 0, 0, 1, 0, 0, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.4427671435645855
tensor([0, 1, 1, 0, 0, 1, 0, 1, 0, 0])
torch.Size([24])
Gradient norm: 3.1449116756457243
tensor([1, 0, 0, 1, 0, 0, 0, 1, 1, 1])
torch.Size([24])
Gradient norm: 2.8131065896553245
tensor([1, 1, 1, 0, 0, 1, 1, 0, 0, 1])
torch.Size([24])
Gradient norm: 1.609004449696617
tensor([1, 0, 0, 0, 0, 0, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 2.114307247511394
tensor([1, 1, 0,

Epoch 484/600 - Training:   0%|          | 0/29 [00:00<?, ?it/s]

tensor([0, 0, 1, 0, 1, 1, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 3.4078965916556063
tensor([1, 0, 0, 0, 0, 0, 1, 1, 1, 1])
torch.Size([24])
Gradient norm: 1.9607431488337785
tensor([1, 0, 1, 0, 0, 0, 1, 1, 0, 0])
torch.Size([24])
Gradient norm: 2.26065173309347
tensor([1, 0, 0, 1, 1, 1, 0, 0, 1, 0])
torch.Size([24])
Gradient norm: 2.5244062492314656
tensor([0, 1, 0, 1, 1, 0, 1, 1, 0, 1])
torch.Size([24])
Gradient norm: 1.8080270121666413
tensor([0, 0, 1, 0, 1, 0, 1, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.766311738314992
tensor([1, 0, 0, 0, 1, 0, 0, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.5646004001777585
tensor([0, 1, 1, 0, 0, 1, 0, 1, 0, 0])
torch.Size([24])
Gradient norm: 3.3524993971966315
tensor([1, 0, 0, 1, 0, 0, 0, 1, 1, 1])
torch.Size([24])
Gradient norm: 2.7109717764423986
tensor([1, 1, 1, 0, 0, 1, 1, 0, 0, 1])
torch.Size([24])
Gradient norm: 1.6418249127621756
tensor([1, 0, 0, 0, 0, 0, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 1.382166132549573
tensor([1, 1, 

Epoch 485/600 - Training:   0%|          | 0/29 [00:00<?, ?it/s]

tensor([0, 0, 1, 0, 1, 1, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 3.7047499916833297
tensor([1, 0, 0, 0, 0, 0, 1, 1, 1, 1])
torch.Size([24])
Gradient norm: 1.910801534375387
tensor([1, 0, 1, 0, 0, 0, 1, 1, 0, 0])
torch.Size([24])
Gradient norm: 2.8153825590406214
tensor([1, 0, 0, 1, 1, 1, 0, 0, 1, 0])
torch.Size([24])
Gradient norm: 2.907854032547469
tensor([0, 1, 0, 1, 1, 0, 1, 1, 0, 1])
torch.Size([24])
Gradient norm: 1.5152505208154565
tensor([0, 0, 1, 0, 1, 0, 1, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.134072348530911
tensor([1, 0, 0, 0, 1, 0, 0, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.8134369896502975
tensor([0, 1, 1, 0, 0, 1, 0, 1, 0, 0])
torch.Size([24])
Gradient norm: 3.20467210439763
tensor([1, 0, 0, 1, 0, 0, 0, 1, 1, 1])
torch.Size([24])
Gradient norm: 3.2366904046158838
tensor([1, 1, 1, 0, 0, 1, 1, 0, 0, 1])
torch.Size([24])
Gradient norm: 1.5862555816681922
tensor([1, 0, 0, 0, 0, 0, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 2.459830393358324
tensor([1, 1, 0,

Epoch 486/600 - Training:   0%|          | 0/29 [00:00<?, ?it/s]

tensor([0, 0, 1, 0, 1, 1, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 2.796104366496191
tensor([1, 0, 0, 0, 0, 0, 1, 1, 1, 1])
torch.Size([24])
Gradient norm: 2.0727007189982394
tensor([1, 0, 1, 0, 0, 0, 1, 1, 0, 0])
torch.Size([24])
Gradient norm: 2.1471890662162556
tensor([1, 0, 0, 1, 1, 1, 0, 0, 1, 0])
torch.Size([24])
Gradient norm: 2.88328377997156
tensor([0, 1, 0, 1, 1, 0, 1, 1, 0, 1])
torch.Size([24])
Gradient norm: 2.2768754349715192
tensor([0, 0, 1, 0, 1, 0, 1, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.318876452046147
tensor([1, 0, 0, 0, 1, 0, 0, 1, 1, 0])
torch.Size([24])
Gradient norm: 3.44333697773893
tensor([0, 1, 1, 0, 0, 1, 0, 1, 0, 0])
torch.Size([24])
Gradient norm: 3.5840678056073014
tensor([1, 0, 0, 1, 0, 0, 0, 1, 1, 1])
torch.Size([24])
Gradient norm: 3.4826851409200854
tensor([1, 1, 1, 0, 0, 1, 1, 0, 0, 1])
torch.Size([24])
Gradient norm: 1.721217380332972
tensor([1, 0, 0, 0, 0, 0, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 1.8839731825082187
tensor([1, 1, 0, 

Epoch 487/600 - Training:   0%|          | 0/29 [00:00<?, ?it/s]

tensor([0, 0, 1, 0, 1, 1, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 3.7504981796135333
tensor([1, 0, 0, 0, 0, 0, 1, 1, 1, 1])
torch.Size([24])
Gradient norm: 2.0519274598949373
tensor([1, 0, 1, 0, 0, 0, 1, 1, 0, 0])
torch.Size([24])
Gradient norm: 2.229437525698413
tensor([1, 0, 0, 1, 1, 1, 0, 0, 1, 0])
torch.Size([24])
Gradient norm: 2.7755501587007765
tensor([0, 1, 0, 1, 1, 0, 1, 1, 0, 1])
torch.Size([24])
Gradient norm: 1.8672088329013878
tensor([0, 0, 1, 0, 1, 0, 1, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.5011206723187556
tensor([1, 0, 0, 0, 1, 0, 0, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.433355356713483
tensor([0, 1, 1, 0, 0, 1, 0, 1, 0, 0])
torch.Size([24])
Gradient norm: 2.8292154581392466
tensor([1, 0, 0, 1, 0, 0, 0, 1, 1, 1])
torch.Size([24])
Gradient norm: 2.4205408020726806
tensor([1, 1, 1, 0, 0, 1, 1, 0, 0, 1])
torch.Size([24])
Gradient norm: 2.276553735613073
tensor([1, 0, 0, 0, 0, 0, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 1.9428021726880778
tensor([1, 1,

Epoch 488/600 - Training:   0%|          | 0/29 [00:00<?, ?it/s]

tensor([0, 0, 1, 0, 1, 1, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 3.4048590774089886
tensor([1, 0, 0, 0, 0, 0, 1, 1, 1, 1])
torch.Size([24])
Gradient norm: 1.7248561317289262
tensor([1, 0, 1, 0, 0, 0, 1, 1, 0, 0])
torch.Size([24])
Gradient norm: 2.2872116487962133
tensor([1, 0, 0, 1, 1, 1, 0, 0, 1, 0])
torch.Size([24])
Gradient norm: 3.0207349811734847
tensor([0, 1, 0, 1, 1, 0, 1, 1, 0, 1])
torch.Size([24])
Gradient norm: 1.83072062672381
tensor([0, 0, 1, 0, 1, 0, 1, 1, 1, 0])
torch.Size([24])
Gradient norm: 1.8293547882351417
tensor([1, 0, 0, 0, 1, 0, 0, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.2904348089536923
tensor([0, 1, 1, 0, 0, 1, 0, 1, 0, 0])
torch.Size([24])
Gradient norm: 3.0301794675576637
tensor([1, 0, 0, 1, 0, 0, 0, 1, 1, 1])
torch.Size([24])
Gradient norm: 3.032809592784922
tensor([1, 1, 1, 0, 0, 1, 1, 0, 0, 1])
torch.Size([24])
Gradient norm: 1.9805780944831062
tensor([1, 0, 0, 0, 0, 0, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 1.5296660587338926
tensor([1, 1,

Epoch 489/600 - Training:   0%|          | 0/29 [00:00<?, ?it/s]

tensor([0, 0, 1, 0, 1, 1, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 3.592265427866924
tensor([1, 0, 0, 0, 0, 0, 1, 1, 1, 1])
torch.Size([24])
Gradient norm: 2.211282450002027
tensor([1, 0, 1, 0, 0, 0, 1, 1, 0, 0])
torch.Size([24])
Gradient norm: 1.9643984248057462
tensor([1, 0, 0, 1, 1, 1, 0, 0, 1, 0])
torch.Size([24])
Gradient norm: 3.2660341990793578
tensor([0, 1, 0, 1, 1, 0, 1, 1, 0, 1])
torch.Size([24])
Gradient norm: 1.8511465201397967
tensor([0, 0, 1, 0, 1, 0, 1, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.2189508505205824
tensor([1, 0, 0, 0, 1, 0, 0, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.9155621263862193
tensor([0, 1, 1, 0, 0, 1, 0, 1, 0, 0])
torch.Size([24])
Gradient norm: 3.037462377646271
tensor([1, 0, 0, 1, 0, 0, 0, 1, 1, 1])
torch.Size([24])
Gradient norm: 2.6126375600178706
tensor([1, 1, 1, 0, 0, 1, 1, 0, 0, 1])
torch.Size([24])
Gradient norm: 1.7329183973666784
tensor([1, 0, 0, 0, 0, 0, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 1.439332330735415
tensor([1, 1, 

Epoch 490/600 - Training:   0%|          | 0/29 [00:00<?, ?it/s]

tensor([0, 0, 1, 0, 1, 1, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 3.5713591102928466
tensor([1, 0, 0, 0, 0, 0, 1, 1, 1, 1])
torch.Size([24])
Gradient norm: 1.833465192702925
tensor([1, 0, 1, 0, 0, 0, 1, 1, 0, 0])
torch.Size([24])
Gradient norm: 2.408293363678849
tensor([1, 0, 0, 1, 1, 1, 0, 0, 1, 0])
torch.Size([24])
Gradient norm: 2.1748391194924688
tensor([0, 1, 0, 1, 1, 0, 1, 1, 0, 1])
torch.Size([24])
Gradient norm: 1.715480440446207
tensor([0, 0, 1, 0, 1, 0, 1, 1, 1, 0])
torch.Size([24])
Gradient norm: 1.9876568254306701
tensor([1, 0, 0, 0, 1, 0, 0, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.8805936034019193
tensor([0, 1, 1, 0, 0, 1, 0, 1, 0, 0])
torch.Size([24])
Gradient norm: 3.145672953222385
tensor([1, 0, 0, 1, 0, 0, 0, 1, 1, 1])
torch.Size([24])
Gradient norm: 2.4281168675518914
tensor([1, 1, 1, 0, 0, 1, 1, 0, 0, 1])
torch.Size([24])
Gradient norm: 1.9578974937310216
tensor([1, 0, 0, 0, 0, 0, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 1.8806410492814964
tensor([1, 1, 

Epoch 491/600 - Training:   0%|          | 0/29 [00:00<?, ?it/s]

tensor([0, 0, 1, 0, 1, 1, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 3.712640105995808
tensor([1, 0, 0, 0, 0, 0, 1, 1, 1, 1])
torch.Size([24])
Gradient norm: 1.7451194813217032
tensor([1, 0, 1, 0, 0, 0, 1, 1, 0, 0])
torch.Size([24])
Gradient norm: 2.0624159442206604
tensor([1, 0, 0, 1, 1, 1, 0, 0, 1, 0])
torch.Size([24])
Gradient norm: 2.456943023530127
tensor([0, 1, 0, 1, 1, 0, 1, 1, 0, 1])
torch.Size([24])
Gradient norm: 2.1041582550174165
tensor([0, 0, 1, 0, 1, 0, 1, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.209198539676468
tensor([1, 0, 0, 0, 1, 0, 0, 1, 1, 0])
torch.Size([24])
Gradient norm: 3.1263675016646357
tensor([0, 1, 1, 0, 0, 1, 0, 1, 0, 0])
torch.Size([24])
Gradient norm: 2.916790071435434
tensor([1, 0, 0, 1, 0, 0, 0, 1, 1, 1])
torch.Size([24])
Gradient norm: 2.558880179626829
tensor([1, 1, 1, 0, 0, 1, 1, 0, 0, 1])
torch.Size([24])
Gradient norm: 1.731342696740661
tensor([1, 0, 0, 0, 0, 0, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 1.6726586753233548
tensor([1, 1, 0,

Epoch 492/600 - Training:   0%|          | 0/29 [00:00<?, ?it/s]

tensor([0, 0, 1, 0, 1, 1, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 3.477250554012984
tensor([1, 0, 0, 0, 0, 0, 1, 1, 1, 1])
torch.Size([24])
Gradient norm: 2.2091552883377936
tensor([1, 0, 1, 0, 0, 0, 1, 1, 0, 0])
torch.Size([24])
Gradient norm: 2.4792162459226295
tensor([1, 0, 0, 1, 1, 1, 0, 0, 1, 0])
torch.Size([24])
Gradient norm: 2.3253933692149404
tensor([0, 1, 0, 1, 1, 0, 1, 1, 0, 1])
torch.Size([24])
Gradient norm: 2.124717116390198
tensor([0, 0, 1, 0, 1, 0, 1, 1, 1, 0])
torch.Size([24])
Gradient norm: 1.9821688702490066
tensor([1, 0, 0, 0, 1, 0, 0, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.836838824426906
tensor([0, 1, 1, 0, 0, 1, 0, 1, 0, 0])
torch.Size([24])
Gradient norm: 2.2502454161134446
tensor([1, 0, 0, 1, 0, 0, 0, 1, 1, 1])
torch.Size([24])
Gradient norm: 2.7915694920834873
tensor([1, 1, 1, 0, 0, 1, 1, 0, 0, 1])
torch.Size([24])
Gradient norm: 1.9607887777296715
tensor([1, 0, 0, 0, 0, 0, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 1.5723189178959627
tensor([1, 1,

Epoch 493/600 - Training:   0%|          | 0/29 [00:00<?, ?it/s]

tensor([0, 0, 1, 0, 1, 1, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 3.1905187319578756
tensor([1, 0, 0, 0, 0, 0, 1, 1, 1, 1])
torch.Size([24])
Gradient norm: 1.9105152953752524
tensor([1, 0, 1, 0, 0, 0, 1, 1, 0, 0])
torch.Size([24])
Gradient norm: 2.5645876074000395
tensor([1, 0, 0, 1, 1, 1, 0, 0, 1, 0])
torch.Size([24])
Gradient norm: 2.213987781079021
tensor([0, 1, 0, 1, 1, 0, 1, 1, 0, 1])
torch.Size([24])
Gradient norm: 1.8275954084748942
tensor([0, 0, 1, 0, 1, 0, 1, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.440720512557136
tensor([1, 0, 0, 0, 1, 0, 0, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.799544814984891
tensor([0, 1, 1, 0, 0, 1, 0, 1, 0, 0])
torch.Size([24])
Gradient norm: 3.371926632809195
tensor([1, 0, 0, 1, 0, 0, 0, 1, 1, 1])
torch.Size([24])
Gradient norm: 2.8124692966751104
tensor([1, 1, 1, 0, 0, 1, 1, 0, 0, 1])
torch.Size([24])
Gradient norm: 2.300737214005041
tensor([1, 0, 0, 0, 0, 0, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 1.868058608998655
tensor([1, 1, 0,

Epoch 494/600 - Training:   0%|          | 0/29 [00:00<?, ?it/s]

tensor([0, 0, 1, 0, 1, 1, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 3.3304732615046646
tensor([1, 0, 0, 0, 0, 0, 1, 1, 1, 1])
torch.Size([24])
Gradient norm: 2.4614706076325734
tensor([1, 0, 1, 0, 0, 0, 1, 1, 0, 0])
torch.Size([24])
Gradient norm: 1.7300388666159767
tensor([1, 0, 0, 1, 1, 1, 0, 0, 1, 0])
torch.Size([24])
Gradient norm: 3.1987427152740926
tensor([0, 1, 0, 1, 1, 0, 1, 1, 0, 1])
torch.Size([24])
Gradient norm: 1.5599168863125048
tensor([0, 0, 1, 0, 1, 0, 1, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.027621217699787
tensor([1, 0, 0, 0, 1, 0, 0, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.7857988522607045
tensor([0, 1, 1, 0, 0, 1, 0, 1, 0, 0])
torch.Size([24])
Gradient norm: 3.0577388257699933
tensor([1, 0, 0, 1, 0, 0, 0, 1, 1, 1])
torch.Size([24])
Gradient norm: 2.7256739638834118
tensor([1, 1, 1, 0, 0, 1, 1, 0, 0, 1])
torch.Size([24])
Gradient norm: 1.8466664982428724
tensor([1, 0, 0, 0, 0, 0, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 2.088072831632247
tensor([1, 1

Epoch 495/600 - Training:   0%|          | 0/29 [00:00<?, ?it/s]

tensor([0, 0, 1, 0, 1, 1, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 3.3851186077230753
tensor([1, 0, 0, 0, 0, 0, 1, 1, 1, 1])
torch.Size([24])
Gradient norm: 1.8212687151686497
tensor([1, 0, 1, 0, 0, 0, 1, 1, 0, 0])
torch.Size([24])
Gradient norm: 2.115413065306322
tensor([1, 0, 0, 1, 1, 1, 0, 0, 1, 0])
torch.Size([24])
Gradient norm: 2.2965934718373555
tensor([0, 1, 0, 1, 1, 0, 1, 1, 0, 1])
torch.Size([24])
Gradient norm: 1.8409193265260433
tensor([0, 0, 1, 0, 1, 0, 1, 1, 1, 0])
torch.Size([24])
Gradient norm: 1.9152864713918665
tensor([1, 0, 0, 0, 1, 0, 0, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.525104964703136
tensor([0, 1, 1, 0, 0, 1, 0, 1, 0, 0])
torch.Size([24])
Gradient norm: 2.6961107966028983
tensor([1, 0, 0, 1, 0, 0, 0, 1, 1, 1])
torch.Size([24])
Gradient norm: 2.875058523018631
tensor([1, 1, 1, 0, 0, 1, 1, 0, 0, 1])
torch.Size([24])
Gradient norm: 1.96079191915992
tensor([1, 0, 0, 0, 0, 0, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 2.120925140490502
tensor([1, 1, 0,

Epoch 496/600 - Training:   0%|          | 0/29 [00:00<?, ?it/s]

tensor([0, 0, 1, 0, 1, 1, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 3.1498933764561268
tensor([1, 0, 0, 0, 0, 0, 1, 1, 1, 1])
torch.Size([24])
Gradient norm: 2.0800320757981456
tensor([1, 0, 1, 0, 0, 0, 1, 1, 0, 0])
torch.Size([24])
Gradient norm: 2.2497182472746977
tensor([1, 0, 0, 1, 1, 1, 0, 0, 1, 0])
torch.Size([24])
Gradient norm: 2.3131485403005647
tensor([0, 1, 0, 1, 1, 0, 1, 1, 0, 1])
torch.Size([24])
Gradient norm: 1.770164162866414
tensor([0, 0, 1, 0, 1, 0, 1, 1, 1, 0])
torch.Size([24])
Gradient norm: 1.8197512886115474
tensor([1, 0, 0, 0, 1, 0, 0, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.7578041090638106
tensor([0, 1, 1, 0, 0, 1, 0, 1, 0, 0])
torch.Size([24])
Gradient norm: 2.8659457201563763
tensor([1, 0, 0, 1, 0, 0, 0, 1, 1, 1])
torch.Size([24])
Gradient norm: 3.6890692785074624
tensor([1, 1, 1, 0, 0, 1, 1, 0, 0, 1])
torch.Size([24])
Gradient norm: 2.047035805798631
tensor([1, 0, 0, 0, 0, 0, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 1.865009246864047
tensor([1, 1,

Epoch 497/600 - Training:   0%|          | 0/29 [00:00<?, ?it/s]

tensor([0, 0, 1, 0, 1, 1, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 3.888765905450266
tensor([1, 0, 0, 0, 0, 0, 1, 1, 1, 1])
torch.Size([24])
Gradient norm: 1.946863196571101
tensor([1, 0, 1, 0, 0, 0, 1, 1, 0, 0])
torch.Size([24])
Gradient norm: 2.184860248685327
tensor([1, 0, 0, 1, 1, 1, 0, 0, 1, 0])
torch.Size([24])
Gradient norm: 2.7633383388942674
tensor([0, 1, 0, 1, 1, 0, 1, 1, 0, 1])
torch.Size([24])
Gradient norm: 1.8684191611023508
tensor([0, 0, 1, 0, 1, 0, 1, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.008059133202487
tensor([1, 0, 0, 0, 1, 0, 0, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.81203198416018
tensor([0, 1, 1, 0, 0, 1, 0, 1, 0, 0])
torch.Size([24])
Gradient norm: 2.9790188790875347
tensor([1, 0, 0, 1, 0, 0, 0, 1, 1, 1])
torch.Size([24])
Gradient norm: 2.8264141122527735
tensor([1, 1, 1, 0, 0, 1, 1, 0, 0, 1])
torch.Size([24])
Gradient norm: 1.7760740948551144
tensor([1, 0, 0, 0, 0, 0, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 1.6633613596220291
tensor([1, 1, 0,

Epoch 498/600 - Training:   0%|          | 0/29 [00:00<?, ?it/s]

tensor([0, 0, 1, 0, 1, 1, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 3.9140079979886977
tensor([1, 0, 0, 0, 0, 0, 1, 1, 1, 1])
torch.Size([24])
Gradient norm: 2.291594795123435
tensor([1, 0, 1, 0, 0, 0, 1, 1, 0, 0])
torch.Size([24])
Gradient norm: 1.8034659802198023
tensor([1, 0, 0, 1, 1, 1, 0, 0, 1, 0])
torch.Size([24])
Gradient norm: 2.6993645015708796
tensor([0, 1, 0, 1, 1, 0, 1, 1, 0, 1])
torch.Size([24])
Gradient norm: 1.937457064540838
tensor([0, 0, 1, 0, 1, 0, 1, 1, 1, 0])
torch.Size([24])
Gradient norm: 1.965057572119915
tensor([1, 0, 0, 0, 1, 0, 0, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.7605410109354387
tensor([0, 1, 1, 0, 0, 1, 0, 1, 0, 0])
torch.Size([24])
Gradient norm: 3.1043107132834766
tensor([1, 0, 0, 1, 0, 0, 0, 1, 1, 1])
torch.Size([24])
Gradient norm: 2.380637628216284
tensor([1, 1, 1, 0, 0, 1, 1, 0, 0, 1])
torch.Size([24])
Gradient norm: 1.7133546126098111
tensor([1, 0, 0, 0, 0, 0, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 1.664792110487513
tensor([1, 1, 0

Epoch 499/600 - Training:   0%|          | 0/29 [00:00<?, ?it/s]

tensor([0, 0, 1, 0, 1, 1, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 3.206245702312186
tensor([1, 0, 0, 0, 0, 0, 1, 1, 1, 1])
torch.Size([24])
Gradient norm: 1.7506620816668155
tensor([1, 0, 1, 0, 0, 0, 1, 1, 0, 0])
torch.Size([24])
Gradient norm: 2.6534860530378306
tensor([1, 0, 0, 1, 1, 1, 0, 0, 1, 0])
torch.Size([24])
Gradient norm: 3.0587939246239104
tensor([0, 1, 0, 1, 1, 0, 1, 1, 0, 1])
torch.Size([24])
Gradient norm: 1.9363775893735347
tensor([0, 0, 1, 0, 1, 0, 1, 1, 1, 0])
torch.Size([24])
Gradient norm: 1.9496387858593023
tensor([1, 0, 0, 0, 1, 0, 0, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.7377534170374234
tensor([0, 1, 1, 0, 0, 1, 0, 1, 0, 0])
torch.Size([24])
Gradient norm: 2.7852517340769647
tensor([1, 0, 0, 1, 0, 0, 0, 1, 1, 1])
torch.Size([24])
Gradient norm: 2.7997845446952354
tensor([1, 1, 1, 0, 0, 1, 1, 0, 0, 1])
torch.Size([24])
Gradient norm: 2.018347361704221
tensor([1, 0, 0, 0, 0, 0, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 1.5428998611761047
tensor([1, 1

Epoch 500/600 - Training:   0%|          | 0/29 [00:00<?, ?it/s]

tensor([0, 0, 1, 0, 1, 1, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 2.743860099582835
tensor([1, 0, 0, 0, 0, 0, 1, 1, 1, 1])
torch.Size([24])
Gradient norm: 1.9407202261004457
tensor([1, 0, 1, 0, 0, 0, 1, 1, 0, 0])
torch.Size([24])
Gradient norm: 2.1306843017883232
tensor([1, 0, 0, 1, 1, 1, 0, 0, 1, 0])
torch.Size([24])
Gradient norm: 2.888498631528435
tensor([0, 1, 0, 1, 1, 0, 1, 1, 0, 1])
torch.Size([24])
Gradient norm: 2.0892896800440695
tensor([0, 0, 1, 0, 1, 0, 1, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.1238376375900243
tensor([1, 0, 0, 0, 1, 0, 0, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.3973402086923
tensor([0, 1, 1, 0, 0, 1, 0, 1, 0, 0])
torch.Size([24])
Gradient norm: 2.8841677759055955
tensor([1, 0, 0, 1, 0, 0, 0, 1, 1, 1])
torch.Size([24])
Gradient norm: 3.5271238975058474
tensor([1, 1, 1, 0, 0, 1, 1, 0, 0, 1])
torch.Size([24])
Gradient norm: 2.088166831417912
tensor([1, 0, 0, 0, 0, 0, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 1.712511659459382
tensor([1, 1, 0, 

Epoch 501/600 - Training:   0%|          | 0/29 [00:00<?, ?it/s]

tensor([0, 0, 1, 0, 1, 1, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 3.4168791780288053
tensor([1, 0, 0, 0, 0, 0, 1, 1, 1, 1])
torch.Size([24])
Gradient norm: 1.9548442122340854
tensor([1, 0, 1, 0, 0, 0, 1, 1, 0, 0])
torch.Size([24])
Gradient norm: 1.9816896958752526
tensor([1, 0, 0, 1, 1, 1, 0, 0, 1, 0])
torch.Size([24])
Gradient norm: 2.796621043151901
tensor([0, 1, 0, 1, 1, 0, 1, 1, 0, 1])
torch.Size([24])
Gradient norm: 1.7529672733067754
tensor([0, 0, 1, 0, 1, 0, 1, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.574061828010889
tensor([1, 0, 0, 0, 1, 0, 0, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.6245020878160297
tensor([0, 1, 1, 0, 0, 1, 0, 1, 0, 0])
torch.Size([24])
Gradient norm: 3.143625077827348
tensor([1, 0, 0, 1, 0, 0, 0, 1, 1, 1])
torch.Size([24])
Gradient norm: 2.564018911671238
tensor([1, 1, 1, 0, 0, 1, 1, 0, 0, 1])
torch.Size([24])
Gradient norm: 1.7847921026808728
tensor([1, 0, 0, 0, 0, 0, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 2.0681848621423846
tensor([1, 1, 

Epoch 502/600 - Training:   0%|          | 0/29 [00:00<?, ?it/s]

tensor([0, 0, 1, 0, 1, 1, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 3.2404578316376917
tensor([1, 0, 0, 0, 0, 0, 1, 1, 1, 1])
torch.Size([24])
Gradient norm: 2.3260165459203135
tensor([1, 0, 1, 0, 0, 0, 1, 1, 0, 0])
torch.Size([24])
Gradient norm: 2.6654776131605478
tensor([1, 0, 0, 1, 1, 1, 0, 0, 1, 0])
torch.Size([24])
Gradient norm: 2.2742144957431285
tensor([0, 1, 0, 1, 1, 0, 1, 1, 0, 1])
torch.Size([24])
Gradient norm: 2.3119047280111897
tensor([0, 0, 1, 0, 1, 0, 1, 1, 1, 0])
torch.Size([24])
Gradient norm: 1.8745259684617308
tensor([1, 0, 0, 0, 1, 0, 0, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.0708541085972594
tensor([0, 1, 1, 0, 0, 1, 0, 1, 0, 0])
torch.Size([24])
Gradient norm: 3.29733508472301
tensor([1, 0, 0, 1, 0, 0, 0, 1, 1, 1])
torch.Size([24])
Gradient norm: 2.9358741177221903
tensor([1, 1, 1, 0, 0, 1, 1, 0, 0, 1])
torch.Size([24])
Gradient norm: 1.7716163525185082
tensor([1, 0, 0, 0, 0, 0, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 1.886043276264867
tensor([1, 1,

Epoch 503/600 - Training:   0%|          | 0/29 [00:00<?, ?it/s]

tensor([0, 0, 1, 0, 1, 1, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 3.244093762821198
tensor([1, 0, 0, 0, 0, 0, 1, 1, 1, 1])
torch.Size([24])
Gradient norm: 2.2940513084618317
tensor([1, 0, 1, 0, 0, 0, 1, 1, 0, 0])
torch.Size([24])
Gradient norm: 2.8464535108024154
tensor([1, 0, 0, 1, 1, 1, 0, 0, 1, 0])
torch.Size([24])
Gradient norm: 2.502289044033708
tensor([0, 1, 0, 1, 1, 0, 1, 1, 0, 1])
torch.Size([24])
Gradient norm: 2.4157787383781426
tensor([0, 0, 1, 0, 1, 0, 1, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.02497371898215
tensor([1, 0, 0, 0, 1, 0, 0, 1, 1, 0])
torch.Size([24])
Gradient norm: 3.1763815454697233
tensor([0, 1, 1, 0, 0, 1, 0, 1, 0, 0])
torch.Size([24])
Gradient norm: 2.658591231254971
tensor([1, 0, 0, 1, 0, 0, 0, 1, 1, 1])
torch.Size([24])
Gradient norm: 3.239406704252822
tensor([1, 1, 1, 0, 0, 1, 1, 0, 0, 1])
torch.Size([24])
Gradient norm: 1.852044246007891
tensor([1, 0, 0, 0, 0, 0, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 1.8716549157817637
tensor([1, 1, 0, 

Epoch 504/600 - Training:   0%|          | 0/29 [00:00<?, ?it/s]

tensor([0, 0, 1, 0, 1, 1, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 3.7386764281521088
tensor([1, 0, 0, 0, 0, 0, 1, 1, 1, 1])
torch.Size([24])
Gradient norm: 2.2101323511301585
tensor([1, 0, 1, 0, 0, 0, 1, 1, 0, 0])
torch.Size([24])
Gradient norm: 1.8571085272387373
tensor([1, 0, 0, 1, 1, 1, 0, 0, 1, 0])
torch.Size([24])
Gradient norm: 2.1779557066109314
tensor([0, 1, 0, 1, 1, 0, 1, 1, 0, 1])
torch.Size([24])
Gradient norm: 1.6872234510075634
tensor([0, 0, 1, 0, 1, 0, 1, 1, 1, 0])
torch.Size([24])
Gradient norm: 1.9204582213252375
tensor([1, 0, 0, 0, 1, 0, 0, 1, 1, 0])
torch.Size([24])
Gradient norm: 3.3960645362781037
tensor([0, 1, 1, 0, 0, 1, 0, 1, 0, 0])
torch.Size([24])
Gradient norm: 3.2247918889695777
tensor([1, 0, 0, 1, 0, 0, 0, 1, 1, 1])
torch.Size([24])
Gradient norm: 3.020760686230735
tensor([1, 1, 1, 0, 0, 1, 1, 0, 0, 1])
torch.Size([24])
Gradient norm: 1.9892348806182099
tensor([1, 0, 0, 0, 0, 0, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 1.9497330515172722
tensor([1, 

Epoch 505/600 - Training:   0%|          | 0/29 [00:00<?, ?it/s]

tensor([0, 0, 1, 0, 1, 1, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 2.5461165434438864
tensor([1, 0, 0, 0, 0, 0, 1, 1, 1, 1])
torch.Size([24])
Gradient norm: 2.197656176474049
tensor([1, 0, 1, 0, 0, 0, 1, 1, 0, 0])
torch.Size([24])
Gradient norm: 2.6490432245786457
tensor([1, 0, 0, 1, 1, 1, 0, 0, 1, 0])
torch.Size([24])
Gradient norm: 2.9711933359123646
tensor([0, 1, 0, 1, 1, 0, 1, 1, 0, 1])
torch.Size([24])
Gradient norm: 1.6720719894014098
tensor([0, 0, 1, 0, 1, 0, 1, 1, 1, 0])
torch.Size([24])
Gradient norm: 1.9479435095201276
tensor([1, 0, 0, 0, 1, 0, 0, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.8856135233806013
tensor([0, 1, 1, 0, 0, 1, 0, 1, 0, 0])
torch.Size([24])
Gradient norm: 3.237514982025097
tensor([1, 0, 0, 1, 0, 0, 0, 1, 1, 1])
torch.Size([24])
Gradient norm: 3.024298297761666
tensor([1, 1, 1, 0, 0, 1, 1, 0, 0, 1])
torch.Size([24])
Gradient norm: 1.7411311838345689
tensor([1, 0, 0, 0, 0, 0, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 1.8601039098481842
tensor([1, 1,

Epoch 506/600 - Training:   0%|          | 0/29 [00:00<?, ?it/s]

tensor([0, 0, 1, 0, 1, 1, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 3.243793707930524
tensor([1, 0, 0, 0, 0, 0, 1, 1, 1, 1])
torch.Size([24])
Gradient norm: 1.888566792938744
tensor([1, 0, 1, 0, 0, 0, 1, 1, 0, 0])
torch.Size([24])
Gradient norm: 1.8953397350501826
tensor([1, 0, 0, 1, 1, 1, 0, 0, 1, 0])
torch.Size([24])
Gradient norm: 2.8519606916042863
tensor([0, 1, 0, 1, 1, 0, 1, 1, 0, 1])
torch.Size([24])
Gradient norm: 1.8647520235712238
tensor([0, 0, 1, 0, 1, 0, 1, 1, 1, 0])
torch.Size([24])
Gradient norm: 1.770124036278097
tensor([1, 0, 0, 0, 1, 0, 0, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.7084641912329825
tensor([0, 1, 1, 0, 0, 1, 0, 1, 0, 0])
torch.Size([24])
Gradient norm: 3.1460400612209423
tensor([1, 0, 0, 1, 0, 0, 0, 1, 1, 1])
torch.Size([24])
Gradient norm: 2.7551637612517497
tensor([1, 1, 1, 0, 0, 1, 1, 0, 0, 1])
torch.Size([24])
Gradient norm: 1.8714293684337298
tensor([1, 0, 0, 0, 0, 0, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 1.6683751206720416
tensor([1, 1,

Epoch 507/600 - Training:   0%|          | 0/29 [00:00<?, ?it/s]

tensor([0, 0, 1, 0, 1, 1, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 3.436211647077482
tensor([1, 0, 0, 0, 0, 0, 1, 1, 1, 1])
torch.Size([24])
Gradient norm: 1.9049698646770539
tensor([1, 0, 1, 0, 0, 0, 1, 1, 0, 0])
torch.Size([24])
Gradient norm: 2.2446296507501566
tensor([1, 0, 0, 1, 1, 1, 0, 0, 1, 0])
torch.Size([24])
Gradient norm: 2.227935327043805
tensor([0, 1, 0, 1, 1, 0, 1, 1, 0, 1])
torch.Size([24])
Gradient norm: 1.5325583902932842
tensor([0, 0, 1, 0, 1, 0, 1, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.525737495664562
tensor([1, 0, 0, 0, 1, 0, 0, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.660478391972114
tensor([0, 1, 1, 0, 0, 1, 0, 1, 0, 0])
torch.Size([24])
Gradient norm: 2.692148389761106
tensor([1, 0, 0, 1, 0, 0, 0, 1, 1, 1])
torch.Size([24])
Gradient norm: 3.0404206675617593
tensor([1, 1, 1, 0, 0, 1, 1, 0, 0, 1])
torch.Size([24])
Gradient norm: 2.3068700689323918
tensor([1, 0, 0, 0, 0, 0, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 2.0580672259708503
tensor([1, 1, 0

Epoch 508/600 - Training:   0%|          | 0/29 [00:00<?, ?it/s]

tensor([0, 0, 1, 0, 1, 1, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 3.9024175743578042
tensor([1, 0, 0, 0, 0, 0, 1, 1, 1, 1])
torch.Size([24])
Gradient norm: 2.023700895729684
tensor([1, 0, 1, 0, 0, 0, 1, 1, 0, 0])
torch.Size([24])
Gradient norm: 2.092902192973726
tensor([1, 0, 0, 1, 1, 1, 0, 0, 1, 0])
torch.Size([24])
Gradient norm: 2.5921990153069814
tensor([0, 1, 0, 1, 1, 0, 1, 1, 0, 1])
torch.Size([24])
Gradient norm: 1.7627686809591494
tensor([0, 0, 1, 0, 1, 0, 1, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.1380648365407375
tensor([1, 0, 0, 0, 1, 0, 0, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.8516014195893504
tensor([0, 1, 1, 0, 0, 1, 0, 1, 0, 0])
torch.Size([24])
Gradient norm: 3.6112812559042786
tensor([1, 0, 0, 1, 0, 0, 0, 1, 1, 1])
torch.Size([24])
Gradient norm: 3.092330090193062
tensor([1, 1, 1, 0, 0, 1, 1, 0, 0, 1])
torch.Size([24])
Gradient norm: 1.95628887503578
tensor([1, 0, 0, 0, 0, 0, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 1.531024123522548
tensor([1, 1, 0,

Epoch 509/600 - Training:   0%|          | 0/29 [00:00<?, ?it/s]

tensor([0, 0, 1, 0, 1, 1, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 3.1491949864871795
tensor([1, 0, 0, 0, 0, 0, 1, 1, 1, 1])
torch.Size([24])
Gradient norm: 2.4953335321471886
tensor([1, 0, 1, 0, 0, 0, 1, 1, 0, 0])
torch.Size([24])
Gradient norm: 2.3465641498507375
tensor([1, 0, 0, 1, 1, 1, 0, 0, 1, 0])
torch.Size([24])
Gradient norm: 2.6047177633512937
tensor([0, 1, 0, 1, 1, 0, 1, 1, 0, 1])
torch.Size([24])
Gradient norm: 2.077614662182697
tensor([0, 0, 1, 0, 1, 0, 1, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.2584408410367636
tensor([1, 0, 0, 0, 1, 0, 0, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.946015001631369
tensor([0, 1, 1, 0, 0, 1, 0, 1, 0, 0])
torch.Size([24])
Gradient norm: 2.3250020723948213
tensor([1, 0, 0, 1, 0, 0, 0, 1, 1, 1])
torch.Size([24])
Gradient norm: 2.5518255389489655
tensor([1, 1, 1, 0, 0, 1, 1, 0, 0, 1])
torch.Size([24])
Gradient norm: 1.65442479506963
tensor([1, 0, 0, 0, 0, 0, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 1.9177266434524762
tensor([1, 1, 

Epoch 510/600 - Training:   0%|          | 0/29 [00:00<?, ?it/s]

tensor([0, 0, 1, 0, 1, 1, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 3.428434464571111
tensor([1, 0, 0, 0, 0, 0, 1, 1, 1, 1])
torch.Size([24])
Gradient norm: 2.364391358801344
tensor([1, 0, 1, 0, 0, 0, 1, 1, 0, 0])
torch.Size([24])
Gradient norm: 2.173116552740911
tensor([1, 0, 0, 1, 1, 1, 0, 0, 1, 0])
torch.Size([24])
Gradient norm: 3.19188824380222
tensor([0, 1, 0, 1, 1, 0, 1, 1, 0, 1])
torch.Size([24])
Gradient norm: 2.1560921708196776
tensor([0, 0, 1, 0, 1, 0, 1, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.0697035612838666
tensor([1, 0, 0, 0, 1, 0, 0, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.435318386652294
tensor([0, 1, 1, 0, 0, 1, 0, 1, 0, 0])
torch.Size([24])
Gradient norm: 3.5170857280587917
tensor([1, 0, 0, 1, 0, 0, 0, 1, 1, 1])
torch.Size([24])
Gradient norm: 2.852088504657462
tensor([1, 1, 1, 0, 0, 1, 1, 0, 0, 1])
torch.Size([24])
Gradient norm: 1.7956986910967008
tensor([1, 0, 0, 0, 0, 0, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 1.8031906289916309
tensor([1, 1, 0, 

Epoch 511/600 - Training:   0%|          | 0/29 [00:00<?, ?it/s]

tensor([0, 0, 1, 0, 1, 1, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 2.9277673996304903
tensor([1, 0, 0, 0, 0, 0, 1, 1, 1, 1])
torch.Size([24])
Gradient norm: 2.076362713540476
tensor([1, 0, 1, 0, 0, 0, 1, 1, 0, 0])
torch.Size([24])
Gradient norm: 2.3697022058413317
tensor([1, 0, 0, 1, 1, 1, 0, 0, 1, 0])
torch.Size([24])
Gradient norm: 2.3798081093842884
tensor([0, 1, 0, 1, 1, 0, 1, 1, 0, 1])
torch.Size([24])
Gradient norm: 2.130489458322766
tensor([0, 0, 1, 0, 1, 0, 1, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.605290976607589
tensor([1, 0, 0, 0, 1, 0, 0, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.5282458723537715
tensor([0, 1, 1, 0, 0, 1, 0, 1, 0, 0])
torch.Size([24])
Gradient norm: 2.8510417337053275
tensor([1, 0, 0, 1, 0, 0, 0, 1, 1, 1])
torch.Size([24])
Gradient norm: 2.898206231472831
tensor([1, 1, 1, 0, 0, 1, 1, 0, 0, 1])
torch.Size([24])
Gradient norm: 1.957206168887346
tensor([1, 0, 0, 0, 0, 0, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 1.8304730243668466
tensor([1, 1, 0

Epoch 512/600 - Training:   0%|          | 0/29 [00:00<?, ?it/s]

tensor([0, 0, 1, 0, 1, 1, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 3.1390781445702083
tensor([1, 0, 0, 0, 0, 0, 1, 1, 1, 1])
torch.Size([24])
Gradient norm: 1.9907542540266707
tensor([1, 0, 1, 0, 0, 0, 1, 1, 0, 0])
torch.Size([24])
Gradient norm: 2.165788731839666
tensor([1, 0, 0, 1, 1, 1, 0, 0, 1, 0])
torch.Size([24])
Gradient norm: 3.0992043430639926
tensor([0, 1, 0, 1, 1, 0, 1, 1, 0, 1])
torch.Size([24])
Gradient norm: 1.860832199230627
tensor([0, 0, 1, 0, 1, 0, 1, 1, 1, 0])
torch.Size([24])
Gradient norm: 1.674103302273231
tensor([1, 0, 0, 0, 1, 0, 0, 1, 1, 0])
torch.Size([24])
Gradient norm: 3.0531194010271103
tensor([0, 1, 1, 0, 0, 1, 0, 1, 0, 0])
torch.Size([24])
Gradient norm: 3.624955975352012
tensor([1, 0, 0, 1, 0, 0, 0, 1, 1, 1])
torch.Size([24])
Gradient norm: 2.657529106859131
tensor([1, 1, 1, 0, 0, 1, 1, 0, 0, 1])
torch.Size([24])
Gradient norm: 1.9784049917109188
tensor([1, 0, 0, 0, 0, 0, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 1.777232625901318
tensor([1, 1, 0,

Epoch 513/600 - Training:   0%|          | 0/29 [00:00<?, ?it/s]

tensor([0, 0, 1, 0, 1, 1, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 3.3965801134603946
tensor([1, 0, 0, 0, 0, 0, 1, 1, 1, 1])
torch.Size([24])
Gradient norm: 1.5804971425939995
tensor([1, 0, 1, 0, 0, 0, 1, 1, 0, 0])
torch.Size([24])
Gradient norm: 2.3959521006305966
tensor([1, 0, 0, 1, 1, 1, 0, 0, 1, 0])
torch.Size([24])
Gradient norm: 2.779031072150198
tensor([0, 1, 0, 1, 1, 0, 1, 1, 0, 1])
torch.Size([24])
Gradient norm: 1.9094982358658783
tensor([0, 0, 1, 0, 1, 0, 1, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.2560479179925825
tensor([1, 0, 0, 0, 1, 0, 0, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.25583096358888
tensor([0, 1, 1, 0, 0, 1, 0, 1, 0, 0])
torch.Size([24])
Gradient norm: 3.073586355985524
tensor([1, 0, 0, 1, 0, 0, 0, 1, 1, 1])
torch.Size([24])
Gradient norm: 3.532340721771167
tensor([1, 1, 1, 0, 0, 1, 1, 0, 0, 1])
torch.Size([24])
Gradient norm: 1.8597233208735438
tensor([1, 0, 0, 0, 0, 0, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 1.6474415916464424
tensor([1, 1, 0

Epoch 514/600 - Training:   0%|          | 0/29 [00:00<?, ?it/s]

tensor([0, 0, 1, 0, 1, 1, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 3.1983728441111143
tensor([1, 0, 0, 0, 0, 0, 1, 1, 1, 1])
torch.Size([24])
Gradient norm: 1.8807982861462595
tensor([1, 0, 1, 0, 0, 0, 1, 1, 0, 0])
torch.Size([24])
Gradient norm: 2.845828094469158
tensor([1, 0, 0, 1, 1, 1, 0, 0, 1, 0])
torch.Size([24])
Gradient norm: 2.858050914310865
tensor([0, 1, 0, 1, 1, 0, 1, 1, 0, 1])
torch.Size([24])
Gradient norm: 1.9948435150613895
tensor([0, 0, 1, 0, 1, 0, 1, 1, 1, 0])
torch.Size([24])
Gradient norm: 1.9842127789500372
tensor([1, 0, 0, 0, 1, 0, 0, 1, 1, 0])
torch.Size([24])
Gradient norm: 3.317571170675728
tensor([0, 1, 1, 0, 0, 1, 0, 1, 0, 0])
torch.Size([24])
Gradient norm: 3.5742270055274803
tensor([1, 0, 0, 1, 0, 0, 0, 1, 1, 1])
torch.Size([24])
Gradient norm: 2.469764590112434
tensor([1, 1, 1, 0, 0, 1, 1, 0, 0, 1])
torch.Size([24])
Gradient norm: 1.8460208898278452
tensor([1, 0, 0, 0, 0, 0, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 1.8241155816546684
tensor([1, 1, 

Epoch 515/600 - Training:   0%|          | 0/29 [00:00<?, ?it/s]

tensor([0, 0, 1, 0, 1, 1, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 3.3637507588113778
tensor([1, 0, 0, 0, 0, 0, 1, 1, 1, 1])
torch.Size([24])
Gradient norm: 2.4128999713768597
tensor([1, 0, 1, 0, 0, 0, 1, 1, 0, 0])
torch.Size([24])
Gradient norm: 2.2456964060303672
tensor([1, 0, 0, 1, 1, 1, 0, 0, 1, 0])
torch.Size([24])
Gradient norm: 2.924085077488277
tensor([0, 1, 0, 1, 1, 0, 1, 1, 0, 1])
torch.Size([24])
Gradient norm: 1.775203947164277
tensor([0, 0, 1, 0, 1, 0, 1, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.0291929375900404
tensor([1, 0, 0, 0, 1, 0, 0, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.478459719320851
tensor([0, 1, 1, 0, 0, 1, 0, 1, 0, 0])
torch.Size([24])
Gradient norm: 3.043036505530434
tensor([1, 0, 0, 1, 0, 0, 0, 1, 1, 1])
torch.Size([24])
Gradient norm: 3.7488338690295047
tensor([1, 1, 1, 0, 0, 1, 1, 0, 0, 1])
torch.Size([24])
Gradient norm: 1.7211801011683163
tensor([1, 0, 0, 0, 0, 0, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 1.9699798620592337
tensor([1, 1, 

Epoch 516/600 - Training:   0%|          | 0/29 [00:00<?, ?it/s]

tensor([0, 0, 1, 0, 1, 1, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 3.9356715303905894
tensor([1, 0, 0, 0, 0, 0, 1, 1, 1, 1])
torch.Size([24])
Gradient norm: 1.7272575240441852
tensor([1, 0, 1, 0, 0, 0, 1, 1, 0, 0])
torch.Size([24])
Gradient norm: 2.0408865454425786
tensor([1, 0, 0, 1, 1, 1, 0, 0, 1, 0])
torch.Size([24])
Gradient norm: 1.923639495461497
tensor([0, 1, 0, 1, 1, 0, 1, 1, 0, 1])
torch.Size([24])
Gradient norm: 1.9834149815168027
tensor([0, 0, 1, 0, 1, 0, 1, 1, 1, 0])
torch.Size([24])
Gradient norm: 1.6880949616241174
tensor([1, 0, 0, 0, 1, 0, 0, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.2807735967132037
tensor([0, 1, 1, 0, 0, 1, 0, 1, 0, 0])
torch.Size([24])
Gradient norm: 3.201595243121773
tensor([1, 0, 0, 1, 0, 0, 0, 1, 1, 1])
torch.Size([24])
Gradient norm: 3.4998361455187306
tensor([1, 1, 1, 0, 0, 1, 1, 0, 0, 1])
torch.Size([24])
Gradient norm: 1.546267634857959
tensor([1, 0, 0, 0, 0, 0, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 1.7785220919577098
tensor([1, 1,

Epoch 517/600 - Training:   0%|          | 0/29 [00:00<?, ?it/s]

tensor([0, 0, 1, 0, 1, 1, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 3.053652689408925
tensor([1, 0, 0, 0, 0, 0, 1, 1, 1, 1])
torch.Size([24])
Gradient norm: 1.917375714036628
tensor([1, 0, 1, 0, 0, 0, 1, 1, 0, 0])
torch.Size([24])
Gradient norm: 2.469757877858397
tensor([1, 0, 0, 1, 1, 1, 0, 0, 1, 0])
torch.Size([24])
Gradient norm: 2.6956786628947085
tensor([0, 1, 0, 1, 1, 0, 1, 1, 0, 1])
torch.Size([24])
Gradient norm: 1.9822961888262292
tensor([0, 0, 1, 0, 1, 0, 1, 1, 1, 0])
torch.Size([24])
Gradient norm: 1.7677060903916286
tensor([1, 0, 0, 0, 1, 0, 0, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.9219767371160503
tensor([0, 1, 1, 0, 0, 1, 0, 1, 0, 0])
torch.Size([24])
Gradient norm: 2.186499894231937
tensor([1, 0, 0, 1, 0, 0, 0, 1, 1, 1])
torch.Size([24])
Gradient norm: 2.9060834536074744
tensor([1, 1, 1, 0, 0, 1, 1, 0, 0, 1])
torch.Size([24])
Gradient norm: 1.861074265686341
tensor([1, 0, 0, 0, 0, 0, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 2.1462744541277354
tensor([1, 1, 0

Epoch 518/600 - Training:   0%|          | 0/29 [00:00<?, ?it/s]

tensor([0, 0, 1, 0, 1, 1, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 3.6608387416958164
tensor([1, 0, 0, 0, 0, 0, 1, 1, 1, 1])
torch.Size([24])
Gradient norm: 2.0190749017822616
tensor([1, 0, 1, 0, 0, 0, 1, 1, 0, 0])
torch.Size([24])
Gradient norm: 2.204085915447092
tensor([1, 0, 0, 1, 1, 1, 0, 0, 1, 0])
torch.Size([24])
Gradient norm: 2.559260842054175
tensor([0, 1, 0, 1, 1, 0, 1, 1, 0, 1])
torch.Size([24])
Gradient norm: 2.062815528245728
tensor([0, 0, 1, 0, 1, 0, 1, 1, 1, 0])
torch.Size([24])
Gradient norm: 1.7074125148442225
tensor([1, 0, 0, 0, 1, 0, 0, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.39535586401946
tensor([0, 1, 1, 0, 0, 1, 0, 1, 0, 0])
torch.Size([24])
Gradient norm: 2.8640255741799328
tensor([1, 0, 0, 1, 0, 0, 0, 1, 1, 1])
torch.Size([24])
Gradient norm: 2.6283154150708663
tensor([1, 1, 1, 0, 0, 1, 1, 0, 0, 1])
torch.Size([24])
Gradient norm: 1.7763977582434098
tensor([1, 0, 0, 0, 0, 0, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 1.6100910309119079
tensor([1, 1, 0

Epoch 519/600 - Training:   0%|          | 0/29 [00:00<?, ?it/s]

tensor([0, 0, 1, 0, 1, 1, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 3.672183053817826
tensor([1, 0, 0, 0, 0, 0, 1, 1, 1, 1])
torch.Size([24])
Gradient norm: 1.8704584299401177
tensor([1, 0, 1, 0, 0, 0, 1, 1, 0, 0])
torch.Size([24])
Gradient norm: 2.2002533272655147
tensor([1, 0, 0, 1, 1, 1, 0, 0, 1, 0])
torch.Size([24])
Gradient norm: 2.054236524289375
tensor([0, 1, 0, 1, 1, 0, 1, 1, 0, 1])
torch.Size([24])
Gradient norm: 1.7251795443289075
tensor([0, 0, 1, 0, 1, 0, 1, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.056763984242676
tensor([1, 0, 0, 0, 1, 0, 0, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.589845175179998
tensor([0, 1, 1, 0, 0, 1, 0, 1, 0, 0])
torch.Size([24])
Gradient norm: 2.8555484349759404
tensor([1, 0, 0, 1, 0, 0, 0, 1, 1, 1])
torch.Size([24])
Gradient norm: 2.5719156446133074
tensor([1, 1, 1, 0, 0, 1, 1, 0, 0, 1])
torch.Size([24])
Gradient norm: 1.7557571935924325
tensor([1, 0, 0, 0, 0, 0, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 1.8114177381710919
tensor([1, 1, 

Epoch 520/600 - Training:   0%|          | 0/29 [00:00<?, ?it/s]

tensor([0, 0, 1, 0, 1, 1, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 3.4875628008825412
tensor([1, 0, 0, 0, 0, 0, 1, 1, 1, 1])
torch.Size([24])
Gradient norm: 2.313374742046216
tensor([1, 0, 1, 0, 0, 0, 1, 1, 0, 0])
torch.Size([24])
Gradient norm: 2.671969960310446
tensor([1, 0, 0, 1, 1, 1, 0, 0, 1, 0])
torch.Size([24])
Gradient norm: 2.118784650056842
tensor([0, 1, 0, 1, 1, 0, 1, 1, 0, 1])
torch.Size([24])
Gradient norm: 2.045976132625056
tensor([0, 0, 1, 0, 1, 0, 1, 1, 1, 0])
torch.Size([24])
Gradient norm: 1.9212268624011135
tensor([1, 0, 0, 0, 1, 0, 0, 1, 1, 0])
torch.Size([24])
Gradient norm: 3.128833068979269
tensor([0, 1, 1, 0, 0, 1, 0, 1, 0, 0])
torch.Size([24])
Gradient norm: 3.173620649764921
tensor([1, 0, 0, 1, 0, 0, 0, 1, 1, 1])
torch.Size([24])
Gradient norm: 3.3628871150118074
tensor([1, 1, 1, 0, 0, 1, 1, 0, 0, 1])
torch.Size([24])
Gradient norm: 2.012556621861155
tensor([1, 0, 0, 0, 0, 0, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 1.820025162076332
tensor([1, 1, 0, 0

Epoch 521/600 - Training:   0%|          | 0/29 [00:00<?, ?it/s]

tensor([0, 0, 1, 0, 1, 1, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 3.925604029187938
tensor([1, 0, 0, 0, 0, 0, 1, 1, 1, 1])
torch.Size([24])
Gradient norm: 1.90077083904513
tensor([1, 0, 1, 0, 0, 0, 1, 1, 0, 0])
torch.Size([24])
Gradient norm: 1.876912153834014
tensor([1, 0, 0, 1, 1, 1, 0, 0, 1, 0])
torch.Size([24])
Gradient norm: 2.8524683206812047
tensor([0, 1, 0, 1, 1, 0, 1, 1, 0, 1])
torch.Size([24])
Gradient norm: 1.8658456982153102
tensor([0, 0, 1, 0, 1, 0, 1, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.2357466014129446
tensor([1, 0, 0, 0, 1, 0, 0, 1, 1, 0])
torch.Size([24])
Gradient norm: 3.3607941244550084
tensor([0, 1, 1, 0, 0, 1, 0, 1, 0, 0])
torch.Size([24])
Gradient norm: 3.2784818068877057
tensor([1, 0, 0, 1, 0, 0, 0, 1, 1, 1])
torch.Size([24])
Gradient norm: 2.8446434339582862
tensor([1, 1, 1, 0, 0, 1, 1, 0, 0, 1])
torch.Size([24])
Gradient norm: 1.9577033612577903
tensor([1, 0, 0, 0, 0, 0, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 2.064462077055943
tensor([1, 1, 0

Epoch 522/600 - Training:   0%|          | 0/29 [00:00<?, ?it/s]

tensor([0, 0, 1, 0, 1, 1, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 3.287359080561028
tensor([1, 0, 0, 0, 0, 0, 1, 1, 1, 1])
torch.Size([24])
Gradient norm: 2.8208296200450516
tensor([1, 0, 1, 0, 0, 0, 1, 1, 0, 0])
torch.Size([24])
Gradient norm: 2.3846637458845334
tensor([1, 0, 0, 1, 1, 1, 0, 0, 1, 0])
torch.Size([24])
Gradient norm: 2.6536872601002073
tensor([0, 1, 0, 1, 1, 0, 1, 1, 0, 1])
torch.Size([24])
Gradient norm: 1.9721886959933543
tensor([0, 0, 1, 0, 1, 0, 1, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.4183056654159802
tensor([1, 0, 0, 0, 1, 0, 0, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.7392864858147066
tensor([0, 1, 1, 0, 0, 1, 0, 1, 0, 0])
torch.Size([24])
Gradient norm: 2.6520164804651825
tensor([1, 0, 0, 1, 0, 0, 0, 1, 1, 1])
torch.Size([24])
Gradient norm: 3.005444457512484
tensor([1, 1, 1, 0, 0, 1, 1, 0, 0, 1])
torch.Size([24])
Gradient norm: 1.9847481896330132
tensor([1, 0, 0, 0, 0, 0, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 2.0817142539881357
tensor([1, 1

Epoch 523/600 - Training:   0%|          | 0/29 [00:00<?, ?it/s]

tensor([0, 0, 1, 0, 1, 1, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 3.259808396442873
tensor([1, 0, 0, 0, 0, 0, 1, 1, 1, 1])
torch.Size([24])
Gradient norm: 2.4836352646726043
tensor([1, 0, 1, 0, 0, 0, 1, 1, 0, 0])
torch.Size([24])
Gradient norm: 2.644266173671344
tensor([1, 0, 0, 1, 1, 1, 0, 0, 1, 0])
torch.Size([24])
Gradient norm: 2.3408309920941335
tensor([0, 1, 0, 1, 1, 0, 1, 1, 0, 1])
torch.Size([24])
Gradient norm: 2.093965588822437
tensor([0, 0, 1, 0, 1, 0, 1, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.3207582570229572
tensor([1, 0, 0, 0, 1, 0, 0, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.7442170775814905
tensor([0, 1, 1, 0, 0, 1, 0, 1, 0, 0])
torch.Size([24])
Gradient norm: 2.769582771672317
tensor([1, 0, 0, 1, 0, 0, 0, 1, 1, 1])
torch.Size([24])
Gradient norm: 3.2139663795956133
tensor([1, 1, 1, 0, 0, 1, 1, 0, 0, 1])
torch.Size([24])
Gradient norm: 1.6668431039676062
tensor([1, 0, 0, 0, 0, 0, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 2.0467146966026766
tensor([1, 1, 

Epoch 524/600 - Training:   0%|          | 0/29 [00:00<?, ?it/s]

tensor([0, 0, 1, 0, 1, 1, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 3.2236277847590054
tensor([1, 0, 0, 0, 0, 0, 1, 1, 1, 1])
torch.Size([24])
Gradient norm: 2.2656680039296986
tensor([1, 0, 1, 0, 0, 0, 1, 1, 0, 0])
torch.Size([24])
Gradient norm: 2.272894259155993
tensor([1, 0, 0, 1, 1, 1, 0, 0, 1, 0])
torch.Size([24])
Gradient norm: 2.4225873441215775
tensor([0, 1, 0, 1, 1, 0, 1, 1, 0, 1])
torch.Size([24])
Gradient norm: 1.9465687261559586
tensor([0, 0, 1, 0, 1, 0, 1, 1, 1, 0])
torch.Size([24])
Gradient norm: 1.9958439335663303
tensor([1, 0, 0, 0, 1, 0, 0, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.4187163014879878
tensor([0, 1, 1, 0, 0, 1, 0, 1, 0, 0])
torch.Size([24])
Gradient norm: 2.5748169162643406
tensor([1, 0, 0, 1, 0, 0, 0, 1, 1, 1])
torch.Size([24])
Gradient norm: 3.227505362246213
tensor([1, 1, 1, 0, 0, 1, 1, 0, 0, 1])
torch.Size([24])
Gradient norm: 2.1392592848786567
tensor([1, 0, 0, 0, 0, 0, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 1.6678822900505654
tensor([1, 1

Epoch 525/600 - Training:   0%|          | 0/29 [00:00<?, ?it/s]

tensor([0, 0, 1, 0, 1, 1, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 3.876086664898197
tensor([1, 0, 0, 0, 0, 0, 1, 1, 1, 1])
torch.Size([24])
Gradient norm: 2.0779246596741068
tensor([1, 0, 1, 0, 0, 0, 1, 1, 0, 0])
torch.Size([24])
Gradient norm: 2.052771108862021
tensor([1, 0, 0, 1, 1, 1, 0, 0, 1, 0])
torch.Size([24])
Gradient norm: 2.4314381504836278
tensor([0, 1, 0, 1, 1, 0, 1, 1, 0, 1])
torch.Size([24])
Gradient norm: 1.8995558268260542
tensor([0, 0, 1, 0, 1, 0, 1, 1, 1, 0])
torch.Size([24])
Gradient norm: 1.8515051610256779
tensor([1, 0, 0, 0, 1, 0, 0, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.4505913879860133
tensor([0, 1, 1, 0, 0, 1, 0, 1, 0, 0])
torch.Size([24])
Gradient norm: 2.7326370884751583
tensor([1, 0, 0, 1, 0, 0, 0, 1, 1, 1])
torch.Size([24])
Gradient norm: 2.6521988306471544
tensor([1, 1, 1, 0, 0, 1, 1, 0, 0, 1])
torch.Size([24])
Gradient norm: 2.108530489565529
tensor([1, 0, 0, 0, 0, 0, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 1.8442148132589635
tensor([1, 1,

Epoch 526/600 - Training:   0%|          | 0/29 [00:00<?, ?it/s]

tensor([0, 0, 1, 0, 1, 1, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 3.0882611216618523
tensor([1, 0, 0, 0, 0, 0, 1, 1, 1, 1])
torch.Size([24])
Gradient norm: 1.906368087626674
tensor([1, 0, 1, 0, 0, 0, 1, 1, 0, 0])
torch.Size([24])
Gradient norm: 1.8393660957591473
tensor([1, 0, 0, 1, 1, 1, 0, 0, 1, 0])
torch.Size([24])
Gradient norm: 1.9920177896615168
tensor([0, 1, 0, 1, 1, 0, 1, 1, 0, 1])
torch.Size([24])
Gradient norm: 1.748090305548131
tensor([0, 0, 1, 0, 1, 0, 1, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.1038353591150414
tensor([1, 0, 0, 0, 1, 0, 0, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.592432019800013
tensor([0, 1, 1, 0, 0, 1, 0, 1, 0, 0])
torch.Size([24])
Gradient norm: 2.8431560544259726
tensor([1, 0, 0, 1, 0, 0, 0, 1, 1, 1])
torch.Size([24])
Gradient norm: 2.5614728262868858
tensor([1, 1, 1, 0, 0, 1, 1, 0, 0, 1])
torch.Size([24])
Gradient norm: 1.7722828548443395
tensor([1, 0, 0, 0, 0, 0, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 1.8565199074840326
tensor([1, 1,

Epoch 527/600 - Training:   0%|          | 0/29 [00:00<?, ?it/s]

tensor([0, 0, 1, 0, 1, 1, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 2.740682285983027
tensor([1, 0, 0, 0, 0, 0, 1, 1, 1, 1])
torch.Size([24])
Gradient norm: 2.1023879669842755
tensor([1, 0, 1, 0, 0, 0, 1, 1, 0, 0])
torch.Size([24])
Gradient norm: 1.6451589014217727
tensor([1, 0, 0, 1, 1, 1, 0, 0, 1, 0])
torch.Size([24])
Gradient norm: 2.3068086783660666
tensor([0, 1, 0, 1, 1, 0, 1, 1, 0, 1])
torch.Size([24])
Gradient norm: 2.1474315129516777
tensor([0, 0, 1, 0, 1, 0, 1, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.31704928243897
tensor([1, 0, 0, 0, 1, 0, 0, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.247389579505161
tensor([0, 1, 1, 0, 0, 1, 0, 1, 0, 0])
torch.Size([24])
Gradient norm: 2.9579797881487417
tensor([1, 0, 0, 1, 0, 0, 0, 1, 1, 1])
torch.Size([24])
Gradient norm: 2.7222492081865037
tensor([1, 1, 1, 0, 0, 1, 1, 0, 0, 1])
torch.Size([24])
Gradient norm: 1.7481431544941144
tensor([1, 0, 0, 0, 0, 0, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 1.662541962189102
tensor([1, 1, 0

Epoch 528/600 - Training:   0%|          | 0/29 [00:00<?, ?it/s]

tensor([0, 0, 1, 0, 1, 1, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 3.5413988656590787
tensor([1, 0, 0, 0, 0, 0, 1, 1, 1, 1])
torch.Size([24])
Gradient norm: 2.2115873414722595
tensor([1, 0, 1, 0, 0, 0, 1, 1, 0, 0])
torch.Size([24])
Gradient norm: 2.2732212098112754
tensor([1, 0, 0, 1, 1, 1, 0, 0, 1, 0])
torch.Size([24])
Gradient norm: 2.453479321260913
tensor([0, 1, 0, 1, 1, 0, 1, 1, 0, 1])
torch.Size([24])
Gradient norm: 2.008429317185099
tensor([0, 0, 1, 0, 1, 0, 1, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.3396077403730584
tensor([1, 0, 0, 0, 1, 0, 0, 1, 1, 0])
torch.Size([24])
Gradient norm: 3.0508754700087497
tensor([0, 1, 1, 0, 0, 1, 0, 1, 0, 0])
torch.Size([24])
Gradient norm: 3.2615642270012644
tensor([1, 0, 0, 1, 0, 0, 0, 1, 1, 1])
torch.Size([24])
Gradient norm: 2.354873203089602
tensor([1, 1, 1, 0, 0, 1, 1, 0, 0, 1])
torch.Size([24])
Gradient norm: 2.380791570070269
tensor([1, 0, 0, 0, 0, 0, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 1.9333247067449122
tensor([1, 1, 

Epoch 529/600 - Training:   0%|          | 0/29 [00:00<?, ?it/s]

tensor([0, 0, 1, 0, 1, 1, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 3.9048564633822265
tensor([1, 0, 0, 0, 0, 0, 1, 1, 1, 1])
torch.Size([24])
Gradient norm: 2.4553403538102954
tensor([1, 0, 1, 0, 0, 0, 1, 1, 0, 0])
torch.Size([24])
Gradient norm: 2.8370938608431584
tensor([1, 0, 0, 1, 1, 1, 0, 0, 1, 0])
torch.Size([24])
Gradient norm: 2.219771906902073
tensor([0, 1, 0, 1, 1, 0, 1, 1, 0, 1])
torch.Size([24])
Gradient norm: 1.8405005198950395
tensor([0, 0, 1, 0, 1, 0, 1, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.4695825576509347
tensor([1, 0, 0, 0, 1, 0, 0, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.5750244627932504
tensor([0, 1, 1, 0, 0, 1, 0, 1, 0, 0])
torch.Size([24])
Gradient norm: 3.0802354857825143
tensor([1, 0, 0, 1, 0, 0, 0, 1, 1, 1])
torch.Size([24])
Gradient norm: 2.8507144540558356
tensor([1, 1, 1, 0, 0, 1, 1, 0, 0, 1])
torch.Size([24])
Gradient norm: 2.0215562687535455
tensor([1, 0, 0, 0, 0, 0, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 2.065165519001372
tensor([1, 1

Epoch 530/600 - Training:   0%|          | 0/29 [00:00<?, ?it/s]

tensor([0, 0, 1, 0, 1, 1, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 3.063576540716413
tensor([1, 0, 0, 0, 0, 0, 1, 1, 1, 1])
torch.Size([24])
Gradient norm: 1.955502697836521
tensor([1, 0, 1, 0, 0, 0, 1, 1, 0, 0])
torch.Size([24])
Gradient norm: 2.366144730315542
tensor([1, 0, 0, 1, 1, 1, 0, 0, 1, 0])
torch.Size([24])
Gradient norm: 2.909001054432357
tensor([0, 1, 0, 1, 1, 0, 1, 1, 0, 1])
torch.Size([24])
Gradient norm: 2.1403269111224814
tensor([0, 0, 1, 0, 1, 0, 1, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.2250588077529576
tensor([1, 0, 0, 0, 1, 0, 0, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.818909295542687
tensor([0, 1, 1, 0, 0, 1, 0, 1, 0, 0])
torch.Size([24])
Gradient norm: 3.203489564979082
tensor([1, 0, 0, 1, 0, 0, 0, 1, 1, 1])
torch.Size([24])
Gradient norm: 2.9516338984853574
tensor([1, 1, 1, 0, 0, 1, 1, 0, 0, 1])
torch.Size([24])
Gradient norm: 1.6633134875285565
tensor([1, 0, 0, 0, 0, 0, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 1.7138701590512657
tensor([1, 1, 0,

Epoch 531/600 - Training:   0%|          | 0/29 [00:00<?, ?it/s]

tensor([0, 0, 1, 0, 1, 1, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 3.3310398242110604
tensor([1, 0, 0, 0, 0, 0, 1, 1, 1, 1])
torch.Size([24])
Gradient norm: 1.9365713960256001
tensor([1, 0, 1, 0, 0, 0, 1, 1, 0, 0])
torch.Size([24])
Gradient norm: 2.0855184883002362
tensor([1, 0, 0, 1, 1, 1, 0, 0, 1, 0])
torch.Size([24])
Gradient norm: 2.531981586518318
tensor([0, 1, 0, 1, 1, 0, 1, 1, 0, 1])
torch.Size([24])
Gradient norm: 2.323755588802078
tensor([0, 0, 1, 0, 1, 0, 1, 1, 1, 0])
torch.Size([24])
Gradient norm: 1.8831720650016839
tensor([1, 0, 0, 0, 1, 0, 0, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.3670673551362458
tensor([0, 1, 1, 0, 0, 1, 0, 1, 0, 0])
torch.Size([24])
Gradient norm: 2.7433688369659386
tensor([1, 0, 0, 1, 0, 0, 0, 1, 1, 1])
torch.Size([24])
Gradient norm: 3.0617363581687957
tensor([1, 1, 1, 0, 0, 1, 1, 0, 0, 1])
torch.Size([24])
Gradient norm: 2.1521650888402863
tensor([1, 0, 0, 0, 0, 0, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 2.307144006953272
tensor([1, 1,

Epoch 532/600 - Training:   0%|          | 0/29 [00:00<?, ?it/s]

tensor([0, 0, 1, 0, 1, 1, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 3.7713453894607225
tensor([1, 0, 0, 0, 0, 0, 1, 1, 1, 1])
torch.Size([24])
Gradient norm: 2.5034629637433916
tensor([1, 0, 1, 0, 0, 0, 1, 1, 0, 0])
torch.Size([24])
Gradient norm: 3.0097447002225364
tensor([1, 0, 0, 1, 1, 1, 0, 0, 1, 0])
torch.Size([24])
Gradient norm: 2.783588350649566
tensor([0, 1, 0, 1, 1, 0, 1, 1, 0, 1])
torch.Size([24])
Gradient norm: 2.0289126562634983
tensor([0, 0, 1, 0, 1, 0, 1, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.435285640924848
tensor([1, 0, 0, 0, 1, 0, 0, 1, 1, 0])
torch.Size([24])
Gradient norm: 3.296101928070006
tensor([0, 1, 1, 0, 0, 1, 0, 1, 0, 0])
torch.Size([24])
Gradient norm: 3.181256310679477
tensor([1, 0, 0, 1, 0, 0, 0, 1, 1, 1])
torch.Size([24])
Gradient norm: 3.2876741278921706
tensor([1, 1, 1, 0, 0, 1, 1, 0, 0, 1])
torch.Size([24])
Gradient norm: 1.5988847914982318
tensor([1, 0, 0, 0, 0, 0, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 2.058354395331181
tensor([1, 1, 0

Epoch 533/600 - Training:   0%|          | 0/29 [00:00<?, ?it/s]

tensor([0, 0, 1, 0, 1, 1, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 4.459392216720924
tensor([1, 0, 0, 0, 0, 0, 1, 1, 1, 1])
torch.Size([24])
Gradient norm: 1.7512324502514993
tensor([1, 0, 1, 0, 0, 0, 1, 1, 0, 0])
torch.Size([24])
Gradient norm: 1.7378867183776543
tensor([1, 0, 0, 1, 1, 1, 0, 0, 1, 0])
torch.Size([24])
Gradient norm: 2.8838767101781353
tensor([0, 1, 0, 1, 1, 0, 1, 1, 0, 1])
torch.Size([24])
Gradient norm: 1.8069812405198995
tensor([0, 0, 1, 0, 1, 0, 1, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.8659640029493203
tensor([1, 0, 0, 0, 1, 0, 0, 1, 1, 0])
torch.Size([24])
Gradient norm: 3.250732033366426
tensor([0, 1, 1, 0, 0, 1, 0, 1, 0, 0])
torch.Size([24])
Gradient norm: 2.818101245107925
tensor([1, 0, 0, 1, 0, 0, 0, 1, 1, 1])
torch.Size([24])
Gradient norm: 3.136516286591763
tensor([1, 1, 1, 0, 0, 1, 1, 0, 0, 1])
torch.Size([24])
Gradient norm: 1.9996543831734306
tensor([1, 0, 0, 0, 0, 0, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 1.5852719897720926
tensor([1, 1, 

Epoch 534/600 - Training:   0%|          | 0/29 [00:00<?, ?it/s]

tensor([0, 0, 1, 0, 1, 1, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 3.44337024674609
tensor([1, 0, 0, 0, 0, 0, 1, 1, 1, 1])
torch.Size([24])
Gradient norm: 1.536282899346594
tensor([1, 0, 1, 0, 0, 0, 1, 1, 0, 0])
torch.Size([24])
Gradient norm: 2.423196959515134
tensor([1, 0, 0, 1, 1, 1, 0, 0, 1, 0])
torch.Size([24])
Gradient norm: 3.578123587694696
tensor([0, 1, 0, 1, 1, 0, 1, 1, 0, 1])
torch.Size([24])
Gradient norm: 1.8227562500813426
tensor([0, 0, 1, 0, 1, 0, 1, 1, 1, 0])
torch.Size([24])
Gradient norm: 1.894253002519348
tensor([1, 0, 0, 0, 1, 0, 0, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.538180817753843
tensor([0, 1, 1, 0, 0, 1, 0, 1, 0, 0])
torch.Size([24])
Gradient norm: 3.5030960854696307
tensor([1, 0, 0, 1, 0, 0, 0, 1, 1, 1])
torch.Size([24])
Gradient norm: 2.9018340522324406
tensor([1, 1, 1, 0, 0, 1, 1, 0, 0, 1])
torch.Size([24])
Gradient norm: 2.037358990131611
tensor([1, 0, 0, 0, 0, 0, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 1.475502894827324
tensor([1, 1, 0, 0,

Epoch 535/600 - Training:   0%|          | 0/29 [00:00<?, ?it/s]

tensor([0, 0, 1, 0, 1, 1, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 3.702282785986639
tensor([1, 0, 0, 0, 0, 0, 1, 1, 1, 1])
torch.Size([24])
Gradient norm: 2.4027422402241116
tensor([1, 0, 1, 0, 0, 0, 1, 1, 0, 0])
torch.Size([24])
Gradient norm: 2.3057179753446193
tensor([1, 0, 0, 1, 1, 1, 0, 0, 1, 0])
torch.Size([24])
Gradient norm: 2.8907008404030052
tensor([0, 1, 0, 1, 1, 0, 1, 1, 0, 1])
torch.Size([24])
Gradient norm: 2.4934378841169558
tensor([0, 0, 1, 0, 1, 0, 1, 1, 1, 0])
torch.Size([24])
Gradient norm: 1.9981434149174793
tensor([1, 0, 0, 0, 1, 0, 0, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.247058984308627
tensor([0, 1, 1, 0, 0, 1, 0, 1, 0, 0])
torch.Size([24])
Gradient norm: 2.4096925486308374
tensor([1, 0, 0, 1, 0, 0, 0, 1, 1, 1])
torch.Size([24])
Gradient norm: 2.470551956958085
tensor([1, 1, 1, 0, 0, 1, 1, 0, 0, 1])
torch.Size([24])
Gradient norm: 2.0317129619305874
tensor([1, 0, 0, 0, 0, 0, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 2.0357794885782226
tensor([1, 1,

Epoch 536/600 - Training:   0%|          | 0/29 [00:00<?, ?it/s]

tensor([0, 0, 1, 0, 1, 1, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 3.6487776999957826
tensor([1, 0, 0, 0, 0, 0, 1, 1, 1, 1])
torch.Size([24])
Gradient norm: 2.049334817821302
tensor([1, 0, 1, 0, 0, 0, 1, 1, 0, 0])
torch.Size([24])
Gradient norm: 2.2112239856913076
tensor([1, 0, 0, 1, 1, 1, 0, 0, 1, 0])
torch.Size([24])
Gradient norm: 2.6425803515366724
tensor([0, 1, 0, 1, 1, 0, 1, 1, 0, 1])
torch.Size([24])
Gradient norm: 1.8357949281607633
tensor([0, 0, 1, 0, 1, 0, 1, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.398721418006627
tensor([1, 0, 0, 0, 1, 0, 0, 1, 1, 0])
torch.Size([24])
Gradient norm: 3.0664442204314177
tensor([0, 1, 1, 0, 0, 1, 0, 1, 0, 0])
torch.Size([24])
Gradient norm: 3.1845714550872053
tensor([1, 0, 0, 1, 0, 0, 0, 1, 1, 1])
torch.Size([24])
Gradient norm: 2.9500704536480753
tensor([1, 1, 1, 0, 0, 1, 1, 0, 0, 1])
torch.Size([24])
Gradient norm: 2.0280594693765446
tensor([1, 0, 0, 0, 0, 0, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 1.800101859392167
tensor([1, 1,

Epoch 537/600 - Training:   0%|          | 0/29 [00:00<?, ?it/s]

tensor([0, 0, 1, 0, 1, 1, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 3.1764279717595394
tensor([1, 0, 0, 0, 0, 0, 1, 1, 1, 1])
torch.Size([24])
Gradient norm: 2.052549553327401
tensor([1, 0, 1, 0, 0, 0, 1, 1, 0, 0])
torch.Size([24])
Gradient norm: 2.46219373276446
tensor([1, 0, 0, 1, 1, 1, 0, 0, 1, 0])
torch.Size([24])
Gradient norm: 2.8327221194282934
tensor([0, 1, 0, 1, 1, 0, 1, 1, 0, 1])
torch.Size([24])
Gradient norm: 1.5975480336554675
tensor([0, 0, 1, 0, 1, 0, 1, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.191250922014768
tensor([1, 0, 0, 0, 1, 0, 0, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.9000861296837233
tensor([0, 1, 1, 0, 0, 1, 0, 1, 0, 0])
torch.Size([24])
Gradient norm: 2.938859019322071
tensor([1, 0, 0, 1, 0, 0, 0, 1, 1, 1])
torch.Size([24])
Gradient norm: 3.2415376187110785
tensor([1, 1, 1, 0, 0, 1, 1, 0, 0, 1])
torch.Size([24])
Gradient norm: 1.9889829349857464
tensor([1, 0, 0, 0, 0, 0, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 1.7573797657409376
tensor([1, 1, 0

Epoch 538/600 - Training:   0%|          | 0/29 [00:00<?, ?it/s]

tensor([0, 0, 1, 0, 1, 1, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 3.216465778585589
tensor([1, 0, 0, 0, 0, 0, 1, 1, 1, 1])
torch.Size([24])
Gradient norm: 1.848333864594977
tensor([1, 0, 1, 0, 0, 0, 1, 1, 0, 0])
torch.Size([24])
Gradient norm: 2.467201918379274
tensor([1, 0, 0, 1, 1, 1, 0, 0, 1, 0])
torch.Size([24])
Gradient norm: 3.2635607703255687
tensor([0, 1, 0, 1, 1, 0, 1, 1, 0, 1])
torch.Size([24])
Gradient norm: 2.1424022947363857
tensor([0, 0, 1, 0, 1, 0, 1, 1, 1, 0])
torch.Size([24])
Gradient norm: 1.97064277132518
tensor([1, 0, 0, 0, 1, 0, 0, 1, 1, 0])
torch.Size([24])
Gradient norm: 3.0790473320420397
tensor([0, 1, 1, 0, 0, 1, 0, 1, 0, 0])
torch.Size([24])
Gradient norm: 2.97990151895367
tensor([1, 0, 0, 1, 0, 0, 0, 1, 1, 1])
torch.Size([24])
Gradient norm: 2.6932291527317833
tensor([1, 1, 1, 0, 0, 1, 1, 0, 0, 1])
torch.Size([24])
Gradient norm: 1.6297318153513287
tensor([1, 0, 0, 0, 0, 0, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 1.614187575476601
tensor([1, 1, 0, 0

Epoch 539/600 - Training:   0%|          | 0/29 [00:00<?, ?it/s]

tensor([0, 0, 1, 0, 1, 1, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 2.788520796389122
tensor([1, 0, 0, 0, 0, 0, 1, 1, 1, 1])
torch.Size([24])
Gradient norm: 1.5840399559602982
tensor([1, 0, 1, 0, 0, 0, 1, 1, 0, 0])
torch.Size([24])
Gradient norm: 2.0865827033629416
tensor([1, 0, 0, 1, 1, 1, 0, 0, 1, 0])
torch.Size([24])
Gradient norm: 2.804012786441889
tensor([0, 1, 0, 1, 1, 0, 1, 1, 0, 1])
torch.Size([24])
Gradient norm: 1.6311461720889102
tensor([0, 0, 1, 0, 1, 0, 1, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.473093892600221
tensor([1, 0, 0, 0, 1, 0, 0, 1, 1, 0])
torch.Size([24])
Gradient norm: 3.0735697897369048
tensor([0, 1, 1, 0, 0, 1, 0, 1, 0, 0])
torch.Size([24])
Gradient norm: 2.511373404510532
tensor([1, 0, 0, 1, 0, 0, 0, 1, 1, 1])
torch.Size([24])
Gradient norm: 3.067387257734402
tensor([1, 1, 1, 0, 0, 1, 1, 0, 0, 1])
torch.Size([24])
Gradient norm: 1.9302787611370413
tensor([1, 0, 0, 0, 0, 0, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 1.5514272258101656
tensor([1, 1, 0

Epoch 540/600 - Training:   0%|          | 0/29 [00:00<?, ?it/s]

tensor([0, 0, 1, 0, 1, 1, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 3.3749217053170755
tensor([1, 0, 0, 0, 0, 0, 1, 1, 1, 1])
torch.Size([24])
Gradient norm: 2.021561321834907
tensor([1, 0, 1, 0, 0, 0, 1, 1, 0, 0])
torch.Size([24])
Gradient norm: 1.7561531658411844
tensor([1, 0, 0, 1, 1, 1, 0, 0, 1, 0])
torch.Size([24])
Gradient norm: 2.80931506445714
tensor([0, 1, 0, 1, 1, 0, 1, 1, 0, 1])
torch.Size([24])
Gradient norm: 1.9802325105655123
tensor([0, 0, 1, 0, 1, 0, 1, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.2631767385972443
tensor([1, 0, 0, 0, 1, 0, 0, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.000030797699733
tensor([0, 1, 1, 0, 0, 1, 0, 1, 0, 0])
torch.Size([24])
Gradient norm: 3.442404307233469
tensor([1, 0, 0, 1, 0, 0, 0, 1, 1, 1])
torch.Size([24])
Gradient norm: 2.6574303552291374
tensor([1, 1, 1, 0, 0, 1, 1, 0, 0, 1])
torch.Size([24])
Gradient norm: 1.862422177700588
tensor([1, 0, 0, 0, 0, 0, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 1.931408787212931
tensor([1, 1, 0, 

Epoch 541/600 - Training:   0%|          | 0/29 [00:00<?, ?it/s]

tensor([0, 0, 1, 0, 1, 1, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 3.114135151912615
tensor([1, 0, 0, 0, 0, 0, 1, 1, 1, 1])
torch.Size([24])
Gradient norm: 2.00761033770168
tensor([1, 0, 1, 0, 0, 0, 1, 1, 0, 0])
torch.Size([24])
Gradient norm: 2.146709540653008
tensor([1, 0, 0, 1, 1, 1, 0, 0, 1, 0])
torch.Size([24])
Gradient norm: 2.549291150369562
tensor([0, 1, 0, 1, 1, 0, 1, 1, 0, 1])
torch.Size([24])
Gradient norm: 2.2511921463370936
tensor([0, 0, 1, 0, 1, 0, 1, 1, 1, 0])
torch.Size([24])
Gradient norm: 1.9057317526785222
tensor([1, 0, 0, 0, 1, 0, 0, 1, 1, 0])
torch.Size([24])
Gradient norm: 1.9354608670222622
tensor([0, 1, 1, 0, 0, 1, 0, 1, 0, 0])
torch.Size([24])
Gradient norm: 2.5691604516896205
tensor([1, 0, 0, 1, 0, 0, 0, 1, 1, 1])
torch.Size([24])
Gradient norm: 3.17621515927148
tensor([1, 1, 1, 0, 0, 1, 1, 0, 0, 1])
torch.Size([24])
Gradient norm: 1.8370706352744703
tensor([1, 0, 0, 0, 0, 0, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 1.9063845754916988
tensor([1, 1, 0, 

Epoch 542/600 - Training:   0%|          | 0/29 [00:00<?, ?it/s]

tensor([0, 0, 1, 0, 1, 1, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 4.231491100513851
tensor([1, 0, 0, 0, 0, 0, 1, 1, 1, 1])
torch.Size([24])
Gradient norm: 2.3852526615107053
tensor([1, 0, 1, 0, 0, 0, 1, 1, 0, 0])
torch.Size([24])
Gradient norm: 2.5317550849727604
tensor([1, 0, 0, 1, 1, 1, 0, 0, 1, 0])
torch.Size([24])
Gradient norm: 2.5894335974444807
tensor([0, 1, 0, 1, 1, 0, 1, 1, 0, 1])
torch.Size([24])
Gradient norm: 1.8092262596242885
tensor([0, 0, 1, 0, 1, 0, 1, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.251195947307766
tensor([1, 0, 0, 0, 1, 0, 0, 1, 1, 0])
torch.Size([24])
Gradient norm: 3.170624793077276
tensor([0, 1, 1, 0, 0, 1, 0, 1, 0, 0])
torch.Size([24])
Gradient norm: 2.8954491408393666
tensor([1, 0, 0, 1, 0, 0, 0, 1, 1, 1])
torch.Size([24])
Gradient norm: 2.812775295021375
tensor([1, 1, 1, 0, 0, 1, 1, 0, 0, 1])
torch.Size([24])
Gradient norm: 2.030259041209997
tensor([1, 0, 0, 0, 0, 0, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 1.5229073585304318
tensor([1, 1, 0

Epoch 543/600 - Training:   0%|          | 0/29 [00:00<?, ?it/s]

tensor([0, 0, 1, 0, 1, 1, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 3.110509301603381
tensor([1, 0, 0, 0, 0, 0, 1, 1, 1, 1])
torch.Size([24])
Gradient norm: 2.7201089023001868
tensor([1, 0, 1, 0, 0, 0, 1, 1, 0, 0])
torch.Size([24])
Gradient norm: 2.3237910358736205
tensor([1, 0, 0, 1, 1, 1, 0, 0, 1, 0])
torch.Size([24])
Gradient norm: 3.1181214701755215
tensor([0, 1, 0, 1, 1, 0, 1, 1, 0, 1])
torch.Size([24])
Gradient norm: 1.7543061217733213
tensor([0, 0, 1, 0, 1, 0, 1, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.3944504507363478
tensor([1, 0, 0, 0, 1, 0, 0, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.568596198049131
tensor([0, 1, 1, 0, 0, 1, 0, 1, 0, 0])
torch.Size([24])
Gradient norm: 2.9384588287840234
tensor([1, 0, 0, 1, 0, 0, 0, 1, 1, 1])
torch.Size([24])
Gradient norm: 2.6006210716749987
tensor([1, 1, 1, 0, 0, 1, 1, 0, 0, 1])
torch.Size([24])
Gradient norm: 1.7173674521985025
tensor([1, 0, 0, 0, 0, 0, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 1.966801626084615
tensor([1, 1,

Epoch 544/600 - Training:   0%|          | 0/29 [00:00<?, ?it/s]

tensor([0, 0, 1, 0, 1, 1, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 3.6631454554501275
tensor([1, 0, 0, 0, 0, 0, 1, 1, 1, 1])
torch.Size([24])
Gradient norm: 2.04619567000132
tensor([1, 0, 1, 0, 0, 0, 1, 1, 0, 0])
torch.Size([24])
Gradient norm: 2.751335515519881
tensor([1, 0, 0, 1, 1, 1, 0, 0, 1, 0])
torch.Size([24])
Gradient norm: 2.342857326610849
tensor([0, 1, 0, 1, 1, 0, 1, 1, 0, 1])
torch.Size([24])
Gradient norm: 2.313088538916981
tensor([0, 0, 1, 0, 1, 0, 1, 1, 1, 0])
torch.Size([24])
Gradient norm: 1.9700225334276469
tensor([1, 0, 0, 0, 1, 0, 0, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.7058771063161826
tensor([0, 1, 1, 0, 0, 1, 0, 1, 0, 0])
torch.Size([24])
Gradient norm: 2.479212477074935
tensor([1, 0, 0, 1, 0, 0, 0, 1, 1, 1])
torch.Size([24])
Gradient norm: 2.838573120870003
tensor([1, 1, 1, 0, 0, 1, 1, 0, 0, 1])
torch.Size([24])
Gradient norm: 1.7390296249105905
tensor([1, 0, 0, 0, 0, 0, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 2.027377891083161
tensor([1, 1, 0, 0

Epoch 545/600 - Training:   0%|          | 0/29 [00:00<?, ?it/s]

tensor([0, 0, 1, 0, 1, 1, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 2.94309498293273
tensor([1, 0, 0, 0, 0, 0, 1, 1, 1, 1])
torch.Size([24])
Gradient norm: 1.7217699239542161
tensor([1, 0, 1, 0, 0, 0, 1, 1, 0, 0])
torch.Size([24])
Gradient norm: 2.2626315054305026
tensor([1, 0, 0, 1, 1, 1, 0, 0, 1, 0])
torch.Size([24])
Gradient norm: 2.8790337250742866
tensor([0, 1, 0, 1, 1, 0, 1, 1, 0, 1])
torch.Size([24])
Gradient norm: 1.827317283816191
tensor([0, 0, 1, 0, 1, 0, 1, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.0842989916372363
tensor([1, 0, 0, 0, 1, 0, 0, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.776274427191434
tensor([0, 1, 1, 0, 0, 1, 0, 1, 0, 0])
torch.Size([24])
Gradient norm: 3.5454582829180485
tensor([1, 0, 0, 1, 0, 0, 0, 1, 1, 1])
torch.Size([24])
Gradient norm: 2.5685399548487915
tensor([1, 1, 1, 0, 0, 1, 1, 0, 0, 1])
torch.Size([24])
Gradient norm: 2.1853542861292583
tensor([1, 0, 0, 0, 0, 0, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 2.1872112040402905
tensor([1, 1, 

Epoch 546/600 - Training:   0%|          | 0/29 [00:00<?, ?it/s]

tensor([0, 0, 1, 0, 1, 1, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 3.4071864036882147
tensor([1, 0, 0, 0, 0, 0, 1, 1, 1, 1])
torch.Size([24])
Gradient norm: 2.2731549511113034
tensor([1, 0, 1, 0, 0, 0, 1, 1, 0, 0])
torch.Size([24])
Gradient norm: 1.7399898893314893
tensor([1, 0, 0, 1, 1, 1, 0, 0, 1, 0])
torch.Size([24])
Gradient norm: 2.0057860886697014
tensor([0, 1, 0, 1, 1, 0, 1, 1, 0, 1])
torch.Size([24])
Gradient norm: 1.6281801207883297
tensor([0, 0, 1, 0, 1, 0, 1, 1, 1, 0])
torch.Size([24])
Gradient norm: 1.782268430607222
tensor([1, 0, 0, 0, 1, 0, 0, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.7356748956430663
tensor([0, 1, 1, 0, 0, 1, 0, 1, 0, 0])
torch.Size([24])
Gradient norm: 2.9143133426568166
tensor([1, 0, 0, 1, 0, 0, 0, 1, 1, 1])
torch.Size([24])
Gradient norm: 2.838749351174576
tensor([1, 1, 1, 0, 0, 1, 1, 0, 0, 1])
torch.Size([24])
Gradient norm: 2.051633539798689
tensor([1, 0, 0, 0, 0, 0, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 1.9571472756341965
tensor([1, 1,

Epoch 547/600 - Training:   0%|          | 0/29 [00:00<?, ?it/s]

tensor([0, 0, 1, 0, 1, 1, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 3.5596203544050082
tensor([1, 0, 0, 0, 0, 0, 1, 1, 1, 1])
torch.Size([24])
Gradient norm: 1.8990779647233598
tensor([1, 0, 1, 0, 0, 0, 1, 1, 0, 0])
torch.Size([24])
Gradient norm: 2.402744212066649
tensor([1, 0, 0, 1, 1, 1, 0, 0, 1, 0])
torch.Size([24])
Gradient norm: 3.015912557981245
tensor([0, 1, 0, 1, 1, 0, 1, 1, 0, 1])
torch.Size([24])
Gradient norm: 1.867843597579571
tensor([0, 0, 1, 0, 1, 0, 1, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.0237455256787906
tensor([1, 0, 0, 0, 1, 0, 0, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.64346302523504
tensor([0, 1, 1, 0, 0, 1, 0, 1, 0, 0])
torch.Size([24])
Gradient norm: 3.2157406093101923
tensor([1, 0, 0, 1, 0, 0, 0, 1, 1, 1])
torch.Size([24])
Gradient norm: 2.955196661794764
tensor([1, 1, 1, 0, 0, 1, 1, 0, 0, 1])
torch.Size([24])
Gradient norm: 2.094152084411874
tensor([1, 0, 0, 0, 0, 0, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 1.5735557753059577
tensor([1, 1, 0, 

Epoch 548/600 - Training:   0%|          | 0/29 [00:00<?, ?it/s]

tensor([0, 0, 1, 0, 1, 1, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 3.655136928852608
tensor([1, 0, 0, 0, 0, 0, 1, 1, 1, 1])
torch.Size([24])
Gradient norm: 1.9334795927281059
tensor([1, 0, 1, 0, 0, 0, 1, 1, 0, 0])
torch.Size([24])
Gradient norm: 1.5631743864154095
tensor([1, 0, 0, 1, 1, 1, 0, 0, 1, 0])
torch.Size([24])
Gradient norm: 2.59888845558672
tensor([0, 1, 0, 1, 1, 0, 1, 1, 0, 1])
torch.Size([24])
Gradient norm: 1.9422019458495159
tensor([0, 0, 1, 0, 1, 0, 1, 1, 1, 0])
torch.Size([24])
Gradient norm: 1.936877082520049
tensor([1, 0, 0, 0, 1, 0, 0, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.848776277144793
tensor([0, 1, 1, 0, 0, 1, 0, 1, 0, 0])
torch.Size([24])
Gradient norm: 3.94135189697367
tensor([1, 0, 0, 1, 0, 0, 0, 1, 1, 1])
torch.Size([24])
Gradient norm: 3.037320735268656
tensor([1, 1, 1, 0, 0, 1, 1, 0, 0, 1])
torch.Size([24])
Gradient norm: 1.767031563989833
tensor([1, 0, 0, 0, 0, 0, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 1.598400080952376
tensor([1, 1, 0, 0, 

Epoch 549/600 - Training:   0%|          | 0/29 [00:00<?, ?it/s]

tensor([0, 0, 1, 0, 1, 1, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 3.98896169525443
tensor([1, 0, 0, 0, 0, 0, 1, 1, 1, 1])
torch.Size([24])
Gradient norm: 2.282657775522581
tensor([1, 0, 1, 0, 0, 0, 1, 1, 0, 0])
torch.Size([24])
Gradient norm: 2.424331958157819
tensor([1, 0, 0, 1, 1, 1, 0, 0, 1, 0])
torch.Size([24])
Gradient norm: 2.955206779969931
tensor([0, 1, 0, 1, 1, 0, 1, 1, 0, 1])
torch.Size([24])
Gradient norm: 1.9222439612256068
tensor([0, 0, 1, 0, 1, 0, 1, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.372728375160582
tensor([1, 0, 0, 0, 1, 0, 0, 1, 1, 0])
torch.Size([24])
Gradient norm: 3.1511057903727355
tensor([0, 1, 1, 0, 0, 1, 0, 1, 0, 0])
torch.Size([24])
Gradient norm: 3.200181992552229
tensor([1, 0, 0, 1, 0, 0, 0, 1, 1, 1])
torch.Size([24])
Gradient norm: 2.9307881362072288
tensor([1, 1, 1, 0, 0, 1, 1, 0, 0, 1])
torch.Size([24])
Gradient norm: 1.8200433359296573
tensor([1, 0, 0, 0, 0, 0, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 1.9292929054840342
tensor([1, 1, 0, 

Epoch 550/600 - Training:   0%|          | 0/29 [00:00<?, ?it/s]

tensor([0, 0, 1, 0, 1, 1, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 3.548438253152295
tensor([1, 0, 0, 0, 0, 0, 1, 1, 1, 1])
torch.Size([24])
Gradient norm: 2.0416888440479752
tensor([1, 0, 1, 0, 0, 0, 1, 1, 0, 0])
torch.Size([24])
Gradient norm: 2.332999156035104
tensor([1, 0, 0, 1, 1, 1, 0, 0, 1, 0])
torch.Size([24])
Gradient norm: 1.8442244739504536
tensor([0, 1, 0, 1, 1, 0, 1, 1, 0, 1])
torch.Size([24])
Gradient norm: 2.1728015972719277
tensor([0, 0, 1, 0, 1, 0, 1, 1, 1, 0])
torch.Size([24])
Gradient norm: 1.794880440190001
tensor([1, 0, 0, 0, 1, 0, 0, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.66445898124617
tensor([0, 1, 1, 0, 0, 1, 0, 1, 0, 0])
torch.Size([24])
Gradient norm: 3.2170711792478053
tensor([1, 0, 0, 1, 0, 0, 0, 1, 1, 1])
torch.Size([24])
Gradient norm: 3.086771081818445
tensor([1, 1, 1, 0, 0, 1, 1, 0, 0, 1])
torch.Size([24])
Gradient norm: 1.814686313216446
tensor([1, 0, 0, 0, 0, 0, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 1.7806204587230023
tensor([1, 1, 0, 

Epoch 551/600 - Training:   0%|          | 0/29 [00:00<?, ?it/s]

tensor([0, 0, 1, 0, 1, 1, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 2.877878293855284
tensor([1, 0, 0, 0, 0, 0, 1, 1, 1, 1])
torch.Size([24])
Gradient norm: 2.0330860102606496
tensor([1, 0, 1, 0, 0, 0, 1, 1, 0, 0])
torch.Size([24])
Gradient norm: 2.272132012345563
tensor([1, 0, 0, 1, 1, 1, 0, 0, 1, 0])
torch.Size([24])
Gradient norm: 2.363435401959444
tensor([0, 1, 0, 1, 1, 0, 1, 1, 0, 1])
torch.Size([24])
Gradient norm: 1.552018933223094
tensor([0, 0, 1, 0, 1, 0, 1, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.314963985626473
tensor([1, 0, 0, 0, 1, 0, 0, 1, 1, 0])
torch.Size([24])
Gradient norm: 3.2420285799558712
tensor([0, 1, 1, 0, 0, 1, 0, 1, 0, 0])
torch.Size([24])
Gradient norm: 2.9260639821104437
tensor([1, 0, 0, 1, 0, 0, 0, 1, 1, 1])
torch.Size([24])
Gradient norm: 3.141339579761341
tensor([1, 1, 1, 0, 0, 1, 1, 0, 0, 1])
torch.Size([24])
Gradient norm: 1.602821168480062
tensor([1, 0, 0, 0, 0, 0, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 2.205067135386619
tensor([1, 1, 0, 0

Epoch 552/600 - Training:   0%|          | 0/29 [00:00<?, ?it/s]

tensor([0, 0, 1, 0, 1, 1, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 3.1925532886915606
tensor([1, 0, 0, 0, 0, 0, 1, 1, 1, 1])
torch.Size([24])
Gradient norm: 2.2202768015063605
tensor([1, 0, 1, 0, 0, 0, 1, 1, 0, 0])
torch.Size([24])
Gradient norm: 2.1284387440557553
tensor([1, 0, 0, 1, 1, 1, 0, 0, 1, 0])
torch.Size([24])
Gradient norm: 2.3370789995929035
tensor([0, 1, 0, 1, 1, 0, 1, 1, 0, 1])
torch.Size([24])
Gradient norm: 1.9697630552068728
tensor([0, 0, 1, 0, 1, 0, 1, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.2200480277780263
tensor([1, 0, 0, 0, 1, 0, 0, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.699926629980403
tensor([0, 1, 1, 0, 0, 1, 0, 1, 0, 0])
torch.Size([24])
Gradient norm: 3.101982398785176
tensor([1, 0, 0, 1, 0, 0, 0, 1, 1, 1])
torch.Size([24])
Gradient norm: 2.8309091889008657
tensor([1, 1, 1, 0, 0, 1, 1, 0, 0, 1])
torch.Size([24])
Gradient norm: 2.015512536556077
tensor([1, 0, 0, 0, 0, 0, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 2.0684677148583646
tensor([1, 1,

Epoch 553/600 - Training:   0%|          | 0/29 [00:00<?, ?it/s]

tensor([0, 0, 1, 0, 1, 1, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 3.3657747755119978
tensor([1, 0, 0, 0, 0, 0, 1, 1, 1, 1])
torch.Size([24])
Gradient norm: 1.9280276388836208
tensor([1, 0, 1, 0, 0, 0, 1, 1, 0, 0])
torch.Size([24])
Gradient norm: 2.069351830768608
tensor([1, 0, 0, 1, 1, 1, 0, 0, 1, 0])
torch.Size([24])
Gradient norm: 3.3747352186243775
tensor([0, 1, 0, 1, 1, 0, 1, 1, 0, 1])
torch.Size([24])
Gradient norm: 2.0221108524222244
tensor([0, 0, 1, 0, 1, 0, 1, 1, 1, 0])
torch.Size([24])
Gradient norm: 1.9973116090800682
tensor([1, 0, 0, 0, 1, 0, 0, 1, 1, 0])
torch.Size([24])
Gradient norm: 3.1953334557167468
tensor([0, 1, 1, 0, 0, 1, 0, 1, 0, 0])
torch.Size([24])
Gradient norm: 2.777158562173562
tensor([1, 0, 0, 1, 0, 0, 0, 1, 1, 1])
torch.Size([24])
Gradient norm: 2.6621791681719014
tensor([1, 1, 1, 0, 0, 1, 1, 0, 0, 1])
torch.Size([24])
Gradient norm: 1.9911347675020954
tensor([1, 0, 0, 0, 0, 0, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 1.6047784659314228
tensor([1, 1

Epoch 554/600 - Training:   0%|          | 0/29 [00:00<?, ?it/s]

tensor([0, 0, 1, 0, 1, 1, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 3.5312290833331534
tensor([1, 0, 0, 0, 0, 0, 1, 1, 1, 1])
torch.Size([24])
Gradient norm: 1.6915050062191788
tensor([1, 0, 1, 0, 0, 0, 1, 1, 0, 0])
torch.Size([24])
Gradient norm: 2.1765610522605745
tensor([1, 0, 0, 1, 1, 1, 0, 0, 1, 0])
torch.Size([24])
Gradient norm: 2.2429601217838533
tensor([0, 1, 0, 1, 1, 0, 1, 1, 0, 1])
torch.Size([24])
Gradient norm: 1.82435857981098
tensor([0, 0, 1, 0, 1, 0, 1, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.476902342255499
tensor([1, 0, 0, 0, 1, 0, 0, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.773105356015396
tensor([0, 1, 1, 0, 0, 1, 0, 1, 0, 0])
torch.Size([24])
Gradient norm: 3.3337021991217815
tensor([1, 0, 0, 1, 0, 0, 0, 1, 1, 1])
torch.Size([24])
Gradient norm: 3.1824382071751556
tensor([1, 1, 1, 0, 0, 1, 1, 0, 0, 1])
torch.Size([24])
Gradient norm: 1.796319460765014
tensor([1, 0, 0, 0, 0, 0, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 2.485776335498682
tensor([1, 1, 0,

Epoch 555/600 - Training:   0%|          | 0/29 [00:00<?, ?it/s]

tensor([0, 0, 1, 0, 1, 1, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 3.365791634138568
tensor([1, 0, 0, 0, 0, 0, 1, 1, 1, 1])
torch.Size([24])
Gradient norm: 1.9798222739931022
tensor([1, 0, 1, 0, 0, 0, 1, 1, 0, 0])
torch.Size([24])
Gradient norm: 2.424214551698037
tensor([1, 0, 0, 1, 1, 1, 0, 0, 1, 0])
torch.Size([24])
Gradient norm: 1.981674286762668
tensor([0, 1, 0, 1, 1, 0, 1, 1, 0, 1])
torch.Size([24])
Gradient norm: 1.7687102716498417
tensor([0, 0, 1, 0, 1, 0, 1, 1, 1, 0])
torch.Size([24])
Gradient norm: 1.8350845214633156
tensor([1, 0, 0, 0, 1, 0, 0, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.458577163797658
tensor([0, 1, 1, 0, 0, 1, 0, 1, 0, 0])
torch.Size([24])
Gradient norm: 2.8725504856047217
tensor([1, 0, 0, 1, 0, 0, 0, 1, 1, 1])
torch.Size([24])
Gradient norm: 3.219638697403727
tensor([1, 1, 1, 0, 0, 1, 1, 0, 0, 1])
torch.Size([24])
Gradient norm: 2.156256652864703
tensor([1, 0, 0, 0, 0, 0, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 1.9298596271369322
tensor([1, 1, 0,

Epoch 556/600 - Training:   0%|          | 0/29 [00:00<?, ?it/s]

tensor([0, 0, 1, 0, 1, 1, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 3.3077401496206464
tensor([1, 0, 0, 0, 0, 0, 1, 1, 1, 1])
torch.Size([24])
Gradient norm: 2.165465949857803
tensor([1, 0, 1, 0, 0, 0, 1, 1, 0, 0])
torch.Size([24])
Gradient norm: 2.3831228246951173
tensor([1, 0, 0, 1, 1, 1, 0, 0, 1, 0])
torch.Size([24])
Gradient norm: 3.107723737013534
tensor([0, 1, 0, 1, 1, 0, 1, 1, 0, 1])
torch.Size([24])
Gradient norm: 1.9092034883670022
tensor([0, 0, 1, 0, 1, 0, 1, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.194772825665257
tensor([1, 0, 0, 0, 1, 0, 0, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.974332011481869
tensor([0, 1, 1, 0, 0, 1, 0, 1, 0, 0])
torch.Size([24])
Gradient norm: 3.387128273215954
tensor([1, 0, 0, 1, 0, 0, 0, 1, 1, 1])
torch.Size([24])
Gradient norm: 3.4303340730790812
tensor([1, 1, 1, 0, 0, 1, 1, 0, 0, 1])
torch.Size([24])
Gradient norm: 1.5439141017908207
tensor([1, 0, 0, 0, 0, 0, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 1.9373483620308127
tensor([1, 1, 0

Epoch 557/600 - Training:   0%|          | 0/29 [00:00<?, ?it/s]

tensor([0, 0, 1, 0, 1, 1, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 3.101515092558328
tensor([1, 0, 0, 0, 0, 0, 1, 1, 1, 1])
torch.Size([24])
Gradient norm: 1.9528288205844289
tensor([1, 0, 1, 0, 0, 0, 1, 1, 0, 0])
torch.Size([24])
Gradient norm: 2.0224329810209922
tensor([1, 0, 0, 1, 1, 1, 0, 0, 1, 0])
torch.Size([24])
Gradient norm: 2.6715108519489132
tensor([0, 1, 0, 1, 1, 0, 1, 1, 0, 1])
torch.Size([24])
Gradient norm: 2.1410229718078644
tensor([0, 0, 1, 0, 1, 0, 1, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.859894970207312
tensor([1, 0, 0, 0, 1, 0, 0, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.639603261633655
tensor([0, 1, 1, 0, 0, 1, 0, 1, 0, 0])
torch.Size([24])
Gradient norm: 3.18003553825116
tensor([1, 0, 0, 1, 0, 0, 0, 1, 1, 1])
torch.Size([24])
Gradient norm: 2.8788005453629677
tensor([1, 1, 1, 0, 0, 1, 1, 0, 0, 1])
torch.Size([24])
Gradient norm: 1.745956584107313
tensor([1, 0, 0, 0, 0, 0, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 1.8700296043578286
tensor([1, 1, 0,

Epoch 558/600 - Training:   0%|          | 0/29 [00:00<?, ?it/s]

tensor([0, 0, 1, 0, 1, 1, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 3.651446758177134
tensor([1, 0, 0, 0, 0, 0, 1, 1, 1, 1])
torch.Size([24])
Gradient norm: 2.2916435175533683
tensor([1, 0, 1, 0, 0, 0, 1, 1, 0, 0])
torch.Size([24])
Gradient norm: 1.9520136602345932
tensor([1, 0, 0, 1, 1, 1, 0, 0, 1, 0])
torch.Size([24])
Gradient norm: 2.5118243752416114
tensor([0, 1, 0, 1, 1, 0, 1, 1, 0, 1])
torch.Size([24])
Gradient norm: 2.183713563636985
tensor([0, 0, 1, 0, 1, 0, 1, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.000438528579433
tensor([1, 0, 0, 0, 1, 0, 0, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.867670858644987
tensor([0, 1, 1, 0, 0, 1, 0, 1, 0, 0])
torch.Size([24])
Gradient norm: 2.953748253158486
tensor([1, 0, 0, 1, 0, 0, 0, 1, 1, 1])
torch.Size([24])
Gradient norm: 3.2329924575363753
tensor([1, 1, 1, 0, 0, 1, 1, 0, 0, 1])
torch.Size([24])
Gradient norm: 2.2083480836444873
tensor([1, 0, 0, 0, 0, 0, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 1.9466923451385718
tensor([1, 1, 0

Epoch 559/600 - Training:   0%|          | 0/29 [00:00<?, ?it/s]

tensor([0, 0, 1, 0, 1, 1, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 2.868356708928178
tensor([1, 0, 0, 0, 0, 0, 1, 1, 1, 1])
torch.Size([24])
Gradient norm: 2.014621056618077
tensor([1, 0, 1, 0, 0, 0, 1, 1, 0, 0])
torch.Size([24])
Gradient norm: 2.8412274261592634
tensor([1, 0, 0, 1, 1, 1, 0, 0, 1, 0])
torch.Size([24])
Gradient norm: 2.787091336782457
tensor([0, 1, 0, 1, 1, 0, 1, 1, 0, 1])
torch.Size([24])
Gradient norm: 1.9889995348494398
tensor([0, 0, 1, 0, 1, 0, 1, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.1439174026393575
tensor([1, 0, 0, 0, 1, 0, 0, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.842483488310075
tensor([0, 1, 1, 0, 0, 1, 0, 1, 0, 0])
torch.Size([24])
Gradient norm: 3.064612931525532
tensor([1, 0, 0, 1, 0, 0, 0, 1, 1, 1])
torch.Size([24])
Gradient norm: 2.6995395775205306
tensor([1, 1, 1, 0, 0, 1, 1, 0, 0, 1])
torch.Size([24])
Gradient norm: 1.9079977575743972
tensor([1, 0, 0, 0, 0, 0, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 1.8790877260887566
tensor([1, 1, 0

Epoch 560/600 - Training:   0%|          | 0/29 [00:00<?, ?it/s]

tensor([0, 0, 1, 0, 1, 1, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 3.522349514200094
tensor([1, 0, 0, 0, 0, 0, 1, 1, 1, 1])
torch.Size([24])
Gradient norm: 2.27489219212694
tensor([1, 0, 1, 0, 0, 0, 1, 1, 0, 0])
torch.Size([24])
Gradient norm: 1.8674869269083034
tensor([1, 0, 0, 1, 1, 1, 0, 0, 1, 0])
torch.Size([24])
Gradient norm: 2.3520170245970906
tensor([0, 1, 0, 1, 1, 0, 1, 1, 0, 1])
torch.Size([24])
Gradient norm: 1.509084073151075
tensor([0, 0, 1, 0, 1, 0, 1, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.466092948575955
tensor([1, 0, 0, 0, 1, 0, 0, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.788515962683283
tensor([0, 1, 1, 0, 0, 1, 0, 1, 0, 0])
torch.Size([24])
Gradient norm: 2.8923202604241047
tensor([1, 0, 0, 1, 0, 0, 0, 1, 1, 1])
torch.Size([24])
Gradient norm: 2.6661234749277485
tensor([1, 1, 1, 0, 0, 1, 1, 0, 0, 1])
torch.Size([24])
Gradient norm: 1.6779080675604112
tensor([1, 0, 0, 0, 0, 0, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 1.4571315394020292
tensor([1, 1, 0,

Epoch 561/600 - Training:   0%|          | 0/29 [00:00<?, ?it/s]

tensor([0, 0, 1, 0, 1, 1, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 2.97237115356642
tensor([1, 0, 0, 0, 0, 0, 1, 1, 1, 1])
torch.Size([24])
Gradient norm: 1.9656327870186137
tensor([1, 0, 1, 0, 0, 0, 1, 1, 0, 0])
torch.Size([24])
Gradient norm: 2.236034116384259
tensor([1, 0, 0, 1, 1, 1, 0, 0, 1, 0])
torch.Size([24])
Gradient norm: 2.9122199307831207
tensor([0, 1, 0, 1, 1, 0, 1, 1, 0, 1])
torch.Size([24])
Gradient norm: 1.7433820728949276
tensor([0, 0, 1, 0, 1, 0, 1, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.349203229262573
tensor([1, 0, 0, 0, 1, 0, 0, 1, 1, 0])
torch.Size([24])
Gradient norm: 3.1784616558323777
tensor([0, 1, 1, 0, 0, 1, 0, 1, 0, 0])
torch.Size([24])
Gradient norm: 3.382076822070913
tensor([1, 0, 0, 1, 0, 0, 0, 1, 1, 1])
torch.Size([24])
Gradient norm: 2.700333850428715
tensor([1, 1, 1, 0, 0, 1, 1, 0, 0, 1])
torch.Size([24])
Gradient norm: 1.7327889347618899
tensor([1, 0, 0, 0, 0, 0, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 1.6071001658759987
tensor([1, 1, 0,

Epoch 562/600 - Training:   0%|          | 0/29 [00:00<?, ?it/s]

tensor([0, 0, 1, 0, 1, 1, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 3.3545177744445684
tensor([1, 0, 0, 0, 0, 0, 1, 1, 1, 1])
torch.Size([24])
Gradient norm: 2.1532080873979034
tensor([1, 0, 1, 0, 0, 0, 1, 1, 0, 0])
torch.Size([24])
Gradient norm: 1.9100101735963395
tensor([1, 0, 0, 1, 1, 1, 0, 0, 1, 0])
torch.Size([24])
Gradient norm: 2.4557742614603657
tensor([0, 1, 0, 1, 1, 0, 1, 1, 0, 1])
torch.Size([24])
Gradient norm: 2.3010549334754593
tensor([0, 0, 1, 0, 1, 0, 1, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.051131152759534
tensor([1, 0, 0, 0, 1, 0, 0, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.4113523812167417
tensor([0, 1, 1, 0, 0, 1, 0, 1, 0, 0])
torch.Size([24])
Gradient norm: 2.9176036812384964
tensor([1, 0, 0, 1, 0, 0, 0, 1, 1, 1])
torch.Size([24])
Gradient norm: 2.429884336936221
tensor([1, 1, 1, 0, 0, 1, 1, 0, 0, 1])
torch.Size([24])
Gradient norm: 2.037114360974567
tensor([1, 0, 0, 0, 0, 0, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 1.5024659745073072
tensor([1, 1,

Epoch 563/600 - Training:   0%|          | 0/29 [00:00<?, ?it/s]

tensor([0, 0, 1, 0, 1, 1, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 3.5062330686254377
tensor([1, 0, 0, 0, 0, 0, 1, 1, 1, 1])
torch.Size([24])
Gradient norm: 2.0605770427020333
tensor([1, 0, 1, 0, 0, 0, 1, 1, 0, 0])
torch.Size([24])
Gradient norm: 2.3841738902502505
tensor([1, 0, 0, 1, 1, 1, 0, 0, 1, 0])
torch.Size([24])
Gradient norm: 2.597431426430959
tensor([0, 1, 0, 1, 1, 0, 1, 1, 0, 1])
torch.Size([24])
Gradient norm: 1.6130032376325631
tensor([0, 0, 1, 0, 1, 0, 1, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.074018936153875
tensor([1, 0, 0, 0, 1, 0, 0, 1, 1, 0])
torch.Size([24])
Gradient norm: 1.9674901956777273
tensor([0, 1, 1, 0, 0, 1, 0, 1, 0, 0])
torch.Size([24])
Gradient norm: 2.6824690467660175
tensor([1, 0, 0, 1, 0, 0, 0, 1, 1, 1])
torch.Size([24])
Gradient norm: 2.628374263473456
tensor([1, 1, 1, 0, 0, 1, 1, 0, 0, 1])
torch.Size([24])
Gradient norm: 1.6767428437938003
tensor([1, 0, 0, 0, 0, 0, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 1.769137085259768
tensor([1, 1, 

Epoch 564/600 - Training:   0%|          | 0/29 [00:00<?, ?it/s]

tensor([0, 0, 1, 0, 1, 1, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 3.5642807292320455
tensor([1, 0, 0, 0, 0, 0, 1, 1, 1, 1])
torch.Size([24])
Gradient norm: 2.4274920331491137
tensor([1, 0, 1, 0, 0, 0, 1, 1, 0, 0])
torch.Size([24])
Gradient norm: 2.3429561201717712
tensor([1, 0, 0, 1, 1, 1, 0, 0, 1, 0])
torch.Size([24])
Gradient norm: 2.715377810905197
tensor([0, 1, 0, 1, 1, 0, 1, 1, 0, 1])
torch.Size([24])
Gradient norm: 1.8262401467449219
tensor([0, 0, 1, 0, 1, 0, 1, 1, 1, 0])
torch.Size([24])
Gradient norm: 1.7243279279248618
tensor([1, 0, 0, 0, 1, 0, 0, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.433381781197983
tensor([0, 1, 1, 0, 0, 1, 0, 1, 0, 0])
torch.Size([24])
Gradient norm: 2.465808744370767
tensor([1, 0, 0, 1, 0, 0, 0, 1, 1, 1])
torch.Size([24])
Gradient norm: 2.2195968978431293
tensor([1, 1, 1, 0, 0, 1, 1, 0, 0, 1])
torch.Size([24])
Gradient norm: 1.670670396084166
tensor([1, 0, 0, 0, 0, 0, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 1.7406416654816215
tensor([1, 1, 

Epoch 565/600 - Training:   0%|          | 0/29 [00:00<?, ?it/s]

tensor([0, 0, 1, 0, 1, 1, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 3.7775216016319075
tensor([1, 0, 0, 0, 0, 0, 1, 1, 1, 1])
torch.Size([24])
Gradient norm: 1.872697731411602
tensor([1, 0, 1, 0, 0, 0, 1, 1, 0, 0])
torch.Size([24])
Gradient norm: 2.3211263511719102
tensor([1, 0, 0, 1, 1, 1, 0, 0, 1, 0])
torch.Size([24])
Gradient norm: 2.2849037060190645
tensor([0, 1, 0, 1, 1, 0, 1, 1, 0, 1])
torch.Size([24])
Gradient norm: 1.5625579650640105
tensor([0, 0, 1, 0, 1, 0, 1, 1, 1, 0])
torch.Size([24])
Gradient norm: 1.8597108656860148
tensor([1, 0, 0, 0, 1, 0, 0, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.7238285530904545
tensor([0, 1, 1, 0, 0, 1, 0, 1, 0, 0])
torch.Size([24])
Gradient norm: 3.303448516501
tensor([1, 0, 0, 1, 0, 0, 0, 1, 1, 1])
torch.Size([24])
Gradient norm: 2.6253915347848853
tensor([1, 1, 1, 0, 0, 1, 1, 0, 0, 1])
torch.Size([24])
Gradient norm: 1.7971045636726521
tensor([1, 0, 0, 0, 0, 0, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 1.6842655760329466
tensor([1, 1, 0

Epoch 566/600 - Training:   0%|          | 0/29 [00:00<?, ?it/s]

tensor([0, 0, 1, 0, 1, 1, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 3.5177203730279007
tensor([1, 0, 0, 0, 0, 0, 1, 1, 1, 1])
torch.Size([24])
Gradient norm: 2.0111455039098707
tensor([1, 0, 1, 0, 0, 0, 1, 1, 0, 0])
torch.Size([24])
Gradient norm: 2.293337214212188
tensor([1, 0, 0, 1, 1, 1, 0, 0, 1, 0])
torch.Size([24])
Gradient norm: 3.339886361512041
tensor([0, 1, 0, 1, 1, 0, 1, 1, 0, 1])
torch.Size([24])
Gradient norm: 2.250786156444318
tensor([0, 0, 1, 0, 1, 0, 1, 1, 1, 0])
torch.Size([24])
Gradient norm: 1.927000555469876
tensor([1, 0, 0, 0, 1, 0, 0, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.9494305642178387
tensor([0, 1, 1, 0, 0, 1, 0, 1, 0, 0])
torch.Size([24])
Gradient norm: 2.566773951426823
tensor([1, 0, 0, 1, 0, 0, 0, 1, 1, 1])
torch.Size([24])
Gradient norm: 2.6860019772070576
tensor([1, 1, 1, 0, 0, 1, 1, 0, 0, 1])
torch.Size([24])
Gradient norm: 1.8514225284002603
tensor([1, 0, 0, 0, 0, 0, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 1.6981974969115483
tensor([1, 1, 0

Epoch 567/600 - Training:   0%|          | 0/29 [00:00<?, ?it/s]

tensor([0, 0, 1, 0, 1, 1, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 3.4766076357375706
tensor([1, 0, 0, 0, 0, 0, 1, 1, 1, 1])
torch.Size([24])
Gradient norm: 2.1815296095569674
tensor([1, 0, 1, 0, 0, 0, 1, 1, 0, 0])
torch.Size([24])
Gradient norm: 2.1231799588249776
tensor([1, 0, 0, 1, 1, 1, 0, 0, 1, 0])
torch.Size([24])
Gradient norm: 2.2924746323310035
tensor([0, 1, 0, 1, 1, 0, 1, 1, 0, 1])
torch.Size([24])
Gradient norm: 1.9530324667007353
tensor([0, 0, 1, 0, 1, 0, 1, 1, 1, 0])
torch.Size([24])
Gradient norm: 1.6471149758936583
tensor([1, 0, 0, 0, 1, 0, 0, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.8357013033319287
tensor([0, 1, 1, 0, 0, 1, 0, 1, 0, 0])
torch.Size([24])
Gradient norm: 2.4217983786238375
tensor([1, 0, 0, 1, 0, 0, 0, 1, 1, 1])
torch.Size([24])
Gradient norm: 3.042788537138755
tensor([1, 1, 1, 0, 0, 1, 1, 0, 0, 1])
torch.Size([24])
Gradient norm: 1.8068899541839187
tensor([1, 0, 0, 0, 0, 0, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 1.8906690432106374
tensor([1, 

Epoch 568/600 - Training:   0%|          | 0/29 [00:00<?, ?it/s]

tensor([0, 0, 1, 0, 1, 1, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 3.9507453974362683
tensor([1, 0, 0, 0, 0, 0, 1, 1, 1, 1])
torch.Size([24])
Gradient norm: 2.584166143542598
tensor([1, 0, 1, 0, 0, 0, 1, 1, 0, 0])
torch.Size([24])
Gradient norm: 2.3938164549271765
tensor([1, 0, 0, 1, 1, 1, 0, 0, 1, 0])
torch.Size([24])
Gradient norm: 2.1304354356372466
tensor([0, 1, 0, 1, 1, 0, 1, 1, 0, 1])
torch.Size([24])
Gradient norm: 1.9596105246590105
tensor([0, 0, 1, 0, 1, 0, 1, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.193155225063056
tensor([1, 0, 0, 0, 1, 0, 0, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.3540277742905737
tensor([0, 1, 1, 0, 0, 1, 0, 1, 0, 0])
torch.Size([24])
Gradient norm: 2.7470138421444243
tensor([1, 0, 0, 1, 0, 0, 0, 1, 1, 1])
torch.Size([24])
Gradient norm: 2.9935549173350635
tensor([1, 1, 1, 0, 0, 1, 1, 0, 0, 1])
torch.Size([24])
Gradient norm: 1.834329127149878
tensor([1, 0, 0, 0, 0, 0, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 2.005157445295192
tensor([1, 1, 

Epoch 569/600 - Training:   0%|          | 0/29 [00:00<?, ?it/s]

tensor([0, 0, 1, 0, 1, 1, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 4.132933614125793
tensor([1, 0, 0, 0, 0, 0, 1, 1, 1, 1])
torch.Size([24])
Gradient norm: 2.742436780205992
tensor([1, 0, 1, 0, 0, 0, 1, 1, 0, 0])
torch.Size([24])
Gradient norm: 2.2198755528301977
tensor([1, 0, 0, 1, 1, 1, 0, 0, 1, 0])
torch.Size([24])
Gradient norm: 2.2879098503739295
tensor([0, 1, 0, 1, 1, 0, 1, 1, 0, 1])
torch.Size([24])
Gradient norm: 2.793795825495622
tensor([0, 0, 1, 0, 1, 0, 1, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.2694535117016565
tensor([1, 0, 0, 0, 1, 0, 0, 1, 1, 0])
torch.Size([24])
Gradient norm: 3.0485123342981018
tensor([0, 1, 1, 0, 0, 1, 0, 1, 0, 0])
torch.Size([24])
Gradient norm: 3.4120707227909386
tensor([1, 0, 0, 1, 0, 0, 0, 1, 1, 1])
torch.Size([24])
Gradient norm: 2.4873402779705645
tensor([1, 1, 1, 0, 0, 1, 1, 0, 0, 1])
torch.Size([24])
Gradient norm: 1.9215092723750888
tensor([1, 0, 0, 0, 0, 0, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 1.9922071281035045
tensor([1, 1,

Epoch 570/600 - Training:   0%|          | 0/29 [00:00<?, ?it/s]

tensor([0, 0, 1, 0, 1, 1, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 3.4164443252662444
tensor([1, 0, 0, 0, 0, 0, 1, 1, 1, 1])
torch.Size([24])
Gradient norm: 2.4773893120006174
tensor([1, 0, 1, 0, 0, 0, 1, 1, 0, 0])
torch.Size([24])
Gradient norm: 1.814116763286404
tensor([1, 0, 0, 1, 1, 1, 0, 0, 1, 0])
torch.Size([24])
Gradient norm: 2.640628661072904
tensor([0, 1, 0, 1, 1, 0, 1, 1, 0, 1])
torch.Size([24])
Gradient norm: 1.8781980520482104
tensor([0, 0, 1, 0, 1, 0, 1, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.5888073923918347
tensor([1, 0, 0, 0, 1, 0, 0, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.9938523686131777
tensor([0, 1, 1, 0, 0, 1, 0, 1, 0, 0])
torch.Size([24])
Gradient norm: 2.3714010903758607
tensor([1, 0, 0, 1, 0, 0, 0, 1, 1, 1])
torch.Size([24])
Gradient norm: 3.203229938812146
tensor([1, 1, 1, 0, 0, 1, 1, 0, 0, 1])
torch.Size([24])
Gradient norm: 2.0621754638676046
tensor([1, 0, 0, 0, 0, 0, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 1.9121725290534402
tensor([1, 1,

Epoch 571/600 - Training:   0%|          | 0/29 [00:00<?, ?it/s]

tensor([0, 0, 1, 0, 1, 1, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 3.5492106117227755
tensor([1, 0, 0, 0, 0, 0, 1, 1, 1, 1])
torch.Size([24])
Gradient norm: 2.1807227940069946
tensor([1, 0, 1, 0, 0, 0, 1, 1, 0, 0])
torch.Size([24])
Gradient norm: 2.029753746734607
tensor([1, 0, 0, 1, 1, 1, 0, 0, 1, 0])
torch.Size([24])
Gradient norm: 2.0126267764669716
tensor([0, 1, 0, 1, 1, 0, 1, 1, 0, 1])
torch.Size([24])
Gradient norm: 1.9454431718185998
tensor([0, 0, 1, 0, 1, 0, 1, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.0726324347056617
tensor([1, 0, 0, 0, 1, 0, 0, 1, 1, 0])
torch.Size([24])
Gradient norm: 3.307992624232333
tensor([0, 1, 1, 0, 0, 1, 0, 1, 0, 0])
torch.Size([24])
Gradient norm: 3.0856105523952544
tensor([1, 0, 0, 1, 0, 0, 0, 1, 1, 1])
torch.Size([24])
Gradient norm: 2.6765752754299035
tensor([1, 1, 1, 0, 0, 1, 1, 0, 0, 1])
torch.Size([24])
Gradient norm: 1.9088084298007746
tensor([1, 0, 0, 0, 0, 0, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 1.6481790921207957
tensor([1, 1

Epoch 572/600 - Training:   0%|          | 0/29 [00:00<?, ?it/s]

tensor([0, 0, 1, 0, 1, 1, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 3.3132856458909705
tensor([1, 0, 0, 0, 0, 0, 1, 1, 1, 1])
torch.Size([24])
Gradient norm: 1.7996474696620977
tensor([1, 0, 1, 0, 0, 0, 1, 1, 0, 0])
torch.Size([24])
Gradient norm: 2.4140906471959247
tensor([1, 0, 0, 1, 1, 1, 0, 0, 1, 0])
torch.Size([24])
Gradient norm: 3.055162034440497
tensor([0, 1, 0, 1, 1, 0, 1, 1, 0, 1])
torch.Size([24])
Gradient norm: 1.8560984938480705
tensor([0, 0, 1, 0, 1, 0, 1, 1, 1, 0])
torch.Size([24])
Gradient norm: 1.901215955202297
tensor([1, 0, 0, 0, 1, 0, 0, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.60024758839431
tensor([0, 1, 1, 0, 0, 1, 0, 1, 0, 0])
torch.Size([24])
Gradient norm: 2.9166822670182735
tensor([1, 0, 0, 1, 0, 0, 0, 1, 1, 1])
torch.Size([24])
Gradient norm: 2.596903750192388
tensor([1, 1, 1, 0, 0, 1, 1, 0, 0, 1])
torch.Size([24])
Gradient norm: 1.83108062155969
tensor([1, 0, 0, 0, 0, 0, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 1.5943107377159011
tensor([1, 1, 0, 

Epoch 573/600 - Training:   0%|          | 0/29 [00:00<?, ?it/s]

tensor([0, 0, 1, 0, 1, 1, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 3.193105728383282
tensor([1, 0, 0, 0, 0, 0, 1, 1, 1, 1])
torch.Size([24])
Gradient norm: 2.2495864064051037
tensor([1, 0, 1, 0, 0, 0, 1, 1, 0, 0])
torch.Size([24])
Gradient norm: 2.066361437298344
tensor([1, 0, 0, 1, 1, 1, 0, 0, 1, 0])
torch.Size([24])
Gradient norm: 2.551749080392901
tensor([0, 1, 0, 1, 1, 0, 1, 1, 0, 1])
torch.Size([24])
Gradient norm: 2.050559546058034
tensor([0, 0, 1, 0, 1, 0, 1, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.365103515018083
tensor([1, 0, 0, 0, 1, 0, 0, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.7816927721404996
tensor([0, 1, 1, 0, 0, 1, 0, 1, 0, 0])
torch.Size([24])
Gradient norm: 2.767824933310966
tensor([1, 0, 0, 1, 0, 0, 0, 1, 1, 1])
torch.Size([24])
Gradient norm: 2.8906693541471267
tensor([1, 1, 1, 0, 0, 1, 1, 0, 0, 1])
torch.Size([24])
Gradient norm: 1.773899333471035
tensor([1, 0, 0, 0, 0, 0, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 1.5897682419640784
tensor([1, 1, 0, 

Epoch 574/600 - Training:   0%|          | 0/29 [00:00<?, ?it/s]

tensor([0, 0, 1, 0, 1, 1, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 2.846153419670182
tensor([1, 0, 0, 0, 0, 0, 1, 1, 1, 1])
torch.Size([24])
Gradient norm: 1.9884827726066503
tensor([1, 0, 1, 0, 0, 0, 1, 1, 0, 0])
torch.Size([24])
Gradient norm: 2.2342693673004943
tensor([1, 0, 0, 1, 1, 1, 0, 0, 1, 0])
torch.Size([24])
Gradient norm: 2.4560202089944774
tensor([0, 1, 0, 1, 1, 0, 1, 1, 0, 1])
torch.Size([24])
Gradient norm: 1.956291329249624
tensor([0, 0, 1, 0, 1, 0, 1, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.1452195239646006
tensor([1, 0, 0, 0, 1, 0, 0, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.6686081181175867
tensor([0, 1, 1, 0, 0, 1, 0, 1, 0, 0])
torch.Size([24])
Gradient norm: 2.755518839584261
tensor([1, 0, 0, 1, 0, 0, 0, 1, 1, 1])
torch.Size([24])
Gradient norm: 3.096889508262015
tensor([1, 1, 1, 0, 0, 1, 1, 0, 0, 1])
torch.Size([24])
Gradient norm: 2.0808006321612504
tensor([1, 0, 0, 0, 0, 0, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 1.8385545276383242
tensor([1, 1, 

Epoch 575/600 - Training:   0%|          | 0/29 [00:00<?, ?it/s]

tensor([0, 0, 1, 0, 1, 1, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 3.442956017338482
tensor([1, 0, 0, 0, 0, 0, 1, 1, 1, 1])
torch.Size([24])
Gradient norm: 2.054509406662401
tensor([1, 0, 1, 0, 0, 0, 1, 1, 0, 0])
torch.Size([24])
Gradient norm: 2.310153270822456
tensor([1, 0, 0, 1, 1, 1, 0, 0, 1, 0])
torch.Size([24])
Gradient norm: 2.5865372371247486
tensor([0, 1, 0, 1, 1, 0, 1, 1, 0, 1])
torch.Size([24])
Gradient norm: 1.7783443378131212
tensor([0, 0, 1, 0, 1, 0, 1, 1, 1, 0])
torch.Size([24])
Gradient norm: 1.8176521463637338
tensor([1, 0, 0, 0, 1, 0, 0, 1, 1, 0])
torch.Size([24])
Gradient norm: 3.4846097439399135
tensor([0, 1, 1, 0, 0, 1, 0, 1, 0, 0])
torch.Size([24])
Gradient norm: 2.5517779502999534
tensor([1, 0, 0, 1, 0, 0, 0, 1, 1, 1])
torch.Size([24])
Gradient norm: 3.3267827850720586
tensor([1, 1, 1, 0, 0, 1, 1, 0, 0, 1])
torch.Size([24])
Gradient norm: 1.7315665796359638
tensor([1, 0, 0, 0, 0, 0, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 1.6624211475955057
tensor([1, 1,

Epoch 576/600 - Training:   0%|          | 0/29 [00:00<?, ?it/s]

tensor([0, 0, 1, 0, 1, 1, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 3.258415866550056
tensor([1, 0, 0, 0, 0, 0, 1, 1, 1, 1])
torch.Size([24])
Gradient norm: 1.86731599936444
tensor([1, 0, 1, 0, 0, 0, 1, 1, 0, 0])
torch.Size([24])
Gradient norm: 2.2770796137961113
tensor([1, 0, 0, 1, 1, 1, 0, 0, 1, 0])
torch.Size([24])
Gradient norm: 2.6595866231766587
tensor([0, 1, 0, 1, 1, 0, 1, 1, 0, 1])
torch.Size([24])
Gradient norm: 1.731615433339434
tensor([0, 0, 1, 0, 1, 0, 1, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.143176840135781
tensor([1, 0, 0, 0, 1, 0, 0, 1, 1, 0])
torch.Size([24])
Gradient norm: 3.2252141153553886
tensor([0, 1, 1, 0, 0, 1, 0, 1, 0, 0])
torch.Size([24])
Gradient norm: 2.912552996984805
tensor([1, 0, 0, 1, 0, 0, 0, 1, 1, 1])
torch.Size([24])
Gradient norm: 2.845458271338052
tensor([1, 1, 1, 0, 0, 1, 1, 0, 0, 1])
torch.Size([24])
Gradient norm: 1.5807639258990895
tensor([1, 0, 0, 0, 0, 0, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 1.6152258161135877
tensor([1, 1, 0, 

Epoch 577/600 - Training:   0%|          | 0/29 [00:00<?, ?it/s]

tensor([0, 0, 1, 0, 1, 1, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 3.991066997856704
tensor([1, 0, 0, 0, 0, 0, 1, 1, 1, 1])
torch.Size([24])
Gradient norm: 1.7997230830143423
tensor([1, 0, 1, 0, 0, 0, 1, 1, 0, 0])
torch.Size([24])
Gradient norm: 2.17138265038048
tensor([1, 0, 0, 1, 1, 1, 0, 0, 1, 0])
torch.Size([24])
Gradient norm: 2.2832990907032844
tensor([0, 1, 0, 1, 1, 0, 1, 1, 0, 1])
torch.Size([24])
Gradient norm: 1.8108041525376721
tensor([0, 0, 1, 0, 1, 0, 1, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.308961071746796
tensor([1, 0, 0, 0, 1, 0, 0, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.733368398951711
tensor([0, 1, 1, 0, 0, 1, 0, 1, 0, 0])
torch.Size([24])
Gradient norm: 2.7344600877252785
tensor([1, 0, 0, 1, 0, 0, 0, 1, 1, 1])
torch.Size([24])
Gradient norm: 2.8991925112285593
tensor([1, 1, 1, 0, 0, 1, 1, 0, 0, 1])
torch.Size([24])
Gradient norm: 2.107555648464479
tensor([1, 0, 0, 0, 0, 0, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 2.1010406173907694
tensor([1, 1, 0,

Epoch 578/600 - Training:   0%|          | 0/29 [00:00<?, ?it/s]

tensor([0, 0, 1, 0, 1, 1, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 3.5731488130386406
tensor([1, 0, 0, 0, 0, 0, 1, 1, 1, 1])
torch.Size([24])
Gradient norm: 2.3023047864540294
tensor([1, 0, 1, 0, 0, 0, 1, 1, 0, 0])
torch.Size([24])
Gradient norm: 2.6028649649655793
tensor([1, 0, 0, 1, 1, 1, 0, 0, 1, 0])
torch.Size([24])
Gradient norm: 2.342890934578193
tensor([0, 1, 0, 1, 1, 0, 1, 1, 0, 1])
torch.Size([24])
Gradient norm: 2.0458476169755815
tensor([0, 0, 1, 0, 1, 0, 1, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.051345676128607
tensor([1, 0, 0, 0, 1, 0, 0, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.95874314863605
tensor([0, 1, 1, 0, 0, 1, 0, 1, 0, 0])
torch.Size([24])
Gradient norm: 3.187674040680975
tensor([1, 0, 0, 1, 0, 0, 0, 1, 1, 1])
torch.Size([24])
Gradient norm: 2.6151585148854286
tensor([1, 1, 1, 0, 0, 1, 1, 0, 0, 1])
torch.Size([24])
Gradient norm: 1.7699135034510194
tensor([1, 0, 0, 0, 0, 0, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 1.5878359366370263
tensor([1, 1, 0

Epoch 579/600 - Training:   0%|          | 0/29 [00:00<?, ?it/s]

tensor([0, 0, 1, 0, 1, 1, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 3.361384575076265
tensor([1, 0, 0, 0, 0, 0, 1, 1, 1, 1])
torch.Size([24])
Gradient norm: 2.194175283839019
tensor([1, 0, 1, 0, 0, 0, 1, 1, 0, 0])
torch.Size([24])
Gradient norm: 1.7265814872066307
tensor([1, 0, 0, 1, 1, 1, 0, 0, 1, 0])
torch.Size([24])
Gradient norm: 1.9318409940719499
tensor([0, 1, 0, 1, 1, 0, 1, 1, 0, 1])
torch.Size([24])
Gradient norm: 1.633509489003023
tensor([0, 0, 1, 0, 1, 0, 1, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.42758207406151
tensor([1, 0, 0, 0, 1, 0, 0, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.7881815989214056
tensor([0, 1, 1, 0, 0, 1, 0, 1, 0, 0])
torch.Size([24])
Gradient norm: 2.5095310762407856
tensor([1, 0, 0, 1, 0, 0, 0, 1, 1, 1])
torch.Size([24])
Gradient norm: 2.6406108045672876
tensor([1, 1, 1, 0, 0, 1, 1, 0, 0, 1])
torch.Size([24])
Gradient norm: 1.8642585086412153
tensor([1, 0, 0, 0, 0, 0, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 2.040947860893665
tensor([1, 1, 0,

Epoch 580/600 - Training:   0%|          | 0/29 [00:00<?, ?it/s]

tensor([0, 0, 1, 0, 1, 1, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 3.55712037278535
tensor([1, 0, 0, 0, 0, 0, 1, 1, 1, 1])
torch.Size([24])
Gradient norm: 1.7185607749797889
tensor([1, 0, 1, 0, 0, 0, 1, 1, 0, 0])
torch.Size([24])
Gradient norm: 2.263415875904946
tensor([1, 0, 0, 1, 1, 1, 0, 0, 1, 0])
torch.Size([24])
Gradient norm: 2.4499362753658973
tensor([0, 1, 0, 1, 1, 0, 1, 1, 0, 1])
torch.Size([24])
Gradient norm: 1.7127312684084446
tensor([0, 0, 1, 0, 1, 0, 1, 1, 1, 0])
torch.Size([24])
Gradient norm: 1.9139668527263616
tensor([1, 0, 0, 0, 1, 0, 0, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.8120877919818845
tensor([0, 1, 1, 0, 0, 1, 0, 1, 0, 0])
torch.Size([24])
Gradient norm: 2.8706945622655837
tensor([1, 0, 0, 1, 0, 0, 0, 1, 1, 1])
torch.Size([24])
Gradient norm: 2.658719230380305
tensor([1, 1, 1, 0, 0, 1, 1, 0, 0, 1])
torch.Size([24])
Gradient norm: 1.7594925053866628
tensor([1, 0, 0, 0, 0, 0, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 2.270808368126066
tensor([1, 1, 0

Epoch 581/600 - Training:   0%|          | 0/29 [00:00<?, ?it/s]

tensor([0, 0, 1, 0, 1, 1, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 3.187783304792021
tensor([1, 0, 0, 0, 0, 0, 1, 1, 1, 1])
torch.Size([24])
Gradient norm: 1.9981072355303824
tensor([1, 0, 1, 0, 0, 0, 1, 1, 0, 0])
torch.Size([24])
Gradient norm: 2.0069123519077015
tensor([1, 0, 0, 1, 1, 1, 0, 0, 1, 0])
torch.Size([24])
Gradient norm: 3.6469062079709307
tensor([0, 1, 0, 1, 1, 0, 1, 1, 0, 1])
torch.Size([24])
Gradient norm: 1.920254934025692
tensor([0, 0, 1, 0, 1, 0, 1, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.1403791891237933
tensor([1, 0, 0, 0, 1, 0, 0, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.18810069033981
tensor([0, 1, 1, 0, 0, 1, 0, 1, 0, 0])
torch.Size([24])
Gradient norm: 2.882526091353162
tensor([1, 0, 0, 1, 0, 0, 0, 1, 1, 1])
torch.Size([24])
Gradient norm: 3.248874830124728
tensor([1, 1, 1, 0, 0, 1, 1, 0, 0, 1])
torch.Size([24])
Gradient norm: 1.7197492347489733
tensor([1, 0, 0, 0, 0, 0, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 1.983659621652879
tensor([1, 1, 0, 

Epoch 582/600 - Training:   0%|          | 0/29 [00:00<?, ?it/s]

tensor([0, 0, 1, 0, 1, 1, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 3.0057070054563404
tensor([1, 0, 0, 0, 0, 0, 1, 1, 1, 1])
torch.Size([24])
Gradient norm: 2.0586539249455917
tensor([1, 0, 1, 0, 0, 0, 1, 1, 0, 0])
torch.Size([24])
Gradient norm: 2.3980302591224802
tensor([1, 0, 0, 1, 1, 1, 0, 0, 1, 0])
torch.Size([24])
Gradient norm: 2.398265739354329
tensor([0, 1, 0, 1, 1, 0, 1, 1, 0, 1])
torch.Size([24])
Gradient norm: 1.8680291230490829
tensor([0, 0, 1, 0, 1, 0, 1, 1, 1, 0])
torch.Size([24])
Gradient norm: 1.962227863965371
tensor([1, 0, 0, 0, 1, 0, 0, 1, 1, 0])
torch.Size([24])
Gradient norm: 3.290049453542409
tensor([0, 1, 1, 0, 0, 1, 0, 1, 0, 0])
torch.Size([24])
Gradient norm: 3.5526849793000093
tensor([1, 0, 0, 1, 0, 0, 0, 1, 1, 1])
torch.Size([24])
Gradient norm: 2.5854522124672883
tensor([1, 1, 1, 0, 0, 1, 1, 0, 0, 1])
torch.Size([24])
Gradient norm: 1.665216330052932
tensor([1, 0, 0, 0, 0, 0, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 2.2749903530568107
tensor([1, 1, 

Epoch 583/600 - Training:   0%|          | 0/29 [00:00<?, ?it/s]

tensor([0, 0, 1, 0, 1, 1, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 2.7960208575380014
tensor([1, 0, 0, 0, 0, 0, 1, 1, 1, 1])
torch.Size([24])
Gradient norm: 2.0200221899503825
tensor([1, 0, 1, 0, 0, 0, 1, 1, 0, 0])
torch.Size([24])
Gradient norm: 1.8361259807938901
tensor([1, 0, 0, 1, 1, 1, 0, 0, 1, 0])
torch.Size([24])
Gradient norm: 2.8064193646250066
tensor([0, 1, 0, 1, 1, 0, 1, 1, 0, 1])
torch.Size([24])
Gradient norm: 1.8448085888249421
tensor([0, 0, 1, 0, 1, 0, 1, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.413194503294528
tensor([1, 0, 0, 0, 1, 0, 0, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.5284066892596004
tensor([0, 1, 1, 0, 0, 1, 0, 1, 0, 0])
torch.Size([24])
Gradient norm: 2.4436779297504616
tensor([1, 0, 0, 1, 0, 0, 0, 1, 1, 1])
torch.Size([24])
Gradient norm: 2.923690909850243
tensor([1, 1, 1, 0, 0, 1, 1, 0, 0, 1])
torch.Size([24])
Gradient norm: 1.7391608478264367
tensor([1, 0, 0, 0, 0, 0, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 2.120300864007562
tensor([1, 1,

Epoch 584/600 - Training:   0%|          | 0/29 [00:00<?, ?it/s]

tensor([0, 0, 1, 0, 1, 1, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 3.2755980886927256
tensor([1, 0, 0, 0, 0, 0, 1, 1, 1, 1])
torch.Size([24])
Gradient norm: 2.0542632242498193
tensor([1, 0, 1, 0, 0, 0, 1, 1, 0, 0])
torch.Size([24])
Gradient norm: 2.107839649387651
tensor([1, 0, 0, 1, 1, 1, 0, 0, 1, 0])
torch.Size([24])
Gradient norm: 3.156925493645666
tensor([0, 1, 0, 1, 1, 0, 1, 1, 0, 1])
torch.Size([24])
Gradient norm: 1.8496766950959942
tensor([0, 0, 1, 0, 1, 0, 1, 1, 1, 0])
torch.Size([24])
Gradient norm: 1.5495968928736947
tensor([1, 0, 0, 0, 1, 0, 0, 1, 1, 0])
torch.Size([24])
Gradient norm: 1.9892929711306238
tensor([0, 1, 1, 0, 0, 1, 0, 1, 0, 0])
torch.Size([24])
Gradient norm: 3.0531091712395684
tensor([1, 0, 0, 1, 0, 0, 0, 1, 1, 1])
torch.Size([24])
Gradient norm: 2.917267166289968
tensor([1, 1, 1, 0, 0, 1, 1, 0, 0, 1])
torch.Size([24])
Gradient norm: 1.865843547326255
tensor([1, 0, 0, 0, 0, 0, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 1.8666686300204824
tensor([1, 1, 

Epoch 585/600 - Training:   0%|          | 0/29 [00:00<?, ?it/s]

tensor([0, 0, 1, 0, 1, 1, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 3.1774374707306614
tensor([1, 0, 0, 0, 0, 0, 1, 1, 1, 1])
torch.Size([24])
Gradient norm: 2.2460563300845804
tensor([1, 0, 1, 0, 0, 0, 1, 1, 0, 0])
torch.Size([24])
Gradient norm: 1.5371128135451702
tensor([1, 0, 0, 1, 1, 1, 0, 0, 1, 0])
torch.Size([24])
Gradient norm: 2.5047159111577453
tensor([0, 1, 0, 1, 1, 0, 1, 1, 0, 1])
torch.Size([24])
Gradient norm: 2.1465306163661455
tensor([0, 0, 1, 0, 1, 0, 1, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.2193416054028736
tensor([1, 0, 0, 0, 1, 0, 0, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.3306734868897534
tensor([0, 1, 1, 0, 0, 1, 0, 1, 0, 0])
torch.Size([24])
Gradient norm: 2.3977486289029257
tensor([1, 0, 0, 1, 0, 0, 0, 1, 1, 1])
torch.Size([24])
Gradient norm: 2.974778462873225
tensor([1, 1, 1, 0, 0, 1, 1, 0, 0, 1])
torch.Size([24])
Gradient norm: 1.7629679195175791
tensor([1, 0, 0, 0, 0, 0, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 1.7638174060472707
tensor([1, 

Epoch 586/600 - Training:   0%|          | 0/29 [00:00<?, ?it/s]

tensor([0, 0, 1, 0, 1, 1, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 3.6046954307163053
tensor([1, 0, 0, 0, 0, 0, 1, 1, 1, 1])
torch.Size([24])
Gradient norm: 2.1861781225534505
tensor([1, 0, 1, 0, 0, 0, 1, 1, 0, 0])
torch.Size([24])
Gradient norm: 2.4257228090003755
tensor([1, 0, 0, 1, 1, 1, 0, 0, 1, 0])
torch.Size([24])
Gradient norm: 2.5111402018471423
tensor([0, 1, 0, 1, 1, 0, 1, 1, 0, 1])
torch.Size([24])
Gradient norm: 1.701619761644793
tensor([0, 0, 1, 0, 1, 0, 1, 1, 1, 0])
torch.Size([24])
Gradient norm: 1.710323151250403
tensor([1, 0, 0, 0, 1, 0, 0, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.997656307939165
tensor([0, 1, 1, 0, 0, 1, 0, 1, 0, 0])
torch.Size([24])
Gradient norm: 3.3591400275924532
tensor([1, 0, 0, 1, 0, 0, 0, 1, 1, 1])
torch.Size([24])
Gradient norm: 2.862457464709921
tensor([1, 1, 1, 0, 0, 1, 1, 0, 0, 1])
torch.Size([24])
Gradient norm: 1.8063327561218794
tensor([1, 0, 0, 0, 0, 0, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 1.9983531209074934
tensor([1, 1, 

Epoch 587/600 - Training:   0%|          | 0/29 [00:00<?, ?it/s]

tensor([0, 0, 1, 0, 1, 1, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 3.25849838475054
tensor([1, 0, 0, 0, 0, 0, 1, 1, 1, 1])
torch.Size([24])
Gradient norm: 1.6728027446776914
tensor([1, 0, 1, 0, 0, 0, 1, 1, 0, 0])
torch.Size([24])
Gradient norm: 2.345441445669001
tensor([1, 0, 0, 1, 1, 1, 0, 0, 1, 0])
torch.Size([24])
Gradient norm: 2.456010791707902
tensor([0, 1, 0, 1, 1, 0, 1, 1, 0, 1])
torch.Size([24])
Gradient norm: 2.333830700274085
tensor([0, 0, 1, 0, 1, 0, 1, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.4608855859433056
tensor([1, 0, 0, 0, 1, 0, 0, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.864372877346328
tensor([0, 1, 1, 0, 0, 1, 0, 1, 0, 0])
torch.Size([24])
Gradient norm: 3.041439834478053
tensor([1, 0, 0, 1, 0, 0, 0, 1, 1, 1])
torch.Size([24])
Gradient norm: 3.1240731050264214
tensor([1, 1, 1, 0, 0, 1, 1, 0, 0, 1])
torch.Size([24])
Gradient norm: 1.7330098650660022
tensor([1, 0, 0, 0, 0, 0, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 1.97285290508641
tensor([1, 1, 0, 0,

Epoch 588/600 - Training:   0%|          | 0/29 [00:00<?, ?it/s]

tensor([0, 0, 1, 0, 1, 1, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 3.4916706802095083
tensor([1, 0, 0, 0, 0, 0, 1, 1, 1, 1])
torch.Size([24])
Gradient norm: 2.02952358282412
tensor([1, 0, 1, 0, 0, 0, 1, 1, 0, 0])
torch.Size([24])
Gradient norm: 2.2060599822158635
tensor([1, 0, 0, 1, 1, 1, 0, 0, 1, 0])
torch.Size([24])
Gradient norm: 2.065827313939606
tensor([0, 1, 0, 1, 1, 0, 1, 1, 0, 1])
torch.Size([24])
Gradient norm: 1.8658149678763114
tensor([0, 0, 1, 0, 1, 0, 1, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.7127391350119066
tensor([1, 0, 0, 0, 1, 0, 0, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.5208624340930306
tensor([0, 1, 1, 0, 0, 1, 0, 1, 0, 0])
torch.Size([24])
Gradient norm: 3.0053628034185933
tensor([1, 0, 0, 1, 0, 0, 0, 1, 1, 1])
torch.Size([24])
Gradient norm: 3.128663741802722
tensor([1, 1, 1, 0, 0, 1, 1, 0, 0, 1])
torch.Size([24])
Gradient norm: 1.6131847095205438
tensor([1, 0, 0, 0, 0, 0, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 1.90409007249692
tensor([1, 1, 0,

Epoch 589/600 - Training:   0%|          | 0/29 [00:00<?, ?it/s]

tensor([0, 0, 1, 0, 1, 1, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 3.067128373442211
tensor([1, 0, 0, 0, 0, 0, 1, 1, 1, 1])
torch.Size([24])
Gradient norm: 2.0602545621936903
tensor([1, 0, 1, 0, 0, 0, 1, 1, 0, 0])
torch.Size([24])
Gradient norm: 2.925167503585003
tensor([1, 0, 0, 1, 1, 1, 0, 0, 1, 0])
torch.Size([24])
Gradient norm: 2.144991072849625
tensor([0, 1, 0, 1, 1, 0, 1, 1, 0, 1])
torch.Size([24])
Gradient norm: 1.969603699750684
tensor([0, 0, 1, 0, 1, 0, 1, 1, 1, 0])
torch.Size([24])
Gradient norm: 1.9196631581106256
tensor([1, 0, 0, 0, 1, 0, 0, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.8795620306935175
tensor([0, 1, 1, 0, 0, 1, 0, 1, 0, 0])
torch.Size([24])
Gradient norm: 2.9891210225775713
tensor([1, 0, 0, 1, 0, 0, 0, 1, 1, 1])
torch.Size([24])
Gradient norm: 3.1737656731601507
tensor([1, 1, 1, 0, 0, 1, 1, 0, 0, 1])
torch.Size([24])
Gradient norm: 1.7507273261094354
tensor([1, 0, 0, 0, 0, 0, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 1.7001599505576308
tensor([1, 1, 

Epoch 590/600 - Training:   0%|          | 0/29 [00:00<?, ?it/s]

tensor([0, 0, 1, 0, 1, 1, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 3.31855732676699
tensor([1, 0, 0, 0, 0, 0, 1, 1, 1, 1])
torch.Size([24])
Gradient norm: 1.7685735029506235
tensor([1, 0, 1, 0, 0, 0, 1, 1, 0, 0])
torch.Size([24])
Gradient norm: 2.172358555464822
tensor([1, 0, 0, 1, 1, 1, 0, 0, 1, 0])
torch.Size([24])
Gradient norm: 2.301515849579806
tensor([0, 1, 0, 1, 1, 0, 1, 1, 0, 1])
torch.Size([24])
Gradient norm: 1.7958842705518159
tensor([0, 0, 1, 0, 1, 0, 1, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.155067794340193
tensor([1, 0, 0, 0, 1, 0, 0, 1, 1, 0])
torch.Size([24])
Gradient norm: 3.0513368345715963
tensor([0, 1, 1, 0, 0, 1, 0, 1, 0, 0])
torch.Size([24])
Gradient norm: 2.879948423004855
tensor([1, 0, 0, 1, 0, 0, 0, 1, 1, 1])
torch.Size([24])
Gradient norm: 2.948549911808921
tensor([1, 1, 1, 0, 0, 1, 1, 0, 0, 1])
torch.Size([24])
Gradient norm: 1.7495979746818549
tensor([1, 0, 0, 0, 0, 0, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 2.202919726252592
tensor([1, 1, 0, 0

Epoch 591/600 - Training:   0%|          | 0/29 [00:00<?, ?it/s]

tensor([0, 0, 1, 0, 1, 1, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 3.5728290162568035
tensor([1, 0, 0, 0, 0, 0, 1, 1, 1, 1])
torch.Size([24])
Gradient norm: 2.1970918543910978
tensor([1, 0, 1, 0, 0, 0, 1, 1, 0, 0])
torch.Size([24])
Gradient norm: 2.4003412495175467
tensor([1, 0, 0, 1, 1, 1, 0, 0, 1, 0])
torch.Size([24])
Gradient norm: 2.626389257996808
tensor([0, 1, 0, 1, 1, 0, 1, 1, 0, 1])
torch.Size([24])
Gradient norm: 2.2554082090864838
tensor([0, 0, 1, 0, 1, 0, 1, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.0183415682260617
tensor([1, 0, 0, 0, 1, 0, 0, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.973315889245779
tensor([0, 1, 1, 0, 0, 1, 0, 1, 0, 0])
torch.Size([24])
Gradient norm: 3.066573884482024
tensor([1, 0, 0, 1, 0, 0, 0, 1, 1, 1])
torch.Size([24])
Gradient norm: 2.873514993744709
tensor([1, 1, 1, 0, 0, 1, 1, 0, 0, 1])
torch.Size([24])
Gradient norm: 1.9285343391082037
tensor([1, 0, 0, 0, 0, 0, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 1.725200960243616
tensor([1, 1, 0

Epoch 592/600 - Training:   0%|          | 0/29 [00:00<?, ?it/s]

tensor([0, 0, 1, 0, 1, 1, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 2.73920783035942
tensor([1, 0, 0, 0, 0, 0, 1, 1, 1, 1])
torch.Size([24])
Gradient norm: 1.8495563395615833
tensor([1, 0, 1, 0, 0, 0, 1, 1, 0, 0])
torch.Size([24])
Gradient norm: 2.169597278033728
tensor([1, 0, 0, 1, 1, 1, 0, 0, 1, 0])
torch.Size([24])
Gradient norm: 2.740754574049339
tensor([0, 1, 0, 1, 1, 0, 1, 1, 0, 1])
torch.Size([24])
Gradient norm: 2.00089214551881
tensor([0, 0, 1, 0, 1, 0, 1, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.9820716194258994
tensor([1, 0, 0, 0, 1, 0, 0, 1, 1, 0])
torch.Size([24])
Gradient norm: 3.052812022116275
tensor([0, 1, 1, 0, 0, 1, 0, 1, 0, 0])
torch.Size([24])
Gradient norm: 3.0422993506835843
tensor([1, 0, 0, 1, 0, 0, 0, 1, 1, 1])
torch.Size([24])
Gradient norm: 2.4288019536744527
tensor([1, 1, 1, 0, 0, 1, 1, 0, 0, 1])
torch.Size([24])
Gradient norm: 2.1948586183335292
tensor([1, 0, 0, 0, 0, 0, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 2.030810102363816
tensor([1, 1, 0, 0

Epoch 593/600 - Training:   0%|          | 0/29 [00:00<?, ?it/s]

tensor([0, 0, 1, 0, 1, 1, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 3.894561827694385
tensor([1, 0, 0, 0, 0, 0, 1, 1, 1, 1])
torch.Size([24])
Gradient norm: 1.8413255786671672
tensor([1, 0, 1, 0, 0, 0, 1, 1, 0, 0])
torch.Size([24])
Gradient norm: 2.39766688146887
tensor([1, 0, 0, 1, 1, 1, 0, 0, 1, 0])
torch.Size([24])
Gradient norm: 3.1753790358806055
tensor([0, 1, 0, 1, 1, 0, 1, 1, 0, 1])
torch.Size([24])
Gradient norm: 1.9232980612597321
tensor([0, 0, 1, 0, 1, 0, 1, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.142766941633168
tensor([1, 0, 0, 0, 1, 0, 0, 1, 1, 0])
torch.Size([24])
Gradient norm: 3.079573560858332
tensor([0, 1, 1, 0, 0, 1, 0, 1, 0, 0])
torch.Size([24])
Gradient norm: 2.7706604178804137
tensor([1, 0, 0, 1, 0, 0, 0, 1, 1, 1])
torch.Size([24])
Gradient norm: 3.1924936148298158
tensor([1, 1, 1, 0, 0, 1, 1, 0, 0, 1])
torch.Size([24])
Gradient norm: 1.7802390736206881
tensor([1, 0, 0, 0, 0, 0, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 1.734739092343157
tensor([1, 1, 0,

Epoch 594/600 - Training:   0%|          | 0/29 [00:00<?, ?it/s]

tensor([0, 0, 1, 0, 1, 1, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 2.679573795925017
tensor([1, 0, 0, 0, 0, 0, 1, 1, 1, 1])
torch.Size([24])
Gradient norm: 2.089472643046375
tensor([1, 0, 1, 0, 0, 0, 1, 1, 0, 0])
torch.Size([24])
Gradient norm: 2.1935499658873816
tensor([1, 0, 0, 1, 1, 1, 0, 0, 1, 0])
torch.Size([24])
Gradient norm: 2.889265694447913
tensor([0, 1, 0, 1, 1, 0, 1, 1, 0, 1])
torch.Size([24])
Gradient norm: 1.7409914427723996
tensor([0, 0, 1, 0, 1, 0, 1, 1, 1, 0])
torch.Size([24])
Gradient norm: 1.6871767879096633
tensor([1, 0, 0, 0, 1, 0, 0, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.4255514927855737
tensor([0, 1, 1, 0, 0, 1, 0, 1, 0, 0])
torch.Size([24])
Gradient norm: 2.8386742154268836
tensor([1, 0, 0, 1, 0, 0, 0, 1, 1, 1])
torch.Size([24])
Gradient norm: 2.9451956557865593
tensor([1, 1, 1, 0, 0, 1, 1, 0, 0, 1])
torch.Size([24])
Gradient norm: 1.9614477451894863
tensor([1, 0, 0, 0, 0, 0, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 1.9016016646119598
tensor([1, 1,

Epoch 595/600 - Training:   0%|          | 0/29 [00:00<?, ?it/s]

tensor([0, 0, 1, 0, 1, 1, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 3.2481284679089995
tensor([1, 0, 0, 0, 0, 0, 1, 1, 1, 1])
torch.Size([24])
Gradient norm: 2.1124228827660962
tensor([1, 0, 1, 0, 0, 0, 1, 1, 0, 0])
torch.Size([24])
Gradient norm: 2.010842079052979
tensor([1, 0, 0, 1, 1, 1, 0, 0, 1, 0])
torch.Size([24])
Gradient norm: 2.6970093709785288
tensor([0, 1, 0, 1, 1, 0, 1, 1, 0, 1])
torch.Size([24])
Gradient norm: 1.8344347735744289
tensor([0, 0, 1, 0, 1, 0, 1, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.082084039812509
tensor([1, 0, 0, 0, 1, 0, 0, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.6111434434522574
tensor([0, 1, 1, 0, 0, 1, 0, 1, 0, 0])
torch.Size([24])
Gradient norm: 2.9625851440426967
tensor([1, 0, 0, 1, 0, 0, 0, 1, 1, 1])
torch.Size([24])
Gradient norm: 2.8972193298765414
tensor([1, 1, 1, 0, 0, 1, 1, 0, 0, 1])
torch.Size([24])
Gradient norm: 1.7719023741034117
tensor([1, 0, 0, 0, 0, 0, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 1.608622305442957
tensor([1, 1,

Epoch 596/600 - Training:   0%|          | 0/29 [00:00<?, ?it/s]

tensor([0, 0, 1, 0, 1, 1, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 3.0820617611157823
tensor([1, 0, 0, 0, 0, 0, 1, 1, 1, 1])
torch.Size([24])
Gradient norm: 1.82899960160829
tensor([1, 0, 1, 0, 0, 0, 1, 1, 0, 0])
torch.Size([24])
Gradient norm: 2.378231518138998
tensor([1, 0, 0, 1, 1, 1, 0, 0, 1, 0])
torch.Size([24])
Gradient norm: 2.280168100756904
tensor([0, 1, 0, 1, 1, 0, 1, 1, 0, 1])
torch.Size([24])
Gradient norm: 2.2998948658876364
tensor([0, 0, 1, 0, 1, 0, 1, 1, 1, 0])
torch.Size([24])
Gradient norm: 1.976093765206917
tensor([1, 0, 0, 0, 1, 0, 0, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.4174483052036337
tensor([0, 1, 1, 0, 0, 1, 0, 1, 0, 0])
torch.Size([24])
Gradient norm: 3.316903146556853
tensor([1, 0, 0, 1, 0, 0, 0, 1, 1, 1])
torch.Size([24])
Gradient norm: 2.8654903156681337
tensor([1, 1, 1, 0, 0, 1, 1, 0, 0, 1])
torch.Size([24])
Gradient norm: 1.8828081347249603
tensor([1, 0, 0, 0, 0, 0, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 2.280531905656814
tensor([1, 1, 0, 

Epoch 597/600 - Training:   0%|          | 0/29 [00:00<?, ?it/s]

tensor([0, 0, 1, 0, 1, 1, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 2.979634464431872
tensor([1, 0, 0, 0, 0, 0, 1, 1, 1, 1])
torch.Size([24])
Gradient norm: 2.562237321163169
tensor([1, 0, 1, 0, 0, 0, 1, 1, 0, 0])
torch.Size([24])
Gradient norm: 2.3483411146652324
tensor([1, 0, 0, 1, 1, 1, 0, 0, 1, 0])
torch.Size([24])
Gradient norm: 2.0855669540610164
tensor([0, 1, 0, 1, 1, 0, 1, 1, 0, 1])
torch.Size([24])
Gradient norm: 1.9015873080089158
tensor([0, 0, 1, 0, 1, 0, 1, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.9876463542596574
tensor([1, 0, 0, 0, 1, 0, 0, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.4472846052561144
tensor([0, 1, 1, 0, 0, 1, 0, 1, 0, 0])
torch.Size([24])
Gradient norm: 2.596203399763102
tensor([1, 0, 0, 1, 0, 0, 0, 1, 1, 1])
torch.Size([24])
Gradient norm: 3.1148474789458507
tensor([1, 1, 1, 0, 0, 1, 1, 0, 0, 1])
torch.Size([24])
Gradient norm: 1.7634557265143769
tensor([1, 0, 0, 0, 0, 0, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 1.8234175204425882
tensor([1, 1,

Epoch 598/600 - Training:   0%|          | 0/29 [00:00<?, ?it/s]

tensor([0, 0, 1, 0, 1, 1, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 3.267002077165086
tensor([1, 0, 0, 0, 0, 0, 1, 1, 1, 1])
torch.Size([24])
Gradient norm: 1.9639504503005774
tensor([1, 0, 1, 0, 0, 0, 1, 1, 0, 0])
torch.Size([24])
Gradient norm: 2.114356984584047
tensor([1, 0, 0, 1, 1, 1, 0, 0, 1, 0])
torch.Size([24])
Gradient norm: 2.4012576269109207
tensor([0, 1, 0, 1, 1, 0, 1, 1, 0, 1])
torch.Size([24])
Gradient norm: 2.0419959086992114
tensor([0, 0, 1, 0, 1, 0, 1, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.3995259402596725
tensor([1, 0, 0, 0, 1, 0, 0, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.852280778282008
tensor([0, 1, 1, 0, 0, 1, 0, 1, 0, 0])
torch.Size([24])
Gradient norm: 3.3733210686757813
tensor([1, 0, 0, 1, 0, 0, 0, 1, 1, 1])
torch.Size([24])
Gradient norm: 2.578906119780568
tensor([1, 1, 1, 0, 0, 1, 1, 0, 0, 1])
torch.Size([24])
Gradient norm: 1.6890964761154545
tensor([1, 0, 0, 0, 0, 0, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 2.116764354848153
tensor([1, 1, 0

Epoch 599/600 - Training:   0%|          | 0/29 [00:00<?, ?it/s]

tensor([0, 0, 1, 0, 1, 1, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 4.473581651634564
tensor([1, 0, 0, 0, 0, 0, 1, 1, 1, 1])
torch.Size([24])
Gradient norm: 2.07324907458245
tensor([1, 0, 1, 0, 0, 0, 1, 1, 0, 0])
torch.Size([24])
Gradient norm: 2.2443158871625104
tensor([1, 0, 0, 1, 1, 1, 0, 0, 1, 0])
torch.Size([24])
Gradient norm: 2.1792479029065146
tensor([0, 1, 0, 1, 1, 0, 1, 1, 0, 1])
torch.Size([24])
Gradient norm: 2.349371133746907
tensor([0, 0, 1, 0, 1, 0, 1, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.491545217211541
tensor([1, 0, 0, 0, 1, 0, 0, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.8208146612191234
tensor([0, 1, 1, 0, 0, 1, 0, 1, 0, 0])
torch.Size([24])
Gradient norm: 3.2317232359264816
tensor([1, 0, 0, 1, 0, 0, 0, 1, 1, 1])
torch.Size([24])
Gradient norm: 2.4673836224875125
tensor([1, 1, 1, 0, 0, 1, 1, 0, 0, 1])
torch.Size([24])
Gradient norm: 1.6389847398483228
tensor([1, 0, 0, 0, 0, 0, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 1.6176399833612054
tensor([1, 1, 0

Epoch 600/600 - Training:   0%|          | 0/29 [00:00<?, ?it/s]

tensor([0, 0, 1, 0, 1, 1, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 3.9051805314146426
tensor([1, 0, 0, 0, 0, 0, 1, 1, 1, 1])
torch.Size([24])
Gradient norm: 2.0205520245579325
tensor([1, 0, 1, 0, 0, 0, 1, 1, 0, 0])
torch.Size([24])
Gradient norm: 2.466817182311929
tensor([1, 0, 0, 1, 1, 1, 0, 0, 1, 0])
torch.Size([24])
Gradient norm: 2.744554214209602
tensor([0, 1, 0, 1, 1, 0, 1, 1, 0, 1])
torch.Size([24])
Gradient norm: 1.652677347018679
tensor([0, 0, 1, 0, 1, 0, 1, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.071578419059116
tensor([1, 0, 0, 0, 1, 0, 0, 1, 1, 0])
torch.Size([24])
Gradient norm: 2.752053842581713
tensor([0, 1, 1, 0, 0, 1, 0, 1, 0, 0])
torch.Size([24])
Gradient norm: 2.9774921367129537
tensor([1, 0, 0, 1, 0, 0, 0, 1, 1, 1])
torch.Size([24])
Gradient norm: 3.103189388041927
tensor([1, 1, 1, 0, 0, 1, 1, 0, 0, 1])
torch.Size([24])
Gradient norm: 1.7674544357943576
tensor([1, 0, 0, 0, 0, 0, 1, 0, 1, 1])
torch.Size([24])
Gradient norm: 2.056018222290116
tensor([1, 1, 0, 

In [22]:
torch.save(model.state_dict(), "techinical_indicator_cnn.pth")

In [23]:
torch.manual_seed(42)
model = CNNTA(1, 80, (6, 6), 2, 2 ).to(device)
model.load_state_dict(torch.load("techinical_indicator_cnn.pth"))
model.to(device)
model.eval()

predicted_val = []
true_val = []
loss = 0
total = 0 

with torch.no_grad():
    
    for batch_x, batch_y in tqdm(dataset_loader_test):
    
        predicted = model(batch_x.to(device))
        values, pred = torch.max(predicted, dim=1)
        
        loss = criterion(predicted, batch_y.to(device))
        loss += batch_x.to(device).size(0)*loss
        total += batch_x.to(device).size(0)
        
        predicted_val.append(pred)
        true_val.append(batch_y.to(device))
        
    aver_loss = loss / total
    print(f"Loss: {aver_loss: .3f}")
all_preds = torch.cat(predicted_val).cpu().numpy()
all_true = torch.cat(true_val).cpu().numpy()

    

TypeError: CNNTA.__init__() missing 1 required positional argument: 'dropout'

In [None]:
precision = precision_score(all_true, all_preds, average='macro')
recall = recall_score(all_true, all_preds, average='macro')
f1 = f1_score(all_true, all_preds, average='macro')
print(f"Precision: {precision} | Recall: {recall}| f1_score: {f1}")