In [1]:
from datetime import datetime as dt
from itertools import chain

import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler
from torch.utils.tensorboard import SummaryWriter

# Load and preprocess the dataset (assuming you have a CSV file)
df = pd.read_csv("../data/diamonds.csv")
df.head()

Unnamed: 0,carat,cut,color,clarity,depth,table,price,x,y,z
0,0.23,Ideal,E,SI2,61.5,55.0,326,3.95,3.98,2.43
1,0.21,Premium,E,SI1,59.8,61.0,326,3.89,3.84,2.31
2,0.23,Good,E,VS1,56.9,65.0,327,4.05,4.07,2.31
3,0.29,Premium,I,VS2,62.4,58.0,334,4.2,4.23,2.63
4,0.31,Good,J,SI2,63.3,58.0,335,4.34,4.35,2.75


In [2]:
df.columns

Index(['carat', 'cut', 'color', 'clarity', 'depth', 'table', 'price', 'x', 'y',
       'z'],
      dtype='object')

In [3]:
cat_columns = ["cut", "color", "clarity"]
num_columns = ["carat", "depth", "table", "x", "y", "z"]
cat_values = pd.unique(df[cat_columns].values.ravel("K"))
target_column = "price"
tokens = list(
    chain(
        cat_values,
        cat_columns,
        num_columns,
        ["PAD", "[NUMERIC_MASK]", "[MASK]"],
        [target_column],
    )
)
token_dict = {token: i for i, token in enumerate(tokens)}

In [4]:
embedding = nn.Embedding(len(token_dict), 64)
cat_values_emb = torch.tensor(
    [[token_dict[token] for token in row] for row in df[cat_columns].values],
    dtype=torch.long,
)

col_names_emb = torch.tensor([token_dict[col] for col in df.columns], dtype=torch.long)
embedding(col_names_emb).shape

torch.Size([10, 64])

In [5]:
X = df.drop("price", axis=1)
y = df["price"]

X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Preprocess categorical features
X_train_cat = X_train[cat_columns].copy()
X_test_cat = X_test[cat_columns].copy()

label_encoders = {}
for col in cat_columns:
    le = LabelEncoder()
    X_train_cat[col] = X_train_cat[col].map(token_dict)
    X_test_cat[col] = X_test_cat[col].map(token_dict)
    # label_encoders[col] = le

# Preprocess numeric features

scaler = StandardScaler()
X_train_num = scaler.fit_transform(X_train[num_columns].copy())
X_test_num = scaler.transform(X_test[num_columns].copy())

X_train_cat_tensor = torch.tensor(
    X_train_cat.values, dtype=torch.int64
)  # Use int64 dtype for categorical indices
X_train_num_tensor = torch.tensor(X_train_num, dtype=torch.float32)
X_test_cat_tensor = torch.tensor(
    X_test_cat.values, dtype=torch.int64
)  # Use int64 dtype for categorical indices
X_test_num_tensor = torch.tensor(X_test_num, dtype=torch.float32)
y_train_tensor = torch.tensor(y_train.values, dtype=torch.float32).view(-1, 1)
y_test_tensor = torch.tensor(y_test.values, dtype=torch.float32).view(-1, 1)

In [6]:
X_test_cat

Unnamed: 0,cut,color,clarity
1388,0,10,17
50052,3,9,16
41645,0,5,16
42377,1,5,16
17244,0,5,12
...,...,...,...
44081,3,5,14
23713,3,9,16
31375,2,10,17
21772,0,9,14


In [7]:
input_tensor = torch.randn(10, 6)
repeated_tensor = input_tensor.unsqueeze(1).repeat(1, 4, 1)

print(repeated_tensor.shape)

torch.Size([10, 4, 6])


In [8]:
X_train_num_tensor[0:10, :].unsqueeze(2).shape

torch.Size([10, 6, 1])

In [9]:
test_mat = torch.rand(3, 4, 5)
test_mat

tensor([[[0.0636, 0.8199, 0.6281, 0.3663, 0.6602],
         [0.1109, 0.7213, 0.5941, 0.9433, 0.3055],
         [0.1782, 0.3100, 0.0353, 0.2705, 0.1160],
         [0.4531, 0.0281, 0.4697, 0.0220, 0.4760]],

        [[0.3182, 0.3856, 0.6484, 0.1468, 0.0170],
         [0.9080, 0.3354, 0.5932, 0.9195, 0.1159],
         [0.1039, 0.7766, 0.1595, 0.1899, 0.0124],
         [0.8718, 0.6338, 0.5656, 0.9191, 0.6315]],

        [[0.7445, 0.0606, 0.9957, 0.6818, 0.9350],
         [0.3552, 0.2315, 0.9873, 0.0667, 0.6262],
         [0.7692, 0.8921, 0.2996, 0.6761, 0.6816],
         [0.4189, 0.6757, 0.7876, 0.0845, 0.2168]]])

In [10]:
test_mat[0, 0], test_mat[0, 0] * 10

(tensor([0.0636, 0.8199, 0.6281, 0.3663, 0.6602]),
 tensor([0.6361, 8.1994, 6.2809, 3.6626, 6.6018]))

In [11]:
(X_train_num_tensor[0:10, :].shape, X_train_num_tensor[0:10, :].unsqueeze(2).shape)

(torch.Size([10, 6]), torch.Size([10, 6, 1]))

In [12]:
scalars = torch.tensor([10, 2, 3, 4])
scalars = scalars.unsqueeze(1).unsqueeze(0)
scalars.shape

torch.Size([1, 4, 1])

In [13]:
double_test_mat = scalars * test_mat
double_test_mat

tensor([[[0.6361, 8.1994, 6.2809, 3.6626, 6.6018],
         [0.2218, 1.4426, 1.1882, 1.8866, 0.6110],
         [0.5345, 0.9301, 0.1058, 0.8116, 0.3479],
         [1.8125, 0.1124, 1.8787, 0.0880, 1.9041]],

        [[3.1816, 3.8555, 6.4840, 1.4676, 0.1699],
         [1.8161, 0.6708, 1.1864, 1.8389, 0.2318],
         [0.3117, 2.3298, 0.4784, 0.5696, 0.0372],
         [3.4872, 2.5353, 2.2625, 3.6764, 2.5261]],

        [[7.4446, 0.6058, 9.9571, 6.8184, 9.3498],
         [0.7105, 0.4629, 1.9747, 0.1333, 1.2523],
         [2.3077, 2.6762, 0.8987, 2.0282, 2.0449],
         [1.6757, 2.7030, 3.1503, 0.3378, 0.8671]]])

In [14]:
double_test_mat[0, 0]

tensor([0.6361, 8.1994, 6.2809, 3.6626, 6.6018])

In [15]:
class MultiHeadAttention(nn.Module):
    def __init__(self, d_model, num_heads):
        super(MultiHeadAttention, self).__init__()
        self.num_heads = num_heads
        self.d_model = d_model
        self.d_head = d_model // num_heads

        self.q_linear = nn.Linear(d_model, d_model)
        self.k_linear = nn.Linear(d_model, d_model)
        self.v_linear = nn.Linear(d_model, d_model)
        self.out_linear = nn.Linear(d_model, d_model)

    def forward(self, q, k, v, mask=None):
        batch_size = q.size(0)

        q = (
            self.q_linear(q)
            .view(batch_size, -1, self.num_heads, self.d_head)
            .transpose(1, 2)
        )
        k = (
            self.k_linear(k)
            .view(batch_size, -1, self.num_heads, self.d_head)
            .transpose(1, 2)
        )
        v = (
            self.v_linear(v)
            .view(batch_size, -1, self.num_heads, self.d_head)
            .transpose(1, 2)
        )

        attn_output, _ = self.scaled_dot_product_attention(q, k, v, mask)

        attn_output = (
            attn_output.transpose(1, 2).contiguous().view(batch_size, -1, self.d_model)
        )
        out = self.out_linear(attn_output)
        return out

    def scaled_dot_product_attention(self, q, k, v, mask=None):
        matmul_qk = torch.matmul(q, k.transpose(-2, -1))
        d_k = q.size(-1)
        scaled_attention_logits = matmul_qk / (d_k**0.5)

        if mask is not None:
            scaled_attention_logits += mask * -1e9

        attention_weights = F.softmax(scaled_attention_logits, dim=-1)
        output = torch.matmul(attention_weights, v)

        return output, attention_weights


class TransformerEncoderLayer(nn.Module):
    def __init__(self, d_model, num_heads):
        super(TransformerEncoderLayer, self).__init__()

        self.multi_head_attention = MultiHeadAttention(d_model, num_heads)

        self.feed_forward = nn.Sequential(
            nn.Linear(d_model, 4 * d_model),
            nn.ReLU(),
            nn.Linear(4 * d_model, d_model),
        )

        self.layernorm1 = nn.LayerNorm(d_model)
        self.layernorm2 = nn.LayerNorm(d_model)

    def forward(self, q, k, v, mask=None):
        attn_output = self.multi_head_attention(q, k, v, mask)
        out1 = self.layernorm1(q + attn_output)

        ff_output = self.feed_forward(out1)
        out2 = self.layernorm2(out1 + ff_output)

        return out2


# Parameters
d_model = 64  # Embedding dimension
num_heads = 4  # Number of attention heads
seq_len_q = 10  # Sequence length for the query tensor
seq_len_k = 20  # Sequence length for the key tensor
batch_size = 32  # Batch size

# Random data
q = torch.rand((batch_size, seq_len_q, d_model))
k = torch.rand((batch_size, seq_len_k, d_model))
v = k  # Usually, value and key are the same in many applications

# Model
encoder_layer = TransformerEncoderLayer(d_model, num_heads)

# Forward pass
output = encoder_layer(q, k, v)
print("Output shape:", output.shape)

Output shape: torch.Size([32, 10, 64])


In [16]:
torch.tensor(1)

tensor(1)

In [75]:
class TabTransformer(nn.Module):
    def __init__(
        self,
        tokens,
        numeric_col_tokens,
        cat_col_tokens,
        token_dict,
        d_model=64,
    ):
        super(TabTransformer, self).__init__()
        self.d_model = d_model
        self.tokens = tokens
        self.token_dict = token_dict
        # Masks
        self.cat_mask_token = torch.tensor(self.token_dict["[MASK]"])
        self.numeric_mask_token = torch.tensor(self.token_dict["[NUMERIC_MASK]"])

        self.col_tokens = cat_col_tokens + numeric_col_tokens
        self.n_tokens = len(tokens)  # TODO Make this
        # Embedding layers for categorical features
        self.embeddings = nn.Embedding(self.n_tokens, self.d_model)
        self.n_numeric_cols = len(numeric_col_tokens)
        self.n_cat_cols = len(cat_col_tokens)
        self.n_columns = self.n_numeric_cols + self.n_cat_cols
        # self.numeric_embeddings = NumericEmbedding(d_model=self.d_model)
        self.col_indices = torch.tensor(
            [self.tokens.index(col) for col in self.col_tokens], dtype=torch.long
        )
        self.numeric_indices = torch.tensor(
            [self.tokens.index(col) for col in numeric_col_tokens], dtype=torch.long
        )
        self.transformer_encoder = TransformerEncoderLayer(d_model, num_heads=4)

        self.regressor = nn.Sequential(
            nn.Linear(d_model, d_model * 2),
            nn.ReLU(),
            nn.Linear(d_model * 2, 1),
            nn.ReLU(),
        )

        self.mlm_decoder = nn.Sequential(
            nn.Linear(d_model, self.n_tokens)
        )  # TODO try making more complex

        self.mnm_decoder = nn.Sequential(
            nn.Linear(self.n_columns * self.d_model, 128),  # Try making more complex
            nn.ReLU(),
            nn.Linear(128, 6),
        )

        self.flatten_layer = nn.Linear(len(self.col_tokens), 1)

    def forward(self, num_inputs, cat_inputs, mnm=None, mlm=None, task="regression"):
        # Embed column indices
        repeated_col_indices = self.col_indices.unsqueeze(0).repeat(
            num_inputs.size(0), 1
        )
        col_embeddings = self.embeddings(repeated_col_indices)

        repeated_numeric_indices = self.numeric_indices.unsqueeze(0).repeat(
            num_inputs.size(0), 1
        )
        numeric_col_embeddings = self.embeddings(repeated_numeric_indices)

        if mlm is not None:
            # print(f"cat_inputs.shape: {cat_inputs.shape}, mlm.shape: {mlm.shape}")
            cat_inputs.masked_fill_(mlm, self.cat_mask_token)  # `_` is inplace.

        cat_embeddings = self.embeddings(cat_inputs)
        expanded_num_inputs = num_inputs.unsqueeze(2).repeat(1, 1, 64)
        num_embeddings = numeric_col_embeddings * expanded_num_inputs

        if mnm is not None:
            # print(
            #     f"mnm.shape: {mnm.shape}, num_embeddings.shape: {num_embeddings.shape}"
            # )
            # print(num_embeddings.sum())
            numeric_mask_embedding = self.embeddings(self.numeric_mask_token)
            # numeric_mask_embedding = torch.ones_like(numeric_mask_embedding)
            # numeric_mask_embedding = torch.zeros_like(numeric_mask_embedding)
            num_embeddings[mnm] = numeric_mask_embedding
            # print(num_embeddings.sum())

        query_embeddings = torch.cat([cat_embeddings, num_embeddings], dim=1)
        out = self.transformer_encoder(
            col_embeddings,
            query_embeddings,
            query_embeddings
            # col_embeddings, query_embeddings, query_embeddings
        )
        if task == "regression":
            out = self.regressor(out)
            out = self.flatten_layer(out.squeeze(-1))

            return out
        elif task == "mlm":
            cat_out = self.mlm_decoder(out)
            # print(f"Out shape: {out.shape}, cat_out shape: {cat_out.shape}")
            numeric_out = out.view(out.size(0), -1)
            # print(f"numeric_out shape: {numeric_out.shape}")
            numeric_out = self.mnm_decoder(numeric_out)
            return cat_out, numeric_out
        else:
            raise ValueError(f"Task {task} not supported.")


no_price_tokens = tokens.copy()
no_price_tokens.remove("price")

numeric_col_tokens = (
    df.head().drop("price", axis=1).select_dtypes(include=np.number).columns.to_list()
)
cat_col_tokens = df.head().select_dtypes(exclude=np.number).columns.to_list()

model = TabTransformer(
    no_price_tokens,
    numeric_col_tokens=numeric_col_tokens,
    cat_col_tokens=cat_col_tokens,
    token_dict=token_dict,
)
batch_size = 3
mnm = torch.rand(batch_size, X_train_num_tensor.size(1)) > 0.8
mlm = torch.rand(batch_size, X_train_cat_tensor.size(1)) > 0.8
with torch.no_grad():
    x = model(
        X_train_num_tensor[0:batch_size, :],
        X_train_cat_tensor[0:batch_size, :],
        mnm,
        mlm,
        task="mlm",
    )
x[0].shape, x[1].shape

Out shape: torch.Size([3, 9, 64]), cat_out shape: torch.Size([3, 9, 32])
numeric_out shape: torch.Size([3, 576])


(torch.Size([3, 9, 32]), torch.Size([3, 6]))

In [70]:
A = torch.rand(3, 31, 64)
A.reshape(3, 64, -1).shape

torch.Size([3, 64, 31])

In [51]:
model.n_numeric_cols

6

In [None]:
# Masked Tabualr Modeling
epochs = 20
batch_size = 1000
lr = 0.001
mse_loss = nn.MSELoss()
ce_loss = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=lr)

model_time = dt.now()
model_time = model_time.strftime("%Y-%m-%dT%H:%M:%S")
model_name = f"Regression.8_{model_time}"

summary_writer = SummaryWriter("runs/" + model_name)

numeric_column_t
batch_count = 0
model.train()
for epoch in range(epochs):
    for i in range(0, X_train_num_tensor.size(0), batch_size):
        num_inputs = X_train_num_tensor[i : i + batch_size, :]
        cat_inputs = X_train_cat_tensor[i : i + batch_size, :]
        mlm = torch.rand(cat_inputs.shape) > 0.8  # Greater than 1 for testing
        mnm = torch.rand(num_inputs.shape) > 0.8
        # mlm = None
        # mnm = None
        optimizer.zero_grad()
        cat_preds, numeric_preds = model(num_inputs, cat_inputs, mnm=mnm, mlm=mlm)
        cat_targets = torch.cat(cat_inputs, model.numeric_indices.s)
        cat_loss = ce_loss(cat_preds, cat_inputs)
        loss.backward()
        optimizer.step()
        batch_count += 1
        learning_rate = optimizer.param_groups[0]["lr"]
        summary_writer.add_scalar("Loss/train", loss.item(), batch_count)
        summary_writer.add_scalar("Metrics/LearningRate", learning_rate, batch_count)
        if batch_count % 100 == 0:
            print(f"Epoch {epoch+1}/{epochs} Loss: {loss.item():,.2f}")

In [80]:
model.numeric_indices.shape

torch.Size([6])

In [87]:
torch.cat((cat_inputs, model.numeric_indices.expand(cat_inputs.size(0), -1)), dim=1)

tensor([[ 3,  9, 15,  ..., 26, 27, 28],
        [ 0, 11, 15,  ..., 26, 27, 28],
        [ 3,  8, 15,  ..., 26, 27, 28],
        ...,
        [ 3,  9, 19,  ..., 26, 27, 28],
        [ 1,  7, 13,  ..., 26, 27, 28],
        [ 1,  9, 13,  ..., 26, 27, 28]])

In [84]:
model.numeric_indices.expand(cat_inputs.size(0), -1).shape

torch.Size([152, 6])

In [81]:
cat_inputs.shape

torch.Size([152, 3])

In [18]:
# Regression Model

epochs = 20
batch_size = 1000
lr = 0.001
loss_fn = nn.MSELoss()
optimizer = optim.Adam(model.parameters(), lr=lr)

model_time = dt.now()
model_time = model_time.strftime("%Y-%m-%dT%H:%M:%S")
model_name = f"just_mnm.8_{model_time}"

summary_writer = SummaryWriter("runs/" + model_name)


batch_count = 0
model.train()
for epoch in range(epochs):
    for i in range(0, X_train_num_tensor.size(0), batch_size):
        num_inputs = X_train_num_tensor[i : i + batch_size, :]
        cat_inputs = X_train_cat_tensor[i : i + batch_size, :]
        # mlm = torch.rand(cat_inputs.shape) > 0.8  # Greater than 1 for testing
        mnm = torch.rand(num_inputs.shape) > 0.8
        mlm = None
        # mnm = None
        optimizer.zero_grad()
        y_pred = model(num_inputs, cat_inputs, mnm=mnm, mlm=mlm)
        loss = loss_fn(y_pred, y_train_tensor[i : i + batch_size, :])
        loss.backward()
        optimizer.step()
        batch_count += 1
        learning_rate = optimizer.param_groups[0]["lr"]
        summary_writer.add_scalar("Loss/train", loss.item(), batch_count)
        summary_writer.add_scalar("Metrics/LearningRate", learning_rate, batch_count)
        if batch_count % 100 == 0:
            print(f"Epoch {epoch+1}/{epochs} Loss: {loss.item():,.2f}")

Epoch 3/20 Loss: 25,652,796.00
Epoch 5/20 Loss: 19,682,204.00
Epoch 7/20 Loss: 5,207,685.00
Epoch 10/20 Loss: 1,074,898.25
Epoch 12/20 Loss: 809,297.25
Epoch 14/20 Loss: 589,495.50
Epoch 16/20 Loss: 436,240.62
Epoch 19/20 Loss: 441,002.25


In [19]:
test_shape = X_train_num_tensor[i : i + batch_size, :].shape

In [20]:
(torch.rand(test_shape) > 0.8).requires_grad

False

In [21]:
# epochs = 40
# batch_size = 1000
# lr = 0.5
# loss_fn = nn.MSELoss()
# optimizer = optim.Adam(model.parameters(), lr=lr)
# batch_count = 0
# model.train()
# for epoch in range(epochs):
#     for i in range(0, X_train_num_tensor.size(0), batch_size):
#         optimizer.zero_grad()
#         y_pred = model(
#             X_train_num_tensor[i : i + batch_size, :],
#             X_train_cat_tensor[i : i + batch_size, :],
#         )
#         loss = loss_fn(y_pred, y_train_tensor[i : i + batch_size, :])
#         loss.backward()
#         optimizer.step()
#         batch_count += 1
#         if batch_count % 100 == 0:
#             print(f"Epoch {epoch+1}/{epochs} Loss: {loss.item():,.2f}")

In [22]:
A = torch.zeros(4, 4, 4)
B = torch.ones(4, 4)

# Insert B into the slice where the first dimension is 2
A[2, :, :] = B

print(A)

tensor([[[0., 0., 0., 0.],
         [0., 0., 0., 0.],
         [0., 0., 0., 0.],
         [0., 0., 0., 0.]],

        [[0., 0., 0., 0.],
         [0., 0., 0., 0.],
         [0., 0., 0., 0.],
         [0., 0., 0., 0.]],

        [[1., 1., 1., 1.],
         [1., 1., 1., 1.],
         [1., 1., 1., 1.],
         [1., 1., 1., 1.]],

        [[0., 0., 0., 0.],
         [0., 0., 0., 0.],
         [0., 0., 0., 0.],
         [0., 0., 0., 0.]]])


In [23]:
with torch.no_grad():
    y_pred = model(X_test_num_tensor[0:10, :], X_test_cat_tensor[0:10, :])
    loss = loss_fn(y_pred, y_test_tensor[0:10])
    print(f"Test loss: {loss.item():,.2f}")

Test loss: 1,131,268.25


In [24]:
for i in range(10):
    print(
        f"Predicted: {y_pred[i].item():,.2f} Actual: {y_test_tensor[i].item():,.2f}",
        f"Diff: {y_pred[i].item() - y_test_tensor[i].item():,.2f}",
    )

Predicted: 659.33 Actual: 559.00 Diff: 100.33
Predicted: 2,475.55 Actual: 2,201.00 Diff: 274.55
Predicted: 1,317.27 Actual: 1,238.00 Diff: 79.27
Predicted: 1,623.84 Actual: 1,304.00 Diff: 319.84
Predicted: 9,862.08 Actual: 6,901.00 Diff: 2,961.08
Predicted: 4,085.95 Actual: 3,011.00 Diff: 1,074.95
Predicted: 1,895.64 Actual: 1,765.00 Diff: 130.64
Predicted: 1,961.49 Actual: 1,679.00 Diff: 282.49
Predicted: 2,296.45 Actual: 2,102.00 Diff: 194.45
Predicted: 5,818.79 Actual: 4,789.00 Diff: 1,029.79


In [25]:
for i in range(10):
    print(f"Predicted: {y_pred[i].item():,.2f} Actual: {y_test_tensor[i].item():,.2f}")

Predicted: 659.33 Actual: 559.00
Predicted: 2,475.55 Actual: 2,201.00
Predicted: 1,317.27 Actual: 1,238.00
Predicted: 1,623.84 Actual: 1,304.00
Predicted: 9,862.08 Actual: 6,901.00
Predicted: 4,085.95 Actual: 3,011.00
Predicted: 1,895.64 Actual: 1,765.00
Predicted: 1,961.49 Actual: 1,679.00
Predicted: 2,296.45 Actual: 2,102.00
Predicted: 5,818.79 Actual: 4,789.00


In [26]:
for i in range(10):
    print(f"Predicted: {y_pred[i].item():,.2f} Actual: {y_test_tensor[i].item():,.2f}")

Predicted: 659.33 Actual: 559.00
Predicted: 2,475.55 Actual: 2,201.00
Predicted: 1,317.27 Actual: 1,238.00
Predicted: 1,623.84 Actual: 1,304.00
Predicted: 9,862.08 Actual: 6,901.00
Predicted: 4,085.95 Actual: 3,011.00
Predicted: 1,895.64 Actual: 1,765.00
Predicted: 1,961.49 Actual: 1,679.00
Predicted: 2,296.45 Actual: 2,102.00
Predicted: 5,818.79 Actual: 4,789.00


In [27]:
# Predicted: 688.47 Actual: 559.00
# Predicted: 2,547.17 Actual: 2,201.00
# Predicted: 1,044.95 Actual: 1,238.00
# Predicted: 1,790.95 Actual: 1,304.00
# Predicted: 10,160.45 Actual: 6,901.00
# Predicted: 3,790.90 Actual: 3,011.00
# Predicted: 1,729.77 Actual: 1,765.00
# Predicted: 1,749.63 Actual: 1,679.00
# Predicted: 2,328.92 Actual: 2,102.00
# Predicted: 5,928.75 Actual: 4,789.00

In [28]:
torch.rand(3, 4, 5) > 0.8

tensor([[[False, False, False, False, False],
         [False,  True,  True, False, False],
         [False, False, False, False,  True],
         [False, False, False, False,  True]],

        [[False,  True, False,  True,  True],
         [False, False,  True, False, False],
         [ True, False,  True, False,  True],
         [ True, False, False, False, False]],

        [[False,  True, False, False, False],
         [False, False, False, False,  True],
         [False, False, False,  True, False],
         [False, False,  True, False, False]]])

In [29]:
torch.rand()

TypeError: rand() received an invalid combination of arguments - got (), but expected one of:
 * (tuple of ints size, *, torch.Generator generator, tuple of names names, torch.dtype dtype, torch.layout layout, torch.device device, bool pin_memory, bool requires_grad)
 * (tuple of ints size, *, torch.Generator generator, Tensor out, torch.dtype dtype, torch.layout layout, torch.device device, bool pin_memory, bool requires_grad)
 * (tuple of ints size, *, Tensor out, torch.dtype dtype, torch.layout layout, torch.device device, bool pin_memory, bool requires_grad)
 * (tuple of ints size, *, tuple of names names, torch.dtype dtype, torch.layout layout, torch.device device, bool pin_memory, bool requires_grad)
