### Installation

In [None]:
!pip install -qqq torchtyping hypothesis pytest git+https://github.com/chalk-diagrams/chalk
!wget -q https://github.com/srush/Tensor-Puzzles/raw/main/lib.py

In [None]:
from lib import draw_examples, make_test, run_test
import torch
import numpy as np
from torchtyping import TensorType as TT
tensor = torch.tensor

In [None]:
def ones_spec(out):
  """Reference implementation used only by the test harness.

  Overwrites every position of `out` with 1, in place.
  """
  length = len(out)
  position = 0
  while position < length:
      out[position] = 1
      position += 1

def sum_spec(a, out):
  """Reference implementation used only by the test harness.

  Resets out[0] to 0 and then accumulates every element of `a` into it,
  in place.
  """
  out[0] = 0
  for value in a:
      out[0] += value

def test_sum_output_format(f):
  """Assert that `f` applied to arange(10) returns a 1-D result of length 1.

  Raises AssertionError (reporting the shape that was returned) otherwise.
  """
  t = f(arange(10))
  is_single_element_vector = len(t.shape) == 1 and t.shape[0] == 1
  assert is_single_element_vector, f"expected shape torch.Size([1]), got {t.shape}"


# Guess the output

In [None]:
# Quiz: predict the contents of list1 and list2 before uncommenting the prints.
# Hint: think about what list.copy() duplicates and what it keeps shared.
list1 = [[1, 2, 3], [4, 5, 6]]
list2 = list1.copy()
list2[0][1] = 'X'
list2[1] = ['A', 'B', 'C']
#print(list1)
#print(list2)

#### More guessing?

In [None]:
# Quiz: predict the four printed lines before running this cell.
# Hint: work out which `x` the `global` statement inside inner() refers to.
x = 10
def outer():
    x = 20
    def inner():
        global x
        x = 30
        print("inner:", x)
    inner()
    print("outer:", x)
print("global:", x)
outer()
print("final:", x)

# Tensor Puzzles
- by [Sasha Rush](http://rush-nlp.com) - [srush_nlp](https://twitter.com/srush_nlp) (with Marcos Treviso)

## Rules


1. Each puzzle needs to be solved in 1 line (<80 columns) of code.
2. You are allowed @, arithmetic, comparison, `shape`, any indexing (e.g. `a[:j], a[:, None], a[arange(10)]`), and previous puzzle functions.
3. You are *not allowed* anything else. No `view`, `sum`, `take`, `squeeze`, `tensor`.
4. These puzzles sometimes rely on broadcasting (an example is shown below).

5. You can start with this function:

In [None]:
def arange(i: int):
    """Return the 1-D integer tensor [0, 1, ..., i - 1].

    Args:
        i: number of elements; values <= 0 give an empty tensor.

    Returns:
        A tensor of shape (max(i, 0),) with dtype torch.int64.
    """
    # Pin the dtype: torch.tensor infers the default float dtype from an empty
    # sequence, so without it arange(0) would be float32 while every other
    # size is int64.
    return torch.tensor(range(i), dtype=torch.int64)

draw_examples("arange", [{"" : arange(i)} for i in [5, 3, 9]])

In [None]:
# Example of broadcasting.
# `a` is a vector of length 4; indexing `b` with [:, None] adds a trailing axis,
# making it a column of shape (5, 1). Adding them broadcasts both operands to
# shape (5, 4), so ret[r][c] == b[r][0] + a[c].
a = arange(4)
b = arange(5)[:, None]
draw_examples("broadcast", [{"a": a, "b":b, "ret": a + b}])

## Puzzle 1 - ones

Compute [ones](https://numpy.org/doc/stable/reference/generated/numpy.ones.html) - the vector of all ones.

In [None]:
def ones(i: int) -> TT["i"]:
  """Puzzle 1: return a vector of length `i` whose entries are all 1.

  Exercise placeholder: replace the `raise` below with a one-line solution.
  """
  #TODO
  # Hint: use arange
  raise NotImplementedError

test_ones = make_test("one", ones, ones_spec, add_sizes=["i"])

In [None]:
run_test(test_ones)

###### Solution

In [None]:
def ones_solution(i: int) -> TT["i"]:
  """One-line solution to the `ones` puzzle, built only from arange."""
  # arange(i+1)[1:] is [1, ..., i] and arange(i) is [0, ..., i-1], so their
  # elementwise difference is 1 at every position.
  return arange(i+1)[1:] - arange(i)

## Puzzle 2 - sum

Compute [sum](https://numpy.org/doc/stable/reference/generated/numpy.sum.html) - the sum of a vector.

In [None]:
def sum(a: TT["i"]) -> TT[1]:
  """Puzzle 2: return the sum of the entries of `a` as a one-element vector.

  The result must be 1-D with a single entry, not a scalar.
  Exercise placeholder: replace the `raise` below with a one-line solution.
  """
  # NOTE: this definition intentionally shadows Python's built-in `sum`.
  raise NotImplementedError

test_sum_output_format(sum)
test_sum = make_test("sum", sum, sum_spec)

In [None]:
run_test(test_sum)

## Puzzle 3 - outer

Compute [outer](https://numpy.org/doc/stable/reference/generated/numpy.outer.html) - the outer product of two vectors.

In [None]:
def outer(a: TT["i"], b: TT["j"]) -> TT["i", "j"]:
    """Puzzle 3: return the outer product, where result[i][j] == a[i] * b[j].

    Exercise placeholder: replace the `raise` below with a one-line solution.
    """
    # NOTE: this rebinds the name `outer`, which the scoping quiz earlier in
    # the notebook also defines.
    raise NotImplementedError

def outer_spec(a, b, out):
  """Reference implementation used only by the test harness.

  Fills `out` in place so that out[r][c] == a[r] * b[c]; the column count
  is taken from the first row of `out`.
  """
  for r in range(len(out)):
      for c in range(len(out[0])):
          product = a[r] * b[c]
          out[r][c] = product

test_outer = make_test("outer", outer, outer_spec)

In [None]:
run_test(test_outer)

## Puzzle 4 - diag

Compute [diag](https://numpy.org/doc/stable/reference/generated/numpy.diag.html) - the diagonal vector of a square matrix.

In [None]:
def diag(a: TT["i", "i"]) -> TT["i"]:
    """Puzzle 4: return the diagonal of square matrix `a`, i.e. result[i] == a[i][i].

    Exercise placeholder: replace the `raise` below with a one-line solution.
    """
    raise NotImplementedError

def diag_spec(a, out):
  """Reference implementation used only by the test harness.

  Copies the main diagonal of `a` into `out`, in place.
  """
  for k, row in enumerate(a):
      out[k] = row[k]


test_diag = make_test("diag", diag, diag_spec)

In [None]:
run_test(test_diag)

## Puzzle 5 - eye

Compute [eye](https://numpy.org/doc/stable/reference/generated/numpy.eye.html) - the identity matrix.

In [None]:
def eye(j: int) -> TT["j", "j"]:
    """Puzzle 5: return the `j` x `j` identity matrix.

    Exercise placeholder: replace the `raise` below with a one-line solution.
    """
    raise NotImplementedError

def eye_spec(out):
  """Reference implementation used only by the test harness.

  Sets every main-diagonal entry of `out` to 1, in place; off-diagonal
  entries are left as they were.
  """
  k = 0
  while k < len(out):
      out[k][k] = 1
      k += 1

test_eye = make_test("eye", eye, eye_spec, add_sizes=["j"])

In [None]:
run_test(test_eye)

# More puzzles?

## Puzzle 6 - triu

Compute [triu](https://numpy.org/doc/stable/reference/generated/numpy.triu.html) - the upper triangular matrix.

In [None]:
def triu_spec(out):
    """Reference implementation used only by the test harness.

    Fills square `out` in place with 1 on and above the main diagonal and 0
    below it.
    """
    size = len(out)
    for row in range(size):
        for col in range(size):
            out[row][col] = 1 if row <= col else 0

def triu(j: int) -> TT["j", "j"]:
    """Puzzle 6: return the `j` x `j` upper-triangular matrix of ones.

    Entry [r][c] is 1 when r <= c and 0 otherwise.
    Exercise placeholder: replace the `raise` below with a one-line solution.
    """
    raise NotImplementedError


test_triu = make_test("triu", triu, triu_spec, add_sizes=["j"])

In [None]:
# run_test(test_triu)

## Puzzle 7 - cumsum

Compute [cumsum](https://numpy.org/doc/stable/reference/generated/numpy.cumsum.html) - the cumulative sum.

In [None]:
def cumsum_spec(a, out):
    """Reference implementation used only by the test harness.

    Writes the running sum of `a` into `out`, in place: out[k] holds
    a[0] + ... + a[k] for every position k of `out`.
    """
    running = 0
    for pos in range(len(out)):
        running = running + a[pos]
        out[pos] = running

def cumsum(a: TT["i"]) -> TT["i"]:
    """Puzzle 7: return the cumulative sum of `a`, i.e. result[k] == a[0] + ... + a[k].

    Exercise placeholder: replace the `raise` below with a one-line solution.
    """
    raise NotImplementedError

test_cumsum = make_test("cumsum", cumsum, cumsum_spec)

In [None]:
# run_test(test_cumsum)

# Free coding!

You can move to your favorite IDE and use any LLM you like!

In [None]:
import torch
import torch.nn as nn
from torch.nn import TransformerEncoder, TransformerEncoderLayer

class TabPFNTransformer(nn.Module):
    """
    Implement a transformer-based model that predicts test labels given both training and test data.

    The model should:
    1. Take training features, test features, and training labels as input (assume everything is numerical and preprocessed)
    2. Process both training and test data jointly using transformer architecture
    3. Output predictions for test samples in 1 forward pass

    Input shapes:
        X_train: (1, n_train, n_features) For now we assume n_features is fixed.
        X_test: (1, n_test, n_features)
        y_train: (1, n_train)

    Output shape:
        y_pred: (1, n_test) - predictions for test samples
    """

    def __init__(self, n_features: int, d: int):
        """Store the sizes and build the transformer encoder.

        Args:
            n_features: number of input features per sample; stored on the
                instance but not used by any layer created here.
            d: model width handed to the encoder layers; the feed-forward
                width is 2 * d.
                NOTE(review): PyTorch requires the model width to be
                divisible by nhead (4 here) - confirm callers respect this.
        """
        super().__init__()
        self.n_features = n_features
        self.d = d
        # Two encoder layers, 4 attention heads each, batch-first layout and
        # dropout disabled.
        self.transformer = TransformerEncoder(
            TransformerEncoderLayer(d, nhead=4, dim_feedforward=d*2,
                                    batch_first=True, dropout=0.0),
            num_layers=2
        ) #(batch_size, seq_len, d) --> (batch_size, seq_len, d)

        #TODO?

    def forward(self, X_train, X_test, y_train):
      """Predict labels for X_test from X_train / y_train (exercise - not implemented).

      Expected argument and result shapes are listed in the class docstring.
      As written the body is only `pass`, so the call returns None.
      """
      #TODO
      pass

#### Test that it's running

In [None]:
# Smoke test on random data: one batch with 4 training rows and 3 test rows,
# 10 features per row.
X = torch.rand(1, 4, 10)
y = torch.randint(0, 2, (1, 4))  # integer labels drawn from {0, 1}
X_test = torch.rand(1, 3, 10)

model = TabPFNTransformer(10, 128)  # n_features=10, d=128
model(X, X_test, y)

In [None]:
# @title Test independence
# Checks that the prediction for a test sample does not depend on which other
# test samples share the forward pass: predicting X_test1 and X_test2
# separately must match predicting their concatenation.
torch.manual_seed(42)  # Seed the RNG so the random test inputs are reproducible
X_test1 = torch.rand(1, 3, 10)
X_test2 = torch.rand(1, 3, 10)
X_test_combined = torch.cat([X_test1, X_test2], dim=1)

# Run model on separate and combined test sets
# The model is constructed once, before the re-seeding below, so the same
# weights are used for all three forward passes.
model = TabPFNTransformer(10, 128)
torch.manual_seed(42)  # Re-seed before the forward pass; the model is already initialised
pred1 = model(X, X_test1, y)
torch.manual_seed(42)  # Re-seed so this forward pass starts from the same RNG state
pred2 = model(X, X_test2, y)
torch.manual_seed(42)  # Re-seed once more for the combined forward pass
pred_combined = model(X, X_test_combined, y)

# Check if predictions match when run separately vs together
# (concatenated along the test-sample axis, dim=1).
print("Predictions match?", torch.allclose(
    torch.cat([pred1, pred2], dim=1),
    pred_combined,
    rtol=1e-4
))
