In [1]:
import torch
import matplotlib.pyplot as plt

## Data

### Constants

In [2]:
N = 20  # length of every generated sample vector
K = 11  # how many of the N coordinates are picked as the significant set
# Per-coordinate magnitude 1/sqrt(N): a vector of N entries of size FACTOR has unit Euclidean norm.
FACTOR = 1 / torch.tensor(N).sqrt()

In [3]:
# Number of samples drawn when the dataset is built.
NUM_SAMPLES = 2_000

In [4]:
def generate_data_1() -> torch.Tensor:
    """
    Returns one sample of data from distribution D_A^(1)

    Every one of the N coordinates is drawn independently and is either
    +FACTOR or -FACTOR with equal probability.

    Returns:
        torch.Tensor: 1-D tensor of length N whose entries are all +/-FACTOR
    """
    # Draw the signs from {-1, +1} directly. sign(rand * 2 - 1) maps a draw of
    # exactly 0.5 to 0, which would leave a zero coordinate in the sample.
    signs = torch.randint(0, 2, (N,)).to(torch.get_default_dtype()) * 2. - 1.
    return signs * FACTOR

In [5]:
def generate_data_2(imp_cols: torch.Tensor) -> torch.Tensor:
    """
    Returns one sample of data from distribution D_A^(2)

    All N coordinates start as independent +/-FACTOR values; the coordinates
    listed in imp_cols are then overwritten with one shared +/-FACTOR value,
    so they all carry the same sign.

    Args:
        imp_cols (torch.Tensor): Tensor of columns which are significant in the distribution
            (any iterable of integer indices works, since it is only iterated over)

    Returns:
        torch.Tensor: 1-D tensor of length N whose entries are all +/-FACTOR
    """
    dtype = torch.get_default_dtype()
    # Draw signs from {-1, +1} directly. sign(rand * 2 - 1) maps a draw of
    # exactly 0.5 to 0, which would produce zero coordinates (and, for the
    # shared sign, would zero out every significant column at once).
    x = (torch.randint(0, 2, (N,)).to(dtype) * 2. - 1.) * FACTOR
    shared_value = (torch.randint(0, 2, (1,)).to(dtype) * 2. - 1.) * FACTOR
    for col in imp_cols:
        x[col] = shared_value
    return x

In [6]:
from numpy.random import choice

# Pick K distinct coordinate indices out of range(N); replace=False rules out repeats.
A = choice(range(N), size=K, replace=False)
A

array([ 2,  1, 12, 10,  5,  9, 15, 13, 14, 11, 19])

In [7]:
def get_y_from_data(x: torch.Tensor, imp_cols: torch.Tensor) -> torch.Tensor:
    """
    Computes the label y (0 or 1) of a single data point x.

    The label is 1 when the product of the signs of the significant
    coordinates is positive, and 0 otherwise (negative product, or any
    significant coordinate equal to zero).

    Args:
        x (torch.Tensor): Data tensor
        imp_cols (torch.Tensor): Significant columns from the data

    Returns:
        torch.Tensor: 0-dim float tensor holding 0. or 1.
    """
    parity = torch.tensor(1.)
    for idx in imp_cols:
        # Accumulate the sign product in place, one significant column at a time.
        parity.mul_(torch.sign(x[idx]))
    # A non-positive product is mapped to the label 0.
    return torch.tensor(0.) if parity <= 0 else parity

### Generating data

In [8]:
from sklearn.model_selection import train_test_split

In [9]:
# One 0./1. flag per sample, obtained by rounding a uniform draw from [0, 1).
function_choices = torch.rand(NUM_SAMPLES).round()

In [10]:
# Build the data matrix row by row: a flag of 0 draws from generate_data_1,
# any other flag draws from generate_data_2 with the columns in A.
X = torch.stack([
    generate_data_1() if function_choices[i] == 0 else generate_data_2(A)
    for i in range(NUM_SAMPLES)
])
X.shape

torch.Size([2000, 20])

In [11]:
# Label every row of X, then arrange the labels as a single column.
Y = torch.stack([get_y_from_data(sample, A) for sample in X]).reshape(-1, 1)
Y.shape

torch.Size([2000, 1])

In [12]:
# 60/20/20 split: hold out 20% of all samples for testing, then take 25% of the
# remaining 80% (i.e. 20% of the total) for validation.
X_training, X_test, Y_training, Y_test = train_test_split(
    X, Y, test_size=0.2,
)
X_train, X_val, Y_train, Y_val = train_test_split(
    X_training, Y_training, test_size=0.25,
)
(
    X_train.shape, Y_train.shape,
    X_val.shape, Y_val.shape,
    X_test.shape, Y_test.shape,
)

(torch.Size([1200, 20]),
 torch.Size([1200, 1]),
 torch.Size([400, 20]),
 torch.Size([400, 1]),
 torch.Size([400, 20]),
 torch.Size([400, 1]))

## Models

### Neural network

In [13]:
# Device identifier; fixed to the CPU here.
device = "cpu"

In [None]:
# Maybe it's finally time to make a models script and migrate SimpleNN code there