In [1]:
import pandas as pd
import cupy as np
import cupyx.scipy as sp


In [2]:
train = pd.read_csv(
    'G:\My Drive\ML\\ai-masters-ds-contest-2022\\train2022.csv')
test = pd.read_csv('G:\My Drive\ML\\ai-masters-ds-contest-2022\\test2022.csv')
train.set_index('id', inplace=True)
test.set_index('id', inplace=True)
X = train.iloc[:, :402]
y = train.iloc[:, 402:]


In [3]:
from matplotlib import pyplot as plt
%matplotlib inline


def row2pics(X_r, y_r, test_vals=False):
    a1 = np.array(X_r[2:]).reshape((20, 20))
    a2 = np.array(y_r).reshape((20, 20))
    plt.imshow(a1, interpolation='nearest')
    plt.show()
    plt.imshow(a2, interpolation='nearest')
    plt.show()


In [4]:
from cupyx.scipy.signal import convolve2d

'---------------------CONSTANTS---------------------'
n = 20
alpha = np.tile(np.floor(np.arange(n) / (n / 3)) + 1, (n, 1))
beta = alpha.T + 1
alpha, beta = np.minimum(alpha, beta), np.maximum(alpha, beta)
gamma = (alpha + beta) / 2
'---------------------------------------------------'


def make_step(X):
    nbrs_count = convolve2d(X, np.ones(
        (3, 3)), mode='same', boundary='wrap') - X
    return ((nbrs_count <= beta) & (nbrs_count >= gamma)) | (X & (nbrs_count >= alpha) & (nbrs_count <= gamma))


def makeksteps(X, k=3):
    for i in range(k):
        X = make_step(X)
    return (X)


def init_pole(n):
    return np.random.randint(2, size=(n, n))


In [135]:
def generate_data(k:int=1, n_lines:int = 50000):
    # порождение обучающей выборки
    n = 20  # число строк и столбцов на поле
    X = []
    y = []
    for t in np.arange(n_lines):
        k = 1
        X0 = init_pole(n=n)  # инициализация (КОД НЕ ДАН)
        X1 = makeksteps(X0.copy(), k=k)  # сделать k шагов
        X.append(np.array(X1))
        y.append(np.array(X0))
    X = np.array(X).reshape((-1, 1, 20, 20))
    y = np.array(y).reshape((-1, 1, 20, 20))
    return X, y


In [None]:
X_train, y_train = generate_data()

Creating CNN model

Main sequel nn class

In [163]:
from cupyx.scipy import signal


class NN:
    def __init__(self, layers: list, loss_func: str) -> None:
        global mse, mse_prime, binary_crossentropy, binary_crossentropy_prime
        self.network = layers
        self.loss_func = {
            'mse': mse,
            'binary_crossentropy': binary_crossentropy
        }[loss_func]

        self.loss_func_prime = {
            'mse': mse_prime,
            'binary_crossentropy': binary_crossentropy_prime
        }[loss_func]

    def fit(self, X: np.ndarray, Y: np.ndarray, learning_rate: float, n_epochs: int, OD_iter: int = 10, delta=0.0001, opimizer: str = 'adam') -> None:
        best_model = None
        min_error = np.inf
        counter = 0
        for e in np.arange(n_epochs):
            error = 0
            for x, y in zip(X, Y):
                output = x
                for layer in self.network:
                    output = layer.forward(output)

                error += self.loss_func(y, output)
                grad = self.loss_func_prime(y, output)

                for layer in self.network[::-1]:
                    grad = layer.backward(grad, learning_rate)

            error /= len(X)
            counter += 1
            if error < min_error:
                min_error = error
                best_model = self.network.copy()
                if error + delta < min_error:
                    counter = 0

            if OD_iter:
                if counter == OD_iter:
                    break

            print(f"Epoch {e+1}/{n_epochs}: error = {error}")

        self.network = best_model

    def predict_r(self, X_r: np.ndarray) -> np.ndarray:
        for layer in self.network:
            X_r = layer.forward(X_r)
        return X_r

    def predict(self, X: np.ndarray) -> np.ndarray:
        return np.array([np.array(self.predict_r(X[i])) for i in np.arange(X.shape[0])])


Base classes + optimizers

In [8]:
class Optimizer:
    def __init__(self, weights, learning_rate: float) -> None:
        self.learning_rate = learning_rate

    def backward_pass(self, gradients: np.ndarray) -> np.ndarray:
        pass


class Layer:
    def __init__(self) -> None:
        self.input = None
        self.output = None

    def forward(self, input: np.ndarray) -> np.ndarray:
        pass

    def backward(self, output_gradient: np.ndarray, learning_rate: float, optimizer: Optimizer = None) -> np.ndarray:
        pass


class SGD(Optimizer):
    def __init__(self, weights, learning_rate: float) -> None:
        self.weights = weights
        self.learning_rate = learning_rate

    def backward_pass(self, gradients: np.ndarray) -> np.ndarray:
        self.weights -= self.learning_rate * gradients
        return self.weights


class Adam(Optimizer):
    def __init__(self, weights, alpha=0.001, beta1=0.9, beta2=0.999, epsilon=1e-8):
        self.alpha = alpha
        self.beta1 = beta1
        self.beta2 = beta2
        self.epsilon = epsilon
        self.m = 0
        self.v = 0
        self.t = 0
        self.theta = weights

    def backward_pass(self, gradient):
        self.t = self.t + 1
        self.m = self.beta1*self.m + (1 - self.beta1)*gradient
        self.v = self.beta2*self.v + (1 - self.beta2)*(gradient**2)
        m_hat = self.m/(1 - self.beta1**self.t)
        v_hat = self.v/(1 - self.beta2**self.t)
        self.theta -= self.alpha*(m_hat/(np.sqrt(v_hat) - self.epsilon))
        return self.theta



Main Layers

In [9]:
class Dense(Layer):
    def __init__(self, input_size, output_size, optimizer: 'str' = 'adam') -> None:
        self.optimizer = {'adam': Adam,
                          'sgd': SGD
                          }[optimizer]
        self.weights = np.random.randn(output_size, input_size)
        self.w_optimizer = self.optimizer(self.weights)

        self.bias = np.random.randn(output_size, 1)
        self.b_optimizer = self.optimizer(self.bias)

    def forward(self, input: np.ndarray) -> np.ndarray:
        self.input = input
        return np.dot(self.weights, self.input) + self.bias

    def backward(self, output_gradient: np.ndarray, learning_rate: float) -> np.ndarray:
        weidhts_gradient = np.dot(output_gradient, self.input.T)
        input_gradient = np.dot(self.weights.T, output_gradient)
        self.weights = self.w_optimizer.backward_pass(weidhts_gradient)
        self.bias = self.b_optimizer.backward_pass(output_gradient)
        return input_gradient


class Convolutional(Layer):
    def __init__(self, input_shape, kernel_size: int, depth: int, padding: bool = True, optimizer: str = 'adam') -> None:
        input_depth, input_height, input_width = input_shape
        self.input_shape = input_shape
        self.input_depth = input_depth
        self.kernel_size = kernel_size
        self.depth = depth
        self.output_shape = (depth, input_height, input_width) if padding else (
            depth, input_height - kernel_size + 1, input_width - kernel_size + 1)
        self.kernels_shape = (depth, input_depth, kernel_size, kernel_size)
        self.kernels = np.random.randn(*self.kernels_shape)
        self.biases = np.random.randn(*self.output_shape)

        self.optimizer = {'adam': Adam,
                          'sgd': SGD
                          }[optimizer]
        self.kernels_optimizer = self.optimizer(self.kernels)
        self.biases_optimizer = self.optimizer(self.biases)

    def forward(self, input: np.ndarray) -> np.ndarray:
        self.input = input
        self.output = np.copy(self.biases)

        for i in range(self.depth):
            for j in range(self.input_depth):
                # self.output[i] += signal.convolve2d(self.input[j], self.kernels[i, j], mode='valid') [j] removed bcs only 1 input
                self.output[i] += signal.correlate2d(
                    self.input[j], self.kernels[i, j], mode='same')
        return self.output

    def backward(self, output_gradient: np.ndarray, learning_rate: float) -> np.ndarray:
        kernels_gradient = np.zeros(self.kernels_shape)
        input_gradient = np.zeros(self.input_shape)

        for i in range(self.depth):
            for j in range(self.input_depth):
                # kernels_gradient[i, j] = signal.correlate2d(self.input[j], output_gradient[i], mode='valid') [j] removed bcs only 1 input
                kernels_gradient[i, j] = signal.correlate2d(
                    self.input[j], output_gradient[i], mode='valid')
                input_gradient[j] += signal.convolve2d(
                    output_gradient[i], self.kernels[i, j], mode='same')

        self.kernels = self.kernels_optimizer.backward_pass(kernels_gradient)
        self.biases = self.biases_optimizer.backward_pass(output_gradient)
        return input_gradient




Util layers

In [10]:
class Reshape(Layer):
    def __init__(self, input_shape, output_shape) -> None:
        self.input_shape = input_shape
        self.output_shape = output_shape

    def forward(self, input: np.ndarray) -> np.ndarray:
        return np.reshape(input, self.output_shape)

    def backward(self, output_gradient: np.ndarray, learning_rate: float) -> np.ndarray:
        return np.reshape(output_gradient, self.input_shape)


class MaxPool2d(Layer):
    def __init__(self, input_shape, kernel_size: int, padding: bool = True) -> None:
        self.input_shape = input_shape
        self.kernel_size = kernel_size
        self.output_shape = input_shape if padding else (
            input_shape[0], input_shape[1] - kernel_size + 1, input_shape[2] - kernel_size + 1)
        self.padding = padding

    def forward(self, input: np.ndarray) -> np.ndarray:
        self.input = input
        self.output = np.zeros(self.output_shape)
        for i in np.arange(self.input_shape[0]):
            for j in np.arange(self.input_shape[1]):
                for k in np.arange(self.input_shape[2]):
                    self.output[i, j//self.kernel_size, k//self.kernel_size] = np.max(
                        self.input[i, j:j+self.kernel_size, k:k+self.kernel_size])
        return self.output

    def backward(self, output_gradient: np.ndarray, learning_rate: float) -> np.ndarray:
        input_gradient = np.zeros(self.input_shape)
        for i in range(self.input_shape[0]):
            for j in range(self.input_shape[1]):
                for k in range(self.input_shape[2]):
                    input_gradient[i, j:j+self.kernel_size, k:k+self.kernel_size] += (self.input[i, j//self.kernel_size, k//self.kernel_size]
                                                                                      == self.input[i, j:j+self.kernel_size, k:k+self.kernel_size])*output_gradient[i, j//self.kernel_size, k//self.kernel_size]
        return input_gradient


class BatchNormalization(Layer):
    def __init__(self, input_shape, epsilon=1e-8):
        self.input_shape = input_shape
        self.epsilon = epsilon
        self.gamma = np.ones(input_shape)
        self.beta = np.zeros(input_shape)
        self.gamma_optimizer = Adam(self.gamma)
        self.beta_optimizer = Adam(self.beta)

    def forward(self, input: np.ndarray) -> np.ndarray:
        self.input = input
        self.mean = np.mean(input, axis=0)
        self.std = np.std(input, axis=0)
        self.output = (input - self.mean) / (self.std + self.epsilon)
        return self.output * self.gamma + self.beta

    def backward(self, output_gradient: np.ndarray, learning_rate:float) -> np.ndarray:
        self.gamma = self.gamma_optimizer.backward_pass(output_gradient)
        self.beta = self.beta_optimizer.backward_pass(output_gradient)
        return (output_gradient * self.gamma) / (self.std + self.epsilon)


Loss functions

In [70]:
def mse(y_true, y_pred):
    return np.mean((y_true - y_pred) ** 2)


def mse_prime(y_true, y_pred):
    return 2 * (y_pred - y_true)/np.size(y_true)


def binary_crossentropy(y_true, y_pred):
    return np.mean(np.maximum(y_pred, 0) - y_pred * y_true + np.log(1 + np.exp(-np.abs(y_pred))))


def binary_crossentropy_prime(y_true, y_pred):
    return (-(y_true / y_pred) + ((1 - y_true) / (1 - y_pred)))/np.size(y_true)


Activation layers

In [12]:
class Activation(Layer):
    def __init__(self, activation, activation_prime) -> None:
        self.activation = activation
        self.activation_prime = activation_prime

    def forward(self, input: np.ndarray) -> np.ndarray:
        self.input = input
        return self.activation(self.input)

    def backward(self, output_gradient: np.ndarray, learning_rate: float) -> np.ndarray:
        return np.multiply(output_gradient, self.activation_prime(self.input))


class Tanh(Activation):
    def __init__(self) -> None:
        def tanh(x): return np.tanh(x)
        def tanh_prime(x): return 1 - np.tanh(x) ** 2
        super().__init__(tanh, tanh_prime)


class Sigmoid(Activation):
    def __init__(self) -> None:
        def sigmoid(x): return 1 / (1 + np.exp(-x))
        def sigmoid_prime(x): return sigmoid(x) * (1 - sigmoid(x))
        super().__init__(sigmoid, sigmoid_prime)


class ReLU(Activation):
    def __init__(self) -> None:
        def relu(x): return np.maximum(0, x)
        def relu_prime(x): return (x > 0).astype(float)
        super().__init__(relu, relu_prime)


class LeakyReLU(Activation):
    def __init__(self) -> None:
        def leaky_relu(x): return np.maximum(0.01 * x, x)
        def leaky_relu_prime(x): return 1.0 if x > 0.0 else 0.01
        leaky_relu_prime = np.vectorize(leaky_relu_prime)
        super().__init__(leaky_relu, leaky_relu_prime)
        super().__init__(leaky_relu, leaky_relu_prime)


In [13]:
np.cuda.Device(0).use() # Set the GPU to use (RTX 2060m)


<CUDA Device 0>

Решения - CNN в 2 вариантах (микшировал слои выше для sequal нейронки). Обучаем модели смотреть на шаг назад (генерим свои данные по данному образцу). На каждое количество шагов лучше использовать отдельную аналогичную нейронку, т.к. при повторном использовании одной модели ошибка на отдельном поле увеличивается с каждым шагом.

Свёрточные слои склеиваю batch normalization для общего импрувмента и LeakyRelu в качестве активационной функции. На конце сигмоид т.к. нужен вывод вероятности от 0 до 1, жива ли клетка поля. loss func - mse, т.к. binary crossentropy оказалась нестабильной (даже не смотря на её улучшенную версию, кою подглядел у pytorch)

В обоих случаях задействован adam оптимайзер, т.к., судя по статьям и общему опыту комьюнити, это 1 из лучших вариантов. 

В model3 заместо двух свёрточных слоёв используются обычные нейроны

Для обучения используются только numpy и scipy (их cupy версии для задействования gpu)

In [22]:
model3 = NN([
    Convolutional((1, 20, 20), kernel_size=5, depth=1, padding=True),
    LeakyReLU(),
    BatchNormalization((1, 20, 20)),
    Reshape((1, 20, 20), (400,1)),
    
    Dense(400, 800),
    LeakyReLU(),
    BatchNormalization((800, 1)),
    Dense(800, 400),
    Reshape((400,1), (20, 20)),
    Sigmoid()


], loss_func='mse')
model3.fit(X_train, y_train, n_epochs=1000, learning_rate=0.1, OD_iter=10)


Epoch 1/1000: error = 0.4781859898580645
Epoch 2/1000: error = 0.4684473940275236
Epoch 3/1000: error = 0.46609941598441773
Epoch 4/1000: error = 0.4403620032883539
Epoch 5/1000: error = 0.37174852528836394
Epoch 6/1000: error = 0.26226437611798226
Epoch 7/1000: error = 0.26102450419918627
Epoch 8/1000: error = 0.2603947483814405
Epoch 9/1000: error = 0.3615033868173209


In [24]:
model4 = NN([
    Convolutional((1, 20, 20), kernel_size=5, depth=4, padding=True),
    LeakyReLU(),
    BatchNormalization((4, 20, 20)),
    Convolutional((4, 20, 20), kernel_size=5, depth=4, padding=True),
    LeakyReLU(),
    BatchNormalization((4, 20, 20)),

    Convolutional((4, 20, 20), kernel_size=5, depth=1, padding=True),
    Sigmoid()

], loss_func='mse')
model4.fit(X_train, y_train, n_epochs=1000, learning_rate=0.1, OD_iter=10)

Epoch 1/1000: error = 0.2789834721333651
Epoch 2/1000: error = 0.25138456139426757
Epoch 3/1000: error = 0.2491737707802211
Epoch 4/1000: error = 0.24850790139201964
Epoch 5/1000: error = 0.2486578783235552
Epoch 6/1000: error = 0.24849978804541978
Epoch 7/1000: error = 0.24857417241287275
Epoch 8/1000: error = 0.24872210398244343
Epoch 9/1000: error = 0.24902836379247772


In [137]:
models4 = [model4]
models3 = [model3]

for i in np.arange(2, 6):
    X_train_k, y_train_k = generate_data(i)
    model3_k = NN([
        Convolutional((1, 20, 20), kernel_size=5, depth=1, padding=True),
        LeakyReLU(),
        BatchNormalization((1, 20, 20)),
        Reshape((1, 20, 20), (400, 1)),

        Dense(400, 800),
        LeakyReLU(),
        BatchNormalization((800, 1)),
        Dense(800, 400),
        Reshape((400, 1), (20, 20)),
        Sigmoid()
    ], loss_func='mse')
    
    model4_k = NN([
        Convolutional((1, 20, 20), kernel_size=5, depth=4, padding=True),
        LeakyReLU(),
        BatchNormalization((4, 20, 20)),
        Convolutional((4, 20, 20), kernel_size=5, depth=4, padding=True),
        LeakyReLU(),
        BatchNormalization((4, 20, 20)),

        Convolutional((4, 20, 20), kernel_size=5, depth=1, padding=True),
        Sigmoid()

    ], loss_func='mse')
    
    model4_k.fit(X_train_k, y_train_k, n_epochs=1000, learning_rate=0.1, OD_iter=10)
    models4.append(model4_k)
    
    model3_k.fit(X_train_k, y_train_k, n_epochs=1000, learning_rate=0.1, OD_iter=10)
    models3.append(model3_k)

Epoch 1/1000: error = 0.40332036923221154
Epoch 2/1000: error = 0.31640590199607127
Epoch 3/1000: error = 0.28528640615610107
Epoch 4/1000: error = 0.2840457466478288
Epoch 5/1000: error = 0.27294998655715436
Epoch 6/1000: error = 0.25321410873429495
Epoch 7/1000: error = 0.2472476280012612
Epoch 8/1000: error = 0.24663503377930956
Epoch 9/1000: error = 0.24631448991732227
Epoch 1/1000: error = 0.4770091727841832
Epoch 2/1000: error = 0.43422753064228176
Epoch 3/1000: error = 0.4249286029130695
Epoch 4/1000: error = 0.42054326557459104
Epoch 5/1000: error = 0.4392488404603194
Epoch 6/1000: error = 0.4500645196900783
Epoch 7/1000: error = 0.43850816164515827
Epoch 8/1000: error = 0.4366221794640462
Epoch 9/1000: error = 0.4296046475993485
Epoch 1/1000: error = 0.41980640348486764
Epoch 2/1000: error = 0.4267258008198453
Epoch 3/1000: error = 0.43171171443088446
Epoch 4/1000: error = 0.43839332534342595
Epoch 5/1000: error = 0.4400362884109987
Epoch 6/1000: error = 0.4414597221258722
Epo

In [187]:
def submit(models: list[NN], X_test= test) -> None:
    submission = pd.DataFrame()
    for i in range(1, 6):
        X_k = test.drop('regime', axis=1)
        indexes = X_k[X_k['steps']==i].index
        y_pred = pd.DataFrame()
        y_pred['id'] = indexes
        X_k = np.array(X_k[X_k['steps']==i].drop('steps', axis=1).to_numpy()).reshape((-1, 1, 20, 20))
        sos = pd.DataFrame([np.asnumpy(models[i-1].predict_r(X_k[j]).reshape((400))) for j in np.arange(X_k.shape[0])], columns=[f'y_{i}' for i in range(400)])
        
        y_pred =  pd.concat([y_pred, sos], axis=1)
        submission = pd.concat([submission, y_pred], axis=0)
    
    return submission
        

In [189]:
sub1 = submit(models3, test)
sub2 = submit(models4, test)

In [196]:
sub1.to_csv('submission_1.csv', index=False)
sub2.to_csv('submission_2.csv', index=False)

Int64Index([    0,     1,     2,     3,     4,     5,     6,     7,     8,
                9,
            ...
            49990, 49991, 49992, 49993, 49994, 49995, 49996, 49997, 49998,
            49999],
           dtype='int64', name='id', length=50000)

In [179]:
sub1.shape

(0, 0)