In [1]:
from typing import Optional
import torch
from torch import nn
from torch.nn import functional as F
from torchvision import datasets
from torchvision.transforms import v2 as transforms
import numpy as np
import wandb
import time
import matplotlib.pyplot as plt
import pandas as pd

In [2]:
generator = torch.Generator().manual_seed(42)
np.random.seed(42)

In [3]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Using device: {device}")

Using device: cuda


In [4]:

class GaussianPosterior(nn.Module):
    LOG_SQRT_2PI = 0.5 * np.log(2 * np.pi)

    def __init__(self, mu, rho):
        super().__init__()

        self.mu = nn.Parameter(mu)
        self.rho = nn.Parameter(rho)

        self.w = None
        self.sigma = None

        self.normal = torch.distributions.Normal(0, 1)

    def sample(self):
        epsilon = self.normal.sample(self.mu.size()).to(device)
        self.sigma = torch.log1p(torch.exp(self.rho))
        self.w = self.mu + self.sigma * epsilon

        return self.w

    def log_posterior(self):
        assert self.w is not None
        assert self.sigma is not None

        log_posterior = -GaussianPosterior.LOG_SQRT_2PI - torch.log(self.sigma) - ((self.w - self.mu) ** 2) / (2 * self.sigma ** 2)

        return log_posterior.sum()


class ScaleMixturePrior(nn.Module):

    def __init__(self, pi: float, sigma1: float, sigma2: float):
        super().__init__()

        self.pi = pi
        self.normal1 = torch.distributions.Normal(0, sigma1)
        self.normal2 = torch.distributions.Normal(0, sigma2)

    def log_prior(self, w):
        likelihood1 = torch.exp(self.normal1.log_prob(w))
        likelihood2 = torch.exp(self.normal2.log_prob(w))

        p_mixture = self.pi * likelihood1 + (1 - self.pi) * likelihood2
        log_prob = torch.log(p_mixture).sum()

        return log_prob


class BayesianModule(nn.Module):
    pass


class BayesLinear(BayesianModule):

    def __init__(
        self,
            in_features: int,
            out_features: int,
            prior_pi: float,
            prior_sigma1: float,
            prior_sigma2: float
    ):
        super().__init__()

        w_mu = torch.empty(out_features, in_features).normal_(0.0, 0.01 * (np.log(in_features) + np.log(out_features)), generator=generator)
        w_rho = torch.empty(out_features, in_features).normal_(-4.5, 0.001 * (np.log(in_features) + np.log(out_features)))

        bias_mu = torch.empty(out_features).normal_(0.0, 0.01 * (np.log(in_features) + np.log(out_features)), generator=generator)
        bias_rho = torch.empty(out_features).normal_(-4.5, 0.001 * (np.log(in_features) + np.log(out_features)))

        self.w_posterior = GaussianPosterior(w_mu, w_rho)
        self.b_posterior = GaussianPosterior(bias_mu, bias_rho)

        self.w_prior = ScaleMixturePrior(prior_pi, prior_sigma1, prior_sigma2)
        self.b_prior = ScaleMixturePrior(prior_pi, prior_sigma1, prior_sigma2)

    def forward(self, x):
        w = self.w_posterior.sample()
        b = self.b_posterior.sample()

        log_prior = self.w_prior.log_prior(w) + self.b_prior.log_prior(b)
        log_posterior = self.w_posterior.log_posterior() + self.b_posterior.log_posterior()

        self.kl_divergence = log_posterior - log_prior

        return F.linear(x, w, b)


def minibatch_weight(batch_idx: int, num_batches: int) -> float:
    return 1 / num_batches

# MNIST classification

In [5]:
class MNISTModel(nn.Module):

    def __init__(self, in_features=28 * 28, out_features=10, prior_sigma_1=0.1, prior_sigma_2=0.4, prior_pi=1):
        super().__init__()

        self.layers = nn.Sequential(
            BayesLinear(
                in_features,
                1200,
                prior_pi,
                prior_sigma_1,
                prior_sigma_2
            ),
            nn.ReLU(),
            BayesLinear(
                1200,
                1200,
                prior_pi,
                prior_sigma_1,
                prior_sigma_2
            ),
            nn.ReLU(),
            BayesLinear(
                1200,
                out_features,
                prior_pi,
                prior_sigma_1,
                prior_sigma_2,
            ),
            nn.Softmax(dim=1),
        )

    def forward(self, x):
        x = self.layers(x)
        # print(x)
        return x

    @property
    def kl_divergence(self):
        kl = 0
        for module in self.modules():
            if isinstance(module, BayesianModule):
                kl += module.kl_divergence

        return kl

    def sample_elbo(self, inputs, labels, criterion, num_samples, complexity_cost_weight=1):
        loss = 0
        for _ in range(num_samples):
            outputs = self(inputs)
            contr1 = criterion(outputs, labels)
            contr2 = self.kl_divergence * complexity_cost_weight
            # print(f"contr1: {contr1}, contr2: {contr2}")
            loss += contr1 + contr2
        return loss / num_samples

In [6]:
def train_one_epoch(model, train_loader, optimizer, criterion, num_samples=1):
    model.train()

    total_loss = 0

    for batch_idx, (data, target) in enumerate(train_loader):
        data, target = data.to(device), target.to(device)

        optimizer.zero_grad()

        kl_weight = minibatch_weight(batch_idx, len(train_loader))

        loss = model.sample_elbo(data, target, criterion, num_samples, kl_weight)

        loss.backward()
        optimizer.step()

        total_loss += loss.item()

    return total_loss / len(train_loader)


def evaluate(model, val_loader, criterion):
    model.eval()

    total_loss = 0
    correct = 0

    with torch.no_grad():
        for batch_idx, (data, target) in enumerate(val_loader):
            data, target = data.to(device), target.to(device)

            output = model(data)

            preds = torch.argmax(output, 1)
            correct += (preds == target).sum().item()

            loss = (
                criterion(output, target) + model.kl_divergence * minibatch_weight(batch_idx, len(val_loader))
            )
            total_loss += loss.item()

    total = len(val_loader.dataset)
    return total_loss / total, (total - correct) / total


def test(model, test_loader):
    model.eval()

    correct = 0

    with torch.no_grad():
        for data, target in test_loader:
            data, target = data.to(device), target.to(device)

            output = model(data)

            preds = torch.argmax(output, 1)
            correct += (preds == target).sum().item()

    total = len(test_loader.dataset)
    error = (total - correct) / total

    # print(f"Correct: {correct}/{total} ({correct / total:.2%})")
    return error


def train(model, train_loader, val_loader, test_loader, optimizer, criterion, num_epochs, num_samples, use_wandb=False):
    for epoch in range(num_epochs):
        now = time.time()

        train_loss = train_one_epoch(model, train_loader, optimizer, criterion, num_samples)
        val_loss, val_error = evaluate(model, val_loader, criterion)
        test_error = test(model, test_loader)

        elapsed = time.time() - now

        if use_wandb:
            wandb.log({
                "epoch": epoch,
                "train_loss": train_loss,
                "val_loss": val_loss,
                "val_error": val_error,
                "test_error": test_error
            })

        print(f"Epoch {epoch + 1}/{num_epochs}, Train Loss: {train_loss:.4f}, Val Loss: {val_loss:.4f}, Val Error: {val_error:.2%}, Test Error: {test_error:.2%}, Time: {elapsed:.2f}s")

In [7]:
def train_mnist(train_loader, val_loader, test_loader, epochs, lr, num_samples, pi, minus_log_sigma1, minus_log_sigma2, use_wandb=False):
    sigma1 = np.exp(-minus_log_sigma1)
    sigma2 = np.exp(-minus_log_sigma2)

    model = MNISTModel(prior_sigma_1=sigma1, prior_sigma_2=sigma2, prior_pi=pi)
    model.to(device)

    optimizer = torch.optim.Adam(model.parameters(), lr=lr)
    criterion = nn.CrossEntropyLoss(reduction='sum')

    if use_wandb:
        run = wandb.init(project="asi-paper", name="mnist")

    train(model, train_loader, val_loader, test_loader, optimizer, criterion, epochs, num_samples, use_wandb=use_wandb)

    if use_wandb:
        run.finish()

    return model

In [8]:
batch_size = 128
transform = transforms.Compose([
    transforms.ToImage(),
    transforms.ToDtype(torch.float32, scale=True),
    transforms.Lambda(lambda x: x.view(28 * 28) / 126.0),
])


mnist_dataset = datasets.MNIST(
    root="./mnist",
    download=True,
    transform=transform,
    train=True
)
# transformed_data = transform(mnist_dataset.data).to(device)
# y = mnist_dataset.targets.to(device)
# mnist_dataset = torch.utils.data.TensorDataset(transformed_data, y)

test_set = datasets.MNIST(
    root="./mnist",
    download=True,
    transform=transform,
    train=False
)


train_dataset, val_dataset = torch.utils.data.random_split(mnist_dataset, [50_000, 10_000], generator=generator)

kwargs = {
    'batch_size': batch_size,
    'num_workers': 1,
    'generator': generator,
    'pin_memory': True,
}

train_loader = torch.utils.data.DataLoader(
    train_dataset,
    shuffle=True,
    **kwargs
)
val_loader = torch.utils.data.DataLoader(
    val_dataset,
    shuffle=False,
    **kwargs
)
test_loader = torch.utils.data.DataLoader(
    test_set,
    shuffle=False,
    **kwargs
)
full_train_loader = torch.utils.data.DataLoader(
    mnist_dataset,
    shuffle=True,
    **kwargs
)

100%|██████████| 9.91M/9.91M [00:01<00:00, 6.07MB/s]
100%|██████████| 28.9k/28.9k [00:00<00:00, 160kB/s]
100%|██████████| 1.65M/1.65M [00:01<00:00, 1.52MB/s]
100%|██████████| 4.54k/4.54k [00:00<00:00, 7.02MB/s]


## Grid search with wandb
Uncomment the code below to run a grid search and log the results to wandb.

In [9]:
# from kaggle_secrets import UserSecretsClient
# user_secrets = UserSecretsClient()
# key = user_secrets.get_secret('wand-api-key-asi')
# sweep_continue = user_secrets.get_secret('asi-mnist-sweep-id')

# wandb.login(key=key)


# def train_wrapper():
#     with wandb.init(project="asi-paper") as run:
#         model = train_mnist(
#             train_loader,
#             val_loader,
#             test_loader,
#             epochs=10,
#             lr=run.config.lr,
#             num_samples=run.config.sample_nbr,
#             pi=run.config.pi,
#             minus_log_sigma1=run.config.min_log_sigma1,
#             minus_log_sigma2=run.config.min_log_sigma2,
#             use_wandb=True
#         )

#     return model


# # sweep_configuration = {
# #     "method": "grid",
# #     "metric": {"goal": "minimize", "name": "val_error"},
# #     'name': "sweep-mnist",
# #     "parameters": {
# #         "lr": {'values': [1e-3, 1e-4, 1e-5]},
# #         "sample_nbr": {'values': [1, 2, 3, 5]},
# #         "pi": {'values': [0.25, 0.5, 0.75]},
# #         "min_log_sigma1": {'values': [0, 1, 2]},
# #         "min_log_sigma2": {'values': [6, 7, 8]},
# #     },
# # }

# # sweep_id = wandb.sweep(sweep=sweep_configuration, project="asi-paper")
# # print(f"Sweep ID: {sweep_id}")
# wandb.agent(sweep_continue, function=train_wrapper)

## Manual training
Uncomment the code below to train the model with specified hyperparameters and save it.

In [10]:
def train_final(train_loader, val_loader, epochs=50, lr=1e-3, num_samples=3, pi=0.5, minus_log_sigma1=0, minus_log_sigma2=6, use_wandb=True):
    sigma1 = np.exp(-minus_log_sigma1)
    sigma2 = np.exp(-minus_log_sigma2)

    model = MNISTModel(prior_sigma_1=sigma1, prior_sigma_2=sigma2, prior_pi=pi)
    model.to(device)

    optimizer = torch.optim.Adam(model.parameters(), lr=lr)
    criterion = nn.CrossEntropyLoss(reduction='sum')

    if use_wandb:
        run = wandb.init(project="asi-paper", name="mnist")

    for epoch in range(epochs):
        now = time.time()

        train_loss = train_one_epoch(model, train_loader, optimizer, criterion, num_samples)
        val_loss, val_error = evaluate(model, val_loader, criterion)

        elapsed = time.time() - now

        if use_wandb:
            wandb.log({
                "epoch": epoch,
                "train_loss": train_loss,
                "val_loss": val_loss,
                "val_error": val_error,
            })

        print(f"Epoch {epoch + 1}/{epochs}, Train Loss: {train_loss:.4f}, Val Loss: {val_loss:.4f}, Val Error: {val_error:.2%}, Time: {elapsed:.2f}s")

    if use_wandb:
        run.finish()

    return model

In [11]:
from kaggle_secrets import UserSecretsClient
user_secrets = UserSecretsClient()
key = user_secrets.get_secret('wand-api-key-asi')

wandb.login(key=key)

# model = train_mnist(train_loader, val_loader, test_loader, epochs=50, lr=1e-3, num_samples=3, pi=0.5, minus_log_sigma1=0, minus_log_sigma2=6, use_wandb=True)
model = train_final(full_train_loader, test_loader)
torch.save(model.state_dict(), "mnist_model.pt")

[34m[1mwandb[0m: Using wandb-core as the SDK backend.  Please refer to https://wandb.me/wandb-core for more information.
[34m[1mwandb[0m: No netrc file found, creating one.
[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /root/.netrc
[34m[1mwandb[0m: Currently logged in as: [33mmatteo-ghia[0m ([33mmatteo-ghia-2001[0m) to [32mhttps://api.wandb.ai[0m. Use [1m`wandb login --relogin`[0m to force relogin
[34m[1mwandb[0m: Tracking run with wandb version 0.19.9
[34m[1mwandb[0m: Run data is saved locally in [35m[1m/kaggle/working/wandb/run-20250602_124128-owqj188y[0m
[34m[1mwandb[0m: Run [1m`wandb offline`[0m to turn off syncing.
[34m[1mwandb[0m: Syncing run [33mmnist[0m
[34m[1mwandb[0m: ⭐️ View project at [34m[4mhttps://wandb.ai/matteo-ghia-2001/asi-paper[0m
[34m[1mwandb[0m: 🚀 View run at [34m[4mhttps://wandb.ai/matteo-ghia-2001/asi-paper/runs/owqj188y[0m


Epoch 1/50, Train Loss: 17289.6424, Val Loss: 671.6394, Val Error: 14.75%, Time: 55.30s
Epoch 2/50, Train Loss: 13393.2142, Val Loss: 571.4220, Val Error: 9.82%, Time: 49.75s
Epoch 3/50, Train Loss: 11314.4361, Val Loss: 474.1558, Val Error: 9.59%, Time: 50.68s
Epoch 4/50, Train Loss: 9344.5042, Val Loss: 390.5471, Val Error: 9.20%, Time: 49.50s
Epoch 5/50, Train Loss: 7852.6975, Val Loss: 334.8559, Val Error: 9.20%, Time: 51.46s
Epoch 6/50, Train Loss: 6930.7381, Val Loss: 302.4638, Val Error: 8.96%, Time: 53.31s
Epoch 7/50, Train Loss: 6401.3755, Val Loss: 283.7884, Val Error: 9.00%, Time: 51.52s
Epoch 8/50, Train Loss: 6087.2564, Val Loss: 272.2376, Val Error: 8.97%, Time: 51.99s
Epoch 9/50, Train Loss: 5883.5541, Val Loss: 264.3164, Val Error: 9.21%, Time: 51.09s
Epoch 10/50, Train Loss: 5735.9754, Val Loss: 258.2136, Val Error: 9.06%, Time: 50.98s
Epoch 11/50, Train Loss: 5618.4322, Val Loss: 253.1820, Val Error: 9.28%, Time: 50.42s
Epoch 12/50, Train Loss: 5516.5795, Val Loss: 24

[34m[1mwandb[0m: uploading output.log; uploading config.yaml
[34m[1mwandb[0m:                                                                                
[34m[1mwandb[0m: 
[34m[1mwandb[0m: Run history:
[34m[1mwandb[0m:      epoch ▁▁▁▁▂▂▂▂▂▂▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇███
[34m[1mwandb[0m: train_loss █▆▅▄▃▂▂▂▂▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
[34m[1mwandb[0m:  val_error █▂▂▂▂▁▁▂▁▂▁▂▁▁▂▂▂▂▂▃▄▃▄▄▄▅▅▅▅▅▅▄▅▅▅▅▅▅▅▄
[34m[1mwandb[0m:   val_loss █▇▅▄▃▂▂▂▂▂▂▂▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
[34m[1mwandb[0m: 
[34m[1mwandb[0m: Run summary:
[34m[1mwandb[0m:      epoch 49
[34m[1mwandb[0m: train_loss 3984.27897
[34m[1mwandb[0m:  val_error 0.1159
[34m[1mwandb[0m:   val_loss 178.83055
[34m[1mwandb[0m: 
[34m[1mwandb[0m: 🚀 View run [33mmnist[0m at: [34m[4mhttps://wandb.ai/matteo-ghia-2001/asi-paper/runs/owqj188y[0m
[34m[1mwandb[0m: ⭐️ View project at: [34m[4mhttps://wandb.ai/matteo-ghia-2001/asi-paper[0m
[34m[1mwandb[0m: Synced 5 W&B file(s), 0 me

In [12]:
# model = MNISTModel(prior_sigma_1=np.exp(-1), prior_sigma_2=np.exp(-7), prior_pi=0.75)
# model.to(device)
# model.load_state_dict(torch.load("mnist_model.pt"))

# Regression curves

In [13]:
def generate_samples(num_samples):
    eps = np.random.normal(0, 0.02, num_samples)
    x = np.linspace(0, 0.5, num_samples)
    y = x + 0.3 * np.sin(2 * np.pi * (x + eps)) + 0.3 * np.sin(4 * np.pi * (x + eps))
    return x, y


def save_samples(x, y, filename):
    df = pd.DataFrame({'x': x, 'y': y})
    df.to_csv(filename, index=False)


def load_samples(filename):
    df = pd.read_csv(filename)
    x = df['x'].values
    y = df['y'].values
    return x, y


def plot_samples(x, y):
    plt.figure(figsize=(10, 5))
    plt.plot(x, y, 'kx', label='Generated Samples')
    plt.title('Generated Samples')
    plt.xlabel('x')
    plt.ylabel('y')
    plt.legend()
    plt.show()


# x, y = generate_samples(1000)
# save_samples(x, y, 'regression_samples.csv')


# x, y = load_samples('regression_samples.csv')
# plot_samples(x, y)

# X_tensor = torch.tensor(x, dtype=torch.float32).view(-1, 1).to(device)
# y_tensor = torch.tensor(y, dtype=torch.float32).view(-1, 1).to(device)

# train_dataset = torch.utils.data.TensorDataset(X_tensor[:800], y_tensor[:800])
# val_dataset = torch.utils.data.TensorDataset(X_tensor[800:], y_tensor[800:])


# kwargs = {
#     'batch_size': batch_size,
#     'generator': generator,
# }

# train_loader = torch.utils.data.DataLoader(
#     train_dataset,
#     shuffle=True,
#     **kwargs
# )
# val_loader = torch.utils.data.DataLoader(
#     val_dataset,
#     shuffle=False,
#     **kwargs
# )

In [14]:
class RegressionModel(nn.Module):
    def __init__(self, in_features=1, out_features=1, prior_sigma_1=0.1, prior_sigma_2=0.4, prior_pi=1):
        super().__init__()

        self.layers = nn.Sequential(
            BayesLinear(
                in_features,
                200,
                prior_pi,
                prior_sigma_1,
                prior_sigma_2
            ),
            nn.ReLU(),
            BayesLinear(
                200,
                200,
                prior_pi,
                prior_sigma_1,
                prior_sigma_2
            ),
            nn.ReLU(),
            BayesLinear(
                200,
                out_features,
                prior_pi,
                prior_sigma_1,
                prior_sigma_2,
            ),
        )

    def forward(self, x):
        x = self.layers(x)
        return x

In [15]:
def evaluate_regression(regressor, X, y, samples=100, std_multiplier=2):
    preds = [regressor(X) for _ in range(samples)]
    preds = torch.stack(preds)
    means = preds.mean(axis=0)
    stds = preds.std(axis=0)
    ci_upper = means + (std_multiplier * stds)
    ci_lower = means - (std_multiplier * stds)
    ci_acc = (ci_lower <= y) * (ci_upper >= y)
    ci_acc = ci_acc.float().mean()
    return ci_acc, (ci_upper >= y).float().mean(), (ci_lower <= y).float().mean()


def train_regression(model, train_loader, val_loader, optimizer, criterion, num_epochs, num_samples, use_wandb=False):
    for epoch in range(num_epochs):
        now = time.time()

        train_loss = train_one_epoch(model, train_loader, optimizer, criterion, num_samples)
        ci_acc, ci_upper, ci_lower = evaluate_regression(model, val_loader.dataset.tensors[0], val_loader.dataset.tensors[1])

        elapsed = time.time() - now

        if use_wandb:
            wandb.log({
                "epoch": epoch,
                "train_loss": train_loss,
                "ci_acc": ci_acc,
                "ci_upper": ci_upper,
                "ci_lower": ci_lower,
            })

        print(f"Epoch {epoch + 1}/{num_epochs}, Train Loss: {train_loss:.4f}, CI acc: {ci_acc}, CI upper acc: {ci_upper}, CI lower acc: {ci_lower} Time: {elapsed:.2f}s")

In [16]:
def train_regression_model(train_loader, val_loader, epochs, lr, num_samples, pi, minus_log_sigma1, minus_log_sigma2, use_wandb=False):
    sigma1 = np.exp(-minus_log_sigma1)
    sigma2 = np.exp(-minus_log_sigma2)

    model = RegressionModel(1, 1, prior_sigma_1=sigma1, prior_sigma_2=sigma2, prior_pi=pi)
    model.to(device)

    optimizer = torch.optim.Adam(model.parameters(), lr=lr)
    criterion = nn.MSELoss()

    # if use_wandb:
    #     run = wandb.init(project="asi-paper", name="regression")

    train_regression(model, train_loader, val_loader, optimizer, criterion, epochs, num_samples, use_wandb=use_wandb)

    # if use_wandb:
    #     run.finish()

    return model

In [17]:
# from kaggle_secrets import UserSecretsClient
# user_secrets = UserSecretsClient()
# key = user_secrets.get_secret('wand-api-key-asi')

# wandb.login(key=key)


# def train_wrapper():
#     with wandb.init(project="asi-paper") as run:
#         model = train_regression_model(
#             train_loader,
#             val_loader,
#             epochs=15,
#             lr=run.config.lr,
#             num_samples=run.config.sample_nbr,
#             pi=run.config.pi,
#             minus_log_sigma1=run.config.min_log_sigma1,
#             minus_log_sigma2=run.config.min_log_sigma2,
#             use_wandb=True
#         )

#     return model


# sweep_configuration = {
#     "method": "bayes",
#     "metric": {"goal": "maximize", "name": "ci_acc"},
#     'name': "sweep-regression",
#     "parameters": {
#         "lr": {'min': 1e-5, 'max': 1e-2},
#         "sample_nbr": {'min': 1, 'max': 10},
#         "pi": {'min': 0.25, 'max': 0.75},
#         "min_log_sigma1": {'min': 0, 'max': 2},
#         "min_log_sigma2": {'min': 6, 'max': 8},
#     },
# }

# sweep_id = wandb.sweep(sweep=sweep_configuration, project="asi-paper")
# wandb.agent(sweep_id, function=train_wrapper)

In [18]:
# model = train_regression_model(train_loader, val_loader, epochs=10, lr=1e-3, num_samples=1, pi=0.5, minus_log_sigma1=0.5, minus_log_sigma2=0)

In [19]:
# model.eval()
# predicted = model(X_tensor).cpu().detach().numpy()

# plt.figure(figsize=(10, 5))
# plt.plot(x, y, 'kx', label='Generated Samples')
# plt.plot(x, predicted, 'r-', label='Predicted Mean')
# # plt.fill_between(x, predicted - 2 * np.std(predicted), predicted + 2 * np.std(predicted), color='r', alpha=0.2, label='Uncertainty')
# plt.title('Regression with Uncertainty')
# plt.xlabel('x')
# plt.ylabel('y')
# plt.legend()
# plt.show()