In [1]:
import random
import pickle
import numpy as np

from torch.optim.lr_scheduler import *
import torch.nn.functional as F
import torch.nn as nn
import torch

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn import metrics

import matplotlib.pyplot as plt

In [2]:
from tqdm import tqdm as tqdm_base

def tqdm(*args, **kwargs):
    if hasattr(tqdm_base, '_instances'):
        for instance in list(tqdm_base._instances):
            tqdm_base._decr_instances(instance)
    return tqdm_base(*args, **kwargs)

### Set the device: cpu or gpu

In [3]:
is_cuda = torch.cuda.is_available()
device = torch.device(torch.cuda.current_device()) if is_cuda else torch.device("cpu")

if is_cuda:
    torch.backends.cudnn.benchmark = True

print(f'Preparing to use device {device}')

Preparing to use device cuda:0


### Load the data

In [4]:
with open('/glade/u/home/gwallach/goes16ci/train_data_scaled.pkl','rb') as f:
    x = pickle.load(f)

In [5]:
print("Saved shape:", x.shape)
x = x.transpose(0,3,1,2)
print("Reshaped to:", x.shape)

Saved shape: (151246, 32, 32, 4)
Reshaped to: (151246, 4, 32, 32)


In [6]:
with open('/glade/u/home/gwallach/goes16ci/train_counts.pkl','rb') as f:
    y = pickle.load(f)

In [7]:
print("Saved shape:", y.shape)
y = y.reshape(y.shape[0], 1)
print("Reshaped to:", y.shape)

Saved shape: (151246,)
Reshaped to: (151246, 1)


### Subsample the data 

In [8]:
# gt0 = list(np.where(y >= 1)[0])
# lt0 = list(np.where(y < 1)[0])
# take_a_sample = random.sample(lt0, 10)

# sample = gt0 + take_a_sample

# x = x[sample]
# y = y[sample]

### Split the data into train / test partitions

In [9]:
X_train, X_test, Y_train, Y_test = train_test_split(
    x, y, test_size=0.2, random_state = 5000
)

In [10]:
# the image data is already scaled. only scale the lightning counts: subtract the mean and divide by sigma to get z-scores
y_scaler = StandardScaler()

#y_train = y_scaler.fit_transform(Y_train)
#y_test = y_scaler.transform(Y_test)

# y_train = np.log1p(Y_train)
# y_test = np.log1p(Y_test)

y_train = np.where(Y_train[:] > 0.0, 1, 0)
y_test = np.where(Y_test[:] > 0.0, 1, 0)

In [11]:
#plt.hist(np.log1p(np.log1p(Y_train)))

### Load a model 

In [23]:
# def conv3x3(in_planes, out_planes, stride=1, dilation=1):
#     "3x3 convolution with padding"
#     # here with dilation
#     return nn.Conv2d(in_planes, out_planes, kernel_size=3, stride=stride, padding=1+(dilation-1)*(3-1), dilation=dilation, bias=False)


# class BasicBlock(nn.Module):
#     expansion = 1

#     def __init__(self, inplanes, planes, stride=1, dilation=1, downsample=None):
#         super(BasicBlock, self).__init__()
#         self.conv1 = conv3x3(inplanes, planes, stride)
#         self.bn1 = nn.BatchNorm2d(planes)
#         self.relu = nn.ReLU(inplace=True)
#         self.conv2 = conv3x3(planes, planes)
#         self.bn2 = nn.BatchNorm2d(planes)
#         self.downsample = downsample
#         self.stride = stride

#     def forward(self, x):
#         residual = x

#         out = self.conv1(x)
#         out = self.bn1(out)
#         out = self.relu(out)

#         out = self.conv2(out)
#         out = self.bn2(out)

#         if self.downsample is not None:
#             residual = self.downsample(x)

#         out += residual
#         out = self.relu(out)

#         return out

    
# class Net(nn.Module):
#     def __init__(self, block, layers, fcl_layers = [1], dr = 0.0, output_size = 1):
#         self.inplanes = 64
        
#         super(Net, self).__init__()
        
#         self.conv1 = nn.Conv2d(4, 64, kernel_size=7, stride=2, padding=3,
#                                bias=False)
#         self.bn1 = nn.BatchNorm2d(64)
#         self.relu = nn.ReLU(inplace=True)
#         self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)
#         # note the increasing dilation
#         self.layer1 = self._make_layer(block, 64, layers[0])
#         self.layer2 = self._make_layer(block, 128, layers[1], stride=2, dilation=1)
#         self.layer3 = self._make_layer(block, 256, layers[2], stride=1, dilation=2)
#         self.layer4 = self._make_layer(block, 512, layers[3], stride=1, dilation=4)

#         # these layers will not be used
#         self.avgpool = nn.AvgPool2d(4)
#         #self.fc = nn.Linear(512 * block.expansion, num_classes)

#         for m in self.modules():
#             if isinstance(m, nn.Conv2d):
#                 n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels
#                 m.weight.data.normal_(0, math.sqrt(2. / n))
#             elif isinstance(m, nn.BatchNorm2d):
#                 m.weight.data.fill_(1)
#                 m.bias.data.zero_()
                
#         ### Set up the fully-connected network to predict the lightning count
#         conv_flatten = 512 * block.expansion
#         if len(fcl_layers) > 0:
#             fcn = [
#                 nn.Linear(conv_flatten, fcl_layers[0]),
#                 nn.BatchNorm1d(fcl_layers[0]),
#                 nn.LeakyReLU(),
#                 nn.Dropout(dr)
#             ]
#             if len(fcl_layers) == 1:
#                 fcn.append(nn.Linear(fcl_layers[0], output_size))
#             else:
#                 for i in range(len(fcl_layers)-1):
#                     fcn += [
#                         nn.Linear(fcl_layers[i], fcl_layers[i+1]),
#                         nn.BatchNorm1d(fcl_layers[i+1]),
#                         nn.LeakyReLU(),
#                         nn.Dropout(dr)
#                     ]
#                 fcn.append(nn.Linear(fcl_layers[i+1], output_size))
#         else:
#             fcn = [
#                 nn.Linear(conv_flatten, output_size)
#             ]
#         if output_size > 1:
#             fcn.append(torch.nn.LogSoftmax())
#         self.fcn = nn.Sequential(*fcn)

#     def _make_layer(self, block, planes, blocks, stride=1, dilation=1):
#         downsample = None
#         if stride != 1 or self.inplanes != planes * block.expansion:
#             downsample = nn.Sequential(
#                 nn.Conv2d(self.inplanes, planes * block.expansion,
#                           kernel_size=1, stride=stride, bias=False),
#                 nn.BatchNorm2d(planes * block.expansion),
#             )

#         layers = []
#         layers.append(block(self.inplanes, planes, stride, 1, downsample))
#         self.inplanes = planes * block.expansion
#         for i in range(1, blocks):
#             # here with dilation
#             layers.append(block(self.inplanes, planes, dilation=dilation))

#         return nn.Sequential(*layers)

#     def forward(self, x):
#         x = self.conv1(x)
#         x = self.bn1(x)
#         x = self.relu(x)
#         x = self.maxpool(x)

#         x = self.layer1(x)
#         x = self.layer2(x)
#         x = self.layer3(x)
#         x = self.layer4(x)

#         # deactivated layers
#         x = self.avgpool(x)
#         #print(x.shape)
#         x = x.view(x.size(0), -1)
#         x = self.fcn(x)

#         return x
    
    
class BasicBlock(nn.Module):
    expansion = 1

    def __init__(self, in_planes, planes, stride=1):
        super(BasicBlock, self).__init__()
        self.conv1 = nn.Conv2d(in_planes, planes, kernel_size=3, stride=stride, padding=1, bias=False)
        self.bn1 = nn.BatchNorm2d(planes)
        self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=1, padding=1, bias=False)
        self.bn2 = nn.BatchNorm2d(planes)

        self.shortcut = nn.Sequential()
        if stride != 1 or in_planes != self.expansion*planes:
            self.shortcut = nn.Sequential(
                nn.Conv2d(in_planes, self.expansion*planes, kernel_size=1, stride=stride, bias=False),
                nn.BatchNorm2d(self.expansion*planes)
            )

    def forward(self, x):
        out = F.relu(self.bn1(self.conv1(x)))
        out = self.bn2(self.conv2(out))
        out += self.shortcut(x)
        out = F.relu(out)
        return out


class Bottleneck(nn.Module):
    expansion = 4

    def __init__(self, in_planes, planes, stride=1):
        super(Bottleneck, self).__init__()
        self.conv1 = nn.Conv2d(in_planes, planes, kernel_size=1, bias=False)
        self.bn1 = nn.BatchNorm2d(planes)
        self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=stride, padding=1, bias=False)
        self.bn2 = nn.BatchNorm2d(planes)
        self.conv3 = nn.Conv2d(planes, self.expansion*planes, kernel_size=1, bias=False)
        self.bn3 = nn.BatchNorm2d(self.expansion*planes)

        self.shortcut = nn.Sequential()
        if stride != 1 or in_planes != self.expansion*planes:
            self.shortcut = nn.Sequential(
                nn.Conv2d(in_planes, self.expansion*planes, kernel_size=1, stride=stride, bias=False),
                nn.BatchNorm2d(self.expansion*planes)
            )

    def forward(self, x):
        out = F.relu(self.bn1(self.conv1(x)))
        out = F.relu(self.bn2(self.conv2(out)))
        out = self.bn3(self.conv3(out))
        out += self.shortcut(x)
        out = F.relu(out)
        return out


class ResNet(nn.Module):
    def __init__(self, block, num_blocks, fcl_layers = [], dr = 0.0, output_size = 1):
        super(ResNet, self).__init__()
        self.in_planes = 64

        self.conv1 = nn.Conv2d(4, 64, kernel_size=3, stride=1, padding=1, bias=False)
        self.bn1 = nn.BatchNorm2d(64)
        self.layer1 = self._make_layer(block, 64, num_blocks[0], stride=1)
        self.layer2 = self._make_layer(block, 128, num_blocks[1], stride=2)
        self.layer3 = self._make_layer(block, 256, num_blocks[2], stride=2)
        self.layer4 = self._make_layer(block, 512, num_blocks[3], stride=2)
        
        #self.linear = nn.Linear(512*block.expansion, output_size)
        self.fcn = self.make_fcn(512*block.expansion, output_size, fcl_layers, dr)
        
    def make_fcn(self, input_size, output_size, fcl_layers, dr):
        if len(fcl_layers) > 0:
            fcn = [
                nn.Dropout(dr),
                nn.Linear(input_size, fcl_layers[0]),
                nn.BatchNorm1d(fcl_layers[0]),
                torch.nn.LeakyReLU()
            ]
            if len(fcl_layers) == 1:
                fcn.append(nn.Linear(fcl_layers[0], output_size))
            else:
                for i in range(len(fcl_layers)-1):
                    fcn += [
                        nn.Linear(fcl_layers[i], fcl_layers[i+1]),
                        nn.BatchNorm1d(fcl_layers[i+1]),
                        torch.nn.LeakyReLU(),
                        nn.Dropout(dr)
                    ]
                fcn.append(nn.Linear(fcl_layers[i+1], output_size))
        else:
            fcn = [
                nn.Dropout(dr),
                nn.Linear(input_size, output_size)
            ]
        if output_size > 1:
            fcn.append(torch.nn.LogSoftmax(dim=1))
        return nn.Sequential(*fcn)

    def _make_layer(self, block, planes, num_blocks, stride):
        strides = [stride] + [1]*(num_blocks-1)
        layers = []
        for stride in strides:
            layers.append(block(self.in_planes, planes, stride))
            self.in_planes = planes * block.expansion
        return nn.Sequential(*layers)

    def forward(self, x):
        out = F.relu(self.bn1(self.conv1(x)))
        out = self.layer1(out)
        out = self.layer2(out)
        out = self.layer3(out)
        out = self.layer4(out)
        out = F.avg_pool2d(out, 4)
        out = out.view(out.size(0), -1)
        x = self.fcn(out)
        return x

def ResNet18(fcl_layers = [], dr = 0.0, output_size = 1):
    return ResNet(BasicBlock, [2,2,2,2], fcl_layers = fcl_layers, dr = dr, output_size = output_size)

def ResNet34(fcl_layers = [], dr = 0.0, output_size = 1):
    return ResNet(BasicBlock, [3,4,6,3], fcl_layers = fcl_layers, dr = dr, output_size = output_size)

def ResNet50(fcl_layers = [], dr = 0.0, output_size = 1):
    return ResNet(Bottleneck, [3,4,6,3], fcl_layers = fcl_layers, dr = dr, output_size = output_size)

def ResNet101(fcl_layers = [], dr = 0.0, output_size = 1):
    return ResNet(Bottleneck, [3,4,23,3], fcl_layers = fcl_layers, dr = dr, output_size = output_size)

def ResNet152(fcl_layers = [], dr = 0.0, output_size = 1):
    return ResNet(Bottleneck, [3,8,36,3], fcl_layers = fcl_layers, dr = dr, output_size = output_size)

In [24]:
output_size = 2
fcl_layers = [1000]
dropout = 0.5

#model = Net(BasicBlock, [2,2,2,2], fcl_layers, dr = dropout, output_size = output_size).to(device)
model = ResNet18(fcl_layers, dr = dropout, output_size = output_size).to(device)

In [25]:
model

ResNet(
  (conv1): Conv2d(4, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
  (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (layer1): Sequential(
    (0): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (shortcut): Sequential()
    )
    (1): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=

### Test model to make sure the architecture is consistent

In [26]:
X = torch.from_numpy(X_train[:2]).float().to(device)
print(X.shape)

torch.Size([2, 4, 32, 32])


In [27]:
model(X).exp() # exp to turn the logits into probabilities, since we used LogSoftmax

tensor([[0.4283, 0.5717],
        [0.5403, 0.4597]], device='cuda:0', grad_fn=<ExpBackward>)

### Load an optimizer

In [28]:
learning_rate = 0.0001

In [29]:
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

### Load a loss function

In [30]:
train_criterion = torch.nn.CrossEntropyLoss() # this is mean-squared error
test_criterion = torch.nn.CrossEntropyLoss()  # this is mean absolute error

# train_criterion = torch.nn.MSELoss() # this is mean-squared error
# test_criterion = torch.nn.L1Loss()  # this is mean absolute error

### Train the model 

In [31]:
# this will help lower the learning rate when the model stops improving
lr_scheduler = ReduceLROnPlateau(
    optimizer, 
    patience = 1, 
    verbose = False
)

In [32]:
epochs = 1000 
train_batch_size = 32
valid_batch_size = 128
batches_per_epoch = 1000

patience = 10 # this is how many epochs we will keep training since we last saw a "best" model -- "early stopping"

In [33]:
epoch_test_losses = []

for epoch in range(epochs):

    ### Train the model 
    model.train()

    # Shuffle the data first
    batch_loss = []
    accuracy = []
    indices = list(range(X_train.shape[0]))
    random.shuffle(indices)
    
    # Now split into batches
    train_batches_per_epoch = int(X_train.shape[0] / train_batch_size) 
    train_batches_per_epoch = min(batches_per_epoch, train_batches_per_epoch)
    X = np.array_split(X_train[indices], train_batches_per_epoch)
    Y = np.array_split(y_train[indices], train_batches_per_epoch)
    
    # custom tqdm so we can see the progress
    training_data = zip(X, Y)
    batch_group_generator = tqdm(
        enumerate(training_data), 
        total=batches_per_epoch, 
        leave=True
    )

    for k, (x, y) in batch_group_generator:

        # Converting to torch tensors and moving to GPU
        inputs = torch.from_numpy(x).float().to(device)
        lightning_counts = torch.from_numpy(y).long().to(device)

        # Clear gradient
        optimizer.zero_grad()

        # get output from the model, given the inputs
        pred_lightning_counts = model(inputs)

        # get loss for the predicted output
        loss = train_criterion(pred_lightning_counts, lightning_counts.squeeze(-1))
        
        # compute the accuracy
        acc = (torch.argmax(pred_lightning_counts, 1) == lightning_counts.squeeze(-1)).float()
        accuracy += list(acc.cpu().numpy())

        # get gradients w.r.t to parameters
        loss.backward()
        batch_loss.append(loss.item())

        # update parameters
        optimizer.step()

        # update tqdm
        to_print = "Epoch {} train_loss: {:.4f}".format(epoch, np.mean(batch_loss))
        to_print += " train_acc: {:.4f}".format(np.mean(accuracy))
        to_print += " lr: {:.12f}".format(optimizer.param_groups[0]['lr'])
        batch_group_generator.set_description(to_print)
        batch_group_generator.update()
                                  
        if k >= train_batches_per_epoch and k > 0:
            break
        
    torch.cuda.empty_cache()

    ### Test the model 
    model.eval()
    with torch.no_grad():

        batch_loss = []
        accuracy = []
        # split test data into batches
        batches_per_epoch = int(X_test.shape[0] / valid_batch_size)
        X = np.array_split(X_test, batches_per_epoch)
        Y = np.array_split(y_test, batches_per_epoch)
        
        # custom tqdm so we can see the progress
        test_data = zip(X, Y)
        batch_group_generator = tqdm(
            test_data, 
            total=batches_per_epoch, 
            leave=True
        )

        for (x, y) in batch_group_generator:
            # Converting to torch tensors and moving to GPU
            inputs = torch.from_numpy(x).float().to(device)
            lightning_counts = torch.from_numpy(y).long().to(device)
            # get output from the model, given the inputs
            pred_lightning_counts = model(inputs)
            # get loss for the predicted output
            loss = test_criterion(pred_lightning_counts, lightning_counts.squeeze(-1))
            batch_loss.append(loss.item())
            # compute the accuracy
            acc = (torch.argmax(pred_lightning_counts, 1) == lightning_counts.squeeze(-1)).float()
            accuracy += list(acc.cpu().numpy())
            # update tqdm
            to_print = "Epoch {} test_loss: {:.4f}".format(epoch, np.mean(batch_loss))
            to_print += " test_acc: {:.4f}".format(np.mean(accuracy))
            batch_group_generator.set_description(to_print)
            batch_group_generator.update()

    test_loss = np.mean(batch_loss)
    epoch_test_losses.append(test_loss)
    
    # Lower the learning rate if we are not improving
    lr_scheduler.step(test_loss)

    # Save the model if its the best so far.
    if test_loss == min(epoch_test_losses):
        state_dict = {
            'epoch': epoch,
            'model_state_dict': model.state_dict(),
            'optimizer_state_dict': optimizer.state_dict(),
            'loss': test_loss
        }
        torch.save(state_dict, "best.pt")
        
    # Stop training if we have not improved after X epochs
    best_epoch = [i for i,j in enumerate(epoch_test_losses) if j == min(epoch_test_losses)][-1]
    offset = epoch - best_epoch
    if offset >= patience:
        break
        
    #-l gpu_type = gpu100, v100

Epoch 0 train_loss: 0.1845 train_acc: 0.9271 lr: 0.000100000000: 100%|██████████| 1000/1000 [04:13<00:00,  3.94it/s]
Epoch 0 test_loss: 0.1879 test_acc: 0.9285: 100%|██████████| 236/236 [00:06<00:00, 35.64it/s]
Epoch 1 train_loss: 0.1409 train_acc: 0.9459 lr: 0.000100000000: 100%|██████████| 236/236 [04:03<00:00,  1.03s/it]
Epoch 1 test_loss: 0.1418 test_acc: 0.9466: 100%|██████████| 236/236 [00:06<00:00, 35.82it/s]
Epoch 2 train_loss: 0.1315 train_acc: 0.9505 lr: 0.000100000000: 100%|██████████| 236/236 [03:59<00:00,  1.01s/it]
Epoch 2 test_loss: 0.1513 test_acc: 0.9388: 100%|██████████| 236/236 [00:06<00:00, 34.33it/s]
Epoch 3 train_loss: 0.1265 train_acc: 0.9523 lr: 0.000100000000: 100%|██████████| 236/236 [03:59<00:00,  1.02s/it]
Epoch 3 test_loss: 0.1361 test_acc: 0.9479: 100%|██████████| 236/236 [00:06<00:00, 36.26it/s]
Epoch 4 train_loss: 0.1188 train_acc: 0.9558 lr: 0.000100000000: 100%|██████████| 236/236 [03:55<00:00,  1.00it/s]
Epoch 4 test_loss: 0.1552 test_acc: 0.9442: 100

### Compute some test metrics

##### Load the best model

In [37]:
checkpoint = torch.load(
    "best.pt",
    map_location=lambda storage, loc: storage
)
best_epoch = checkpoint["epoch"]
#model = Net(filter_sizes, fcl_layers).to(device)
model = ResNet18(fcl_layers, dr = dropout, output_size = output_size).to(device)
model.load_state_dict(checkpoint["model_state_dict"])

<All keys matched successfully>

##### Predict on the test dataset with the best model

In [38]:
model.eval()
with torch.no_grad():
    y_true = []
    y_pred = []
    batch_loss = []
    accuracy = []
    # split test data into batches
    batches_per_epoch = int(X_test.shape[0] / valid_batch_size)
    X = np.array_split(X_test, batches_per_epoch)
    Y = np.array_split(y_test, batches_per_epoch)
    
    for (x, y) in zip(X, Y):
        # Converting to torch tensors and moving to GPU
        inputs = torch.from_numpy(x).float().to(device)
        lightning_counts = torch.from_numpy(y).long().to(device)
        # get output from the model, given the inputs
        pred_lightning_counts = model(inputs)
        # get loss for the predicted output
        loss = test_criterion(pred_lightning_counts, lightning_counts.squeeze(-1))
        batch_loss.append(loss.item())
        # compute the accuracy
        acc = (torch.argmax(pred_lightning_counts, 1) == lightning_counts.squeeze(-1)).float()
        accuracy += list(acc.cpu().numpy())
        
        y_true.append(lightning_counts.squeeze(-1))
        y_pred.append(torch.argmax(pred_lightning_counts, 1))

y_true = torch.cat(y_true, axis = 0)
y_pred = torch.cat(y_pred, axis = 0)

In [39]:
print(np.mean(batch_loss), np.mean(accuracy))

0.1361466792996152 0.9479339


In [41]:
#y_norm = np.expm1(y_true.cpu().numpy()) #y_scaler.inverse_transform(y_true.cpu().numpy())
#y_pred_norm = np.expm1(y_pred.cpu().numpy()) #y_scaler.inverse_transform(y_pred.cpu().numpy())

y_true = y_true.cpu().numpy()
y_pred = y_pred.cpu().numpy()

In [59]:
print(metrics.confusion_matrix(y_true, y_pred, normalize = 'true'))

[[0.95372506 0.04627494]
 [0.0632764  0.9367236 ]]


In [56]:
metrics.f1_score(y_true, y_pred)

0.9245653527467791

In [48]:
# def plotter(p, t, X):
#     plt.scatter(p, t, alpha = 0.005)
#     plt.plot(range(X), range(X), c = 'k')

#     r2 = metrics.r2_score(t, p)
#     acc = []
#     for x,y in zip(t, p):
#         if x > 0:
#             acc.append(100.0*abs(x-y)/abs(x))
#         else:
#             acc.append(100.0*abs(x-y) / 1.0)
    
#     plt.xlabel("Predicted", fontsize = 14)
#     plt.ylabel("True", fontsize = 14)
#     plt.axis('square')
    
#     acc = np.mean(acc)
#     plt.tight_layout()
    
#     plt.xlim([-1,X])
#     plt.ylim([-1,X])

#     print(r2, acc)

In [49]:
#plotter(y_pred, y_true, X = 2)

In [50]:
# plt.hist(y_true, bins = range(500), density = True)
# plt.xlim([-1, 20])
# plt.ylim([0, 1.0])

In [51]:
# plt.hist(y_pred, bins = range(500), density = True)
# plt.xlim([-1, 20])
# plt.ylim([0, 1.0])