In [1]:
import os
import pandas as pd 
import numpy as np 

# visualisation
import matplotlib.pylab as plt
import matplotlib.font_manager as fm

# our scripts 
import src.processing as sp 
import settings as st
import utils.helpers as hp
import utils.imaging as ui

### Correct the image locations

In [2]:
%%capture
# Run `git pull` in a shell; the %%capture cell magic suppresses the command's output.
# NOTE(review): this changes the working tree each time the notebook is run — confirm it is intended.
!git pull

In [3]:
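# NOTE(review): disabled step — presumably run once to write the 'dr5_votes' file that the next cell reads; confirm before re-enabling.
# dr5_desc = sp.correct_location('gz_decals_volunteers_5', save = True, filename='dr5_votes')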

### The data with the votes

In [4]:
# Read the DR5 votes table (parquet) from the project data directory.
dr5 = hp.read_parquet(
    st.DATA_DIR,
    'descriptions/dr5_votes',
)

### Generate the labels 

In [5]:
# Derive the label table from the votes; nan_value=0 is passed for missing
# entries and save=False is requested (presumably nothing is persisted —
# TODO confirm against sp.generate_labels).
labels = sp.generate_labels(
    dr5,
    nan_value=0,
    save=False,
)

In [6]:
# Preview the first five rows of the label table.
labels.head(n=5)

Unnamed: 0,iauname,png_loc,smooth-or-featured_smooth_fraction,smooth-or-featured_featured-or-disk_fraction,smooth-or-featured_artifact_fraction,disk-edge-on_yes_fraction,disk-edge-on_no_fraction,has-spiral-arms_yes_fraction,has-spiral-arms_no_fraction,bar_strong_fraction,...,spiral-arm-count_1_fraction,spiral-arm-count_2_fraction,spiral-arm-count_3_fraction,spiral-arm-count_4_fraction,spiral-arm-count_more-than-4_fraction,spiral-arm-count_cant-tell_fraction,merging_none_fraction,merging_minor-disturbance_fraction,merging_major-disturbance_fraction,merging_merger_fraction
0,J112953.88-000427.4,J112/J112953.88-000427.4.png,1,0,0,0,1,0,1,0,...,0,0,0,0,0,1,1,0,0,0
1,J104325.29+190335.0,J104/J104325.29+190335.0.png,1,0,0,0,1,0,1,0,...,0,0,0,0,0,0,0,0,0,0
2,J104629.54+115415.1,J104/J104629.54+115415.1.png,0,1,0,0,1,1,0,0,...,0,1,0,0,0,0,1,0,0,0
3,J082950.68+125621.8,J082/J082950.68+125621.8.png,0,1,0,1,0,0,0,0,...,0,0,0,0,0,0,1,0,0,0
4,J122056.00-015022.0,J122/J122056.00-015022.0.png,0,1,0,0,1,1,0,0,...,0,1,0,0,0,0,1,0,0,0


# Calculate the weights per class

In [7]:
# Keep only the label columns: per the preview above, the first two columns
# (iauname, png_loc) are identifiers, not labels.
labels_num = labels.iloc[:, 2:]

# Column-wise total of each label, i.e. how strongly each class is represented.
weights = labels_num.sum(axis=0)

# Inverse-frequency weighting. A class whose total is 0 would give 1/0 = inf,
# and the normalisation below would then collapse every weight to 0 or NaN.
# Such classes are masked out and given a weight of 0 instead; classes with a
# positive total are unaffected.
inv_weights = (1 / weights.where(weights > 0)).fillna(0.0)

# Rescale so the weights sum to 1.
inv_weights_norm = inv_weights / inv_weights.sum()

# Split into train, validation and test sets

In [8]:
# Partition the labelled data into splits.
# NOTE(review): the two 0.2 arguments are presumably the validation and test
# fractions — confirm against sp.split_data.
train_val_test = sp.split_data(
    labels,
    0.2,
    0.2,
    save=False,
)

In [9]:
# (rows, columns) of the training split.
train_val_test["train"].shape

(151970, 36)

In [10]:
# (rows, columns) of the validation split.
train_val_test["validate"].shape

(50658, 36)

In [11]:
# (rows, columns) of the test split.
train_val_test["test"].shape

(50658, 36)

In [12]:
# (rows, columns) of the full label table, for comparison with the three splits.
labels.shape

(253286, 36)

### DataLoader

In [13]:
from src.dataset import DECaLSDataset
from torch.utils.data import Dataset, DataLoader

In [14]:
# Training dataset with augmentation switched off.
traindataset = DECaLSDataset(
    mode='train',
    augment=False,
)

In [15]:
# Serve the training dataset in shuffled mini-batches of 4.
train_loader = DataLoader(
    traindataset,
    batch_size=4,
    shuffle=True,
)

In [16]:
# Create an iterator over the training loader so a batch can be pulled by hand.
dataiter = iter(train_loader)

In [17]:
# Fetch one batch from the loader iterator.
# The builtin next() is used rather than the `.next()` method: `.next()` is the
# Python 2 iterator protocol and is not available on current PyTorch
# DataLoader iterators, whereas next() works with any iterator.
data = next(dataiter)

# Test Network

In [18]:
import os
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader
from torch.utils.tensorboard import SummaryWriter

# our scripts and functions
from src.network import MultiLabelNet
from src.dataset import DECaLSDataset
import settings as st

In [19]:
# Directory for outputs; created if it does not exist yet.
out_path = './output/'
os.makedirs(out_path, exist_ok=True)

# Use the GPU when PyTorch reports a CUDA device, otherwise fall back to CPU.
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')

# Datasets for the training and validation modes, both without augmentation.
train_dataset = DECaLSDataset(mode='train', augment=False)
val_dataset = DECaLSDataset(mode='validate', augment=False)

# Mini-batches of 4 for both loaders; only the training loader shuffles.
train_loader = DataLoader(train_dataset, batch_size=4, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=4, shuffle=False)

In [20]:
# define the model
model = MultiLabelNet(backbone="resnet18")
model.to(device)

MultiLabelNet(
  (backbone): ResNet(
    (conv1): Conv2d(1, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
    (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (relu): ReLU(inplace=True)
    (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
    (layer1): Sequential(
      (0): BasicBlock(
        (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
        (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (relu): ReLU(inplace=True)
        (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
        (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      )
      (1): BasicBlock(
        (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
        (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, tr

In [22]:
# to assign weights to this loss function
weights = torch.tensor(st.CLASS_WEIGHTS)

# set the optimizer
optimizer = torch.optim.Adam(model.parameters(), lr=1E-4, weight_decay=1E-5)

In [None]:
# Ad-hoc look at the model's parameters.
# NOTE(review): this cell has no execution count (In [None]) — presumably a
# leftover inspection; confirm whether it is still needed.
model.parameters()

In [28]:
# Hand-written example: a single row of 34 values, kept as a (1, 34) tensor.
_test_logits = [
    1.0567, -1.5172, -10.6859, -3.7097, 1.5681, -2.7092,
    0.3964, -5.4593, -2.6822, 0.3939, -15.2788, -2.7801,
    0.2324, -2.7833, -9.1558, -1.5297, 1.5854, -6.4870,
    -8.0428, -6.4800, -1.1258, -3.6832, -3.5413, -3.9208,
    -10.3810, -1.6498, -13.5035, -14.2177, -15.4877, -3.3631,
    1.5496, -4.7406, -7.3878, -8.1576,
]
test = torch.tensor([_test_logits])

tensor([[1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0,
         0, 0, 0, 0, 0, 0, 1, 0, 0, 0]], dtype=torch.int32)

tensor([[1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0,
         0, 0, 0, 0, 0, 0, 1, 0, 0, 0]], dtype=torch.int32)