In [2]:
import matplotlib.pyplot as plt
import numpy as np

In [3]:
import torch
import torchvision
import torchvision.transforms as transforms

In [4]:
import torch.nn as nn
import torch.nn.functional as F

In [5]:
import torch.optim as optim

In [6]:
from htru1 import HTRU1

Completed Execution


In [7]:
transform = transforms.Compose([
    transforms.RandomHorizontalFlip(), # randomly flip 
    transforms.RandomRotation(10), # randomly rotate
    transforms.ToTensor(),
    transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
    ])

In [199]:
trainset = HTRU1(root='./data', train=True, download=True, transform=transform)


# Print the unique class labels

# Identify the majority and minority classes in the training set
# You should replace these labels with the actual class labels used in your dataset
majority_class = 1
minority_class = 0

# Find indices of majority and minority class samples in the training set
train_majority_indices = [i for i, label in enumerate(trainset.targets) if label == majority_class]
train_minority_indices = [i for i, label in enumerate(trainset.targets) if label == minority_class]

print(len(train_majority_indices))
print(len(train_minority_indices))

# Randomly under-sample majority class in the training set to balance the classes
num_samples_to_keep_train = (min(len(train_majority_indices), len(train_minority_indices)))
print(num_samples_to_keep_train)
selected_train_majority_indices = np.random.choice(train_majority_indices, size=int(1.0765*num_samples_to_keep_train), replace=False)

# Combine indices of both classes to create a balanced training set
balanced_train_indices = np.concatenate([selected_train_majority_indices, train_minority_indices])

# Use the balanced indices to create the balanced training set
balanced_trainset = torch.utils.data.Subset(trainset, balanced_train_indices)

# Create a DataLoader for the balanced training set
trainloader = torch.utils.data.DataLoader(balanced_trainset, batch_size=4, shuffle=True, num_workers=2)

#trainloader = torch.utils.data.DataLoader(trainset, batch_size=4, shuffle=True, num_workers=2)

Files already downloaded and verified
49005
995
995


In [200]:
class Net(nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        self.conv1 = nn.Conv2d(3, 6, 5)
        self.pool = nn.MaxPool2d(2, 2)
        self.conv2 = nn.Conv2d(6, 16, 5)
        self.fc1 = nn.Linear(16 * 5 * 5, 120)
        self.fc2 = nn.Linear(120, 84)
        self.fc3 = nn.Linear(84, 10)

    def forward(self, x):
        x = self.pool(F.relu(self.conv1(x)))
        x = self.pool(F.relu(self.conv2(x)))
        x = x.view(-1, 16 * 5 * 5)
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        x = self.fc3(x)
        return x

In [201]:
net = Net()

In [202]:
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(net.parameters(), lr=0.001, momentum=0.9)

In [219]:
nepoch = 4  # number of epochs

for epoch in range(nepoch):  # loop over the dataset multiple times

    running_loss = 0.0
    for i, data in enumerate(trainloader, 0):
        # get the inputs
        inputs, labels = data

        # zero the parameter gradients
        optimizer.zero_grad()

        # forward + backward + optimize
        outputs = net(inputs)
        #labels = labels.float().unsqueeze(1)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        # print statistics
        running_loss += loss.item()
        if i % 10 == 9:    # print every 2000 mini-batches
            print('[%d, %5d] loss: %.3f' %
                  (epoch + 1, i + 1, running_loss / 10))
            running_loss = 0.0

print('Finished Training')

[1,    10] loss: 0.062
[1,    20] loss: 0.026
[1,    30] loss: 0.150
[1,    40] loss: 0.034
[1,    50] loss: 0.102
[1,    60] loss: 0.121
[1,    70] loss: 0.055
[1,    80] loss: 0.371
[1,    90] loss: 0.073
[1,   100] loss: 0.042
[1,   110] loss: 0.024
[1,   120] loss: 0.064
[1,   130] loss: 0.105
[1,   140] loss: 0.033
[1,   150] loss: 0.096
[1,   160] loss: 0.115
[1,   170] loss: 0.043
[1,   180] loss: 0.259
[1,   190] loss: 0.052
[1,   200] loss: 0.036
[1,   210] loss: 0.078
[1,   220] loss: 0.069
[1,   230] loss: 0.515
[1,   240] loss: 0.186
[1,   250] loss: 0.124
[1,   260] loss: 0.093
[1,   270] loss: 0.091
[1,   280] loss: 0.086
[1,   290] loss: 0.076
[1,   300] loss: 0.080
[1,   310] loss: 0.116
[1,   320] loss: 0.178
[1,   330] loss: 0.146
[1,   340] loss: 0.088
[1,   350] loss: 0.051
[1,   360] loss: 0.025
[1,   370] loss: 0.241
[1,   380] loss: 0.117
[1,   390] loss: 0.087
[1,   400] loss: 0.073
[1,   410] loss: 0.021
[1,   420] loss: 0.026
[1,   430] loss: 0.294
[1,   440] 

In [220]:
testset = HTRU1(root='./data', train=False, download=True, transform=transform)
testloader = torch.utils.data.DataLoader(testset, batch_size=4, shuffle=False, num_workers=2)


Files already downloaded and verified


In [221]:
dataiter = iter(testloader)
images, labels = next(dataiter)

In [222]:
class_correct = list(0. for i in range(10))
class_total = list(0. for i in range(10))
with torch.no_grad():
    for data in testloader:
        images, labels = data
        outputs = net(images)
        _, predicted = torch.max(outputs, 1)
        c = (predicted == labels).squeeze()
        for i in range(4):
            label = labels[i]
            class_correct[label] += c[i].item()
            class_total[label] += 1

In [223]:
classes = ('pulsar', 'nonpulsar')
for i in range(2):
    print('Accuracy of %5s : %2d %%' % (classes[i], 100 * class_correct[i] / class_total[i]))

Accuracy of pulsar : 93 %
Accuracy of nonpulsar : 97 %


In [224]:
print((class_correct[0]+class_correct[1])/(class_total[0]+class_total[1]))

0.9717
