# First attempt at PyTorch Framework
I am just going to create the same structure MLPClassifier that we found worked best last semester. Basic documentation for PyTorch can be found [here](https://pytorch.org/tutorials/beginner/basics/intro.html).

In [1]:
import os
here = os.getcwd()

## See if GPU is available
If it is, we want to move all of our tensors to GPU

In [2]:
import torch
is_cuda = False
if torch.cuda.is_available():
    is_cuda = True
device = torch.device("cuda:0" if is_cuda else "cpu")
device

device(type='cpu')

## Splitting into test and train

In [4]:
import pandas as pd

df = pd.read_csv(os.path.join(here, 'data/HTRU_2.csv'))
print(df.shape[0])
train = df.iloc[:14317]
test = df.iloc[14317:]

train.to_csv(os.path.join(here, 'data/train.csv'), index=False)
test.to_csv(os.path.join(here, 'data/test.csv'), index=False)

17897


## Defining custom dataset
[Helpful Video](https://www.youtube.com/watch?v=PXOzkkB5eH0)

In [9]:
import torchvision
from torch.utils.data import Dataset, DataLoader
import numpy as np
import math
class PulsarDataset(Dataset):
    def __init__(self, path, is_cuda):
        # loading in full set
        data_set = np.loadtxt(os.path.join('data', path), delimiter=',', dtype=np.float32)

        #splitting into inputs and outputs and casting into tensor
        self.inputs = torch.from_numpy(data_set[:, :8]) # all rows, columns 0-7
        self.outputs = torch.from_numpy(data_set[:, [8]]) # all rows, column 8
        
        # maybe move to cuda
        if is_cuda:
            self.inputs = self.inputs.to('cuda')
            self.outputs = self.outputs.to('cuda')
        self.num_of_samples = data_set.shape[0]

    # method to return an index of dataset
    def __getitem__(self, index):
        return self.inputs[index], self.outputs[index]
    # method to give total length of dataset
    def __len__(self):
        return self.num_of_samples

## Creating dataset object and showing features and labels
Sanity check.

In [10]:
# Override is_cuda for time checking if wanted
#*****************************************
#is_cuda= False
train_dataset = PulsarDataset('train.csv', is_cuda)
test_dataset = PulsarDataset('test.csv', is_cuda)
features, labels = train_dataset[0]
print(features.shape[0])
print(labels.shape[0])

8
1


In [11]:
trainloader = DataLoader(dataset=train_dataset, batch_size = 1000, shuffle=True)
testloader = DataLoader(dataset=test_dataset, batch_size=1000, shuffle=True)

## Creating an MLP Classifier Class
For now we will use 1 layer of 5 hidden nodes ( fully connected )

In [12]:
import torch.nn.functional as F
from torch import nn
class MLPClassifier(nn.Module):
    def __init__(self):
        super().__init__()

        # 8 input nodes to 5 hidden nodes
        self.fc1 = nn.Linear(8, 5)

        #5 hidden nodes to an output layer
        self.output = nn.Linear(5, 2)

    def forward(self, x):
        x = F.relu(self.fc1(x))
        x = F.log_softmax(self.output(x), dim=1)
        return x

classifier = MLPClassifier()

In [13]:
from torch import optim
criterion = nn.NLLLoss()
optimizer = optim.SGD(classifier.parameters(), lr=.03)

In [14]:
import time

In [18]:
epochs = 10


if is_cuda:
    criterion.cuda()
    device = torch.device("cuda:0")
    classifier.cuda()
    
    
else:
    criterion.cpu()
    device = torch.device("cpu")
    classifier.cpu()
    
# time
start_time = time.time()
for e in range(epochs):
    running_loss = 0
    for inputs, labels in trainloader:
        optimizer.zero_grad()
        #print(inputs.shape)
        output = classifier.forward(inputs)

        loss = criterion(output, labels.squeeze(1).long())
        loss.backward()
        optimizer.step()

        running_loss += loss.item()
    else:
        print(running_loss/len(trainloader))

end_time = time.time()
print(f"Time to run: {end_time-start_time}")

0.36637682318687437
0.36440290212631227
0.36109764377276105
0.3574163556098938
0.3550948659578959
0.35538906455039976
0.35197394092877704
0.3528457780679067
0.3537181854248047
0.35103628039360046
Time to run: 1.2935402393341064


## Predictions
Let's check out how this thing predicts

In [16]:
from sklearn.metrics import accuracy_score
from sklearn.metrics import confusion_matrix as cm
from sklearn.metrics import f1_score
import random

In [19]:

# Disable grad

with torch.no_grad():

    for inputs, labels in testloader:
        # Generate prediction
        prediction = classifier.forward(inputs).data.cpu().numpy()
        # Predicted class value using argmax
        predicted_classes = np.argmax(prediction, axis=1)
        accuracy = accuracy_score(labels.cpu(), predicted_classes)
        confusion_matrix = cm(labels.cpu(), predicted_classes)
        f1 = f1_score(labels.cpu(), predicted_classes)
        print(f"Confusion Matrix:\n {confusion_matrix}")
        print(f"Accuracy Score: {accuracy}")
        print(f'f1 Score: {f1}')

Confusion Matrix:
 [[980   0]
 [ 20   0]]
Accuracy Score: 0.98
f1 Score: 0.0
Confusion Matrix:
 [[988   0]
 [ 12   0]]
Accuracy Score: 0.988
f1 Score: 0.0
Confusion Matrix:
 [[986   0]
 [ 14   0]]
Accuracy Score: 0.986
f1 Score: 0.0
Confusion Matrix:
 [[574   0]
 [  7   0]]
Accuracy Score: 0.9879518072289156
f1 Score: 0.0
