# First attempt at PyTorch Framework
I am just going to create the same structure MLPClassifier that we found worked best last semester. Basic documentation for PyTorch can be found [here](https://pytorch.org/tutorials/beginner/basics/intro.html).

In [1]:
import os
here = os.getcwd()

## Splitting into test and train

In [2]:
import pandas as pd

df = pd.read_csv(os.path.join(here, 'data/HTRU_2.csv'))
print(df.shape[0])
train = df.iloc[:14317]
test = df.iloc[14317:]

train.to_csv(os.path.join(here, 'data/train.csv'), index=False)
test.to_csv(os.path.join(here, 'data/test.csv'), index=False)

17897


## Defining custom dataset
[Helpful Video](https://www.youtube.com/watch?v=PXOzkkB5eH0)

In [3]:
import torch
import torchvision
from torch.utils.data import Dataset, DataLoader
import numpy as np
import math
class PulsarDataset(Dataset):
    def __init__(self, path):
        # loading in full set
        data_set = np.loadtxt(os.path.join('./data', path), delimiter=',', dtype=np.float32)

        #splitting into inputs and outputs and casting into tensor
        self.inputs = torch.from_numpy(data_set[:, :8]) # all rows, columns 0-7
        self.outputs = torch.from_numpy(data_set[:, [8]]) # all rows, column 8
        self.num_of_samples = data_set.shape[0]

    # method to return an index of dataset
    def __getitem__(self, index):
        return self.inputs[index], self.outputs[index]
    # method to give total length of dataset
    def __len__(self):
        return self.num_of_samples

## Creating dataset object and showing features and labels
Sanity check.

In [4]:
train_dataset = PulsarDataset('train.csv')
test_dataset = PulsarDataset('test.csv')
features, labels = train_dataset[0]
print(features.shape[0])
print(labels.shape[0])

8
1


In [5]:
trainloader = DataLoader(dataset=train_dataset, batch_size = 64, shuffle=True)
testloader = DataLoader(dataset=test_dataset, batch_size = 64, shuffle=True)

## See if GPU is available
If it is, we want to move all of our tensors to GPU

In [6]:
is_cuda = False
if torch.cuda.is_available():
    is_cuda = True
is_cuda

False

## Creating an MLP Classifier Class
For now we will use 1 layer of 5 hidden nodes ( fully connected )

In [51]:
import torch.nn.functional as F
from torch import nn
class MLPClassifier(nn.Module):
    def __init__(self):
        super().__init__()

        # 8 input nodes to 5 hidden nodes
        self.fc1 = nn.Linear(8, 5)

        #5 hidden nodes to an output layer
        self.output = nn.Linear(5, 2)

    def forward(self, x):
        x = F.relu(self.fc1(x))
        x = F.log_softmax(self.output(x), dim=1)
        return x

classifier = MLPClassifier()
classifier

MLPClassifier(
  (fc1): Linear(in_features=8, out_features=5, bias=True)
  (output): Linear(in_features=5, out_features=2, bias=True)
)

In [52]:
from torch import optim
criterion = nn.NLLLoss()
optimizer = optim.SGD(classifier.parameters(), lr=.03)

In [58]:
epochs = 5

for e in range(epochs):
    running_loss = 0
    for inputs, labels in trainloader:
        optimizer.zero_grad()

        output = classifier.forward(inputs)

        loss = criterion(output, labels.squeeze(1).long())
        loss.backward()
        optimizer.step()

        running_loss += loss.item()
    else:
        print(running_loss/len(trainloader))

0.15050158273827816
0.1378457886832101
0.1239381890877017
0.12138978394380372
0.12245405355601438


## Predictions
Let's check out how this thing predicts

In [81]:
# Disable grad
import random
with torch.no_grad():

    # Retrieve item
    index = int(random.random() * 3000)
    item = test_dataset[index]
    input = item[0]
    expected_class = item[1]

    print(input)
    print(expected_class)
    # Generate prediction
    prediction = classifier.forward(input)

    # Predicted class value using argmax
    predicted_class = np.argmax(prediction)

    print(f'Predicted: {predicted_class}\nExpected: {int(expected_class[0])}')

tensor([ 1.2977e+02,  5.3460e+01,  1.7285e-02, -4.2711e-01,  6.5092e+00,
         3.1011e+01,  5.5623e+00,  3.2475e+01])
tensor([0.])


IndexError: Dimension out of range (expected to be in range of [-1, 0], but got 1)