In [6]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import TensorDataset, DataLoader
from sklearn.model_selection import ShuffleSplit
from sklearn.metrics import accuracy_score, balanced_accuracy_score, recall_score, precision_score, confusion_matrix
import numpy as np
import pandas as pd
from utils import specificity_score, negative_prediction_value_score, gmean_score, informedness_score
import torch.nn.functional as F
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from base_pulsa_ai import BasePulsarAI

# Data Loading

In [7]:
# Project helper object: later cells read its `num_runs` and `scores` attributes
# and call its `append_score` method to accumulate per-run metrics.
base_pulsar_ai = BasePulsarAI()
# NOTE(review): presumably returns the HTRU_2 features/labels as torch tensors,
# already split into train and test parts — confirm in base_pulsa_ai.
x_train, x_test, y_train, y_test = base_pulsar_ai.get_torch_htru_2()

# Model definition

In [8]:
class ClassicCNN(nn.Module):
    """Small 1-D convolutional classifier for tabular feature vectors.

    Each sample is reshaped to ``(num_features, 1)`` so that the features act
    as the input channels of a ``kernel_size=1`` convolution over a sequence
    of length 1. The 128 resulting channels are fed through two linear layers.

    Args:
        num_features: Number of input features per sample (conv input channels).
        output_dim: Number of output units (class logits).
    """

    def __init__(self, num_features, output_dim):
        super(ClassicCNN, self).__init__()
        # Remember the feature count so forward() can reshape its input
        # without relying on a hard-coded value.
        self.num_features = num_features
        self.conv1d = nn.Conv1d(in_channels=num_features, out_channels=128, kernel_size=1)
        self.relu = nn.ReLU()
        self.fc1 = nn.Linear(128, 64)
        self.fc2 = nn.Linear(64, output_dim)

    def forward(self, x):
        """Return raw (un-normalised) class scores for a batch of samples.

        Args:
            x: Tensor whose elements can be arranged as
                ``(batch, num_features)``.

        Returns:
            Tensor of shape ``(batch, output_dim)``; no softmax/sigmoid is
            applied here.
        """
        # (batch, num_features) -> (batch, channels=num_features, length=1)
        x = x.reshape(-1, self.num_features, 1)
        x = self.conv1d(x)
        x = self.relu(x)
        # Flatten (batch, 128, 1) -> (batch, 128) for the linear layers.
        x = x.view(x.size(0), -1)
        x = self.fc1(x)
        x = self.relu(x)
        x = self.fc2(x)
        return x


input_dim  = 8 # number of input features per sample
output_dim = 2 # two output logits for binary classification; TODO: decide whether to switch to a single sigmoid output


In [9]:
# Feature standardisation is currently disabled: the scaler is created here but
# is never fitted or applied in the cells shown.
scaler = StandardScaler() # TBD whether scaling is needed

# NOTE(review): the data variables in this notebook are named x_train / x_test
# (lowercase), so the lines below would need adapting before being enabled.
#X_train = scaler.fit_transform(X_train)
#X_test = scaler.transform(X_test)

# Training CNN

In [10]:


# One random split per experiment run. The integer sizes request 60 training
# and 120 test samples per split (scikit-learn treats integer sizes as
# absolute sample counts).
ss = ShuffleSplit(n_splits=base_pulsar_ai.num_runs, train_size=60, test_size=120)

# Load the raw HTRU_2 CSV (no header row) and name its nine columns.
# NOTE(review): `df` is not referenced by any other cell visible here — confirm
# whether this load is still needed.
df = pd.read_csv('./data/HTRU_2.csv', header=None)
df.columns = ['IpMean', 'IpDev', 'IpKurt','IpSkew', 'DMMean', 'DMDev', 'DMKurt', 'DMSkew', 'Class']


def train_network(model,optimizer,criterion,x_train,y_train,x_test,y_test,num_epochs,train_losses,test_losses):
    """Train ``model`` with full-batch updates and record the loss per epoch.

    Every epoch performs one optimizer step on the whole training set and then
    evaluates the loss on the test set. Results are written in place into the
    two loss buffers; nothing is returned.

    Args:
        model: Callable module mapping an input batch to predictions.
        optimizer: Optimizer bound to ``model``'s parameters.
        criterion: Loss callable taking ``(predictions, targets)``.
        x_train, y_train: Training inputs and targets.
        x_test, y_test: Held-out inputs and targets, used for monitoring only.
        num_epochs: Number of full-batch epochs to run.
        train_losses: Indexable buffer with at least ``num_epochs`` slots;
            receives the training loss of each epoch.
        test_losses: Indexable buffer with at least ``num_epochs`` slots;
            receives the test loss of each epoch.
    """
    for epoch in range(num_epochs):
        # Clear the gradients accumulated by the previous loss.backward().
        optimizer.zero_grad()

        # Forward pass on the full training set.
        output_train = model(x_train)

        # Compute the training loss.
        loss_train = criterion(output_train, y_train)

        # Backward propagation: compute gradients.
        loss_train.backward()

        # Update the weights.
        optimizer.step()

        # The test loss is only monitored, never back-propagated, so skip
        # autograd bookkeeping for this forward pass.
        with torch.no_grad():
            output_test = model(x_test)
            loss_test = criterion(output_test, y_test)

        train_losses[epoch] = loss_train.item()
        test_losses[epoch] = loss_test.item()

# Hyper-parameters and the loss are the same for every run, so build them once.
num_epochs = 50
learning_rate = 0.01
criterion = nn.CrossEntropyLoss()

# NOTE(review): `train_index` / `test_index` are never used below, so the split
# only determines how many runs are executed; every run trains on the full
# x_train and is scored on the full x_test. Confirm whether the indices were
# meant to sub-sample the data.
for train_index, test_index in ss.split(x_train):
    # Fresh loss buffers, model and optimizer for each independent run.
    train_losses = np.zeros(num_epochs)
    test_losses  = np.zeros(num_epochs)
    model = ClassicCNN(input_dim, output_dim)
    optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

    train_network(model,optimizer,criterion, x_train,y_train, x_test,y_test,num_epochs,train_losses,test_losses)

    # Predict the test set; inference needs no autograd graph.
    with torch.no_grad():
        output_test = model(x_test)
    # Predicted class = index of the largest output score per sample.
    predicted_test = output_test.argmax(dim=1)

    # Calculate the scores
    base_pulsar_ai.append_score(y_test, predicted_test)

  _warn_prf(average, modifier, msg_start, len(result))


# Results

In [11]:
# Report each collected metric as "mean ± standard deviation" over all runs
for metric in base_pulsar_ai.scores:
    run_values = np.asarray(base_pulsar_ai.scores[metric])
    mean_value = run_values.mean()
    std_value = run_values.std()
    print(f"{metric.capitalize()}: {mean_value:.3f} ± {std_value:.3f}")

Accuracy: 0.972 ± 0.011
Balanced_accuracy: 0.891 ± 0.059
Recall: 0.791 ± 0.120
Specificity: 0.990 ± 0.008
Precision: 0.884 ± 0.141
Npv: 0.979 ± 0.011
Gmean: 0.876 ± 0.127
Informedness: 0.781 ± 0.117
