In [4]:
import numpy as np

import torch
import torch.nn as nn
from torch.utils.data import DataLoader, TensorDataset

from dataloader import MisciblityData

In [7]:
# Load the dataset wrapper, then unpack its (train, validation) splits;
# each split is itself an (inputs, targets) pair.
data = MisciblityData()
(X_train, Y_train), (X_val, Y_val) = data.load_data()

In [12]:
# Summarise the dataset: descriptor count, size of each split, and the
# shapes of the training inputs/targets. The empty string yields the blank
# separator line between the two groups.
print(
    f"Number of features >> {data.num_descriptors}",
    f"Number of training sets >> {len(X_train)}",
    f"Number of validation sets >> {len(X_val)}",
    "",
    f"Training input data shape >> {np.shape(X_train)}",
    f"Training output data shape >> {np.shape(Y_train)}",
    sep="\n",
)

Number of features >> 7
Number of training sets >> 1594
Number of validation sets >> 684

Training input data shape >> (1594, 14)
Training output data shape >> (1594,)


In [None]:
# Prepare data in tensor form.
# NOTE: torch.float32 is the common choice; it offers a good balance between
# accuracy and efficiency.
X_train_tensor = torch.tensor(X_train, dtype=torch.float32)
Y_train_tensor = torch.tensor(Y_train, dtype=torch.float32)
X_val_tensor = torch.tensor(X_val, dtype=torch.float32)
Y_val_tensor = torch.tensor(Y_val, dtype=torch.float32)

# NOTE(review): the printed training target shape is 1-D, (1594,), whereas
# ATPSPredictor emits (batch, 1). The training loop will have to reconcile the
# two (e.g. unsqueeze the targets or squeeze the outputs) -- TODO confirm.

# Pair each input row with its target so they are batched together.
train_dataset = TensorDataset(X_train_tensor, Y_train_tensor)
val_dataset = TensorDataset(X_val_tensor, Y_val_tensor)

# NOTE: a batch size of 32 is a common default; kept in one place so both
# loaders stay in sync when it is tuned.
BATCH_SIZE = 32

# Shuffle only the training data. Validation is evaluated in a fixed order so
# that per-batch results are reproducible from run to run.
train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=BATCH_SIZE, shuffle=False)

In [None]:
# Define model
class ATPSPredictor(nn.Module):
    """Three-layer fully connected network with a sigmoid output.

    The network applies two hidden ``Linear`` + ``ReLU`` stages followed by a
    single-unit ``Linear`` + ``Sigmoid`` head, so every output lies in (0, 1).

    Args:
        input_dim: Number of input features per sample. Defaults to 14, the
            width previously hard-coded in this class.
        hidden_dim: Width of both hidden layers. Defaults to 32, the width
            previously hard-coded in this class.
    """

    def __init__(self, input_dim: int = 14, hidden_dim: int = 32):
        super().__init__()
        # Attribute names are kept stable so existing state_dict keys
        # (fc1.*, fc2.*, fc3.*) remain loadable.
        self.fc1 = nn.Linear(input_dim, hidden_dim)
        self.fc2 = nn.Linear(hidden_dim, hidden_dim)
        self.fc3 = nn.Linear(hidden_dim, 1)
        self.relu = nn.ReLU()
        self.sigmoid = nn.Sigmoid()

    def forward(self, x):
        """Run a forward pass.

        Args:
            x: Float tensor whose last dimension equals ``input_dim``.

        Returns:
            Tensor with the same leading dimensions as ``x`` and a last
            dimension of 1, with values squashed into (0, 1) by the sigmoid.
        """
        x = self.relu(self.fc1(x))
        x = self.relu(self.fc2(x))
        x = self.sigmoid(self.fc3(x))
        return x

In [None]:
# Train the model