In [None]:
import torch
import torch.nn as nn
import torchvision
import torchvision.transforms as transforms
import matplotlib.pyplot as plt
import sys
import os
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader
import numpy as np
import pandas as pd
from torch.utils.data.sampler import SubsetRandomSampler
from torch.utils.tensorboard import SummaryWriter
from tqdm import tqdm

In [None]:
# TensorBoard event files for this run are written under this directory.
writer = SummaryWriter(log_dir='./runs/classification_all/')

# Use the GPU when PyTorch can see one; otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')

In [None]:
class COMDataset(Dataset):
    """Flat dataset pairing parsed COM values (inputs) with ``hb_observable`` values (targets).

    Files read from ``production_dir``:

    * ``com_dir/*.txt`` -- one file per window, ordered by the integer that
      follows the last ``_`` in the file name. Only the first comma-separated
      column of each line is used; its first ``COM_PREFIX_LEN`` characters are
      dropped and the remainder is parsed as a float.
    * ``<window>/hb_observable.txt`` for ``window = 0 .. n_windows - 1`` -- the
      first comma-separated column of each line is the target value.

    Every window must contribute the same number of lines so the per-window
    columns can be stacked into one array. The stacked arrays are flattened in
    window order into ``self.x`` and ``self.y``.
    NOTE(review): inputs and targets are paired purely by position; the code
    does not verify that both sides have the same number of entries.

    Args:
        production_dir: Directory containing ``com_dir`` and the numbered
            window sub-directories.
        transform: Optional callable applied to each ``(x, y)`` tuple before
            it is returned by ``__getitem__``.
    """

    # Number of leading characters stripped from every COM line before the
    # rest of the line is parsed as a float.
    COM_PREFIX_LEN = 9

    def __init__(self, production_dir, transform=None):
        self.transform = transform
        self.production_dir = production_dir
        # Original (misspelled) attribute name, kept so code that reads it keeps working.
        self.prodiction_dir = production_dir
        self.com_dir = os.path.join(production_dir, 'com_dir')

        # One directory listing is enough: filter to .txt files and order by window number.
        self.com_files = sorted(
            (name for name in os.listdir(self.com_dir) if name.endswith('.txt')),
            key=self.sort_coms,
        )
        self.n_windows = len(self.com_files)

        # Parse each window with vectorised string operations; the result is a
        # real float64 array instead of an object array filled element by element.
        com_columns = [
            self._read_first_column(os.path.join(self.com_dir, name), window)
            for window, name in enumerate(self.com_files)
        ]
        self.com_list = np.array([
            column.str[self.COM_PREFIX_LEN:].str.strip().astype(np.float64).to_numpy()
            for column in com_columns
        ]).squeeze()

        self.window_paths = [
            os.path.join(production_dir, str(window), 'hb_observable.txt')
            for window in range(self.n_windows)
        ]
        self.hb_list = np.array([
            self._read_first_column(path, window).to_numpy()
            for window, path in enumerate(self.window_paths)
        ]).squeeze()

        # reshape(-1) flattens whatever rank squeeze() left behind. Indexing
        # shape[1] here used to raise IndexError for a single window (or a
        # single line per window) because squeeze() had removed that axis.
        self.x = self.com_list.reshape(-1)
        self.y = self.hb_list.reshape(-1)

        self.n_samples = self.x.shape[0]

    @staticmethod
    def _read_first_column(path, name):
        """Read the first comma-separated column of a header-less text file as a Series."""
        return pd.read_csv(path, header=None, names=[name], usecols=[0]).iloc[:, 0]

    def sort_coms(self, file):
        """Sort key: the integer between the last ``_`` and the first ``.`` after it."""
        var = int(file.split('_')[-1].split('.')[0])
        return var

    def __getitem__(self, index):
        """Return ``(x[index], y[index])``, passed through ``transform`` when one is set."""
        sample = self.x[index], self.y[index]
        if self.transform:
            sample = self.transform(sample)
        return sample

    def __len__(self):
        """Number of flattened samples (length of ``self.x``)."""
        return self.n_samples

In [None]:
# NOTE(review): absolute, machine-specific path -- point it at your own
# production directory before running this cell.
production_dir = '/scratch/mlsample/ipy_oxDNA/ipy_oxdna_examples/duplex_melting/us_melting/production/'
dataset  = COMDataset(production_dir)
batch_size = 4096
validation_split = .2
shuffle_dataset = True
random_seed= 42

# Creating data indices for training and validation splits:
# the first `split` (optionally shuffled) indices form the validation set,
# the remainder the training set.
dataset_size = len(dataset)
indices = list(range(dataset_size))
split = int(np.floor(validation_split * dataset_size))
if shuffle_dataset :
    np.random.seed(random_seed)
    np.random.shuffle(indices)
train_indices, val_indices = indices[split:], indices[:split]

# Creating PT data samplers and loaders:
# SubsetRandomSampler draws its permutation from torch's RNG, which the numpy
# seed above does not touch. Dedicated seeded generators make the batch order
# reproducible from `random_seed` as well.
train_sampler = SubsetRandomSampler(
    train_indices, generator=torch.Generator().manual_seed(random_seed))
valid_sampler = SubsetRandomSampler(
    val_indices, generator=torch.Generator().manual_seed(random_seed))

train_loader = torch.utils.data.DataLoader(dataset, batch_size=batch_size, 
                                           sampler=train_sampler)
# Despite its name, `test_loader` iterates over the validation indices.
test_loader = torch.utils.data.DataLoader(dataset, batch_size=batch_size,
                                                sampler=valid_sampler)

In [None]:
class NeuralNet(nn.Module):
    """Fully connected network: Linear -> ReLU -> Linear.

    Args:
        input_size: Number of features per input row.
        hidden_size: Width of the hidden layer.
        num_classes: Number of values produced per input row.
    """

    def __init__(self, input_size, hidden_size, num_classes):
        super().__init__()
        self.fc1 = nn.Linear(in_features=input_size, out_features=hidden_size)
        self.fc2 = nn.Linear(in_features=hidden_size, out_features=num_classes)

    def forward(self, x):
        """Return the output of ``fc2``; no activation is applied after the last layer."""
        hidden = F.relu(self.fc1(x))
        return self.fc2(hidden)

In [None]:
# Hyper-parameters
# Model shape
input_size = 1        # features per sample; batches are reshaped to (-1, input_size) before the model
hidden_size = 640     # width of the hidden layer
num_classes = 10      # number of model outputs -- NOTE(review): confirm this covers every label value
# Optimisation
num_epochs = 10       # full passes over the training loader
learning_rate = 1e-4  # step size handed to the optimizer

In [None]:
# Build the network and move it to the selected device, then create the loss
# and the optimizer that updates the network's parameters.
model = NeuralNet(
    input_size=input_size,
    hidden_size=hidden_size,
    num_classes=num_classes,
)
model = model.to(device)
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(params=model.parameters(), lr=learning_rate)

In [None]:
# Training Loop
# Every 100 batches the running (since start of epoch) mean loss and accuracy
# are printed and written to TensorBoard.
n_total_steps = len(train_loader)
model.train()  # no effect on plain Linear layers, but keeps the cell correct if dropout/batch-norm is added
for epoch in tqdm(range(num_epochs)):
    running_loss = 0.0
    running_corrects = 0
    n_seen = 0  # samples processed so far in this epoch
    for i, (x, y) in enumerate(train_loader):
        x = x.float().view(-1, input_size).to(device)
        y = y.to(device)
        
        outputs = model(x)
        loss = criterion(outputs, y)
        
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        
        n_batch = x.size(0)
        # criterion returns the batch mean; weight it by the batch size so the
        # running total is a per-sample sum.
        running_loss += loss.item() * n_batch
        _, preds = torch.max(outputs, 1)
        # .item() keeps the counter a plain Python int instead of a device tensor.
        running_corrects += (preds == y).sum().item()
        n_seen += n_batch
        
        if (i+1) % 100 == 0:
            # Divide by the samples actually seen. The previous denominator,
            # i * batch_size, was one batch short (i is zero-based) and ignored
            # a smaller final batch, which inflated both metrics.
            epoch_loss = running_loss / n_seen
            epoch_acc = running_corrects / n_seen
            print(f'Epoch {epoch}/{num_epochs} - Loss: {epoch_loss:.4f} Acc: {epoch_acc:.4f}')
            global_step = epoch * n_total_steps + i
            writer.add_scalar('training loss', epoch_loss, global_step)
            writer.add_scalar('accuracy', epoch_acc, global_step)

# Push buffered events to disk so TensorBoard can display them right away.
writer.flush()


In [None]:
# Accuracy (in percent) on the samples served by `test_loader`.
model.eval()  # inference mode for layers such as dropout/batch-norm (none in the current model)
n_correct = 0
n_sample = 0
with torch.no_grad():
    for x, y in test_loader:
        # Same preprocessing as the training loop. x is already a tensor, so
        # cast it instead of re-wrapping it with torch.tensor(), which warns
        # about copy-constructing from a tensor.
        x = x.float().view(-1, input_size).to(device)
        y = y.to(device)
        outputs = model(x)
        #value, index
        _, predictions = torch.max(outputs, 1)
        n_sample += y.shape[0]
        n_correct += (predictions == y).sum().item()

# An empty loader (e.g. a validation split of zero samples) yields NaN rather
# than a ZeroDivisionError.
acc = 100.0 * n_correct / n_sample if n_sample else float('nan')
print(acc)

In [None]:
%tensorboard --logdir './runs/classification_all/'