In [14]:
# Loading in libraries necessary for CNN
import torch
import torch.nn as nn
import numpy as np
import os
import glob
from torch.utils.data import Dataset, DataLoader, TensorDataset
import pandas as pd
from torch.nn import Linear, ReLU, CrossEntropyLoss, Sequential, Conv2d, MaxPool2d, Module, Softmax, BatchNorm2d, Dropout
from torch.optim import Adam, SGD, Adagrad, RMSprop, SparseAdam, LBFGS, Adadelta
from sklearn.model_selection import train_test_split
import math
import matplotlib.pyplot
import time
import torch.nn.functional as F
import nmrsim
from nmrsim import plt
from itertools import product

# whether to run on GPU or CPU
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(f"Using {device} device")
#print(torch.cuda.get_device_name(0))

Using cpu device


In [15]:
#Checking how many files are in repository for training, testing, and validation
files = glob.glob('/home/fostooq/NMR_Upscale_UW_DIRECT/Spectral_Data/Jupyter_NB/spectral_data/400MHz/spectral_data_*.csv')
print('Total number of files: ', len(files))


Total number of files:  100


Establishing a Dataloader for 400MHz dataset

In [16]:
class GHzData(Dataset):
    def __init__(self):
        # Data loading starting with list of csv strings
        self.files = glob.glob(os.path.join('/home/fostooq/NMR_Upscale_UW_DIRECT/Spectral_Data/Jupyter_NB/spectral_data/400MHz/spectral_data_*.csv'))

        self.y_60 = [] # Establishes a list for 60 MHz data
        self.y_400 = [] # Establishes a list for 400 MHz data

        for self.file in self.files: # For loop for each file in files
            self.df = pd.read_csv(self.file) # Reads each into a pandas dataframe
            self.array_60 = self.df['60MHz_intensity'].to_numpy() # Takes 60MHz intensity to np
            self.array_400 = self.df['400MHz_intensity'].to_numpy() # Takes 400MHz intensity to np
            self.y_60.append(self.array_60) # Appends all arrays to 60MHz list
            self.y_400.append(self.array_400) # Appends all arrays to 400MHz list
            
        # Creates a 60 MHz tensor from list, converts to float, unsqueezes to have shape (n, 1, 5500)
        self.tensor_60 = torch.Tensor(self.y_60).float().unsqueeze(1).to(device)        

        # Creates a 400 MHz tensor from list, converts to float, unsqueezes to have shape (n, 1, 5500)
        self.tensor_400 = torch.Tensor(self.y_400).float().unsqueeze(1).to(device)
        
        # Track the length of number of samples in frame
        self.num_samples = len(self.y_60)

    def __getitem__(self, index): # establishes an index for the tensors
        return self.tensor_60[index], self.tensor_400[index]
    
    def __len__(self): # Returns variable number of samples
        return self.num_samples
    
  

In [17]:
class NeuralNetwork(nn.Module):
    def __init__(self):
        super(NeuralNetwork, self).__init__()
        self.fc1 = nn.Conv1d(in_channels= 1,out_channels= 128, kernel_size= 3, padding='same') # input layer to hidden
        self.relu = nn.ReLU() # activation function
        self.fc2 = nn.Conv1d(in_channels=128,out_channels=128,kernel_size= 3, padding='same') # input layer to output
        self.relu2 = nn.ReLU()
        self.fc3 = nn.Conv1d(in_channels=128,out_channels=128,kernel_size= 3, padding='same')
        self.relu3 = nn.ReLU()
        self.fc4 = nn.Conv1d(in_channels=128, out_channels=128,kernel_size= 3, padding='same')
        self.relu4 = nn.ReLU()
        self.fc5 = nn.Conv1d(in_channels=128,out_channels= 1,kernel_size= 3, padding='same')
            
    def forward(self, x):  # Forward loop for the neural network
        x = self.fc1(x)
        x = self.relu(x)
        x = self.fc2(x)
        x = self.relu2(x)
        x = self.fc3(x)
        x = self.relu3(x)
        x = self.fc4(x)
        x = self.relu4(x)
        x = self.fc5(x)
        return x

In [18]:
model = NeuralNetwork().to(device) # Assigns model to variable model, sends to gpu

In [19]:
# Establishing and loading data into notebook
dataset = GHzData()

#Splitting the data
train_X, test_X, train_y, test_y = train_test_split(dataset.tensor_60, dataset.tensor_400,
                                                    test_size=0.1)

# Splits train data into validation data
train_X, valid_X, train_y, valid_y = train_test_split(train_X, train_y,
                                                      test_size=0.1)
# Creating datasets
train_dataset = TensorDataset(train_X, train_y)
test_dataset = TensorDataset(test_X, test_y)
valid_dataset = TensorDataset(valid_X, valid_y)

# Batch size change to higher batch sizes
train_dataloader = DataLoader(train_dataset, batch_size=32, shuffle=True)
valid_dataloader = DataLoader(valid_dataset, batch_size=32, shuffle=True)
test_dataloader = DataLoader(test_dataset, batch_size=128, shuffle=True)

In [20]:
train_dataset[0][0].size()

torch.Size([1, 5500])

In [21]:
criterion = nn.MSELoss() # Loss function for the model

In [22]:
#RMSprop and Adam seems to work ptetty well
optimizer = RMSprop(model.parameters(), lr=0.001) # Optimization function

In [23]:
num_epochs = 30 # Number of epochs to run model

In [None]:
time_ = time.time() # Assigns time to variable time_

train_loss_epoch = []
valid_loss_epoch = []

for e in range(num_epochs):
    running_loss = 0.0
    
    for index, (inputs, labels) in enumerate(train_dataloader):
        #inputs = inputs.squeeze(1)
        inputs = inputs.to(device)
        labels = labels.to(device)
        
        optimizer.zero_grad()
        outputs = model(inputs)
        print(inputs,outputs, e, index)
        
        loss = criterion(outputs, labels)
        loss.backward()
        
        optimizer.step()
        
        running_loss += loss.item() * inputs.size(1)
    
    epoch_loss = running_loss / len(train_dataloader.dataset)
    train_loss_epoch.append(epoch_loss)
    
    model.eval() # Model to evaluation mode
    valid_loss = 0.0
    valid_correct = 0
    valid_total = 0
    loss_list_test = []

    # Loop for testing
    for inputs, labels in valid_dataloader:
        #Forward pass
        outputs = model(inputs)
        loss = criterion(outputs, labels)
        valid_loss += loss.item() * inputs.size(1)

        _, labels = torch.min(labels, 1)
        _, predicts = torch.min(outputs.data, 1)
        predicts = predicts.unsqueeze(1)
        valid_total += labels.size(0)
        valid_correct += (predicts == labels).float().mean()
        
    epoch_loss = valid_loss / len(valid_dataloader.dataset)
    valid_loss_epoch.append(epoch_loss)  
           
    
    if(int(e) % 10) == 0:
        print(f'Epoch {e} loss: {epoch_loss:.4f}')

print(f'Time Elapsed: {round(time.time()-time_, 5)} seconds')


tensor([[[2.7062e-04, 2.7196e-04, 2.7331e-04,  ..., 7.8373e-05,
          7.8226e-05, 7.8079e-05]],

        [[6.8608e-04, 6.8868e-04, 6.9131e-04,  ..., 1.6402e-04,
          1.6377e-04, 1.6352e-04]],

        [[4.8071e-04, 4.8292e-04, 4.8516e-04,  ..., 2.8591e-05,
          2.8561e-05, 2.8530e-05]],

        ...,

        [[3.2878e-04, 3.3022e-04, 3.3166e-04,  ..., 1.4956e-04,
          1.4938e-04, 1.4921e-04]],

        [[1.8046e-04, 1.8162e-04, 1.8279e-04,  ..., 4.5711e-05,
          4.5665e-05, 4.5619e-05]],

        [[2.0318e-04, 2.0437e-04, 2.0557e-04,  ..., 8.1842e-05,
          8.1755e-05, 8.1669e-05]]]) tensor([[[-0.0239, -0.0352, -0.0344,  ..., -0.0368, -0.0370, -0.0298]],

        [[-0.0239, -0.0352, -0.0344,  ..., -0.0368, -0.0370, -0.0298]],

        [[-0.0239, -0.0352, -0.0344,  ..., -0.0368, -0.0370, -0.0298]],

        ...,

        [[-0.0239, -0.0352, -0.0344,  ..., -0.0368, -0.0370, -0.0298]],

        [[-0.0239, -0.0352, -0.0344,  ..., -0.0368, -0.0370, -0.0298]],

 

In [None]:
# valid_loss_epoch = []

# for e in range(num_epochs):
#     running_loss = 0.0
#     loss_list_valid = []
     
#     model.eval() # Model to evaluation mode

#     valid_loss = 0.0
#     valid_correct = 0
#     valid_total = 0
#     loss_list_test = []

#     # Loop for testing
#     for inputs, labels in valid_dataloader:
#         #Forward pass
#         outputs = model(inputs)
#         loss = criterion(outputs, labels)
#         valid_loss += loss.item() * inputs.size(1)

#         _, labels = torch.min(labels, 1)
#         _, predicts = torch.min(outputs.data, 1)
#         predicts = predicts.unsqueeze(1)
#         valid_total += labels.size(0)
#         valid_correct += (predicts == labels).float().mean()
        
#     epoch_loss = valid_loss / len(valid_dataloader.dataset)
#     valid_loss_epoch.append(epoch_loss)  


In [None]:
x = np.linspace(1, num_epochs, num_epochs)

fig, ax = matplotlib.pyplot.subplots()
ax.plot(x, valid_loss_epoch, label='Validation Loss')
ax.plot(x, train_loss_epoch, label='Training Loss' )

matplotlib.pyplot.ylim([0.0, 0.001])
matplotlib.pyplot.xlabel('Number of Epochs')
matplotlib.pyplot.ylabel('Loss')
matplotlib.pyplot.legend()
matplotlib.pyplot.show()

In [None]:
# Evaluating Model Performance with Test Data

model.eval() # Model to evaluation mode

test_loss = 0.0
test_correct = 0
test_total = 0
loss_list_test = []

# Loop for testing
for inputs, labels in test_dataloader:
    #Forward pass
    outputs = model(inputs)
    loss = criterion(outputs, labels)
    test_loss += loss.item() * inputs.size(1)
    loss_list_test.append(loss)
    
    _, labels = torch.min(labels, 1)
    _, predicts = torch.min(outputs.data, 1)
    predicts = predicts.unsqueeze(1)
    test_total += labels.size(0)
    test_correct += (predicts == labels).float().mean()

accuracy = (test_correct / test_total)*100
test_loss /= len(test_dataloader.dataset)
print(f' Mean Loss of Function: {test_loss}, Accuracy: {accuracy}')
print(labels.shape, outputs.shape)

In [None]:
model.eval()
with torch.no_grad():
    for inputs, _ in test_dataloader:
        predictions = model(inputs)

predictions_numpy = predictions.cpu().numpy().reshape(10,-1)
pred = pd.DataFrame(predictions_numpy)

In [None]:
predictions_numpy.shape

In [None]:
df_60 = pd.read_csv('/home/fostooq/NMR_Upscale_UW_DIRECT/spectral_data/400MHz/spectral_data_11_00022.csv')
df_x = df_60['60MHz_intensity']
df_x = df_x.to_numpy()
df_x = torch.Tensor(df_x).unsqueeze(0).unsqueeze(1).to(device)

In [None]:
df_x.size()

In [None]:
predictions = model(df_x)
predictions_numpy = predictions.detach().cpu().numpy()
predictions_numpy=predictions_numpy.reshape(-1)

In [None]:
nmrsim.plt.mplplot_lineshape(df_60['60MHz_ppm'], df_60['60MHz_intensity'], limits=(-0.5, 10.5))

In [None]:
predictions_numpy.shape


np.square(predictions_numpy - df_60['60MHz_intensity']).mean(), np.square(predictions_numpy - df_60['400MHz_intensity']).mean()

In [None]:

x = np.linspace(-.5,10.5,5500)
y = predictions_numpy


nmrsim.plt.mplplot_lineshape(x, y, limits=(-0.5, 10.5))

In [None]:
nmrsim.plt.mplplot_lineshape(np.array(df_60['400MHz_ppm']), np.array(df_60['400MHz_intensity']), limits=(-0.5, 10.5))

In [None]:
test = glob.glob(os.path.join('/home/fostooq/NMR_Upscale_UW_DIRECT/spectral_data/400MHz/spectral_data_*.csv'))

y_60 = []
y_400 = []

for file in test:
    df = pd.read_csv(file)
    array_60 = df['60MHz_intensity'].to_numpy()
    array_400 = df['400MHz_intensity'].to_numpy()
    y_60.append(array_60)
    y_400.append(array_400)

tensor = torch.Tensor(y_400)
reshaped_tensor = tensor.unsqueeze(1)
reshaped_tensor_2 = reshaped_tensor.squeeze(1)

print(reshaped_tensor.shape)
print(reshaped_tensor_2.shape)

In [None]:
Training loop for optuna:
def objective(trial):
    # Generate the model
    model = LinearVAE(trial).to(device)
    # Generate optimizers
    # Try Adam, AdaDelta, Adagrad, RMSprop, SGD
    optimizer_name = trial.suggest_categorical('optimizer', ['Adam', 'Adadelta', 'Adagrad', 'RMSprop', 'SGD'])
    lr = trial.suggest_float('lr', 1e-5, 1e-1, log=True)
    optimizer = getattr(optim, optimizer_name)(model.parameters(), lr=lr)
    batch_size_trial = trial.suggest_int('batch_size', 64, 256, step=64)
    num_epochs = trial.suggest_int('num_epochs', 5, 50, step=5)
    criterion = nn.MSELoss()
    # Load Data
    # Establishing and loading data into notebook
    dataset = GHzData()
    #Splitting the data
    train_X, test_X, train_y, test_y = train_test_split(dataset.tensor_60, dataset.tensor_400,
                                                        test_size=0.1)
    # Splits train data into validation data
    train_X, valid_X, train_y, valid_y = train_test_split(train_X, train_y,
                                                          test_size=0.1)
    # Creating datasets
    train_dataset = TensorDataset(train_X, train_y)
    test_dataset = TensorDataset(test_X, test_y)
    valid_dataset = TensorDataset(valid_X, valid_y)
    # Batch size change to higher batch sizes
    train_dataloader = DataLoader(train_dataset, batch_size=batch_size_trial, shuffle=True)
    valid_dataloader = DataLoader(valid_dataset, batch_size=batch_size_trial, shuffle=True)
    test_dataloader = DataLoader(test_dataset, batch_size=batch_size_trial, shuffle=True)
    # Paste training loop here
    for epoch in range(num_epochs):
        train_epoch_loss = fit(model, train_dataloader, optimizer, criterion)
        val_epoch_loss = validate(model, valid_dataloader, optimizer, criterion)
    trial.report(train_epoch_loss, epoch)
    # Handle pruning
    if trial.should_prune():
        raise optuna.exceptions.TrialPruned()
    return train_epoch_loss