In [6]:
# Here we import all the necessary libraries to generate NMR spectra
import os
from random import randint, uniform

import matplotlib.pyplot as plt
import pandas
from nmrsim import Multiplet

In [1]:
# Function to generate NMR signals based on input argument of spectrometer freq
def NMR_Signal_Generator(spectrometer_frequencies):
    """Create one random NMR signal and express it at every requested frequency.

    A single set of random peak parameters (integral, chemical shift,
    coupling constant and multiplicity) is drawn once and then reused to
    build one nmrsim ``Multiplet`` per entry of ``spectrometer_frequencies``.

    Parameters
    ----------
    spectrometer_frequencies : iterable of numbers
        Spectrometer frequencies; each one multiplies the chemical shift to
        give the position passed to ``Multiplet``.

    Returns
    -------
    list
        One ``Multiplet`` per frequency, in the same order as the input.
    """
    # The draws are made in a fixed order (integral, shift, coupling,
    # multiplicity) so a seeded RNG yields the same sequence of peaks.
    proton_count = randint(1, 4)       # peak integral
    shift = uniform(0.5, 9)            # chemical shift
    j_coupling = uniform(3, 15)        # J coupling frequency
    splitting = randint(0, 5)          # multiplicity (peak splitting)
    peak_width_hz = 2                  # fixed linewidth for every peak

    multiplets = []
    for field in spectrometer_frequencies:
        # Only the peak position depends on the spectrometer frequency
        position = shift * field
        multiplets.append(
            Multiplet(position, proton_count, [(j_coupling, splitting)], peak_width_hz)
        )
    return multiplets

In [4]:
def create_random_spectra(num_peaks):
    """Simulate one random spectrum at 400 MHz and at 60 MHz.

    ``num_peaks`` random signals are generated with ``NMR_Signal_Generator``;
    each signal exists once per spectrometer frequency. For every frequency
    the signals are accumulated onto a starting ``Multiplet(0, 0.5, [], 2)``
    and the summed object's ``lineshape`` is evaluated on 5500 points
    between -0.5 and 10.5 times the spectrometer frequency.

    Parameters
    ----------
    num_peaks : int
        Number of random signals to add to each spectrum.
        NOTE(review): with ``num_peaks == 0`` nothing is added to the
        starting ``Multiplet`` before ``lineshape`` is called on it --
        confirm that case is supported or never requested.

    Returns
    -------
    tuple
        ``(x_400, y_400, x_60, y_60)`` -- the x and y values returned by
        ``lineshape`` for the 400 MHz and the 60 MHz spectrum respectively.
    """
    spectrometer_frequencies = [400, 60]

    # One row per peak; entry k of a row is that peak at
    # spectrometer_frequencies[k]. A plain list is used rather than
    # appending rows to a DataFrame one at a time.
    peak_rows = [
        NMR_Signal_Generator(spectrometer_frequencies)
        for _ in range(num_peaks)
    ]

    lineshapes = []
    for column, frequency in enumerate(spectrometer_frequencies):
        # Starting peak at position 0 that every generated signal is added to
        spectrum = Multiplet(0, 0.5, [], 2)
        for row in peak_rows:
            spectrum += row[column]

        # Same relative window at every frequency: -0.5x to 10.5x the frequency
        spectrum.vmin = -0.5 * frequency
        spectrum.vmax = 10.5 * frequency
        lineshapes.append(spectrum.lineshape(points=5500))

    (x_400, y_400), (x_60, y_60) = lineshapes
    return x_400, y_400, x_60, y_60


In [19]:
def write_nmr_spectra(index, num_peaks, x_400, y_400, x_60, y_60,
                      output_dir="NMRfiles_Test/400MHz/"):
    """Write one paired 400 MHz / 60 MHz spectrum to a CSV file.

    The x values are divided by their spectrometer frequency (400 or 60)
    and stored in the ``*_ppm`` columns; the y values are stored unchanged
    in the ``*_intensity`` columns. The file is named
    ``spectral_data_<num_peaks, 2 digits>_<index, 5 digits>.csv`` and its
    path is printed before writing.

    Parameters
    ----------
    index : int
        Running number of the spectrum, zero-padded to 5 digits in the name.
    num_peaks : int
        Number of peaks in the spectrum, zero-padded to 2 digits in the name.
    x_400, y_400 : array-like supporting division by a scalar
        x and y values of the 400 MHz spectrum.
    x_60, y_60 : array-like supporting division by a scalar
        x and y values of the 60 MHz spectrum.
    output_dir : str, optional
        Folder the CSV is written to. It is created if it does not exist.
        Defaults to the folder originally hard-coded in this function.

    Returns
    -------
    None
    """
    frequency_400, frequency_60 = 400, 60

    spectral_data = pandas.DataFrame({
        '400MHz_ppm': x_400 / frequency_400,
        '400MHz_intensity': y_400,
        '60MHz_ppm': x_60 / frequency_60,
        '60MHz_intensity': y_60,
    })

    # to_csv does not create missing folders, so make sure the target exists.
    # An empty output_dir means "current directory" and needs no creation.
    if output_dir:
        os.makedirs(output_dir, exist_ok=True)

    filename = os.path.join(
        output_dir,
        "spectral_data_" + str(num_peaks).zfill(2) + "_" + str(index).zfill(5) + ".csv",
    )
    print(filename)
    # The DataFrame index is written as the first CSV column (to_csv default)
    spectral_data.to_csv(filename)

In [20]:
import time

In [21]:
# Generate a batch of random spectra and write each one to its own CSV file.
# N_SPECTRA = how many spectra to generate
# j         = running index of the current spectrum (used in the file name)
# num_peaks = number of peaks in the current spectrum, drawn from 5 to 16 inclusive
N_SPECTRA = 200

# perf_counter is the clock intended for measuring elapsed intervals
start_time = time.perf_counter()

for j in range(N_SPECTRA):
    num_peaks = randint(5, 16)
    x_400, y_400, x_60, y_60 = create_random_spectra(num_peaks)
    write_nmr_spectra(j, num_peaks, x_400, y_400, x_60, y_60)

# Elapsed seconds for the whole batch
print(time.perf_counter() - start_time)

NMRfiles_Test/400MHz/spectral_data_11_00000.csv
NMRfiles_Test/400MHz/spectral_data_12_00001.csv
NMRfiles_Test/400MHz/spectral_data_11_00002.csv
NMRfiles_Test/400MHz/spectral_data_16_00003.csv
NMRfiles_Test/400MHz/spectral_data_06_00004.csv
NMRfiles_Test/400MHz/spectral_data_11_00005.csv
NMRfiles_Test/400MHz/spectral_data_06_00006.csv
NMRfiles_Test/400MHz/spectral_data_08_00007.csv
NMRfiles_Test/400MHz/spectral_data_12_00008.csv
NMRfiles_Test/400MHz/spectral_data_08_00009.csv
NMRfiles_Test/400MHz/spectral_data_12_00010.csv
NMRfiles_Test/400MHz/spectral_data_13_00011.csv
NMRfiles_Test/400MHz/spectral_data_15_00012.csv
NMRfiles_Test/400MHz/spectral_data_16_00013.csv
NMRfiles_Test/400MHz/spectral_data_16_00014.csv
NMRfiles_Test/400MHz/spectral_data_16_00015.csv
NMRfiles_Test/400MHz/spectral_data_05_00016.csv
NMRfiles_Test/400MHz/spectral_data_13_00017.csv
NMRfiles_Test/400MHz/spectral_data_15_00018.csv
NMRfiles_Test/400MHz/spectral_data_05_00019.csv
NMRfiles_Test/400MHz/spectral_data_08_00

We will need to: a) run a hyperparameter search for the machine learning model, irrespective of which model we end up choosing; and b) explore different encoding/decoding ML frameworks (CNN, ANN, etc.).

In [22]:
import pandas as pd

In [23]:
# Load one previously written spectrum file to inspect its layout.
# read_csv is called without index_col, so any index column stored in the
# CSV is loaded as an ordinary "Unnamed: N" column.
test_csv_path = './NMRfiles_Test/spectral_data_05_00001.csv'
df_test = pd.read_csv(test_csv_path)
df_test

Unnamed: 0.1,Unnamed: 0,400MHz_ppm,400MHz_intensity,60MHz_ppm,60MHz_intensity
0,0,-0.500000,0.000005,-0.500000,0.000222
1,1,-0.498000,0.000005,-0.498000,0.000224
2,2,-0.495999,0.000005,-0.495999,0.000225
3,3,-0.493999,0.000005,-0.493999,0.000226
4,4,-0.491999,0.000005,-0.491999,0.000228
...,...,...,...,...,...
5495,5495,10.491999,0.000002,10.491999,0.000084
5496,5496,10.493999,0.000002,10.493999,0.000084
5497,5497,10.495999,0.000002,10.495999,0.000083
5498,5498,10.498000,0.000002,10.498000,0.000083
