-
Notifications
You must be signed in to change notification settings - Fork 0
/
ClearSpectrum2.py
579 lines (474 loc) · 20.5 KB
/
ClearSpectrum2.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
#!/usr/bin/python3
#coding -*-coding:Utf-8 -*
# Import
import sys # basic files tools
from matplotlib.pyplot import *
from mpl_toolkits.axes_grid1.axes_rgb import make_rgb_axes, RGBAxes
import numpy as np
from math import *
import time
# Yes, they really are
# VARIABLES GLOBAL to the Module
# in getFileName
spectrum_name = 'FromMarie/CP99_pzf1.sp'
file_name = 'FromMarie/CP99_pzf1'
known_freq_name = 'FromMarie/artifacts.cat'
known_name = 'artifacts'
ext = 'cat'
factor = 1
# in readSpectrumAndLineFiles
freq_list_ini = []
int_list_ini = []
step = 1.0
peak_freq_ini = []
peak_int_ini = []
peak_unc_ini = []
peak_array = np.array( [peak_freq_ini, peak_int_ini, peak_unc_ini] )
# in readKnownFile
known_freq_array = []
#in peak_assignment
match_freq = []
match_int = []
# in createListOfPeaks
match_peak_array = []
peak_freq_end = []
peak_int_end = []
peak_unc_end = []
peak_freq_assign = []
peak_int_assign = []
peak_unc_assign = []
#in ClearSpectrum
freq_list_assign = []
int_list_end = []
freq_list_end = []
int_list_assign = []
bucket = []
bucketedLines = []
def clearVariables(): # This clears the variables necessary for re-doing "remove molecular lines"
# IT does NOT remove the variables necessary for opening a new file, or creating a new FFT
global known_freq_array; global match_freq; global match_int;
global match_peak_array; global peak_freq_end; global peak_int_end; global peak_unc_end;
global peak_freq_assign; global peak_int_assign; global peak_unc_assign;
global freq_list_assign; global int_list_end; global freq_list_end; global int_list_assign
# del known_freq_array[:] - set to an np.array
# in peak_assignment
del match_freq[:]
del match_int[:]
# in createListOfPeaks
# del match_peak_array[:] - set to an np.array
del peak_freq_end[:]
del peak_int_end[:]
del peak_unc_end[:]
del peak_freq_assign[:]
del peak_int_assign[:]
del peak_unc_assign[:]
#in ClearSpectrum
del freq_list_assign[:]
del int_list_end[:]
del freq_list_end[:]
del int_list_assign[:]
#########################################################
# BODY OF THE PROGRAM
#########################################################
#------------------------------------
# Input file and reading of the file
#------------------------------------
# Input file
def getFileName():
global spectrum_name; global file_name; global known_freq_name
global known_name; global ext; global factor
if len(sys.argv) != 3:
print('Usage: ClearSpectrum.py spectrum_file known_freq_file')
exit(1)
# The script needs 3 input files :
# the experimental spectrum;
# the peak list with the same name than the experimental spectrum, and the extension'.lines';
# a file containing a list of known frequencies
# The peak list can contain as many column as desired.
# However, the frequency should be in the first column,
# and the intensity in the second
# In order to have the output files name correctly,
# please respect these few pre-requisites:
# the experimental spectrum should possess an extension
# the known frequency file should possess an extension
# and be placed in a subfolder.
# It should contain in its first column the frequency, and
# on the third the intensity in Pickett's format (log(int))
spectrum_name = sys.argv[1]
file_name = sys.argv[1].split('.')[0]
known_freq_name = sys.argv[2]
known_name, ext = sys.argv[2].split('.')
if ('/' in known_name):
known_name = known_name.split('/')[1]
factor = 1
if ('artifacts' in known_name):
factor = 2
#--------------------------------------------------------------------
# Read the spectrum file
# and create a list of frequencies, and a list of intensities
#--------------------------------------------------------------------
def readSpectrumAndLineFiles():
global freq_list_ini; global int_list_ini
global peak_freq_ini; global peak_int_ini; global peak_unc_ini;
global peak_array; global step
global bucket; global bucketedLines;
with open(spectrum_name, 'r') as spectrum_file:
spectrum_lines = spectrum_file.read().splitlines()
del freq_list_ini[:]
del int_list_ini[:]
del bucket[:]
del bucketedLines[:]
for i in range(len(spectrum_lines)):
if (len(spectrum_lines[i])>1):
freq_ini, int_ini = spectrum_lines[i].split()
freq_list_ini.append(float(freq_ini))
int_list_ini.append(float(int_ini))
#else:
# freq_list_ini.append(freq_list_ini[i-1])
# int_list_ini.append(-0.0001)
del(spectrum_lines)
step = freq_list_ini[1] - freq_list_ini[0]
#--------------------------------------------------------------------
# Read the peak file
# and an array with a list of frequencies and intensities
# and a transpose of this array as the initial list of remaining peaks
#--------------------------------------------------------------------
with open(file_name + '.lines', 'r') as peak_file:
peak_lines = peak_file.read().splitlines()
del peak_freq_ini[:]
del peak_int_ini[:]
del peak_unc_ini[:]
for i in range(len(peak_lines)):
peak_freq = peak_lines[i].split()[0]
peak_int = peak_lines[i].split()[1]
peak_freq_ini.append(float(peak_freq))
peak_int_ini.append(float(peak_int))
peak_unc = peak_lines[i].split()[2]
peak_unc_ini.append(float(peak_unc))
lines_ini_length = str(len(peak_int_ini))
print(lines_ini_length + ' initial experimental frequencies')
peak_array = np.array( [peak_freq_ini, peak_int_ini, peak_unc_ini] )
#--------------------------------------------------------------------
# Read the known freq file
# Create a list and an array of known frequencies
#--------------------------------------------------------------------
def readKnownFile():
global known_freq_array
global known_name; global ext; global factor
with open(known_freq_name, 'r') as known_freq_file:
known_freq_lines = known_freq_file.read().splitlines()
known_freq_list = []
known_freq_int = []
known_name, ext = known_freq_name.split('.')
print("known name = " + known_name)
if ('/' in known_name):
known_name = known_name.split('/')[1]
factor = 1
if ('artifacts' in known_name):
factor = 2
#print("Number of Lines = " + str(len(known_freq_lines)))
for i in range(len(known_freq_lines)):
#if(known_name == "h2c6"):
# print ("i:", i)
# print (known_freq_lines[i])
# print("i = " + str(i))
# print("string = "+ known_freq_lines[i])
known_freq = known_freq_lines[i].split()[0]
if (ext == 'list'): # case of just a frequency list
known_int = -1
elif (ext == "lines"): #PRAA Mar 29 2016
known_int = -1
else :
known_int = known_freq_lines[i].split()[2]
if (freq_list_ini[0] < float(known_freq) < freq_list_ini[-1]):
if (ext == 'mrg'):
seen_freq = known_freq_lines[i].split()[5]
try:
seen_freq = float(seen_freq)
except ValueError: #only if the sixth column is not an interger == only if the column has a '-' sign == experimental frequency
known_freq_list.append(float(known_freq))
known_freq_int.append(10**float(known_int))
else:
known_freq_list.append(float(known_freq))
known_freq_int.append(10**float(known_int))
del(known_freq_lines)
known_freq_array = np.array( [known_freq_list, known_freq_int] ).transpose()
known_freq_array = known_freq_array[known_freq_array[:, 1].argsort()[::-1]]
# the list is here sorted by decreasing intensities
del(known_freq_list, known_freq_int)
#---------------------------------------------------
# PEAK ASSIGNMENT
# Compare the list of experimental peaks
# and the list of known frequencies to find matches
#---------------------------------------------------
def peakAssignment(doNotPlot):
global match_freq
global match_int
index_list = []
match_unc = []
# Initialize a counter for matches
counter = 0
del match_freq[:]
del match_int[:]
del match_unc[:]
for i in range(len(known_freq_array)): # PRAA note that peak_array[0] is a column of frequencies
min_freq = np.abs(known_freq_array[i,0] - peak_array[0]) # PRAA note that min_freq is an array
index = np.argmin(min_freq)
#print("I, index, freq ", str(i), str(index), known_freq_array[i,0])
if (min_freq[index] < 0.2):
#append the match list, and remove items from the remaining peaks array
print("index= "+ str(index))
print("peak_array[0, index] = " + str(peak_array[0, index]))
match_freq.append(peak_array[0,index])
match_int.append(peak_array[1,index])
match_unc.append(peak_array[2,index])
index_list.append(index)
counter += 1
# Exit the script if counter < i for the 3 strongest predicted lines
if (counter - i < 0 and 0 < i < 1 and (doNotPlot == False)):
print('No plausible line of ' + known_name + ' in the spectrum')
#---------------------------------------------------
# PLOT
#---------------------------------------------------
figure(2) # creation of a figure
#
# Known frequencies
subplot(211) #
title ('Known frequencies: ' + known_name)
ion() #interactive mode
xlim([freq_list_ini[0], freq_list_ini[-1]])
vlines(known_freq_array[:,0], 1e-10, known_freq_array[:,1], colors='red')
#
# Initial spectrum in the background, Cleared spectrum in the front
subplot(212, sharex=subplot(211))
title ('Initial spectrum (red), Cleared spectrum (blue)')
plot(freq_list_ini,int_list_ini)
#
show(block=True)
sys.exit()
del(index)
if ((counter == 0) and (doNotPlot == False)):
print('No plausible line of ' + known_name + ' in the spectrum')
#---------------------------------------------------
# PLOT
#---------------------------------------------------
figure(2) # creation of a figure
#
# Known frequencies
subplot(211) #
title ('Known frequencies: ' + known_name)
ion() #interactive mode
xlim([freq_list_ini[0], freq_list_ini[-1]])
vlines(known_freq_array[:,0], 1e-10, known_freq_array[:,1], colors='red')
#
# Initial spectrum in the background, Cleared spectrum in the front
subplot(212, sharex=subplot(211))
title ('Initial spectrum (red), Cleared spectrum (blue)')
plot(freq_list_ini,int_list_ini)
# Remove the following two lines for testing Feb 9 2016 praa
# show(block=True)
show(block=False)
# sys.exit()
time.sleep(2)
print(" finished with this task, and sleeping for 2 seconds")
return(counter)
#---------------------------------------------------
# Creation of a reduce list of peaks
# And a list of assigned peaks
#---------------------------------------------------
def createListsOfPeaks():
# Remove multiple occurences in index_list
# and sort by decreasing values (allow to delete)
global match_freq
global peak_array
global match_peak_array
#index_list = list(set(index_list))
#index_list.sort(reverse=True)
#
#peak_lines_end = list(peak_lines)
global peak_freq_end
global peak_int_end
global peak_unc_end
global peak_freq_assign
global peak_int_assign
global peak_unc_assign
global bucket
global bucketedLines
#peaks_assigned = []
# remove the assigned peak + those that are too close (in the cleaning window)
# print(str(peak_freq_ini))
for i in range(len(peak_freq_ini)):
counter = 0
for j in range(len(match_freq)):
if (abs(match_freq[j]-peak_freq_ini[i]) < (5.2 - 4.97 * 0.41**(match_int[j])) *factor):
counter += 1
peak_freq_assign.append(peak_freq_ini[i])
peak_int_assign.append(peak_int_ini[i])
peak_unc_assign.append(peak_unc_ini[i])
# Also drop these into the bucket of lines that are assigned, with the
# name of the assignment . . .. something like
bucket.append([peak_freq_ini[i], peak_int_ini[i], known_name])
# where file_name gives the name of the molecule or "artifact"
#but now we need to keep track of what was taken out so we will have a list of unassigned lines -
# could do somthing like
#if(peak_freq_ini[i]) in leftoversOfBucket: removeIt
# humk - this seems awkward -
# how about we just record the indices?? That's much better !!!
bucketedLines.append(i)
if (counter == 0):
peak_freq_end.append(peak_freq_ini[i])
peak_int_end.append(peak_int_ini[i])
peak_unc_end.append(peak_unc_ini[i])
#for i in range(len(index_list)):
# del(peak_lines_end[index_list[i]])
# peaks_assigned.append(peak_lines[index_list[i]])
#
#del(index_list)
# Check that the matching frequencies + new peak list
# is equal to the initial number of peaks
#check = len(peak_lines) - len(peaks_assigned) - len(peak_lines_end)
print(str(len(match_freq)) + ' matching frequencies in ' + known_freq_name)
print(str(match_freq)) #praa MArch 30 2016
print(str(len(peak_freq_ini) - len(peak_freq_end)) + ' peak removed')
if (len(match_freq) > (len(peak_freq_ini) - len(peak_freq_end))):
print("Mismatch ! Matching Frequencies: "+ str(match_freq))
#print(peaks_assigned)
print(str(len(peak_freq_end)) + ' remaining peaks')
#print('----------------------')
#print(str(len(peak_freq_end)) + ' - ' + str(len(peaks_assigned)) + ' - ' + str(len(peak_lines_end)) + ' = ' + str(check))
#print('----------------------')
# Create an array containing the list of matching frequencies
match_peak_array = np.array( match_freq )
del(counter)
#---------------------------------------------------
# CLEAR SPECTRUM
# Create a new spectrum without known transitions
#---------------------------------------------------
def clearSpectrum():
global freq_list_ini
global int_list_ini
global match_peak_array
global freq_list_assign
global int_list_end
global freq_list_end
global int_list_assign
# Loop on the spectrum frequencies
for i in range(len(freq_list_ini)):
# definition of an array for each frequency
# by calculating the difference between the frequencies of the spectrum
# and the matching frequencies
diff = np.abs(match_peak_array-freq_list_ini[i])
# finding the index of the minimum of this array
index = np.argmin(diff)
# Since the array diff is of the same dimension as match_peak_array,
# it is also of the same dimension as the match_int list,
# We can thus determine intensity at this index.
# The following expression of type y = a - b * c**x allows to clear the spectrum
# in a range depending of the intensity of the peak to remove.
# Values have been determined to describe these points:
# (0,0), (0.03,0.3), (1,3), (5,5)
if ( diff[index] < (5.2 - 4.97 * 0.41**(match_int[index])) *factor ):
freq_list_assign.append(freq_list_ini[i])
int_list_assign.append(int_list_ini[i])
# Also drop these into the bucket of lines that are assigned, with the
# name of the assignment . . .. e.g. - oh, not here in the spectrum, in the line list - previous routime
# bucket.append(freq_list_ini[i], int_list_ini[i], file_name)
else:
int_list_end.append(int_list_ini[i])
freq_list_end.append(freq_list_ini[i])
# Create new lists for the assigned transitions
# and without them
#---------------------------------------------------
# EXPORT THE ClEARED GRAPH,
# THE ASSIGNED PORTIONS OF THE GRAPH
# THE LIST OF REMAINING PEAKS,
# AND THE LIST OF ASSIGNED PEAKS
#---------------------------------------------------
def exportTheClearedFiles():
if("clear" in file_name):
file_name_short = file_name.split('_clear')[0]
file_name_long = file_name
else:
file_name_short = file_name
file_name_long = file_name + '_clear'
with open(file_name_long + '_' + known_name + '.sp', 'w') as sp_file:
for i in range(len(freq_list_end)-1):
sp_file.write('%5.3f %5.5f\n' %(freq_list_end[i], int_list_end[i]))
# test without these lines - April 6 2016 PRAA
#if (freq_list_end[i+1] - freq_list_end[i] > 2*step):
# sp_file.write('\n')
sp_file.write('%5.3f %5.5f\n' %(freq_list_end[-1], int_list_end[-1]))
with open(file_name_short + '_assigned_' + known_name + '.sp', 'w') as sp_assign_file:
for i in range(len(freq_list_assign)-1):
sp_assign_file.write('%5.3f %5.5f\n' %(freq_list_assign[i], int_list_assign[i]))
#if (freq_list_assign[i+1] - freq_list_assign[i] > 2 * step):
# sp_assign_file.write('\n')
sp_assign_file.write('%5.3f %5.5f\n' %(freq_list_assign[-1], int_list_assign[-1]))
with open(file_name_long + '_' + known_name + '.lines', 'w') as linelist_file:
for i in range(len(peak_freq_end)):
linelist_file.write('%9.3f %5.3f %5.3f\n' %(peak_freq_end[i], peak_int_end[i], peak_unc_end[i]))
with open(file_name_short + '_assigned_' + known_name + '.lines', 'w') as lines_assign_file:
for i in range(len(peak_freq_assign)):
lines_assign_file.write('%9.3f %5.3f %5.3f\n' %(peak_freq_assign[i], peak_int_assign[i], peak_unc_assign[i]))
# def copyClearedLineListToNextInput():
#And also spectrum??
#---------------------------------------------------
# PLOT
#---------------------------------------------------
def renamedPlot():
freq_list_a = []
int_list_a = []
freq_list_e = []
int_list_e = []
#For a nicer plot aspect
for i in range(len(freq_list_assign)-1):
freq_list_a.append(freq_list_assign[i])
int_list_a.append(int_list_assign[i])
if (freq_list_assign[i+1] - freq_list_assign[i] > 2 * step):
freq_list_a.append(freq_list_assign[i])
int_list_a.append(0)
freq_list_a.append(freq_list_assign[i+1])
int_list_a.append(0)
freq_list_a.append(freq_list_assign[-1])
int_list_a.append(int_list_assign[-1])
for i in range(len(freq_list_end)-1):
freq_list_e.append(freq_list_end[i])
int_list_e.append(int_list_end[i])
if (freq_list_end[i+1] - freq_list_end[i] > 2 * step):
freq_list_e.append(freq_list_end[i])
int_list_e.append(0)
freq_list_e.append(freq_list_end[i+1])
int_list_e.append(0)
freq_list_e.append(freq_list_end[-1])
int_list_e.append(int_list_end[-1])
figure(2) # creation of a figure
#
# Known frequencies
subplot(211) #
title ('Known frequencies: ' + known_name)
ion() #interactive mode
#xlim([freq_list_ini[0], freq_list_ini[-1]])
vlines(known_freq_array[:,0], 1e-10, known_freq_array[:,1], colors='red')
#
# Initial spectrum in the background, Cleared spectrum in the front
subplot(212, sharex=subplot(211))
title ('Initial spectrum (red), Cleared spectrum (blue)')
plot(freq_list_a,int_list_a, color='red')
plot(freq_list_e,int_list_e)
vlines(match_peak_array, 1e-6, match_int, colors='gray', linestyle = '--')
show(block=True)
if __name__ == "__main__":
# Import
import sys # basic files tools
from matplotlib.pyplot import *
import numpy as np
from math import *
from mpl_toolkits.axes_grid1.axes_rgb import make_rgb_axes, RGBAxes
getFileName()
readSpectrumAndLineFiles()
readKnownFile()
peakAssignment(False)
createListsOfPeaks()
clearSpectrum()
exportTheClearedFiles()
renamedPlot()