# 4. Stitch outputs together
- Once you have fitted the weak, medium and dense diads, stitch them all together, and apply the Ne correction model

In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import joblib
import os
from pickle import load
import pickle
import DiadFit as pf
pf.__version__

'0.0.59'

In [2]:
MasterFolder = r"C:\Users\charl\Documents\Instruments and calibrations"
DayFolder = os.path.join(MasterFolder, "UCB_6-7-23")

meta_path=DayFolder +  '\Metadata'
spectra_path=DayFolder + '\Spectra'

if not os.path.exists(MasterFolder + '\OUTPUT'):
    os.mkdir(MasterFolder +'\OUTPUT')

output_path=MasterFolder +'\OUTPUT'


## Load in specra, and combine

In [3]:
from os import path
if path.exists('Discarded_df.xlsx'):
    discard=pd.read_excel('Discarded_df.xlsx')
else:
    discard=None
if path.exists('Weak_Diads.xlsx'):
    grp1=pd.read_excel('Weak_Diads.xlsx')
else:
    grp1=None
if path.exists('Medium_Diads.xlsx'):
    grp2=pd.read_excel('Medium_Diads.xlsx')
else:
    grp2=None
if path.exists('Strong_Diads.xlsx'):
    grp3=pd.read_excel('Strong_Diads.xlsx')
else:
    grp3=None
df2=pd.concat([grp1, grp2, grp3], axis=0)
if discard is not None:
    discard_cols=discard[discard.columns.intersection(df2.columns)]
    df2=pd.concat([df2, discard_cols])

In [4]:
df2.head()

Unnamed: 0.1,Unnamed: 0,filename,Splitting,Split_err_abs,Split_err_quadrature,Diad1_Combofit_Cent,Diad1_cent_err,Diad1_Combofit_Height,Diad1_Voigt_Cent,Diad1_Voigt_Area,...,HB2_Sigma,C13_Cent,C13_Area,C13_Sigma,Diad2_Gauss_Cent,Diad2_Gauss_Area,Diad2_Gauss_Sigma,Diad1_Gauss_Cent,Diad1_Gauss_Area,Diad1_Gauss_Sigma
0,0,FDCA-P28-1,104.522847,0.004,0.004,1283.107201,0.0,801.982439,1283.107201,2292.943622,...,,,,,,,,,,
1,0,FDCA-P29-1,104.431439,0.004738,0.004738,1283.26292,0.0,697.29594,1283.26292,2031.641449,...,,,,,,,,,,
2,0,FDCA-P30-1,104.325317,0.015497,0.011279,1283.553382,0.009637,485.94841,1283.553382,1460.872215,...,,,,,,,,,,
3,0,FDCA-P30-2,104.290017,0.01117,0.008092,1283.590141,0.006829,489.935089,1283.590141,1452.568423,...,,,,,,,,,,
4,0,FDCA-P30-3,104.303668,0.011112,0.007885,1283.57794,0.006022,492.050372,1283.57794,1469.331278,...,,,,,,,,,,


## First, get the metadata for this day

In [5]:
# Put the common string you can in all your Ne lines here
ID_str='_'
file_ext_meta='txt' # Will only take files of this type 
diad_meta=pf.get_files(path=meta_path,
file_ext='txt', exclude_str=['N', 'Si', 'series','IMG','Cap','window','nodiad'],
 sort=True)
diad_meta

['FDCA-P1-1.txt',
 'FDCA-P1-2.txt',
 'FDCA-P1-3.txt',
 'FDCA-P10-1.txt',
 'FDCA-P11-1.txt',
 'FDCA-P12-1.txt',
 'FDCA-P13-1.txt',
 'FDCA-P14-1.txt',
 'FDCA-P14-2.txt',
 'FDCA-P14-3.txt',
 'FDCA-P15-1.txt',
 'FDCA-P16-1.txt',
 'FDCA-P17-1.txt',
 'FDCA-P18-1.txt',
 'FDCA-P19-1.txt',
 'FDCA-P19-2.txt',
 'FDCA-P19-3.txt',
 'FDCA-P2-1.txt',
 'FDCA-P2-2.txt',
 'FDCA-P2-3.txt',
 'FDCA-P20-1.txt',
 'FDCA-P21-1.txt',
 'FDCA-P22-1.txt',
 'FDCA-P23-1.txt',
 'FDCA-P23-2.txt',
 'FDCA-P23-3.txt',
 'FDCA-P24-1.txt',
 'FDCA-P25-1.txt',
 'FDCA-P26-1.txt',
 'FDCA-P27-1.txt',
 'FDCA-P27-2.txt',
 'FDCA-P27-3.txt',
 'FDCA-P28-1.txt',
 'FDCA-P29-1.txt',
 'FDCA-P3-1.txt',
 'FDCA-P3-2.txt',
 'FDCA-P3-3.txt',
 'FDCA-P30-1.txt',
 'FDCA-P30-2.txt',
 'FDCA-P30-3.txt',
 'FDCA-P4-1.txt',
 'FDCA-P4-2.txt',
 'FDCA-P4-3.txt',
 'FDCA-P5-1.txt',
 'FDCA-P5-2.txt',
 'FDCA-P5-3.txt',
 'FDCA-P6-1.txt',
 'FDCA-P6-2.txt',
 'FDCA-P6-3.txt',
 'FDCA-P7-1.txt',
 'FDCA-P7-2.txt',
 'FDCA-P7-3.txt',
 'FDCA-P8-1.txt',
 'FDCA-P8-2.txt

## Now get all the important information out of the metadata files

In [6]:
meta=pf.stitch_metadata_in_loop_witec(path=meta_path, 
Allfiles=diad_meta, prefix=False,
trupower=True)
meta['filename'].iloc[0]

100%|██████████| 58/58 [00:00<00:00, 594.78it/s]

Done





'FDCA-P1-1.txt'

## Then get a simple file name you can stitch with the spectra

In [7]:
file_m=pf.extracting_filenames_generic(names=meta['filename'],
    prefix=False, str_prefix=" ",
   file_type='.txt')
# Checks they are all unique
file_m[0]

good job, no duplicate file names


'FDCA-P1-1'

## Now get filename from spectra into same form as metadata

In [8]:
# Remove these to get the pure file name
file_s=pf.extracting_filenames_generic(
    prefix=False, str_prefix=" ",
    names=df2['filename'].reset_index(drop=True),
   file_type='.txt')

# Remove the cosmic rays, as doesnt have it in the metatdata
file_s = np.char.replace(file_s.astype(str), "_CRR_DiadFit", "")

good job, no duplicate file names


In [9]:
file_s

array(['FDCA-P28-1', 'FDCA-P29-1', 'FDCA-P30-1', 'FDCA-P30-2',
       'FDCA-P30-3', 'FDCA-P1-1', 'FDCA-P1-2', 'FDCA-P1-3', 'FDCA-P10-1',
       'FDCA-P11-1', 'FDCA-P12-1', 'FDCA-P13-1', 'FDCA-P14-1',
       'FDCA-P14-2', 'FDCA-P14-3', 'FDCA-P15-1', 'FDCA-P16-1',
       'FDCA-P17-1', 'FDCA-P18-1', 'FDCA-P19-1', 'FDCA-P19-2',
       'FDCA-P19-3', 'FDCA-P2-1', 'FDCA-P2-2', 'FDCA-P2-3', 'FDCA-P20-1',
       'FDCA-P21-1', 'FDCA-P22-1', 'FDCA-P23-1', 'FDCA-P23-2',
       'FDCA-P23-3', 'FDCA-P24-1', 'FDCA-P25-1', 'FDCA-P26-1',
       'FDCA-P27-1', 'FDCA-P27-2', 'FDCA-P27-3', 'FDCA-P3-1', 'FDCA-P3-2',
       'FDCA-P3-3', 'FDCA-P4-1', 'FDCA-P4-2', 'FDCA-P4-3', 'FDCA-P5-1',
       'FDCA-P5-2', 'FDCA-P5-3', 'FDCA-P6-1', 'FDCA-P6-2', 'FDCA-P6-3',
       'FDCA-P7-1', 'FDCA-P7-2', 'FDCA-P7-3', 'FDCA-P8-1', 'FDCA-P8-2',
       'FDCA-P8-3', 'FDCA-P9-1', 'FDCA-P9-2', 'FDCA-P9-3'], dtype='<U10')

In [10]:
# Combining them together
meta['name_for_matching']=file_m
df2['name_for_matching']=file_s
df2['Name_for_Secondary_Phases']=file_s
df_combo=df2.merge(meta, on='name_for_matching')

In [11]:
Ne_corr=pf.calculate_Ne_corr_std_err_values(pickle_str='polyfit_data.pkl', 
    new_x=df_combo['sec since midnight'], CI=0.67)
Ne_corr.head()


1


Unnamed: 0,time,preferred_values,lower_values,upper_values
0,60627.0,0.997538,0.997533,0.997544
1,61089.0,0.997538,0.997532,0.997544
2,61885.0,0.997537,0.997531,0.997543
3,61961.0,0.997537,0.997531,0.997543
4,62046.0,0.997537,0.997531,0.997543


In [12]:

split_err=pf.propagate_errors_for_splitting(Ne_corr, df_combo)
df_combo_out=df_combo.copy()
df_combo_out.insert(1, 'Corrected_Splitting', df_combo['Splitting']*Ne_corr['preferred_values'])
df_combo_out.insert(2, 'Corr_Split+1σ', df_combo_out['Corrected_Splitting']+split_err)
df_combo_out.insert(3, 'Corr_Split-1σ',df_combo_out['Corrected_Splitting']-split_err)
df_combo_out.insert(4, 'Corr_Split_1σ_val', split_err)

nm=os.path.basename(DayFolder)
df_combo_out.to_excel(output_path+'/'+nm+'_FI_fitting.xlsx')

## If you have secondary phases, now is the time to merge those in

In [13]:
if path.exists('Carb_Peak_fits.xlsx'):
    Carb=pd.read_excel('Carb_Peak_fits.xlsx')
else:
    Carb=None
if path.exists('SO2_Peak_fits.xlsx'):
    SO2=pd.read_excel('SO2_Peak_fits.xlsx')
else:
    SO2=None
if SO2 is not None and Carb is not None:
    Sec_Phases=pd.merge(SO2, Carb, on='filename', how='outer')
elif SO2 is not None and Carb is None:
    Sec_Phases=SO2
elif SO2 is None and Carb is not None:
    Sec_Phases=Carb
else:
    Sec_Phases=None
Sec_Phases.head()

AttributeError: 'NoneType' object has no attribute 'head'

In [None]:
# Remove these to get the pure file name
if Sec_Phases is not None:
    file_sec_phase=pf.extracting_filenames_generic(
        prefix=False, str_prefix=" ",
        names=Sec_Phases['filename'].reset_index(drop=True),
       file_type='.txt')

    file_sec_phase

good job, no duplicate file names


In [14]:
file_sec_phase

NameError: name 'file_sec_phase' is not defined

In [15]:
df_combo['name_for_matching']

0     FDCA-P28-1
1     FDCA-P29-1
2     FDCA-P30-1
3     FDCA-P30-2
4     FDCA-P30-3
5      FDCA-P1-1
6      FDCA-P1-2
7      FDCA-P1-3
8     FDCA-P10-1
9     FDCA-P11-1
10    FDCA-P12-1
11    FDCA-P13-1
12    FDCA-P14-1
13    FDCA-P14-2
14    FDCA-P14-3
15    FDCA-P15-1
16    FDCA-P16-1
17    FDCA-P17-1
18    FDCA-P18-1
19    FDCA-P19-1
20    FDCA-P19-2
21    FDCA-P19-3
22     FDCA-P2-1
23     FDCA-P2-2
24     FDCA-P2-3
25    FDCA-P20-1
26    FDCA-P21-1
27    FDCA-P22-1
28    FDCA-P23-1
29    FDCA-P23-2
30    FDCA-P23-3
31    FDCA-P24-1
32    FDCA-P25-1
33    FDCA-P26-1
34    FDCA-P27-1
35    FDCA-P27-2
36    FDCA-P27-3
37     FDCA-P3-1
38     FDCA-P3-2
39     FDCA-P3-3
40     FDCA-P4-1
41     FDCA-P4-2
42     FDCA-P4-3
43     FDCA-P5-1
44     FDCA-P5-2
45     FDCA-P5-3
46     FDCA-P6-1
47     FDCA-P6-2
48     FDCA-P6-3
49     FDCA-P7-1
50     FDCA-P7-2
51     FDCA-P7-3
52     FDCA-P8-1
53     FDCA-P8-2
54     FDCA-P8-3
55     FDCA-P9-1
56     FDCA-P9-2
57     FDCA-P9-3
Name: name_for

In [16]:
df_combo['name_for_matching']=df_combo['Name_for_Secondary_Phases']

if Sec_Phases is not None:
    Sec_Phases['name_for_matching']=file_sec_phase
    df_combo_sec_phase=df_combo_out.merge(Sec_Phases, 
    on='name_for_matching', how='outer')
    
else:
    df_combo_sec_phase=df_combo_out

### Add some more useful columns

In [17]:
if 'Peak_Area_Carb' in df_combo_sec_phase.columns:
    df_combo_sec_phase['Carb_Diad_Ratio']=(df_combo_sec_phase['Peak_Area_Carb']/(df_combo_sec_phase['Diad1_Voigt_Area']
                      +df_combo_sec_phase['Diad2_Voigt_Area']))
if 'Peak_Area_SO2' in df_combo_sec_phase.columns:
    df_combo_sec_phase['SO2_Diad_Ratio']=(df_combo_sec_phase['Peak_Area_SO2']/(df_combo_sec_phase['Diad1_Voigt_Area']
                      +df_combo_sec_phase['Diad2_Voigt_Area']))
    
df_combo_sec_phase.head()

Unnamed: 0.1,Unnamed: 0,Corrected_Splitting,Corr_Split+1σ,Corr_Split-1σ,Corr_Split_1σ_val,filename_x,Splitting,Split_err_abs,Split_err_quadrature,Diad1_Combofit_Cent,...,Month,Day,power (mW),Int_time (s),accumulations,Mag (X),duration,24hr_time,sec since midnight,Spectral Center
0,0,104.265563,104.269599,104.261526,0.004036,FDCA-P28-1,104.522847,0.004,0.004,1283.107201,...,June,7.0,34.006,10.0,5.0,5.0,"['0h', '0m', '52s']",4:50:27 PM\n,60627.0,1325.004
1,0,104.174307,104.179073,104.169542,0.004766,FDCA-P29-1,104.431439,0.004738,0.004738,1283.26292,...,June,7.0,34.046,10.0,5.0,5.0,"['0h', '0m', '51s']",4:58:09 PM\n,61089.0,1325.004
2,0,104.068349,104.079617,104.057081,0.011268,FDCA-P30-1,104.325317,0.015497,0.011279,1283.553382,...,June,7.0,34.031,10.0,5.0,5.0,"['0h', '0m', '51s']",5:11:25 PM\n,61885.0,1325.004
3,0,104.033129,104.041224,104.025033,0.008095,FDCA-P30-2,104.290017,0.01117,0.008092,1283.590141,...,June,7.0,34.019,10.0,5.0,5.0,"['0h', '0m', '51s']",5:12:41 PM\n,61961.0,1325.004
4,0,104.046738,104.054628,104.038849,0.007889,FDCA-P30-3,104.303668,0.011112,0.007885,1283.57794,...,June,7.0,34.028,10.0,5.0,5.0,"['0h', '0m', '51s']",5:14:06 PM\n,62046.0,1325.004


In [18]:
df_combo_sec_phase.to_excel(output_path+'/'+nm+'_FI_fitting_w_sec_phases.xlsx')


In [19]:
from winotify import Notification, audio

toast= Notification(app_id="VSCode",title="Notebook completed",
                    msg="Step4_Stitch_Outputs_Together is done!",
                    duration="short")
toast.set_audio(audio.Mail,loop=False)
toast.show()