In [1]:
import numpy as np
import pandas as pd
import os
from matplotlib import pyplot as plt
from scipy import optimize
from scipy import integrate
from scipy.signal import find_peaks
from scipy.integrate import simpson
from scipy.integrate import quad
from pathlib import Path
from os import listdir, chdir
from os.path import isfile, join
import regex as re
from lmfit import Model
from lmfit.models import LinearModel, GaussianModel, ExponentialModel, ConstantModel, PowerLawModel, PolynomialModel, LorentzianModel, VoigtModel
import math
import time
import itertools as it

In [2]:
def make_dataframe(sample_name, data_path):
    """Load one integrated I-vs-q text file into a two-column DataFrame.

    Parameters
    ----------
    sample_name : str
        Name of the file (inside ``data_path``) written by the I vs q script.
    data_path : str
        Folder that holds the I vs q output files. Use an absolute path.

    Returns
    -------
    pandas.DataFrame
        One row per data line, with columns ``'q'`` and ``'I'``.
    """
    # Integrated XRD pattern from the APS synchrotron experiment (Fall 2021, beamline 5-BM-C).
    file_path = os.path.join(data_path, sample_name)

    # Hand pandas the path rather than an open handle so the file is closed
    # as soon as parsing finishes. The first 13 lines are skipped as header and
    # the rest is read as whitespace-separated columns (sep=r'\s+' is the
    # documented equivalent of the deprecated delim_whitespace=True).
    df = pd.read_csv(file_path, skiprows=13, header=None, sep=r'\s+')
    df.columns = ['q', 'I']

    return df

In [3]:
def get_xy_motor(sample_name, data_path):
    """Read the x and y motor positions out of a file name.

    The x value is the text between ``'_x_'`` and ``'mm_ss_stg2_y_'``; the y
    value is the text between ``'_y_'`` and ``'mm_primary'``. Decimal commas
    are converted to decimal points before the text is turned into a float.
    If any of those markers is missing, the user is asked to type both values.

    Parameters
    ----------
    sample_name : str
        File name that encodes the motor positions.
    data_path : str
        Not used by this function.

    Returns
    -------
    tuple
        ``(x_motor, y_motor)`` as floats.
    """

    def _value_between(opening, closing):
        # A missing marker makes re.search return None, so .end()/.start()
        # raise AttributeError, which triggers the manual fallback below.
        first = re.search(opening, sample_name).end()
        last = re.search(closing, sample_name).start()
        return float(sample_name[first:last].replace(',', '.'))

    try:
        x_motor = _value_between('_x_', 'mm_ss_stg2_y_')
        y_motor = _value_between('_y_', 'mm_primary')
    except AttributeError:
        print('oh shit bra, the name changed! (function could not find x and y position in file name)')
        x_motor = float(input('Whats the x value?'))
        y_motor = float(input('Whats the y value?'))
        print("Groovie.")

    return x_motor, y_motor


In [4]:
def get_points(df,q_min,q_max):
    '''Isolate the q window that contains the desired peak.

    Inputs: data set in data frame (df), lower q bound for peak (q_min),
            upper q bound for peak (q_max)
    Outputs: the rows of df whose 'q' lies in [q_min, q_max], both ends included
    '''
    inside_window = df['q'].between(q_min, q_max)
    return df[inside_window]


In [5]:
def make_model(q_max, q_min, model_centers, sig, amp):
    """Build a linear background plus one Gaussian per requested center.

    Parameters
    ----------
    q_max, q_min : float
        Upper and lower bound applied to every Gaussian center.
    model_centers : iterable of float
        Initial center guess for each Gaussian; one peak is added per entry.
    sig : float
        Initial sigma for every Gaussian (sigma is capped at 0.05).
    amp : float
        Initial amplitude for every Gaussian (amplitude is kept >= 0).

    Returns
    -------
    tuple
        ``(model, pars)``: the composite model and its parameter set.
    """
    background_prefix = 'b_'
    composite = LinearModel(prefix=background_prefix)

    # Linear background starts flat (slope 0) with an intercept of 12.
    pars = composite.make_params()
    pars[background_prefix + 'slope'].set(0)
    pars[background_prefix + 'intercept'].set(12)

    for index, center in enumerate(model_centers):
        # Each Gaussian gets its own prefix: g0_, g1_, ...
        pref = 'g%d_' % index
        gaussian = GaussianModel(prefix=pref)

        pars.update(gaussian.make_params())
        pars[pref + 'center'].set(value=center, min=q_min, max=q_max)
        pars[pref + 'sigma'].set(value=sig, max=0.05)
        pars[pref + 'amplitude'].set(amp, min=0)

        composite = composite + gaussian

    return (composite, pars)
        

In [6]:
def get_model_list(df_cut, q_max, q_min, num_of_centers, num_peaks, sig, amp, peak_name, Li_q_max, Li_q_min):
    """Build one candidate model for every combination of trial peak centers.

    Trial centers are spaced evenly over the guess window in ``num_of_centers``
    steps, which gives ``num_of_centers + 1`` values. For ``peak_name == 'Li'``
    the guess window is ``[Li_q_min, Li_q_max]``; otherwise it is
    ``[q_min, q_max]`` with the first and last trial centers pulled inward by
    10 % of the window width. Every model is bounded by the full
    ``[q_min, q_max]`` range in both cases.

    Parameters
    ----------
    df_cut : pandas.DataFrame
        Not used by this function.
    q_max, q_min : float
        Fit window; passed to ``make_model`` as the center bounds.
    num_of_centers : int
        Number of steps used to spread the trial centers.
    num_peaks : int
        Number of centers in each combination, i.e. peaks per model.
    sig, amp : float
        Initial sigma and amplitude forwarded to ``make_model``.
    peak_name : str
        ``'Li'`` selects the narrowed guess window.
    Li_q_max, Li_q_min : float
        Guess window used only when ``peak_name == 'Li'``.

    Returns
    -------
    list
        ``[(model, pars), ...]`` with one entry per combination of centers.
    """
    is_lithium = peak_name == 'Li'

    # Lithium narrows the window used for guessing only; the model bounds
    # passed to make_model below always stay q_min/q_max.
    if is_lithium:
        guess_max, guess_min = Li_q_max, Li_q_min
    else:
        guess_max, guess_min = q_max, q_min

    step = (guess_max - guess_min) / num_of_centers
    trial_centers = [
        index * step + guess_min
        for index in it.takewhile(lambda value: value <= num_of_centers, it.count())
    ]
    guess_span = guess_max - guess_min

    if not is_lithium:
        # Move the two outermost guesses away from the window edges.
        trial_centers[0] = trial_centers[0] + .1 * guess_span
        trial_centers[-1] = trial_centers[-1] - .1 * guess_span

    # Unique combinations of num_peaks centers; each one seeds a model.
    return [
        make_model(q_max, q_min, centers, sig, amp)
        for centers in it.combinations(trial_centers, num_peaks)
    ]
       

In [7]:
def run_model(df_cut, model, pars):
    """Fit ``model`` to the 'I' column of ``df_cut`` against its 'q' column.

    ``pars`` supplies the starting parameters, and ``nan_policy='omit'`` is
    passed through to ``model.fit``. Returns whatever ``model.fit`` returns.
    """
    intensity = df_cut['I']
    q_values = df_cut['q']
    return model.fit(intensity, pars, x=q_values, nan_policy='omit')

In [8]:
def user_model(best_model, df_cut, sig, amp, q_max=None, q_min=None):
    """Let the user accept the automatic fit or refit with hand-typed centers.

    The current best fit is plotted and the user is asked whether it is good.
    Until the answer is ``yes``, the user types comma-separated peak centers,
    a new model is built and fitted from them, and the new fit is shown.

    Parameters
    ----------
    best_model : fit result
        Best automatic fit so far; must provide ``chisqr`` and ``plot()``.
    df_cut : pandas.DataFrame
        Data restricted to the fit window (columns ``'q'`` and ``'I'``).
    sig, amp : float
        Initial sigma and amplitude forwarded to ``make_model``.
    q_max, q_min : float, optional
        Bounds for the peak centers. When omitted, the largest and smallest
        ``q`` in ``df_cut`` are used, so the function no longer depends on
        notebook-level globals being defined.

    Returns
    -------
    fit result
        The fit the user accepted.
    """
    if q_max is None:
        q_max = df_cut['q'].max()
    if q_min is None:
        q_min = df_cut['q'].min()

    print("\n\n fit not found")
    print('The chisqr is ', best_model.chisqr)
    best_model.plot()
    plt.pause(1)

    good = input('if its good enter yes\n')

    while good != 'yes':
        try:
            raw_centers = input('Enter peak centers separated by comma \n')
            centers = [float(item) for item in raw_centers.split(',')]
            print(centers)

            model, pars = make_model(q_max, q_min, centers, sig, amp)
            best_model = run_model(df_cut, model, pars)
            print("chisqr is ", best_model.chisqr)
            best_model.plot()
            plt.pause(1)
            good = input('enter yes to continue. To try again enter no.\n')
        except Exception as err:
            # Catch Exception (not a bare except) so Ctrl-C / kernel interrupt
            # still gets the user out of the loop, and report what went wrong
            # before asking for the centers again.
            print('operation filed with the following messege')
            print(repr(err))
    return best_model

In [9]:
def fit_data(df_cut, q_max, q_min, num_of_centers, sig, amp, chisqu_fit_value, peak_name, Li_q_max, Li_q_min):
    """Search for a fit whose chi-square is below ``chisqu_fit_value``.

    Candidate models with 1, 2, ... peaks are generated by ``get_model_list``
    and fitted with ``run_model``; the lowest-chi-square result of each round
    is kept. If the peak limit is reached without an acceptable fit, the
    search restarts once from one peak with twice as many trial centers. If
    that also fails, the decision is handed to ``user_model``.

    Parameters
    ----------
    df_cut : pandas.DataFrame
        Data restricted to the fit window.
    q_max, q_min : float
        Fit window.
    num_of_centers : int
        Number of steps used to spread the trial centers (doubled on retry).
    sig, amp : float
        Initial sigma and amplitude for each Gaussian.
    chisqu_fit_value : float
        A fit is accepted once its chi-square drops below this value.
    peak_name : str
        ``'NMC'`` allows one peak; any other name allows up to three.
    Li_q_max, Li_q_min : float
        Forwarded to ``get_model_list``.

    Returns
    -------
    fit result or None
        The accepted fit. ``None`` only if the loop never runs because
        ``chisqu_fit_value`` is not comparable (e.g. NaN).
    """
    # One more than the largest number of peaks that will be tried.
    max_peak_allowed = 2 if peak_name == 'NMC' else 4

    chisqr = float('inf')  # guarantees at least one fitting round
    best_model = None
    num_peaks = 1
    more_peaks = False

    while chisqr >= chisqu_fit_value:

        if num_peaks >= max_peak_allowed:
            if more_peaks:
                # The denser retry has already been used: ask the user.
                return user_model(best_model, df_cut, sig, amp)
            # First time the limit is hit: restart with a denser grid.
            num_peaks = 1
            num_of_centers = num_of_centers * 2
            more_peaks = True
            print("THE THING HAPPENED MORE PEAKS")

        # List of (model, pars) tuples, one per combination of trial centers.
        model_list = get_model_list(df_cut, q_max, q_min, num_of_centers, num_peaks, sig, amp, peak_name,
                                    Li_q_max, Li_q_min)
        if not model_list:
            raise ValueError('no candidate models were generated for '
                             + str(num_peaks) + ' peak(s); increase num_of_centers')

        model_result_list = [run_model(df_cut, model, pars) for model, pars in model_list]

        best_model = min(model_result_list, key=lambda result: result.chisqr)
        chisqr = best_model.chisqr
        num_peaks += 1

    # No plt.pause() here: nothing is plotted in this function, so pausing
    # only added a delay to every file processed.
    return best_model

In [10]:
def get_values(best_model, df_cut):
    """Collect integral, FWHM and center for every non-background component.

    The components of ``best_model`` are evaluated on ``df_cut['q']``. Every
    component except the one keyed ``'b_'`` is integrated with
    ``integrate_model``, and its ``fwhm`` and ``center`` parameter values are
    read from ``best_model.params``.

    Returns
    -------
    tuple of lists
        ``(integral_list, fwhm_list, peak_center_list)``, one entry per peak,
        in the order the components are reported.
    """
    q_guess = 1.75  # fixed value handed to integrate_model for every peak

    components = best_model.eval_components(x=df_cut['q'])
    params = best_model.params

    # (evaluated curve, fwhm, center) for each peak; the background is skipped.
    peaks = [
        (components[str(prefix)],
         params[str(prefix) + 'fwhm'].value,
         params[str(prefix) + 'center'].value)
        for prefix in components.keys()
        if prefix != 'b_'
    ]

    integral_list = [integrate_model(df_cut, curve, center, q_guess) for curve, _, center in peaks]
    fwhm_list = [fwhm for _, fwhm, _ in peaks]
    peak_center_list = [center for _, _, center in peaks]

    return integral_list, fwhm_list, peak_center_list
    

In [11]:
def master_function(read_sample_file, num_of_centers,  data_path, q_min, q_max,  sample_name, sig, amp, chisqu_fit_value, peak_name, Li_q_max, Li_q_min):
    """Run the whole pipeline for one file and one peak window.

    Steps: load the pattern, read the motor positions from the file name, cut
    the pattern to ``[q_min, q_max]``, search for the best fit, and extract
    integral / FWHM / center for every fitted peak.

    Returns
    -------
    list
        ``[sample_name, x_motor, y_motor, integral_list, fwhm_list,
        peak_center_list, best_model]`` when a fit was produced.
    tuple
        ``(sample_name, x_motor, y_motor)`` when ``fit_data`` returned None.
        NOTE: this result is shorter, so callers that index past position 2
        need to check its length first.
    """
    # Full XRD pattern and the stage position encoded in the file name.
    pattern = make_dataframe(read_sample_file, data_path)
    x_motor, y_motor = get_xy_motor(read_sample_file, data_path)

    # Restrict the pattern to the requested q range, then fit it.
    df_cut = get_points(pattern, q_min, q_max)
    best_model = fit_data(df_cut, q_max, q_min, num_of_centers, sig, amp, chisqu_fit_value, peak_name, Li_q_max, Li_q_min)

    if best_model is None:
        return sample_name, x_motor, y_motor

    integral_list, fwhm_list, peak_center_list = get_values(best_model, df_cut)
    return [sample_name, x_motor, y_motor, integral_list, fwhm_list, peak_center_list, best_model]

In [12]:
def integrate_model(df_cut, Gaussian, center_raw, q_guess):
    """Integrate one evaluated peak over q with Simpson's rule.

    Parameters
    ----------
    df_cut : pandas.DataFrame
        Data restricted to the fit window; its ``'q'`` column is the x axis.
    Gaussian : array-like
        Peak evaluated at every ``q`` in ``df_cut`` (same length).
    center_raw, q_guess : float
        Not used by this function; kept so existing callers keep working.

    Returns
    -------
    float
        Area under ``Gaussian`` over ``df_cut['q']``.
    """
    q_values = df_cut['q'].to_numpy()

    # The `even=` keyword of simpson() was deprecated and later removed from
    # SciPy, so passing even='avg' raises TypeError on current releases.
    # Calling without it works on every version, and x is passed by keyword
    # so the call does not depend on positional-argument rules.
    return integrate.simpson(Gaussian, x=q_values)

In [25]:
def save_fits(savePath_gen, get_integrals, element, list_of_files, i):
    """Save the plot of one fit under <savePath_gen>/<sample>/<element>/<x_y>/.

    Parameters
    ----------
    savePath_gen : str
        Root folder for all saved plots.
    get_integrals : sequence
        Result of ``master_function``: index 0 is the sample name, 1 and 2 are
        the x and y motor positions, and 6 is the fit result whose ``plot()``
        is saved.
    element : str
        Name of the peak being fitted; used as a sub-folder.
    list_of_files : list of str
        All input file names.
    i : int
        Index into ``list_of_files`` of the file this fit belongs to.
    """
    # The sample name is read from the result itself rather than from a
    # notebook-level global, so the function works wherever it is called.
    sample = str(get_integrals[0])

    # Folder named after the stage coordinates; dots are replaced because the
    # value is used as part of a path.
    coordinates = (str(get_integrals[1]) + '_' + str(get_integrals[2])).replace('.', '-')
    savePath = os.path.join(savePath_gen, sample, element, coordinates)

    # exist_ok avoids the race between checking for the folder and creating it.
    os.makedirs(savePath, exist_ok=True)

    # Name the figure after the input file with its last 5 characters removed
    # (presumably the file extension -- TODO confirm against the real names).
    file_name = str(list_of_files[i])
    file_name = file_name[:len(file_name) - 5]
    fig_path = os.path.join(savePath, file_name)

    fig = get_integrals[6].plot()
    try:
        fig.savefig(fig_path)
    finally:
        # Close this figure even if saving fails, so a long batch run does
        # not accumulate open figures.
        plt.close(fig)

In [29]:
startTime = time.time()

# Sample info
sample_name = 'Sample9_map_charge'

# Number of steps used to spread the trial peak centers across the guess window
num_of_centers = 5

# The q range lmfit uses for trial centers of a lithium peak (min must be below max).
Li_q_min = 2.52
Li_q_max = 2.535

# Column layout shared by the per-peak tables and the master table:
# four info columns followed by (integral, FWHM, center) for up to three peaks.
result_columns = ['Sample', 'file_name', 'x motor', 'y motor',  'Gaussian1', 'FWHM1', 'Center1',
                  'Gaussian2', 'FWHM2', 'Center2', 'Gaussian3', 'FWHM3', 'Center3']

# Paths to the input files and the output locations
general_folder = r'C:\Users\Elizabeth Allan-Cole\Desktop\XRD Data Processing\NSLS-II Winter 2023\Planning'
input_folder = os.path.join(general_folder, 'Ben - test cases', sample_name)
output_folder = os.path.join(general_folder,  'intergral_folder',  sample_name)
plot_folder = os.path.join(general_folder, 'Plot Output')

# Create the output folder if it does not exist yet
os.makedirs(output_folder, exist_ok=True)

# Isolated peak q range dict: [q_min, q_max, chi squared, sigma, amplitude]
q_range_dict = {'Graphite-LixC6':[1.75, 1.9, 5, 0.05, 5], 'NMC':[1.25, 1.36, 1000, 0.1, 100], 'Li': [2.49, 2.55, 10, 0.05, 1]}

# Graphite/LixC6 only
#q_range_dict = {'Graphite-LixC6':[1.75, 1.9, 5, 0.05, 5]}

# NMC peaks only
#q_range_dict = {'NMC':[1.25, 1.36, 1000, 0.1, 100]}

# Li peaks only
#q_range_dict = {'Li': [2.49, 2.55, 10, 0.05, 1]}

# Make a list of all file names in the input folder
list_of_files = [files for files in listdir(input_folder) if isfile(join(input_folder, files))]

element_frames = []

# q_min and q_max are deliberately bound at notebook level by this loop:
# user_model may read them as globals.
for element, (q_min, q_max, chisqu_fit_value, sig, amp) in q_range_dict.items():
    print("finding ", element, " peaks! Hold on to your socks!")
    print("q min is ", q_min, "q max is ", q_max)

    rows = []
    n = 0
    for i, xrd_file in enumerate(list_of_files):
        if 'mean_q' not in xrd_file:
            continue

        # returns [sample_name, x_motor, y_motor, integral_list, fwhm_list, peak_center_list, best_model]
        get_integrals = master_function(xrd_file, num_of_centers, input_folder, q_min, q_max,
                                        sample_name, sig, amp, chisqu_fit_value, element, Li_q_max, Li_q_min)

        if len(get_integrals) < 7:
            # master_function returns only (sample, x, y) when no fit was
            # produced: keep the row, but there is nothing to plot or record.
            peak_values = []
        else:
            # save the plot of the best fit
            save_fits(plot_folder, get_integrals, element, list_of_files, i)

            # interleave per-peak values:
            # (integral_1, fwhm_1, center_1, integral_2, fwhm_2, center_2, ...)
            peak_values = [value
                           for peak in zip(get_integrals[3], get_integrals[4], get_integrals[5])
                           for value in peak]

        # number of files processed so far for this peak
        print(n)
        n += 1

        # sample, file name, x motor, y motor, then the peak values,
        # padded with NaN up to the fixed column count
        info_list = [get_integrals[0], xrd_file, get_integrals[1], get_integrals[2]] + peak_values
        info_list += [np.nan] * (len(result_columns) - len(info_list))
        rows.append(info_list)

    # Build the table once instead of growing it row by row; rows are numbered from 1.
    df_integrals_temp = pd.DataFrame(rows, columns=result_columns)
    df_integrals_temp.index = range(1, len(rows) + 1)

    # after each peak is run save its table (named from sample_name so this
    # also works when no file matched 'mean_q')
    file_name = sample_name + '_' + element + '.csv'
    output_file = os.path.join(output_folder, file_name)
    df_integrals_temp.to_csv(output_file)

    if not df_integrals_temp.empty:
        element_frames.append(df_integrals_temp)

# combine the per-peak tables into the master table
if element_frames:
    df_integrals = pd.concat(element_frames)
else:
    df_integrals = pd.DataFrame(columns=result_columns)

# save the master dataframe
file_name = sample_name + '_all_data.csv'
output_file = os.path.join(output_folder, file_name)
df_integrals.to_csv(output_file)

executionTime = (time.time() - startTime)
print('Execution time in seconds: ' + str(executionTime))

finding  Graphite-LixC6  peaks! Hold on to your socks!
qmax is  1.75 q min is  1.9
0
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
THE THING HAPPENED MORE PEAKS
72
73
74
75
76
77
78
79
80
81
82
THE THING HAPPENED MORE PEAKS
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
finding  NMC  peaks! Hold on to your socks!
qmax is  1.25 q min is  1.36
0
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
finding  Li  peaks! Hold on to your socks!
qmax is  2.49 q min is  2.55
0
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
4

In [15]:
# Prints a fixed marker string; presumably run after the batch cell to flag that it finished.
print("IM DONE")

IM DONE


In [16]:
# Plain-text dump of df_integrals, the combined results table built by the batch cell.
print( df_integrals)

      Sample                                          file_name x motor  \
1    Sample9  Sample9_20220701-141343_907cae_sample_x_30,00m...    30.0   
2    Sample9  Sample9_20220701-141343_907cae_sample_x_30,00m...    30.0   
3    Sample9  Sample9_20220701-141343_907cae_sample_x_30,00m...    30.0   
4    Sample9  Sample9_20220701-141343_907cae_sample_x_30,00m...    30.0   
5    Sample9  Sample9_20220701-141343_907cae_sample_x_30,00m...    30.0   
..       ...                                                ...     ...   
96   Sample9  Sample9_20220701-141343_907cae_sample_x_40,00m...    40.0   
97   Sample9  Sample9_20220701-141343_907cae_sample_x_40,00m...    40.0   
98   Sample9  Sample9_20220701-141343_907cae_sample_x_40,00m...    40.0   
99   Sample9  Sample9_20220701-141343_907cae_sample_x_40,00m...    40.0   
100  Sample9  Sample9_20220701-141343_907cae_sample_x_40,00m...    40.0   

    y motor Gaussian1     FWHM1   Center1 Gaussian2     FWHM2   Center2  \
1      61.0  0.026099  0