## Thermal Conductivity Raw Data Fitting
Developed by Henry Nachman

Last Edited: 14 March 2024

In [1]:
import numpy as np
import matplotlib.pyplot as plt
import os, json, shutil

# note : most functions needed for running this notebook can be found in tc_utils.
from tc_utils import *
# Defines the matplotlib backend for plots
%matplotlib qt5

# Global plotting switch read by the fitting cell: set to True to reproduce all plots.
# Note that this will likely lengthen the time it takes to run the notebook.
plots = True

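First, we need to find where all our RAW data is stored. For every material folder in `lib`, we also record which data sources are available (RAW, other fits, NIST) in a JSON-formatted `config.yaml` file.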

In [2]:
# Locate the material library (a "lib" folder in the current working directory).
# os.path.join is used throughout so the notebook is not tied to Windows path separators.
path_to_lib = os.path.join(os.getcwd(), "lib")

# Every sub-directory of lib is treated as one material. Plain files (and anything
# ending in ".md") are skipped - they have no place to hold a config.yaml.
mat_directories = [
    folder for folder in os.listdir(path_to_lib)
    if not folder.endswith(".md") and os.path.isdir(os.path.join(path_to_lib, folder))
]

# Maps material name -> path of its RAW folder (only for materials that have one).
path_to_RAW = dict()

for mat in mat_directories:
    path_to_mat = os.path.join(path_to_lib, mat)
    raw_str = os.path.join(path_to_mat, "RAW")
    config_str = os.path.join(path_to_mat, "config.yaml")
    other_str = os.path.join(path_to_mat, "OTHERFITS")
    nist_str = os.path.join(path_to_mat, "NIST")

    # Collect the data sources that exist for this material.
    source = []
    if os.path.exists(raw_str): # Finds the raw data if it exists.
        path_to_RAW[mat] = raw_str
        source.append("RAW")
    if os.path.exists(other_str): # Finds other fits
        source.append("other")
    if os.path.exists(nist_str): # Finds NIST fit
        source.append("NIST")

    # One config entry per source. The file is named config.yaml but its content is JSON.
    # The file is rewritten on every run, so "parent" is always reset to "NA" here.
    # A material with none of the recognised source folders gets an empty list.
    yaml_dict = [{"name": f"{mat}", "parent": "NA", "source": f"{src}"} for src in source]
    with open(config_str, 'w') as config_file:
        config_file.write(json.dumps(yaml_dict, indent=4))

In [3]:
# Load each material's JSON config and, when it names a parent material,
# copy the material's RAW data files into the parent's RAW folder.
for mat in mat_directories:
    # Rebuild the paths for *this* material; relying on a path_to_mat left over from
    # an earlier cell would make every iteration read the same (last) material.
    path_to_mat = os.path.join(path_to_lib, mat)
    raw_str = os.path.join(path_to_mat, "RAW")
    config_str = os.path.join(path_to_mat, "config.yaml")
    with open(config_str, 'r') as config_file:
        mat_config = json.load(config_file)

    # An empty config list has no entries to read a parent from.
    if not mat_config:
        continue
    parent = mat_config[0]["parent"]

    if parent != "NA":
        print(mat, "has parent:", parent)
        parent_raw = os.path.join(path_to_lib, parent, "RAW")
        # Creates the parent folder and its RAW sub-folder as needed; also covers the
        # case where the parent folder already exists but has no RAW folder yet.
        os.makedirs(parent_raw, exist_ok=True)
        raw_files = get_datafiles(raw_str)
        for raw_file in raw_files:
            shutil.copy(os.path.join(raw_str, raw_file), os.path.join(parent_raw, raw_file))

Let's run our fitting code for every material found in the library.

In [4]:
# Build the lookup tables used by the fitting cell. Each maps a material name to a
# folder path, and only materials that have a RAW folder are included.
path_to_RAW = dict()
path_to_fits = dict()
path_to_plots = dict()

for mat in mat_directories:
    path_to_mat = os.path.join(path_to_lib, mat)
    raw_str = os.path.join(path_to_mat, "RAW")
    fits_str = os.path.join(path_to_mat, "fits")
    plots_str = os.path.join(path_to_mat, "plots")

    # Nothing to fit without raw data.
    if not os.path.exists(raw_str):
        continue

    # Make sure the output folders exist before anything is written into them.
    os.makedirs(fits_str, exist_ok=True)
    os.makedirs(plots_str, exist_ok=True)

    path_to_RAW[mat] = raw_str
    path_to_fits[mat] = fits_str
    path_to_plots[mat] = plots_str

In [5]:
# Temperature (in K, per the printed messages) that decides which fit is used:
# data entirely at or below it gets only the low fit, data entirely at or above it
# gets only the hi fit, and data on both sides gets a combined fit.
SPLIT_TEMP_K = 20
# Number of candidate low/hi split locations tried for the combined fit.
N_SPLIT_CANDIDATES = 15

for mat in path_to_RAW.keys():
    # Average percent difference of the combined fit, one entry per candidate split.
    perc_diff_avg = np.array([])

    ## First, let's collect the raw data from their csv files
    big_data, data_dict = parse_raw(mat, path_to_RAW[mat], plots=False, weight_const=0.00)
    T, k, koT, weights = big_data[:,0], big_data[:,1], big_data[:,2], big_data[:,3]

    maxT, minT = max(T), min(T)
    sorted_T = np.sort(T)
    fit_orders = [3,3]
    fit_types = ["k/T", "loglog"]

    if maxT <= SPLIT_TEMP_K:
        print(f"{mat} : Using a low fit - {maxT} is below {SPLIT_TEMP_K}K.")
        low_fit_xs, low_fit = koT_function(T, koT, fit_orders[0], weights)
        # Placeholders for the unused hi fit.
        hi_fit, hi_fit_xs, erf_loc = [[0], [0], [0]]
        fit_args = dict_combofit(low_fit, low_fit_xs, hi_fit, hi_fit_xs, fit_orders, fit_types, erf_loc)
    elif minT >= SPLIT_TEMP_K:
        print(f"{mat} : Using a hi fit - {minT} is above {SPLIT_TEMP_K}K.")
        hi_fit_xs, hi_fit = logk_function(np.log10(T), np.log10(k), fit_orders[1], weights)
        # Placeholders for the unused low fit.
        low_fit, low_fit_xs, erf_loc = [[0], [0], [-1]]
        fit_args = dict_combofit(low_fit, low_fit_xs, hi_fit, hi_fit_xs, fit_orders, fit_types, erf_loc)
    else:
        print(f"{mat} : Using a combined fit - data exists on both sides of {SPLIT_TEMP_K}K.")
        # Candidate split locations, evenly spaced between the lowest and highest temperature.
        erf_locList = np.linspace(sorted_T[0], sorted_T[-1], N_SPLIT_CANDIDATES)
        split_errors = []
        for erf_loc in erf_locList:
            dsplit = split_data(big_data, erf_loc)
            lowT, lowT_k, lowT_koT, low_ws, hiT, hiT_k, hiT_koT, hi_ws = dsplit

            if len(lowT) == 0:
                # Reset BOTH placeholders: leaving low_fit_xs untouched would reuse a value
                # from a previous split/material (or raise NameError if none exists yet).
                low_fit, low_fit_xs = [0], [0]
                print(f"Split at {erf_loc}: no data below the split - only using high fit")
            else:
                low_fit_xs, low_fit = koT_function(lowT, lowT_koT, fit_orders[0], low_ws)

            if len(hiT) == 0:
                hi_fit, hi_fit_xs = [0], [0]
                print(f"Split at {erf_loc}: no data above the split - only using low fit")
            else:
                # The hi fit is done on log10 of the high range.
                hi_fit_xs, hi_fit = logk_function(np.log10(hiT), np.log10(hiT_k), fit_orders[1], hi_ws)

            fit_args = dict_combofit(low_fit, low_fit_xs, hi_fit, hi_fit_xs, fit_orders, fit_types, erf_loc)
            ## With the fit complete, let's output a formatted dictionary with the fit parameters
            # NOTE(review): this value is not used inside the loop (it is recomputed after the
            # final fit); kept in case format_combofit has side effects - confirm and remove.
            output_array = format_combofit(fit_args)

            ## We want to figure out the best location for the split in data, so we compute
            ## the average percent difference between the combined fit and the data.
            low_param, hi_param, erf_param = fit_args["low_fit_param"], fit_args["hi_fit_param"], fit_args["combined_fit_param"][-1]
            kpred = loglog_func(T, low_param, hi_param, erf_param)
            diff = abs(kpred-k)
            # Percent difference is taken relative to the predicted value.
            perc_diff_arr = 100*diff/kpred
            split_errors.append(np.mean(perc_diff_arr))
        perc_diff_avg = np.array(split_errors)

        # Now that we have the error of the fit for many different split locations, let's choose the best one.
        erf_locdict = dict(zip(erf_locList, perc_diff_avg))
        bestRes = min(erf_locdict.values())
        besterf_loc = [key for key in erf_locdict if erf_locdict[key] == bestRes]
        print(f"Low-Hi split centered at : {besterf_loc[0]} ~~ with average percent difference value of: {bestRes:.2f}%")

        # We will repeat the above fit with this new 'optimized' split location
        # NOTE(review): besterf_loc is a list (normally of length one), not a scalar;
        # confirm that dual_tc_fit expects erf_loc in this form.
        fit_args = dual_tc_fit(big_data, path_to_plots[mat], erf_loc=besterf_loc, fit_orders=tuple(fit_orders), plots=False)

    output_array = format_combofit(fit_args)

    # Finally, we will output the fit parameters as a txt table, a csv, and an lh5 file - and plot the data.
    create_data_table(output_array, os.path.join(path_to_fits[mat], f"{mat}.txt"))
    create_tc_csv(output_array, os.path.join(path_to_fits[mat], f"{mat}.csv"))
    make_fit_lh5(fit_args, path_to_fits[mat])
    # PLOTTING CODE
    if plots:
        tk_plot(mat, path_to_RAW, data_dict, fit_args, fit_range = [100e-3, sorted_T[-1]], points=True, fits="combined", fill=True)

Aluminum : Using a low fit - 1.061 is below 20K.
Clearwater : Using a low fit - 4.842 is below 20K.
DPP : Using a low fit - 4.02 is below 20K.
Fiberglass : Using a low fit - 2.970783932 is below 20K.
Graphlite : Using a low fit - 4.015 is below 20K.
Ketron : Using a low fit - 2.851 is below 20K.
Macor : Using a low fit - 3.21338073 is below 20K.
SS304 : Using a combined fit - data exists on both sides of 20K.
Only using high fit minT: 0.3846 > 20
Only using low fit 1672.0 < 20
Low-Hi split centered at : 119.7857 ~~ with average percent difference value of: 3.95%
Torlon : Using a low fit - 2.977 is below 20K.
VESPEL : Using a low fit - 3.032 is below 20K.
