In [14]:
import os
import datetime
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from msc_project.utils.fit_NLS import *

plt.style.use('ggplot')

In [2]:
# DATA_PATH = '/scratch/msc24h18/nls_fit/data/Data_for_NLS/10000 nm ID2946.csv'
# PMAX_PATH = '/scratch/msc24h18/nls_fit/data/Data_for_NLS/maxpols/10000 nm ID2946.csv'

# data = load_data(DATA_PATH)
# pmax = np.loadtxt(PMAX_PATH, delimiter=',', skiprows=1)
# pmax = {float(v): min(p*1.05, 1) for v, p in pmax}

# fig, axs = plt.subplots(1, 2, figsize=(20, 10))
# fig.suptitle(DATA_PATH)
# opt_df, popt_tl = plot_all_iterative(data, axs, pmax=pmax, p0_tmin=None, p0_tmax=None, fit_type="lorentzian")
# axs[0].set(xlabel = 'Pulse duration', ylabel = 'Partial polarization', xscale='log', ylim=(0, 1))
# axs[0].legend()
# axs[1].clear()

# voltages = opt_df['voltage'].values
# log_taus = opt_df['p2'].values

# axs[1].scatter(opt_df['voltage'], opt_df['p2'])
# popt, pcov = curve_fit(lambda V, t0, V0, n: np.log(f_tau(V, t0, V0, n)), voltages, log_taus)
# v = np.linspace(voltages.min(), voltages.max(), 1000)
# t = f_tau(v, *popt)
# axs[1].plot(v, np.log(t), label=f'$\\log(\\tau_{{Lorentz}})$ (fit): $\\tau_0$={popt[0]:.2e}, $V_0$={popt[1]:.2f}, $n$={popt[2]:.2f}')

# axs[1].legend()
# plt.tight_layout()
# plt.show()

In [12]:
# TODO: Multithreading

def fit_voltages(data_path, n_vals, n_tol, output_file):
    """Fit the Lorentzian polarization model to every voltage column of every CSV in a folder.

    For each ``*.csv`` file in ``data_path``, each target exponent in ``n_vals``
    and each column after the first, ``fit_polarization`` is called with
    ``type="lorentzian"`` on ``data['Pulse Width']`` versus that column. The
    exponent is constrained to ``[n - n_tol, n + n_tol]``; the other three
    parameters are unbounded. Fits that raise are reported on stdout and skipped.

    Parameters
    ----------
    data_path : str
        Directory containing the measurement CSV files. The part of each file
        name before the first space is stored as the sample ``size``.
    n_vals : iterable of float
        Target exponents; one fit per (file, n, column) combination.
    n_tol : float
        Half-width of the allowed window around each target ``n``.
    output_file : str
        Destination CSV. Columns, in this order: ``size, voltage, n, A, omega,
        log_tlorentz``. Missing parent directories are created.

    Returns
    -------
    None
        The results are written to ``output_file``.
    """
    # DataFrame.to_csv does not create missing parent folders. Create the
    # destination before the slow fitting loop so a bad path fails immediately
    # rather than after all the fits have been computed.
    out_dir = os.path.dirname(output_file)
    if out_dir:
        os.makedirs(out_dir, exist_ok=True)

    # os.listdir order is arbitrary; sort so the output rows are reproducible.
    csv_files = sorted(f for f in os.listdir(data_path) if f.endswith('.csv'))

    columns = ["size", "voltage", "n", "A", "omega", "log_tlorentz"]
    records = []

    for f in csv_files:
        size = f.split(' ')[0]
        data = load_data(os.path.join(data_path, f))

        for n in n_vals:
            # Initial guess in the order fit_polarization returns its result:
            # (A, omega, log_tlorentz, n).
            p0 = [0.2, 0.1, -7, n]
            bounds = ([-np.inf, -np.inf, -np.inf, n - n_tol],
                      [np.inf, np.inf, np.inf, n + n_tol])

            # The first column is the pulse width; every other column header is
            # used as the voltage label.
            for col in data.columns[1:]:
                try:
                    popt = fit_polarization(data['Pulse Width'], data[col], type="lorentzian", p0=p0, bounds=bounds)
                except Exception as e:
                    # Best effort: one failed fit must not abort the whole sweep.
                    print(f"Couldn't fit {col}V (size {size}): {e}")
                    continue

                records.append({
                    "size": size,
                    "voltage": col,
                    "n": popt[3],
                    "A": popt[0],
                    "omega": popt[1],
                    "log_tlorentz": popt[2],
                })

                print(f"Fit {col}V (size {size}): {popt}")

    # Explicit columns keep the header (and its order) even when nothing was fitted.
    fit_df = pd.DataFrame(records, columns=columns)
    fit_df.to_csv(output_file, index=False)

def fit_tau_lorentz(data_path, n_vals, n_tol, fix_n_tau, exclude_voltages, output_file):
    """Fit ``log_tlorentz`` versus voltage for every (size, n) group of a voltage-fit table.

    Reads the CSV at ``data_path`` (columns ``size``, ``n``, ``voltage`` and
    ``log_tlorentz`` are used), groups the rows by ``size`` and by closeness of
    ``n`` to each value in ``n_vals``, and calls ``fit_tau`` on each non-empty
    group. Fits that raise are reported on stdout and skipped.

    Parameters
    ----------
    data_path : str
        CSV file holding the per-voltage fit results.
    n_vals : iterable of float
        Target exponents used to select rows (``|row n - n| <~ 1.1 * n_tol``).
    n_tol : float
        Tolerance used for the row selection and, when ``fix_n_tau`` is true,
        as the half-width of the bound window on the third fit parameter.
    fix_n_tau : bool
        If true, the third parameter is bounded to ``[n - n_tol, n + n_tol]``;
        otherwise to ``[0, 4]``.
    exclude_voltages : sequence of float or None
        Voltages to drop from each group before fitting.
    output_file : str
        Destination CSV. Columns, in this order: ``size, n, t0, V0, n_tau``.
        Missing parent directories are created.

    Returns
    -------
    None
        The results are written to ``output_file``.
    """
    fit_df = pd.read_csv(data_path)

    # DataFrame.to_csv does not create missing parent folders; create the
    # destination up front so the fits are not lost to a failing write.
    out_dir = os.path.dirname(output_file)
    if out_dir:
        os.makedirs(out_dir, exist_ok=True)

    columns = ["size", "n", "t0", "V0", "n_tau"]
    records = []

    for size in fit_df['size'].unique():
        size_df = fit_df[fit_df['size'] == size]
        print(f"Processing size: {size}")

        for n in n_vals:
            # The fitted n sits anywhere inside [n - n_tol, n + n_tol], so match
            # with a slightly widened tolerance.
            size_n_df = size_df[np.isclose(size_df['n'], n, atol=n_tol*1.1)]
            if size_n_df.empty:
                continue

            # Count reported here is taken before any voltages are excluded.
            print(f"  n = {n}: {len(size_n_df)} entries")

            # Parameter order is (t0, V0, n_tau).
            p0 = [1e-5, 5, n]
            if fix_n_tau:
                bounds = ([-np.inf, 0, n - n_tol], [np.inf, np.inf, n + n_tol])
            else:
                bounds = ([-np.inf, 0, 0], [np.inf, np.inf, 4])

            x = np.array(size_n_df['voltage'])
            y = np.array(size_n_df['log_tlorentz'])
            if exclude_voltages is not None:
                keep = np.isin(x, exclude_voltages, invert=True)
                x = x[keep]
                y = y[keep]

            try:
                popt = fit_tau(x, y, p0=p0, bounds=bounds)
            except Exception as e:
                # Best effort: one failed group must not abort the whole run.
                print(f"  Couldn't fit {size} (n={n}): {e}")
                continue

            records.append({
                "size": size,
                "n": n,
                "t0": popt[0],
                "V0": popt[1],
                "n_tau": popt[2],
            })

            print(f"  Fit {size} (n={n}): {popt}")

    # Explicit columns keep the header (and its order) even when nothing was fitted.
    tau_fit_df = pd.DataFrame(records, columns=columns)
    tau_fit_df.to_csv(output_file, index=False)

In [13]:
def _find_size_csv(data_path, size):
    """Return the name of the CSV in ``data_path`` whose name starts with ``"<size> nm"``."""
    prefix = f"{size} nm"
    for name in sorted(os.listdir(data_path)):
        # Anchor at the start of the name: a plain substring test would let
        # size 600 match "1600 nm ....csv". This also agrees with fit_voltages,
        # which takes the text before the first space as the size.
        if name.endswith('.csv') and name.startswith(prefix):
            return name
    raise ValueError(f"Couldn't find data for size {size}")


def plot_size_n(data_path, voltage_fit_path, tau_fit_path, size, n, n_tol, savefig=None,
                tau_fit_fixn_path=None, tau_fit_exclude_path=None, tau_fit_fixn_exclude_path=None):
    """Plot the measured data and stored fits for one sample size and one exponent ``n``.

    Left panel: polarization versus pulse width for every voltage column of the
    measurement CSV, with the stored Lorentzian fit overlaid where one exists.
    Right panel: fitted ``log_tlorentz`` versus voltage, with one curve per
    supplied tau-fit table.

    Parameters
    ----------
    data_path : str
        Directory containing the measurement CSV files.
    voltage_fit_path : str
        CSV with columns ``size, voltage, n, A, omega, log_tlorentz``.
    tau_fit_path : str
        CSV with columns ``size, n, t0, V0, n_tau`` (default tau fit).
    size : int
        Sample size; used both to pick the measurement file and to filter the
        fit tables (compared with ``==`` against their ``size`` column).
    n : float
        Target exponent; rows whose ``n`` is within ``1.1 * n_tol`` are used.
    n_tol : float
        Tolerance used when matching ``n``.
    savefig : str or None
        If given, the figure is saved to this path; otherwise it is shown.
    tau_fit_fixn_path, tau_fit_exclude_path, tau_fit_fixn_exclude_path : str or None
        Optional additional tau-fit tables drawn on the right panel.

    Raises
    ------
    ValueError
        If no measurement CSV for ``size`` exists in ``data_path``.
    """
    # read data
    size_csv_file = _find_size_csv(data_path, size)
    data = load_data(os.path.join(data_path, size_csv_file))

    fit_df = pd.read_csv(voltage_fit_path)

    # Each tau-fit table is paired with the note shown in its legend entry.
    tau_fit_sources = [
        (tau_fit_path, ""),
        (tau_fit_fixn_path, ", fixed n"),
        (tau_fit_exclude_path, ", exclude low/high voltages"),
        (tau_fit_fixn_exclude_path, ", fixed n & exclude low/high voltages"),
    ]
    tau_fits = [(pd.read_csv(path), note) for path, note in tau_fit_sources if path is not None]

    fig, axs = plt.subplots(1, 2, figsize=(20, 10))

    # plot polarization vs. pulse duration with fit on the left
    size_df = fit_df[fit_df['size'] == size]
    size_n_df = size_df[np.isclose(size_df['n'], n, atol=n_tol*1.1)]

    pulse_widths = data['Pulse Width']
    t_grid = np.logspace(np.log10(pulse_widths.min()), np.log10(pulse_widths.max()), 1000)

    if not size_n_df.empty:
        print(size_n_df)
        voltage_cols = data.columns[1:]
        norm = clr.Normalize()
        cmap = plt.cm.plasma
        colors = cmap(norm(voltage_cols.astype(float)))
        cbar = plt.colorbar(cm.ScalarMappable(cmap=cmap, norm=norm), ax=axs[0])
        cbar.set_label("V")

        for col, c in zip(voltage_cols, colors):
            axs[0].scatter(pulse_widths, data[col], label=f'{col}V', color=c)
            # plot fit
            fit_rows = size_n_df[size_n_df['voltage'] == float(col)]
            if fit_rows.empty:
                continue
            fit_row = fit_rows.iloc[0]
            # Read the parameters by column name and keep them in their own
            # variables: the target exponent `n` must stay untouched because it
            # is still needed to select the tau fits below.
            p = polarization_lorentzian(t_grid, fit_row['A'], fit_row['omega'],
                                        fit_row['log_tlorentz'], fit_row['n'])
            axs[0].plot(t_grid, p, color=c)

        # plot tau vs. voltage with fit on the right
        # Raw string: in a normal string "\t" would be a tab, not the start of \tau.
        axs[1].scatter(size_n_df['voltage'], size_n_df['log_tlorentz'], label=r"$\tau_{Lorentz}$ (exp.)")

        v_grid = np.linspace(size_n_df['voltage'].min(), size_n_df['voltage'].max(), 1000)
        for tau_df, note in tau_fits:
            # Filter the table being drawn (not always the default one).
            matches = tau_df[(tau_df['size'] == size) & np.isclose(tau_df['n'], n, atol=n_tol*1.1)]
            if matches.empty:
                continue
            tau_row = matches.iloc[0]
            t0, V0, n_tau = tau_row['t0'], tau_row['V0'], tau_row['n_tau']
            r2 = r2_score(size_n_df['log_tlorentz'], np.log(f_tau(size_n_df['voltage'], t0, V0, n_tau)))
            label = f'$\\log(\\tau_{{Lorentz}})$ (fit{note}): $\\tau_0$={t0:.2e}, $V_0$={V0:.2f}, $n$={n_tau:.2f}, ($R^2$={r2:.3f})'
            # The scatter points and the axis are log(tau), so draw the log of the model.
            axs[1].plot(v_grid, np.log(f_tau(v_grid, t0, V0, n_tau)), label=label)

    axs[0].set(xlabel='Pulse duration', ylabel='Partial polarization', xscale='log', ylim=(0, 1))
    axs[0].legend()
    axs[1].set(xlabel='Voltage', ylabel=r'$\log(\tau_{lorentz})$')
    axs[1].legend()

    if savefig is not None:
        plt.savefig(savefig)
    else:
        plt.show()

In [None]:
#DATA_PATH = '/Users/pauluv/Documents/Code/msc_project/data/Data_for_NLS'
DATA_PATH = '/scratch/msc24h18/nls_fit/data/Data_for_NLS'
RESULTS_DIR = '/scratch/msc24h18/nls_fit/results/nls_model'

# Every run writes into its own timestamped sub-folder of RESULTS_DIR.
results_subdir = os.path.join(RESULTS_DIR, datetime.datetime.now().strftime('%Y%m%d_%H%M%S'))
# The folder is new for each run and DataFrame.to_csv does not create missing
# directories, so create it before starting the (slow) fits.
os.makedirs(results_subdir, exist_ok=True)
voltage_fits_path = os.path.join(results_subdir, 'voltage_fits.csv')
tau_fits_path = os.path.join(results_subdir, 'tau_fits.csv')

# Target exponents; each fit may move n by at most +/- n_tol around its target.
n_vals = [1.5, 1.75, 2, 2.25, 2.5]
n_tol = 0.01

fit_voltages(DATA_PATH, n_vals, n_tol, voltage_fits_path)

Fit 1.00V (size 1000): [ 5.16987883e-26  1.00000002e-01 -7.00000000e+00  1.50000000e+00]
Couldn't fit 1.25V (size 1000): Optimal parameters not found: The maximum number of function evaluations is exceeded.
Fit 1.50V (size 1000): [1.57863573 1.33149189 4.06322974 1.50999998]
Fit 1.75V (size 1000): [ 0.2761028   0.29316515 -4.91769346  1.51      ]
Fit 2.00V (size 1000): [ 0.40317672  0.34146251 -5.75782925  1.49      ]
Fit 2.25V (size 1000): [ 0.47813589  0.36994321 -6.51628807  1.49      ]
Fit 2.50V (size 1000): [ 0.48310972 -0.25524328 -7.33083966  1.51      ]
Fit 2.75V (size 1000): [ 0.49876797  0.27176513 -8.00257416  1.51      ]
Fit 3.00V (size 1000): [ 0.50842856  0.25341814 -8.31320438  1.51      ]
Fit 3.25V (size 1000): [ 0.49969518 -0.17170457 -8.57448327  1.51      ]
Fit 3.50V (size 1000): [ 0.50925036 -0.16451615 -8.79123214  1.51      ]
Fit 3.75V (size 1000): [ 0.50569122 -0.14885115 -8.91441619  1.51      ]
Fit 4.00V (size 1000): [ 0.51651754  0.20936606 -9.14136094  1.51  

  return (1 - np.exp(-(t/t0)**n)) * distribution_function_lorentzian(z0, A, omega, zl)
  return (1 - np.exp(-(t/t0)**n)) * distribution_function_lorentzian(z0, A, omega, zl)


Couldn't fit 5.00V (size 1000): (34, 'Numerical result out of range')
Fit 1.00V (size 1000): [-1.03397577e-25  1.00000001e-01 -7.00000000e+00  1.75000000e+00]
Fit 1.25V (size 1000): [  0.02379248   0.1355995  -11.08882568   1.74232795]
Fit 1.50V (size 1000): [1.62040425 1.35096384 4.24539063 1.75999999]
Fit 1.75V (size 1000): [ 0.26494513  0.33234399 -4.96009812  1.76      ]
Fit 2.00V (size 1000): [ 0.40361201  0.3695896  -5.77818392  1.74      ]
Fit 2.25V (size 1000): [ 0.48044776  0.39792363 -6.53192484  1.74      ]
Fit 2.50V (size 1000): [ 0.48527464 -0.28346764 -7.34482545  1.74      ]
Fit 2.75V (size 1000): [ 0.50019884  0.2942369  -8.01783065  1.76      ]
Fit 3.00V (size 1000): [ 0.51011291  0.28164513 -8.32629219  1.76      ]
Fit 3.25V (size 1000): [ 0.50065301 -0.19523443 -8.58801659  1.76      ]
Fit 3.50V (size 1000): [ 0.51014233  0.18615335 -8.80127753  1.76      ]
Fit 3.75V (size 1000): [ 0.50592877 -0.15834627 -8.91717326  1.76      ]
Fit 4.00V (size 1000): [ 0.51655763  0

  return (1 - np.exp(-(t/t0)**n)) * distribution_function_lorentzian(z0, A, omega, zl)
  return (1 - np.exp(-(t/t0)**n)) * distribution_function_lorentzian(z0, A, omega, zl)


Couldn't fit 4.00V (size 600): (34, 'Numerical result out of range')
Fit 4.25V (size 600): [  0.49385661   3.12210509 -31.07699373   1.49252858]


  return (1 - np.exp(-(t/t0)**n)) * distribution_function_lorentzian(z0, A, omega, zl)


Couldn't fit 4.50V (size 600): (34, 'Numerical result out of range')
Couldn't fit 4.75V (size 600): (34, 'Numerical result out of range')
Fit 5.00V (size 600): [  0.47590366   0.82333976 -11.16513336   1.49      ]
Couldn't fit 1.00V (size 600): Optimal parameters not found: The maximum number of function evaluations is exceeded.
Fit 1.25V (size 600): [  0.05924425  -0.37982066 -21.27575382   1.74553966]
Fit 1.50V (size 600): [ 0.05089742  0.67760997 -5.36575848  1.76      ]
Fit 1.75V (size 600): [ 0.62938086  0.29102628 -4.50966389  1.75999994]
Fit 2.00V (size 600): [ 0.96215616  0.80457266 -4.42318799  1.76      ]
Fit 2.25V (size 600): [ 0.529414    0.6988383  -6.57607074  1.76      ]
Fit 2.50V (size 600): [ 0.45134403  0.34676334 -7.42735874  1.74      ]
Fit 2.75V (size 600): [ 0.48780459  0.40884563 -8.0366229   1.74      ]
Fit 3.00V (size 600): [ 0.45011774  0.20758548 -8.63854907  1.76      ]
Fit 3.25V (size 600): [ 0.4089943   0.23694205 -9.03885138  1.7563039 ]
Couldn't fit 3.50

  return (1 - np.exp(-(t/t0)**n)) * distribution_function_lorentzian(z0, A, omega, zl)


Fit 1.25V (size 30000): [ 0.01173972  0.44643375 -4.88917192  2.50999711]
Fit 1.50V (size 30000): [ 9.56604741e-01  7.64947507e-04 -4.09237457e+00  2.51000000e+00]
Fit 1.75V (size 30000): [ 0.19502212  0.25543793 -5.09724705  2.51      ]
Fit 2.00V (size 30000): [ 0.34889302  0.12610429 -5.52871184  2.49      ]
Fit 2.25V (size 30000): [ 0.46068008 -0.13190433 -5.83839188  2.49      ]
Fit 2.50V (size 30000): [ 0.5112226   0.16673681 -6.04119173  2.49      ]
Fit 2.75V (size 30000): [ 0.51627846 -0.12724634 -6.2575003   2.49      ]
Fit 3.00V (size 30000): [ 0.52644436  0.13094548 -6.41080031  2.49      ]
Fit 3.25V (size 30000): [ 0.53099266 -0.14476713 -6.54054018  2.49      ]
Fit 3.50V (size 30000): [ 0.53610174 -0.18003051 -6.66891176  2.49      ]
Fit 3.75V (size 30000): [ 0.53868069  0.20394641 -6.7700528   2.49      ]
Fit 4.00V (size 30000): [ 0.54118032  0.22857544 -6.87201458  2.49      ]
Fit 4.25V (size 30000): [ 0.55981561  0.27828364 -6.969008    2.49      ]
Fit 4.50V (size 30000)

  return (1 - np.exp(-(t/t0)**n)) * distribution_function_lorentzian(z0, A, omega, zl)


Couldn't fit 4.00V (size 500): (34, 'Numerical result out of range')
Fit 4.25V (size 500): [ 0.50751039  0.66978474 -9.82114206  1.49      ]
Couldn't fit 4.50V (size 500): Optimal parameters not found: The maximum number of function evaluations is exceeded.
Fit 4.75V (size 500): [ 0.50670015  0.53351629 -9.75490789  1.49000008]
Fit 5.00V (size 500): [  0.49739318   0.60064356 -10.60029271   1.50999998]
Fit 1.00V (size 500): [ 0.16529396  0.86052285 -5.15558     1.75999889]
Couldn't fit 1.25V (size 500): Optimal parameters not found: The maximum number of function evaluations is exceeded.
Fit 1.50V (size 500): [ 0.04250929 -0.14631937 -7.43149942  1.74      ]
Fit 1.75V (size 500): [ 0.24902187  0.7204023  -4.37370511  1.75998282]
Fit 2.00V (size 500): [ 0.30327158  0.27572671 -5.68981659  1.74      ]
Fit 2.25V (size 500): [ 0.38648454  0.52169588 -6.56071821  1.76      ]
Fit 2.50V (size 500): [ 0.46132264  0.8654329  -7.17975841  1.76      ]
Fit 2.75V (size 500): [ 0.45273903 -0.4982672

In [None]:
# Output files for the tau-fit variants, next to the default tau_fits.csv.
tau_fits_path_fixn = os.path.join(results_subdir, 'tau_fits_fixn.csv')
tau_fits_path_exclude3 = os.path.join(results_subdir, 'tau_fits_exclude3.csv')
tau_fits_path_fixn_exclude3 = os.path.join(results_subdir, 'tau_fits_fixn_exclude3.csv')

# Voltages dropped in the "exclude3" variants: three at each end of the sweep.
excluded_voltages = [1, 1.25, 1.5, 4.5, 4.75, 5]

# (fix_n_tau, exclude_voltages, output_file) for each variant, in run order.
tau_fit_runs = [
    (False, None, tau_fits_path),
    (True, None, tau_fits_path_fixn),
    (False, excluded_voltages, tau_fits_path_exclude3),
    (True, excluded_voltages, tau_fits_path_fixn_exclude3),
]

for fix_n, excluded, out_path in tau_fit_runs:
    fit_tau_lorentz(voltage_fits_path, n_vals, n_tol, fix_n_tau=fix_n, exclude_voltages=excluded, output_file=out_path)


# for each size and value of n, plot on the right (same plot): fit including all points (free n), fit excluding 3 points on either side (free n), repeat both with fixed n.

