In [1]:
from analysis import DumpAnalyzer
from os import path, walk
from typing import Tuple
import matplotlib.pyplot as plt
import numpy as np
import multiprocessing as mp

In [2]:
# Recursively collect the path of every .txt file below ../analysis_many_dims.
txt_files = [
    path.join(folder, name)
    for folder, _subdirs, names in walk('../analysis_many_dims')
    for name in names
    if name.endswith('.txt')
]
# Starts empty; rebound below to the DumpAnalyzer objects parsed from txt_files.
das = []

def open_file(file: str) -> DumpAnalyzer:
    """
    Build a DumpAnalyzer from a text file on disk.
    :param file: Path of the file to open (read in text mode).
    :return: DumpAnalyzer constructed from the open file handle.
    """
    with open(file, 'r') as handle:
        analyzer = DumpAnalyzer(import_data_file=handle)
    return analyzer

# Parse all files in parallel. Pool.map returns its results in input order, so
# das[i] is the analyzer built from txt_files[i]; later cells rely on that pairing.
with mp.Pool() as pool:
    das = pool.map(open_file, txt_files)


# Disabled code kept as a string literal bound to the throwaway name `_`;
# nothing inside the string is executed.
_ = """
# walk through dumps_compressed and store all files in a list
dump_files = []
for root, dirs, files in walk('dumps_compressed'):
    for file in files:
        if file.endswith('.xz'):
            dump_files.append(path.join(root, file))
"""

In [3]:
def fit(da: DumpAnalyzer) -> DumpAnalyzer:
    """
    Run the line fit on a DumpAnalyzer and hand the same object back.
    :param da: DumpAnalyzer object whose data should be fitted.
    :return: The DumpAnalyzer that was passed in, after fit_line has been called on it.
    """
    fit_options = {'step_size': 1, 'r_threshold': 0}
    da.fit_line(**fit_options)
    return da

# Fit every analyzer in parallel. The objects returned by the workers replace
# the previous contents of das (order is preserved by Pool.map).
with mp.Pool() as pool:
    das = pool.map(fit, das)
pass

In [4]:
# (index, analyzer) pairs: one work item per DumpAnalyzer for the plotting pool.
args = list(enumerate(das))

def generate_plot(args: Tuple[int, DumpAnalyzer]) -> None:
    """
    Generate a plot for a DumpAnalyzer object and write it to ../plots/plot_<index>.png.

    The index and the analyzer arrive packed in one tuple so the function can be
    passed directly to ``Pool.map``.
    :param args: Tuple containing the index of the DumpAnalyzer object and the DumpAnalyzer object itself.
        The index also selects the entry of ``txt_files`` whose base name is used as the plot title.
    :return: None
    """
    i, da = args
    # Cleared before plotting; presumably this makes generate_plot omit the
    # standard-deviation band -- TODO confirm against DumpAnalyzer.
    da.standard_deviation = None
    # path.basename rather than slicing at the last '/': path.join builds the
    # entries of txt_files with the platform separator, which is not always '/'.
    da.generate_plot(output_filename=f'../plots/plot_{i}.png', title=path.basename(txt_files[i]))

# Render all plots in parallel. generate_plot returns None, so the result of
# map is intentionally discarded.
with mp.Pool() as pool:
    pool.map(generate_plot, args)
# Disabled alternative call that passes output_filename=None instead of a file path:
#da.generate_plot(output_filename=None, title=txt_files[i][txt_files[i].rfind('/')+1:])

In [5]:
import csv
def get_params_from_csv(csv_file: str, file_encoding: str) -> Tuple[Tuple[float, float], Tuple[float, float]]:
    """
    Get the parameters from a CSV file.

    The first row is treated as a header and skipped. For every remaining
    non-empty row, column index 2 is read as the initial energy and column
    index 3 as the saddle energy. Outliers are removed from each column
    independently before the statistics are computed.
    :param csv_file: CSV file to read from.
    :param file_encoding: Encoding of the CSV file.
    :return: Two Tuples containing the initial mean and sd and the saddle mean and sd.
        The sd is numpy's default (population, ddof=0) standard deviation.
    :raises ValueError: If an energy field cannot be parsed as a float.
    :raises IndexError: If a non-empty data row has fewer than four columns.
    """
    init_values = []
    saddle_values = []
    # newline='' lets the csv module do its own line-ending handling, as its
    # documentation requires for files passed to csv.reader.
    with open(csv_file, 'r', encoding=file_encoding, newline='') as file:
        reader = csv.reader(file)
        next(reader, None)  # skip the header row (no-op on an empty file)
        for row in reader:
            # csv.reader yields [] for blank lines (e.g. a trailing newline);
            # skip them instead of failing on row[2].
            if not row:
                continue
            init_values.append(float(row[2]))
            saddle_values.append(float(row[3]))
    init_energies = np.array(init_values)
    saddle_energies = np.array(saddle_values)

    def reject_outliers(data, m=100.):
        # Keep the values whose absolute deviation from the median is less than
        # m times the median absolute deviation.
        d = np.abs(data - np.median(data))
        mdev = np.median(d)
        # When the median deviation is 0 every point is kept, which also avoids
        # dividing by zero.
        s = d / mdev if mdev else np.zeros(len(d))
        return data[s < m]

    init_energies = reject_outliers(init_energies)
    saddle_energies = reject_outliers(saddle_energies)

    init_params = (np.mean(init_energies), np.std(init_energies))
    saddle_params = (np.mean(saddle_energies), np.std(saddle_energies))
    return init_params, saddle_params

In [6]:
# Barrier statistics from the initial and saddle energies stored in the CSV file.
init_params, saddle_params = get_params_from_csv('../csv/1.csv', 'utf-8')
barrier_mean = saddle_params[0] - init_params[0]
# The two standard deviations are combined in quadrature.
barrier_sd = np.sqrt(saddle_params[1]**2 + init_params[1]**2)

diffusivities = []
temperatures = []
# das[i] was built from txt_files[i], so the two lists are walked in lockstep.
for txt_file, da_obj in zip(txt_files, das):
    # Use the line of best fit to get the slope
    slope, _ = da_obj.line_of_best_fit

    # The temperature is the part of the file name between the first '_' and
    # the extension. basename/splitext are used instead of slicing at '/' and
    # dropping four characters so the parse does not depend on the platform's
    # path separator.
    stem = path.splitext(path.basename(txt_file))[0]
    temp = float(stem[stem.find('_')+1:])

    # Calculate the diffusivity using the slope of the linear portion.
    # NOTE(review): the divisor 6 presumably comes from MSD = 6*D*t in three
    # dimensions -- confirm that is what line_of_best_fit describes.
    diffusivity = slope / 6

    # Append the diffusivity and temperature to their respective lists
    diffusivities.append(diffusivity)
    temperatures.append(temp)

# Drop every non-positive diffusivity (its logarithm is undefined) together
# with the matching temperature, printing each pair that is dropped.
new_diffusivities = []
new_temperatures = []
for temp_kelvin, diff_value in zip(temperatures, diffusivities):
    if diff_value <= 0:
        print(f'Temperature: {temp_kelvin} K, Diffusivity: {diff_value}')
        continue
    new_diffusivities.append(diff_value)
    new_temperatures.append(temp_kelvin)

# y values: natural log of the remaining diffusivities.
diffusivities = np.log(np.array(new_diffusivities))
# x values: temperature in Kelvin times the Boltzmann constant in eV/K, then
# inverted, i.e. 1 / (k_B * T) in eV^-1.
temperatures = 1 / (np.array(new_temperatures) * 8.617333262145e-5)


# Scatter plot of ln(D) against the inverse thermal energy.
fig, ax = plt.subplots(figsize=(8, 6))
ax.scatter(temperatures, diffusivities, marker='o', color='b')
# Linear regression through the points; draw the fitted line and print its equation.
m, b = np.polyfit(temperatures, diffusivities, 1)
ax.plot(temperatures, m*temperatures + b, color='r')
print(f'y = {m}x + {b}')
print(f'barrier mean: {barrier_mean}, barrier sd: {barrier_sd}')
ax.set_xlabel('1/T (eV^-1)')
ax.set_ylabel('ln(D)')
ax.set_title('Diffusivity vs. Temperature')
ax.grid(True)
plt.show()

In [None]:
# Empty placeholder cell; does nothing.
pass