In [45]:
import os
from pathlib import Path

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
from scipy.stats import linregress
from z_base import Experiment, Mols

# Notebook-wide matplotlib defaults: font sizes for ticks, text, titles,
# axis labels and legends, plus the default figure size.
plt.rcParams.update(
    {
        "xtick.labelsize": 10,
        "ytick.labelsize": 10,
        "font.size": 10,
        "axes.titlesize": 16,
        "axes.labelsize": 15,
        "legend.fontsize": 10,
        "figure.figsize": (10, 7),
    }
)

In [3]:
# Matplotlib color code for each value of the `Viscosity_verbose` column;
# the plotting helpers use it to color individual scatter points.
VERBOSE_COLORS = dict(
    OK='g',
    OK_inner='b',
    image_sweep_check='r',
    combine_check='w',
)


def input_path(path=""):
    """Return a path to an existing file, prompting on stdin until one is given.

    Args:
        path: Candidate path. It is returned unchanged when it already names
            an existing file; otherwise the user is asked for another one.

    Returns:
        The first candidate for which ``os.path.isfile`` is true.
    """
    while True:
        if path != "" and os.path.isfile(path):
            return path
        path = input("Input data path: ")


def ask_continue():
    """Ask the user whether to continue and return the answer as a bool.

    An empty reply or ``y`` means ``True``, ``n`` means ``False``. Any other
    reply prints a message and the question is asked again.
    """
    verdicts = {"": True, "y": True, "n": False}
    while True:
        reply = input("Continue [y] and n: ")
        if reply in verdicts:
            return verdicts[reply]
        print("Incorrect input!")


def _ask_limits(prompt, open_upper=False):
    """Prompt until the user enters a ``[low, high]`` pair of floats.

    Args:
        prompt: Text shown by ``input``.
        open_upper: When true, a single number is accepted and ``np.inf`` is
            appended as the upper bound.

    Returns:
        A two-element list of floats.
    """
    while True:
        try:
            # str.split() without arguments drops empty tokens, so repeated
            # or trailing spaces no longer reach float("").
            limits = [float(token) for token in input(prompt).split()]
        except ValueError:
            print("Incorrect input!")
            continue
        if open_upper and len(limits) == 1:
            limits.append(np.inf)
        if len(limits) == 2:
            return limits


def configurate_data(experiment: Experiment) -> Experiment:
    """Interactively choose time and viscosity limits for an experiment.

    On every round a fresh copy of ``experiment`` is filtered with the limits
    typed by the user (temperature limits are fixed to ``(12, 42)``), the
    applied filter is appended to the copy's ``log``, and the result is shown
    with ``temporal_plot``. The loop ends when ``ask_continue`` returns true.

    Note: ``initial_filter`` is defined in a later notebook cell, so that cell
    has to be executed before this function is called.

    Args:
        experiment: Source experiment; only copies of it are filtered.

    Returns:
        The filtered copy accepted by the user.
    """
    temperature_lim = (12, 42)
    while True:
        exp = experiment.copy()
        time_lim = _ask_limits("Time lim (space as delimiter): ", open_upper=True)
        y_lim = _ask_limits("Viscosity lim (space as delimiter): ")

        exp.d = initial_filter(exp.d, time=time_lim, y=y_lim, x=temperature_lim)
        exp.log.append(
            ("initial_filter", {"time": time_lim, "y": y_lim, "x": temperature_lim})
        )

        temporal_plot(
            exp,
            title="Configurate",
            ylabel="Viscosity [cP]",
            interactive=True,
        )
        if ask_continue():
            break
    return exp


def comparation_plot(
    experiment: Experiment,
    ols_exp: Experiment,
    title,
    xlabel,
    ylabel,
    interactive=False,
    save_folder=None,
):
    """Scatter the measured points and overlay the fitted OLS curve.

    Args:
        experiment: Measured data. ``experiment.d`` must provide the ``x``,
            ``y`` and ``Viscosity_verbose`` columns, and ``experiment.info``
            the ``E``, ``D0`` and ``w`` entries.
        ols_exp: Fitted curve; its ``x``/``y`` columns are drawn as a black line.
        title: Used for the window title and for the saved file name.
        xlabel: Label of the x axis.
        ylabel: Label of the y axis.
        interactive: When true, ``plt.show()`` is called.
        save_folder: When not ``None``, the figure is saved as
            ``<save_folder>/Plots/<title>_<experiment.name>.jpg`` at 600 dpi;
            the ``Plots`` directory is created if missing.
    """
    fig, ax = plt.subplots()
    # Each point is colored by its verbose status.
    colors = experiment.d['Viscosity_verbose'].replace(VERBOSE_COLORS)
    ax.scatter(
        experiment.d['x'],
        experiment.d['y'],
        color=colors,
        marker='.',
        label='Data',
        alpha=0.6,
    )

    E = experiment.info['E']
    D0 = experiment.info['D0']
    ax.plot(
        ols_exp.d['x'],
        ols_exp.d['y'],
        color='black',
        label=f'OLS: \nE= {E/1000: >8.2f} kJ\nD= {D0: >8.2e} m2/s',
    )

    fig.canvas.manager.set_window_title(title + ' plot')
    fig.subplots_adjust(
        top=0.9,
        bottom=0.1,
        left=0.1,
        right=0.9,
        hspace=0.2,
        wspace=0.2,
    )

    ax.set_title(f"{experiment.name}: ({experiment.info['w']}% mass)")
    ax.set_xlabel(xlabel)
    ax.set_ylabel(ylabel)
    ax.legend()

    if interactive:
        plt.show()
    if save_folder is not None:
        # os.path.join keeps the path portable and avoids backslash escape
        # sequences inside the string literal.
        plots_dir = os.path.join(str(save_folder), 'Plots')
        os.makedirs(plots_dir, exist_ok=True)
        fig.savefig(
            os.path.join(plots_dir, f'{title}_{experiment.name}.jpg'),
            dpi=600,
        )


def temperature_plot(
    experiment: Experiment,
    title='',
    xlabel='',
    ylabel='',
    interactive=False,
    save_folder=None,
):
    """Scatter ``y`` against ``x`` and overlay mean and median trend lines.

    The mean line is drawn with a one-standard-deviation error band and the
    median line with a 50 % percentile interval (seaborn ``errorbar`` specs).

    Args:
        experiment: Data source. ``experiment.d`` must provide the ``x``,
            ``y`` and ``Viscosity_verbose`` columns, and ``experiment.info``
            the ``w`` entry.
        title: Used for the window title and for the saved file name.
        xlabel: Label of the x axis.
        ylabel: Label of the y axis.
        interactive: When true, ``plt.show()`` is called.
        save_folder: When not ``None``, the figure is saved as
            ``<save_folder>/Plots/<title>_<experiment.name>.jpg`` at 600 dpi;
            the ``Plots`` directory is created if missing.
    """
    fig, ax = plt.subplots()
    # Each point is colored by its verbose status.
    colors = experiment.d['Viscosity_verbose'].replace(VERBOSE_COLORS)
    ax.scatter(x=experiment.d['x'], y=experiment.d['y'], c=colors, s=5)
    sns.lineplot(
        ax=ax,
        data=experiment.d,
        x='x',
        y='y',
        estimator='mean',
        errorbar=("sd", 1),
        label='mean',
    )
    sns.lineplot(
        ax=ax,
        data=experiment.d,
        x="x",
        y="y",
        errorbar=('pi', 50),
        estimator="median",
        label='median',
    )

    fig.canvas.manager.set_window_title(title + ' plot')
    fig.subplots_adjust(
        top=0.9,
        bottom=0.1,
        left=0.1,
        right=0.9,
        hspace=0.2,
        wspace=0.2,
    )
    ax.set_title(f"{experiment.name}: ({experiment.info['w']}% mass)")
    ax.set_xlabel(xlabel)
    ax.set_ylabel(ylabel)

    if interactive:
        plt.show()
    if save_folder is not None:
        # os.path.join keeps the path portable and avoids backslash escape
        # sequences inside the string literal.
        plots_dir = os.path.join(str(save_folder), 'Plots')
        os.makedirs(plots_dir, exist_ok=True)
        fig.savefig(
            os.path.join(plots_dir, f'{title}_{experiment.name}.jpg'),
            dpi=600,
        )


def temporal_plot(
    experiment: Experiment,
    title="",
    ylabel="",
    interactive=False,
    save_folder=None,
):
    """Plot ``y`` and ``x`` against time on twin vertical axes.

    ``y`` is drawn in red on the primary axis (labelled with ``ylabel``) and
    ``x`` in blue on the secondary axis (labelled "Temperature [C]").

    Args:
        experiment: Data source. ``experiment.d`` must provide the ``time``,
            ``x`` and ``y`` columns, and ``experiment.info`` the ``w`` entry.
        title: Used for the window title and for the saved file name.
        ylabel: Label of the primary (red) axis.
        interactive: When true, ``plt.show()`` is called.
        save_folder: When not ``None``, the figure is saved as
            ``<save_folder>/Plots/<title>_<experiment.name>.jpg`` at 600 dpi;
            the ``Plots`` directory is created if missing.
    """
    fig, ax_v = plt.subplots()
    ax_T = ax_v.twinx()
    ax_v.scatter(experiment.d["time"], experiment.d["y"], color="red", marker=".")
    ax_T.scatter(experiment.d["time"], experiment.d["x"], color="blue", marker=".")

    fig.canvas.manager.set_window_title(title + " plot")
    fig.subplots_adjust(
        top=0.9,
        bottom=0.1,
        left=0.1,
        right=0.9,
        hspace=0.2,
        wspace=0.2,
    )
    ax_T.set_title(f"{experiment.name}: ({experiment.info['w']}% mass)")
    ax_v.set_xlabel("Time [s]")
    ax_T.set_ylabel("Temperature [C]", color="blue")
    ax_v.set_ylabel(ylabel, color="red")

    if interactive:
        plt.show()
    if save_folder is not None:
        # os.path.join keeps the path portable and avoids backslash escape
        # sequences inside the string literal.
        plots_dir = os.path.join(str(save_folder), "Plots")
        os.makedirs(plots_dir, exist_ok=True)
        fig.savefig(
            os.path.join(plots_dir, f"{title}_{experiment.name}.jpg"),
            dpi=600,
        )

In [4]:
def regress(experiment: Experiment):
    """Fit an OLS line to the linearized data and derive ``D0`` and ``E``.

    The experiment is passed through ``functions.C_to_K`` and
    ``functions.linearize``; ``y`` is then regressed on ``x`` plus a constant
    column ``x0``. ``D0`` is the exponential of the intercept and ``E`` is
    ``-8.314`` times the slope. ``dD0`` and ``dE`` are half the width of the
    transformed confidence intervals returned by ``conf_int(0.005)``.

    NOTE(review): ``functions`` and ``sm`` are not imported in the visible
    notebook cells; confirm they are defined before this cell is run.

    Args:
        experiment: Experiment whose ``d`` frame has ``x`` and ``y`` columns
            after the two transformations above.

    Returns:
        A tuple ``(info, result, func)``: ``info`` is a dict with the keys
        ``E``, ``D0``, ``dD0``, ``dE``, ``f_statistic`` and ``r2``; ``result``
        is the fitted OLS result object; ``func`` maps ``T`` to
        ``D0 * exp(-E / (8.314 * T))``.
    """
    gas_const = 8.314  # presumably the molar gas constant in J/(mol*K) — TODO confirm
    alpha = 0.005

    linearized = experiment.apply(functions.C_to_K).apply(functions.linearize)
    table = linearized.d
    table["x0"] = 1  # constant regressor, its coefficient is the intercept
    result = sm.OLS(table["y"], table[["x", "x0"]]).fit()

    coefs = result.params
    D0 = np.exp(coefs["x0"])
    E = -gas_const * coefs["x"]

    # Rows of the interval table are indexed by parameter name.
    bounds = result.conf_int(alpha)
    d0_bounds = np.exp(bounds.loc["x0"])
    e_bounds = -gas_const * bounds.loc["x"]
    dD0 = (d0_bounds.max() - d0_bounds.min()) / 2
    dE = (e_bounds.max() - e_bounds.min()) / 2

    def func(T):
        return D0 * np.exp(-E / (gas_const * T))

    info = {
        "E": E,
        "D0": D0,
        "dD0": dD0,
        "dE": dE,
        "f_statistic": result.fvalue,
        "r2": result.rsquared,
    }
    return info, result, func


def create_OLS(exp: Experiment):
    """Run ``regress`` on ``exp`` and return the fitted curve as an Experiment.

    The regression summary is stored on ``exp`` via ``exp.set_info``. The
    fitted function is then sampled at 100 evenly spaced points between
    13 and 42 shifted by 273.15, and the samples are wrapped in a new
    ``Experiment`` named ``"interpolated"`` (with an all-zero ``time`` column)
    that is converted with ``functions.K_to_C`` before being returned.
    """
    info, _result, model = regress(exp)
    exp.set_info(**info)

    grid = np.linspace(13, 42, 100) + 273.15
    curve = pd.DataFrame(
        {
            "x": grid,
            "y": model(grid),
            "time": np.zeros_like(grid),
        }
    )
    fitted = Experiment(curve, "interpolated")
    return fitted.apply(functions.K_to_C)

In [6]:
# Walk every experiment folder and read the stored filter limits and info
# from its HDF5 file, then locate the companion data file.
# NOTE(review): `dir` shadows the builtin; the name is kept in case other
# cells read it.
experiments_dir = Path(r'Viscosity')
for folder_name in sorted(os.listdir(experiments_dir)):
    dir = experiments_dir / folder_name
    if not os.path.isdir(dir):
        continue

    # Sorted listing makes the "first match" choices below deterministic.
    entries = sorted(os.listdir(dir))
    hdf5_candidates = [dir / entry for entry in entries if '.hdf5' in entry]
    if not hdf5_candidates:
        continue
    hdf5_file = hdf5_candidates[0]

    # Only attributes are read, so open the store read-only.
    with pd.HDFStore(hdf5_file, mode="r") as store:
        attrs = store.get_storer("data").attrs
        # Assumes the second log entry is the ("initial_filter", kwargs)
        # record — TODO confirm.
        lims = attrs.log[1][1]
        res = attrs.info

    # The HDF5 file name contains its own stem, so exclude it explicitly;
    # otherwise it could be picked as the companion data file.
    companions = [
        entry
        for entry in entries
        if hdf5_file.stem in entry and entry != hdf5_file.name
    ]
    if not companions:
        continue
    csv_file = dir / companions[0]


In [31]:
# Display the filter limits read from the HDF5 `log` attribute.
lims

{'time': [160.0, inf], 'y': [20.0, 90.0], 'x': (12, 42)}

In [35]:
def initial_filter(df, x=(0, np.inf), y=(0, np.inf), time=(0, np.inf)):
    """Keep the rows whose ``T``, ``Nu`` and ``time`` lie strictly inside limits.

    Args:
        df: Frame with ``T``, ``Nu`` and ``time`` columns.
        x: ``(low, high)`` limits applied to the ``T`` column.
        y: ``(low, high)`` limits applied to the ``Nu`` column.
        time: ``(low, high)`` limits applied to the ``time`` column.

    Returns:
        The rows of ``df`` for which all three columns are strictly between
        their limits (both bounds exclusive).
    """
    keep = None
    for column, limits in (("T", x), ("Nu", y), ("time", time)):
        values = df[column]
        inside = (limits[0] < values) & (values < limits[1])
        keep = inside if keep is None else keep & inside
    return df[keep]

In [114]:
## Group filters
# def z_filter(data: pd.Series):
#     mean = data.mean()
#     s = data.std(ddof=0) + 1e-50
#     z_score = np.abs((data - mean) / s) < 1
#     return z_score


# def whisker_iqr_filter(data: pd.Series):
#     whisker_width = 0.5
#     q1 = data.quantile(0.25)
#     q3 = data.quantile(0.75)
#     iqr = q3 - q1 + 1e-50
#     return (data >= q1 - whisker_width * iqr) & (data <= q3 + whisker_width * iqr)


# def iqr_filter(data: pd.Series):
#     q1 = data.quantile(0.25)
#     q3 = data.quantile(0.75)
#     iqr = q3 - q1 + 1e-50
#     return np.abs((data - data.median()) / iqr) < 1

def regress_filter(x: pd.Series, y: pd.Series, n_sigma: float = 1.0):
    """Flag the points that lie close to the least-squares line of ``y`` on ``x``.

    A straight line is fitted with ``scipy.stats.linregress``; a point is kept
    when the absolute residual divided by the population standard deviation
    of the residuals (``ddof=0``) is below ``n_sigma``.

    Args:
        x: Independent variable.
        y: Dependent variable, aligned with ``x``.
        n_sigma: Threshold on the standardized residual. Defaults to 1.

    Returns:
        Boolean Series, ``True`` for the points to keep.
    """
    fit = linregress(x, y)
    residuals = y - (fit.slope * x + fit.intercept)
    sigma = residuals.std(ddof=0)
    if sigma == 0:
        # Perfect fit: 0 / 0 would give NaN and reject every point, so keep
        # them all instead.
        return pd.Series(True, index=y.index)
    return np.abs(residuals / sigma) < n_sigma

In [None]:
%matplotlib qt

In [115]:
# Molar mass per compound — presumably g/mol; TODO confirm units.
M = {
    Mols.butanol: 74.12,
    Mols.ocm: 418.4,
    Mols.peta: 298.29,
    Mols.dmag: 198.12,
}
# Density per compound — presumably g/cm^3; TODO confirm units.
rho = {
    Mols.butanol: 0.82668,
    Mols.dmag: 1.07,
    Mols.ocm: 1.72,
    Mols.peta: 1.18,
}
# Integer codes replacing the textual status labels.
STATUS_CODES = {
    'OK': 0,
    'OK_inner': 1,
    'image_sweep_check': 2,
    'combine_check': 3,
}

# NOTE(review): `dir` shadows the builtin; the name is kept in case other
# cells read it. Joining the parts keeps the path portable.
dir = Path('Viscosity') / 'PETA1'

# Sorted listing makes the "first match" choices below deterministic.
entries = sorted(os.listdir(dir))
hdf5_file = [dir / entry for entry in entries if '.hdf5' in entry][0]

# Only attributes are read, so open the store read-only (and only once).
with pd.HDFStore(hdf5_file, mode="r") as store:
    attrs = store.get_storer("data").attrs
    # Assumes the second log entry is the ("initial_filter", kwargs) record
    # — TODO confirm.
    lims = attrs.log[1][1]
    res = attrs.info

# The HDF5 file name contains its own stem, so exclude it explicitly;
# otherwise it could be picked as the data file.
csv_file = dir / [
    entry
    for entry in entries
    if hdf5_file.stem in entry and entry != hdf5_file.name
][0]

exp = Experiment()
exp.load_csv(csv_file)
exp.d = exp.d.rename(
    columns={
        "Temperature": "T",
        "Viscosity": "Nu",
        'Viscosity_verbose': 'Nu_status',
        'Temperature_verbose': 'T_status',
    },
)
exp.d[['Nu_status', 'T_status']] = exp.d[['Nu_status', 'T_status']].replace(
    STATUS_CODES
)

# Copy after filtering so the column assignments below act on an independent
# frame rather than on a slice (avoids SettingWithCopyWarning).
exp.d = initial_filter(exp.d, **lims).copy()

exp.d['compound'] = res['compound']
compound = Mols[res['compound']]
exp.d['rho'] = rho[compound]

exp.d['w_mass'] = res['w']
exp.d["w_vol"] = exp.d["w_mass"] * exp.d["rho"] / rho[Mols.butanol]
# Mass percent -> mole percent using the molar masses.
# NOTE(review): `w_mass` is paired with butanol's molar mass here — confirm
# which component `w` refers to.
exp.d['x'] = (
    (exp.d["w_mass"] / M[Mols.butanol])
    / (exp.d["w_mass"] / M[Mols.butanol] + (100 - exp.d["w_mass"]) / M[compound])
    * 100
)

# Viscosity divided by density.
exp.d['V'] = exp.d['Nu'] / exp.d['rho']

# Linearization inputs: ln(Nu) against 1/T, with T shifted by 273.15 first.
exp.d['ln_Nu'] = np.log(exp.d['Nu'])
exp.d['T'] = exp.d['T'] + 273.15
exp.d['1_T'] = 1 / exp.d['T']

# Drop the points that `regress_filter` flags as far from the fitted line.
exp.d = exp.d[regress_filter(exp.d['1_T'], exp.d['ln_Nu'])]

In [87]:
# Display the current experiment DataFrame.
exp.d

In [117]:
# ln(Nu) against 1/T for the current experiment data.
plt.scatter(
    exp.d['1_T'],
    exp.d['ln_Nu'],
    s=2
)
# NOTE(review): units assume T was shifted from Celsius to Kelvin and Nu is in cP — confirm.
plt.xlabel('1/T [1/K]')
plt.ylabel('ln(Viscosity [cP])');

<matplotlib.collections.PathCollection at 0x1f3e4b0a500>

In [None]:

    df["compound"] = exp.info["compound"]
    df["rho"] = exp.info["rho"]
    df["w_mass"] = exp.info["w"]
    df["w_vol"] = df["w_mass"] * df["rho"] / but_rho
    df["D0"] = exp.info["D0"]
    df["E"] = exp.info["E"]
    df['Kin_viscosity'] = df['Viscosity']/df["rho"]
    df = df[
        [
            "compound",
            "rho",
            "w_mass",
            "w_vol",
            "Temperature",
            "time",
            "Viscosity",
            'Kin_viscosity',
            "D",
            "D0",
            "E",
        ]
    ]