In [1]:
import os
from pathlib import Path

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
from scipy.stats import linregress
from z_base import Experiment, Mols, T_ZERO

# Notebook-wide matplotlib defaults: tick/label/title/legend font sizes and
# the default figure size.
plt.rcParams.update(
    {
        "xtick.labelsize": 10,
        "ytick.labelsize": 10,
        "font.size": 10,
        "axes.titlesize": 16,
        "axes.labelsize": 15,
        "legend.fontsize": 10,
        "figure.figsize": (10, 7),
    }
)

In [2]:
%matplotlib qt

In [3]:
# Maps the labels stored in the 'Viscosity_verbose' column to matplotlib
# single-letter colour codes. The plotting helpers below apply it with
# Series.replace() to colour every scattered point by its label.
VERBOSE_COLORS = {
    'OK': 'g',
    'OK_inner': 'b',
    'image_sweep_check': 'r',
    # 'w' is white: presumably meant to hide these points on a white
    # background -- TODO confirm.
    'combine_check': 'w',
}


def input_path(path=""):
    """Return a path that points to an existing file.

    If ``path`` is empty or is not an existing file, the user is prompted
    on the console repeatedly until a valid file path is entered.

    Args:
        path: Candidate path; when it already names an existing file it is
            returned unchanged without prompting.

    Returns:
        The first candidate for which ``os.path.isfile`` is true.
    """
    while True:
        if path != "" and os.path.isfile(path):
            return path
        path = input("Input data path: ")


def ask_continue():
    """Ask the user on the console whether to continue.

    An empty reply or "y" means yes, "n" means no; any other reply prints
    an error message and the question is asked again.

    Returns:
        True for yes, False for no.
    """
    while True:
        reply = input("Continue [y] and n: ")
        if reply in ("", "y"):
            return True
        if reply == "n":
            return False
        print("Incorrect input!")


def _read_limits(prompt, open_upper=False):
    """Prompt until the user enters a valid pair of numeric limits.

    Args:
        prompt: Text shown to the user.
        open_upper: When True, a single number is accepted and the upper
            limit is set to ``np.inf``.

    Returns:
        A two-element list of floats ``[lower, upper]``.
    """
    while True:
        raw = input(prompt)
        try:
            # Skip empty tokens so repeated or trailing spaces are harmless.
            limits = [float(token) for token in raw.split(" ") if token != ""]
        except ValueError:
            # Non-numeric reply: report it and ask again instead of crashing.
            print("Incorrect input!")
            continue
        if open_upper and len(limits) == 1:
            limits.append(np.inf)
        if len(limits) == 2:
            return limits


def configurate_data(experiment: Experiment) -> Experiment:
    """Interactively choose time and viscosity limits for an experiment.

    On every pass a fresh copy of ``experiment`` is taken, the user is asked
    for the limits, the copy is filtered with ``initial_filter`` (the
    temperature window is fixed at 12..42), the applied limits are appended
    to the copy's log, and the result is shown with ``temporal_plot``. The
    loop repeats until the user confirms via ``ask_continue``.

    Args:
        experiment: Source experiment; it is never modified, only copied.

    Returns:
        The filtered copy accepted by the user.
    """
    temperature_lim = (12, 42)
    while True:
        exp = experiment.copy()
        time_lim = _read_limits("Time lim (space as delimiter): ", open_upper=True)
        y_lim = _read_limits("Viscosity lim (space as delimiter): ")

        exp.d = initial_filter(exp.d, time=time_lim, y=y_lim, x=temperature_lim)
        exp.log.append(
            ("initial_filter", {"time": time_lim, "y": y_lim, "x": temperature_lim})
        )

        temporal_plot(
            exp,
            title="Configurate",
            ylabel="Viscosity [cP]",
            interactive=True,
        )
        if ask_continue():
            return exp


def comparation_plot(
    experiment: Experiment,
    ols_exp: Experiment,
    title,
    xlabel,
    ylabel,
    interactive=False,
    save_folder=None,
):
    """Plot measured points together with the fitted OLS line.

    The points ``experiment.d['x']`` / ``experiment.d['y']`` are scattered and
    coloured by their 'Viscosity_verbose' label (see ``VERBOSE_COLORS``); the
    line ``ols_exp.d['x']`` / ``ols_exp.d['y']`` is drawn in black with the
    values ``experiment.info['E']`` and ``experiment.info['D0']`` in its
    legend entry.

    Args:
        experiment: Experiment providing the scattered data and the info
            values shown in the title and legend.
        ols_exp: Experiment whose ``d['x']`` / ``d['y']`` hold the fitted line.
        title: Used for the window title and the saved file name.
        xlabel: X-axis label.
        ylabel: Y-axis label.
        interactive: When True, ``plt.show()`` is called.
        save_folder: When not None, the figure is written to
            ``<save_folder>/Plots/<title>_<experiment.name>.jpg`` at 600 dpi.
    """
    fig, ax = plt.subplots()
    colors = experiment.d['Viscosity_verbose'].replace(VERBOSE_COLORS)
    ax.scatter(
        experiment.d['x'],
        experiment.d['y'],
        color=colors,
        marker='.',
        label='Data',
        alpha=0.6,
    )

    E = experiment.info['E']
    D0 = experiment.info['D0']
    ax.plot(
        ols_exp.d['x'],
        ols_exp.d['y'],
        color='black',
        label=f'OLS: \nE= {E/1000: >8.2f} kJ\nD= {D0: >8.2e} m2/s',
    )

    fig.canvas.manager.set_window_title(title + ' plot')
    fig.subplots_adjust(
        top=0.9,
        bottom=0.1,
        left=0.1,
        right=0.9,
        hspace=0.2,
        wspace=0.2,
    )

    ax.set_title(f"{experiment.name}: ({experiment.info['w']}% mass)")
    ax.set_xlabel(xlabel)
    ax.set_ylabel(ylabel)
    ax.legend()

    if interactive:
        plt.show()
    if save_folder is not None:
        # os.path.join avoids the invalid "\P" escape and the hard-coded
        # Windows separator, so the path is also correct on POSIX systems.
        plots_dir = os.path.join(save_folder, 'Plots')
        os.makedirs(plots_dir, exist_ok=True)
        fig.savefig(
            os.path.join(plots_dir, f'{title}_{experiment.name}.jpg'),
            dpi=600,
        )


def temperature_plot(
    experiment: Experiment,
    title='',
    xlabel='',
    ylabel='',
    interactive=False,
    save_folder=None,
):
    """Plot ``y`` against ``x`` with mean and median trend lines.

    Every row of ``experiment.d`` is scattered, coloured by its
    'Viscosity_verbose' label (see ``VERBOSE_COLORS``). Two seaborn line plots
    are drawn on top: the mean with a one-standard-deviation band and the
    median with a 50 % percentile-interval band.

    Args:
        experiment: Experiment whose ``d`` frame holds 'x', 'y' and
            'Viscosity_verbose'; ``info['w']`` and ``name`` go into the title.
        title: Used for the window title and the saved file name.
        xlabel: X-axis label.
        ylabel: Y-axis label.
        interactive: When True, ``plt.show()`` is called.
        save_folder: When not None, the figure is written to
            ``<save_folder>/Plots/<title>_<experiment.name>.jpg`` at 600 dpi.
    """
    fig, ax = plt.subplots()
    colors = experiment.d['Viscosity_verbose'].replace(VERBOSE_COLORS)
    ax.scatter(x=experiment.d['x'], y=experiment.d['y'], c=colors, s=5)
    sns.lineplot(
        ax=ax,
        data=experiment.d,
        x='x',
        y='y',
        estimator='mean',
        errorbar=("sd", 1),
        label='mean',
    )
    sns.lineplot(
        ax=ax,
        data=experiment.d,
        x="x",
        y="y",
        errorbar=('pi', 50),
        estimator="median",
        label='median',
    )

    fig.canvas.manager.set_window_title(title + ' plot')
    fig.subplots_adjust(
        top=0.9,
        bottom=0.1,
        left=0.1,
        right=0.9,
        hspace=0.2,
        wspace=0.2,
    )
    ax.set_title(f"{experiment.name}: ({experiment.info['w']}% mass)")
    ax.set_xlabel(xlabel)
    ax.set_ylabel(ylabel)

    if interactive:
        plt.show()
    if save_folder is not None:
        # os.path.join avoids the invalid "\P" escape and the hard-coded
        # Windows separator, so the path is also correct on POSIX systems.
        plots_dir = os.path.join(save_folder, 'Plots')
        os.makedirs(plots_dir, exist_ok=True)
        fig.savefig(
            os.path.join(plots_dir, f'{title}_{experiment.name}.jpg'),
            dpi=600,
        )


def temporal_plot(
    experiment: Experiment,
    title="",
    ylabel="",
    interactive=False,
    save_folder=None,
):
    """Plot ``y`` and ``x`` of an experiment against time on twin axes.

    ``experiment.d['y']`` is drawn in red on the left axis (labelled with
    ``ylabel``) and ``experiment.d['x']`` in blue on the right axis (labelled
    "Temperature [C]"); both share the ``experiment.d['time']`` x-axis.

    Args:
        experiment: Experiment whose ``d`` frame holds 'time', 'x' and 'y';
            ``info['w']`` and ``name`` go into the title.
        title: Used for the window title and the saved file name.
        ylabel: Label of the left (red) axis.
        interactive: When True, ``plt.show()`` is called.
        save_folder: When not None, the figure is written to
            ``<save_folder>/Plots/<title>_<experiment.name>.jpg`` at 600 dpi.
    """
    fig, ax_v = plt.subplots()
    ax_T = ax_v.twinx()
    ax_v.scatter(experiment.d["time"], experiment.d["y"], color="red", marker=".")
    ax_T.scatter(experiment.d["time"], experiment.d["x"], color="blue", marker=".")

    fig.canvas.manager.set_window_title(title + " plot")
    fig.subplots_adjust(
        top=0.9,
        bottom=0.1,
        left=0.1,
        right=0.9,
        hspace=0.2,
        wspace=0.2,
    )
    ax_T.set_title(f"{experiment.name}: ({experiment.info['w']}% mass)")
    ax_v.set_xlabel("Time [s]")
    ax_T.set_ylabel("Temperature [C]", color="blue")
    ax_v.set_ylabel(ylabel, color="red")

    if interactive:
        plt.show()
    if save_folder is not None:
        # os.path.join avoids the invalid "\P" escape and the hard-coded
        # Windows separator, so the path is also correct on POSIX systems.
        plots_dir = os.path.join(save_folder, "Plots")
        os.makedirs(plots_dir, exist_ok=True)
        fig.savefig(
            os.path.join(plots_dir, f"{title}_{experiment.name}.jpg"),
            dpi=600,
        )

In [3]:
def initial_filter(df, x=(0, np.inf), y=(0, np.inf), time=(0, np.inf)):
    """Keep the rows whose 'T', 'Nu' and 'time' lie strictly inside limits.

    Args:
        df: Frame with the columns 'T', 'Nu' and 'time'.
        x: ``(lower, upper)`` exclusive bounds applied to column 'T'.
        y: ``(lower, upper)`` exclusive bounds applied to column 'Nu'.
        time: ``(lower, upper)`` exclusive bounds applied to column 'time'.

    Returns:
        The subset of ``df`` whose rows satisfy all three bound pairs.
    """
    mask = None
    for column, limits in (("T", x), ("Nu", y), ("time", time)):
        inside = df[column].gt(limits[0]) & df[column].lt(limits[1])
        mask = inside if mask is None else mask & inside
    return df[mask]

In [4]:
## Group filters
# def z_filter(data: pd.Series):
#     mean = data.mean()
#     s = data.std(ddof=0) + 1e-50
#     z_score = np.abs((data - mean) / s) < 1
#     return z_score


# def whisker_iqr_filter(data: pd.Series):
#     whisker_width = 0.5
#     q1 = data.quantile(0.25)
#     q3 = data.quantile(0.75)
#     iqr = q3 - q1 + 1e-50
#     return (data >= q1 - whisker_width * iqr) & (data <= q3 + whisker_width * iqr)


# def iqr_filter(data: pd.Series):
#     q1 = data.quantile(0.25)
#     q3 = data.quantile(0.75)
#     iqr = q3 - q1 + 1e-50
#     return np.abs((data - data.median()) / iqr) < 1

def regress_filter(x: pd.Series, y: pd.Series, threshold: float = 1.0):
    """Flag points lying close to the least-squares line of ``y`` on ``x``.

    A straight line is fitted with ``scipy.stats.linregress``; each residual
    is divided by the population standard deviation (``ddof=0``) of all
    residuals, and a point is kept when the absolute value of that ratio is
    below ``threshold``.

    Args:
        x: Independent variable.
        y: Dependent variable.
        threshold: Maximum allowed absolute residual, in units of the
            residual standard deviation. Defaults to 1.

    Returns:
        Boolean Series that is True for the points to keep.
    """
    fit = linregress(x, y)
    residuals = y - (fit.slope * x + fit.intercept)
    spread = residuals.std(ddof=0)
    if spread == 0:
        # Every point lies exactly on the line. Dividing by zero would give
        # NaN and reject all rows, so keep everything instead.
        return pd.Series(True, index=residuals.index)
    return np.abs(residuals / spread) < threshold

In [5]:
# Per-compound constants used to convert mass fractions into mole-based
# quantities in the processing loop (presumably molar masses in g/mol --
# TODO confirm units).
M = {
    Mols.butanol: 74.12,
    Mols.ocm: 418.4,
    Mols.peta: 298.29,
    Mols.dmeg: 198.12,
}
# Reference value for butanol used to derive 'w_vol' (presumably its density
# in g/cm^3 at the measurement temperature -- TODO confirm).
rho_butanol = 0.82668


experiments_dir = Path(r'Raw_data')


# Convert every raw experiment folder into a processed result file.
# A folder is processed only if it contains an HDF5 store; the raw table is
# read from the sibling file whose name contains the store's stem.
for folder_name in sorted(os.listdir(experiments_dir)):
    # Named `exp_dir`, not `dir`, so the builtin dir() stays usable in the
    # notebook namespace.
    exp_dir = experiments_dir / folder_name
    if not exp_dir.is_dir():
        continue

    file_names = sorted(os.listdir(exp_dir))
    hdf5_names = [name for name in file_names if '.hdf5' in name]
    if not hdf5_names:
        continue
    hdf5_file = exp_dir / hdf5_names[0]

    # The store's own name always contains its stem, so exclude it explicitly.
    # Otherwise the HDF5 file itself could be handed to read_csv, depending on
    # the order in which the directory is listed.
    csv_names = [
        name
        for name in file_names
        if hdf5_file.stem in name and name != hdf5_file.name
    ]
    if not csv_names:
        raise FileNotFoundError(
            f"No raw data file matching '{hdf5_file.stem}' found in {exp_dir}"
        )
    csv_file = exp_dir / csv_names[0]

    expr = Experiment()
    expr.read_csv(csv_file)
    with pd.HDFStore(hdf5_file) as store:
        storer_attrs = store.get_storer("data").attrs
        # Second log entry's payload; presumably the limits recorded by the
        # interactive 'initial_filter' step -- TODO confirm.
        lims = storer_attrs.log[1][1]
        res = storer_attrs.info
    expr.lims = {key: tuple(value) for key, value in lims.items()}

    raw = expr.d.drop(columns=['Viscosity_verbose', 'Temperature_verbose']).rename(
        columns={
            "Temperature": "T",
            "Viscosity": "Nu",
        }
    )
    # Copy the filtered slice so the column assignments below write to an
    # independent frame (no SettingWithCopyWarning, no silent no-op writes).
    data = initial_filter(raw, **expr.lims).copy()

    # 'dmag' stored in some files is treated as an alias of Mols.dmeg.
    compound = Mols[res['compound']] if res['compound'] != 'dmag' else Mols.dmeg
    m_butanol = M[Mols.butanol]
    m_compound = M[compound]

    data['rho'] = res['rho']
    data['w_mass'] = res['w']
    data['w_vol'] = data['w_mass'] * data['rho'] / rho_butanol
    # Mole percentage of butanol computed from the mass percentage.
    data['w'] = (
        (data['w_mass'] / m_butanol)
        / (data['w_mass'] / m_butanol + (100 - data['w_mass']) / m_compound)
        * 100
    )
    data['C_but'] = data['w_mass'] * data['rho'] * 10 / m_butanol
    data['C_comp'] = (100 - data['w_mass']) * data['rho'] * 10 / m_compound

    data['V'] = data['Nu'] / data['rho']

    data['ln_Nu'] = np.log(data['Nu'])
    data['T'] += T_ZERO
    data['T_1'] = 1 / data['T']
    # Drop points that deviate from the linear ln(Nu) vs 1/T trend.
    data = data[regress_filter(data['T_1'], data['ln_Nu'])]

    # Average all remaining columns per temperature. The first column is
    # dropped by position beforehand (presumably a non-numeric or index-like
    # column -- TODO confirm).
    data = data.iloc[:, 1:].groupby(by='T').mean().reset_index()
    data['D'] = data['T'] / data['Nu']
    data['ln_D'] = np.log(data['D'])

    data.insert(0, 'compound', compound.name)
    expr.d = data
    # Positional access: does not rely on the index containing the label 0.
    expr.name = f"{compound.name}_{data['w_mass'].iloc[0]:.0f}"
    expr.save_hdf5('Result_data')

your performance may suffer as PyTables will pickle object types that it cannot
map directly to c-types [inferred_type->mixed,key->values] [items->None]

  file.put('lims', pd.Series(self.lims))
your performance may suffer as PyTables will pickle object types that it cannot
map directly to c-types [inferred_type->mixed,key->values] [items->None]

  file.put('lims', pd.Series(self.lims))
your performance may suffer as PyTables will pickle object types that it cannot
map directly to c-types [inferred_type->mixed,key->values] [items->None]

  file.put('lims', pd.Series(self.lims))
your performance may suffer as PyTables will pickle object types that it cannot
map directly to c-types [inferred_type->mixed,key->values] [items->None]

  file.put('lims', pd.Series(self.lims))
your performance may suffer as PyTables will pickle object types that it cannot
map directly to c-types [inferred_type->mixed,key->values] [items->None]

  file.put('lims', pd.Series(self.lims))
your performance may suff

In [16]:
results_dir = Path(r'Result_data')
# Built with pathlib so the separator is correct on every platform.
output_file = Path('Datas') / 'All_viscosity.xlsx'

# Load every processed experiment and stack the tables into one frame.
# Sorting makes the row order independent of the directory listing order.
dfs = [
    Experiment.from_hdf5(results_dir / file_name).d
    for file_name in sorted(os.listdir(results_dir))
]
DF = pd.concat(dfs).reset_index(drop=True)
# Round everything except the first column (the 'compound' label inserted by
# the processing step) to 6 decimals, using the vectorised DataFrame.round.
DF.iloc[:, 1:] = DF.iloc[:, 1:].round(6)

output_file.parent.mkdir(parents=True, exist_ok=True)
DF.to_excel(output_file)
# Last expression of the cell, so the preview is actually displayed.
DF.head(3)