In [None]:
import os
import copy

import pandas as pd
import numpy as np
from sklearn import linear_model
import statsmodels.api as sm

import matplotlib.pyplot as plt
from matplotlib.widgets import Slider
import seaborn as sns
import plotly
import plotly.graph_objects as go
import plotly.express as px

%matplotlib qt
# Silence pandas' chained-assignment warning for the whole notebook.
pd.set_option('mode.chained_assignment', None)

# Matplotlib defaults shared by every figure created below.
plt.rcParams.update({
    'xtick.labelsize': 10,
    'ytick.labelsize': 10,
    'font.size': 10,         # default text size
    'axes.titlesize': 16,    # font size of the title
    'axes.labelsize': 15,    # font size of the x and y labels
    'legend.fontsize': 10,
    'figure.figsize': (10, 7),
})

In [None]:
class Container:
    """Measurement table of one experiment together with a log of what was done to it.

    Attributes:
        d: DataFrame holding the data; ``apply`` expects the columns ``time``,
            ``x`` and ``y``.
        folder: Directory part of the last loaded file path (``None`` until a load).
        experiment_name: File name of the loaded data up to its first dot, or the
            ``name`` passed to the constructor.
        log: List of ``(operation, parameters)`` tuples recorded by the
            ``log_wrapp``-decorated methods.
        info: Metadata dictionary filled by ``load_hdf5``.
    """

    d: pd.DataFrame = None

    def __init__(self, data=None, name=''):
        self.d = data
        self.folder = None
        self.experiment_name = name
        # Created per instance: a class-level list would be shared (and mutated) by
        # every Container, mixing the histories of unrelated experiments.
        self.log = []
        self.info = {}

    # NOTE(review): `_x`, `_x_sig`, `_y` and `_y_sig` are not assigned anywhere in the
    # visible code, so these two properties raise AttributeError unless the attributes
    # are set from outside.
    @property
    def x(self):
        """Label built as ``"<_x> [<_x_sig>]"``."""
        return f"{self._x} [{self._x_sig}]"

    @property
    def y(self):
        """Label built as ``"<_y> [<_y_sig>]"``."""
        return f"{self._y} [{self._y_sig}]"

    def log_wrapp(func):
        """Decorator for methods that return an ``(operation, parameters)`` pair.

        The wrapper prints the pair, appends it to ``self.log`` and returns
        ``self.d`` instead of the pair.
        """
        import functools  # local import so the class does not depend on new file-level imports

        @functools.wraps(func)  # keep the wrapped method's name and docstring
        def log_wrapper(self, *args, **kwargs):
            res = func(self, *args, **kwargs)
            print(f'{res[0]} : {res[1]}')
            self.log.append(res)
            return self.d

        return log_wrapper

    @log_wrapp
    def load_csv(self, path=''):
        """Read a CSV into ``self.d``, renaming ``Temperature``/``Viscosity`` to ``x``/``y``.

        ``path`` goes through ``_input_path``, which prompts until an existing file
        is given; ``folder`` and ``experiment_name`` are updated from it.
        """
        path, self.folder, self.experiment_name = self._input_path(path)
        self.d = pd.read_csv(path).rename(columns={'Temperature': 'x', 'Viscosity': 'y'})
        return ('csv loaded', path)

    @log_wrapp
    def load_hdf5(self, path=''):
        """Restore data, metadata and log from a file written by ``save_hdf5``.

        The stored frame becomes ``self.d``, the stored log is appended to
        ``self.log`` and the stored info is merged into ``self.info``.
        """
        path, self.folder, self.experiment_name = self._input_path(path)

        # Read-only: nothing is written, so the file must not be opened for append.
        with pd.HDFStore(path, mode='r') as file:
            data = file['data']
            attrs = file.get_storer('data').attrs
            info = attrs.info
            log = attrs.log

        self.d = data  # previously the loaded frame was read and then discarded
        self.log.extend(log)
        self.info.update(info)
        return ('hdf5 loaded', path)

    def save_hdf5(self, **info):
        """Write ``self.d`` to ``<folder>\\<experiment_name>.hdf5``.

        The keyword arguments are stored as the ``info`` attribute and ``self.log``
        as the ``log`` attribute of the ``data`` node.
        """
        # Backslash is written as an explicit escape; the resulting path is unchanged.
        file_path = f'{self.folder}\\{self.experiment_name}.hdf5'
        with pd.HDFStore(file_path) as file:
            file.put('data', self.d)
            storer_attrs = file.get_storer('data').attrs
            storer_attrs.info = info
            storer_attrs.log = self.log

    def copy(self):
        """Return a deep copy of this container (data, log and info included)."""
        return copy.deepcopy(self)

    @staticmethod
    def _input_path(path):
        """Ask on stdin until ``path`` names an existing file, then split it.

        Returns ``(path, folder, experiment_name)``. The split is done on
        backslashes only, i.e. Windows-style paths are assumed; the experiment
        name is the file name up to its first dot.
        """
        while (path == '') or (not os.path.isfile(path)):
            path = input(f"Input data path: ")
        _path_list = (path).split('\\')
        folder = '\\'.join(_path_list[:-1])
        experiment_name = _path_list[-1].split('.')[0]
        return path, folder, experiment_name

    @log_wrapp
    def apply(self, func):
        """Replace the ``time``, ``x`` and ``y`` columns by ``func(time, x, y)``."""
        self.d['time'], self.d['x'], self.d['y'] = func(self.d['time'], self.d['x'], self.d['y'])
        return (func.__name__, [])

    @log_wrapp
    def group_filter(self, filter, by='x', column='y'):
        """Keep the rows for which ``filter`` returns True within each ``by`` group.

        ``filter`` receives the ``column`` values of one group and must return a
        boolean Series with the same index.
        """
        group = self.d.groupby(by=by)[column]
        # The group key is dropped from the result index and the mask is put back
        # into index order before being applied positionally.
        # NOTE(review): this assumes groupby.apply adds the key as index level 0 and
        # that self.d is already sorted by a unique index -- confirm for your pandas.
        mask = group.apply(filter).droplevel([0]).sort_index().to_numpy()
        self.d = self.d[mask]
        return (filter.__name__, [])

    @log_wrapp
    def mask_filter(self, filter, **kwargs):
        """Replace ``self.d`` by ``filter(self.d, **kwargs)`` and log the kwargs."""
        self.d = filter(self.d,**kwargs)
        return (filter.__name__, kwargs)


## Functions
def nu_to_diffusion(time, x, y):
    """Replace ``y`` by ``k*x / (y*0.001)`` with ``k = 1.380649e-23``.

    ``k`` has the numerical value of the Boltzmann constant in J/K. The notebook
    labels ``x`` as temperature and ``y`` as viscosity in cP, so the ``0.001``
    factor presumably converts cP to Pa*s -- confirm the units with the author.

    Returns ``(time, x, new_y)``; ``time`` and ``x`` pass through unchanged.
    """
    boltzmann = 1.380649 * 1e-23
    scaled_viscosity = y*0.001
    converted = boltzmann*x / scaled_viscosity
    return time, x, converted


def nu_to_v(time, x, y):
    """Divide ``y`` by the constant 1.73 and return ``(time, x, y / 1.73)``.

    The constant is called ``ro`` in the original code, presumably a density;
    its units are not stated in the file.
    """
    density = 1.73
    return time, x, y / density


def K_to_C(time, x, y):
    """Shift ``x`` from kelvin to degrees Celsius (``x - 273.15``); ``time`` and ``y`` are untouched."""
    celsius = x - 273.15
    return time, celsius, y


def C_to_K(time, x, y):
    """Shift ``x`` from degrees Celsius to kelvin (``x + 273.15``); ``time`` and ``y`` are untouched."""
    kelvin = x + 273.15
    return time, kelvin, y


def linearize(time, x, y):
    """Map ``(x, y)`` to ``(1/x, ln y)`` and return ``(time, 1/x, ln y)``.

    ``delinearize`` applies the inverse mapping.
    """
    reciprocal_x = 1 / x
    log_y = np.log(y)
    return time, reciprocal_x, log_y


def delinearize(time, x, y):
    """Map ``(x, y)`` to ``(1/x, exp(y))`` and return ``(time, 1/x, exp(y))``.

    This undoes ``linearize``.
    """
    reciprocal_x = 1 / x
    exp_y = np.exp(y)
    return time, reciprocal_x, exp_y


## Mask filters
def initial_filter(df, x=(-np.inf, np.inf), y=(0, np.inf), time=(0, np.inf)):
    """Return the rows of ``df`` whose ``x``, ``y`` and ``time`` lie inside open intervals.

    Each argument is a ``(low, high)`` pair; both bounds are exclusive, so with
    the defaults rows with ``y == 0`` or ``time == 0`` are removed.
    """
    keep = df['x'].gt(x[0]) & df['x'].lt(x[1])
    keep = keep & (df['y'].gt(y[0]) & df['y'].lt(y[1]))
    keep = keep & (df['time'].gt(time[0]) & df['time'].lt(time[1]))
    return df[keep]


## Group filters
def z_filter(data: pd.Series):
    """Boolean mask of the values lying strictly within one standard deviation of the mean.

    The population standard deviation (``ddof=0``) is used; ``1e-50`` is added to
    it so a zero spread does not cause a division by zero.
    """
    centre = data.mean()
    spread = data.std(ddof=0) + 1e-50
    standardized = (data-centre) / spread
    return np.abs(standardized) < 1


def whisker_iqr_filter(data: pd.Series):
    """Boolean mask of the values inside ``[Q1 - 0.5*IQR, Q3 + 0.5*IQR]`` (bounds inclusive)."""
    whisker_width = 0.5
    lower_quartile, upper_quartile = data.quantile(0.25), data.quantile(0.75)
    spread = upper_quartile - lower_quartile
    above_floor = data >= lower_quartile - whisker_width*spread
    below_ceiling = data <= upper_quartile + whisker_width*spread
    return above_floor & below_ceiling


def iqr_filter(data: pd.Series):
    """Boolean mask of the values closer to the median than one interquartile range.

    A value is kept when ``|value - median| / IQR < 1``.

    ``1e-50`` is added to the IQR (the same guard ``z_filter`` uses for its
    standard deviation). Without it a group with zero IQR -- a single value or
    all values equal -- evaluates ``0/0 = NaN`` and every row is rejected.
    """
    q1 = data.quantile(0.25)
    q3 = data.quantile(0.75)
    iqr = (q3 - q1) + 1e-50

    return np.abs((data - data.median()) / iqr) < 1


# Pipeline

In [None]:
# Empty table that collects one row of fitted constants per processed experiment.
results = pd.DataFrame(columns=[
    'E_J',
    'dE_J',
    'D0_m2s',
    'dD0',
    'name',
    'w_mass',
    'desc',
])

In [None]:
# Write the accumulated `results` table to the shared Excel workbook.
# NOTE(review): in top-to-bottom order this cell runs right after `results` is created
# empty, so "Run All" overwrites Results.xlsx with an empty sheet. Presumably it is meant
# to be run by hand after the "Save result" cell -- confirm, and consider moving it last.
results.to_excel(r'..\Experiments\MultiplyTemperature\Results.xlsx')

In [None]:
# Colour code for each value of the 'Viscosity_verbose' column; the scatter plots in
# later cells translate that column through this mapping.
verbose_colors = {
    'OK': 'g',
    'OK_inner': 'b',
    'image_sweep_check': 'r',
    'combine_check': 'w',
}

# Load the raw measurements; with no path given, load_csv asks for one on stdin.
a = Container()
a.load_csv()

# Quick look at the raw time series: y on the left axis, x on a twin right axis.
fig, ax_v = plt.subplots()
ax_T = ax_v.twinx()
ax_v.scatter(a.d['time'], a.d['y'], color='red', marker='.')
ax_T.scatter(a.d['time'], a.d['x'], color='blue', marker='.')

fig.subplots_adjust(
    top=0.9,
    bottom=0.1,
    left=0.1,
    right=0.9,
    hspace=0.2,
    wspace=0.2,
)
ax_v.set_xlabel('Time [s]')
ax_T.set_ylabel('Temperature [C]', color='blue')
ax_v.set_ylabel('Viscosity [cP]', color='red')

In [None]:
# Re-open the HDF5 file stored next to the CSV that was loaded into `a`.
resolved = Container()
resolved.load_hdf5(path=a.folder+'\\'+a.experiment_name+'.hdf5')
resolved.experiment_name  # bare expression: not the cell's last line, so nothing is displayed
# Parameter dict of the second log entry, with 'viscosity'/'temperature' inside the key
# names replaced by 'y'/'x'; shown as the cell output.
h={key.replace('viscosity','y').replace('temperature','x'):value  for key,value in resolved.log[1][1].items()}
h

In [None]:
# Keep only rows inside the given open intervals of time, y and x; this replaces `a.d`.
a.mask_filter(initial_filter,time=(0, np.inf), y=(20, 350), x=(12, 42))
# Deep copy of the filtered data, taken before any unit conversion is applied to `a`.
base =a.copy()

In [None]:
## Temporal plot: y and x against time (saving is currently commented out)
plot = a.copy()
fig, ax_v = plt.subplots()
# Text after the first '(' in the folder path minus its final character (presumably the
# closing ')'); it is shown in the title as the mass percentage.
w = plot.folder.split('(')[1][:-1]
ax_v.set_title(f'{plot.experiment_name}: ({w}% mass)')
ax_T = ax_v.twinx()

# y on the left axis in red, x on the twin right axis in blue.
ax_v.scatter(plot.d['time'], plot.d['y'], color='red', marker='.')
ax_T.scatter(plot.d['time'], plot.d['x'], color='blue', marker='.')


fig.subplots_adjust(
    top=0.9,
    bottom=0.1,
    left=0.1,
    right=0.9,
    hspace=0.2,
    wspace=0.2,
)
ax_v.set_ylabel('Viscosity [cP]', color='red')
ax_v.set_xlabel('Time [s]')
ax_T.set_ylabel('Temperature [C]', color='blue')

# fig.savefig(f'{plot.folder}\Plots\\1{plot.experiment_name}_Time.jpg',dpi =600)

In [None]:
## Temperature plot: y against x with mean and median trend lines (saving is commented out)
plot = a.copy()

fig, ax = plt.subplots()
# One colour per point, looked up from the 'Viscosity_verbose' column.
colors = plot.d['Viscosity_verbose'].replace(verbose_colors)
ax.scatter(x=plot.d['x'], y=plot.d['y'], c=colors, s=5)

# Mean of y per x value with a +/- 1 standard deviation band.
sns.lineplot(
    ax=ax,
    # data=plot.d,
    x=plot.d["x"],
    y=plot.d["y"],
    estimator='mean',
    errorbar=("sd", 1),
    label='mean',
)
# Median of y per x value with a 68 % percentile-interval band.
sns.lineplot(
    ax=ax,
    data=plot.d,
    x="x",
    y="y",
    errorbar=('pi', 68),
    estimator="median",
    label='median',
)


fig.subplots_adjust(
    top=0.9,
    bottom=0.1,
    left=0.1,
    right=0.9,
    hspace=0.2,
    wspace=0.2,
)
# Text after the first '(' in the folder path minus its final character (presumably the
# closing ')'); it is shown in the title as the mass percentage.
w = plot.folder.split('(')[1][:-1]
ax.set_title(f'{plot.experiment_name}: ({w}% mass)')
ax.set_xlabel('Temperature [C]')
ax.set_ylabel('Viscosity [cP]')
# fig.savefig(f'{plot.folder}\Plots\\2{plot.experiment_name}_Temperature.jpg',dpi =600)

In [None]:
# Transform `a` in place, in this order: C_to_K, nu_to_diffusion, linearize, then drop
# the rows rejected by iqr_filter within each group of equal x.
# NOTE(review): every step overwrites `a.d`, so re-running this cell applies the
# conversions a second time; re-run the load and filter cells first.
a.apply(C_to_K)
a.apply(nu_to_diffusion)
a.apply(linearize)
a.group_filter(iqr_filter)
1;  # trailing expression with ';' so the cell shows no output

In [None]:
## Diffusion plot: linearized data (y against x) with mean and median trend lines
plot = a.copy()
fig, ax = plt.subplots()

# One colour per point, looked up from the 'Viscosity_verbose' column.
colors = plot.d['Viscosity_verbose'].replace(verbose_colors)
ax.scatter(x=plot.d['x'], y=plot.d['y'], c=colors, s=5)

# Mean of y per x value with a +/- 1 standard deviation band.
sns.lineplot(
    ax=ax,
    data=plot.d,
    x="x",
    y="y",
    estimator='mean',
    errorbar=("sd", 1),
    # err_style="bars",
    label='mean',
)
# Median of y per x value with a 50 % percentile-interval band.
sns.lineplot(
    ax=ax,
    data=plot.d,
    x="x",
    y="y",
    errorbar=('pi', 50),
    estimator="median",
    label='median',
)


fig.subplots_adjust(
    top=0.9,
    bottom=0.1,
    left=0.1,
    right=0.9,
    hspace=0.2,
    wspace=0.2,
)
# Text after the first '(' in the folder path minus its final character (presumably the
# closing ')'); it is shown in the title as the mass percentage.
w = plot.folder.split('(')[1][:-1]
ax.set_title(f'{plot.experiment_name}: ({w}% mass)')
ax.set_xlabel('Temperature [1/K]')
ax.set_ylabel('D [log(m3/s)]')
# Both backslashes are escaped explicitly: '\P' is an invalid escape sequence that newer
# Python versions warn about. The resulting file path is unchanged.
fig.savefig(f'{plot.folder}\\Plots\\3{plot.experiment_name}_Line.jpg',dpi =600)

In [None]:
## Fast OLS plot: fit a straight line to the linearized data and save the figure
plot = a.copy()


def regress(data):
    """Fit ``y = slope*x + intercept`` by ordinary least squares.

    ``data`` must provide ``'x'`` and ``'y'``. Returns ``(D0, E, TC_func)`` where
    ``D0 = exp(intercept)``, ``E = -8.314 * slope`` and
    ``TC_func(T) = D0 * exp(-E / (8.314*T))``. The factor 8.314 is presumably the
    gas constant in J/(mol*K) -- confirm.
    """
    gas_constant = 8.314
    features = np.array([data['x']]).T  # column vector of shape (n, 1)
    targets = np.array(data['y'])

    model = linear_model.LinearRegression(fit_intercept=True)
    model.fit(features, targets)
    slope, intercept = model.coef_[0], model.intercept_

    D0 = np.exp(intercept)
    E = -gas_constant * slope

    def TC_func(T, E=E, D0=D0):
        # E and D0 are bound as defaults so the fitted values travel with the function.
        return D0 * np.exp(-E / (gas_constant*T))

    return D0, E, TC_func


fig, ax = plt.subplots()

# Linearized measurements as grey background points.
ax.scatter(plot.d['x'], plot.d['y'], color='gray', marker='.')
D0, E, OLS_func = regress(plot.d)
# Evaluate the fitted curve on 100 points from 13 to 42 offset by 273.15 (degrees
# Celsius expressed in kelvin) and keep it in a Container for the comparison cell.
x = np.linspace(273.15 + 13, 273.15 + 42, 100)
ols_res = Container(pd.DataFrame({'x': x, 'y': OLS_func(x),'time':x*0}), 'interpolated')

# First-order regression line drawn by seaborn; the legend entry reports E and D0.
sns.regplot(
    ax=ax,
    data=plot.d,
    x='x',
    y='y',
    scatter=False,
    truncate=False,
    order=1,
    label=f'All:\nE= {E/1000: >8.2f} kJ\nD= {D0: >8.2e} m3/s',
)

fig.subplots_adjust(
    top=0.9,
    bottom=0.1,
    left=0.1,
    right=0.9,
    hspace=0.2,
    wspace=0.2,
)
# Text after the first '(' in the folder path minus its final character (presumably the
# closing ')'); it is shown in the title as the mass percentage.
w = plot.folder.split('(')[1][:-1]
ax.set_title(f'{plot.experiment_name}: ({w}% mass)')
ax.set_xlabel('Temperature [1/K]')
ax.set_ylabel('D [log(m3/s)]')
# Attach the legend to this axes explicitly; plt.legend() targets whichever axes
# pyplot currently treats as active, which may be another open figure window.
ax.legend()
# Both backslashes are escaped explicitly: '\P' is an invalid escape sequence that newer
# Python versions warn about. The resulting file path is unchanged.
fig.savefig(f'{plot.folder}\\Plots\\4{plot.experiment_name}_OLS.jpg',dpi =600)

In [None]:
## Comparison plot: de-linearized measurements against the OLS curve
plot = a.copy()


fig, ax = plt.subplots()
# One colour per point, looked up from the 'Viscosity_verbose' column.
colors = plot.d['Viscosity_verbose'].replace(verbose_colors)

# Undo the linearization, then convert x from kelvin back to degrees Celsius.
plot.apply(delinearize)
plot.apply(K_to_C)
ax.scatter(
    x=plot.d['x'],
    y=plot.d['y'],
    c=colors,
    s=5,
    label='Real',
)


# The fitted curve is stored with x in kelvin; convert a copy to Celsius as well.
res_plot = ols_res.copy()
res_plot.apply(K_to_C)
sns.lineplot(
    ax=ax,
    data=res_plot.d,
    x="x",
    y="y",
    estimator='mean',
    label='OLS',
)


fig.subplots_adjust(
    top=0.9,
    bottom=0.1,
    left=0.1,
    right=0.9,
    hspace=0.2,
    wspace=0.2,
)
# Text after the first '(' in the folder path minus its final character (presumably the
# closing ')'); it is shown in the title as the mass percentage.
w = plot.folder.split('(')[1][:-1]
ax.set_title(f'{plot.experiment_name}: ({w}% mass)')
# Both series went through K_to_C above, so the axis is in degrees Celsius, not kelvin.
ax.set_xlabel('Temperature [C]')
ax.set_ylabel('D [m3/s]')
# Both backslashes are escaped explicitly: '\P' is an invalid escape sequence that newer
# Python versions warn about. The resulting file path is unchanged.
fig.savefig(f'{plot.folder}\\Plots\\5{plot.experiment_name}_Comparation.jpg',dpi =600)

In [None]:
## Regression: statsmodels OLS on the linearized data, with interval half-widths
reg_a = a.copy()

df = reg_a.d
df['x0'] = 1  # constant column so that the fit includes an intercept term
result_OLS = sm.OLS(df['y'], df[['x', 'x0']]).fit()
means = result_OLS.params
# conf_int(0.005) is called with alpha = 0.005. The `.loc` indexer itself is stored, so
# conf_int['x0'] / conf_int['x'] below read and assign the ROW (lower and upper bound) of
# that parameter.
conf_int = result_OLS.conf_int(0.005).loc

# Intercept -> D0 = exp(intercept); dD0 is half the width of the exponentiated interval.
conf_int['x0'] = np.exp(conf_int['x0'])
D0 = np.exp(means['x0'])
dD0 = (conf_int['x0'].max() - conf_int['x0'].min()) / 2
nu0_power = round(np.log10(dD0))  # decimal exponent used to scale the printed D0 values

# Slope -> E = -8.314 * slope (same conversion as in regress); dE is half the interval width.
conf_int['x'] = -8.314 * conf_int['x'] 
E = -8.314 * means['x'] 
dE = (conf_int['x'].max() - conf_int['x'].min()) / 2
EkJ_power = round(np.log10(dE)) + 1  # decimal exponent used to scale the printed E values

exp_name = reg_a.experiment_name
# Text after the first '(' in the folder path minus its final character (presumably the
# closing ')'); it is printed as the mass percentage.
w = reg_a.folder.split('(')[1][:-1]
print(
    f'Constants {exp_name} ({w}% mass):',
    f'E  = {E/10**EkJ_power: <7.3f} ± {dE/10**EkJ_power: <3.2f} J *1e{EkJ_power}',
    f'D0 = {D0/10**nu0_power: <7.3f} ± {dD0/10**nu0_power: <3.2f} m2*s *1e{nu0_power}',
    sep='\n',
)
print(result_OLS.summary2())

In [None]:
## Save result: append the fitted constants to `results` and store them with the data
desc = input('Description')  # free-text description typed in by the user
# New row at position len(results); the value order follows the `results` columns.
results.loc[len(results)] = [
    E, dE, D0, dD0, reg_a.experiment_name, w, desc
]
# Persist the regression copy; the row just added is passed as keyword arguments and is
# stored by save_hdf5 as the `info` attribute of the HDF5 node.
reg_a.save_hdf5(**results.loc[len(results) - 1].to_dict())
results