In [33]:
import os
import copy

import pandas as pd
import numpy as np
import sympy as sp
from sklearn import linear_model
import statsmodels.api as sm

import matplotlib.pyplot as plt
from matplotlib.widgets import Slider
import seaborn as sns
import plotly
import plotly.graph_objects as go
import plotly.express as px

%matplotlib qt
pd.set_option('mode.chained_assignment', None)
plt.rc('xtick',labelsize=10)
plt.rc('ytick',labelsize=10)
plt.rc('font', size=10) #controls default text size
plt.rc('axes', titlesize=16) #fontsize of the title
plt.rc('axes', labelsize=15) #fontsize of the x and y labels
plt.rc('legend', fontsize=10)
plt.rcParams["figure.figsize"] = (10,7)

In [37]:
class XYContainer:
    """Holds one experiment's DataFrame together with its processing log.

    Attributes:
        d: the data (a ``pandas.DataFrame`` or ``None`` before loading).
        folder: directory of the last loaded file (``None`` before loading).
        experiment_name: file name of the last loaded file up to its first dot.
        info: metadata read back from an HDF5 file by :meth:`load_hdf5`
            (everything stored by :meth:`dump` except the log).
        _x, _x_sig / _y, _y_sig: quantity and unit used to build the
            :attr:`x` / :attr:`y` axis labels.
    """

    d: pd.DataFrame = None

    def __init__(self, data=None, name='', x=None, y=None):
        """Create a container.

        Args:
            data: initial DataFrame (optional).
            name: experiment name.
            x: optional ``(quantity, unit)`` pair for the x axis label.
            y: optional ``(quantity, unit)`` pair for the y axis label.
        """
        self.d = data
        self.folder = None
        self.experiment_name = name
        self.info = {}
        self._log = []
        # Always define the label parts so the `x`/`y` properties never hit
        # a missing attribute on a freshly created container.
        self._x, self._x_sig = x if x is not None else (None, None)
        self._y, self._y_sig = y if y is not None else (None, None)

    @staticmethod
    def _axis_label(quantity, unit, fallback):
        """Build ``"quantity [unit]"``; use `fallback` when no quantity is set."""
        if quantity is None:
            return fallback
        if unit is None:
            return f"{quantity}"
        return f"{quantity} [{unit}]"

    @property
    def x(self):
        """Axis label for the ``'x'`` column (plain ``'x'`` when unset)."""
        return self._axis_label(self._x, self._x_sig, 'x')

    @property
    def y(self):
        """Axis label for the ``'y'`` column (plain ``'y'`` when unset)."""
        return self._axis_label(self._y, self._y_sig, 'y')

    def logger(func):
        """Decorator: append the wrapped method's return value to the log.

        The wrapper itself returns ``None``; the wrapped method's result is
        only recorded, not handed back to the caller.
        """
        import functools

        @functools.wraps(func)
        def inner_func(self, *args, **kwargs):
            res = func(self, *args, **kwargs)
            self.log(res)
            return None

        return inner_func

    @logger
    def load_csv(self, path=''):
        """Read a CSV file into ``self.d`` and log where it came from.

        With an empty or non-existing `path` the user is prompted for one.
        """
        path, self.folder, self.experiment_name = self._input_path(path)
        self.d = pd.read_csv(path)

        print(f'Loaded: {self.experiment_name}')
        return f'csv loaded from: {path}'

    def load_hdf5(self, path=''):
        """Load data, log and metadata previously written by :meth:`dump`.

        The stored log is appended to this container's log, followed by a
        ``'hdf5 loaded from: ...'`` entry; the remaining metadata goes into
        ``self.info``.

        Returns:
            The loaded DataFrame (also stored in ``self.d``).
        """
        path, self.folder, self.experiment_name = self._input_path(path)

        # Read-only: loading must never create or modify the store.
        with pd.HDFStore(path, mode='r') as file:
            self.d = file['data']
            meta = dict(file.get_storer('data').attrs.meta)

        self._log.extend(meta.pop('log', []))
        self.log(f'hdf5 loaded from: {path}')
        self.info.update(meta)
        return self.d

    def dump(self, **kwargs):
        """Write ``self.d`` to ``<folder>/<experiment_name>.hdf5``.

        `kwargs` are stored as metadata next to the data; the current log is
        always stored under the ``'log'`` key (overriding any ``log`` kwarg).
        """
        path = os.path.join(self.folder or '', self.experiment_name)
        # Store a snapshot so later log entries do not alias the saved list.
        kwargs.update({'log': list(self.log())})
        with pd.HDFStore(f'{path}.hdf5') as file:
            file.put('data', self.d)
            file.get_storer('data').attrs.meta = kwargs

    def copy(self):
        """Return a deep copy of this container (data, log and metadata)."""
        return copy.deepcopy(self)

    def log(self, msg=None):
        """Append `msg` to the log (if given) and return the log list."""
        if msg is not None:
            self._log.append(msg)
        return self._log

    @staticmethod
    def _input_path(path):
        """Resolve `path` to an existing file, prompting until one is given.

        Returns:
            ``(path, folder, experiment_name)`` where `folder` is the
            directory part of `path` and `experiment_name` is the file name
            up to its first dot.
        """
        while (not path) or (not os.path.isfile(path)):
            path = input("Input data path: ")
        # os.path handles the platform's separators instead of assuming '\\'.
        folder, file_name = os.path.split(path)
        experiment_name = file_name.split('.')[0]
        return path, folder, experiment_name


class Filters(XYContainer):
    """Row filters that replace ``self.d`` with a filtered frame."""

    def initial_filter(
            self,
            time_in_minutes=False,
            x=(-np.inf, np.inf),
            y=(0, np.inf),
            time=(0, np.inf),
    ):
        """Rename the raw columns and keep rows strictly inside the bounds.

        ``'Temperature'`` becomes ``'x'`` and ``'Viscosity'`` becomes ``'y'``.
        A row is kept when ``low < value < high`` holds for ``x``, ``y`` and
        ``time`` (both ends exclusive).

        Args:
            time_in_minutes: when true, divide the ``'time'`` column by 60
                after filtering. The `time` bounds are compared against the
                values as stored, i.e. before this conversion.
            x, y, time: ``(low, high)`` bounds per column.

        Returns:
            The filtered DataFrame (also stored in ``self.d``).
        """
        conditions = dict(x=x, y=y, time=time)
        # Non in-place rename: the frame handed to the container is left
        # untouched for anyone else holding a reference to it.
        data = self.d.rename(columns={'Temperature': 'x', 'Viscosity': 'y'})

        temperature_cond = (x[0] < data['x']) & (data['x'] < x[1])
        viscosity_cond = (y[0] < data['y']) & (data['y'] < y[1])
        time_cond = (time[0] < data['time']) & (data['time'] < time[1])

        # Explicit copy so later column assignments act on an independent
        # frame rather than on a slice of the unfiltered data.
        data = data[temperature_cond & viscosity_cond & time_cond].copy()
        if time_in_minutes:
            # The conversion result must be stored back to take effect.
            data['time'] = data['time'] / 60

        self.d = data
        self.log(('initial_filter', conditions))
        return self.d

    def mask_filter(
        self,
        filter_func,
        by='x',
        column='y',
    ):
        """Filter rows group-wise with a boolean-mask function.

        The rows are grouped by `by`; `filter_func` receives each group's
        `column` values as a Series and returns a boolean Series marking the
        rows to keep.

        Returns:
            The filtered DataFrame (also stored in ``self.d``).
        """
        # `transform` returns the per-group masks aligned with the rows of
        # `self.d`, independent of the index order.
        mask = self.d.groupby(by=by)[column].transform(filter_func)
        # Rows that received no verdict (NaN) are treated as "drop".
        self.d = self.d[mask.eq(True).to_numpy()]
        name = getattr(filter_func, '__name__', repr(filter_func))
        self.log(('mask_filter', name))
        return self.d


class Operations(XYContainer):
    """In-place transforms of the ``'x'`` / ``'y'`` columns of ``self.d``.

    Every transform appends its name to the log.
    """

    def nu_to_diffusion(self):
        """Shift ``x`` by +273.15 and replace ``y`` by ``k * x / (y * 0.001)``.

        ``k`` is the Boltzmann constant in J/K. The factor 0.001 presumably
        converts the viscosity from cP to Pa*s -- TODO confirm.
        """
        k = 1.380649 * 1e-23
        df = self.d
        df['x'] = df['x'] + 273.15
        df['y'] = k * df['x'] / (df['y'] * 0.001)

        # NOTE(review): x_T, c_K, x_D and c_m2s are not defined in the part
        # of the notebook visible here; they must exist in the kernel
        # namespace before this method is called.
        self._x, self._x_sig = x_T, c_K
        self._y, self._y_sig = x_D, c_m2s
        self.log('to_diffusion')
        return self.d

    def K_to_C(self):
        """Shift ``x`` by -273.15 (inverse of the shift in nu_to_diffusion).

        NOTE(review): the x-axis label parts (``_x``, ``_x_sig``) are left
        unchanged, so the label still shows the previous unit.
        """
        df = self.d
        df['x'] = df['x'] - 273.15
        # Logged like every other transform so the history stays complete.
        self.log('K_to_C')
        return self.d

    def linearize(self):
        """Replace ``x`` by ``1 / x`` and ``y`` by ``ln(y)``.

        The axis label parts are transformed the same way, so they are
        expected to support ``1 / ...`` and ``sp.ln`` (e.g. sympy symbols).
        """
        df = self.d
        df['x'] = 1 / df['x']
        df['y'] = np.log(df['y'])

        self._x, self._x_sig = 1 / self._x, 1 / self._x_sig
        self._y, self._y_sig = sp.ln(self._y), sp.ln(self._y_sig)
        self.log('linearize')
        return self.d

    def delinearize(self):
        """Undo :meth:`linearize`: ``x`` becomes ``1 / x``, ``y`` becomes ``exp(y)``."""
        df = self.d
        df['x'] = 1 / df['x']
        df['y'] = np.exp(df['y'])

        self._x, self._x_sig = 1 / self._x, 1 / self._x_sig
        self._y, self._y_sig = sp.E**(self._y), sp.E**(self._y_sig)
        self.log('delinearize')
        return self.d


class Container(Filters, Operations):
    """Container that combines the Filters and Operations methods."""

In [38]:
# Create an empty container and load a CSV file. With no path given,
# load_csv prompts for one until it names an existing file.
a = Container()
a.load_csv()

Loaded: Exp0_up


In [39]:
# Display the processing log accumulated so far.
a.log()

['csv loaded from: D:\\Works\\Diplom-work\\Experiments\\MultiplyTemperature\\Exp0(0)\\Exp0_up.csv']

In [None]:
## Filtration
def z_filter(data: pd.Series):
    """Mark the values lying strictly within one standard deviation of the mean.

    Uses the population standard deviation (``ddof=0``). The tiny constant
    added to it keeps the division finite when all values are equal.

    Returns:
        Boolean Series with the index of `data`; True means "keep".
    """
    spread = data.std(ddof=0) + 1e-50
    deviation = (data - data.mean()) / spread
    return deviation.abs() < 1


# def z_filter(data: pd.Series):
#     return stats.zscore(data)<1


def whisker_iqr_filter(data: pd.Series, whisker_width: float = 0.5):
    """Mark the values inside ``[q1 - w*IQR, q3 + w*IQR]`` (both ends inclusive).

    Args:
        data: values of one group.
        whisker_width: multiple ``w`` of the inter-quartile range added on
            each side of the quartile box. Defaults to 0.5.

    Returns:
        Boolean Series with the index of `data`; True means "keep".
    """
    q1 = data.quantile(0.25)
    q3 = data.quantile(0.75)
    reach = whisker_width * (q3 - q1)
    return (data >= q1 - reach) & (data <= q3 + reach)


def iqr_filter(data: pd.Series):
    """Mark the values closer to the median than one inter-quartile range.

    A value is kept when ``|value - median| / IQR < 1``.

    Returns:
        Boolean Series with the index of `data`; True means "keep".
    """
    q1 = data.quantile(0.25)
    q3 = data.quantile(0.75)
    # The tiny constant keeps the division finite when the IQR is zero
    # (single sample, or at least half of the samples identical). Without
    # it 0/0 yields NaN and even the samples equal to the median are dropped.
    iqr = (q3 - q1) + 1e-50

    return np.abs((data - data.median()) / iqr) < 1


def my_z_filter(data: pd.Series):
    """Mark the values within 0.1 sample standard deviations of the median.

    Returns:
        Boolean Series with the index of `data`; True means "keep".
    """
    center = data.median()
    spread = data.std()
    # The sample standard deviation of a single value is NaN, which would
    # bypass the epsilon guard below and reject that value. Treat it as
    # "no spread" so a lone sample (equal to its own median) is kept.
    if pd.isna(spread):
        spread = 0.0
    z_score = np.abs((data - center) / (spread + 1e-50)) < 0.1
    return z_score


# Pipeline

In [1]:
import pint

In [2]:
# Unit registry used by the pint experiments in the following cells.
u= pint.UnitRegistry()

In [5]:
# NOTE(review): this rebinds `a`, which held the Container loaded earlier;
# `a` is created again as a Container further down before the pipeline uses it.
a= 25*u.m

In [10]:
import numpy as np
# 100 evenly spaced values from 0 to 10, multiplied by the registry's metre unit.
t= np.linspace(0,10,100)*u.m

In [18]:
import pandas as pd

In [49]:
# One-row frame whose cells hold pint units (columns get object dtype).
df = pd.DataFrame({'a': [u.m], 'b': [u.s]})

0    1000 meter
Name: a, dtype: object

In [None]:
# Empty table that collects one row of fit results per processed experiment.
RESULT_COLUMNS = ['E_J', 'dE_J', 'D0_m2s', 'dD0', 'name', 'w_mass', 'desc']
results = pd.DataFrame(columns=RESULT_COLUMNS)

In [None]:
# Write the results table to Excel.
# NOTE(review): this cell sits directly after the one that creates an empty
# `results`; run top to bottom it writes an empty sheet. Presumably it is
# meant to be run after rows have been appended -- confirm.
results.to_excel(r'..\Experiments\MultiplyTemperature\Results.xlsx')

In [41]:
# Load one experiment (load_csv prompts for the path) and plot the raw
# viscosity and temperature against time on twin y-axes.
a = Container()
a.load_csv()

fig, ax_v = plt.subplots()
ax_T = ax_v.twinx()
ax_v.set_xlabel('time [s]')

# Left axis: viscosity.
ax_v.scatter(x=a.d['time'], y=a.d['Viscosity'], color='red', marker='.')
ax_v.set_ylabel('Viscosity [cP]', color='red')

# Right axis: temperature.
ax_T.scatter(x=a.d['time'], y=a.d['Temperature'], color='blue', marker='.')
ax_T.set_ylabel('Temperature [C]', color='blue')

a.experiment_name

Loaded: Exp0_up


'Exp0_up'

In [42]:
# Reload the HDF5 file that sits next to the CSV and carries the same
# experiment name.
hdf5_path = '\\'.join([a.folder, a.experiment_name]) + '.hdf5'
resolved = Container()
resolved.load_hdf5(path=hdf5_path)
resolved.experiment_name

'Exp0_up'

In [44]:
# Recover the bounds that were passed to initial_filter when the HDF5 file
# was produced. The entry is looked up by its tag rather than by a fixed
# position, so extra log entries in front of it do not break the lookup.
initial_filter_conditions = next(
    entry[1]
    for entry in resolved.log()
    if not isinstance(entry, str) and entry[0] == 'initial_filter'
)
# Map the key names 'viscosity'/'temperature' onto the current 'y'/'x'.
h = {
    key.replace('viscosity', 'y').replace('temperature', 'x'): value
    for key, value in initial_filter_conditions.items()
}
h

{'x': (12, 42), 'y': (20, 350), 'time': (0, inf)}

In [45]:
# Apply the recovered bounds to the freshly loaded data, then keep an
# independent snapshot of the filtered state.
a.initial_filter(**h)
base = a.copy()

In [None]:
## Save temporal plots: 'y' and 'x' against time on twin y-axes.
plot = a.copy()
# Text after the first '(' of the folder name, minus its last character
# (expected to be the closing ')').
w = plot.folder.split('(')[1][:-1]

fig, ax_v = plt.subplots()
ax_T = ax_v.twinx()
ax_v.set(title=f'{plot.experiment_name}: ({w}% mass)', xlabel='time [s]')

# Left axis.
ax_v.scatter(x=plot.d['time'], y=plot.d['y'], color='red', marker='.')
ax_v.set_ylabel(plot.y, color='red')

# Right axis.
ax_T.scatter(x=plot.d['time'], y=plot.d['x'], color='blue', marker='.')
ax_T.set_ylabel(plot.x, color='blue')

fig.savefig(fname=a.folder + '\\1Temporary.pdf', format='pdf')

In [47]:
## Save temperature plots: 'y' against 'x', coloured by 'Viscosity_verbose'.
plot = a.copy()
# Text after the first '(' of the folder name, minus its last character
# (expected to be the closing ')').
w = plot.folder.split('(')[1][:-1]

# Matplotlib colour code for each 'Viscosity_verbose' value.
colors = plot.d['Viscosity_verbose'].replace({
    'OK': 'g',
    'OK_inner': 'b',
    'image_sweep_check': 'r',
    'combine_check': 'w',
})

fig, ax = plt.subplots()
ax.set(
    title=f'{plot.experiment_name}: ({w}% mass)',
    xlabel=plot.x,
    ylabel=plot.y,
)
ax.scatter(x=plot.d['x'], y=plot.d['y'], c=colors, s=5)

# Mean of y per x value with a +/- 1 standard deviation band.
sns.lineplot(
    ax=ax,
    x=plot.d["x"],
    y=plot.d["y"],
    estimator='mean',
    errorbar=("sd", 1),
    label='mean',
)
fig.savefig(fname=a.folder + '\\2Temperature.pdf', format='pdf')

In [None]:
# Transform `a` in place: convert to the diffusion quantity, linearise
# (x -> 1/x, y -> ln y), then drop per-x outliers with iqr_filter.
# NOTE(review): each call mutates `a`, so re-running this cell applies the
# transforms a second time; re-create `a` before running it again.
a.nu_to_diffusion()
a.linearize()
a.mask_filter(iqr_filter)

In [None]:
## Diffusion plot: transformed data with mean and median trend lines.
plot = a.copy()
# Text after the first '(' of the folder name, minus its last character
# (expected to be the closing ')').
w = plot.folder.split('(')[1][:-1]

# Matplotlib colour code for each 'Viscosity_verbose' value.
colors = plot.d['Viscosity_verbose'].replace({
    'OK': 'g',
    'OK_inner': 'b',
    'image_sweep_check': 'r',
    'combine_check': 'w',
})

fig, ax = plt.subplots()
ax.set(
    title=f'{plot.experiment_name}: ({w}% mass)',
    xlabel=plot.x,
    ylabel=plot.y,
)
ax.scatter(x=plot.d['x'], y=plot.d['y'], c=colors, s=5)

# Mean per x value with a +/- 1 standard deviation band.
sns.lineplot(
    ax=ax, data=plot.d, x="x", y="y",
    estimator='mean', errorbar=("sd", 1), label='mean',
)
# Median per x value with a 50 % percentile interval.
sns.lineplot(
    ax=ax, data=plot.d, x="x", y="y",
    estimator="median", errorbar=('pi', 50), label='median',
)
fig.savefig(fname=a.folder + '\\3Linearized.pdf', format='pdf')

In [None]:
##Save fast OLS plot
def regress(data):
    """Fit ``y = slope * x + intercept`` by ordinary least squares.

    Args:
        data: frame with the columns ``'x'`` and ``'y'``.

    Returns:
        ``(D0, E, TC_func)`` with ``D0 = exp(intercept)``,
        ``E = -8.314 * slope`` and ``TC_func(T)`` evaluating
        ``D0 * exp(-E / (8.314 * T))``.
    """
    gas_constant = 8.314  # presumably the molar gas constant, J/(mol*K)

    features = data['x'].to_numpy().reshape(-1, 1)  # one column per feature
    targets = data['y'].to_numpy()
    model = linear_model.LinearRegression(fit_intercept=True)
    model.fit(features, targets)

    E = -gas_constant * model.coef_[0]
    D0 = np.exp(model.intercept_)

    # The fitted values are bound as defaults so the returned function
    # carries them with it.
    def TC_func(T, E=E, D0=D0):
        return D0 * np.exp(-E / (gas_constant * T))

    return D0, E, TC_func


# Take the snapshot first: everything below (title, scatter, fit) must use
# this cell's own copy rather than a `plot` left over from an earlier cell.
plot = a.copy()
D0, E, func = regress(plot.d)
# Text after the first '(' of the folder name, minus its last character
# (expected to be the closing ')').
w = plot.folder.split('(')[1][:-1]

fig, ax = plt.subplots()
ax.set_title(f'{plot.experiment_name}: ({w}% mass)')
fig.subplots_adjust(
    top=0.9,
    bottom=0.1,
    left=0.1,
    right=0.9,
    hspace=0.2,
    wspace=0.2,
)

ax.scatter(plot.d['x'], plot.d['y'], color='gray', marker='.')
# First-order regression line; the legend entry reports the constants
# returned by regress().
sns.regplot(
    ax=ax,
    data=plot.d,
    x='x',
    y='y',
    scatter=False,
    truncate=False,
    order=1,
    label=f'All:\nE= {E/1000:5.2f} kJ\nD= {D0:5.3e} m2*s',
)
ax.set_xlabel(plot.x)
ax.set_ylabel(plot.y)
# Legend on this axes explicitly, not on whichever axes is "current".
ax.legend()
fig.savefig(fname=a.folder+r'\4OLS.pdf',format='pdf',)

In [None]:
## Save comparison plot: delinearised data against the fitted OLS curve.
# Evaluation grid for the fitted curve (presumably 13..42 degrees C
# expressed in kelvin -- confirm against the filter bounds).
x = np.linspace(273.15 + 13, 273.15 + 42, 100)
ols_res = Container(
    pd.DataFrame({
        'x': x, 'y': func(x)
    }),
)
# Container.__init__ as defined in this notebook takes only `data` and
# `name`, so the axis metadata is attached through the attributes that the
# `x` / `y` label properties read.
# NOTE(review): x_T, c_K, x_D and c_m2s are not defined in the visible part
# of the notebook; they must already exist in the kernel namespace.
ols_res._x, ols_res._x_sig = x_T, c_K
ols_res._y, ols_res._y_sig = x_D, c_m2s


fig, ax = plt.subplots()
# Text after the first '(' of the folder name, minus its last character
# (expected to be the closing ')').
w = a.folder.split('(')[1][:-1]
ax.set_title(f'{a.experiment_name}: ({w}% mass)')
# Matplotlib colour code for each 'Viscosity_verbose' value.
colors = a.d['Viscosity_verbose'].replace({
    'OK': 'g',
    'OK_inner': 'b',
    'image_sweep_check': 'r',
    'combine_check': 'w',
})


# Undo the linearisation on a copy so `a` itself stays linearised.
plot = a.copy()
plot.delinearize()
ax.scatter(
    x=plot.d['x'],
    y=plot.d['y'],
    c=colors,
    s=5,
    label='Real',
)
ax.set_xlabel(plot.x)
ax.set_ylabel(plot.y)

res_plot = ols_res.copy()
sns.lineplot(
    ax=ax,
    data=res_plot.d,
    x="x",
    y="y",
    estimator='mean',
    label='OLS',
)
fig.savefig(
    fname=a.folder + r'\5Comparation.pdf',
    format='pdf',
)


In [None]:
## Regression: OLS fit with confidence intervals for E and D0.
reg_a = a.copy()

df = reg_a.d
df['x0'] = 1  # constant column, so the fit includes an intercept ('x0')
result_OLS = sm.OLS(df['y'], df[['x', 'x0']]).fit()
means = result_OLS.params
# DataFrame with one row per parameter ('x', 'x0') holding the lower and
# upper bound; alpha=0.005 requests a 99.5 % interval.
conf_int = result_OLS.conf_int(0.005)

# Intercept -> D0: exponentiate the estimate and both bounds.
conf_int.loc['x0'] = np.exp(conf_int.loc['x0'])
D0 = np.exp(means['x0'])
dD0 = (conf_int.loc['x0'].max() - conf_int.loc['x0'].min()) / 2
# Decimal exponent used to scale the printed values; forced to a plain int
# so that 10**power and the '1e{power}' text behave the same everywhere.
nu0_power = int(round(np.log10(dD0)))

# Slope -> E: scale the estimate and both bounds by -8.314.
conf_int.loc['x'] = -8.314 * conf_int.loc['x']
E = -8.314 * means['x']
dE = (conf_int.loc['x'].max() - conf_int.loc['x'].min()) / 2
EkJ_power = int(round(np.log10(dE))) + 1

exp_name = reg_a.experiment_name
# Text after the first '(' of the folder name, minus its last character
# (expected to be the closing ')').
w = reg_a.folder.split('(')[1][:-1]
print(
    f'Constants {exp_name} ({w}% mass):',
    f'E  = {E/10**EkJ_power: <7.3f} ± {dE/10**EkJ_power: <3.2f} J *1e{EkJ_power}',
    f'D0 = {D0/10**nu0_power: <7.3f} ± {dD0/10**nu0_power: <3.2f} m2*s *1e{nu0_power}',
    sep='\n',
)
print(result_OLS.summary2())

In [None]:
## Save result: append the fit to `results` and dump the data with it.
desc = input('Description')
new_row = [E, dE, D0, dD0, reg_a.experiment_name, w, desc]
results.loc[len(results)] = new_row

# Store the row just written as metadata next to the data in the HDF5 file.
saved_row = results.loc[len(results) - 1]
reg_a.dump(**saved_row.to_dict())
results