In [None]:
import os
import copy

import pandas as pd
import numpy as np
import sympy as sp
from sklearn import linear_model
import statsmodels.api as sm

import matplotlib.pyplot as plt
from matplotlib.widgets import Slider
import seaborn as sns
import plotly
import plotly.graph_objects as go
import plotly.express as px

# Notebook-wide display configuration.
# Qt backend: figures open in separate interactive windows.
%matplotlib qt
# Silence pandas' chained-assignment (SettingWithCopy) warning.
pd.set_option('mode.chained_assignment', None)
plt.rc('xtick',labelsize=10)
plt.rc('ytick',labelsize=10)
plt.rc('font', size=10) #controls default text size
plt.rc('axes', titlesize=16) #fontsize of the title
plt.rc('axes', labelsize=15) #fontsize of the x and y labels
plt.rc('legend', fontsize=10)
# Default figure size in inches (width, height).
plt.rcParams["figure.figsize"] = (10,7)

In [None]:
# Symbolic names of the plotted quantities (used only to build axis labels).
x_T, x_V, x_D = sp.symbols('T nu D')
# Symbolic unit names paired with the quantities above.
# NOTE(review): 'm2*s' presumably stands for m^2/s — confirm the intended unit.
c_K, c_C, c_cP, c_m2s = sp.symbols('K C cP m2*s')


class XYContainer:
    """Experiment table plus the axis labels, source location and processing log.

    The class-level defaults describe x as ``T`` in ``C`` and y as ``nu`` in
    ``cP``; instances may override them through the constructor.
    """

    # Table with the experiment data; set by the constructor or by a loader.
    d: pd.DataFrame = None
    # Default (quantity, unit) symbols used by `x_label` / `y_label`.
    _x = x_T
    _x_sig = c_C
    _y = x_V
    _y_sig = c_cP

    def __init__(self, data=None, name='', x=(x_T, c_C), y=(x_V, c_cP)):
        """Create a container.

        Args:
            data: table to wrap (may be None and loaded later).
            name: experiment name used in messages and as the dump file name.
            x: ``(quantity, unit)`` symbols for the x axis label.
            y: ``(quantity, unit)`` symbols for the y axis label.
        """
        self.d = data
        self.folder = None
        self.experiment_name = name
        self._log = []
        self.info = {}
        # The label pairs used to be accepted but silently ignored; apply them
        # so that e.g. x=(x_T, c_K) really produces a Kelvin label.
        self._x, self._x_sig = x
        self._y, self._y_sig = y

    @property
    def x_label(self):
        """Axis caption of the form ``"<quantity> [<unit>]"`` for x."""
        return f"{self._x} [{self._x_sig}]"

    @property
    def y_label(self):
        """Axis caption of the form ``"<quantity> [<unit>]"`` for y."""
        return f"{self._y} [{self._y_sig}]"

    def load_csv(self, path=''):
        """Read a CSV file into `d`; prompts for a path until an existing file is given."""
        path, self.folder, self.experiment_name = self._input_path(path)
        self.d = pd.read_csv(path)

        print(f'Loaded: {self.experiment_name}')
        self.log(f'csv loaded from: {path}')
        return self.d

    def load_hdf5(self, path=''):
        """Read a table and its metadata written by `dump`.

        The stored log is appended to this container's log and the remaining
        metadata is merged into `info`. Returns the loaded table.
        """
        path, self.folder, self.experiment_name = self._input_path(path)

        with pd.HDFStore(path) as file:
            data = file['data']
            meta = dict(file.get_storer('data').attrs.meta)

        # Previously the table was read but never stored, so the method
        # returned whatever `d` held before the call.
        self.d = data
        self._log.extend(meta.pop('log', []))
        self.log(f'hdf5 loaded from: {path}')
        self.info.update(meta)
        return self.d

    def dump(self, **kwargs):
        """Write `d` to ``<folder>/<experiment_name>.hdf5``.

        Keyword arguments are stored as metadata next to the table, together
        with the processing log under the key ``'log'``.
        """
        # os.path.join replaces the former '\{' escape, which is an invalid
        # escape sequence; an unset folder means the current directory.
        path = os.path.join(self.folder or '', f'{self.experiment_name}.hdf5')
        kwargs.update({'log': self.log()})
        with pd.HDFStore(path) as file:
            file.put('data', self.d)
            file.get_storer('data').attrs.meta = kwargs

    def copy(self):
        """Return an independent deep copy of the container."""
        return copy.deepcopy(self)

    def log(self, msg=None):
        """Append `msg` to the processing log (if given) and return the log list."""
        if msg is not None:
            self._log.append(msg)
        return self._log

    @staticmethod
    def _input_path(path):
        """Return ``(path, folder, experiment_name)`` for an existing file.

        Asks on stdin until `path` names an existing file. The experiment name
        is the file name up to its first dot.
        """
        while (path == '') or (not os.path.isfile(path)):
            path = input("Input data path: ")
        # os.path.split understands the platform's separators instead of a
        # hard-coded backslash.
        folder, file_name = os.path.split(path)
        experiment_name = file_name.split('.')[0]
        return path, folder, experiment_name


class FilterContainer(XYContainer):
    """Row-filtering operations on the container's table."""

    def initial_filter(
            self,
            time_in_minutes=False,
            x=(-np.inf, np.inf),
            y=(0, np.inf),
            time=(0, np.inf),
    ):
        """Rename the raw columns to ``x``/``y`` and keep rows inside open ranges.

        Args:
            time_in_minutes: if True, divide the ``time`` column by 60 after
                filtering (the `time` bounds are applied before the division).
            x: ``(low, high)`` exclusive bounds for the ``x`` (Temperature) column.
            y: ``(low, high)`` exclusive bounds for the ``y`` (Viscosity) column.
            time: ``(low, high)`` exclusive bounds for the ``time`` column.

        Returns:
            The filtered table, which also replaces `d`.
        """
        conditions = dict(x=x, y=y, time=time)
        frame = self.d.rename(columns={'Temperature': 'x', 'Viscosity': 'y'})

        temperature_cond = (x[0] < frame['x']) & (frame['x'] < x[1])
        viscosity_cond = (y[0] < frame['y']) & (frame['y'] < y[1])
        time_cond = (time[0] < frame['time']) & (frame['time'] < time[1])

        # Explicit copy so the assignment below never writes through a view.
        frame = frame[temperature_cond & viscosity_cond & time_cond].copy()
        if time_in_minutes:
            # The quotient used to be computed and thrown away.
            frame['time'] = frame['time'] / 60

        self.d = frame
        self.log(('initial_filter', conditions))
        return self.d

    def mask_filter(
        self,
        filter_func,
        by='x',
        column='y',
    ):
        """Keep the rows for which `filter_func` returns True within each group.

        Args:
            filter_func: callable taking the `column` values of one group (a
                Series) and returning a boolean Series with the same index.
            by: column to group by.
            column: column passed to `filter_func`.

        Returns:
            The filtered table, which also replaces `d`.
        """
        # transform() returns the flags in the original row order, so the mask
        # stays correct even when the index is unsorted or has duplicates.
        flags = self.d.groupby(by=by)[column].transform(filter_func)
        # eq(True) yields a plain boolean mask; rows without a flag are dropped.
        mask = flags.eq(True).to_numpy()
        self.d = self.d[mask]
        self.log(('mask_filter', getattr(filter_func, '__name__', repr(filter_func))))
        return self.d



class MutableContainer(XYContainer):
    """In-place transformations of the ``x``/``y`` columns and their labels."""

    def nu_to_diffusion(self):
        """Shift x by +273.15 and replace y with ``k * x / (y * 0.001)``.

        Labels become ``T [K]`` and ``D [m2*s]``.
        NOTE(review): the 0.001 factor presumably converts cP to Pa*s — confirm.
        """
        k = 1.380649 * 1e-23
        df = self.d
        df['x'] = df['x'] + 273.15
        df['y'] = k * df['x'] / (df['y'] * 0.001)

        self._x, self._x_sig = x_T, c_K
        self._y, self._y_sig = x_D, c_m2s
        self.log('to_diffusion')
        return self.d

    def K_to_C(self):
        """Shift x by -273.15 and relabel the x unit as ``C``."""
        df = self.d
        df['x'] = df['x'] - 273.15
        # Keep the axis label and the log in step with the data, as the other
        # transformations do.
        self._x_sig = c_C
        self.log('K_to_C')
        return self.d

    def linearize(self):
        """Replace x with ``1 / x`` and y with ``ln(y)``; labels follow."""
        df = self.d
        # Vectorised equivalents of the former per-element lambdas.
        df['x'] = 1 / df['x']
        df['y'] = np.log(df['y'])

        self._x, self._x_sig = 1 / self._x, 1 / self._x_sig
        self._y, self._y_sig = sp.ln(self._y), sp.ln(self._y_sig)
        self.log('linearize')
        return self.d

    def delinearize(self):
        """Inverse of `linearize`: x becomes ``1 / x`` and y becomes ``exp(y)``."""
        df = self.d
        df['x'] = 1 / df['x']
        df['y'] = np.exp(df['y'])

        self._x, self._x_sig = 1 / self._x, 1 / self._x_sig
        self._y, self._y_sig = sp.E**(self._y), sp.E**(self._y_sig)
        self.log('delinearize')
        return self.d

class SpecialContainer(FilterContainer, MutableContainer):
    """Container that offers both the filtering and the transformation methods."""

In [None]:
## Filtration
def z_filter(data: pd.Series):
    """Flag values lying strictly within one population std of the mean.

    Returns a boolean Series aligned with `data`.
    """
    # The tiny offset keeps the division finite when the spread is zero.
    spread = data.std(ddof=0) + 1e-50
    centred = data - data.mean()
    return np.abs(centred / spread) < 1


# def z_filter(data: pd.Series):
#     return stats.zscore(data)<1


def whisker_iqr_filter(data: pd.Series):
    """Flag values inside ``[q1 - 0.5*IQR, q3 + 0.5*IQR]`` (bounds inclusive).

    Returns a boolean Series aligned with `data`.
    """
    whisker_width = 0.5
    lower_q, upper_q = data.quantile(0.25), data.quantile(0.75)
    spread = upper_q - lower_q
    above_floor = data >= lower_q - whisker_width*spread
    below_ceiling = data <= upper_q + whisker_width*spread
    return above_floor & below_ceiling


def iqr_filter(data: pd.Series):
    """Flag values whose distance from the median is strictly below one IQR.

    Returns a boolean Series aligned with `data`.
    """
    q1 = data.quantile(0.25)
    q3 = data.quantile(0.75)
    # Same zero-spread guard as z_filter: with a single sample or a constant
    # group the IQR is 0, and 0/0 = NaN used to reject every row of the group.
    iqr = (q3 - q1) + 1e-50

    return np.abs((data - data.median()) / iqr) < 1


def my_z_filter(data: pd.Series):
    """Flag values within 0.1 sample std of the median.

    Returns a boolean Series aligned with `data`.
    """
    centre = data.median()
    # The tiny offset keeps the division finite when the spread is zero.
    scale = data.std() + 1e-50
    deviation = np.abs((data - centre) / scale)
    return deviation < 0.1


# Pipeline

In [None]:
# Empty accumulator table; one row is appended per processed experiment.
# NOTE(review): re-running this cell discards rows collected so far.
results = pd.DataFrame(
    columns=['E_J', 'dE_J', 'D0_m2s', 'dD0', 'name', 'w_mass', 'desc'])

In [None]:
# Write the accumulated results table to disk.
# NOTE(review): executed straight after the cell above this writes an empty
# table over any existing Results.xlsx — run it only after rows were appended.
results.to_excel(r'Experiments\MultiplyTemperature\Results.xlsx')

In [None]:
# Load a CSV (load_csv() without a path asks for one on stdin) and preview the
# raw Viscosity and Temperature columns against time on twin y axes.
a = SpecialContainer()
a.load_csv()

fig, ax_v = plt.subplots()
ax_T = ax_v.twinx()
ax_v.scatter( a.d['time'],  a.d['Viscosity'], color='red', marker='.')
ax_v.set_ylabel('Viscosity [cP]', color='red')
ax_v.set_xlabel('time [s]')
ax_T.scatter( a.d['time'],  a.d['Temperature'], color='blue', marker='.')
ax_T.set_ylabel('Temperature [C]', color='blue')
# Last expression: show the experiment name as the cell output.
a.experiment_name

In [None]:
# Rename Temperature/Viscosity to x/y and keep rows with 12 < x < 42 and 20 < y < 350.
a.initial_filter(time=(0, np.inf), y=(20, 350), x=(12, 42))

In [None]:
##Save Temporal plots
# y (left axis, red) and x (right axis, blue) against time, drawn from a copy
# of the container so the working data is not touched.
plot = a.copy()
fig, ax_v = plt.subplots()
ax_T = ax_v.twinx()

ax_v.scatter(plot.d['time'], plot.d['y'], color='red', marker='.')
ax_v.set_ylabel(plot.y_label, color='red')
ax_v.set_xlabel('time [s]')

ax_T.scatter(plot.d['time'], plot.d['x'], color='blue', marker='.')
ax_T.set_ylabel(plot.x_label, color='blue')

In [None]:
##Save Temperature plots
# y against x: raw points coloured by their quality mark, plus the per-x mean
# (with a 1-sd band) and the per-x median (with a 68% percentile band).
plot = a.copy()

fig, ax = plt.subplots()
# Map each quality mark to a matplotlib colour code.
colors = plot.d['Viscosity_verbose'].replace({
    'OK': 'g',
    'OK_inner': 'b',
    'image_sweep_check': 'r',
    'combine_check': 'w',
})
ax.set_xlabel(plot.x_label)
ax.set_ylabel(plot.y_label)

ax.scatter(x=plot.d['x'], y=plot.d['y'], c=colors, s=5)

sns.lineplot(
    ax=ax,
    data=plot.d,
    x="x",
    y="y",
    estimator='mean',
    errorbar=("sd", 1),
    label='mean',
)
sns.lineplot(
    ax=ax,
    data=plot.d,
    x="x",
    y="y",
    errorbar=('pi', 68),
    estimator="median",
    label='median',
)


In [None]:
# Transform the working container in place, then drop per-x outliers.
# NOTE(review): these three calls mutate `a`, so re-running the cell applies
# the transformations a second time.
a.nu_to_diffusion()
a.linearize()
a.mask_filter(iqr_filter)

# Plot the transformed data: points coloured by quality mark, per-x mean and median.
plot = a.copy()
fig, ax = plt.subplots()
ax.set_xlabel(plot.x_label)
ax.set_ylabel(plot.y_label)
colors = plot.d['Viscosity_verbose'].replace({
    'OK': 'g',
    'OK_inner': 'b',
    'image_sweep_check': 'r',
    'combine_check': 'w',
})
ax.scatter(x=plot.d['x'], y=plot.d['y'], c=colors, s=5)

sns.lineplot(
    ax=ax,
    data=plot.d,
    x="x",
    y="y",
    estimator='mean',
    errorbar=("sd", 1),
    # err_style="bars",
    label='mean',
)
sns.lineplot(
    ax=ax,
    data=plot.d,
    x="x",
    y="y",
    errorbar=('pi', 50),
    estimator="median",
    label='median',
)


In [None]:
##Save fast OLS plot
def regress(data):
    """Least-squares line through ``(data['x'], data['y'])``.

    Returns:
        ``(D0, E, TC_func)`` where ``D0 = exp(intercept)``,
        ``E = -8.314 * slope`` and ``TC_func(T)`` evaluates
        ``D0 * exp(-E / (8.314 * T))``.
    """
    features = np.asarray(data['x']).reshape(-1, 1)  # one column per regressor
    target = np.asarray(data['y'])
    model = linear_model.LinearRegression(fit_intercept=True)
    model.fit(features, target)

    slope, intercept = model.coef_[0], model.intercept_
    D0 = np.exp(intercept)
    E = -8.314 * slope

    def TC_func(T, E=E, D0=D0):
        return D0 * np.exp(-E / (8.314*T))

    return D0, E, TC_func


# Scatter of the linearized data with a first-order seaborn regression line.
fig, ax = plt.subplots()
fig.subplots_adjust(
    top=0.9,
    bottom=0.1,
    left=0.1,
    right=0.9,
    hspace=0.2,
    wspace=0.2,
)

plot = a.copy()
# Text between '(' and the final character of the folder path.
# NOTE(review): presumably the mass percentage from names like 'Exp0(0)' — confirm.
w = plot.folder.split('(')[1][:-1]
ax.set_title(f'{a.experiment_name}: ({w}% mass)')

ax.scatter(plot.d['x'], plot.d['y'], color='gray', marker='.')
D0, E, func = regress(plot.d)
# NOTE(review): the legend prints D0 as 'Nu0 ... cP'; confirm the intended
# quantity and unit if the data were converted with nu_to_diffusion().
sns.regplot(
    ax=ax,
    data=plot.d,
    x='x',
    y='y',
    scatter=False,
    truncate=False,
    order=1,
    label=f'All:\nE= {E/1000:5.2f} kJ\nNu0= {D0:5.3e} cP',
)
ax.set_xlabel(plot.x_label)
ax.set_ylabel(plot.y_label)
plt.legend()

In [None]:
## Save Temperature plots
# Evaluate the fitted curve on 100 points between 273.15+13 and 273.15+42 and
# overlay it on the de-linearized measurements.
x=np.linspace(273.15 + 13, 273.15 + 42, 100)
ols_res = SpecialContainer(
    pd.DataFrame({
        'x': x,
        'y': func(x)
    }),x=(x_T,c_K),y=(x_D,c_m2s))

# Work on copies so `a` itself stays linearized.
plot = a.copy()
plot.delinearize()
res_plot = ols_res.copy()

fig, ax = plt.subplots()
colors = plot.d['Viscosity_verbose'].replace({
    'OK': 'g',
    'OK_inner': 'b',
    'image_sweep_check': 'r',
    'combine_check': 'w',
})
ax.set_xlabel(plot.x_label)
ax.set_ylabel(plot.y_label)

ax.scatter(x=plot.d['x'], y=plot.d['y'], c=colors, s=5,label='Real')
sns.lineplot(
    ax=ax,
    data=res_plot.d,
    x="x",
    y="y",
    estimator='mean',
    label ='OLS'
)

In [None]:
## Regression
# OLS fit of y on x with an explicit constant column, using statsmodels to get
# confidence intervals for both coefficients.
reg_a = a.copy()

df = reg_a.d
df.rename(inplace=True, columns={'x': 'E', 'y': 'nu'})
df['nu0'] = 1  # constant regressor -> its coefficient is the intercept
result_OLS = sm.OLS(df['nu'], df[['E', 'nu0']]).fit()
means = result_OLS.params
# conf_int(0.005) is called with alpha=0.005; keeping the `.loc` accessor makes
# `conf_int[<param>]` below select the (lower, upper) row of that parameter.
conf_int = result_OLS.conf_int(0.005).loc

# Intercept back-transformed with exp; half-width of its interval as the error.
conf_int['nu0'] = np.exp(conf_int['nu0'])
nu0 = np.exp(means['nu0'])
nu0_diap = (conf_int['nu0'].max() - conf_int['nu0'].min()) / 2
nu0_power = round(np.log10(nu0_diap))

# Slope scaled by 8.314 / 1000.
# NOTE(review): `regress` uses E = -8.314 * slope, this cell uses +8.314 * slope;
# confirm which sign convention is intended for the fitted data.
conf_int['E'] = 8.314 * conf_int['E'] / 1000
EkJ = 8.314 * means['E'] / 1000
EkJ_diap = (conf_int['E'].max() - conf_int['E'].min()) / 2
EkJ_power = round(np.log10(EkJ_diap)) + 1

exp_name = reg_a.experiment_name
w = reg_a.folder.split('(')[1][:-1]
# Values are printed as mantissa ± error with a shared power of ten.
print(
    f'Constants {exp_name} ({w}% mass):',
    f'E   = {EkJ/10**EkJ_power: <10.3f} ± {EkJ_diap/10**EkJ_power: <3.2f} kJ *1e{EkJ_power}',
    f'Nu0 = {nu0/10**nu0_power: <10.3f} ± {nu0_diap/10**nu0_power: <3.2f} cP *1e{nu0_power}',
    sep='\n',
)
print(result_OLS.summary2())

In [None]:
## Save result
# Append one row to `results` and dump the regression container to HDF5 with
# that row stored as metadata.
# NOTE(review): the columns are named 'E_J'/'D0_m2s' but receive EkJ and nu0 —
# confirm the units and names agree.
desc = input('Description')
results.loc[len(results)] = [
    EkJ, EkJ_diap, nu0, nu0_diap, reg_a.experiment_name, w, desc
]
reg_a.dump(**results.loc[len(results) - 1].to_dict())
results

# Load raw and save ready data

In [None]:
def read_raw(path='', index_col=None):
    """Read a raw CSV file and report where it came from.

    Args:
        path: file to read; when empty or not an existing file, a path is
            requested on stdin until a valid one is entered.
        index_col: forwarded to ``pd.read_csv``.

    Returns:
        ``((folder_path, data_name), dataframe)``.
    """
    # The argument used to be overwritten with '' here, so it was never used.
    while (path == '') or (not os.path.isfile(path)):
        path = input("Input data path: ")
    # os.path.split understands the platform's separators instead of a
    # hard-coded backslash.
    folder_path, data_name = os.path.split(path)
    print(f'    Folder:\n{folder_path}\n    Data name:\n {data_name}')
    return (folder_path, data_name), pd.read_csv(path, index_col=index_col)


In [None]:
# Load the raw table, using the first CSV column as the index, and display it.
(folder_path, data_name), df = read_raw(index_col=0)
df

In [None]:
# Numeric quality marks -> verbose names.
rules = {
    1: 'image_sweep_check',
    2: 'combine_check',  # 'inner_processor_check':'OK_inner'
    0: 'OK'
}
# Raw mark columns -> the *_verbose columns used by the plotting cells.
col_rules = {
    'Viscosity_mark': 'Viscosity_verbose',
    'Temperature_mark': 'Temperature_verbose',
}

# NOTE(review): `rules` and `col_rules` are only referenced by the commented-out
# code below in this view.
# df1 = df1.rename(columns=col_rules)
# df1[['Viscosity_verbose','Temperature_verbose']] =\
#     df1[['Viscosity_verbose','Temperature_verbose']].replace(rules)

# df2 = df2.rename(columns=col_rules)
# df2[['Viscosity_verbose','Temperature_verbose']] = df2[['Viscosity_verbose','Temperature_verbose']].replace(rules)
# df2['time'] =df2['time']+ df1['time'].max()+1

# df = pd.concat([df1,df2])
# True if any time stamp occurs more than once.
np.any(df['time'].duplicated())

In [None]:
# Drop incomplete rows and write the table back next to the raw file, without
# the index column. os.path.join replaces the invalid '\{' escape and
# index=False states the intent that `index=0` only expressed implicitly.
df.dropna().to_csv(os.path.join(folder_path, data_name), index=False)

# Load

In [None]:
a = SpecialContainer()
# Raw string: '\M' and '\E' are invalid escape sequences in a normal literal.
a.load_csv(r'Experiments\MultiplyTemperature\Exp0(0)\Exp0_up.csv')

## Temporal plots
# Preview of the raw Viscosity and Temperature columns against time.
df = a.d
# df = test_data

fig, ax_v = plt.subplots()
ax_T = ax_v.twinx()

ax_v.scatter(df['time'], df['Viscosity'], color='red', marker='.')
ax_v.set_ylabel('Viscosity [cP]', color='red')
ax_v.set_xlabel('time [s]')

ax_T.scatter(df['time'], df['Temperature'], color='blue', marker='.')
ax_T.set_ylabel('Temperature [C]', color='blue')
# Last expression: show the experiment name as the cell output.
a.experiment_name

# Process

In [None]:
# Rename to x/y, keep 12 < x < 42 and 20 < y < 350, then snapshot the result so
# later cells can restart from it.
a.initial_filter(time=(0, np.inf), y=(20, 350), x=(12, 42))
base = a.copy()

In [None]:
# Restart the working container from the filtered snapshot.
a = base.copy()

In [None]:
# In-place transformation (not idempotent: run once per reset from `base`).
a.nu_to_diffusion();

In [None]:
# x -> 1/x, y -> ln(y), in place.
a.linearize();

In [None]:
# Drop per-x outliers of y using the IQR rule.
a.mask_filter(iqr_filter);

In [None]:
# Undo the linearization on both containers.
# NOTE(review): `b` is only created in a later cell (the one that builds the OLS
# curve); on a fresh kernel this cell fails unless that cell was run first.
a.delinearize()
b.delinearize()

In [None]:
# Shift x by -273.15 on both containers.
# NOTE(review): depends on `b`, which is defined in a later cell.
a.K_to_C()
b.K_to_C()

 # Plots

In [None]:
## Temporal plots
# y (left axis, red) and x (right axis, blue) against time, from a copy of `a`.
plot = a.copy()
fig, ax_v = plt.subplots()
ax_T = ax_v.twinx()

ax_v.scatter(plot.d['time'], plot.d['y'], color='red', marker='.')
ax_v.set_ylabel(plot.y_label, color='red')
ax_v.set_xlabel('time [s]')

ax_T.scatter(plot.d['time'], plot.d['x'], color='blue', marker='.')
ax_T.set_ylabel(plot.x_label, color='blue')


In [None]:
## Temperature plots
# y against x: points coloured by quality mark, per-x mean and median of `a`,
# and the curve stored in `b` drawn on the same axes.
plot = a.copy()

fig, ax = plt.subplots()
colors = plot.d['Viscosity_verbose'].replace({
    'OK': 'g',
    'OK_inner': 'b',
    'image_sweep_check': 'r',
    'combine_check': 'w',
})
ax.set_xlabel(plot.x_label)
ax.set_ylabel(plot.y_label)

ax.scatter(x=plot.d['x'], y=plot.d['y'], c=colors, s=5)

sns.lineplot(ax=ax,
             data=plot.d,
             x="x",
             y="y",
             estimator='mean',
             errorbar=("sd", 1),
             label='mean')
sns.lineplot(ax=ax,
             data=plot.d,
             x="x",
             y="y",
             errorbar=('pi', 68),
             estimator="median",
             label='median')

# NOTE(review): `b` is created in a later cell; run that cell first.
plot= b.copy()
sns.lineplot(
    ax=ax,
    data=plot.d,
    x="x",
    y="y",
    estimator='mean',
    # errorbar=("sd", 1),
    # err_style="bars",
    label ='OLS'
)

# sns.lineplot(
#     ax=ax,
#     data=plot.data,
#     x="Temperature",
#     y="Viscosity",
#     errorbar=('pi',68),
#     estimator="median",
#     label ='median2'
# )

In [None]:
##OLS plot
def regress(data):
    """Least-squares line through ``(data['x'], data['y'])``.

    NOTE(review): this repeats the `regress` definition from the
    'Save fast OLS plot' cell; the later definition shadows the earlier one.

    Returns:
        ``(D0, E, TC_func)`` where ``D0 = exp(intercept)``,
        ``E = -8.314 * slope`` and ``TC_func(T)`` evaluates
        ``D0 * exp(-E / (8.314 * T))``.
    """
    features = np.asarray(data['x']).reshape(-1, 1)  # one column per regressor
    target = np.asarray(data['y'])
    model = linear_model.LinearRegression(fit_intercept=True)
    model.fit(features, target)

    slope, intercept = model.coef_[0], model.intercept_
    D0 = np.exp(intercept)
    E = -8.314 * slope

    def TC_func(T, E=E, D0=D0):
        return D0 * np.exp(-E / (8.314*T))

    return D0, E, TC_func


# Scatter of the data in `a` with a first-order seaborn regression line.
fig, ax = plt.subplots()
fig.subplots_adjust(
    top=0.9,
    bottom=0.1,
    left=0.1,
    right=0.9,
    hspace=0.2,
    wspace=0.2,
)

plot = a.copy()
# Text between '(' and the final character of the folder path.
# NOTE(review): presumably the mass percentage from names like 'Exp0(0)' — confirm.
w = plot.folder.split('(')[1][:-1]
ax.set_title(f'{a.experiment_name}: ({w}% mass)')

ax.scatter(plot.d['x'], plot.d['y'], color='gray', marker='.')
D0, E, func = regress(plot.d)
# NOTE(review): the legend prints D0 as 'Nu0 ... cP'; confirm quantity and unit.
sns.regplot(
    ax=ax,
    data=plot.d,
    x='x',
    y='y',
    scatter=False,
    truncate=False,
    order=1,
    label=f'All:\nE= {E/1000:5.2f} kJ\nNu0= {D0:5.3e} cP',
)
ax.set_xlabel(plot.x_label)
ax.set_ylabel(plot.y_label)
plt.legend()

In [None]:
# Sample the fitted curve `func` on 100 points between 273.15+13 and 273.15+42,
# wrap it in a container `b` and linearize it like the measured data.
x=np.linspace(273.15 + 13, 273.15 + 42, 100)
b = SpecialContainer(
    pd.DataFrame({
        'x': x,
        'y': func(x)
    }),x=(x_T,c_K),y=(x_D,c_m2s))
b.linearize()

# Regression

In [None]:
## Regression
# OLS fit of y on x with an explicit constant column, using statsmodels to get
# confidence intervals for both coefficients.
reg_a = a.copy()

df = reg_a.d
df.rename(inplace=True, columns={'x': 'E', 'y': 'ln_nu'})
df['ln_nu0'] = 1  # constant regressor -> its coefficient is the intercept
result_OLS = sm.OLS(df['ln_nu'], df[['E', 'ln_nu0']]).fit()
means = result_OLS.params
# conf_int(0.005) is called with alpha=0.005; keeping the `.loc` accessor makes
# `conf_int[<param>]` below select the (lower, upper) row of that parameter.
conf_int = result_OLS.conf_int(0.005).loc

# Intercept back-transformed with exp; half-width of its interval as the error.
conf_int['ln_nu0'] = np.exp(conf_int['ln_nu0'])
nu0 = np.exp(means['ln_nu0'])
nu0_diap = (conf_int['ln_nu0'].max() - conf_int['ln_nu0'].min()) / 2
nu0_power = round(np.log10(nu0_diap))

# Slope scaled by 8.314 / 1000.
# NOTE(review): `regress` uses E = -8.314 * slope, this cell uses +8.314 * slope;
# confirm which sign convention is intended for the fitted data.
conf_int['E'] = 8.314 * conf_int['E'] / 1000
EkJ = 8.314 * means['E'] / 1000
EkJ_diap = (conf_int['E'].max() - conf_int['E'].min()) / 2
EkJ_power = round(np.log10(EkJ_diap)) + 1

exp_name = reg_a.experiment_name
w = reg_a.folder.split('(')[1][:-1]
# Values are printed as mantissa ± error with a shared power of ten.
print(
    f'Constants {exp_name} ({w}% mass):',
    f'E   = {EkJ/10**EkJ_power: <10.3f} ± {EkJ_diap/10**EkJ_power: <3.2f} kJ *1e{EkJ_power}',
    f'Nu0 = {nu0/10**nu0_power: <10.3f} ± {nu0_diap/10**nu0_power: <3.2f} cP *1e{nu0_power}',
    sep='\n',
)
print(result_OLS.summary2())

# Interactive

In [None]:
## Time
def temp_plot(
    data,
    name,
    y2_axix=False,
    symbol='circle',
    colors=plotly.colors.qualitative.Plotly,
    verbose_col=None,
):
    """Build a plotly scatter trace of ``data[name]`` against ``data['time']``.

    Markers are coloured by the quality-mark column, with colours spread evenly
    over `colors`.

    Args:
        data: table with ``time``, `name` and a quality-mark column.
        name: column to plot on the y axis.
        y2_axix: draw on the secondary y axis (``'y2'``) instead of ``'y'``.
        symbol: plotly marker symbol.
        colors: sequence of colours to pick from.
        verbose_col: quality-mark column; defaults to ``f'{name}_verbose'``,
            falling back to the original column's marks for the renamed
            ``'x'`` / ``'y'`` columns.

    Returns:
        A ``go.Scatter`` trace.
    """
    if verbose_col is None:
        verbose_col = f'{name}_verbose'
        if verbose_col not in data.columns:
            # The containers rename Temperature/Viscosity to x/y but keep the
            # '*_verbose' columns under their original names, so 'y_verbose'
            # and 'x_verbose' do not exist and used to raise KeyError.
            renamed = {'x': 'Temperature_verbose', 'y': 'Viscosity_verbose'}
            verbose_col = renamed.get(name, verbose_col)

    marks = data[verbose_col]
    verboses = marks.unique()
    # Evenly spaced palette positions, one per distinct mark.
    positions = [
        int(round(i)) for i in np.linspace(0, len(colors) - 1, len(verboses))
    ]
    colors_rule = {key: colors[pos] for key, pos in zip(verboses, positions)}

    scatter = go.Scatter(
        x=data['time'],
        y=data[name],
        name=name,
        mode='markers',
        yaxis='y2' if y2_axix else 'y',
        text=marks,
        hovertemplate='%{y}<br>%{text}',
        marker=dict(
            size=5,
            symbol=symbol,
            color=marks.replace(colors_rule),
        ),
    )
    return scatter


# Interactive time plot: y on the primary axis, x on a secondary right-hand axis.
# NOTE(review): `a = a` is a no-op; presumably a placeholder for choosing the
# container to plot.
a = a
fig = go.Figure()
fig.add_traces([
    temp_plot(
        a.d,
        'y',
        colors=plotly.colors.sequential.Aggrnyl,
    ),
    temp_plot(
        a.d,
        'x',
        y2_axix=True,
        symbol='star',
        colors=plotly.colors.sequential.Sunsetdark_r,
    )
])
fig.update_layout(
    hovermode="x unified",
    margin=dict(l=0, r=0, b=0, t=0, pad=0),
    yaxis2=dict(
        overlaying='y',
        side='right',  # range=[15, 20],
    ),  # xaxis=dict(range=[0, 500], ),
)
fig.show()

In [None]:
## Temperature
# Scatter of every sample coloured by its quality mark. The container's table
# uses the renamed columns 'x' and 'y' ('Temperature' and 'Viscosity' no longer
# exist after initial_filter), and the axis captions come from the container.
fig = px.scatter(
    a.d,
    x="x",
    y="y",
    color='Viscosity_verbose',
    labels={'x': a.x_label, 'y': a.y_label},
)

## Mean and median
# The containers have no `groupT` attribute; group the table by x directly.
y_by_x = a.d.groupby('x')['y']
y_mean = y_by_x.mean()
y_median = y_by_x.median()
fig.add_traces([
    go.Scatter(name='Mean',
               x=y_mean.index,
               y=y_mean,
               mode='lines',
               line=dict(color='red')),
    go.Scatter(name='Median',
               x=y_median.index,
               y=y_median,
               mode='lines',
               line=dict(color='black')),
])

## Comparison
# test_plot= test
# fig.add_trace(
#     go.Scatter(name='Test',
#                x=test_plot.data['x'],
#                y=test_plot.data['y'],
#                mode='markers',
#                line=dict(color='green'),
#                marker=dict(size=10)), )

# fig.add_traces([
#     go.Scatter(name='Mean comp',
#                x=test_plot.groupT.mean().index,
#                y=test_plot.groupT.mean()['y'],
#                mode='lines',
#                line=dict(color='red', width=2, dash='dot')),
#     go.Scatter(name='Median comp',
#                 x=test_plot.groupT.median().index,
#                 y=test_plot.groupT.median()['y'],
#                 mode='lines',
#                 line=dict(color='black', width=2, dash='dot')),
# ])

##Regression
# x = reg_data.index
# fig.add_trace(
#     go.Scatter(
#         name='Regression',
#         x=x,
#         y=reg_func(x),
#         mode='lines',
#         line=dict(color='green', width=2, dash='dot'),
#     ), )

fig.update_layout(hovermode="x unified", margin=dict(l=0, r=0, b=0, t=0, pad=0))
fig.show()

In [None]:
## Distribution
# Histogram of y at one x value, selectable with a slider.
# NOTE(review): the cell used whichever `df` was last bound and mixed the 'x'
# column with the 'Viscosity' column, which never coexist in one table. `base`
# (the snapshot taken right after initial_filter) is assumed to be the intended
# source, since the slider range and binwidth match its values — confirm.
INITIAL_TEMP = 34.0


def _draw_distribution(ax, temp):
    """Draw the y histogram for the rows of `base` whose x matches `temp`."""
    # Slider values are multiples of valstep computed in floating point, so an
    # exact `==` comparison can miss rows; compare with a tolerance instead.
    selected = base.d[np.isclose(base.d['x'], temp)]
    if selected.empty:
        ax.set_title(f'no samples at x = {temp:.1f}')
        return
    sns.histplot(
        ax=ax,
        data=selected,
        x="y",
        hue="Viscosity_verbose",
        binwidth=0.1,
    )
    ax.set_xlabel(base.y_label)
    ax.set_title(f'x = {temp:.1f}')


fig, ax = plt.subplots()
_draw_distribution(ax, INITIAL_TEMP)


def update(val):
    """Slider callback: redraw the histogram for the newly selected x."""
    ax.cla()
    _draw_distribution(ax, slider_temp.val)
    fig.canvas.draw_idle()


time_slider_ax = fig.add_axes([0.25, 0.1, 0.65, 0.03])
# Keep a reference to the slider: matplotlib widgets stop responding once they
# are garbage collected.
slider_temp = Slider(
    ax=time_slider_ax,
    label='x',
    valmin=14,
    valmax=41,
    valinit=INITIAL_TEMP,  # matches the histogram drawn above
    valstep=0.1,
)
slider_temp.on_changed(update)
