In [35]:
import os
import json
import pandas as pd
import numpy as np
from scipy import stats
from sklearn import linear_model
import statsmodels.api as sm

import matplotlib.pyplot as plt
import seaborn as sns
import plotly 
import plotly.graph_objects as go
import plotly.express as px

%matplotlib qt
pd.set_option('mode.chained_assignment', None)

# Process modules

In [2]:
class Container:
    """One experiment's measurements plus a log of the processing applied.

    Attributes:
        raw_data: frame read by `load_csv` (set only after `load_csv`).
        data: working frame produced by `initial_filter` and then narrowed
            or transformed by `mask_filter` / `linearize`.
        experiment_name: file name of the loaded data up to its first dot.
        folder: directory of the loaded file; `dump` writes next to it.
    """

    data: pd.DataFrame = None

    def __init__(self, data=None, name=''):
        self.data = data
        self.experiment_name = name
        # Default so that `dump()` works (writing to the current directory)
        # on containers that were built in memory or via `copy()`.
        self.folder = ''
        self._log = []

    def load_csv(self, path=''):
        """Read a CSV into `raw_data`; prompts for a path until one exists.

        Returns:
            (raw_data, log) - the loaded frame and the container's log list.
        """
        path, self.folder, self.experiment_name = self._input_path(path)

        self.raw_data = pd.read_csv(path)
        print(f'Loaded: {self.experiment_name}')
        self.log(f'csv loaded from: {path}')
        return self.raw_data, self.log()

    def load_hdf5(self, path=''):
        """Read the frame and log previously written by `dump`.

        The loaded frame is also stored in `self.data`, so the container is
        usable right after loading (previously it was only returned).

        Returns:
            (data, log) - the stored frame and the log saved with it.
        """
        path, self.folder, self.experiment_name = self._input_path(path)

        with pd.HDFStore(path) as file:
            data = file['data']
            log = file.get_storer('data').attrs.log
        self.data = data
        self.log(f'hdf5 loaded from: {path}')
        return data, log

    def dump(self):
        """Write `data` and the log to `<folder>/<experiment_name>.hdf5`."""
        # os.path.join instead of a hand-written '\\' separator: portable,
        # and an empty folder no longer resolves to the drive root.
        path = os.path.join(self.folder, f'{self.experiment_name}.hdf5')

        with pd.HDFStore(path) as file:
            file.put('data', self.data)
            file.get_storer('data').attrs.log = self.log()

    @staticmethod
    def _open_interval(series, bounds):
        """Boolean mask of values strictly inside (bounds[0], bounds[1])."""
        return (bounds[0] < series) & (series < bounds[1])

    def initial_filter(
            self,
            time_in_minutes=False,
            temperature=(-np.inf, np.inf),
            viscosity=(0, np.inf),
            time=(0, np.inf),
    ):
        """Build `data` from `raw_data` rows inside all three open intervals.

        Args:
            time_in_minutes: if True, divide the kept 'time' column by 60.
                The `time` bounds are still compared against the raw values.
            temperature: (low, high) exclusive bounds for 'Temperature'.
            viscosity: (low, high) exclusive bounds for 'Viscosity'.
            time: (low, high) exclusive bounds for 'time'.

        Returns:
            The filtered frame (also stored in `self.data`).
        """
        conditions = dict(
            temperature=temperature,
            viscosity=viscosity,
            time=time,
        )

        raw = self.raw_data
        keep = (
            self._open_interval(raw['Temperature'], temperature)
            & self._open_interval(raw['Viscosity'], viscosity)
            & self._open_interval(raw['time'], time)
        )

        # Explicit copy: later column assignments must never touch raw_data.
        self.data = raw[keep].copy()
        if time_in_minutes:
            # The original computed this quotient and threw it away.
            self.data['time'] = self.data['time'] / 60
        self.log(('initialy filtered', conditions))
        return self.data

    def mask_filter(
        self,
        filter_func,
        by='Temperature',
        column='Viscosity',
    ):
        """Keep only the rows that `filter_func` accepts within each group.

        Args:
            filter_func: called with each group's `column` Series; must
                return a boolean Series with the same index. Its `__name__`
                is written to the log.
            by: column to group on.
            column: column handed to `filter_func`.

        Returns:
            The narrowed frame (also stored in `self.data`).
        """
        group = self.data.groupby(by=by)[column]
        # apply() prepends the group key as index level 0; dropping it and
        # sorting by the remaining row labels yields a positional mask.
        # NOTE(review): this lines up with self.data only while its index is
        # sorted and unique - confirm for frames not built by initial_filter.
        mask = group.apply(filter_func).droplevel([0]).sort_index().to_numpy()
        self.data = self.data[mask]
        self.log(('filtered', filter_func.__name__))
        return self.data

    def linearize(self):
        """Replace Temperature by 1/(T + 273.15) and Viscosity by ln(Viscosity).

        The columns are overwritten in place, so calling this twice applies
        the transform twice.
        """
        # Vectorised forms of the previous per-element lambdas.
        self.data['Temperature'] = 1 / (self.data['Temperature'] + 273.15)
        self.data['Viscosity'] = np.log(self.data['Viscosity'])
        self.log('linearised')
        return self.data

    def copy(self):
        """Return an independent container with copies of `data` and the log."""
        new = Container(self.data.copy(), name=f'{self.experiment_name}')
        # Carry the folder over so the copy can be dumped next to its source.
        new.folder = getattr(self, 'folder', '')
        new._log.extend(self.log()[:])
        return new

    def log(self, msg=None):
        """Append `msg` (when given) and return the log list itself."""
        if msg is not None:
            self._log.append(msg)
        return self._log

    @staticmethod
    def _input_path(path):
        """Resolve `path` to an existing file, prompting until one is given.

        Returns:
            (path, folder, experiment_name) where `experiment_name` is the
            file name up to its first dot.
        """
        while (path == '') or (not os.path.isfile(path)):
            path = input("Input data path: ")
        # os.path.split uses the platform's separators instead of assuming '\\'.
        folder, file_name = os.path.split(path)
        experiment_name = file_name.split('.')[0]
        return path, folder, experiment_name


In [3]:
## Filtration
def z_filter(data: pd.Series):
    """Flag values lying within one population standard deviation of the mean.

    Returns a boolean Series indexed like `data`. The 1e-50 added to the
    spread keeps the division finite for a constant series, in which case
    every value is accepted.
    """
    spread = data.std(ddof=0) + 1e-50
    deviation = (data - data.mean()) / spread
    return np.abs(deviation) < 1

# def z_filter(data: pd.Series):
#     return stats.zscore(data)<1

def whisker_iqr_filter(data: pd.Series, whisker_width: float = 0.5):
    """Flag values inside the box-plot whiskers [q1 - w*IQR, q3 + w*IQR].

    Args:
        data: values to test.
        whisker_width: whisker length `w` as a multiple of the inter-quartile
            range. The default 0.5 is the value that used to be hard-coded.

    Returns:
        Boolean Series indexed like `data`; both bounds are inclusive, so a
        constant series is accepted entirely.
    """
    q1 = data.quantile(0.25)
    q3 = data.quantile(0.75)
    reach = whisker_width * (q3 - q1)
    return (data >= q1 - reach) & (data <= q3 + reach)

def iqr_filter(data: pd.Series):
    """Flag values closer to the median than one inter-quartile range.

    Returns a boolean Series indexed like `data`.
    """
    q1 = data.quantile(0.25)
    q3 = data.quantile(0.75)
    # Same 1e-50 guard as the z-score filters: with a zero IQR the unguarded
    # ratio is 0/0 = NaN for values equal to the median, which compared as
    # False and rejected every point of a constant group.
    iqr = q3 - q1 + 1e-50

    return np.abs((data - data.median()) / iqr) < 1

def my_z_filter(data: pd.Series):
    """Flag values within 0.1 sample standard deviations of the median.

    Returns a boolean Series indexed like `data`. The spread uses pandas'
    default `std()` (ddof=1), which is NaN for a single-element series, so
    such a series comes back all False.
    """
    center = data.median()
    spread = data.std() + 1e-50
    scaled_distance = np.abs((data - center) / spread)
    return scaled_distance < 0.1

In [4]:
def regress(data):
    """Fit Viscosity = w * Temperature + w0 by least squares.

    Args:
        data: frame with 'Temperature' and 'Viscosity' columns.
            NOTE(review): presumably already linearised (1/T and ln(nu)),
            as produced by Container.linearize - confirm with callers.

    Returns:
        (nu0, EkJ, TC_func) where nu0 = exp(w0), EkJ = 8.314 * w / 1000 and
        TC_func(t) evaluates nu0 * exp(EkJ * 1000 / 8.314 / (t + 273.15)).
        NOTE(review): 8.314 looks like the gas constant in J/(mol*K) and `t`
        like degrees Celsius - confirm.
    """
    features = np.asarray(data['Temperature']).reshape(-1, 1)
    target = np.asarray(data['Viscosity'])

    model = linear_model.LinearRegression(fit_intercept=True)
    model.fit(features, target)
    slope, intercept = model.coef_[0], model.intercept_

    nu0 = np.exp(intercept)
    EkJ = 8.314 * slope / 1000

    def TC_func(t, EkJ=EkJ, nu0=nu0):
        # Same transform as the regressor column: shift by 273.15, invert.
        inverse_T = 1 / (t + 273.15)
        return nu0 * np.exp(EkJ * 1000 / 8.314 * inverse_T)

    return nu0, EkJ, TC_func


# Load raw and save ready data

In [None]:
def read_raw(path='', index_col=None):
    """Read a raw CSV and report where it came from.

    Args:
        path: file to read. When empty or not an existing file, the user is
            prompted until a valid path is entered. (The argument used to be
            overwritten with '' and was therefore always ignored.)
        index_col: forwarded to `pd.read_csv`.

    Returns:
        ((folder_path, data_name), frame) - the containing directory, the
        file name including its extension, and the loaded DataFrame.
    """
    while (path == '') or (not os.path.isfile(path)):
        path = input("Input data path: ")
    # os.path.split follows the platform's separators instead of assuming '\\'.
    folder_path, data_name = os.path.split(path)
    print(f'    Folder:\n{folder_path}\n    Data name:\n {data_name}')
    return (folder_path, data_name), pd.read_csv(path, index_col=index_col)

In [None]:
# Load the raw CSV with its first column as the index; folder_path and
# data_name are reused by the save cell further down.
(folder_path,data_name),df = read_raw(index_col=0)
df

In [None]:
# Lookup tables for the (currently commented-out) clean-up below:
# `rules` maps numeric mark codes to verbose labels, `col_rules` renames the
# *_mark columns to *_verbose.
rules = {
    1: 'image_sweep_check',
    2: 'combine_check',  # 'inner_processor_check':'OK_inner'
    0: 'OK'
}
col_rules = {
    'Viscosity_mark': 'Viscosity_verbose',
    'Temperature_mark': 'Temperature_verbose',
}

# df1 = df1.rename(columns=col_rules)
# df1[['Viscosity_verbose','Temperature_verbose']] =\
#     df1[['Viscosity_verbose','Temperature_verbose']].replace(rules)

# df2 = df2.rename(columns=col_rules)
# df2[['Viscosity_verbose','Temperature_verbose']] = df2[['Viscosity_verbose','Temperature_verbose']].replace(rules)
# df2['time'] =df2['time']+ df1['time'].max()+1

# df = pd.concat([df1,df2])
# Sanity check: True when any 'time' value occurs more than once.
np.any(df['time'].duplicated())

In [None]:
# Write the NaN-free rows back over the source file, without the index column.
# os.path.join replaces the hand-built '\' separator: it avoids the invalid
# '\{' escape and, for an empty folder_path, no longer targets the drive root.
df.dropna().to_csv(os.path.join(folder_path, data_name), index=False)

# Load

In [5]:
a = Container()
# Raw string: same path text as before, but '\M' and '\E' are no longer
# parsed as (invalid) escape sequences.
a.load_csv(r'Experiments\MultiplyTemperature\Exp5(12.5)\Exp5_down.csv')[0]

Loaded: Exp5_down


Unnamed: 0,time,Viscosity,Viscosity_verbose,Temperature,Temperature_verbose
0,0.0,17.9,OK,40.2,OK
1,1.0,17.9,OK_inner,40.2,OK_inner
2,2.0,17.9,OK,40.2,OK_inner
3,3.0,17.9,OK_inner,40.2,OK
4,4.0,17.9,OK_inner,40.2,OK_inner
...,...,...,...,...,...
7509,7898.0,83.4,OK_inner,12.8,OK
7510,7899.0,83.5,OK,12.8,OK
7511,7900.0,83.5,OK,12.8,OK
7512,7901.0,83.2,OK_inner,12.8,OK


In [154]:
b = Container()
# Raw string: same path text as before, but '\M' and '\E' are no longer
# parsed as (invalid) escape sequences.
b.load_csv(r'Experiments\MultiplyTemperature\Exp5(12.5)\Exp5_up.csv')

Loaded: Exp5_up


(        time  Viscosity  Viscosity_verbose  Temperature Temperature_verbose
 0        0.0       69.1  image_sweep_check         14.8                  OK
 1        1.0       69.0           OK_inner         14.8                  OK
 2        2.0       69.3  image_sweep_check         14.8                  OK
 3        3.0       69.1  image_sweep_check         14.7            OK_inner
 4        4.0       69.1           OK_inner         14.7                  OK
 ...      ...        ...                ...          ...                 ...
 7396  7602.0       17.9           OK_inner         40.2                  OK
 7397  7603.0       17.9           OK_inner         40.2                  OK
 7398  7604.0       17.9           OK_inner         40.2                  OK
 7399  7605.0       17.9                 OK         40.2                  OK
 7400  7606.0       17.9           OK_inner         40.2                  OK
 
 [7401 rows x 5 columns],
 ['csv loaded from: Experiments\\MultiplyTempera

# Process

In [155]:
# Raw data info
# Works on the unfiltered frame of container `b` (as read by load_csv).
_data = b.raw_data

# Median Temperature per distinct Viscosity value, then the extremes of those medians.
_lim = _data.groupby(by='Viscosity')['Temperature'].median()
print(f'Temperature: {_lim.min()} - {_lim.max()}')

# Median Viscosity per distinct Temperature value, then the extremes of those medians.
_lim =_data.groupby(by='Temperature')['Viscosity'].median()
print(f'Viscosity: {_lim.min()} - {_lim.max()}')

# Both series against time on one figure: Viscosity on the left axis (red),
# Temperature on a twin right axis (blue).
fig, ax_v = plt.subplots()
ax_T = ax_v.twinx()

ax_v.scatter(_data['time'], _data['Viscosity'], color='red', marker='.')
ax_v.set_ylabel('Viscosity', color='red')
ax_v.set_xlabel('time')

ax_T.scatter(_data['time'], _data['Temperature'], color='blue', marker='.')
ax_T.set_ylabel('Temperature', color='blue')

Temperature: 14.7 - 40.2
Viscosity: 17.9 - 70.0


Text(0, 0.5, 'Temperature')

In [6]:
# Build a.data from a.raw_data: keep rows with time > 63, 0 < Viscosity < 100
# and 12 < Temperature < 40 (all bounds exclusive).
a.initial_filter(time=(63, np.inf), viscosity=(0, 100), temperature=(12, 40))

Unnamed: 0,time,Viscosity,Viscosity_verbose,Temperature,Temperature_verbose
64,64.0,18.0,OK,39.7,OK_inner
65,65.0,18.1,OK_inner,39.6,OK
66,66.0,18.1,OK_inner,39.6,OK_inner
67,67.0,18.1,OK_inner,39.6,OK
68,68.0,18.1,OK_inner,39.6,OK
...,...,...,...,...,...
7509,7898.0,83.4,OK_inner,12.8,OK
7510,7899.0,83.5,OK,12.8,OK
7511,7900.0,83.5,OK,12.8,OK
7512,7901.0,83.2,OK_inner,12.8,OK


In [7]:
# Overwrites a.data in place: Temperature -> 1/(T + 273.15), Viscosity -> ln(Viscosity).
# Not idempotent: re-running this cell transforms the already transformed columns again.
a.linearize()

Unnamed: 0,time,Viscosity,Viscosity_verbose,Temperature,Temperature_verbose
64,64.0,2.890372,OK,0.003196,OK_inner
65,65.0,2.895912,OK_inner,0.003197,OK
66,66.0,2.895912,OK_inner,0.003197,OK_inner
67,67.0,2.895912,OK_inner,0.003197,OK
68,68.0,2.895912,OK_inner,0.003197,OK
...,...,...,...,...,...
7509,7898.0,4.423648,OK_inner,0.003497,OK
7510,7899.0,4.424847,OK,0.003497,OK
7511,7900.0,4.424847,OK,0.003497,OK
7512,7901.0,4.421247,OK_inner,0.003497,OK


In [None]:
# Snapshot of `a` (copied data and log) taken before any further filtering.
base = a.copy()

# Interactive

In [None]:
#Time
def temp_plot(
    data,
    name,
    y2_axix=False,
    symbol='circle',
    colors=plotly.colors.qualitative.Plotly,
):
    """Build a marker trace of `data[name]` against `data['time']`.

    Each marker is coloured by its `<name>_verbose` value: the distinct
    values are assigned palette entries whose indices are spread evenly
    from the first to the last colour of `colors`.

    Args:
        data: frame with 'time', `name` and f'{name}_verbose' columns.
        name: column to plot; also used as the trace name.
        y2_axix: when True the trace is bound to the secondary axis 'y2'.
        symbol: plotly marker symbol.
        colors: indexable sequence of colours to pick from.

    Returns:
        The configured `go.Scatter` trace.
    """
    flag_column = f'{name}_verbose'
    flags = data[flag_column].unique()
    palette_slots = [
        round(position)
        for position in np.linspace(0, len(colors) - 1, len(flags))
    ]
    colors_rule = {
        flag: colors[slot] for flag, slot in zip(flags, palette_slots)
    }

    target_axis = 'y2' if y2_axix else 'y'
    marker_style = dict(
        size=5,
        symbol=symbol,
        color=data[flag_column].replace(colors_rule),
    )

    return go.Scatter(
        x=data['time'],
        y=data[name],
        name=name,
        mode='markers',
        yaxis=target_axis,
        text=data[flag_column],
        hovertemplate='%{y}<br>%{text}',
        marker=marker_style,
    )


# NOTE(review): self-assignment is a no-op; presumably a placeholder for
# switching the plotted container (e.g. `a = b`) - confirm.
a = a
# Viscosity (left axis) and Temperature (right axis, star markers) against
# time, each coloured by its *_verbose flag.
fig = go.Figure()
fig.add_traces([
    temp_plot(
        a.data,
        'Viscosity',
        colors=plotly.colors.sequential.Aggrnyl,
    ),
    temp_plot(
        a.data,
        'Temperature',
        y2_axix=True,
        symbol='star',
        colors=plotly.colors.sequential.Sunsetdark_r,
    )
])
# yaxis2 is overlaid on the primary y axis and drawn on the right-hand side.
fig.update_layout(
    hovermode="x unified",
    margin=dict(l=0, r=0, b=0, t=0, pad=0),
    yaxis2=dict(
        overlaying='y',
        side='right',  # range=[15, 20],
    ),  # xaxis=dict(range=[0, 500], ),
)
fig.show()

In [None]:
# Viscosity against Temperature, coloured by the Viscosity quality flag.
# (The former `fig = go.Figure()` was dropped: px.scatter overwrote it at once.)
fig = px.scatter(
    a.data,
    x="Temperature",
    y="Viscosity",
    color='Viscosity_verbose',
)

## Mean and median
# Container defines no `groupT` attribute, so the per-temperature statistics
# are computed here. Only the numeric 'Viscosity' column is aggregated, which
# keeps mean()/median() away from the string *_verbose columns.
viscosity_by_T = a.data.groupby('Temperature')['Viscosity']
mean_by_T = viscosity_by_T.mean()
median_by_T = viscosity_by_T.median()
fig.add_traces([
    go.Scatter(name='Mean',
               x=mean_by_T.index,
               y=mean_by_T,
               mode='lines',
               line=dict(color='red')),
    go.Scatter(name='Median',
               x=median_by_T.index,
               y=median_by_T,
               mode='lines',
               line=dict(color='black')),
])

## Comparison
# test_plot= test
# fig.add_trace(
#     go.Scatter(name='Test',
#                x=test_plot.data['Temperature'],
#                y=test_plot.data['Viscosity'],
#                mode='markers',
#                line=dict(color='green'),
#                marker=dict(size=10)), )

# fig.add_traces([
#     go.Scatter(name='Mean comp',
#                x=test_plot.groupT.mean().index,
#                y=test_plot.groupT.mean()['Viscosity'],
#                mode='lines',
#                line=dict(color='red', width=2, dash='dot')),
#     go.Scatter(name='Median comp',
#                 x=test_plot.groupT.median().index,
#                 y=test_plot.groupT.median()['Viscosity'],
#                 mode='lines',
#                 line=dict(color='black', width=2, dash='dot')),
# ])

##Regression
# x = reg_data.index
# fig.add_trace(
#     go.Scatter(
#         name='Regression',
#         x=x,
#         y=reg_func(x),
#         mode='lines',
#         line=dict(color='green', width=2, dash='dot'),
#     ), )

fig.update_layout(hovermode="x unified", margin=dict(l=0, r=0, b=0, t=0, pad=0))
fig.show()

# Plots

In [8]:
## Temporal plots
# NOTE: rebinds `df`, which the loading cells above use for the raw CSV frame.
df =a.data
# df = test_data

# Viscosity on the left axis (red) and Temperature on a twin right axis
# (blue), both against time.
fig, ax_v = plt.subplots()
ax_T = ax_v.twinx()

ax_v.scatter(df['time'], df['Viscosity'], color='red', marker='.')
ax_v.set_ylabel('Viscosity', color='red')
ax_v.set_xlabel('time')

ax_T.scatter(df['time'], df['Temperature'], color='blue', marker='.')
ax_T.set_ylabel('Temperature', color='blue')


Text(0, 0.5, 'Temperature')

In [10]:
## Temperature plots
df = a.data

fig, ax = plt.subplots()
# Per-point colour codes derived from the Viscosity flag; only consumed by
# the commented-out scatter call below.
colors= df['Viscosity_verbose'].replace({
    'OK':'g', 'OK_inner':'b', 'image_sweep_check':'r',
})
# ax.scatter(x=df['Temperature'],y=df['Viscosity'],c= colors,s=5)

# Mean Viscosity per Temperature with a +/- 1 standard deviation band.
sns.lineplot(
    ax=ax,
    data=df,
    x="Temperature",
    y="Viscosity",
    estimator='mean',
    errorbar=("sd", 1),
    # err_style="bars", 
    label ='mean'  
)
# Median Viscosity per Temperature with a 68% percentile-interval band.
sns.lineplot(
    ax=ax,
    data=df,
    x="Temperature",
    y="Viscosity",
    errorbar=('pi',68),
    estimator="median",
    label ='median'  
)

# Same two curves for container `b` (disabled).
# df = b.data
# sns.lineplot(
#     ax=ax,
#     data=df,
#     x="Temperature",
#     y="Viscosity",
#     estimator='mean',
#     errorbar=("sd", 1),
#     # err_style="bars", 
#     label ='mean2'  
# )
# sns.lineplot(
#     ax=ax,
#     data=df,
#     x="Temperature",
#     y="Viscosity",
#     errorbar=('pi',68),
#     estimator="median",
#     label ='median2'  
# )


<Axes: xlabel='Temperature', ylabel='Viscosity'>

In [110]:
## fast OLS plot
# Fit 1: every row of a.data. regress() supplies the constants shown in the
# legend; regplot draws its own first-order fit of the same columns.
reg_data = a.data
nu0, EkJ, func = regress(reg_data)
sns.regplot(
    data=reg_data,
    x="Temperature",
    y="Viscosity",
    scatter=True,
    truncate=False,
    order=1,
    label=f'All:\nE kJ= {EkJ:5.3f}\nNu0= {nu0:5.3e}',
)

# Fit 2: one point per distinct Temperature value (the median Viscosity),
# drawn as a line only.
reg_data = a.data.groupby(by='Temperature')['Viscosity'].median().reset_index()
nu0, EkJ, func = regress(reg_data)
sns.regplot(
    data=reg_data,
    x="Temperature",
    y="Viscosity",
    scatter=False,
    truncate=False,
    order=1,
    label=f'Median:\nE kJ= { EkJ:5.3f}\nNu0= {nu0:5.3e}',
)

# The same fits for container `b` and for a and b combined (disabled).
# reg_data = b.data
# nu0, EkJ, func = regress(reg_data)
# sns.regplot(
#     data=reg_data,
#     x="Temperature",
#     y="Viscosity",
#     scatter=True,
#     truncate=False,
#     order=1,
#     label=f'All2:\nE kJ= {EkJ:5.3f}\nNu0= {nu0:5.3e}',
# )

# reg_data = b.data.groupby(by='Temperature')['Viscosity'].median().reset_index()
# nu0, EkJ, func = regress(reg_data)
# sns.regplot(
#     data=reg_data,
#     x="Temperature",
#     y="Viscosity",
#     scatter=False,
#     truncate=False,
#     order=1,
#     label=f'Median2:\nE kJ= { EkJ:5.3f}\nNu0= {nu0:5.3e}',
# )

# reg_data = pd.concat([a.data, b.data])
# nu0, EkJ, func = regress(reg_data)
# sns.regplot(
#     data=reg_data,
#     x="Temperature",
#     y="Viscosity",
#     scatter=False,
#     truncate=False,
#     order=1,
#     label=f'All:\nE kJ= {EkJ:5.3f}\nNu0= {nu0:5.3e}',
# )

plt.legend()

<matplotlib.legend.Legend at 0x20118af9750>

In [None]:
# NOTE(review): creates an empty Container and discards it; looks like a
# leftover scratch cell - confirm whether it can be deleted.
Container()

In [151]:
## Regression
# Work on a copy so that renaming/adding columns leaves `a` untouched.
# NOTE(review): assumes a.data is already linearised (1/T and ln(nu)) - confirm.
reg_a = a.copy()

df = reg_a.data
# Regressor column becomes 'E' (its coefficient is the slope), the response
# becomes 'nu'; the constant column 'nu0' makes its coefficient the intercept.
df.rename(inplace=True, columns={'Temperature': 'E', 'Viscosity': 'nu'})
df['nu0'] = 1
results = sm.OLS(df['nu'], df[['E', 'nu0']]).fit()
# alpha = 0.005, i.e. a 99.5 % confidence interval for each coefficient.
conf_int = results.conf_int(0.005)

# Intercept: exponentiate the estimate and its bounds, scale by 1e6, and
# report half of the resulting interval width as the +/- value.
conf_int.loc['nu0'] = np.exp(conf_int.loc['nu0']) * 1e6
nu0_6 = np.exp(results.params['nu0']) * 1e6
nu0_6_diap = (conf_int.loc['nu0'].max() - conf_int.loc['nu0'].min()) / 2

# Slope: multiply by 8.314 / 1000 (same factor as in regress()) and report
# half of the interval width as the +/- value.
conf_int.loc['E'] = 8.314 * conf_int.loc['E'] / 1000
EkJ = 8.314 * results.params['E'] / 1000
EkJ_diap = (conf_int.loc['E'].max() - conf_int.loc['E'].min()) / 2

print(
    f'Constants {reg_a.experiment_name}:',
    f'E   = { EkJ: <6.3f} ± {EkJ_diap: <3.2f} kJ',
    f'Nu0 = {nu0_6: <6.3f} ± {nu0_6_diap: <3.2f} sS * 1e-6',
    sep='\n',
)
print(results.summary())

Constants Exp5_down:
E   = 41.549 ± 0.10 kJ
Nu0 = 1.998  ± 0.08 sS * 1e-6
                            OLS Regression Results                            
Dep. Variable:                     nu   R-squared:                       0.995
Model:                            OLS   Adj. R-squared:                  0.995
Method:                 Least Squares   F-statistic:                 1.436e+06
Date:                Sun, 12 Nov 2023   Prob (F-statistic):               0.00
Time:                        01:43:33   Log-Likelihood:                 16230.
No. Observations:                7436   AIC:                        -3.246e+04
Df Residuals:                    7434   BIC:                        -3.244e+04
Df Model:                           1                                         
Covariance Type:            nonrobust                                         
                 coef    std err          t      P>|t|      [0.025      0.975]
---------------------------------------------------------