In [1]:
import os
import pandas as pd
import numpy as np
from scipy import stats

from sklearn import linear_model

import matplotlib.pyplot as plt
import seaborn as sns
import plotly 
import plotly.graph_objects as go
import plotly.express as px

%matplotlib qt
pd.set_option('mode.chained_assignment', None)

# Process modules

In [2]:
class Container:

    data: pd.DataFrame = None
   

    def __init__(self, data=None, name=''):
        self.data = data
        self.experiment_name = name
        self._log=[]

    def load(self, path=''):
        while (path == '') or (not os.path.isfile(path)):
            path = input(f"Input data path: ")
        _path_list = (path).split('\\')
        self.folder = '\\'.join(_path_list[:-1])
        self.experiment_name = _path_list[-1]

        self._raw_data = pd.read_csv(path)
        print(f'Loaded: {self.experiment_name}')
        self.log(f'loaded from: {path}')
        return self

    def initial_filter(
            self,
            time_in_minutes=False,
            temperature=(-np.inf, np.inf),
            viscosity=(0, np.inf),
            time=(0, np.inf),
    ):
        conditions = dict(temperature=temperature,
                          viscosity=viscosity,
                          time=time)

        temperature_cond = ((temperature[0] < self._raw_data['Temperature']) &
                            (self._raw_data['Temperature'] < temperature[1]))
        viscosity_cond = ((viscosity[0] < self._raw_data['Viscosity']) &
                          (self._raw_data['Viscosity'] < viscosity[1]))
        time_cond = ((time[0] < self._raw_data['time']) &
                     (self._raw_data['time'] < time[1]))

        self.data = self._raw_data[temperature_cond & viscosity_cond
                                   & time_cond]
        if time_in_minutes: self.data['time'] / 60
        self.log(f'Initialy filtered: {conditions}')
        return self.data

    def copy(self):
        new = Container(self.data.copy(), name=f'{self.experiment_name}')
        new._log.extend(self.log()[:]) 
        return new
    
    def log(self,msg=None):
        if msg is not None: self._log.append(msg)
        return self._log

    @property
    def groupT(self):
        return self.data[['Temperature', 'Viscosity']].groupby(by='Temperature')

    @property
    def groupV(self):
        return self.data[['Temperature', 'Viscosity']].groupby(by='Viscosity')


In [28]:
## Filtration
def z_filter(data: pd.Series):
    mean = data.mean()
    s = data.std(ddof=0)+1e-50
    z_score= np.abs((data-mean) / s) <1
    return z_score

# def z_filter(data: pd.Series):
#     return stats.zscore(data)<1

def whisker_iqr_filter(data: pd.Series):
    whisker_width=0.5
    q1 = data.quantile(0.25)
    q3 = data.quantile(0.75)
    iqr = q3 - q1
    return (data >= q1 - whisker_width*iqr) & (data <= q3 + whisker_width*iqr)

def iqr_filter(data: pd.Series):
    q1 = data.quantile(0.25)
    q3 = data.quantile(0.75)
    iqr = q3 - q1
    
    return np.abs((data - data.median()) / iqr) < 1

def my_z_filter(data: pd.Series):
    mean = data.median()
    s = data.std()+1e-50
    z_score= np.abs((data-mean) / s) <0.1
    return z_score

In [4]:
## Linearization
def linearize(data):
    data['Temperature'] =data['Temperature'].apply( lambda t: 1/(t+273.15))
    data['Viscosity'] = data['Viscosity'].apply(lambda nu: np.log(nu))   

In [82]:
def regress(data):
    reg = linear_model.LinearRegression(fit_intercept=True)
    X = np.array([data['Temperature']]).T
    Y = np.array(data['Viscosity'])
    reg.fit(X, Y)

    w = reg.coef_[0]
    w0 = reg.intercept_

    nu0 = np.exp(w0)
    EkJ = 8.314 * w / 1000

    def TC_func(t,EkJ=EkJ,nu0 =nu0):
        Tt = 1 / (t+273.15)
        return nu0*np.exp(EkJ*1000/8.314*Tt)

    return nu0, EkJ,TC_func


# Load raw and save ready data

In [None]:
# path = f'Experiments\MultiplyTemperature\Exp5(12.5)\Exp5_down.csv'

path = ''
while (path == '') or (not os.path.isfile(path)):
    path = input(f"Input data path: ")
_path_list = (path).split('\\')
folder_path = '\\'.join(_path_list[:-1])
data_name = _path_list[-1]
print(f'    Folder:\n{folder_path}\n    Data name:\n {data_name}')

In [None]:
df1 = pd.read_csv('Experiments\\MultiplyTemperature\\Exp0(0)\\Exp0_up1.csv',
                  index_col=0)
# df2 = pd.read_csv('Experiments\\MultiplyTemperature\\Exp0(0)\\Exp0_up2.csv',index_col=0)
# df3 = pd.read_csv('Experiments\\MultiplyTemperature\\Exp0(0)\\Exp0_up3.csv',index_col=0)
# df4 = pd.read_csv('Experiments\\MultiplyTemperature\\Exp0(0)\\Exp0_up4.csv',index_col=0)


In [None]:
rules = {
    1: 'image_sweep_check',
    2: 'combine_check',  # 'inner_processor_check':'OK_inner'
    0: 'OK'
}
col_rules = {
    'Viscosity_mark': 'Viscosity_verbose',
    'Temperature_mark': 'Temperature_verbose',
}

df1 = df1.rename(columns=col_rules)
df1[['Viscosity_verbose','Temperature_verbose']] =\
    df1[['Viscosity_verbose','Temperature_verbose']].replace(rules)

# df2 = df2.rename(columns=col_rules)
# df2[['Viscosity_verbose','Temperature_verbose']] = df2[['Viscosity_verbose','Temperature_verbose']].replace(rules)
# df2['time'] =df2['time']+ df1['time'].max()+1

# df3 = df3.rename(columns=col_rules)
# df3[['Viscosity_verbose','Temperature_verbose']] = df3[['Viscosity_verbose','Temperature_verbose']].replace({np.nan:'OK'})
# df3['time'] =df3['time']+ df2['time'].max()+1

# df4 = df4.rename(columns=col_rules)
# df4[['Viscosity_verbose','Temperature_verbose']] = df4[['Viscosity_verbose','Temperature_verbose']].replace({np.nan:'OK'})
# df4['time'] =df4['time']+ df3['time'].max()+1
# df = pd.concat([df1,df2,df3,df4])

df = df1.dropna(subset=['Viscosity', 'Temperature'])
df

In [None]:
np.any(df['time'].duplicated())

In [None]:
assert not os.path.isfile(f'{folder_path}\AData_{data_name}'), 'Data already exists'
df.to_csv(f'{folder_path}\AData_{data_name}', index=0)

# Load

In [5]:
a = Container()
a.load('Experiments\MultiplyTemperature\Exp2(5)\AData_Exp2_up.csv')
a.initial_filter(
    # time=(80, np.inf),
    viscosity=(20, 200),
    temperature=(13.8,41)
)

Loaded: AData_Exp2_up.csv


Unnamed: 0,time,Viscosity,Viscosity_verbose,Temperature,Temperature_verbose
0,5.0,159.6,image_sweep_check,14.4,OK
1,6.0,159.6,OK_inner,14.4,OK
2,7.0,159.3,OK,14.4,OK
3,8.0,159.0,OK_inner,14.4,OK
4,9.0,159.0,image_sweep_check,14.4,OK
...,...,...,...,...,...
2827,3317.0,31.8,OK,39.9,OK
2828,3318.0,31.7,image_sweep_check,39.9,OK
2829,3319.0,31.6,OK,39.9,OK
2830,3320.0,31.7,image_sweep_check,39.9,OK


In [None]:
b = Container()
b.load('Experiments\MultiplyTemperature\Exp2(5)\AData_Exp2_up.csv')
b.initial_filter(
    # time=(80, np.inf),
    viscosity=(20, 200),
    temperature=(13.8,41)
)

# Process

In [6]:
a_f = a.copy()
filter_func =iqr2_filter

a_f.data = a_f.data[
    (a_f.groupT['Viscosity'].apply(filter_func)
     .droplevel([0])
     .reset_index(drop=True)
     .to_numpy())
    ].dropna()
a_f.log(f'filtered {filter_func.__name__}')

['loaded from: Experiments\\MultiplyTemperature\\Exp2(5)\\AData_Exp2_up.csv',
 "Initialy filtered: {'temperature': (13.8, 41), 'viscosity': (20, 200), 'time': (0, inf)}",
 'filtered iqr2_filter']

In [7]:
a_l = a.copy()
linearize(a_l.data)
a_l.log('linearised')

['loaded from: Experiments\\MultiplyTemperature\\Exp2(5)\\AData_Exp2_up.csv',
 "Initialy filtered: {'temperature': (13.8, 41), 'viscosity': (20, 200), 'time': (0, inf)}",
 'linearised']

In [24]:
## Linearization filter
a_lf = a_l.copy()
filter_func = z_filter
T_group = a_lf.groupT['Viscosity']

a_lf.data = a_lf.data[
    (T_group.apply(filter_func)
     .droplevel([0])
     .reset_index(drop=True)
     .to_numpy())
    ].dropna()
a_lf.log(f'filtered {filter_func.__name__}')

['loaded from: Experiments\\MultiplyTemperature\\Exp2(5)\\AData_Exp2_up.csv',
 "Initialy filtered: {'temperature': (13.8, 41), 'viscosity': (20, 200), 'time': (0, inf)}",
 'linearised',
 'filtered z_filter']

# Interactive

In [None]:
#Time
def temp_plot(
    data,
    name,
    y2_axix=False,
    symbol='circle',
    colors=plotly.colors.qualitative.Plotly,
):
    verboses = data[f'{name}_verbose'].unique()
    rules = zip(
        verboses,
        [round(i) for i in np.linspace(0, len(colors) - 1, len(verboses))],
    )
    colors_rule = {key: colors[color] for key, color in rules}

    scatter = go.Scatter(
        x=data['time'],
        y=data[name],
        name=name,
        mode='markers',
        yaxis='y2' if y2_axix else 'y',
        text=data[f'{name}_verbose'],
        hovertemplate='%{y}<br>%{text}',
        marker=dict(
            size=5,
            symbol=symbol,
            color=data[f'{name}_verbose'].replace(colors_rule),
        ),
    )
    return scatter


a = a
fig = go.Figure()
fig.add_traces([
    temp_plot(
        a.data,
        'Viscosity',
        colors=plotly.colors.sequential.Aggrnyl,
    ),
    temp_plot(
        a.data,
        'Temperature',
        y2_axix=True,
        symbol='star',
        colors=plotly.colors.sequential.Sunsetdark_r,
    )
])
fig.update_layout(
    hovermode="x unified",
    margin=dict(l=0, r=0, b=0, t=0, pad=0),
    yaxis2=dict(
        overlaying='y',
        side='right',  # range=[15, 20],
    ),  # xaxis=dict(range=[0, 500], ),
)
fig.show()

In [None]:
fig = go.Figure()
fig = px.scatter(
    a.data,
    x="Temperature",
    y="Viscosity",
    color='Viscosity_verbose',
)

## Mean and median
fig.add_traces([
    go.Scatter(name='Mean',
               x=a.groupT.mean().index,
               y=a.groupT.mean()['Viscosity'],
               mode='lines',
               line=dict(color='red')),
    go.Scatter(name='Median',
               x=a.groupT.median().index,
               y=a.groupT.median()['Viscosity'],
               mode='lines',
               line=dict(color='black')),
])

## Comparation
# test_plot= test
# fig.add_trace(
#     go.Scatter(name='Test',
#                x=test_plot.data['Temperature'],
#                y=test_plot.data['Viscosity'],
#                mode='markers',
#                line=dict(color='green'),
#                marker=dict(size=10)), )

# fig.add_traces([
#     go.Scatter(name='Mean comp',
#                x=test_plot.groupT.mean().index,
#                y=test_plot.groupT.mean()['Viscosity'],
#                mode='lines',
#                line=dict(color='red', width=2, dash='dot')),
#     go.Scatter(name='Median comp',
#                 x=test_plot.groupT.median().index,
#                 y=test_plot.groupT.median()['Viscosity'],
#                 mode='lines',
#                 line=dict(color='black', width=2, dash='dot')),
# ])

##Regression
# x = reg_data.index
# fig.add_trace(
#     go.Scatter(
#         name='Regression',
#         x=x,
#         y=reg_func(x),
#         mode='lines',
#         line=dict(color='green', width=2, dash='dot'),
#     ), )

fig.update_layout(hovermode="x unified", margin=dict(l=0, r=0, b=0, t=0, pad=0))
fig.show()

# Plots

In [None]:
## Temporal plots
df =a.data
# df = test_data

fig, ax_v = plt.subplots()
ax_T = ax_v.twinx()

ax_v.scatter(df['time'], df['Viscosity'], color='red', marker='.')
ax_v.set_ylabel('Viscosity', color='red')
ax_v.set_xlabel('time')

ax_T.scatter(df['time'], df['Temperature'], color='blue', marker='.')
ax_T.set_ylabel('Temperature', color='blue')


In [88]:
## Temperature plots
df = a.data

fig, ax = plt.subplots()
colors= df['Viscosity_verbose'].replace({
    'OK':'g', 'OK_inner':'b', 'image_sweep_check':'r',
})
ax.scatter(x=df['Temperature'],y=df['Viscosity'],c= colors,s=5)

sns.lineplot(
    ax=ax,
    data=df,
    x="Temperature",
    y="Viscosity",
    estimator='mean',
    errorbar=("sd", 1),
    err_style="bars", 
    label ='mean'  
)
sns.lineplot(
    ax=ax,
    data=df,
    x="Temperature",
    y="Viscosity",
    errorbar=('pi',68),
    estimator="median",
    label ='median'  
)

# sns.lineplot(
#     ax=ax,
#     data=a_l.data,
#     x="Temperature",
#     y="Viscosity",
#     estimator='mean',
#     errorbar=("sd", 1),
#     err_style="bars", 
#     label ='mean'  
# )

<Axes: xlabel='Temperature', ylabel='Viscosity'>

In [83]:
## fast OLS plot
reg_data = a_l.data
nu0,EkJ,func = regress(reg_data )
sns.regplot(
    data=reg_data,
    x="Temperature",
    y="Viscosity",
    scatter=True,
    truncate=False,
    order=1,
    # color=".2",
    label=f'All:\nE kJ= {EkJ:5.3f}\nNu0= {nu0:5.3e}')

reg_data = a_l.groupT.median().reset_index()
nu0,EkJ,func = regress(reg_data )
sns.regplot(
    data=reg_data,
    x="Temperature",
    y="Viscosity",
    scatter=False,
    truncate=False,
    order=1,
    # color=".2",
    label=f'Median:\nE kJ= { EkJ:5.3f}\nNu0= {nu0:5.3e}')
plt.legend()


<matplotlib.legend.Legend at 0x1ce36808fa0>

## Saving regression

In [None]:
# assert not os.path.isfile(f'{folder_path}\BReg_{data_name[6:]}'), 'Data already exists'
# dfp.to_csv(f'{folder_path}\BReg_{data_name[6:]}', index=0)