In [None]:
import os
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import plotly 
import plotly.graph_objects as go
import plotly.express as px

from sklearn import linear_model
# IPython magic: select matplotlib's Qt backend for the figures created below.
%matplotlib qt
# Switch off pandas' chained-assignment (SettingWithCopy) warning for the whole notebook.
pd.set_option('mode.chained_assignment', None)

# Load raw and save ready data

In [None]:
# Example answer: Experiments\MultiplyTemperature\Exp5(12.5)\Exp5_down.csv

# Keep asking until the answer names an existing file.
path = ''
while (path == '') or (not os.path.isfile(path)):
    path = input("Input data path: ")

# os.path.split recognises every separator the host OS accepts, so a path
# typed with '/' is split correctly too (a manual split on '\\' would leave
# the whole path in the file name).
folder_path, data_name = os.path.split(path)
print(f'    Folder:\n{folder_path}\n    Data name:\n {data_name}')

In [None]:
# Read the raw CSV; its first column becomes the frame's index.
# NOTE(review): this path is hard-coded and independent of the `path` asked
# for in the previous cell -- confirm both point at the same experiment.
df1 = pd.read_csv(
    filepath_or_buffer='Experiments\\MultiplyTemperature\\Exp0(0)\\Exp0_up1.csv',
    index_col=0,
)
# Further parts of the same experiment (disabled):
# df2 = pd.read_csv('Experiments\\MultiplyTemperature\\Exp0(0)\\Exp0_up2.csv',index_col=0)
# df3 = pd.read_csv('Experiments\\MultiplyTemperature\\Exp0(0)\\Exp0_up3.csv',index_col=0)
# df4 = pd.read_csv('Experiments\\MultiplyTemperature\\Exp0(0)\\Exp0_up4.csv',index_col=0)


In [None]:
# Display the frame that was just read.
df1

In [None]:
# Numeric mark -> readable label used in the '*_verbose' columns.
# (unused alternative label: 'inner_processor_check': 'OK_inner')
rules = {1: 'image_sweep_check', 2: 'combine_check', 0: 'OK'}
# Column renames: '<quantity>_mark' -> '<quantity>_verbose'.
col_rules = dict(
    Viscosity_mark='Viscosity_verbose',
    Temperature_mark='Temperature_verbose',
)

# Rename the mark columns, then translate their numeric codes into labels.
df1 = df1.rename(columns=col_rules)
df1 = df1.assign(
    **{column: df1[column].replace(rules) for column in col_rules.values()}
)

# Disabled handling of further parts: each part's time axis is shifted so it
# continues after the previous part, then all parts are concatenated.
# df2 = df2.rename(columns=col_rules)
# df2[['Viscosity_verbose','Temperature_verbose']] = df2[['Viscosity_verbose','Temperature_verbose']].replace(rules)
# df2['time'] =df2['time']+ df1['time'].max()+1

# df3 = df3.rename(columns=col_rules)
# df3[['Viscosity_verbose','Temperature_verbose']] = df3[['Viscosity_verbose','Temperature_verbose']].replace({np.nan:'OK'})
# df3['time'] =df3['time']+ df2['time'].max()+1

# df4 = df4.rename(columns=col_rules)
# df4[['Viscosity_verbose','Temperature_verbose']] = df4[['Viscosity_verbose','Temperature_verbose']].replace({np.nan:'OK'})
# df4['time'] =df4['time']+ df3['time'].max()+1
# df = pd.concat([df1,df2,df3,df4])

# Drop rows in which either measurement is missing.
df = df1.dropna(subset=['Viscosity', 'Temperature'], how='any')
df

In [None]:
# True when at least one 'time' value occurs more than once.
df['time'].duplicated().any()

In [None]:
# Destination: the raw file's folder, file name prefixed with 'AData_'.
# os.path.join replaces the former '...\AData_...' f-string: '\A' is an
# invalid escape sequence (a warning today, an error in a future Python),
# and with an empty folder the old form pointed at the drive root.
ready_path = os.path.join(folder_path, f'AData_{data_name}')
# Refuse to overwrite an existing prepared file.
assert not os.path.isfile(ready_path), 'Data already exists'
df.to_csv(ready_path, index=False)

In [None]:
# Scratch cell intentionally left without code: an unfinished attribute
# access on `base` is a SyntaxError, and `base` is only created in the
# "Load" section below.

# Load

In [None]:
class Data:
    """Container for one experiment table with time, temperature and viscosity.

    ``_raw_data`` holds the unfiltered table (read by :meth:`load` or passed
    to the constructor); ``data`` holds the working table, normally the
    result of :meth:`build_data`.
    """

    # Working table; stays None until data is passed in or build_data() runs.
    data: pd.DataFrame = None

    def __init__(self, data=None, name='python data'):
        """Wrap an in-memory frame.

        Args:
            data: DataFrame to use as working table (may be None).
            name: Label stored in ``self.name``.
        """
        self.data = data
        # Also keep it as the raw table so build_data() works without load().
        self._raw_data = data
        self.name = name
        self.desc = f'loaded from python'

    def load(self, path=''):
        """Read the raw table from a CSV file.

        Prompts on stdin until ``path`` names an existing file, then stores
        the containing folder in ``self.folder``, the file name in
        ``self.name`` and the parsed table in ``self._raw_data``.
        ``self.data`` is not touched; call :meth:`build_data` afterwards.

        Args:
            path: CSV path; an empty or non-existing path triggers the prompt.
        """
        while (path == '') or (not os.path.isfile(path)):
            path = input(f"Input data path: ")
        # os.path.split handles every separator accepted by the host OS.
        self.folder, self.name = os.path.split(path)
        print(f'Loaded: {self.name}')
        self._raw_data = pd.read_csv(path)
        self.desc = f'loaded from: {path}'

    def build_data(self,
                   time_in_minutes=False,
                   temperature=(13, 45),
                   viscosity=(0, 400),
                   time=(0, np.inf)):
        """Filter the raw table and store the result in ``self.data``.

        A row is kept when each of 'Temperature', 'Viscosity' and 'time'
        lies strictly inside its ``(low, high)`` window.

        Args:
            time_in_minutes: If true, divide the 'time' column of the result
                by 60. The ``time`` window is always compared against the
                raw (undivided) values.
            temperature: ``(low, high)`` window for 'Temperature'.
            viscosity: ``(low, high)`` window for 'Viscosity'.
            time: ``(low, high)`` window for 'time'.

        Returns:
            The filtered frame (also available as ``self.data``). It is a
            copy, so the raw table is never modified.

        Raises:
            ValueError: If no raw table is available yet.
        """
        raw = getattr(self, '_raw_data', None)
        if raw is None:
            raise ValueError(
                'No raw data available: call load() or pass data to Data().')

        self.conditions = dict(temperature=temperature,
                               viscosity=viscosity,
                               time=time)

        def strictly_inside(column, bounds):
            low, high = bounds
            values = raw[column]
            return (low < values) & (values < high)

        keep = (strictly_inside('Temperature', temperature)
                & strictly_inside('Viscosity', viscosity)
                & strictly_inside('time', time))

        # Copy so the unit conversion below cannot write into the raw table.
        self.data = raw[keep].copy()
        if time_in_minutes:
            # The quotient has to be stored; computing it alone changes nothing.
            self.data['time'] = self.data['time'] / 60
        return self.data

    def temp_plot(self,
                  name,
                  y2_axix=False,
                  symbol='circle',
                  colors=None):
        """Build a plotly marker trace of column ``name`` against 'time'.

        Each distinct value of the ``'<name>_verbose'`` column gets one
        colour; the colours are picked at evenly spaced positions of
        ``colors``.

        Args:
            name: Column to plot; ``'<name>_verbose'`` must exist as well.
            y2_axix: Put the trace on the secondary y axis ('y2').
            symbol: Plotly marker symbol.
            colors: Sequence of colours; ``None`` selects
                ``plotly.colors.qualitative.Plotly``.

        Returns:
            A ``go.Scatter`` trace.
        """
        if colors is None:
            # Resolved lazily so the class can be defined without touching
            # plotly at definition time.
            colors = plotly.colors.qualitative.Plotly

        labels = self.data[f'{name}_verbose']
        verboses = labels.unique()
        # Evenly spaced palette positions, one per distinct label.
        positions = [
            round(i) for i in np.linspace(0, len(colors) - 1, len(verboses))
        ]
        colors_rule = {
            key: colors[position]
            for key, position in zip(verboses, positions)
        }

        scatter = go.Scatter(
            x=self.data['time'],
            y=self.data[name],
            name=name,
            mode='markers',
            yaxis='y2' if y2_axix else 'y',
            text=labels,
            hovertemplate='%{y}<br>%{text}',
            marker=dict(
                size=5,
                symbol=symbol,
                color=labels.replace(colors_rule),
            ),
        )
        return scatter

    @property
    def groupT(self):
        """Temperature/viscosity columns of ``data`` grouped by 'Temperature'."""
        return self.data[['Temperature', 'Viscosity']].groupby(by='Temperature')

    @property
    def groupV(self):
        """Temperature/viscosity columns of ``data`` grouped by 'Viscosity'."""
        return self.data[['Temperature', 'Viscosity']].groupby(by='Viscosity')


In [None]:
base = Data()
# Raw string: the Windows path contains '\M', '\E' and '\A', which are invalid
# escape sequences in a normal string literal. The resulting text is the same.
base.load(r'Experiments\MultiplyTemperature\Exp4(10)\AData_Exp4_up.csv')
# Keep rows with time > 172 and 20 < Viscosity < 83; the temperature window
# stays at build_data's default.
base.build_data(time=(172, np.inf), viscosity=(20, 83))

# Time plots

In [None]:
# Table to plot: the working table of `base`
# (use `test_data.data` instead to look at the filtered subset).
df = base.data

# One time axis, two y axes: viscosity on the left, temperature on the right.
fig, ax_v = plt.subplots()
ax_T = ax_v.twinx()

ax_v.set_ylabel('Viscosity', color='red')
ax_v.scatter(df['time'], df['Viscosity'], marker='.', color='red')

ax_T.scatter(df['time'], df['Temperature'], marker='.', color='blue')
ax_T.set_ylabel('Temperature', color='blue')


In [None]:
# Interactive version of the time plot: viscosity on the primary y axis,
# temperature (star markers) on a secondary axis drawn on the right.
fig = go.Figure(data=[
    base.temp_plot(name='Viscosity',
                   colors=plotly.colors.sequential.Aggrnyl),
    base.temp_plot(name='Temperature',
                   y2_axix=True,
                   symbol='star',
                   colors=plotly.colors.sequential.Sunsetdark_r),
])
fig.update_layout(
    hovermode="x unified",
    margin={'l': 0, 'r': 0, 'b': 0, 't': 0, 'pad': 0},
    # Optional zoom (disabled): yaxis2 range=[15, 20], xaxis=dict(range=[0, 500]).
    yaxis2={'overlaying': 'y', 'side': 'right'},
)
fig.show()

# Temperature plots

In [None]:
base_plot = base

# All points of the working table, coloured by the viscosity label.
fig = px.scatter(
    base_plot.data,
    x="Temperature",
    y="Viscosity",
    color='Viscosity_verbose',
)

## test plots
# `test_data` is created in the "Filtration" section further down, so it does
# not exist yet on a fresh top-to-bottom run; overlay it only when available.
if 'test_data' in globals():
    fig.add_trace(
        go.Scatter(name='Test',
                   x=test_data.data['Temperature'],
                   y=test_data.data['Viscosity'],
                   mode='markers',
                   line=dict(color='green'),
                   marker=dict(size=10)),
    )

## Mean and median
# Aggregate once per statistic; each frame is indexed by temperature.
t_median = base_plot.groupT.median()
t_mean = base_plot.groupT.mean()
fig.add_traces([
    go.Scatter(name='Median',
               x=t_median.index,
               y=t_median['Viscosity'],
               mode='lines',
               line=dict(color='black')),
    go.Scatter(name='Mean',
               x=t_mean.index,
               y=t_mean['Viscosity'],
               mode='lines',
               line=dict(color='red')),
])

## Comparison
# comp_plot = 
# fig.add_traces([
#     go.Scatter(name='Median2',
#                 x=dfs_median.index,
#                 y=dfs_median2['Viscosity'],
#                 mode='lines',
#                 line=dict(color='black', width=2, dash='dot')),
#     go.Scatter(name='Mean2',
#                 x=dfs_mean2.index,
#                 y=dfs_mean2['Viscosity'],
#                 mode='lines',
#                 line=dict(color='red', width=2, dash='dot')),
# ])

##Regression
# x = np.array(dfs_median.index)
# y = nu0 * np.exp(EkJ * 1000 / 8.314 / (x+273.15))
# fig.add_trace(go.Scatter(name='Regression', x=x, y=y, mode='lines'))

fig.update_layout(hovermode="x unified", margin=dict(l=0, r=0, b=0, t=0, pad=0))
fig.show()

# Filtration

In [None]:
# Disabled draft of a per-temperature outlier filter, kept for reference only.
# It refers to `dfs` / `dfs2`, which are not defined in the visible notebook.
# The draft keeps, inside each temperature group, the viscosities between the
# 0.45 and 0.55 quantiles (whisker width 0); two alternative criteria are
# left as inner comments.
# T_goupT= dfs.groupby(by='Temperature')['Temperature']
# T_groupV=dfs.groupby(by='Temperature')['Viscosity']
# def foo(g):
#     whisker_width=0
#     q1 = g.quantile(0.45)
#     q3 = g.quantile(0.55)
#     iqr = q3 - q1
#     return (g >= q1 - whisker_width*iqr) & (g <= q3 + whisker_width*iqr)
#     # return np.abs((g - g.median()) / iqr) < 2.22
#     # return np.abs((g - g.mean()) / g.std(ddof=0)) < 1

# error_mask= T_groupV.apply(foo ).droplevel([0]).reset_index(drop=True).to_numpy()

# dfs = dfs[error_mask]

# # T_goupT2= dfs2.groupby(by='Temperature')['Temperature']
# # T_groupV2=dfs2.groupby(by='Temperature')['Viscosity']
# # dfs2
# dfs

In [None]:
def z_filter(data: pd.Series, threshold: float = 3) -> pd.Series:
    """Return a boolean mask of the values whose |z-score| is below ``threshold``.

    The z-score uses the population standard deviation (``ddof=0``).

    Args:
        data: Values to test.
        threshold: Exclusive upper limit for the absolute z-score.

    Returns:
        Boolean Series with the index of ``data``; True marks values to keep.
        Missing values come out as False.
    """
    mean = data.mean()
    # The tiny offset keeps the division defined when all values are equal
    # (std == 0); every value then has z == 0 and is kept.
    spread = data.std(ddof=0) + 1e-50
    return np.abs((data - mean) / spread) < threshold

def iqr_filter(data: pd.Series,
               whisker_width: float = 0.5,
               lower_q: float = 0.25,
               upper_q: float = 0.75) -> pd.Series:
    """Return a boolean mask of the values inside the widened quantile range.

    With ``q1``/``q3`` the ``lower_q``/``upper_q`` quantiles and
    ``iqr = q3 - q1``, a value is kept when it lies in
    ``[q1 - whisker_width * iqr, q3 + whisker_width * iqr]`` (bounds included).

    Args:
        data: Values to test.
        whisker_width: Multiple of the quantile range added on both sides.
        lower_q: Quantile used as lower edge.
        upper_q: Quantile used as upper edge.

    Returns:
        Boolean Series with the index of ``data``; True marks values to keep.
    """
    q1 = data.quantile(lower_q)
    q3 = data.quantile(upper_q)
    margin = whisker_width * (q3 - q1)
    return (data >= q1 - margin) & (data <= q3 + margin)

# Outlier criterion applied to the viscosities of each temperature group.
filter_func = iqr_filter

T_group = base.groupT['Viscosity']
# transform() evaluates the filter per group and returns the result aligned
# with the rows of base.data. apply() hands its result back in group order,
# so using that as a positional mask marks the wrong rows whenever the table
# is not already sorted by temperature.
keep_mask = T_group.transform(filter_func).astype(bool)
test_data = Data(base.data[keep_mask])

## OLS

In [None]:
   # Disabled draft, kept for reference only: a `get_values` method outline
   # with the regression formulas, followed by two scatter traces that colour
   # the points through a `colors_dict` which is not defined in the visible
   # notebook.
   # def get_values(self):
    #     y_lnD = np.log(nu)
    #     x_T = 1 / (T+273.15)

    #     nu0 = np.exp(w0)
    #     EkJ = 8.314 * w / 1000

    # go.Scatter(
    # x=self.data['time'],
    # y=self.data['Viscosity'],
    # name='Viscosity',
    # mode='markers',
    # marker=dict(size=5,
    #             color=self.data['Viscosity_verbose'].replace(colors_dict)))
    # go.Scatter(
    #     x=self.data['time'],
    #     y=self.data['Temperature'],
    #     name='Temperature',
    #     yaxis='y2',
    #     mode='markers',
    #     marker=dict(size=5,
    #                 symbol='square',
    #                 color=self.data['Temperature_verbose'].replace(colors_dict)),
    # )


In [None]:
# Regression input: median viscosity per temperature (index = temperature).
# Built here so the cell runs on a fresh kernel instead of depending on a
# frame that no cell of the notebook creates.
# NOTE(review): the filtered set is used because the filtering section comes
# right before the regression; switch to `base.groupT.median()` if the
# unfiltered table was intended.
dfp = test_data.groupT.median()
dfp

In [None]:
# Plain arrays for the fit: temperatures from the index, viscosities from the column.
T = dfp.index.to_numpy()
nu = dfp['Viscosity'].to_numpy()

In [None]:
# Linearised model: ln(nu) = ln(nu0) + w * 1/(T + 273.15)
# (presumably T is in degrees Celsius and 273.15 converts it to kelvin).
y_lnD = np.log(nu)
x_T = 1 / (T + 273.15)

# nu0 and EkJ are derived from the fitted coefficients, so they are computed
# after the fit; doing it here would use `w0` and `w` before they exist.

# Design matrix with a single feature column, and the target vector.
X = np.array([x_T]).T
Y = y_lnD

In [None]:
# Least-squares line through (X, Y): intercept -> w0, slope -> w.
reg = linear_model.LinearRegression(fit_intercept=True).fit(X, Y)
w0 = reg.intercept_
w = reg.coef_[0]

In [None]:
# Back-transform the fit: the slope times 8.314 (presumably the gas constant
# in J/(mol K)) divided by 1000 gives EkJ; the exponential of the intercept
# gives the pre-factor nu0.
EkJ = w * 8.314 / 1000
nu0 = np.exp(w0)


In [None]:
# Label the result with the dataset that was actually analysed (`base`):
# the slicing below expects an 'AData_...' file name, which is what
# base.load() received, not the raw-file name asked for at the top.
# Text inside the last '(...)' of the folder name, e.g. 'Exp4(10)' -> '10'.
exp_name = base.folder.split('(')[-1][:-1]
# File name without extension and without the 'AData_' prefix.
run_name = os.path.splitext(base.name)[0][len('AData_'):]
print(
    f"Experiment= {run_name}: {exp_name}%",
    f'nu0=  {nu0: 6.3e}',
    f'E  =  {EkJ: 6.3f} kJ',
    sep='\n',
)
regression_done = True


## Saving regression

In [None]:
# Frame to save: mean viscosity per temperature (index = temperature).
# Built here so the cell runs on a fresh kernel instead of depending on a
# frame that no cell of the notebook creates.
# NOTE(review): uses the filtered set; switch to `base.groupT.mean()` if the
# unfiltered table was intended.
dfp = test_data.groupT.mean()

In [None]:
# Destination: folder of the analysed dataset, with the 'AData_' prefix of its
# file name replaced by 'BReg_'. os.path.join replaces the former '...\BReg_...'
# f-string, in which '\B' is an invalid escape sequence.
reg_path = os.path.join(base.folder, f"BReg_{base.name[len('AData_'):]}")
# Refuse to overwrite an existing result file.
assert not os.path.isfile(reg_path), 'Data already exists'
# NOTE(review): index=False omits the frame's index from the file; if `dfp` is
# indexed by temperature, the temperatures are not written -- confirm.
dfp.to_csv(reg_path, index=False)