In [1]:
import pandas
import pathlib
from config import datadir
import numpy
import matplotlib.pyplot as plt
from latex_figure import plotfigure
import scipy.signal as sig
import statsmodels.nonparametric.smoothers_lowess as smoother

import pint

In [2]:
# Catalogue of the recorded experiments, indexed by the 'Start Date' column.
excel_directory = pandas.read_excel(
    datadir / 'Data Description.xlsx',
    index_col='Start Date',
    parse_dates=True,
)
excel_directory

Unnamed: 0_level_0,End Date,Equipment,Type,Purpose,Filename,Run,Description
Start Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
2019-12-06,2019-12-11,pH Probe,Drift,Test Drift on pH probe,20191206 pH Drift 1.txt,1,"long run of pH probe drift, performed inside t..."
2019-12-11,2019-12-17,pH Probe,Drift,Test Drift on pH probe,20191211 pH Drift 2.txt,2,"long run of pH probe drift, performed inside t..."
2019-12-17,2019-12-23,pH Probe,Drift,Test Drift on pH probe,20191217 pH Drift 3.txt,3,"long run of pH probe Drift, circulating ph7 bu..."
2020-01-16,2020-01-16,pH Probe,Step Test,Step Test,20200116 pH Step 3.txt,3,pH stepped outside of rig
2020-01-14,2020-01-14,pH Probe,Step Test,Step Test,20200116 pH Step 1.txt,1,"pH stepped outside of rig. Probe Broke, Dump run"
2020-01-15,2020-01-15,pH Probe,Step Test,Step Test,20200116 pH Step 2.txt,2,pH stepped outside of rig


In [None]:
# Select the rows whose 'Type' is 'Drift' (chain further conditions with '&').
drifts = excel_directory.loc[excel_directory['Type'] == 'Drift']
drifts

In [None]:
# Load every drift log listed in `drifts` into its own DataFrame.
all_drifts_frames = []
durations = []  # one [first timestamp, last timestamp] pair per loaded run
for file in drifts['Filename']:
    # The logs are read as tab-separated text without a header row; only the
    # first four columns are kept and named here.
    new_frame = pandas.read_csv(datadir / file,
                                sep='\t', header=None,
                                usecols=[0, 1, 2, 3],
                                names=["Date", "Time", "Temp", "pH"])

    # Join the separate date and time columns into a single timestamp and use
    # it as the index (12-hour clock with an AM/PM marker).
    new_frame['DateTime'] = new_frame['Date'].astype(str) + ' ' + new_frame['Time'].astype(str)
    new_frame['DateTime'] = pandas.to_datetime(new_frame['DateTime'], format=r'%Y/%m/%d %I:%M:%S %p')
    new_frame = new_frame.drop(columns=['Date', 'Time']).set_index('DateTime')

    # Record the span only now that the index holds timestamps; taken before
    # set_index it would merely store the row positions 0 and n-1.
    durations.append([new_frame.index[0], new_frame.index[-1]])

    new_frame['Temp'] = new_frame['Temp'].astype('pint[degC]')  # Allows setting of units for data types
    new_frame['Temp'] = new_frame['Temp'].pint.to('kelvin')

    all_drifts_frames.append(new_frame)

In [None]:
# Slice bounds used by later cells: Skip_first is the starting offset into each
# run, terminate_at is the slice end used when truncating each run.
Skip_first, terminate_at = 0, 400_000

In [None]:
# Truncate every run with the slice [:terminate_at], updating the list in place.
all_drifts_frames[:] = [run[:terminate_at] for run in all_drifts_frames]

In [None]:
# Per-run plotting series: absolute timestamps, pH readings and the elapsed
# time in seconds measured from the first sample of the run.
times = []
ph = []
delta_times = []
for frame in all_drifts_frames:
    kept_index = frame.index[Skip_first:]
    times.append(kept_index)
    ph.append(frame['pH'].values[Skip_first:])
    # total_seconds() yields float seconds on every pandas version, whereas
    # astype('timedelta64[s]') returns a TimedeltaIndex on pandas >= 2.
    delta_times.append((kept_index - frame.index[0]).total_seconds())

### Plotting Settings

In [None]:
# Names and line types for the different plot series
YNAMES = ["Run 1", "Run 2", "Run 3", "Run 1 Filtered - 1", "Run 1 Filtered - 2"]
LINETYPES = ["c*", "b-", "g--", 'm-', 'r-']

aspect_ratio = 4 / 5
FULLSIZE = 5, 5 * aspect_ratio
HALFSIZE = 3, 3 * aspect_ratio

xlabel = "Time since Start /s"
ylabel = "pH"

In [None]:
# Full-size figure of pH against elapsed time for the series collected so far.
# NOTE(review): presumably plotfigure writes the figure to `filename` - confirm in latex_figure.
plotfigure(
    delta_times, ph, YNAMES, LINETYPES, xlabel, ylabel,
    figsize=FULLSIZE,
    filename=str(datadir) + "/pH Drifts.pdf",
)

In [None]:
# Savitzky-Golay smoothing of the first run's pH trace: 100001-sample window,
# third-order polynomial. The matching timestamps are kept alongside it.
ph_filtered = [
    sig.savgol_filter(
        all_drifts_frames[0]['pH'][Skip_first:],
        window_length=100001,
        polyorder=3,
    )
]
time_for_filtered = [all_drifts_frames[0].index[Skip_first:]]

In [None]:
# Build a copy of the first run with the flagged samples removed.
frame = all_drifts_frames[0].copy()

# Elapsed seconds since the first sample. total_seconds() returns floats on
# every pandas version; astype('timedelta64[s]') does not on pandas >= 2.
frame['deltas'] = (frame.index - frame.index[0]).total_seconds()
frame['ph_shift'] = frame['pH'].shift(1)  # previous sample's pH (NaN on the first row)

allowed_change = 0.05  # largest tolerated rise, as a fraction of the previous reading
cutoff = 7             # readings above this pH are discarded

# A sample is flagged when it rises by more than `allowed_change` relative to
# the previous reading, or when it lies above `cutoff`. The first row has no
# predecessor, so its NaN comparison is False and only the cutoff applies to it.
relative_jump = (frame['pH'] - frame['ph_shift']) > allowed_change * frame['ph_shift']
above_cutoff = frame['pH'] > cutoff
drops = frame['pH'][relative_jump | above_cutoff].index

# Membership is tested on index labels, so every row sharing a flagged
# timestamp is removed. Index.isin replaces numpy.in1d, deprecated in NumPy 2.0.
mask = ~frame.index.isin(drops)
frame_trimmed = frame.loc[mask, :]

In [None]:
# Add two more series to the plot data: the smoothed run 1 (sharing run 1's
# elapsed times) followed by the trimmed run 1 with its own elapsed times.
# Note: this cell appends, so re-running it adds the series again.
ph.extend([
    ph_filtered[0],
    frame_trimmed['pH'][Skip_first:],
])
delta_times.extend([
    delta_times[0],
    frame_trimmed['deltas'][Skip_first:],
])

In [None]:
# Full-size figure of every series now held in `delta_times` / `ph`.
# NOTE(review): the filename matches the earlier plotting cell, so that output is presumably replaced - confirm.
plotfigure(
    delta_times,
    ph,
    YNAMES,
    LINETYPES,
    xlabel,
    ylabel,
    figsize=FULLSIZE,
    filename=str(datadir) + "/pH Drifts.pdf",
)