#### This program computes the linear regression coefficients and intercept to predict the Zn drop in 106TK01 from the following features:
1. dezn flow
2. dezn feed temp
3. 105tk05 temp
4. H2S flow
5. ntrl pls condition
Ridge regression is used.


This is a continuation from the ZnDrop_Proj_Load_Data_Gathered_2017_to_2020Aug.ipynb file

In [1]:
#import libraries

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline
from dateutil.parser import parse
from itertools import compress
from collections import OrderedDict
from datetime import date
import datetime
from datetime import timedelta
import re

import openpyxl
import random

from matplotlib.pylab import rcParams
rcParams['figure.figsize'] = 12, 10

import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots


In [37]:
# Load the combined dataset exported by
# ZnDrop_Proj_Load_Data_Gathered_2017_to_2020Aug.ipynb.
DF_NTRL_TK05_MSTK01 = pd.read_excel(
    r'C:\Users\v.t.flores\Documents\df_ZnNi_project_105TK05_2015_2020_PIdata.xlsx'
)

# Index by the DATETIME column and coerce every remaining column to a numeric
# dtype; values that cannot be parsed become NaN (errors='coerce').
DF_NTRL_TK05_MSTK01_1 = (
    DF_NTRL_TK05_MSTK01
    .set_index('DATETIME')
    .apply(pd.to_numeric, errors='coerce')
)

In [38]:
#
#
# Fill missing values by linear interpolation down each column, propagating
# forward only (leading NaNs at the start of a column are left untouched).
DF_NTRL_TK05_MSTK01_2 = DF_NTRL_TK05_MSTK01_1.interpolate(
    method='linear',
    axis=0,
    limit_direction='forward',
)

In [39]:
# Echo the column labels of the interpolated frame (notebook display only).
DF_NTRL_TK05_MSTK01_2.columns

Index(['104_Al', '104_Ca', '104_Co', '104_Cr', '104_Cu', '104_Fe', '104_Fe2',
       '104_Ft', '104_Mg', '104_Mn', '104_NTU', '104_Ni', '104_ORP', '104_Pb',
       '104_Si', '104_Zn', '104_pH60', '106_Ni', '106_Zn', 'tk05_Al',
       'tk05_Ca', 'tk05_Co', 'tk05_Cr', 'tk05_Cu', 'tk05_Fe', 'tk05_Mg',
       'tk05_Mn', 'tk05_Ni', 'tk05_Pb', 'tk05_Si', 'tk05_Zn', 'DeZn_Feed_Flow',
       'DeZn_Feed_T', 'H2S_FT_TK01_A', 'H2S_FT_TK01_B', 'H2S_FT_TK02_A',
       'H2S_FT_TK02_B', 'DeZn_Seed_Flow', 'DeZn_TK05_T', 'H2S_tk01',
       'H2S_tk02'],
      dtype='object')

In [40]:
# Columns belonging to the 105TK05 sample point: the tk05_* assay columns plus
# the tank temperature. These are the columns that get time-shifted together.
paramList_105 = [
    'tk05_Al', 'tk05_Ca', 'tk05_Co', 'tk05_Cr',
    'tk05_Cu', 'tk05_Fe', 'tk05_Mg', 'tk05_Mn',
    'tk05_Ni', 'tk05_Pb', 'tk05_Si', 'tk05_Zn',
    'DeZn_TK05_T',
]

#### Adjust 105, 106 for retention time

In [42]:
# Shift the 105TK05 columns by the 1 h retention time used for this trial:
# copy the columns out, move DATETIME into a regular column, add the delay,
# then restore DATETIME as the index so the frame can be joined back.
#
# NOTE(review): adding the delay relabels a 105TK05 value recorded at time t
# as t + 1 h, so after the join it sits next to the (unshifted) 104 readings
# recorded one hour *later*. If the intent is to pair a 104 reading with the
# 105TK05 sample taken one hour afterwards, the offset would need to be
# negative — confirm the intended direction (same question for 106 below).
DF_105_r = DF_NTRL_TK05_MSTK01_2[paramList_105].reset_index()
DF_105_r = DF_105_r.assign(
    DATETIME=DF_105_r['DATETIME'] + datetime.timedelta(hours=1)
)
DF_105 = DF_105_r.set_index('DATETIME')

# Same procedure for the 106 columns, with a 2 h offset.
DF_106_r = DF_NTRL_TK05_MSTK01_2[['106_Ni', '106_Zn']].reset_index()
DF_106_r = DF_106_r.assign(
    DATETIME=DF_106_r['DATETIME'] + datetime.timedelta(hours=2)
)
DF_106 = DF_106_r.set_index('DATETIME')



##### drop the retention time adjusted params

In [44]:
# Rebuild the main frame around the time-shifted columns.
#
# Step 1: remove the original (unshifted) 105TK05 and 106 columns from the
# interpolated main frame.
DF_NTRL_TK05_MSTK01_2_drop105_106 = DF_NTRL_TK05_MSTK01_2.drop(
    columns=paramList_105 + ['106_Ni', '106_Zn']
)

# Step 2: outer-join the shifted 106 and 105 frames back on the datetime
# index (timestamps present on only one side yield NaN cells), then coerce
# every column to numeric, turning unparseable values into NaN.
DF_NTRL_TK05_MSTK01_3 = (
    DF_NTRL_TK05_MSTK01_2_drop105_106
    .join(DF_106, how='outer')
    .join(DF_105, how='outer')
    .apply(pd.to_numeric, errors='coerce')
)

# Step 3: keep only the rows where every column has a value.
DF_NTRL_TK05_MSTK01_4 = DF_NTRL_TK05_MSTK01_3.dropna(axis=0, how='any')

# Predictor column names, grouped by where they come from.
znNi_predictors = [
    # 104 columns
    '104_Pb', '104_Zn', '104_Cu', '104_Ni', '104_Co', '104_Fe', '104_Mn',
    '104_Cr', '104_Ca', '104_Si', '104_Al', '104_Mg', '104_Fe2',
    '104_pH60', '104_ORP', '104_NTU', '104_Ft',
    # DeZn feed flow and temperatures
    'DeZn_Feed_Flow', 'DeZn_Feed_T', 'DeZn_TK05_T',
    # H2S columns
    'H2S_tk01', 'H2S_tk02',
    # tk05 columns
    'tk05_Al', 'tk05_Ca', 'tk05_Co', 'tk05_Cr', 'tk05_Cu', 'tk05_Fe',
    'tk05_Mg', 'tk05_Mn', 'tk05_Ni', 'tk05_Pb', 'tk05_Si', 'tk05_Zn',
]

In [52]:
# Sanity check: number of predictor names in znNi_predictors.
print(len(znNi_predictors))

34


In [79]:
# Inspect the NaN-free, time-shifted frame: column count, column labels,
# then row count.
print(len(DF_NTRL_TK05_MSTK01_4.columns))
print(DF_NTRL_TK05_MSTK01_4.columns)
print(len(DF_NTRL_TK05_MSTK01_4))

41
Index(['104_Al', '104_Ca', '104_Co', '104_Cr', '104_Cu', '104_Fe', '104_Fe2',
       '104_Ft', '104_Mg', '104_Mn', '104_NTU', '104_Ni', '104_ORP', '104_Pb',
       '104_Si', '104_Zn', '104_pH60', 'DeZn_Feed_Flow', 'DeZn_Feed_T',
       'H2S_FT_TK01_A', 'H2S_FT_TK01_B', 'H2S_FT_TK02_A', 'H2S_FT_TK02_B',
       'DeZn_Seed_Flow', 'H2S_tk01', 'H2S_tk02', '106_Ni', '106_Zn', 'tk05_Al',
       'tk05_Ca', 'tk05_Co', 'tk05_Cr', 'tk05_Cu', 'tk05_Fe', 'tk05_Mg',
       'tk05_Mn', 'tk05_Ni', 'tk05_Pb', 'tk05_Si', 'tk05_Zn', 'DeZn_TK05_T'],
      dtype='object')
44429


In [68]:
# Columns retained for the project dataset. Compared with the full frame this
# leaves out the raw H2S_FT_* flow columns and DeZn_Seed_Flow.
paramList_104_105_106 = [
    '104_Al', '104_Ca', '104_Co', '104_Cr', '104_Cu', '104_Fe',
    '104_Fe2', '104_Ft', '104_Mg', '104_Mn', '104_NTU', '104_Ni',
    '104_ORP', '104_Pb', '104_Si', '104_Zn', '104_pH60',
    'DeZn_Feed_Flow', 'DeZn_Feed_T',
    'H2S_tk01', 'H2S_tk02',
    '106_Ni', '106_Zn',
    'tk05_Al', 'tk05_Ca', 'tk05_Co', 'tk05_Cr', 'tk05_Cu', 'tk05_Fe',
    'tk05_Mg', 'tk05_Mn', 'tk05_Ni', 'tk05_Pb', 'tk05_Si', 'tk05_Zn',
    'DeZn_TK05_T',
]

# Predictor columns: the same list, in the same order, without the two 106
# columns.
paramList_znNi_predictors = [
    name for name in paramList_104_105_106
    if name not in ('106_Ni', '106_Zn')
]
len(paramList_znNi_predictors)

34

In [69]:
# Restrict the NaN-free frame to the retained project columns, then show how
# many columns remain and what they are.
DF_main_20152020 = DF_NTRL_TK05_MSTK01_4.loc[:, paramList_104_105_106]
print(DF_main_20152020.shape[1])
DF_main_20152020.columns

36


Index(['104_Al', '104_Ca', '104_Co', '104_Cr', '104_Cu', '104_Fe', '104_Fe2',
       '104_Ft', '104_Mg', '104_Mn', '104_NTU', '104_Ni', '104_ORP', '104_Pb',
       '104_Si', '104_Zn', '104_pH60', 'DeZn_Feed_Flow', 'DeZn_Feed_T',
       'H2S_tk01', 'H2S_tk02', '106_Ni', '106_Zn', 'tk05_Al', 'tk05_Ca',
       'tk05_Co', 'tk05_Cr', 'tk05_Cu', 'tk05_Fe', 'tk05_Mg', 'tk05_Mn',
       'tk05_Ni', 'tk05_Pb', 'tk05_Si', 'tk05_Zn', 'DeZn_TK05_T'],
      dtype='object')

In [70]:
# Forward linear interpolation followed by removal of any rows still holding
# NaN. DF_main_20152020 is a column subset of a frame that already went
# through dropna(), so both steps are expected to leave the data unchanged.
DF_MAIN_2015_2020 = (
    DF_main_20152020
    .interpolate(method='linear', limit_direction='forward')
    .dropna()
)

In [82]:
# NOTE: this cell carries execution count 82, i.e. it was run after the
# cleaning cell (In [71]) that appears below it in this file and defines
# DF_clean1 and DF_main_FULL_CLEAN. As far as this file shows, running the
# cells strictly top to bottom would hit a NameError here.
# Only the last expression of a cell is echoed, so the single number printed
# below is len(DF_main_FULL_CLEAN); the value of len(DF_clean1) is discarded.
len(DF_clean1)
len(DF_main_FULL_CLEAN)

32081

In [71]:
def _drop_outside(frame, column, below=None, above=None):
    """Return *frame* without the rows whose *column* value is out of range.

    A row is rejected when its value is strictly less than *below* or strictly
    greater than *above*; pass only one bound for a one-sided filter. Rows are
    removed by index label, via ``frame.drop``.
    """
    values = frame[column]
    if below is None:
        reject = values > above
    elif above is None:
        reject = values < below
    else:
        reject = (values < below) | (values > above)
    return frame.drop(frame[reject].index)


# Sequential range filters. Each step feeds the next, and each intermediate
# frame keeps its own name so that any stage can be inspected afterwards.
DF_clean1 = DF_MAIN_2015_2020  # same object, not a copy

# --- Stage 1: filters on the 104 columns ---
DF_clean1_pH = _drop_outside(DF_clean1, '104_pH60', below=2.0, above=4.0)
DF_clean1_pHFe = _drop_outside(DF_clean1_pH, '104_Fe', below=0.1)
DF_clean1_pHFeNTU = _drop_outside(DF_clean1_pHFe, '104_NTU', above=1000)
DF_clean1_pHFeNTUORP = _drop_outside(
    DF_clean1_pHFeNTU, '104_ORP', below=90, above=1000)
DF_clean1_pHFeNTUORPMg = _drop_outside(
    DF_clean1_pHFeNTUORP, '104_Mg', below=1)
DF_clean1_pHFeNTUORPMgPb = _drop_outside(
    DF_clean1_pHFeNTUORPMg, '104_Pb', above=0.001)
DF_clean1_pHFeNTUORPMgPbMn = _drop_outside(
    DF_clean1_pHFeNTUORPMgPb, '104_Mn', below=1)
DF_clean1_pHFeNTUORPMgPbMnAl = _drop_outside(
    DF_clean1_pHFeNTUORPMgPbMn, '104_Al', below=1)
DF_clean1_pHFeNTUORPMgPbMnAlSi = _drop_outside(
    DF_clean1_pHFeNTUORPMgPbMnAl, '104_Si', above=2)
DF_clean1_pHFeNTUORPMgPbMnAlSiZn = _drop_outside(
    DF_clean1_pHFeNTUORPMgPbMnAlSi, '104_Zn', above=0.2)
DF_clean1_pHFeNTUORPMgPbMnAlSiZnCu = _drop_outside(
    DF_clean1_pHFeNTUORPMgPbMnAlSiZn, '104_Cu', above=0.1)
DF_clean1_pHFeNTUORPMgPbMnAlSiZnCuFe2 = _drop_outside(
    DF_clean1_pHFeNTUORPMgPbMnAlSiZnCu, '104_Fe2', below=0.01)
DF_clean1_pHFeNTUORPMgPbMnAlSiZnCuFe2Cr = _drop_outside(
    DF_clean1_pHFeNTUORPMgPbMnAlSiZnCuFe2, '104_Cr', below=0.05)
DF_clean1_pHFeNTUORPMgPbMnAlSiZnCuFe2CrCa = _drop_outside(
    DF_clean1_pHFeNTUORPMgPbMnAlSiZnCuFe2Cr, '104_Ca', below=0.09)
DF_clean2 = DF_clean1_pHFeNTUORPMgPbMnAlSiZnCuFe2CrCa

# --- Stage 2: filters on the tk05 columns and the DeZn feed flow ---
DF_clean3_Ca = _drop_outside(DF_clean2, 'tk05_Ca', below=0.01)
DF_clean3_CaAl = _drop_outside(DF_clean3_Ca, 'tk05_Al', below=1.0)
DF_clean3_CaAlFeedF = _drop_outside(
    DF_clean3_CaAl, 'DeZn_Feed_Flow', below=300)
DF_clean3_CaAlFeedFZn = _drop_outside(
    DF_clean3_CaAlFeedF, 'tk05_Zn', below=0.0001)
# tk05_Co is bounded on both sides, in two separate steps.
DF_clean3_CaAlFeedFZnCo = _drop_outside(
    DF_clean3_CaAlFeedFZn, 'tk05_Co', below=0.04)
DF_clean3_CaAlFeedFZnCo2 = _drop_outside(
    DF_clean3_CaAlFeedFZnCo, 'tk05_Co', above=0.9)
DF_main_FULL_CLEAN = DF_clean3_CaAlFeedFZnCo2

In [74]:
# Sanity check: the row filters must not have changed the number of columns.
print(len(DF_main_FULL_CLEAN.columns))

36


In [75]:
# Predictor column names for the Zn-drop model.
# NOTE(review): 'tk13_Zn' and 'tk13_solids%' are not among the columns printed
# for DF_main_FULL_CLEAN / DF_main_20152020 in this notebook, so selecting a
# frame with this list would presumably raise a KeyError — confirm whether the
# list was carried over from another dataset. Its length (36) matching the
# frame's column count looks coincidental.
zndrop_predictors = [
    # 104 columns
    '104_Pb', '104_Zn', '104_Cu', '104_Ni', '104_Co', '104_Fe', '104_Mn',
    '104_Cr', '104_Ca', '104_Si', '104_Al', '104_Mg', '104_Fe2',
    '104_pH60', '104_ORP', '104_NTU', '104_Ft',
    # tk13 columns
    'tk13_Zn', 'tk13_solids%',
    # DeZn feed flow and temperatures
    'DeZn_Feed_Flow', 'DeZn_Feed_T', 'DeZn_TK05_T',
    # H2S columns
    'H2S_tk01', 'H2S_tk02',
    # tk05 columns
    'tk05_Al', 'tk05_Ca', 'tk05_Co', 'tk05_Cr', 'tk05_Cu', 'tk05_Fe',
    'tk05_Mg', 'tk05_Mn', 'tk05_Ni', 'tk05_Pb', 'tk05_Si', 'tk05_Zn',
]
len(zndrop_predictors)

36

In [77]:
# Short alias for the fully cleaned frame (same object, no copy is made).
DF = DF_main_FULL_CLEAN

In [83]:
# Save the cleaned dataset to an Excel workbook at a hard-coded local path.
# No index=False is passed, so the frame's index is presumably written as the
# first column (pandas default) — confirm if the file is re-read elsewhere.
DF.to_excel(r'C:\Users\v.t.flores\Documents\ZnNi_project_2015_2020_dataset.xlsx')