# SCADA Flow Rate Calculation Model

In [1]:
import pandas as pd
import os
import csv

import re
import datetime
import io
from zipfile import ZipFile

import pandas as pd
import numpy as np
import pandas_gbq
import janitor

# Section 1

In this section I read our sewer SCADA data into a pandas dataframe and do some quick formatting. I'll begin by converting the DateAndTime column to datetime format, removing unneeded columns, and dropping any NA values. Finally I will remove erroneous 0's. This data contains 1's and 0's in the Val column. A 1 signals a pump turning on while a 0 signals a pump turning off. The relationship needs to be 1:1; however, SCADA inserts erroneous zeros into the output every 15 minutes, so I will remove them. Once this process is finished we are ready to begin section 2.

## Read SCADA data into Pandas Dataframe

In [2]:
# Load the sewer SCADA export into a dataframe.
scada_csv_path = 'Kelly Island Data Edited.csv'
df = pd.read_csv(scada_csv_path)

## Convert the DateAndTime column to Datetime Format

In [3]:
# Parse the timestamp strings; values that do not match the format are coerced to NaT.
timestamp_format = "%m/%d/%Y %H:%M:%S.%f"
df['DateAndTime'] = pd.to_datetime(
    df['DateAndTime'],
    format=timestamp_format,
    errors='coerce',
)


## Format the dataframe

In [4]:
# Remove the columns the model does not use.
unused_columns = ['Status', 'Marker', 'Unnamed: 6', 'Unnamed: 7']
df.drop(columns=unused_columns, inplace=True)

In [5]:
# Drop rows with any missing value, then renumber the index 0..n-1.
# dropna() keeps the surviving rows' original labels, so without the reset a
# lookup by row number (df.loc[i]) can hit a missing label or the wrong row.
df = df.dropna().reset_index(drop=True)

In [6]:
#df.head(20)

## Remove Extraneous 1's and 0's

In [7]:
# Remove pairs of adjacent 0 readings that share the same timestamp.
#
# The scan runs from the last row towards the first using row positions, and
# the matching rows are dropped in a single step afterwards. Dropping and
# re-indexing inside the loop would shrink the frame while the loop still
# counts down from the original length, so row numbers could point past the
# end of the frame or at a different row than intended.
df = df.reset_index(drop=True)

readings = df['Val'].to_numpy()
timestamps = df['DateAndTime'].to_numpy()

rows_to_drop = []
position = len(df) - 1
while position >= 1:
    is_duplicate_zero_pair = (
        readings[position] == 0
        and readings[position - 1] == 0
        and timestamps[position] == timestamps[position - 1]
    )
    if is_duplicate_zero_pair:
        # Drop both rows of the pair and step past them.
        rows_to_drop.extend((position - 1, position))
        position -= 2
    else:
        position -= 1

df = df.drop(index=rows_to_drop).reset_index(drop=True)
    

# Section 2

In this section I will create a new dataframe called calc_df, which will hold all of our data going forward. In section 2 I'll begin by filling the new dataframe with only time stamps containing pumps turning on or off (Val column 1 for on and 0 for off). From there I will convert both on and off timestamps into Unix time and calculate the difference in time between the pump turning on and off. Remove the # in front of any of the calc_df lines to see a preview of the dataframe.

## Let's set up our new dataframe

In [8]:
# Create the empty results table with one placeholder per column, in display order.
calc_df = pd.DataFrame()
for column_name, placeholder in [
    ('StartDateAndTime', ()),
    ('EndDateAndTime', ()),
    ('StartUnix', float()),
    ('EndUnix', float()),
    ('Time_diff', ()),
    ('Pump_num', ()),
]:
    calc_df[column_name] = placeholder

## Here let's fill calc_df with On and Off times + Pump Numbers

In [None]:
# Pair every pump "on" reading (Val == 1) with the "off" reading (Val == 0) that follows it.
#
# The cycles are collected in a plain list and converted to a dataframe once:
# DataFrame.append was removed in pandas 2.0, and it copied the whole frame on
# every call.
pump_cycles = []

for _, row in df.iterrows():
    if row['Val'] == 1:
        # A pump switched on: open a new cycle with no end time yet.
        pump_cycles.append({
            'StartDateAndTime': row['DateAndTime'],
            'EndDateAndTime': pd.NaT,
            'Pump_num': row['TagIndex'],
        })
    elif row['Val'] == 0 and pump_cycles:
        # A pump switched off: close the most recently opened cycle.
        # Zeros seen before the first 1 are ignored; a later zero overwrites
        # the end time and pump number written by an earlier one.
        pump_cycles[-1]['EndDateAndTime'] = row['DateAndTime']
        pump_cycles[-1]['Pump_num'] = row['TagIndex']

calc_df = pd.DataFrame(
    pump_cycles,
    columns=['StartDateAndTime', 'EndDateAndTime', 'Pump_num'],
)

# Columns that are computed in later cells start out empty (NaN).
for placeholder_column in ('StartUnix', 'EndUnix', 'Time_diff'):
    calc_df[placeholder_column] = float('nan')

# Keep the column order used by the rest of the notebook.
calc_df = calc_df[[
    'StartDateAndTime',
    'EndDateAndTime',
    'StartUnix',
    'EndUnix',
    'Time_diff',
    'Pump_num',
]]

In [10]:
# Convert the end-time column to pandas datetime values.
end_times = pd.to_datetime(calc_df['EndDateAndTime'])
calc_df['EndDateAndTime'] = end_times

#calc_df.head(20)

## Let's delete any rows that don't have either a start or end time

In [11]:
# Keep only cycles that have both a start and an end time, then renumber the rows from 0.
calc_df = (
    calc_df
    .dropna(subset=['StartDateAndTime', 'EndDateAndTime'])
    .reset_index(drop=True)
)


In [13]:
#calc_df.head(20)

## Now let's get Unix time for each timestamp

In [14]:
# Unix time in seconds for each start and end timestamp.
# Subtracting the epoch and dividing by one second does not depend on the
# platform's integer width or on the datetime column's storage resolution,
# unlike casting to int and dividing by 10**9.
unix_epoch = pd.Timestamp("1970-01-01")
one_second = pd.Timedelta(seconds=1)

calc_df['StartUnix'] = (pd.to_datetime(calc_df['StartDateAndTime']) - unix_epoch) / one_second
calc_df['EndUnix'] = (pd.to_datetime(calc_df['EndDateAndTime']) - unix_epoch) / one_second

In [15]:
#calc_df.head(20)

## Calculating the time difference between Pumps turning on and off

In [16]:
# Pump run time per cycle: end minus start Unix time (seconds), divided by 60 to get minutes.
# Computed on whole columns instead of row by row; the values are the same.
calc_df['Time_diff'] = (calc_df['EndUnix'] - calc_df['StartUnix']) / 60

In [17]:
#calc_df

# Section 3

In section 3 I will add elevation difference, volume per foot, observed pumping flow rate, average flow rate per pump, and average observed pumping flow rate (gpm).

Elevation Difference - This value can be changed for the desired lift station. Please note that each lift station will have a different elevation level. To change the elevation for a different lift station, simply change the 2.2 in the first cell below.

Observed pumping flow rate - To calculate this value we first need to input the wetwell's diameter, again this value will differ per each lift station. Simply change the diameter in the diam_well = (5) statement to the desired value. From there the vol_per_foot calculation will update and be added to the calc_df. Now we have all the information in the calc_df to calculate observed pumping flow rate. I'll take the elevation difference value times the volume per foot value, and divide the result by our time difference. The resulting value will be added to the calc_df in the observed pumping flow rate column.

Average flow rate per pump - Average flow rate per pump is calculated by adding the values in the observed pumping flow rate column for each individual pump, and then dividing that sum by the count of values for that pump. So if I iterate over 4 rows of the calc_df, and pump 1 has 3 out of the 4 rows with an observed pumping flow rate of 100, 120, and 100, the formula will sum these three rows (320) and divide by the number of rows that were related to pump 1 (3). The average flow rate for pump 1 in row 4 would be about 106.7.

Average observed pumping flow rate (gpm) - To calculate this value I simply add all the average flow rates for both pumps and divide the sum by the count of rows I have iterated over. For example, if I have iterated over 4 rows, and 2 of the rows are pump 1 and 2 are pump 2, I'll add up the values from both pumps (100, 110, 100, 120) and divide the sum by 4 to calculate the average observed pumping flow rate in GPM of 107.5.


## Add in Elevation Difference

In [18]:
# Elevation difference applied to every cycle; change this value for a different lift station.
elevation_dif = 2.2
calc_df['Elevation_dif'] = elevation_dif

In [19]:
#calc_df

## Calculate Observed Pumping Flow Rate

### First Lets get the volume per foot for our well

In [20]:
# Diameter of the wet well in ft; change this value for a different lift station.
diam_well = 5

# Volume per foot of well depth: diameter squared times 3.14, divided by 4,
# times 7.48 (presumably gallons per cubic foot -- confirm), rounded to a whole number.
diameter_squared_times_pi = (diam_well * diam_well) * 3.14
vol_per_foot = round(diameter_squared_times_pi / 4 * 7.48)

In [21]:
# Store the well's volume per foot on every cycle row.
calc_df = calc_df.assign(volume_per_foot=vol_per_foot)

In [22]:
#calc_df

## Now let's calculate our observed pumping Flow Rate

In [23]:
# Integer placeholder column; the next cell fills it in row by row.
calc_df['observed_pumping_flow_rate'] = 0

In [24]:
# Observed pumping flow rate per cycle: (elevation difference * volume per foot) / run time,
# rounded to the nearest whole number.
for row_label, cycle in calc_df.iterrows():
    volume_pumped = cycle['Elevation_dif'] * cycle['volume_per_foot']
    calc_df.at[row_label, 'observed_pumping_flow_rate'] = round(volume_pumped / cycle['Time_diff'])

In [25]:
#calc_df

## Now Let's Calculate Average Flow Rate Per Pump

In [26]:
# Placeholder column for the per-pump running average computed in the next cell.
calc_df['avg_flow_rate_pump'] = 0

In [27]:
# Running average of the observed pumping flow rate, tracked separately for
# pump tags 975 and 974. Rows with any other pump number are left untouched.
# Each tally is [running sum, number of rows seen] for that pump.
pump_tallies = {975: [0, 0], 974: [0, 0]}

for row_label, cycle in calc_df.iterrows():
    tally = pump_tallies.get(cycle['Pump_num'])
    if tally is None:
        continue
    tally[0] += cycle['observed_pumping_flow_rate']
    tally[1] += 1
    calc_df.at[row_label, 'avg_flow_rate_pump'] = tally[0] / tally[1]
    

In [28]:
#calc_df

## Finally Let's calculate our Average Observed Pumping Flow Rate (gpm)

In [29]:
# Placeholder column for the running average computed in the next cell.
calc_df['average_observed_pumping_flow_rate'] = 0

In [30]:
# Running average of the avg_flow_rate_pump column over all rows seen so far.
running_total = 0

for rows_seen, (row_label, cycle) in enumerate(calc_df.iterrows(), start=1):
    running_total += cycle['avg_flow_rate_pump']
    calc_df.at[row_label, 'average_observed_pumping_flow_rate'] = running_total / rows_seen


In [31]:
#calc_df.head(20)

# Section 4

In this section I will calculate the time to fill, inflow, and average inflow. These values need to be accounted for when calculating flow rate, as even when the pumps are turned on within a wet well and the level is dropping, water is still flowing through the pipes into the well.

Time to fill - To calculate time to fill I take the stop Unix time of one row within calc_df (when the pump turned off) and subtract this value from the start Unix time of the next row (when the pump turned back on). The difference between the pump turning off and the next pump turning on, converted to minutes, is my time to fill measure, and it is stored on that next row.

Inflow - To calculate my inflow value for each row in calc_df, I take the row's volume per foot value and multiply by the row's elevation difference value. I then divide that result by the row's time to fill value we just calculated.

Average Inflow - To calculate my average inflow for each row in calc_df, I simply add all the values for inflow in the rows I have iterated over and divide by the count of those rows. So if I have iterated over 4 rows so far in calc_df and the inflow values are (60, 40, 20, 60), I sum these values and divide by four to get my average inflow value in row 4, 45.

## Calculate time to fill

In [32]:
# Float placeholder column; rows that are never filled in keep the value 0.0.
calc_df['Time_to_Fill'] = 0.0

In [33]:
# Every cycle's stop time, in row order.
stop_list = calc_df['EndUnix'].tolist()

# Every cycle's start time except the first row's, so that start_list[k]
# belongs to the row right after the one that stop_list[k] belongs to.
start_list = calc_df['StartUnix'].iloc[1:].tolist()
    

In [34]:
# Time to fill in minutes: this row's start time minus the previous row's stop time.
# Rows are numbered from 1 because the first row has no previous cycle.
for row_label, (next_start, previous_stop) in enumerate(zip(start_list, stop_list), start=1):
    calc_df.at[row_label, 'Time_to_Fill'] = (next_start - previous_stop) / 60

In [35]:
#calc_df

## Calculate Inflow and Average Inflow

In [36]:
# Float placeholder columns for inflow and its running average.
for new_column in ('Inflow', 'Avg_Inflow'):
    calc_df[new_column] = 0.0

In [37]:
# Inflow per cycle: volume per foot * elevation difference / time to fill.
# Rows whose time to fill is 0 are skipped and keep their existing Inflow value.
for row_label, cycle in calc_df.iterrows():
    fill_minutes = cycle['Time_to_Fill']
    if fill_minutes == 0:
        continue
    calc_df.at[row_label, 'Inflow'] = cycle['volume_per_foot'] * cycle['Elevation_dif'] / fill_minutes

In [38]:
# Running average of the Inflow column over all rows seen so far.
inflow_total = 0

for rows_seen, (row_label, cycle) in enumerate(calc_df.iterrows(), start=1):
    inflow_total += cycle['Inflow']
    calc_df.at[row_label, 'Avg_Inflow'] = inflow_total / rows_seen


In [39]:
#calc_df

# Section 5

In this section I will calculate the actual pump flow rate in GPM, average flow rate per pump in GPM, and average pump flow rate in GPM.

Actual pump flow rate - To calculate actual pump flow rate for each row in calc_df, I simply take the row's observed pumping flow rate value we calculated earlier, and add the row's inflow value.

Average flow rate per pump - To calculate the average flow rate per pump I add the values in the actual pump flow rate column for each individual pump, and then divide that sum by the count of values for that pump. So if I iterate over 4 rows of the calc_df, and pump 1 has 3 out of the 4 rows with an actual pump flow rate of 100, 120, and 100, the formula will sum these three rows (320) and divide by the number of rows that were related to pump 1 (3). The average flow rate for pump 1 in row 4 would be about 106.7.

Average pump flow rate - To calculate this value I simply add all the actual pump flow rates for both pumps and divide the sum by the count of rows I have iterated over. For example, if I have iterated over 4 rows, and 2 of the rows are pump 1 and 2 are pump 2, I'll add up the values from both pumps (100, 110, 100, 120) and divide the sum by 4 to calculate the average pump flow rate in GPM of 107.5.

## Calculate Actual Pump Flow Rate (GPM)

In [40]:
# Float placeholder column; the next cell fills it in.
calc_df['Actual_Pump_Flow_Rate'] = 0.0

In [44]:
# Actual pump flow rate: observed pumping flow rate plus inflow, rounded to the
# nearest whole number. Computed on whole columns instead of row by row, which
# also leaves a missing value as NaN rather than raising inside round().
calc_df['Actual_Pump_Flow_Rate'] = (
    calc_df['observed_pumping_flow_rate'] + calc_df['Inflow']
).round()

In [45]:
#calc_df.head(20)

## Finally Let's calculate our Average Flow Rate Per Pump and the Average Pump Flow Rate in (GPM)

In [46]:
# Placeholder columns for the per-pump and combined running averages.
for average_column in (
    'Average_Pump_1_Flow_Rate',
    'Average_Pump_2_Flow_Rate',
    'Average_Pump_Flow_Rate(1&2)',
):
    calc_df[average_column] = 0

In [47]:
# Running average of the actual pump flow rate over rows whose pump number is 974.
# Rows for any other pump number are skipped and keep their existing value.
pump_total = 0
pump_rows = 0

for row_label, cycle in calc_df.iterrows():
    if cycle['Pump_num'] != 974:
        continue
    pump_total += cycle['Actual_Pump_Flow_Rate']
    pump_rows += 1
    calc_df.at[row_label, 'Average_Pump_1_Flow_Rate'] = pump_total / pump_rows

In [48]:
# Running average of the actual pump flow rate over rows whose pump number is 975.
# Rows for any other pump number are skipped and keep their existing value.
pump_total = 0
pump_rows = 0

for row_label, cycle in calc_df.iterrows():
    if cycle['Pump_num'] != 975:
        continue
    pump_total += cycle['Actual_Pump_Flow_Rate']
    pump_rows += 1
    calc_df.at[row_label, 'Average_Pump_2_Flow_Rate'] = pump_total / pump_rows

In [49]:
# Running average of the actual pump flow rate over every row seen so far, both pumps together.
flow_total = 0

for rows_seen, (row_label, cycle) in enumerate(calc_df.iterrows(), start=1):
    flow_total += cycle['Actual_Pump_Flow_Rate']
    calc_df.at[row_label, 'Average_Pump_Flow_Rate(1&2)'] = flow_total / rows_seen

In [50]:
# Display the finished model table; the rendered output elides the middle rows with "...".
calc_df

Unnamed: 0,StartDateAndTime,EndDateAndTime,StartUnix,EndUnix,Time_diff,Pump_num,Elevation_dif,volume_per_foot,observed_pumping_flow_rate,avg_flow_rate_pump,average_observed_pumping_flow_rate,Time_to_Fill,Inflow,Avg_Inflow,Actual_Pump_Flow_Rate,Average_Pump_1_Flow_Rate,Average_Pump_2_Flow_Rate,Average_Pump_Flow_Rate(1&2)
0,2022-09-01 09:03:12,2022-09-01 09:05:20,1.662023e+09,1.662023e+09,2.133333,975.0,2.2,147,152,152.000000,152.000000,0.000000,0.000000,0.000000,152.0,0.000000,152.000000,152.000000
1,2022-09-01 09:08:21,2022-09-01 09:09:47,1.662023e+09,1.662023e+09,1.433333,974.0,2.2,147,226,226.000000,189.000000,3.016667,107.204420,53.602210,333.0,333.000000,0.000000,242.500000
2,2022-09-01 09:12:48,2022-09-01 09:14:57,1.662024e+09,1.662024e+09,2.150000,975.0,2.2,147,150,151.000000,176.333333,3.016667,107.204420,71.469613,257.0,0.000000,204.500000,247.333333
3,2022-09-01 09:18:01,2022-09-01 09:19:26,1.662024e+09,1.662024e+09,1.416667,974.0,2.2,147,228,227.000000,189.000000,3.066667,105.456522,79.966340,333.0,333.000000,0.000000,268.750000
4,2022-09-01 09:22:32,2022-09-01 09:24:41,1.662024e+09,1.662024e+09,2.150000,975.0,2.2,147,150,150.666667,181.333333,3.100000,104.322581,84.837588,254.0,0.000000,221.000000,265.800000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4372,2022-09-22 17:16:10,2022-09-22 17:18:09,1.663867e+09,1.663867e+09,1.983333,975.0,2.2,147,163,198.980329,234.344729,3.900000,82.923077,84.097251,246.0,0.000000,284.586917,318.010976
4373,2022-09-22 17:22:04,2022-09-22 17:23:22,1.663867e+09,1.663867e+09,1.300000,974.0,2.2,147,249,269.295923,234.352719,3.916667,82.570213,84.096902,332.0,351.967476,0.000000,318.014175
4374,2022-09-22 17:27:09,2022-09-22 17:29:08,1.663868e+09,1.663868e+09,1.983333,975.0,2.2,147,163,198.963877,234.344630,3.783333,85.480176,84.097218,248.0,0.000000,284.570187,317.998171
4375,2022-09-22 17:32:51,2022-09-22 17:34:11,1.663868e+09,1.663868e+09,1.333333,974.0,2.2,147,243,269.283883,234.352615,3.716667,87.013453,84.097884,330.0,351.957418,0.000000,318.000914


In [None]:
# Save the finished model table to a CSV file.
output_csv_path = "Model Data for Kelly Lift Station.csv"
calc_df.to_csv(output_csv_path)