In [7]:
class sensor:
    """Small record linking one sensor number to the file it was read from."""

    def __init__(self, fileName, sensorNum):
        # Both arguments are stored exactly as given; nothing is validated.
        self.parentFile, self.sensor = fileName, sensorNum
        # self.sensorID TODO: WILL BE ADDED IN FUTURE UPDATE

In [8]:
class Data():
    """One measurement CSV together with the quantities derived from it.

    Attributes set by ``__init__``:
        fileName: the ``csvFile`` argument, stored as given.
        file: the CSV as a DataFrame, extended with the columns
            ``Resistance0`` .. ``Resistance15``.
        dataVOut, dataVRef: the ``Vout0``..``Vout15`` and
            ``Vref0``..``Vref15`` column slices of the CSV.
        resistance: the ``Resistance0``..``Resistance15`` column slice.
        dataRatio: element-wise ``dataVOut / dataVRef`` as a DataFrame with
            default integer row and column labels.
        partitionSpots: result of ``autoSelectData(resistance, slope)``.
        dataTarget, dataHumid: the ``Target ppm`` and ``Humidity%`` columns.
        testSpots: result of ``selectPeriods(file, 225)``.
        sensors: list of 16 ``sensor`` objects, numbered 0..15.
    """

    def __init__(self, csvFile, slope):
        """Load ``csvFile`` and compute every derived attribute.

        Args:
            csvFile: anything ``pd.read_csv`` accepts (path or open file).
            slope: threshold forwarded unchanged to ``autoSelectData``.
        """

        # Store File name and file as dataframe
        self.fileName = csvFile
        self.file = pd.read_csv(csvFile)

        self.dataVOut = self.file.loc[:, 'Vout0':'Vout15']
        self.dataVRef = self.file.loc[:, 'Vref0':'Vref15']

        # Getting the resistance of each sensor. All 16 columns are collected
        # first and appended with a single concat, instead of re-copying the
        # whole frame once per sensor.
        # NOTE(review): the constant 5 is presumably the supply voltage - confirm.
        resistanceColumns = {}
        for s in range(0, 16):
            vRef = self.file['Vref' + str(s)]
            vOut = self.file['Vout' + str(s)]
            resistanceColumns['Resistance' + str(s)] = (vRef * (5 - vOut)) / (
                    vOut * (5 - vRef))
        resistanceFrame = pd.DataFrame(resistanceColumns, index=self.file.index)
        self.file = pd.concat([self.file, resistanceFrame], axis=1)

        self.resistance = self.file.loc[:, 'Resistance0':'Resistance15']

        # Plain numpy division: pairs columns by position, not by label
        self.dataRatio = self.dataVOut.to_numpy() / self.dataVRef.to_numpy()
        self.dataRatio = pd.DataFrame(self.dataRatio)

        # TODO: GET THIS BETTER AT SELECTING DATA

        self.partitionSpots = autoSelectData(self.resistance, slope)

        self.dataTarget = self.file.loc[:, 'Target ppm']

        self.dataHumid = self.file.loc[:, 'Humidity%']

        self.testSpots = selectPeriods(self.file, 225)

        self.sensors = []
        for s in range(0, 16):  # for each sensor
            self.sensors.append(sensor(self.fileName, s))

In [9]:
import numpy as np
import pandas as pd
import scipy.stats as st

def summarize(df: Data, N_sensors, partitionSpots):
    """Build one summary row per (partition, sensor) pair.

    Args:
        df: a ``Data`` object; its ``sensors``, ``dataTarget``, ``resistance``,
            ``file`` and ``dataRatio`` attributes are read.
        N_sensors: number of sensors to summarize (sensors ``0 .. N_sensors-1``).
        partitionSpots: sequence of ``[start, end]`` row labels; ``end`` is
            inclusive because the windows are taken with ``.loc``.

    Returns:
        DataFrame with columns SensorID, Target PPM, Resistance, Temperature,
        RelativeHumidity, lowInterval, highInterval, Ratio. Rows are ordered
        partition-major, then by sensor. All columns are float, matching the
        frame the row-by-row version used to produce.
    """
    columns = ['SensorID', 'Target PPM', 'Resistance', 'Temperature',
               'RelativeHumidity',
               'lowInterval', 'highInterval',
               'Ratio']

    # Ask only for the sensors that are summarized (was hard-coded to 16)
    low, high = generateConfidinceInterval(partitionSpots, df.resistance, N_sensors)

    rows = []
    for i, spot in enumerate(partitionSpots):
        start = spot[0]
        end = spot[1]

        # These values do not depend on the sensor: compute once per partition
        target = df.dataTarget.loc[start]
        temperature = df.file.loc[start:end, 'SHTTemp(C)'].mean()
        humidity = df.file.loc[start:end, 'Humidity%'].mean()
        # NOTE(review): this averages over every dataRatio column, so all
        # sensors of a partition share one Ratio value - confirm that a
        # per-sensor ratio is not what was intended.
        ratio = df.dataRatio.loc[start:end].mean(numeric_only=True).mean()

        for s in range(0, N_sensors):  # for each sensor
            rows.append([
                df.sensors[s].sensor,
                target,
                df.resistance.loc[start:end, 'Resistance' + str(s)].mean(),
                temperature,
                humidity,
                low[i][s], high[i][s],
                ratio,
            ])

    # Build the frame once instead of concatenating one row at a time
    return pd.DataFrame(rows, columns=columns, dtype=float)



def generateConfidinceInterval(partitionSpots, resistance, nSensors):
    """Compute a 95% Student-t confidence interval of the mean resistance.

    One interval is produced for every (partition, sensor) pair.

    Args:
        partitionSpots: sequence of ``[start, end]`` row labels; ``end`` is
            inclusive because the window is taken with ``.loc``.
        resistance: DataFrame holding columns ``Resistance0`` ..
            ``Resistance<nSensors-1>``.
        nSensors: number of sensor columns to process.

    Returns:
        ``(lowInterval, highInterval)``: two lists indexed as
        ``[partition][sensor]`` with the lower and upper interval bounds.
        A window with fewer than two samples yields ``nan`` for both bounds.
    """
    lowInterval = []
    highInterval = []

    for item in partitionSpots:
        lowEnd = item[0]
        highEnd = item[1]
        subLow = []
        subHigh = []
        for i in range(0, nSensors):
            samples = resistance.loc[lowEnd:highEnd, 'Resistance' + str(i)]
            nSamples = len(samples)

            if nSamples < 2:
                # The standard error is undefined for fewer than two samples
                subLow.append(np.nan)
                subHigh.append(np.nan)
                continue

            # The confidence level is passed positionally because its keyword
            # was renamed from ``alpha`` to ``confidence`` in newer SciPy.
            # The interval of a mean built from the sample standard error has
            # n - 1 degrees of freedom.
            tempLowInterval, tempHighInterval = st.t.interval(
                0.95, df=nSamples - 1, loc=np.mean(samples), scale=st.sem(samples))
            subLow.append(tempLowInterval)
            subHigh.append(tempHighInterval)
        lowInterval.append(subLow)
        highInterval.append(subHigh)

    return lowInterval, highInterval


def autoSelectData(resistance, slope, window=200, trim=50):
    """Pick row ranges where the sensor-averaged resistance is nearly flat.

    A region starts at ``windowStart`` and grows by ``window`` rows at a time.
    After each growth a straight line is fitted to the mean of all columns
    against the row labels. While the fitted slope stays within ``slope`` the
    region keeps growing; once it is exceeded (or the data runs out) the
    region is recorded, provided the slope of the previous fit was within
    ``slope``.

    Args:
        resistance: DataFrame of resistance columns; rows are sliced by
            position.
        slope: largest absolute fitted slope still treated as flat.
        window: number of rows added per step (default 200).
        trim: number of rows cut from the end of each recorded region
            (default 50).

    Returns:
        List of ``[start, end]`` pairs of row positions.

    Raises:
        ValueError: if ``window`` is smaller than 1 (the scan could not advance).
    """
    if window < 1:
        raise ValueError('window must be at least 1')

    nRows = len(resistance)
    windowStart = 0
    windowStop = windowStart + window
    selectedSpots = []
    # TODO: figure out optimal value for slopedif
    slopeDiff = slope
    currentSlope = 0
    # 10 acts as "no previous fit yet"; it only blocks a region when slope < 10
    lastSlope = 10

    while windowStop <= nRows:

        if slopeDiff >= abs(currentSlope):
            chunk = resistance[windowStart:windowStop]
            rowLabels = list(chunk.T.columns.values)
            b = np.polyfit(rowLabels, chunk.T.mean(), 1)

            if currentSlope != 0:
                lastSlope = currentSlope
            currentSlope = b[0]
            windowStop += window

            # Strictly greater: the loop is about to end. With ``>=`` a frame
            # whose length is an exact multiple of ``window`` recorded the
            # same region twice (once here and once on the next pass).
            if windowStop > nRows and slopeDiff >= abs(lastSlope):
                selectedSpots.append([windowStart, nRows - trim])

        else:
            if slopeDiff >= abs(lastSlope):
                selectedSpots.append([windowStart, windowStop - trim])
            # Start a fresh region after the rows that were already examined
            windowStart = windowStop
            windowStop = windowStart + window
            currentSlope = 0
            lastSlope = 10

    return selectedSpots


def selectPeriods(df, delta_t):
    """Return ``[start, end]`` windows that end just before each target change.

    Rows ``5 .. len(df) - 4`` are scanned. Whenever the integer value of
    ``'Target ppm'`` differs from the previous row, the previous row becomes
    the end of a window. Row ``len(df) - 3`` always closes a final window.

    Args:
        df: DataFrame with a ``'Target ppm'`` column, addressed with ``.loc``
            by integer row label.
        delta_t: number of rows each window reaches back from its end.

    Returns:
        List of ``[start, end]`` pairs. ``start`` is clamped to 0 so a target
        change early in the file cannot produce a negative row label. Frames
        with fewer than 3 rows produce no final window.
    """
    lastRow = len(df) - 3
    endSpots = []

    for pointer in range(5, lastRow):
        previousTarget = int(df.loc[pointer - 1, 'Target ppm'])
        currentTarget = int(df.loc[pointer, 'Target ppm'])
        if currentTarget != previousTarget:
            endSpots.append(pointer - 1)

    if lastRow >= 0:
        endSpots.append(lastRow)

    # Clamp: a negative start is not a valid row label for a scalar .loc lookup
    return [[max(item - delta_t, 0), item] for item in endSpots]

In [10]:
import glob
import os
import pandas as pd


def main(path=r'/Users/benfunk/DataspellProjects/MethaneDataScience/Raw Data',
         outputDir='./Output'):
    """Summarize every CSV in ``path`` and write the combined table to disk.

    Each ``*.csv`` file is loaded into a ``Data`` object and summarized over
    its ``testSpots`` for 16 sensors. The per-file summaries are stacked and
    written to ``<outputDir>/BenSummaryNew.csv``.

    Args:
        path: directory that holds the raw CSV files.
        outputDir: directory for the summary file; created when missing.
    """
    fileSummaries = []  # one summary frame per input file

    # Sorted so the row order of the output does not depend on the file system
    for filename in sorted(glob.glob(os.path.join(path, '*.csv'))):
        # Pass the path itself: pandas opens and closes the file, and
        # Data.fileName keeps a usable name instead of a closed file object.
        myData = Data(csvFile=filename, slope=6.7e-05)

        fsummary = summarize(df=myData, N_sensors=16, partitionSpots=myData.testSpots)
        fileSummaries.append(fsummary)

    # Stack once; with no input files an empty table is still written
    if fileSummaries:
        summary = pd.concat(fileSummaries, ignore_index=True)
    else:
        summary = pd.DataFrame()

    os.makedirs(outputDir, exist_ok=True)
    summary_fname = os.path.join(outputDir, 'BenSummaryNew.csv')
    summary.to_csv(summary_fname)