In [1]:
import pandas as pd
import numpy as np
import math
import datetime

In [15]:
#---------------------------------------------------------#
# Prototyping
#---------------------------------------------------------#

In [7]:
def read_data(name):
    """Load a measurement CSV and keep only its leading block of valid rows.

    Columns whose header matches "Unname" (e.g. the "Unnamed: 0" index column
    pandas creates for a header-less column) are dropped.  The table is then
    cut just before the first row whose third remaining column is missing;
    that row and every row after it are discarded.

    Parameters
    ----------
    name : str or path-like
        Location of the CSV file, e.g. "./manual_data.csv".

    Returns
    -------
    pandas.DataFrame
        The rows preceding the first row with a NaN in the third column
        (all rows when that column has no NaN).
    """
    raw = pd.read_csv(name)
    # Drop by column label without mutating the frame that was just read.
    df = raw.drop(columns=raw.filter(regex="Unname").columns)
    if df.empty:
        return df
    # Positional column access is spelled out with .iloc: integer keys on a
    # label-indexed row (row[2]) depend on a deprecated positional fallback.
    # isna() also copes with non-numeric columns, where math.isnan raised.
    missing = df.iloc[:, 2].isna().to_numpy()
    n_valid = int(missing.argmax()) if missing.any() else len(df)
    return df.iloc[:n_valid]

In [8]:
def sort_dates(eds_cols, window, source=None):
    """Add a 'Date' column of "first-last" date ranges, one pair per group.

    Rows are processed in consecutive groups of ``2 * window`` rows, the same
    grouping sort_time, sort_labels and sort_data use, so every column ends
    up with the same number of entries.  For each complete group the first
    and last dates are joined as "first-last" and stored twice (once for the
    pre entry, once for the post entry).  A trailing incomplete group is
    ignored.

    Parameters
    ----------
    eds_cols : dict
        Column dictionary being built; updated in place with the 'Date' key.
    window : int
        Number of pre (and of post) rows per group; must be >= 1.
    source : pandas.DataFrame, optional
        Table holding a 'Date' column of strings.  When omitted, the
        notebook-level ``df`` is used.

    Returns
    -------
    dict
        The same ``eds_cols`` object.

    Raises
    ------
    ValueError
        If ``window`` is smaller than 1.
    """
    if window < 1:
        raise ValueError("window must be a positive integer")
    # Fall back to the notebook-level table only when none is passed in.
    frame = df if source is None else source
    col_name = 'Date'
    group_size = window * 2
    dates = list(frame[col_name])
    ranges = []
    for start in range(0, len(dates) - group_size + 1, group_size):
        span = dates[start] + '-' + dates[start + group_size - 1]
        # one entry for the pre row and one for the post row
        ranges.extend([span, span])
    eds_cols.update({col_name: ranges})
    return eds_cols

In [9]:
def sort_time(eds_cols, window, source=None):
    """Add a 'Time' column holding the average pre and post time per group.

    Every "H:M:S" entry is converted to seconds.  Rows are processed in
    consecutive groups of ``2 * window`` rows; inside a group the rows at
    even positions are the pre measurements and the rows at odd positions are
    the post measurements.  Each half is averaged, truncated to whole
    seconds, and formatted with ``str(datetime.timedelta(...))``.  A trailing
    incomplete group is ignored.

    Parameters
    ----------
    eds_cols : dict
        Column dictionary being built; updated in place with the 'Time' key.
    window : int
        Number of pre (and of post) rows per group; must be >= 1.
    source : pandas.DataFrame, optional
        Table holding a 'Time' column of "H:M:S" strings.  When omitted, the
        notebook-level ``df`` is used.

    Returns
    -------
    dict
        The same ``eds_cols`` object.

    Raises
    ------
    ValueError
        If ``window`` is smaller than 1.
    """
    if window < 1:
        raise ValueError("window must be a positive integer")
    # Fall back to the notebook-level table only when none is passed in.
    frame = df if source is None else source
    col_name = 'Time'
    group_size = window * 2

    # convert every time stamp to seconds since midnight
    seconds = []
    for stamp in frame[col_name]:
        hours, minutes, secs = stamp.split(':')
        seconds.append(int(hours) * 60 * 60 + int(minutes) * 60 + int(secs))

    averaged = []
    for start in range(0, len(seconds) - group_size + 1, group_size):
        group = seconds[start:start + group_size]
        # separate pre (even positions) and post (odd positions) samples
        for samples in (group[0::2], group[1::2]):
            mean_seconds = sum(samples) / window
            # convert seconds back to a time string using datetime
            averaged.append(str(datetime.timedelta(seconds=int(mean_seconds))))
    eds_cols.update({col_name: averaged})
    return eds_cols

In [10]:
def sort_labels(eds_cols, window, source=None):
    """Add the 'EDS(#)' and 'EDS/CTRL(#)' label columns.

    For every complete group of ``2 * window`` rows in the 'EDS(#)' column,
    two entries (one for the pre row, one for the post row) are written to
    each label column: "1" in 'EDS(#)' and "EDS1" in 'EDS/CTRL(#)'.  The
    input values themselves are not inspected, only their count.

    Parameters
    ----------
    eds_cols : dict
        Column dictionary being built; updated in place with both keys.
    window : int
        Number of pre (and of post) rows per group; must be >= 1.
    source : pandas.DataFrame, optional
        Table holding an 'EDS(#)' column.  When omitted, the notebook-level
        ``df`` is used.

    Returns
    -------
    dict
        The same ``eds_cols`` object.

    Raises
    ------
    ValueError
        If ``window`` is smaller than 1.
    """
    if window < 1:
        raise ValueError("window must be a positive integer")
    # Fall back to the notebook-level table only when none is passed in.
    frame = df if source is None else source
    col_name = 'EDS(#)'
    col_name2 = 'EDS/CTRL(#)'
    # number of complete pre/post groups; a trailing partial group is ignored
    n_groups = len(frame[col_name]) // (window * 2)
    eds_cols.update({col_name: ["1"] * (2 * n_groups)})
    eds_cols.update({col_name2: ["EDS1"] * (2 * n_groups)})
    return eds_cols

In [11]:
def sort_data(name, eds_cols, window, source=None):
    """Add the averaged pre and post values of one measurement column.

    Rows are processed in consecutive groups of ``2 * window`` rows; inside a
    group the rows at even positions are the pre measurements and the rows at
    odd positions are the post measurements.  Each half is averaged and the
    two averages are appended (pre first, then post).  A trailing incomplete
    group is ignored.

    Parameters
    ----------
    name : str
        Name of the numeric column to average, e.g. 'Temperature(C)'.
    eds_cols : dict
        Column dictionary being built; updated in place with the ``name`` key.
    window : int
        Number of pre (and of post) rows per group; must be >= 1.
    source : pandas.DataFrame, optional
        Table holding the ``name`` column.  When omitted, the notebook-level
        ``df`` is used.

    Returns
    -------
    dict
        The same ``eds_cols`` object.

    Raises
    ------
    ValueError
        If ``window`` is smaller than 1.
    """
    if window < 1:
        raise ValueError("window must be a positive integer")
    # Fall back to the notebook-level table only when none is passed in.
    frame = df if source is None else source
    col_name = name
    group_size = window * 2
    values = list(frame[col_name])
    averages = []
    for start in range(0, len(values) - group_size + 1, group_size):
        group = values[start:start + group_size]
        # separate pre (even positions) and post (odd positions) samples
        averages.append(sum(group[0::2]) / window)
        averages.append(sum(group[1::2]) / window)
    eds_cols.update({col_name: averages})
    return eds_cols

In [14]:
def get_avg_noon_data(cols_list, window, path="./manual_data.csv"):
    """Build the table of averaged pre/post measurements.

    The CSV at ``path`` is loaded with read_data, then the Date, Time, label
    and numeric columns are produced by sort_dates, sort_time, sort_labels
    and sort_data and assembled into one DataFrame.

    Parameters
    ----------
    cols_list : list of str
        Names of the numeric columns to average.
    window : int
        Window size handed to every sort_* helper.
    path : str or path-like, optional
        CSV file to read; defaults to "./manual_data.csv".
        NOTE(review): the table this notebook displays for that file has
        *_Before/*_After columns rather than names such as 'OCV(V)' --
        confirm which file is the intended input.

    Returns
    -------
    pandas.DataFrame
        One column per key collected in the helpers' dictionary.

    Side effects
    ------------
    Rebinds the notebook-level ``df`` to the table that was read.
    """
    # The sort_* helpers look up a notebook-level `df`.  A plain local
    # assignment here would be invisible to them (NameError on a fresh
    # kernel), so the loaded table is published under that global name.
    global df
    df = read_data(path)
    # declare new dictionary for avg data
    eds_cols = {}
    # date ranges, average times, then the pre/post and EDS labels
    eds_cols = sort_dates(eds_cols, window)
    eds_cols = sort_time(eds_cols, window)
    eds_cols = sort_labels(eds_cols, window)
    # average every requested numeric column
    for column in cols_list:
        eds_cols = sort_data(column, eds_cols, window)
    # create new dataframe
    return pd.DataFrame(eds_cols)

In [None]:
# Measurement columns to average for the noon data table
cols_list = [
    'Temperature(C)',
    'Humidity(%)',
    'GPOA(W/M2)',
    'OCV(V)',
    'SCC(A)',
    'Power(W)',
    'PR',
    'SR',
]
# Build the averaged table with a window of 10 and show it as the cell output
eds = get_avg_noon_data(cols_list, window=10)
eds

In [12]:
# Load the manual measurements CSV through read_data, which drops "Unname*"
# columns and truncates the table at the first row with a NaN in its third column.
table = read_data("./manual_data.csv")

In [13]:
# Show the loaded table as the cell output.
table

Unnamed: 0,Date,Time,Temperature(C),Humidity(%),GPOA(W/M2),EDS(#),OCV_Before(V),OCV_After(V),SCC_Before(A),SCC_After(A),EDS_PWR_Before(W),EDS_PWR_After(W),EDS_PR_Before,EDS_PR_After,EDS_SR_Before,EDS_SR_After
0,1/21/2020,14:16:12,25.1,18.9,-1,1,0.307863,0.307863,0.030797,0.030797,0.004617,0.004617,-1,-1,-1,-1
1,1/22/2020,14:30:03,24.5,19.0,-1,1,0.307863,0.307863,0.030797,0.030797,0.004672,0.004672,-1,-1,-1,-1
2,1/23/2020,14:30:22,24.5,18.0,-1,1,0.246291,0.307863,0.030797,0.030797,0.003335,0.004672,-1,-1,-1,-1
3,1/24/2020,12:30:42,26.0,17.0,-1,1,0.307863,0.307863,0.030797,0.030797,0.003368,0.003368,-1,-1,-1,-1
4,1/25/2020,12:30:41,32.3,21.0,-1,1,0.307863,0.307863,0.030797,0.020531,0.00405,0.0027,-1,-1,-1,-1
5,1/26/2020,12:41:21,31.0,22.1,-1,1,0.307863,0.307863,0.030797,0.030797,0.004141,0.004141,-1,-1,-1,-1
6,1/27/2020,12:30:42,28.0,19.0,-1,1,0.307863,0.307863,0.030797,0.030797,0.003368,0.003368,-1,-1,-1,-1
7,1/28/2020,12:30:41,32.3,18.0,-1,1,0.307863,0.307863,0.030797,0.020531,0.00405,0.0027,-1,-1,-1,-1
8,1/29/2020,12:41:21,31.0,22.1,-1,1,0.307863,0.307863,0.030797,0.030797,0.004141,0.004141,-1,-1,-1,-1
9,1/30/2020,12:30:42,27.0,20.0,-1,1,0.307863,0.307863,0.030797,0.030797,0.003368,0.003368,-1,-1,-1,-1
