Skip to content

Commit

Permalink
monotonic increase filtering before integration
Browse files Browse the repository at this point in the history
  • Loading branch information
polmonso committed Oct 15, 2020
1 parent a33cf04 commit d476e69
Showing 1 changed file with 26 additions and 1 deletion.
27 changes: 26 additions & 1 deletion scripts/integral_batch_file.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,10 +7,12 @@
from datetime import datetime
import chardet


def show_usage():
    '''Print command-line usage and an example invocation to stdout.

    The script expects 5 arguments: the column name to integrate, the
    input CSV path, the output CSV path, the CSV field delimiter and the
    decimal separator (see the e.g. line printed below).
    '''
    # NOTE: the backslash continuation keeps the example on the second
    # output line; the string itself is the user-facing help text.
    print("usage: column2integrate input_data_file output_data_file csv_delimiter decimal\n\
e.g. python ./integral_batch_file.py \' Irradiation\' dades_in.csv dades_out.csv ';' ','")


def trapezoidal_approximation(ordered_sensors, from_date, to_date, outputDataFormat='%Y-%m-%d %H:%M:%S%z', timeSpacing=5./60., column2integrateTitle=0):
''' Trapezoidal aproximation of the 24 hours following first sensor entry'''

Expand All @@ -36,23 +38,27 @@ def trapezoidal_approximation(ordered_sensors, from_date, to_date, outputDataFor
integrals.append((timeString, test_integral))
return integrals


def to_ECT_csv(integrals):
    '''Placeholder: convert integration results to ECT-style CSV output.

    Currently a no-op stub (returns None without touching *integrals*).
    TODO: handle tz-aware output instead of delivering UTC and better
    UTC-sandwich.
    '''
    return


def asciigraph_print(tuple_array, scale_factor=10):
    '''Print a crude horizontal ASCII bar chart of (timestamp, value) pairs.

    Each value is rendered as a run of '=' characters, one per
    *scale_factor* units; NaN values are printed verbatim instead of a
    bar.  for the lulz.
    '''
    for timestamp, value in tuple_array:
        if pd.isna(value):
            bar = value  # no bar for missing data, show the NaN itself
        else:
            bar = '=' * int(value / scale_factor)
        print("{} {} {:.2f}".format(timestamp, bar, value) )


def find_encoding(fname):
    '''Guess the character encoding of the file at *fname*.

    Reads the entire file as raw bytes and runs chardet's detector over
    it, returning the detected encoding name from the result dict.
    '''
    with open(fname, 'rb') as binary_file:
        detection = chardet.detect(binary_file.read())
    return detection['encoding']


def parse_csv(input_data_file, column2integrate, delimiter, decimal):

guessed_encoding = find_encoding(input_data_file)
Expand All @@ -78,6 +84,7 @@ def parse_csv(input_data_file, column2integrate, delimiter, decimal):

return sensors


def parse_xlsx(input_data_file, column2integrate):

columnNames = pd.read_excel(io=input_data_file, header=0).columns
Expand All @@ -94,6 +101,14 @@ def parse_xlsx(input_data_file, column2integrate):

return ordered_sensors


def dropNonMonotonicRows(df):
    '''Drop rows whose index label breaks strictly-monotonic increasing order.

    Walks the index keeping a running maximum of the labels kept so far;
    any label not strictly greater than that maximum is an anomaly and
    its row is dropped in place.  Comparing only adjacent pairs (the
    previous approach) is insufficient: for index [1, 5, 2, 3, 4] it
    would drop only 2, leaving [1, 5, 3, 4] still non-monotonic, whereas
    the running-maximum scan drops 2, 3 and 4 and leaves [1, 5].

    :param df: DataFrame whose index labels support ordering
               comparisons; modified in place via ``drop``.
    :return: list of the dropped index labels, in index order.
    '''
    anomalies = []
    if len(df.index) > 0:
        running_max = df.index[0]
        for label in df.index[1:]:
            if label <= running_max:
                # not strictly above every kept label -> anomaly
                anomalies.append(label)
            else:
                running_max = label
    df.drop(index=anomalies, inplace=True)
    return anomalies


def main():

if len(sys.argv[1:]) != 5:
Expand Down Expand Up @@ -136,14 +151,24 @@ def main():
from_date = ordered_sensors.index[0].floor('d')
to_date = ordered_sensors.index[-1].ceil('d')

if not ordered_sensors.index.is_monotonic_increasing:
print("Index is NOT monotonic! Attempting automatic fix")
anomalies = dropNonMonotonicRows(ordered_sensors)
print("\n[WARNING] Dropped rows with dates: {}".format(anomalies))

integrals = trapezoidal_approximation(ordered_sensors, from_date, to_date, outputDataFormat, timeSpacing, columnTitle)

integralsDF = pd.DataFrame(data = integrals, columns = ['datetime', columnTitle])
integralsDF.to_csv(output_data_file, sep=';', decimal=',', encoding='utf-8', index=False)

print("Saved {} records from {} to {}".format(len(integralsDF), from_date, to_date))
asciigraph_print(integrals)

if anomalies:
print("\n[WARNING] Dropped rows with dates: {}".format(anomalies))

print("\nJob's done, have a good day\n")


if __name__ == "__main__":
main()
main()

0 comments on commit d476e69

Please sign in to comment.