diff --git a/scripts/integral_batch_file.py b/scripts/integral_batch_file.py index 8af81ece..bb59e7e5 100755 --- a/scripts/integral_batch_file.py +++ b/scripts/integral_batch_file.py @@ -7,10 +7,12 @@ from datetime import datetime import chardet + def show_usage(): print("usage: column2integrate input_data_file output_data_file csv_delimiter decimal\n\ e.g. python ./integral_batch_file.py \' Irradiation\' dades_in.csv dades_out.csv ';' ','") + def trapezoidal_approximation(ordered_sensors, from_date, to_date, outputDataFormat='%Y-%m-%d %H:%M:%S%z', timeSpacing=5./60., column2integrateTitle=0): ''' Trapezoidal aproximation of the 24 hours following first sensor entry''' @@ -36,16 +38,19 @@ def trapezoidal_approximation(ordered_sensors, from_date, to_date, outputDataFor integrals.append((timeString, test_integral)) return integrals + def to_ECT_csv(integrals): '''TODO handle tz-aware output instead ot delivering UTC and better UTC-sandwich''' return + def asciigraph_print(tuple_array, scale_factor = 10): '''for the lulz''' for t, v in tuple_array: print("{} {} {:.2f}".format(t, v if pd.isna(v) else '=' * int(v/scale_factor), v) ) + def find_encoding(fname): with open(fname, 'rb') as f: r_file = f.read() @@ -53,6 +58,7 @@ def find_encoding(fname): charenc = result['encoding'] return charenc + def parse_csv(input_data_file, column2integrate, delimiter, decimal): guessed_encoding = find_encoding(input_data_file) @@ -78,6 +84,7 @@ def parse_csv(input_data_file, column2integrate, delimiter, decimal): return sensors + def parse_xlsx(input_data_file, column2integrate): columnNames = pd.read_excel(io=input_data_file, header=0).columns @@ -94,6 +101,14 @@ def parse_xlsx(input_data_file, column2integrate): return ordered_sensors + +def dropNonMonotonicRows(df): + ''' strictly monotonic increasing ''' + anomalies = [r[1] for r in zip(df.index, df.index[1:]) if r[0] >= r[1]] + df.drop(index=anomalies, inplace=True) + return anomalies + + def main(): if len(sys.argv[1:]) != 5: @@ -136,6 +151,11 @@ def main(): from_date = ordered_sensors.index[0].floor('d') to_date = ordered_sensors.index[-1].ceil('d') + if not ordered_sensors.index.is_monotonic_increasing: + print("Index is NOT monotonic! Attempting automatic fix") + anomalies = dropNonMonotonicRows(ordered_sensors) + print("\n[WARNING] Dropped rows with dates: {}".format(anomalies)) + integrals = trapezoidal_approximation(ordered_sensors, from_date, to_date, outputDataFormat, timeSpacing, columnTitle) integralsDF = pd.DataFrame(data = integrals, columns = ['datetime', columnTitle]) @@ -143,7 +163,12 @@ def main(): print("Saved {} records from {} to {}".format(len(integralsDF), from_date, to_date)) asciigraph_print(integrals) + + if anomalies: + print("\n[WARNING] Dropped rows with dates: {}".format(anomalies)) + print("\nJob's done, have a good day\n") + if __name__ == "__main__": - main() + main()