In [188]:
import pandas as pd


In [189]:
#read data into dataframes
df_sensors = pd.read_csv('data\dane_z_czujnikow.csv', sep=';')
df_inverters = pd.read_csv('data\dane_z_falownikow.csv', sep=';')
df_positions = pd.read_csv('data\dane_z_czujnikow_z_planow_farmy.csv', sep=';')


In [190]:
#drop accidental empty column
df_positions.drop('Unnamed: 2', axis=1, inplace=True)

In [191]:
def round_time(df):
    df['time'] = pd.to_datetime(df['time'])
    df['time'] = df['time'].dt.round('5min')
    df['time'] = df['time'].dt.time

In [192]:
round_time(df_sensors)
round_time(df_inverters)

  df['time'] = pd.to_datetime(df['time'])
  df['time'] = pd.to_datetime(df['time'])


In [193]:
#function filtering records that are out of bounds for a given record
def verify_bounds(sensor_df, column_name, output_name, min_range, max_range, error):
    filtered_df_below = sensor_df[min_range - error > sensor_df[column_name]]
    filtered_df_above = sensor_df[sensor_df[column_name] > max_range + error]
    filtered_df = pd.concat([filtered_df_below, filtered_df_above])
    return filtered_df[[output_name]]

#function that returns elements that are present in df1 and not in df2
def find_missing_devices(df_searched, df_with_records_to_find, identifier_column):
    result = pd.merge(df_searched, df_with_records_to_find, on=identifier_column, how='left', indicator=True)
    result = result[result['_merge'] == 'left_only']
    result = result.drop(columns='_merge')
    return result[[identifier_column]]  

def raise_error_status(df, df_errors, error_code):
    for index, _ in df_errors.iterrows():
        df.at[index, 'status'] += error_code
    return df

def recreate_missing_values(df, id_of_object_missing_data):

    original_filtered = df[df['sensor_number'] == id_of_object_missing_data]

    start_date = '06:05:00'
    end_date = '15:20:00'
    time_range = pd.date_range(start=start_date, end=end_date, freq='5T')
    time_range = time_range.time
    
    default_values = {'sensor_number': id_of_object_missing_data,
                      'panel_voltage': None,
                      'panel_currency':None,
                      'panel_temperature':None,
                      'signal_power':None,
                       'status': 16
                       }  # Define default values
    time_series = pd.Series(data=time_range, name='time')
    default_df = pd.DataFrame(index=time_series, data=default_values)
    original_filtered.set_index('time', inplace=True)
    default_df.update(original_filtered)
    default_df.reset_index(inplace=True)
    
    original_filtered_without_incomplete_data = df[df['sensor_number'] != id_of_object_missing_data]
    return pd.concat([default_df, original_filtered_without_incomplete_data])



In [194]:
#Checking parameters for sensors
invalid_sensors_voltage = verify_bounds(df_sensors, 'panel_voltage', 'sensor_number', min_range=0, max_range=39.1, error=0.5)
invalid_sensors_current = verify_bounds(df_sensors, 'panel_currency', 'sensor_number', min_range=0, max_range=9.15, error=0.2)
invalid_sensors_temperature = verify_bounds(df_sensors, 'panel_temperature', 'sensor_number', min_range=-40, max_range=120, error=1)

df_sensors_missing_from_the_farm = find_missing_devices(df_sensors, df_positions, 'sensor_number')
df_sensors_of_unknown_origin = find_missing_devices(df_positions, df_sensors, 'sensor_number')

# checking if sensors were not dropping packets. 
# for our timeframe there should be 112 measurements
# sensors 1714, 1724 missed one packet each
df_sensors['sensor_number'].value_counts()

df_sensors['status'] = 0 
df_sensors = raise_error_status(df_sensors, invalid_sensors_voltage, 1)
df_sensors = raise_error_status(df_sensors, invalid_sensors_current, 2)
df_sensors = raise_error_status(df_sensors, invalid_sensors_temperature, 4)
df_sensors = raise_error_status(df_sensors, df_sensors_of_unknown_origin, 8)
df_sensors = recreate_missing_values(df_sensors, 1714)
df_sensors = recreate_missing_values(df_sensors, 1724)
df_sensors[df_sensors['status'] > 0]


Unnamed: 0,time,sensor_number,panel_voltage,panel_currency,panel_temperature,signal_power,status
87,13:20:00,1724,,,,,16
87,13:20:00,1714,,,,,16
2423,06:10:00,427,70.352674,0.621157,-0.04,-12.969336,1
5936,06:15:00,1943,33.238049,0.742368,-1.32,-5.550086,8
9268,06:25:00,1279,-32.865461,1.076609,-0.66,6.664527,1
130369,11:30:00,501,32.984863,6.631402,21.09,-14.388079,8
220264,15:15:00,486,31.81612,0.494646,4000.0,-12.336726,4
222634,15:20:00,858,31.583484,0.355887,-2000.0,-3.578061,4
223093,15:20:00,1318,31.173979,0.357905,-33333.0,6.623326,4
223279,15:20:00,1504,31.701001,44.0,10.63,-3.754358,2


In [195]:
#Checking parameters for inverters

invalid_inverters_voltage_not_working = df_inverters[df_inverters['inverter_voltage'] < 200]
invalid_inverters_voltage_not_working = invalid_inverters_voltage_not_working[invalid_inverters_voltage_not_working['inverter_voltage'] > 20]                          
invalid_inverters_voltage_line_break = df_inverters[df_inverters['inverter_voltage'] < 20]
invalid_inverters_voltage_line_break = invalid_inverters_voltage_line_break[invalid_inverters_voltage_line_break['inverter_voltage'] > 0] 
invalid_inverters_voltage = verify_bounds(df_inverters, 'inverter_voltage', 'inverter_number', min_range=0, max_range=2500, error=20)
invalid_inverters_current = verify_bounds(df_inverters, 'inverter_current', 'inverter_number', min_range=0, max_range=20, error=0.1)

df_inverters_missing_from_the_farm =find_missing_devices(df_inverters, df_positions, 'inverter_number')
df_inverters_of_unknown_origin =find_missing_devices(df_positions, df_inverters, 'inverter_number')

# checking if sensors were not dropping packets. 
# for our timeframe there should be 112 measurements
# sensors 24, 33 missed one packet each
df_inverters['inverter_number'].value_counts()


df_inverters['status'] = 0
df_inverters = raise_error_status(df_inverters, invalid_inverters_voltage_not_working, 1)
df_inverters = raise_error_status(df_inverters, invalid_inverters_voltage_line_break, 2)
df_inverters = raise_error_status(df_inverters, invalid_inverters_voltage, 4)
df_inverters = raise_error_status(df_inverters, invalid_inverters_current, 8)
df_inverters[df_inverters['status'] > 0]


Unnamed: 0,time,inverter_number,inverter_voltage,inverter_current,status
115,06:15:00,16,3333.0,0.755437,4
3427,11:45:00,28,1304.459383,50.0,8
5348,14:55:00,60,-1000.0,1.775472,4


In [196]:
invalid_inverters_voltage_line_break

Unnamed: 0,time,inverter_number,inverter_voltage,inverter_current


In [197]:
print("invalid sensors")
print(invalid_sensors_voltage)
print(invalid_sensors_current)
print(invalid_sensors_temperature)
print(df_sensors_of_unknown_origin)
print(df_sensors_missing_from_the_farm)
print("invalid inverters")
print(invalid_inverters_voltage)
print(invalid_inverters_current)
print(df_inverters_missing_from_the_farm)
print(df_inverters_of_unknown_origin)

invalid sensors
        sensor_number
9268             1279
223763           1988
2423              427
        sensor_number
223279           1504
223570           1795
        sensor_number
222634            858
223093           1318
220264            486
        sensor_number
5936               54
130369           1166
        sensor_number
173826              0
175823           3333
invalid inverters
      inverter_number
5348               60
115                16
      inverter_number
3427               28
        inverter_number
213920               60
213921               60
Empty DataFrame
Columns: [inverter_number]
Index: []


In [198]:
df_sensors['sensor_number']

0         1724
1         1724
2         1724
3         1724
4         1724
          ... 
223771    1996
223772    1997
223773    1998
223774    1999
223775    2000
Name: sensor_number, Length: 223778, dtype: int64