### User input

In [None]:
# Format of the input file
decimal = '.'  # decimal mark
sep = ';'      # column separator

# Personal settings of the local user; expected to supply input_directory and output_directory
from config import *

# Locations built from the directories above
input_dir = input_directory + 'results/df_dtypecor_sw.csv'  # path of the input file
output = output_directory + 'results/'                      # directory receiving the exported files

# Description of the input table
dateheadername = 'Timestamp'  # header of the date column
header = 0                    # row number holding the column headers
sample_name = 'sw'            # name of the sample, used in the exported file name

### Start environment and import data

In [None]:
import abspectroscopy_functions as abspy # Functions from the AbspectroscoPY toolbox
import pandas as pd

# Read the input table; the first column of the file becomes the index.
# The user-defined decimal mark is forwarded as well, so that numeric columns are
# still parsed as numbers when the input file uses a decimal mark other than '.'.
df = pd.read_csv(input_dir, sep=sep, decimal=decimal, header=header, index_col=0)
df

### Remove rows and columns containing only missing data before applying nan_check

In [None]:
# Drop rows containing only missing data. dropna evaluates each row on its own values;
# dropping by index label instead would also remove rows that do hold data whenever
# they share an index label (e.g. a duplicated timestamp) with an all-missing row.
df = df.dropna(axis=0, how='all')
# Drop columns containing only missing data
df = df.dropna(axis=1, how='all')
#df

### nan_check

In [None]:
def nan_check(df_in, 
              dateheadername):
    '''
    function to quantify missing data per column and per row in percentage
    :argument df_in: dataframe in input; its index is moved into the columns with
                     reset_index() before counting, so the former index column is part
                     of the per-column result and of the per-row column count
    :argument dateheadername: name of the date column (looked up after resetting the index)
    :return: df_out1, a series with the percentage of missing data per column, and
             df_out2, a dataframe indexed by the date column whose single column
             'missing data' holds the percentage of missing data per row
    :raises KeyError: if dateheadername is not a column after resetting the index
    '''
    df_flat = df_in.reset_index()
    if dateheadername not in df_flat.columns:
        raise KeyError("date column '" + str(dateheadername) + "' not found in the dataframe")

    is_missing = df_flat.isnull()
    rownr, colnr = df_flat.shape

    # check missing data per column in percentage
    df_out1 = is_missing.sum()/rownr*100

    # check missing data per row in percentage; all rows are counted in one pass instead
    # of appending one-row dataframes (DataFrame.append was removed in pandas 2.0 and
    # re-copying the growing dataframe for every row is quadratic in the number of rows)
    row_percent = is_missing.sum(axis=1)/colnr*100
    df_out2 = pd.DataFrame({dateheadername: df_flat[dateheadername],
                            'missing data': row_percent}).set_index(dateheadername)
    return(df_out1, df_out2)

In [None]:
# Percentage of missing data per column and per row
nan_colper, nan_rowper = nan_check(df, dateheadername)

# Export both results using the separator and decimal mark set in the user input
nan_colper.to_csv(
    output + 'missing_data_per_column_percent.csv',
    sep=sep,
    decimal=decimal,
    index=True,
)
nan_rowper.to_csv(
    output + 'missing_data_per_row_percent.csv',
    sep=sep,
    decimal=decimal,
    index=True,
)

# Show both results as the cell output
nan_colper, nan_rowper

In [None]:
# Export the dataframe from which the rows and columns containing only missing data were dropped
df.to_csv(
    output + f'df_nonan_{sample_name!s}.csv',
    sep=sep,
    decimal=decimal,
    index=True,
)
#df