In [1]:
import pandas as pd

In [2]:
#Combine SAP and POLON data on the basis of the teacher's PESEL number
#The "outer" option ensures that the function will return all rows from both DataFrames, even if there are no matches

def techers_compare(sap_t, polon_t):
    """Join SAP and POLON teacher records on PESEL and keep the comparison columns.

    A full outer join is used, so a teacher present in only one of the two
    inputs still appears in the result; the fields coming from the other
    input are filled with the text "no data".

    POLON rows whose PESEL is missing are never used as join partners.
    pandas matches null join keys against each other, which would otherwise
    pair every SAP row lacking a PESEL with every POLON row lacking one.
    Such POLON rows are appended to the result unmatched instead.

    Parameters
    ----------
    sap_t : pandas.DataFrame
        SAP teacher data. Must contain 'SAP_PESEL' and every 'SAP_*' column
        selected below.
    polon_t : pandas.DataFrame
        POLON teacher data. Must contain 'POLON_PESEL' and every 'POLON_*'
        column selected below.

    Returns
    -------
    pandas.DataFrame
        One row per matched or unmatched teacher, restricted to the
        comparison columns, with missing values replaced by "no data".

    Raises
    ------
    KeyError
        If a key column or one of the selected columns is absent.
    """
    polon_without_pesel = polon_t['POLON_PESEL'].isna()

    # Only POLON rows with a real PESEL take part in the join, so a null key
    # on the SAP side has nothing to match against.
    t_compare = sap_t.merge(polon_t[~polon_without_pesel],
                            left_on='SAP_PESEL',
                            right_on='POLON_PESEL',
                            how='outer')

    # Re-attach the POLON rows without a PESEL as unmatched records; their
    # SAP_* fields are missing and become "no data" below.
    if polon_without_pesel.any():
        t_compare = pd.concat([t_compare, polon_t[polon_without_pesel]],
                              ignore_index=True, sort=False)

    # Select columns that are relevant for comparing teacher data
    t_compare = t_compare[['SAP_Administrator_kadrowy', 'SAP_Nr osob.', 'SAP_Nazwisko_imie',
                           'POLON_Nazwisko_imie', 'SAP_Podgrupa pracowników', 'POLON_Grupa_stanowisk',
                           'SAP_Stanowisko', 'POLON_Stanowisko',
                           'SAP_Etat', 'POLON_Etat',
                           'SAP_Płeć', 'POLON_Płeć',
                           'SAP_Tytuł', 'POLON_Tytuł',
                           'SAP_Podst. Miejs. Pracy', 'POLON_Podst. miej. pracy',
                           'SAP_Obywatelstwo', 'POLON_Obywatelstwo']]

    # Replace missing values with the text "no data" (only the returned
    # columns need filling, so this runs after the selection).
    return t_compare.fillna('no data')


In [3]:
#Combine SAP and POLON data on the basis of the employee's PESEL number

def not_techers_compare(sap_nt, polon_nt):
    """Attach SAP data to POLON non-teacher records by PESEL and keep the comparison columns.

    A left join from POLON is used: every POLON row is kept in its original
    order, and rows without an SAP counterpart get the text "no data" in the
    SAP fields. SAP rows that have no POLON counterpart are not included.

    SAP rows whose PESEL is missing are dropped before joining. pandas
    matches null join keys against each other, which would otherwise attach
    those SAP rows to every POLON row that lacks a PESEL.

    Parameters
    ----------
    sap_nt : pandas.DataFrame
        SAP non-teacher data. Must contain 'SAP_PESEL' and every 'SAP_*'
        column selected below.
    polon_nt : pandas.DataFrame
        POLON non-teacher data. Must contain 'POLON_PESEL' and every
        'POLON_*' column selected below.

    Returns
    -------
    pandas.DataFrame
        The POLON rows restricted to the comparison columns, with missing
        values replaced by "no data".

    Raises
    ------
    KeyError
        If a key column or one of the selected columns is absent.
    """
    # An SAP row without a PESEL cannot identify anyone, so it must not be
    # offered as a join partner for POLON rows whose PESEL is also missing.
    sap_with_pesel = sap_nt[sap_nt['SAP_PESEL'].notna()]

    nt_compare = polon_nt.merge(sap_with_pesel,
                                left_on='POLON_PESEL',
                                right_on='SAP_PESEL',
                                how='left')

    # Select columns that are relevant for comparing non-teacher data
    nt_compare = nt_compare[['SAP_Administrator_kadrowy', 'SAP_Nr osob.', 'SAP_Nazwisko_imie',
                             'POLON_Nazwisko_imie', 'SAP_Etat', 'POLON_Etat', 'SAP_Płeć', 'POLON_Płeć',
                             'SAP_Tytuł', 'POLON_Tytuł', 'SAP_Podst. Miejs. Pracy', 'POLON_Podst. miej. pracy',
                             'SAP_Obywatelstwo', 'POLON_Obywatelstwo']]

    # Replace missing values with the text "no data" (only the returned
    # columns need filling, so this runs after the selection).
    return nt_compare.fillna('no data')

In [4]:
#Saves the results of the comparison to an Excel file
def save_to_excel(t_compare, nt_compare, path_temp_excel):
    """Write both comparison tables into a single Excel workbook.

    Parameters
    ----------
    t_compare : pandas.DataFrame
        Table written to the sheet named 'teachers'.
    nt_compare : pandas.DataFrame
        Table written to the sheet named 'not_teachers'.
    path_temp_excel
        Destination handed to ``pd.ExcelWriter``.

    The DataFrame index is not written to either sheet.
    """
    sheets = (
        ('teachers', t_compare),
        ('not_teachers', nt_compare),
    )
    # The context manager saves and closes the workbook on exit.
    with pd.ExcelWriter(path_temp_excel) as workbook:
        for sheet_label, frame in sheets:
            frame.to_excel(workbook, sheet_name=sheet_label, index=False)