In [4]:
import pandas as pd
import openpyxl
import re

In [5]:
# Load every worksheet of both workbooks. With sheet_name=None, read_excel
# returns a dict keyed by sheet name whose values are DataFrames.
file1 = pd.read_excel(
    r"output_files/2023_new.xlsx",
    sheet_name=None,
)
file2 = pd.read_excel(
    "Template.xlsx",
    sheet_name=None,
)

In [6]:
# Names (lowercase) of the people whose rows are removed from every sheet
# that has a name column.
excluded_names = {
    "alan akda krisdandi",
    "nyoman surya adi putra masna",
    "vanessa rosalie lautan",
    "juan davin",
}

In [7]:
def exclude_names(df, name_column, names=None):
    """Return the rows of ``df`` whose name is not on the exclusion list.

    The comparison ignores letter case and leading/trailing whitespace.
    Cells that are not strings (missing values, numbers) never match and
    are therefore kept.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame to filter; it is not modified.
    name_column : str
        Label of the column holding the names.
    names : iterable of str, optional
        Names to exclude. Defaults to the module-level ``excluded_names``.

    Returns
    -------
    pandas.DataFrame
        The subset of ``df`` without the excluded names; the original
        index labels are preserved.

    Raises
    ------
    KeyError
        If ``name_column`` is not a column of ``df``.
    """
    if names is None:
        names = excluded_names

    blocked = {str(name).strip().lower() for name in names}

    # Normalise element-wise instead of using the ``.str`` accessor, which
    # raises AttributeError on columns that are not string-typed (e.g. a
    # column of empty cells that was read as float).
    normalized = df[name_column].map(
        lambda value: value.strip().lower() if isinstance(value, str) else value
    )
    return df[~normalized.isin(blocked)]

In [8]:
# Collects the merged DataFrame of each processed sheet, keyed by sheet name
merged_sheets = dict()

In [9]:
# Process each sheet: pair the rows of both workbooks by NIS and store the
# paired rows side by side in `merged_sheets`.
for sheet_name in file1.keys():
    print(f"Processing sheet: {sheet_name}")

    # Fail with an explicit message when the second workbook has no sheet of this name
    if sheet_name not in file2:
        raise KeyError(f"The sheet '{sheet_name}' is missing in the second workbook.")

    # Get the DataFrame for the current sheet from both files
    df1 = file1[sheet_name]
    df2 = file2[sheet_name]

    # Drop the excluded people wherever a 'Nama' column is available
    if 'Nama' in df1.columns:
        df1 = exclude_names(df1, 'Nama')

    if 'Nama' in df2.columns:
        df2 = exclude_names(df2, 'Nama')

    # Ensure the 'NIS' column exists in both DataFrames
    if 'NIS' not in df1.columns or 'NIS' not in df2.columns:
        raise KeyError(f"The 'NIS' column is missing in the sheet '{sheet_name}'.")

    # Find common `NIS` values
    nis_in_both = set(df1['NIS']).intersection(df2['NIS'])

    # Keep only rows whose `NIS` exists in both files and sort them by `NIS`
    # so that row i of one frame lines up with row i of the other. A stable
    # sort keeps rows sharing the same NIS in their original order.
    filtered_df1 = (
        df1[df1['NIS'].isin(nis_in_both)]
        .sort_values(by='NIS', kind='mergesort')
        .reset_index(drop=True)
    )
    filtered_df2 = (
        df2[df2['NIS'].isin(nis_in_both)]
        .sort_values(by='NIS', kind='mergesort')
        .reset_index(drop=True)
    )

    # A NIS repeated in only one file leaves the frames with different
    # lengths; the positional pairing below would then be shifted, so make
    # the problem visible instead of letting it pass silently.
    if len(filtered_df1) != len(filtered_df2):
        print(
            f"Warning: sheet '{sheet_name}' has {len(filtered_df1)} rows in the first file "
            f"but {len(filtered_df2)} rows in the second file after filtering; "
            f"check for duplicate NIS values."
        )

    # Verify the alignment: the NIS of each row in the first frame must equal
    # the NIS of the row at the same position in the second frame.
    matches = 0
    no_matches = 0
    no_match_rows = []
    paired_rows = zip(
        filtered_df1['NIS'],
        filtered_df2['NIS'],
        filtered_df1['Student Name'],
        filtered_df2['Nama'],
    )
    for nis, other_nis, student_name, nama in paired_rows:
        if nis == other_nis:
            matches += 1
        else:
            no_matches += 1
            no_match_rows.append((nis, other_nis, student_name, nama))

    # Print the match results for the current sheet
    print(f"Matches: {matches}")
    print(f"No Matches: {no_matches}")

    if no_match_rows:
        print(f"\nRows with no matches in sheet '{sheet_name}':")
        for nis, other_nis, student_name, nama in no_match_rows:
            print(
                f"NIS: {nis}, NIS in second file: {other_nis}, "
                f"Student Name: '{student_name}', Nama: '{nama}'"
            )

    # Concatenate the DataFrames side by side (both already carry a fresh 0..n-1 index)
    merged_df = pd.concat([filtered_df1, filtered_df2], axis=1)

    # Store the result in the dictionary
    merged_sheets[sheet_name] = merged_df

Processing sheet: Critical Thinking
Matches: 977
No Matches: 0
Processing sheet: Creativity-Innovative
Matches: 977
No Matches: 0
Processing sheet: Problem Solving
Matches: 977
No Matches: 0
Processing sheet: Decision Making
Matches: 977
No Matches: 0
Processing sheet: Berkomunikasi
Matches: 977
No Matches: 0
Processing sheet: Leadership
Matches: 977
No Matches: 0
Processing sheet: Initiative
Matches: 977
No Matches: 0
Processing sheet: Learning Performance
Matches: 977
No Matches: 0
Processing sheet: Ethical Communication
Matches: 977
No Matches: 0
Processing sheet: Lifelong Learning
Matches: 977
No Matches: 0


In [10]:
# Write every merged sheet to its own worksheet of a single workbook,
# without the DataFrame index column.
output_file = 'merged_output_per_sheet.xlsx'
with pd.ExcelWriter(output_file) as writer:
    for sheet_name in merged_sheets:
        merged_df = merged_sheets[sheet_name]
        merged_df.to_excel(writer, index=False, sheet_name=sheet_name)

print(f"All merged sheets saved to {output_file}")

All merged sheets saved to merged_output_per_sheet.xlsx
