### Add columns "year", "code", "khtn", "khxh", "khoi_a", "khoi_b", "khoi_c", "khoi_d"

In [1]:
def add_column_code_year_khtn_khxh(self, start_year=2020):
    """Add 'code', 'year' and the subject-group sum columns to every dataframe.

    Walks ``self.dataframes`` (a list of ``(path, df)`` pairs) and, for each
    dataframe:

    * adds 'code' as the integer value of the first two characters of 'sbd',
      unless 'code' already exists;
    * adds 'year' as ``start_year`` plus the dataframe's position in the
      list, unless 'year' already exists;
    * (re)computes 'khtn', 'khxh', 'khoi_a', 'khoi_b', 'khoi_c' and 'khoi_d'
      as the row-wise sum of their three source columns. A row gets NaN when
      any of its three source values is missing.

    Progress and problems are reported with ``print``; a missing source
    column skips that derived column instead of raising.

    Args:
        start_year: Year assigned to the first dataframe in the list; each
            following dataframe gets the next consecutive year. Defaults to
            2020, the previously hard-coded value.

    Returns:
        None. Dataframes are modified in place and written back to
        ``self.dataframes``.
    """
    if not self.dataframes:
        print("No data to process.")
        return

    # (target column, label used in the "missing" message, source columns)
    derived_columns = (
        ('khtn', 'KHTN', ['vat_li', 'hoa_hoc', 'sinh_hoc']),
        ('khxh', 'KHXH', ['lich_su', 'dia_li', 'gdcd']),
        ('khoi_a', 'Khối A', ['toan', 'vat_li', 'hoa_hoc']),
        ('khoi_b', 'Khối B', ['toan', 'hoa_hoc', 'sinh_hoc']),
        ('khoi_c', 'Khối C', ['ngu_van', 'lich_su', 'dia_li']),
        ('khoi_d', 'Khối D', ['toan', 'ngu_van', 'ngoai_ngu']),
    )

    for i, (path, df) in enumerate(self.dataframes):
        print(f"\nProcessing file: {path}")

        # Column 'code': integer prefix of 'sbd'.
        # NOTE(review): if 'sbd' was parsed as an integer, leading zeros are
        # already lost and the two-character prefix will be wrong for such
        # rows - confirm 'sbd' is loaded as a string upstream.
        if 'code' in df.columns:
            print("'code' already exists.")
        elif 'sbd' not in df.columns:
            print("'sbd' column not found.")
        else:
            try:
                df['code'] = df['sbd'].astype(str).str[:2].astype(int)
            except Exception as e:
                # Best effort: report the problem and carry on with the
                # remaining columns of this file.
                print(f"Error adding 'code': {e}")
            else:
                print("Added column 'code'")

        # Column 'year': derived from the position in the list, so this
        # presumably relies on the files being ordered by consecutive year.
        if 'year' not in df.columns:
            year = start_year + i
            df['year'] = year
            print(f"Added column 'year' = {year}")

        # Subject-group sums; all six follow the same recipe.
        for target, label, sources in derived_columns:
            missing = [col for col in sources if col not in df.columns]
            if missing:
                print(f"Missing {label} columns: {', '.join(missing)}")
                continue
            # sum() skips NaN by default, so mask out rows that are not
            # complete: a partial total would be misleading.
            complete = df[sources].notnull().all(axis=1)
            df[target] = df[sources].sum(axis=1).where(complete)
            print(f"Added column '{target}'")

        # Update dataframe back to list
        self.dataframes[i] = (path, df)



def check_data_column_year(self):
    """Print the distinct 'year' values of every loaded dataframe.

    Iterates over ``self.dataframes`` (a list of ``(path, df)`` pairs) and
    prints ``df["year"].unique()`` for each one. A dataframe without a
    'year' column is reported by path and skipped, so one incomplete file
    does not abort the check for the remaining files.

    Returns:
        None. All output goes to stdout.
    """
    if not self.dataframes:
        print("No data to process")
        return
    for path, df in self.dataframes:
        if 'year' not in df.columns:
            # Report and continue instead of raising KeyError.
            print(f"'year' column not found in {path}.")
            continue
        print(df["year"].unique())

def reorder_all_columns(self):
        if not self.dataframes:
            print("No data to process.")
            return