In [None]:
import pandas as pd
from openpyxl import Workbook, load_workbook

# Read the Excel file
file_path = 'your_excel_file.xlsx'  # Replace with your file path
output_file_path = 'grouped_data_with_subtables.xlsx'  # Replace with your desired output file path
excel_data = pd.ExcelFile(file_path)

# Create a Pandas Excel writer object
with pd.ExcelWriter(output_file_path, engine='openpyxl') as writer:
    for sheet_name in excel_data.sheet_names:
        # Read each sheet into a DataFrame
        df = pd.read_excel(excel_data, sheet_name=sheet_name)

        # Group the data by 'Mapped L3' and 'Role_temp'
        grouped = df.groupby(['Mapped L3', 'Role_temp'])

        # Count the occurrences of 'High' and 'Low' in the 'High/Low' column
        result = grouped['High/Low'].value_counts().unstack(fill_value=0).fillna(0)

        # Add totals for each row ('Mapped L3', 'Role_temp')
        result['Total'] = result.sum(axis=1)

        # Add a totals row
        total_row = result.sum(axis=0).to_frame().T
        total_row.index = pd.MultiIndex.from_tuples([('Total', '')])
        result = pd.concat([result, total_row])

        # Reset the index to get 'Mapped L3' and 'Role_temp' as columns
        result.reset_index(inplace=True)

        # Ensure 'High' and 'Low' are present, if not add them with default 0 values
        if 'High' not in result.columns:
            result['High'] = 0
        if 'Low' not in result.columns:
            result['Low'] = 0
        result = result[['Mapped L3', 'Role_temp', 'High', 'Low', 'Total']]

        # Write the grouped data to the sheet
        result.to_excel(writer, sheet_name=sheet_name, startrow=0, index=False)
        
        # Load the workbook and access the worksheet
        workbook = writer.book
        worksheet = workbook[sheet_name]

        # Initialize start row for writing subtables
        start_row = len(result) + 2  # Move to the next row after grouped data

        # Create subtables for each unique 'Mapped L3'
        unique_mapped_l3 = df['Mapped L3'].unique()
        for mapped_l3 in unique_mapped_l3:
            # Skip 'Total' rows if present
            if mapped_l3 == 'Total':
                continue

            # Filter the original DataFrame based on 'Mapped L3'
            mapped_l3_data = df[df['Mapped L3'] == mapped_l3]

            # Insert a heading for unique 'Mapped L3'
            mapped_l3_heading = f"Unique L3 Value: {mapped_l3}"
            worksheet.cell(row=start_row + 1, column=1, value=mapped_l3_heading)

            # Move to the next row after writing unique L3 heading
            start_row += 2

            # Iterate over each unique 'Role_temp' for the current 'Mapped L3'
            unique_role_temp = mapped_l3_data['Role_temp'].unique()
            for role_temp in unique_role_temp:
                # Filter the data for the current 'Mapped L3' and 'Role_temp'
                subtable_data = mapped_l3_data[mapped_l3_data['Role_temp'] == role_temp]

                # Insert a heading for 'Role_temp'
                role_temp_heading = f"Role Type: {role_temp}"
                worksheet.cell(row=start_row + 1, column=1, value=role_temp_heading)

                # Move to the next row after writing role_temp heading
                start_row += 2

                # Write the subtable data to the sheet
                subtable_data.to_excel(writer, sheet_name=sheet_name, startrow=start_row, index=False, header=True)

                # Move to the next row after writing subtable data
                start_row += len(subtable_data) + 1  # Add 1 line gap after each role_temp subtable

            # Add a 2-line gap after each Mapped L3 section
            start_row += 2

print(f"Grouped data with subtables has been saved to {output_file_path}")


In [None]:
import pandas as pd

# Read the Excel file
file_path = 'your_excel_file.xlsx'  # Replace with your file path
output_file_path = 'grouped_data_with_subtables.xlsx'  # Replace with your desired output file path
excel_data = pd.ExcelFile(file_path)

# Create a Pandas Excel writer object
with pd.ExcelWriter(output_file_path, engine='openpyxl') as writer:
    for sheet_name in excel_data.sheet_names:
        # Read each sheet into a DataFrame
        df = pd.read_excel(excel_data, sheet_name=sheet_name)

        # Group the data by 'Mapped L3' and 'Role_temp'
        grouped = df.groupby(['Mapped L3', 'Role_temp'])

        # Count the occurrences of 'High' and 'Low' in the 'High/Low' column
        result = grouped['High/Low'].value_counts().unstack(fill_value=0).fillna(0)

        # Add totals for each row ('Mapped L3', 'Role_temp')
        result['Total'] = result.sum(axis=1)

        # Add a totals row
        total_row = result.sum(axis=0).to_frame().T
        total_row.index = pd.MultiIndex.from_tuples([('Total', '')])
        result = pd.concat([result, total_row])

        # Reset the index to get 'Mapped L3' and 'Role_temp' as columns
        result.reset_index(inplace=True)

        # Ensure 'High' and 'Low' are present, if not add them with default 0 values
        if 'High' not in result.columns:
            result['High'] = 0
        if 'Low' not in result.columns:
            result['Low'] = 0
        result = result[['Mapped L3', 'Role_temp', 'Total', 'High', 'Low']]

        # Initialize start row for writing grouped data
        start_row = 0

        # Write the grouped data to the sheet
        result.to_excel(writer, sheet_name=sheet_name, startrow=start_row, index=False)
        start_row += len(result) + 2  # Move to the next row after grouped data

        # Create subtables for each unique 'Mapped L3'
        unique_mapped_l3 = df['Mapped L3'].unique()
        for mapped_l3 in unique_mapped_l3:
            # Skip 'Total' rows if present
            if mapped_l3 == 'Total':
                continue

            # Filter the original DataFrame based on 'Mapped L3'
            mapped_l3_data = df[df['Mapped L3'] == mapped_l3]

            # Insert a heading for unique 'Mapped L3'
            mapped_l3_heading = f"Unique L3 Value: {mapped_l3}"
            worksheet = writer.sheets[sheet_name]
            worksheet.cell(row=start_row, column=1, value=mapped_l3_heading)

            # Move to the next row after writing unique L3 heading
            start_row += 2

            # Iterate over each unique 'Role_temp' for the current 'Mapped L3'
            unique_role_temp = mapped_l3_data['Role_temp'].unique()
            for role_temp in unique_role_temp:
                # Filter the data for the current 'Mapped L3' and 'Role_temp'
                subtable_data = mapped_l3_data[mapped_l3_data['Role_temp'] == role_temp]

                # Insert a heading for 'Role_temp'
                role_temp_heading = f"Role Type: {role_temp}"
                worksheet.cell(row=start_row, column=1, value=role_temp_heading)

                # Move to the next row after writing role_temp heading
                start_row += 2

                # Insert an empty row to separate headings from subtable data
                worksheet.cell(row=start_row, column=1, value="")

                # Move to the next row after inserting empty row
                start_row += 1

                # Write the subtable data to the sheet
                subtable_data.to_excel(writer, sheet_name=sheet_name, startrow=start_row, index=False)

                # Move to the next row after writing subtable data
                start_row += len(subtable_data) + 2  # Add extra space after each subtable

print(f"Grouped data with subtables has been saved to {output_file_path}")


In [12]:
import pandas as pd

# Sample data for df1 with 30 columns (only a few shown here for simplicity)
data1 = {
    'colA': [1, 2, 3, 4, 7],
    'colB': [10, 20, 30, 40, 50],
    'colS': [None, None, None, None, None]  # Initially None or some default value
    # Add other columns as needed
}
df1 = pd.DataFrame(data1)

# Sample data for df2 with 45 columns (only a few shown here for simplicity)
data2 = {
    'colR': [3, 3, 4, 5, 6, 6],
    'colS': ['a', 'b', 'c', 'd', 'e', 'f'],
    'colPM': [7, 8, 9, 10, 11, 12]
    # Add other columns as needed
}
df2 = pd.DataFrame(data2)

# Ensure the data types of colA, colR, and colPM are the same
df1['colA'] = df1['colA'].astype(str)
df2['colR'] = df2['colR'].astype(str)
df2['colPM'] = df2['colPM'].astype(str)

# Initial merge using colA and colR
df_merged_initial = pd.merge(df1, df2[['colR', 'colS']], left_on='colA', right_on='colR', how='left')

# Update df1's colS with the values from initial merge
df1['colS'] = df_merged_initial['colS_y']

# Identify rows where colS is still NaN
mask = df1['colS'].isna()

# Perform the second merge using colA and colPM for the NaN rows
df_merged_second = pd.merge(df1[mask], df2[['colPM', 'colS']], left_on='colA', right_on='colPM', how='left')

# Update colS in df1 for the NaN rows with values from the second merge
df1.loc[mask, 'colS'] = df_merged_second['colS_y']

print(df1)


  colA  colB colS
0    1    10  NaN
1    2    20  NaN
2    3    30    a
3    4    40    b
4    7    50    c


In [14]:
import pandas as pd

# Sample data for df1 with 30 columns (only a few shown here for simplicity)
data1 = {
    'colA': [1, 2, 3, 4, 7],
    'colB': [10, 20, 30, 40, 50],
    'colS': [None, '', ' ', '   ', '']  # Different types of empty or blank values
    # Add other columns as needed
}
df1 = pd.DataFrame(data1)

# Sample data for df2 with 45 columns (only a few shown here for simplicity)
data2 = {
    'colR': [3, 3, 4, 5, 6, 6],
    'colS': ['a', 'b', 'c', 'd', 'e', 'f'],
    'colPM': [7, 8, 9, 10, 11, 12]
    # Add other columns as needed
}
df2 = pd.DataFrame(data2)

# Ensure the data types of colA, colR, and colPM are the same
df1['colA'] = df1['colA'].astype(str)
df2['colR'] = df2['colR'].astype(str)
df2['colPM'] = df2['colPM'].astype(str)

# Initial merge using colA and colR
df_merged_initial = pd.merge(df1, df2[['colR', 'colS']], left_on='colA', right_on='colR', how='left')

# Update df1's colS with the values from initial merge
df1['colS'] = df_merged_initial['colS_y']

# Identify rows where colS is empty or blank
mask = df1['colS'].apply(lambda x: x == '' or x.isspace() if isinstance(x, str) else False)

# Perform the second merge using colA and colPM for the empty or blank rows
df_merged_second = pd.merge(df1[mask], df2[['colPM', 'colS']], left_on='colA', right_on='colPM', how='left')

# Update colS in df1 for the empty or blank rows with values from the second merge
df1.loc[mask, 'colS'] = df_merged_second['colS_y']

print(df1)


  colA  colB colS
0    1    10  NaN
1    2    20  NaN
2    3    30    a
3    4    40    b
4    7    50    c


In [6]:
df_merged_pm

Unnamed: 0,colA,colB,colS_x,colPM,colS_y
0,1,10,,,
1,2,20,,,
2,7,50,,7.0,a


In [1]:
import pandas as pd

# Sample data for df1 with 30 columns (only a few shown here for simplicity)
data1 = {
    'colA': [1, 2, 3, 4],
    'colB': [10, 20, 30, 40],
    # Add other columns as needed
}
df1 = pd.DataFrame(data1)

# Sample data for df2 with 45 columns (only a few shown here for simplicity)
data2 = {
    'colR': [3, 3, 4, 5, 6, 6],
    'colS': ['a', 'b', 'c', 'd', 'e', 'f'],
    # Add other columns as needed
}
df2 = pd.DataFrame(data2)

# Ensure the data types of colA and colR are the same
df1['colA'] = df1['colA'].astype(str)
df2['colR'] = df2['colR'].astype(str)

# Select only the required columns from df2
df2_selected = df2[['colR', 'colS']].drop_duplicates(subset='colR')

# Merge df1 with df2_selected on colA and colR
df_merged = pd.merge(df1, df2_selected, left_on='colA', right_on='colR', how='left')

# Assign the values to the new column in df1 and drop any extra columns
df1['colS1'] = df_merged['colS']

# Drop the extra merge column if needed (not strictly necessary, but clean)
df1.drop(columns=['colR'], inplace=True, errors='ignore')

print(df1)


  colA  colB colS1
0    1    10   NaN
1    2    20   NaN
2    3    30     a
3    4    40     c


In [1]:
import pandas as pd

# Sample data for df1
data1 = {'colA': [1, 2, 3, 4]}
df1 = pd.DataFrame(data1)

# Sample data for df2 with non-unique colR values
data2 = {'colR': [3, 3, 4, 5, 6, 6], 'colS': ['a', 'b', 'c', 'd', 'e', 'f']}
df2 = pd.DataFrame(data2)

# Merge df1 with df2 on the condition that df1['colA'] matches df2['colR']
df_merged = pd.merge(df1, df2, left_on='colA', right_on='colR', how='left')

# Select only relevant columns and rename them
df1['colS1'] = df_merged['colS']

print(df1)


   colA colS1
0     1   NaN
1     2   NaN
2     3     a
3     4     b


In [2]:
import pandas as pd

# Sample DataFrames
df1 = pd.DataFrame({
    'id': [1, 2, 3, 4, 5],
    'colA': ['A1', 'A2', 'A3', 'A4', 'A5'],
    'colB': ['B1', 'B2', 'B3', 'B4', 'B5'],
    'colC': ['C1', 'C2', 'C3', 'C4', 'C5']
})

df2 = pd.DataFrame({
    'id': [6, 7, 8],
    'colX': ['X6', 'X7', 'X8'],
    'colY': ['Y6', 'Y7', 'Y8'],
    'colZ': ['Z6', 'Z7', 'Z8']
})

# Columns to copy from df1 and their corresponding columns in df2
columns_to_copy = {
    'id': 'id',
    'colB': 'colY',
    'colC': 'colZ'
}

# Create a new DataFrame with the selected columns from df1
new_rows = df1[list(columns_to_copy.keys())].copy()

# Rename the columns in the new DataFrame to match the column names in df2
new_rows.rename(columns=columns_to_copy, inplace=True)

# Append the new DataFrame to df2
df2 = df2.append(new_rows, ignore_index=True)

# Display the updated df2
print(df2)


   id colX colY colZ
0   6   X6   Y6   Z6
1   7   X7   Y7   Z7
2   8   X8   Y8   Z8
3   1  NaN   B1   C1
4   2  NaN   B2   C2
5   3  NaN   B3   C3
6   4  NaN   B4   C4
7   5  NaN   B5   C5
