In [None]:
import pandas as pd
import re

# Function to extract Staff ID using regular expression
def extract_staff_id(action_owner):
    if isinstance(action_owner, str):
        match = re.search(r'\((\d{8})\)', action_owner)
        return match.group(1) if match else None
    return None

# Load the Excel file into a DataFrame
df = pd.read_excel('path_to_your_file.xlsx')

# Ensure "Action Owner" column is treated as string
df['Action Owner'] = df['Action Owner'].astype(str)

# Filter the DataFrame for rows where 'Issue Status' is 'open'
open_issues_df = df[df['Issue Status'] == 'open']

# Extract Staff ID from "Action Owner" column for filtered rows
open_issues_df['Staff ID'] = open_issues_df['Action Owner'].apply(extract_staff_id)

# Add the 'Record Type' column with value 'Issue'
open_issues_df['Record Type'] = 'Issue'

# Create the final DataFrame with the required columns
result_df = open_issues_df[['Staff ID', 'Record Type', 'Issue ID', 'Issue Title', 'Issue Description']]
result_df.rename(columns={
    'Issue ID': 'Record ID',
    'Issue Title': 'Record Name',
    'Issue Description': 'Record Details'
}, inplace=True)

# Display the final DataFrame
print(result_df)


In [None]:
import pandas as pd

# Read the Excel files into DataFrames
vNew = pd.read_excel('path_to_vNew.xlsx')
vOld = pd.read_excel('path_to_vOld.xlsx')

# Select relevant columns from vNew and vOld
vNew = vNew[['Employee ID', 'Position Number', 'Level4']]
vOld = vOld[['Employee ID', 'Position Number', 'Level4']]

# Merge vNew and vOld on Employee ID
merged_df = pd.merge(vNew, vOld, on='Employee ID', suffixes=('_new', '_old'), how='left')

# Rename columns to match the required format
merged_df.rename(columns={
    'Position Number_new': 'new Position Number',
    'Level4_new': 'new Level4',
    'Position Number_old': 'old Position Number',
    'Level4_old': 'old Level4'
}, inplace=True)

# Create the "Position Changed" column
merged_df['Position Changed'] = merged_df.apply(
    lambda row: 'New' if pd.isna(row['old Position Number']) or pd.isna(row['old Level4']) else 'No Change', axis=1)

# Fetch records from vOld where Employee ID is not in vNew
not_in_vNew = vOld[~vOld['Employee ID'].isin(vNew['Employee ID'])]

# Rename columns to match the format of merged_df
not_in_vNew.rename(columns={
    'Position Number': 'old Position Number',
    'Level4': 'old Level4'
}, inplace=True)

# Add new columns with default values for records not in vNew
not_in_vNew['new Position Number'] = None
not_in_vNew['new Level4'] = None
not_in_vNew['Position Changed'] = 'Left'

# Reorder columns to match the merged_df structure
not_in_vNew = not_in_vNew[['Employee ID', 'new Position Number', 'new Level4', 'old Position Number', 'old Level4', 'Position Changed']]

# Concatenate the two DataFrames
final_df = pd.concat([merged_df, not_in_vNew], ignore_index=True)

# Display the final DataFrame
print(final_df)


# Display the final DataFrame
print(merged_df)


In [None]:
import pandas as pd

# Read the Excel files into DataFrames
vNew = pd.read_excel('path_to_vNew.xlsx')
vOld = pd.read_excel('path_to_vOld.xlsx')

# Select relevant columns from vNew and vOld
vNew = vNew[['Employee ID', 'Position Number', 'Level4']]
vOld = vOld[['Employee ID', 'Position Number', 'Level4']]

# Merge vNew and vOld on Employee ID
merged_df = pd.merge(vNew, vOld, on='Employee ID', suffixes=('_new', '_old'), how='left')

# Rename columns to match the required format
merged_df.rename(columns={'Position Number_new': 'new Position Number',
                          'Level4_new': 'new Level4',
                          'Position Number_old': 'old Position Number',
                          'Level4_old': 'old Level4'}, inplace=True)

# Display the final DataFrame
print(merged_df)


In [None]:
import pandas as pd

# Read the Excel files into DataFrames
vNew = pd.read_excel('path_to_vNew.xlsx')
vOld = pd.read_excel('path_to_vOld.xlsx')

# Ensure column names are properly stripped of leading/trailing spaces
vNew.columns = vNew.columns.str.strip()
vOld.columns = vOld.columns.str.strip()

# Initialize new columns in vOld with default value 'left'
vOld['Pos Check'] = 'left'
vOld['BF4 Check'] = 'left'
vOld['Country Check'] = 'left'

# Perform the lookup and update the values for matching Employee Ids
vOld.loc[vOld['Employee ID'].isin(vNew['Employee ID']), 'Pos Check'] = vOld.loc[vOld['Employee ID'].isin(vNew['Employee ID']), 'Employee ID'].map(vNew.set_index('Employee ID')['Position Number'])
vOld.loc[vOld['Employee ID'].isin(vNew['Employee ID']), 'BF4 Check'] = vOld.loc[vOld['Employee ID'].isin(vNew['Employee ID']), 'Employee ID'].map(vNew.set_index('Employee ID')['BF Level 4 Name'])
vOld.loc[vOld['Employee ID'].isin(vNew['Employee ID']), 'Country Check'] = vOld.loc[vOld['Employee ID'].isin(vNew['Employee ID']), 'Employee ID'].map(vNew.set_index('Employee ID')['Work Location Country/Territory Name'])

# Create the 'In Scope' column
vOld['In Scope'] = ((vOld['Pos Check'] != vOld['Position Number']) | 
                    (vOld['BF4 Check'] != vOld['BF Level 4 Name']) | 
                    (vOld['Country Check'] != vOld['Work Location Country/Territory Name'])).apply(lambda x: 'Movement' if x else 'No Movement')

# Create 'Position Changed', 'BF Changed', and 'Country Changed' columns
vOld['Position Changed'] = vOld.apply(lambda row: 'Left' if row['Pos Check'] == 'left' else ('Yes' if row['Pos Check'] != row['Position Number'] else 'No'), axis=1)
vOld['BF Changed'] = vOld.apply(lambda row: 'Left' if row['BF4 Check'] == 'left' else ('Yes' if row['BF4 Check'] != row['BF Level 4 Name'] else 'No'), axis=1)
vOld['Country Changed'] = vOld.apply(lambda row: 'Left' if row['Country Check'] == 'left' else ('Yes' if row['Country Check'] != row['Work Location Country/Territory Name'] else 'No'), axis=1)

# Display the updated vOld DataFrame
print(vOld)


In [None]:
import pandas as pd

# Read the Excel files into DataFrames
vNew = pd.read_excel('path_to_vNew.xlsx')
vOld = pd.read_excel('path_to_vOld.xlsx')

# Ensure column names are properly stripped of leading/trailing spaces
vNew.columns = vNew.columns.str.strip()
vOld.columns = vOld.columns.str.strip()

# Initialize new columns in vOld with default value 'left'
vOld['Pos Check'] = 'left'
vOld['BF4 Check'] = 'left'
vOld['Country Check'] = 'left'

# Perform the lookup and update the values for matching Employee Ids
vOld.loc[vOld['Employee ID'].isin(vNew['Employee ID']), 'Pos Check'] = vOld.loc[vOld['Employee ID'].isin(vNew['Employee ID']), 'Employee ID'].map(vNew.set_index('Employee ID')['Position Number'])
vOld.loc[vOld['Employee ID'].isin(vNew['Employee ID']), 'BF4 Check'] = vOld.loc[vOld['Employee ID'].isin(vNew['Employee ID']), 'Employee ID'].map(vNew.set_index('Employee ID')['BF Level 4 Name'])
vOld.loc[vOld['Employee ID'].isin(vNew['Employee ID']), 'Country Check'] = vOld.loc[vOld['Employee ID'].isin(vNew['Employee ID']), 'Employee ID'].map(vNew.set_index('Employee ID')['Work Location Country/Territory Name'])

# Display the updated vOld DataFrame
print(vOld)


In [None]:
import pandas as pd

# Read the new and old data from the Excel workbooks
vNew = pd.read_excel('path_to_vNew.xlsx')
vOld = pd.read_excel('path_to_vOld.xlsx')

# Select the required columns
vNew = vNew[['Employee ID', 'Position Number', 'Level4']]
vOld = vOld[['Employee ID', 'Position Number', 'Level4']]

# Merge the dataframes on Employee ID
merged_df = pd.merge(vNew, vOld, on='Employee ID', how='outer', suffixes=('_new', '_old'))

# Create the Status column
merged_df['Status'] = merged_df.apply(
    lambda row: 'new' if pd.isna(row['Position Number_old']) and not pd.isna(row['Position Number_new']) else 
                ('left' if pd.isna(row['Position Number_new']) and not pd.isna(row['Position Number_old']) else 
                'existing'), axis=1)

# Save the resulting dataframe to a new Excel file
merged_df.to_excel('path_to_output.xlsx', index=False)


In [None]:
import pandas as pd
from openpyxl import Workbook, load_workbook

# Read the Excel file
file_path = 'your_excel_file.xlsx'  # Replace with your file path
output_file_path = 'grouped_data_with_subtables.xlsx'  # Replace with your desired output file path
excel_data = pd.ExcelFile(file_path)

# Create a Pandas Excel writer object
with pd.ExcelWriter(output_file_path, engine='openpyxl') as writer:
    for sheet_name in excel_data.sheet_names:
        # Read each sheet into a DataFrame
        df = pd.read_excel(excel_data, sheet_name=sheet_name)

        # Group the data by 'Mapped L3' and 'Role_temp'
        grouped = df.groupby(['Mapped L3', 'Role_temp'])

        # Count the occurrences of 'High' and 'Low' in the 'High/Low' column
        result = grouped['High/Low'].value_counts().unstack(fill_value=0).fillna(0)

        # Add totals for each row ('Mapped L3', 'Role_temp')
        result['Total'] = result.sum(axis=1)

        # Add a totals row
        total_row = result.sum(axis=0).to_frame().T
        total_row.index = pd.MultiIndex.from_tuples([('Total', '')])
        result = pd.concat([result, total_row])

        # Reset the index to get 'Mapped L3' and 'Role_temp' as columns
        result.reset_index(inplace=True)

        # Ensure 'High' and 'Low' are present, if not add them with default 0 values
        if 'High' not in result.columns:
            result['High'] = 0
        if 'Low' not in result.columns:
            result['Low'] = 0
        result = result[['Mapped L3', 'Role_temp', 'High', 'Low', 'Total']]

        # Write the grouped data to the sheet
        result.to_excel(writer, sheet_name=sheet_name, startrow=0, index=False)
        
        # Load the workbook and access the worksheet
        workbook = writer.book
        worksheet = workbook[sheet_name]

        # Initialize start row for writing subtables
        start_row = len(result) + 2  # Move to the next row after grouped data

        # Create a list to store all subtables
        subtables_list = []

        # Create subtables for each unique 'Mapped L3'
        unique_mapped_l3 = df['Mapped L3'].unique()
        for mapped_l3 in unique_mapped_l3:
            # Skip 'Total' rows if present
            if mapped_l3 == 'Total':
                continue

            # Filter the original DataFrame based on 'Mapped L3'
            mapped_l3_data = df[df['Mapped L3'] == mapped_l3]

            # Insert a heading for unique 'Mapped L3'
            mapped_l3_heading = f"Unique L3 Value: {mapped_l3}"
            subtables_list.append((mapped_l3_heading, None))  # Append heading to list

            # Iterate over each unique 'Role_temp' for the current 'Mapped L3'
            unique_role_temp = mapped_l3_data['Role_temp'].unique()
            for role_temp in unique_role_temp:
                # Filter the data for the current 'Mapped L3' and 'Role_temp'
                subtable_data = mapped_l3_data[mapped_l3_data['Role_temp'] == role_temp]

                # Insert a heading for 'Role_temp'
                role_temp_heading = f"Role Type: {role_temp}"
                subtables_list.append((role_temp_heading, None))  # Append heading to list

                # Append the subtable data to the list
                subtables_list.append((None, subtable_data))

        # Write subtables to Excel with proper gaps
        for item in subtables_list:
            if item[0]:  # If it's a heading
                worksheet.cell(row=start_row, column=1, value=item[0])
                start_row += 2  # 2-line gap before next heading or subtable
            elif item[1] is not None:  # If it's subtable data
                item[1].to_excel(writer, sheet_name=sheet_name, startrow=start_row, index=False, header=True)
                start_row += len(item[1]) + 1  # 1-line gap after subtable

print(f"Grouped data with subtables has been saved to {output_file_path}")


In [None]:
import pandas as pd

# Read the Excel file
file_path = 'your_excel_file.xlsx'  # Replace with your file path
output_file_path = 'grouped_data_with_subtables.xlsx'  # Replace with your desired output file path
excel_data = pd.ExcelFile(file_path)

# Create a Pandas Excel writer object
with pd.ExcelWriter(output_file_path, engine='openpyxl') as writer:
    for sheet_name in excel_data.sheet_names:
        # Read each sheet into a DataFrame
        df = pd.read_excel(excel_data, sheet_name=sheet_name)

        # Group the data by 'Mapped L3' and 'Role_temp'
        grouped = df.groupby(['Mapped L3', 'Role_temp'])

        # Count the occurrences of 'High' and 'Low' in the 'High/Low' column
        result = grouped['High/Low'].value_counts().unstack(fill_value=0).fillna(0)

        # Add totals for each row ('Mapped L3', 'Role_temp')
        result['Total'] = result.sum(axis=1)

        # Add a totals row
        total_row = result.sum(axis=0).to_frame().T
        total_row.index = pd.MultiIndex.from_tuples([('Total', '')])
        result = pd.concat([result, total_row])

        # Reset the index to get 'Mapped L3' and 'Role_temp' as columns
        result.reset_index(inplace=True)

        # Ensure 'High' and 'Low' are present, if not add them with default 0 values
        if 'High' not in result.columns:
            result['High'] = 0
        if 'Low' not in result.columns:
            result['Low'] = 0
        result = result[['Mapped L3', 'Role_temp', 'Total', 'High', 'Low']]

        # Initialize start row for writing grouped data
        start_row = 0

        # Write the grouped data to the sheet
        result.to_excel(writer, sheet_name=sheet_name, startrow=start_row, index=False)
        start_row += len(result) + 2  # Move to the next row after grouped data

        # Create subtables for each unique 'Mapped L3'
        unique_mapped_l3 = df['Mapped L3'].unique()
        for mapped_l3 in unique_mapped_l3:
            # Skip 'Total' rows if present
            if mapped_l3 == 'Total':
                continue

            # Filter the original DataFrame based on 'Mapped L3'
            mapped_l3_data = df[df['Mapped L3'] == mapped_l3]

            # Insert a heading for unique 'Mapped L3'
            mapped_l3_heading = f"Unique L3 Value: {mapped_l3}"
            worksheet = writer.sheets[sheet_name]
            worksheet.cell(row=start_row, column=1, value=mapped_l3_heading)

            # Move to the next row after writing unique L3 heading
            start_row += 2

            # Iterate over each unique 'Role_temp' for the current 'Mapped L3'
            unique_role_temp = mapped_l3_data['Role_temp'].unique()
            for role_temp in unique_role_temp:
                # Filter the data for the current 'Mapped L3' and 'Role_temp'
                subtable_data = mapped_l3_data[mapped_l3_data['Role_temp'] == role_temp]

                # Insert a heading for 'Role_temp'
                role_temp_heading = f"Role Type: {role_temp}"
                worksheet.cell(row=start_row, column=1, value=role_temp_heading)

                # Move to the next row after writing role_temp heading
                start_row += 2

                # Insert an empty row to separate headings from subtable data
                worksheet.cell(row=start_row, column=1, value="")

                # Move to the next row after inserting empty row
                start_row += 1

                # Write the subtable data to the sheet
                subtable_data.to_excel(writer, sheet_name=sheet_name, startrow=start_row, index=False)

                # Move to the next row after writing subtable data
                start_row += len(subtable_data) + 2  # Add extra space after each subtable

print(f"Grouped data with subtables has been saved to {output_file_path}")


In [12]:
import pandas as pd

# Sample data for df1 with 30 columns (only a few shown here for simplicity)
data1 = {
    'colA': [1, 2, 3, 4, 7],
    'colB': [10, 20, 30, 40, 50],
    'colS': [None, None, None, None, None]  # Initially None or some default value
    # Add other columns as needed
}
df1 = pd.DataFrame(data1)

# Sample data for df2 with 45 columns (only a few shown here for simplicity)
data2 = {
    'colR': [3, 3, 4, 5, 6, 6],
    'colS': ['a', 'b', 'c', 'd', 'e', 'f'],
    'colPM': [7, 8, 9, 10, 11, 12]
    # Add other columns as needed
}
df2 = pd.DataFrame(data2)

# Ensure the data types of colA, colR, and colPM are the same
df1['colA'] = df1['colA'].astype(str)
df2['colR'] = df2['colR'].astype(str)
df2['colPM'] = df2['colPM'].astype(str)

# Initial merge using colA and colR
df_merged_initial = pd.merge(df1, df2[['colR', 'colS']], left_on='colA', right_on='colR', how='left')

# Update df1's colS with the values from initial merge
df1['colS'] = df_merged_initial['colS_y']

# Identify rows where colS is still NaN
mask = df1['colS'].isna()

# Perform the second merge using colA and colPM for the NaN rows
df_merged_second = pd.merge(df1[mask], df2[['colPM', 'colS']], left_on='colA', right_on='colPM', how='left')

# Update colS in df1 for the NaN rows with values from the second merge
df1.loc[mask, 'colS'] = df_merged_second['colS_y']

print(df1)


  colA  colB colS
0    1    10  NaN
1    2    20  NaN
2    3    30    a
3    4    40    b
4    7    50    c


In [14]:
import pandas as pd

# Sample data for df1 with 30 columns (only a few shown here for simplicity)
data1 = {
    'colA': [1, 2, 3, 4, 7],
    'colB': [10, 20, 30, 40, 50],
    'colS': [None, '', ' ', '   ', '']  # Different types of empty or blank values
    # Add other columns as needed
}
df1 = pd.DataFrame(data1)

# Sample data for df2 with 45 columns (only a few shown here for simplicity)
data2 = {
    'colR': [3, 3, 4, 5, 6, 6],
    'colS': ['a', 'b', 'c', 'd', 'e', 'f'],
    'colPM': [7, 8, 9, 10, 11, 12]
    # Add other columns as needed
}
df2 = pd.DataFrame(data2)

# Ensure the data types of colA, colR, and colPM are the same
df1['colA'] = df1['colA'].astype(str)
df2['colR'] = df2['colR'].astype(str)
df2['colPM'] = df2['colPM'].astype(str)

# Initial merge using colA and colR
df_merged_initial = pd.merge(df1, df2[['colR', 'colS']], left_on='colA', right_on='colR', how='left')

# Update df1's colS with the values from initial merge
df1['colS'] = df_merged_initial['colS_y']

# Identify rows where colS is empty or blank
mask = df1['colS'].apply(lambda x: x == '' or x.isspace() if isinstance(x, str) else False)

# Perform the second merge using colA and colPM for the empty or blank rows
df_merged_second = pd.merge(df1[mask], df2[['colPM', 'colS']], left_on='colA', right_on='colPM', how='left')

# Update colS in df1 for the empty or blank rows with values from the second merge
df1.loc[mask, 'colS'] = df_merged_second['colS_y']

print(df1)


  colA  colB colS
0    1    10  NaN
1    2    20  NaN
2    3    30    a
3    4    40    b
4    7    50    c


In [6]:
df_merged_pm

Unnamed: 0,colA,colB,colS_x,colPM,colS_y
0,1,10,,,
1,2,20,,,
2,7,50,,7.0,a


In [1]:
import pandas as pd

# Sample data for df1 with 30 columns (only a few shown here for simplicity)
data1 = {
    'colA': [1, 2, 3, 4],
    'colB': [10, 20, 30, 40],
    # Add other columns as needed
}
df1 = pd.DataFrame(data1)

# Sample data for df2 with 45 columns (only a few shown here for simplicity)
data2 = {
    'colR': [3, 3, 4, 5, 6, 6],
    'colS': ['a', 'b', 'c', 'd', 'e', 'f'],
    # Add other columns as needed
}
df2 = pd.DataFrame(data2)

# Ensure the data types of colA and colR are the same
df1['colA'] = df1['colA'].astype(str)
df2['colR'] = df2['colR'].astype(str)

# Select only the required columns from df2
df2_selected = df2[['colR', 'colS']].drop_duplicates(subset='colR')

# Merge df1 with df2_selected on colA and colR
df_merged = pd.merge(df1, df2_selected, left_on='colA', right_on='colR', how='left')

# Assign the values to the new column in df1 and drop any extra columns
df1['colS1'] = df_merged['colS']

# Drop the extra merge column if needed (not strictly necessary, but clean)
df1.drop(columns=['colR'], inplace=True, errors='ignore')

print(df1)


  colA  colB colS1
0    1    10   NaN
1    2    20   NaN
2    3    30     a
3    4    40     c


In [1]:
import pandas as pd

# Sample data for df1
data1 = {'colA': [1, 2, 3, 4]}
df1 = pd.DataFrame(data1)

# Sample data for df2 with non-unique colR values
data2 = {'colR': [3, 3, 4, 5, 6, 6], 'colS': ['a', 'b', 'c', 'd', 'e', 'f']}
df2 = pd.DataFrame(data2)

# Merge df1 with df2 on the condition that df1['colA'] matches df2['colR']
df_merged = pd.merge(df1, df2, left_on='colA', right_on='colR', how='left')

# Select only relevant columns and rename them
df1['colS1'] = df_merged['colS']

print(df1)


   colA colS1
0     1   NaN
1     2   NaN
2     3     a
3     4     b


In [2]:
import pandas as pd

# Sample DataFrames
df1 = pd.DataFrame({
    'id': [1, 2, 3, 4, 5],
    'colA': ['A1', 'A2', 'A3', 'A4', 'A5'],
    'colB': ['B1', 'B2', 'B3', 'B4', 'B5'],
    'colC': ['C1', 'C2', 'C3', 'C4', 'C5']
})

df2 = pd.DataFrame({
    'id': [6, 7, 8],
    'colX': ['X6', 'X7', 'X8'],
    'colY': ['Y6', 'Y7', 'Y8'],
    'colZ': ['Z6', 'Z7', 'Z8']
})

# Columns to copy from df1 and their corresponding columns in df2
columns_to_copy = {
    'id': 'id',
    'colB': 'colY',
    'colC': 'colZ'
}

# Create a new DataFrame with the selected columns from df1
new_rows = df1[list(columns_to_copy.keys())].copy()

# Rename the columns in the new DataFrame to match the column names in df2
new_rows.rename(columns=columns_to_copy, inplace=True)

# Append the new DataFrame to df2
df2 = df2.append(new_rows, ignore_index=True)

# Display the updated df2
print(df2)


   id colX colY colZ
0   6   X6   Y6   Z6
1   7   X7   Y7   Z7
2   8   X8   Y8   Z8
3   1  NaN   B1   C1
4   2  NaN   B2   C2
5   3  NaN   B3   C3
6   4  NaN   B4   C4
7   5  NaN   B5   C5
