In [None]:
import pandas as pd
import win32com.client

# Load your file and sheet
# The file name and column name below are placeholders that must be edited before running.
df = pd.read_excel("Your_File.xlsx")  # <-- Update with your actual file
email_col = "Email"  # <-- Your email column name

# Initialize Outlook session
# Obtains the Outlook COM application object and keeps its `Session`;
# check_email_validity uses this object to create and resolve recipients.
outlook = win32com.client.Dispatch("Outlook.Application").Session

# Define a function to validate each email
def check_email_validity(email):
    """Label one address as "Valid" or "Invalid".

    Missing values, non-string values and strings without "@" are rejected
    immediately. Anything else is stripped of surrounding whitespace, turned
    into a recipient on the module-level ``outlook`` session and resolved;
    the label reflects the truthiness of ``Resolve()``.

    Args:
        email: A cell value from the email column.

    Returns:
        The string "Valid" or "Invalid".
    """
    looks_like_address = not pd.isna(email) and isinstance(email, str) and "@" in email
    if not looks_like_address:
        return "Invalid"

    resolved = outlook.CreateRecipient(email.strip()).Resolve()
    if resolved:
        return "Valid"
    return "Invalid"

# Apply to the dataframe
# Adds an "Email Validity" column holding the label returned for each row's email.
df["Email Validity"] = df[email_col].apply(check_email_validity)

# Save to new file
# index=False keeps the DataFrame index out of the written sheet.
output_file = "Email_Validation_With_Status.xlsx"
df.to_excel(output_file, index=False)

print(f"✅ Email validation complete. Output saved to: {output_file}")


In [None]:
# Merge GIRS Current Status
unique_psids = unique_psids.merge(df_girs[["PSID", "Role"]], on="PSID", how="left")
unique_psids.rename(columns={"Role": "GIRS Current Status"}, inplace=True)

# ===== START: Email-based fallback if PSID match fails =====
# Rows that still lack a status: no GIRS row matched the PSID, or the role is the text "n/a".
missing_mask = unique_psids["GIRS Current Status"].isna() | (unique_psids["GIRS Current Status"].str.lower() == "n/a")
fallback_emails = unique_psids.loc[missing_mask, ["PSID", "Selected Email"]].copy()

# Clean df_girs email for match (non-string values become "")
df_girs["clean_email"] = df_girs["Email"].apply(lambda x: x.strip().lower() if isinstance(x, str) else "")

# Clean fallback_emails too
fallback_emails["Selected Email"] = fallback_emails["Selected Email"].apply(lambda x: x.strip().lower() if isinstance(x, str) else "")

# A blank email identifies nobody. Because both sides normalise missing values
# to "", joining them as-is would pair every PSID without an email with every
# GIRS row without an email and copy an unrelated role. Drop blanks on both sides.
fallback_emails = fallback_emails[fallback_emails["Selected Email"] != ""]
girs_by_email = df_girs.loc[df_girs["clean_email"] != "", ["clean_email", "Role"]]

# Merge based on email
email_matches = fallback_emails.merge(girs_by_email, how="left", left_on="Selected Email", right_on="clean_email")

# Update missing GIRS Current Status
# When one email matches several GIRS rows, the last non-null role in merge order wins.
for idx, row in email_matches.iterrows():
    psid = row["PSID"]
    role = row["Role"]
    if pd.notna(role):
        unique_psids.loc[unique_psids["PSID"] == psid, "GIRS Current Status"] = role
# ===== END: Email-based fallback =====
# ===== START: Enhanced update of team status using PSID, fallback to Email =====
# For each Conso row, write its Status into every team column named in "Tab data".
# Each team name is used directly as a column label of unique_psids, so those
# columns must already exist before this loop runs.
for _, row in df_conso.iterrows():
    psid = row["PSID"]
    status = row["Status"]
    email = row["Email"]
    # "Tab data" is treated as a comma-separated list of team names.
    teams = str(row["Tab data"]).split(",")

    for team in teams:
        team = team.strip()
        
        # Update based on PSID match
        # Assigns only when at least one row with this PSID has a non-null value in the team column.
        updated = unique_psids.loc[unique_psids["PSID"] == psid, team]
        if updated.notna().any():
            unique_psids.loc[unique_psids["PSID"] == psid, team] = status

        # Update based on Email if PSID match didn't work
        # This step runs for every team, not only after a failed PSID match: it targets rows
        # whose team cell still equals "N/A" and whose "Selected Email" equals this row's Email.
        fallback_mask = (unique_psids[team] == "N/A") & (unique_psids["Selected Email"] == email)
        unique_psids.loc[fallback_mask, team] = status
# ===== END: Enhanced update of team status using PSID, fallback to Email =====


In [None]:
import pandas as pd

# Load the Excel file
# "your_file.xlsx" is a placeholder path.
file_path = "your_file.xlsx"
xls = pd.ExcelFile(file_path)

# Read the Processed Data sheet
df_processed = xls.parse("Processed Data")

# Ensure column names are lower case for consistency
# Every later lookup in this cell therefore uses lower-case column names.
df_processed.columns = df_processed.columns.str.lower()

# Define the dynamic columns
# Everything that is not one of the fixed attribute columns listed here is treated as
# a per-team status column by assign_comment.
dynamic_columns = [col for col in df_processed.columns if col not in ["psid", "resource name", "email", "girs current status", "tier1", "ttrl", "pdmr"]]

# Function to determine the comment based on conditions
def assign_comment(row):
    """Return the review comment for one row of the processed sheet.

    Reads the lower-cased columns "girs current status", "tier1", "ttrl",
    "pdmr" and every column named in the module-level ``dynamic_columns``.
    All values are compared after trimming and lower-casing; missing values
    count as "" for the status, as not-"yes" for the tier flags and as "n/a"
    for the dynamic columns.

    Args:
        row: A row (pandas Series) of the processed DataFrame.

    Returns:
        One of the comment labels below, or "Uncategorized" when no rule fires.
    """
    def _norm(value, missing=""):
        # str() keeps numeric or other non-string cells from raising on .strip().
        return missing if pd.isna(value) else str(value).strip().lower()

    girs_status = _norm(row["girs current status"])
    tier_check = any(_norm(row[col]) == "yes" for col in ("tier1", "ttrl", "pdmr"))

    dynamic_values = [_norm(row[col], "n/a") for col in dynamic_columns]
    add_exists = "add" in dynamic_values
    del_exists = "del" in dynamic_values
    cur_exists = "cur" in dynamic_values
    only_na_cur_del = all(val in ["n/a", "cur", "del"] for val in dynamic_values)
    only_na_cur_add = all(val in ["n/a", "cur", "add"] for val in dynamic_values)
    only_na = all(val == "n/a" for val in dynamic_values)

    # `and` binds tighter than `or`: without the grouping every "inactive insider"
    # row matched here regardless of tier_check, which made the later
    # "inactive insider and not tier_check" rules unreachable.
    if girs_status in ("inactive insider", "") and tier_check:
        return "Part of Tier1, TTRL, PDMR"
    elif girs_status == "non-core" and tier_check:
        return "Change to Inactive"
    # NOTE(review): several rules below pair "<x>_exists" with an "only_..." set that
    # excludes <x> (e.g. add_exists with only_na_cur_del), so they cannot fire as
    # written, and the second "inactive insider" rule repeats the first. They are left
    # untouched because the intended business rule is not visible here - confirm.
    elif girs_status == "" and not tier_check and add_exists and only_na_cur_del:
        return "To Add"
    elif girs_status == "inactive insider" and not tier_check and del_exists and only_na_cur_add:
        return "Already Inactive"
    elif girs_status == "non-core" and not tier_check and add_exists and only_na_cur_del:
        return "Already Non-Core"
    elif girs_status == "inactive insider" and not tier_check and del_exists and only_na_cur_add:
        return "No Action"
    elif girs_status == "non-core" and not tier_check and (cur_exists or add_exists) and only_na:
        return "No Action"
    elif not tier_check and girs_status in ["", "non-core"] and del_exists and only_na:
        return "To Delete"
    elif not tier_check and girs_status in ["", "non-core"] and (cur_exists or add_exists) and only_na_cur_del:
        return "To Retain Non-Core"
    else:
        return "Uncategorized"

# Apply the function to create the Comment column
# axis=1 passes each row to assign_comment as a Series.
df_processed["comment"] = df_processed.apply(assign_comment, axis=1)

# Write the updated Processed Data sheet
# Output goes to a new workbook; the input file is not modified by this cell.
with pd.ExcelWriter("your_updated_file.xlsx", engine="xlsxwriter") as writer:
    df_processed.to_excel(writer, sheet_name="Processed Data", index=False)
    
    # Create separate sheets for each unique comment
    # Each sheet holds only the rows carrying that comment and is named after it.
    for comment in df_processed["comment"].unique():
        df_filtered = df_processed[df_processed["comment"] == comment]
        df_filtered.to_excel(writer, sheet_name=comment[:31], index=False)  # Sheet names must be <= 31 chars

print("Updated file saved with new comments and categorized sheets.")

In [None]:
# Updated Code for Processing Data with Comments and Categorization

import pandas as pd

# Load data (assuming dfs are already loaded)
# processed_data = ...

# Collect the dynamic (team) columns: every column that is not a fixed attribute.
# The names are kept exactly as they appear in the frame, because they are used to
# index processed_data below and inside classify_records; lower-casing them raised
# KeyError for any column whose name is not already lower case.
# "All Emails" / "Selected Email" (written to the "Processed Data" sheet alongside
# the team columns) and "Comment" (added further down, so present when this cell is
# re-run) are attribute columns too and must not be treated as team statuses.
dynamic_cols = [col for col in processed_data.columns if col not in ["PSID", "Resource Name", "Email", "All Emails", "Selected Email", "Tier1", "TTRL", "PDMR", "GIRS Current Status", "Comment"]]

# Standardize "N/A" values in dynamic columns (empty strings become "N/A")
processed_data[dynamic_cols] = processed_data[dynamic_cols].replace("", "N/A")

# Define function to classify records
def classify_records(row):
    """Pick the comment label for one processed-data row.

    The row's "GIRS Current Status" (trimmed, lower-cased, "" when missing),
    its "Tier1"/"TTRL"/"PDMR" flags and the values of the module-level
    ``dynamic_cols`` columns are turned into boolean facts. The rules are then
    checked in a fixed order and the first one that holds supplies the label.

    Args:
        row: A row (pandas Series) of ``processed_data``.

    Returns:
        The label of the first matching rule, or "Uncategorized".
    """
    raw_status = row["GIRS Current Status"]
    girs_status = "" if pd.isna(raw_status) else raw_status.strip().lower()

    tier_flags = row[["Tier1", "TTRL", "PDMR"]]
    tier_check = tier_flags.eq("Yes").any()
    tier_all_no = tier_flags.eq("No").all()

    team_values = row[dynamic_cols]

    def _limited_to(*allowed):
        # True when every team cell is one of the allowed values.
        return team_values.apply(lambda cell: cell in allowed).all()

    add_exists = (team_values == "add").any()
    del_exists = (team_values == "del").any()
    cur_exists = (team_values == "cur").any()
    only_na_cur_del = _limited_to("N/A", "cur", "del")
    only_na_cur = _limited_to("N/A", "cur")
    only_na = team_values.eq("N/A").all()

    no_status = girs_status == ""
    inactive = girs_status == "inactive insider"
    non_core = girs_status == "non-core"

    # Ordered rule table: the first entry whose condition is truthy wins.
    rules = [
        ((inactive or no_status) and tier_check, "Part of Tier1, TTRL, PDMR"),
        (non_core and tier_check, "Change to Inactive"),
        (no_status and tier_all_no and add_exists and only_na_cur_del, "To Add"),
        (inactive and tier_all_no and del_exists and only_na_cur, "Already Inactive"),
        (non_core and tier_all_no and add_exists and only_na_cur_del, "Already Non-Core"),
        (non_core and tier_all_no and del_exists and only_na, "Change to Non-Core"),
        (inactive and tier_all_no and del_exists and only_na_cur, "No Action"),
        (non_core and tier_all_no and (cur_exists or add_exists) and only_na, "No Action"),
        (tier_all_no and (no_status or non_core) and del_exists and only_na, "To Delete"),
        (tier_all_no and (no_status or non_core) and (cur_exists or add_exists) and only_na_cur_del, "To Retain Non-Core"),
    ]
    for matched, label in rules:
        if matched:
            return label
    return "Uncategorized"

# Apply classification
# axis=1 passes each row to classify_records as a Series.
processed_data["Comment"] = processed_data.apply(classify_records, axis=1)

# Save categorized data into separate sheets
# Iterating the groupby yields (comment label, rows with that label) pairs.
categorized_sheets = processed_data.groupby("Comment")

with pd.ExcelWriter("Processed_Data_Updated.xlsx", engine="openpyxl") as writer:
    # First sheet: the full table; then one sheet per comment label.
    processed_data.to_excel(writer, sheet_name="Processed Data", index=False)
    for comment, group in categorized_sheets:
        group.to_excel(writer, sheet_name=comment[:31], index=False)  # Excel sheet names max 31 chars


In [None]:
import pandas as pd
import re

# Load Excel file
file_path = "GIRS2.xlsx"
xl = pd.ExcelFile(file_path)

# Load necessary sheets
df_conso = xl.parse("Conso Data")
df_girs = xl.parse("GIRS")
df_tier = xl.parse("Tier_Data")
df_ttrl = xl.parse("TTRL_Data")
# The PDMR sheet is read without a header row; its single column is named "Resource Name".
df_pdmr = xl.parse("PDMR_Data", header=None, names=["Resource Name"])

# Remove extra spaces & standardize case
def clean_dataframe(df):
    """Trim column names and return a copy with every string cell trimmed and lower-cased.

    The column labels of ``df`` are trimmed in place (non-string labels are left
    as they are); the cell values are returned in a new DataFrame, so callers
    must use the return value. Non-string cells pass through unchanged.

    Args:
        df: The DataFrame to clean.

    Returns:
        A new DataFrame with cleaned string cells and the trimmed column labels.
    """
    # A plain comprehension instead of `.columns.str.strip()`: the `.str` accessor
    # raises for label indexes that do not hold strings (e.g. integer labels).
    df.columns = [label.strip() if isinstance(label, str) else label for label in df.columns]

    def _clean_cell(value):
        return value.strip().lower() if isinstance(value, str) else value

    # DataFrame.applymap is deprecated in favour of DataFrame.map (pandas >= 2.1);
    # fall back to applymap on older versions that do not have DataFrame.map.
    if hasattr(pd.DataFrame, "map"):
        return df.map(_clean_cell)
    return df.applymap(_clean_cell)

# Each frame is rebound to the cleaned copy returned by clean_dataframe.
df_conso = clean_dataframe(df_conso)
df_girs = clean_dataframe(df_girs)
df_tier = clean_dataframe(df_tier)
df_ttrl = clean_dataframe(df_ttrl)
df_pdmr = clean_dataframe(df_pdmr)

# Standardize PSIDs by removing leading zeros
def standardize_psid(psid):
    """Return an all-digit PSID without leading zeros; return anything else unchanged.

    Args:
        psid: The PSID value (callers pass strings).

    Returns:
        ``str(int(psid))`` when the text form of ``psid`` consists only of
        decimal digits, otherwise ``psid`` itself.
    """
    text = str(psid)
    # str.isdecimal() accepts exactly the digit characters int() can parse.
    # str.isdigit() additionally accepts characters such as superscripts ("²"),
    # for which int() raises ValueError.
    return str(int(text)) if text.isdecimal() else psid

# PSIDs are converted to text first, then normalised, so all four frames share one key format.
df_conso["PSID"] = df_conso["PSID"].astype(str).apply(standardize_psid)
df_girs["PSID"] = df_girs["PSID"].astype(str).apply(standardize_psid)
df_tier["PSID"] = df_tier["PSID"].astype(str).apply(standardize_psid)
df_ttrl["PSID"] = df_ttrl["PSID"].astype(str).apply(standardize_psid)

# Extract email from < > if present
def extract_email(email):
    """Pull a single address out of a free-text email cell.

    Args:
        email: The cell text (a string).

    Returns:
        The trimmed text between the first "<" and the next ">" when present;
        otherwise the first whitespace-separated word that contains "@";
        otherwise "".
    """
    match = re.search(r"<(.*?)>", email)
    if match:
        return match.group(1).strip()
    # Take the word that actually holds the address: the first word of
    # "john doe jd@x.com" is a name, not an email.
    for token in email.split():
        if "@" in token:
            return token
    return ""

# Process unique PSIDs with Resource Name and Email
def process_psid_group(group):
    """Collapse all rows of one PSID into a single summary record.

    Args:
        group: The rows sharing one PSID, with "PSID", "Resource Name" and
            "Email" columns.

    Returns:
        A Series with "Resource Name" (the candidate with the most words among
        names not containing the PSID, else the first name, else ""), "Email"
        (usable addresses joined by "; "), "All Emails" (every extracted
        address joined by "; ") and "Selected Email" (the address when exactly
        one distinct usable address exists, the joined list when several
        exist, else "").
    """
    names = [raw_name.strip() for raw_name in group["Resource Name"].dropna().unique()]
    email_ids = [extract_email(raw_email.strip()) for raw_email in group["Email"].dropna().unique()]
    psid = group["PSID"].iloc[0]

    # Names that embed the PSID are only used when nothing else is available.
    fallback_name = names[0] if names else ""
    candidates = [candidate for candidate in names if str(psid) not in candidate]
    resource_name = max(candidates, key=lambda candidate: len(candidate.split()), default=fallback_name)

    # An address is usable when it has "@" and neither "/" nor "#".
    def _usable(address):
        return "@" in address and "/" not in address and "#" not in address

    valid_emails = list(filter(_usable, email_ids))
    email = "; ".join(valid_emails)

    if len(set(valid_emails)) == 1:
        selected_email = valid_emails[0]
    elif len(valid_emails) > 1:
        selected_email = "; ".join(valid_emails)
    else:
        selected_email = ""

    summary = {
        "Resource Name": resource_name,
        "Email": email,
        "All Emails": "; ".join(email_ids),
        "Selected Email": selected_email,
    }
    return pd.Series(summary)

# Filter valid PSIDs (numeric only)
# Rows are split by whether the PSID text is numeric; both halves are copies of df_conso rows.
df_valid_psid = df_conso[df_conso["PSID"].str.isnumeric()].copy()
df_invalid_psid = df_conso[~df_conso["PSID"].str.isnumeric()].copy()
# df_invalid_psid.insert(0, "PSID", "Invalid")

# One summary row per valid PSID, built by process_psid_group.
unique_psids = df_valid_psid.groupby("PSID").apply(process_psid_group).reset_index()

# Capture invalid email records
# All Conso rows whose PSID ended up with an empty "Email" in the summary.
invalid_emails = df_valid_psid[df_valid_psid["PSID"].isin(unique_psids.loc[unique_psids["Email"].eq(""), "PSID"])].copy()

# Capture multiple email discrepancies
# A ";" in "All Emails" means more than one address was collected for the PSID.
multiple_emails = unique_psids[unique_psids["All Emails"].str.contains(";")].copy()
# NOTE(review): the next line assigns the column to itself and changes nothing.
multiple_emails["Selected Email"] = multiple_emails["Selected Email"]

# Merge GIRS Current Status
# Left join keeps every PSID; PSIDs absent from GIRS get a missing status.
unique_psids = unique_psids.merge(df_girs[["PSID", "Role"]], on="PSID", how="left")
unique_psids.rename(columns={"Role": "GIRS Current Status"}, inplace=True)

# Merge Tier1 & TTRL status
# "Yes" when the PSID appears in the respective sheet, otherwise "No".
unique_psids["Tier1"] = unique_psids["PSID"].isin(df_tier["PSID"]).map({True: "Yes", False: "No"})
unique_psids["TTRL"] = unique_psids["PSID"].isin(df_ttrl["PSID"]).map({True: "Yes", False: "No"})

# Merge PDMR status
# NOTE(review): with the line below commented out, unique_psids gets no "PDMR" column in this cell.
# unique_psids["PDMR"] = unique_psids["Resource Name"].isin(df_pdmr["Resource Name"]).map({True: "Yes", False: "No"})
df_invalid_psid["PDMR"]="Resource Name is missing."
# Extract unique team names from Conso Data Tab data column
# astype(str) turns missing cells into text, so they take part in the split as well.
df_conso["Tab data"] = df_conso["Tab data"].astype(str)
unique_teams = df_conso["Tab data"].str.split(",").explode().str.strip().unique()

# Initialize team columns with N/A
for team in unique_teams:
    unique_psids[team] = "N/A"

# Update team status based on Conso Data
# Rows are applied in order, so the last Conso row for a PSID/team pair decides the value.
for _, row in df_conso.iterrows():
    psid = row["PSID"]
    status = row["Status"]
    teams = str(row["Tab data"]).split(",")
    for team in teams:
        team = team.strip()
        unique_psids.loc[unique_psids["PSID"] == psid, team] = status

# Save the final processed data
# Appends to the input workbook itself; existing sheets with these names are replaced.
with pd.ExcelWriter(file_path, mode='a', if_sheet_exists='replace', engine='openpyxl') as writer:
    unique_psids.to_excel(writer, sheet_name="Processed Data", index=False)
    invalid_emails.to_excel(writer, sheet_name="Invalid Emails", index=False)
    df_invalid_psid.to_excel(writer, sheet_name="Invalid PSIDs", index=False)
    multiple_emails.to_excel(writer, sheet_name="Multiple Emails", index=False)
    df_conso.to_excel(writer, sheet_name="Original Data", index=False)


In [None]:
import pandas as pd
import re

# Load Excel file
file_path = "GIRS2.xlsx"
xl = pd.ExcelFile(file_path)

# Load necessary sheets
df_conso = xl.parse("Conso Data")
df_girs = xl.parse("GIRS")
df_tier = xl.parse("Tier_Data")
df_ttrl = xl.parse("TTRL_Data")
# The PDMR sheet is read without a header row; its single column is named "Resource Name".
df_pdmr = xl.parse("PDMR_Data", header=None, names=["Resource Name"])

# Remove extra spaces & standardize case
def clean_dataframe(df):
    """Trim column names and return a copy with every string cell trimmed and lower-cased.

    The column labels of ``df`` are trimmed in place (non-string labels are left
    as they are); the cell values are returned in a new DataFrame, so callers
    must use the return value. Non-string cells pass through unchanged.

    Args:
        df: The DataFrame to clean.

    Returns:
        A new DataFrame with cleaned string cells and the trimmed column labels.
    """
    # A plain comprehension instead of `.columns.str.strip()`: the `.str` accessor
    # raises for label indexes that do not hold strings (e.g. integer labels).
    df.columns = [label.strip() if isinstance(label, str) else label for label in df.columns]

    def _clean_cell(value):
        return value.strip().lower() if isinstance(value, str) else value

    # DataFrame.applymap is deprecated in favour of DataFrame.map (pandas >= 2.1);
    # fall back to applymap on older versions that do not have DataFrame.map.
    if hasattr(pd.DataFrame, "map"):
        return df.map(_clean_cell)
    return df.applymap(_clean_cell)

# Each frame is rebound to the cleaned copy returned by clean_dataframe.
df_conso = clean_dataframe(df_conso)
df_girs = clean_dataframe(df_girs)
df_tier = clean_dataframe(df_tier)
df_ttrl = clean_dataframe(df_ttrl)
df_pdmr = clean_dataframe(df_pdmr)

# Extract email from < > if present
def extract_email(email):
    """Return the text inside the first "<...>" pair, or the input unchanged.

    Args:
        email: The cell text (a string).

    Returns:
        The characters between the first "<" and the next ">", exactly as
        written; when there is no such pair, ``email`` itself.
    """
    bracketed = re.search(r"<(.*?)>", email)
    if bracketed is None:
        return email
    return bracketed.group(1)

# Process unique PSIDs with Resource Name and Email
def process_psid_group(group):
    """Collapse all rows of one PSID into a single summary record.

    Args:
        group: The rows sharing one PSID, with "PSID", "Resource Name" and
            "Email" columns.

    Returns:
        A Series with "Resource Name" (the candidate with the most words among
        names not containing the PSID, else the first name, else ""), "Email"
        (usable addresses joined by "; "), "All Emails" (every extracted
        address joined by "; ") and "Selected Email" (the address when exactly
        one distinct usable address exists, else "").
    """
    names = [raw_name.strip() for raw_name in group["Resource Name"].dropna().unique()]
    email_ids = [extract_email(raw_email.strip()) for raw_email in group["Email"].dropna().unique()]
    psid = group["PSID"].iloc[0]

    # Names that embed the PSID are only used when nothing else is available.
    fallback_name = names[0] if names else ""
    candidates = [candidate for candidate in names if str(psid) not in candidate]
    resource_name = max(candidates, key=lambda candidate: len(candidate.split()), default=fallback_name)

    # An address is usable when it has "@" and neither "/" nor "#".
    def _usable(address):
        return "@" in address and "/" not in address and "#" not in address

    valid_emails = list(filter(_usable, email_ids))
    email = "; ".join(valid_emails)

    # Only an unambiguous address is selected automatically.
    selected_email = ""
    if len(set(valid_emails)) == 1:
        selected_email = valid_emails[0]

    summary = {
        "Resource Name": resource_name,
        "Email": email,
        "All Emails": "; ".join(email_ids),
        "Selected Email": selected_email,
    }
    return pd.Series(summary)

# Filter valid PSIDs (numeric only)
# PSIDs are compared as text; rows are split by whether that text is numeric.
df_conso["PSID"] = df_conso["PSID"].astype(str)
df_valid_psid = df_conso[df_conso["PSID"].str.isnumeric()].copy()
df_invalid_psid = df_conso[~df_conso["PSID"].str.isnumeric()].copy()
# df_invalid_psid is sliced from df_conso, so it always has a "PSID" column holding
# the original (invalid) value. Calling insert(0, "PSID", ...) here raised
# "ValueError: cannot insert PSID, already exists", so no column is inserted.

# One summary row per valid PSID, built by process_psid_group.
unique_psids = df_valid_psid.groupby("PSID").apply(process_psid_group).reset_index()

# Capture invalid email records
# All Conso rows whose PSID ended up with an empty "Email" in the summary.
invalid_emails = df_valid_psid[df_valid_psid["PSID"].isin(unique_psids.loc[unique_psids["Email"].eq(""), "PSID"])].copy()

# Capture multiple email discrepancies
# A ";" in "All Emails" means more than one address was collected for the PSID.
multiple_emails = unique_psids[unique_psids["All Emails"].str.contains(";")].copy()
# NOTE(review): the next line assigns the column to itself and changes nothing.
multiple_emails["Selected Email"] = multiple_emails["Selected Email"]

# Merge GIRS Current Status
# Left join keeps every PSID; PSIDs absent from GIRS get a missing status.
unique_psids = unique_psids.merge(df_girs[["PSID", "Role"]], on="PSID", how="left")
unique_psids.rename(columns={"Role": "GIRS Current Status"}, inplace=True)

# Merge Tier1 & TTRL status
# "Yes" when the PSID appears in the respective sheet, otherwise "No".
unique_psids["Tier1"] = unique_psids["PSID"].isin(df_tier["PSID"]).map({True: "Yes", False: "No"})
unique_psids["TTRL"] = unique_psids["PSID"].isin(df_ttrl["PSID"]).map({True: "Yes", False: "No"})

# Merge PDMR status
# PDMR membership is matched on the resource name rather than on the PSID.
unique_psids["PDMR"] = unique_psids["Resource Name"].isin(df_pdmr["Resource Name"]).map({True: "Yes", False: "No"})

# Extract unique team names from Conso Data Tab data column
# astype(str) turns missing cells into text, so they take part in the split as well.
df_conso["Tab data"] = df_conso["Tab data"].astype(str)
unique_teams = df_conso["Tab data"].str.split(",").explode().str.strip().unique()

# Initialize team columns with N/A
for team in unique_teams:
    unique_psids[team] = "N/A"

# Update team status based on Conso Data
# Rows are applied in order, so the last Conso row for a PSID/team pair decides the value.
for _, row in df_conso.iterrows():
    psid = row["PSID"]
    status = row["Status"]
    teams = str(row["Tab data"]).split(",")
    for team in teams:
        team = team.strip()
        unique_psids.loc[unique_psids["PSID"] == psid, team] = status

# Save the final processed data
# Appends to the input workbook itself; existing sheets with these names are replaced.
with pd.ExcelWriter(file_path, mode='a', if_sheet_exists='replace', engine='openpyxl') as writer:
    unique_psids.to_excel(writer, sheet_name="Processed Data", index=False)
    invalid_emails.to_excel(writer, sheet_name="Invalid Emails", index=False)
    df_invalid_psid.to_excel(writer, sheet_name="Invalid PSIDs", index=False)
    multiple_emails.to_excel(writer, sheet_name="Multiple Emails", index=False)
    df_conso.to_excel(writer, sheet_name="Original Data", index=False)


In [None]:
import pandas as pd

# Load Excel file
file_path = "GIRS2.xlsx"
xl = pd.ExcelFile(file_path)

# Load necessary sheets
df_conso = xl.parse("Conso Data")
df_girs = xl.parse("GIRS")
df_tier = xl.parse("Tier_Data")
df_ttrl = xl.parse("TTRL_Data")
# The PDMR sheet is read without a header row; its single column is named "Resource Name".
df_pdmr = xl.parse("PDMR_Data", header=None, names=["Resource Name"])

# Remove extra spaces & standardize case
def clean_dataframe(df):
    """Trim column names and return a copy with every string cell trimmed and lower-cased.

    The column labels of ``df`` are trimmed in place (non-string labels are left
    as they are); the cell values are returned in a new DataFrame, so callers
    must use the return value. Non-string cells pass through unchanged.

    Args:
        df: The DataFrame to clean.

    Returns:
        A new DataFrame with cleaned string cells and the trimmed column labels.
    """
    # A plain comprehension instead of `.columns.str.strip()`: the `.str` accessor
    # raises for label indexes that do not hold strings (e.g. integer labels).
    df.columns = [label.strip() if isinstance(label, str) else label for label in df.columns]

    def _clean_cell(value):
        return value.strip().lower() if isinstance(value, str) else value

    # DataFrame.applymap is deprecated in favour of DataFrame.map (pandas >= 2.1);
    # fall back to applymap on older versions that do not have DataFrame.map.
    if hasattr(pd.DataFrame, "map"):
        return df.map(_clean_cell)
    return df.applymap(_clean_cell)

# Each frame is rebound to the cleaned copy returned by clean_dataframe.
df_conso = clean_dataframe(df_conso)
df_girs = clean_dataframe(df_girs)
df_tier = clean_dataframe(df_tier)
df_ttrl = clean_dataframe(df_ttrl)
df_pdmr = clean_dataframe(df_pdmr)

# Process unique PSIDs with Resource Name and Email
def process_psid_group(group):
    """Collapse all rows of one PSID into a name and an email string.

    Args:
        group: The rows sharing one PSID, with "PSID", "Resource Name" and
            "Email" columns.

    Returns:
        A Series with "Resource Name" (the candidate with the most words among
        names not containing the PSID, else the first name, else "") and
        "Email" (usable addresses joined by "; "; when none is usable, the
        first collected address; when none was collected, "").
    """
    names = [raw_name.strip() for raw_name in group["Resource Name"].dropna().unique()]
    email_ids = [raw_email.strip() for raw_email in group["Email"].dropna().unique()]
    psid = group["PSID"].iloc[0]

    # Names that embed the PSID are only used when nothing else is available.
    fallback_name = names[0] if names else ""
    candidates = [candidate for candidate in names if str(psid) not in candidate]
    resource_name = max(candidates, key=lambda candidate: len(candidate.split()), default=fallback_name)

    # An address is usable when it has "@" and neither "/" nor "#".
    def _usable(address):
        return "@" in address and "/" not in address and "#" not in address

    valid_emails = list(filter(_usable, email_ids))
    if valid_emails:
        email = "; ".join(valid_emails)
    elif email_ids:
        email = email_ids[0]
    else:
        email = ""

    return pd.Series({"Resource Name": resource_name, "Email": email})

# One summary row per PSID, built by process_psid_group (no numeric filter is applied first).
unique_psids = df_conso.groupby("PSID").apply(process_psid_group).reset_index()

# Capture invalid email records
# All Conso rows whose PSID ended up with an empty "Email" in the summary.
invalid_emails = df_conso[df_conso["PSID"].isin(unique_psids[unique_psids["Email"].eq("")]["PSID"])]

# Merge GIRS Current Status
# Left join keeps every PSID; PSIDs absent from GIRS get a missing status.
unique_psids = unique_psids.merge(df_girs[["PSID", "Role"]], on="PSID", how="left")
unique_psids.rename(columns={"Role": "GIRS Current Status"}, inplace=True)

# Merge Tier1 & TTRL status
# "Yes" when the PSID appears in the respective sheet, otherwise "No".
unique_psids["Tier1"] = unique_psids["PSID"].isin(df_tier["PSID"]).map({True: "Yes", False: "No"})
unique_psids["TTRL"] = unique_psids["PSID"].isin(df_ttrl["PSID"]).map({True: "Yes", False: "No"})

# Merge PDMR status
# PDMR membership is matched on the resource name rather than on the PSID.
unique_psids["PDMR"] = unique_psids["Resource Name"].isin(df_pdmr["Resource Name"]).map({True: "Yes", False: "No"})

# Extract unique team names from Conso Data Tab data column
df_conso["Tab data"] = df_conso["Tab data"].astype(str)  # Convert to string to avoid errors
unique_teams = df_conso["Tab data"].str.split(",").explode().str.strip().unique()

# Initialize team columns with N/A
for team in unique_teams:
    unique_psids[team] = "N/A"

# Update team status based on Conso Data
# Rows are applied in order, so the last Conso row for a PSID/team pair decides the value.
for _, row in df_conso.iterrows():
    psid = row["PSID"]
    status = row["Status"]
    teams = str(row["Tab data"]).split(",")  # Convert to string before splitting
    for team in teams:
        team = team.strip()
        unique_psids.loc[unique_psids["PSID"] == psid, team] = status

# Handle records with missing or non-numeric PSIDs
# These rows are only collected for reporting; they are not removed from the steps above.
invalid_psid_records = df_conso[df_conso["PSID"].isna() | ~df_conso["PSID"].astype(str).str.isnumeric()]

# Save the final processed data
# Appends to the input workbook itself; existing sheets with these names are replaced.
with pd.ExcelWriter(file_path, mode='a', if_sheet_exists='replace', engine='openpyxl') as writer:
    unique_psids.to_excel(writer, sheet_name="Processed Data", index=False)
    invalid_emails.to_excel(writer, sheet_name="Invalid Emails", index=False)
    invalid_psid_records.to_excel(writer, sheet_name="Invalid PSIDs", index=False)


In [None]:
import pandas as pd

# Load Excel file
# "your_excel_file.xlsx" is a placeholder path.
file_path = "your_excel_file.xlsx"
xl = pd.ExcelFile(file_path)

# Load necessary sheets
# Here the PDMR sheet is parsed with its first row as the header, like the other sheets.
df_conso = xl.parse("Conso Data")
df_girs = xl.parse("GIRS")
df_tier = xl.parse("Tier_Data")
df_ttrl = xl.parse("TTRL_Data")
df_pdmr = xl.parse("PDMR_Data")

# Remove extra spaces & standardize case
def clean_dataframe(df):
    """Trim column names and return a copy with every string cell trimmed.

    The column labels of ``df`` are trimmed in place (non-string labels are left
    as they are); the cell values are returned in a new DataFrame, so callers
    must use the return value. Letter case is not changed by this version, and
    non-string cells pass through unchanged.

    Args:
        df: The DataFrame to clean.

    Returns:
        A new DataFrame with trimmed string cells and the trimmed column labels.
    """
    # A plain comprehension instead of `.columns.str.strip()`: the `.str` accessor
    # raises for label indexes that do not hold strings (e.g. integer labels).
    df.columns = [label.strip() if isinstance(label, str) else label for label in df.columns]

    def _clean_cell(value):
        return value.strip() if isinstance(value, str) else value

    # DataFrame.applymap is deprecated in favour of DataFrame.map (pandas >= 2.1);
    # fall back to applymap on older versions that do not have DataFrame.map.
    if hasattr(pd.DataFrame, "map"):
        return df.map(_clean_cell)
    return df.applymap(_clean_cell)

# Each frame is rebound to the cleaned copy returned by clean_dataframe.
df_conso = clean_dataframe(df_conso)
df_girs = clean_dataframe(df_girs)
df_tier = clean_dataframe(df_tier)
df_ttrl = clean_dataframe(df_ttrl)
df_pdmr = clean_dataframe(df_pdmr)

# Extract unique PSIDs
# Duplicates are dropped on the (PSID, Resource Name, Email) combination, so a PSID that
# appears with different names or emails keeps one row per combination.
unique_psids = df_conso[["PSID", "Resource Name", "Email"]].drop_duplicates()

# Merge GIRS Current Status
# Left join keeps every row; PSIDs absent from GIRS get a missing status.
unique_psids = unique_psids.merge(df_girs[["PSID", "Role"]], on="PSID", how="left")
unique_psids.rename(columns={"Role": "GIRS Current Status"}, inplace=True)

# Merge Tier1 & TTRL status
# "Yes" when the PSID appears in the respective sheet, otherwise "No".
unique_psids["Tier1"] = unique_psids["PSID"].isin(df_tier["PSID"]).map({True: "Yes", False: "No"})
unique_psids["TTRL"] = unique_psids["PSID"].isin(df_ttrl["PSID"]).map({True: "Yes", False: "No"})

# Merge PDMR status
# PDMR membership is matched on the resource name rather than on the PSID.
unique_psids["PDMR"] = unique_psids["Resource Name"].isin(df_pdmr["Resource Name"]).map({True: "Yes", False: "No"})

# Extract unique team names from Conso Data Tab Data column
# Blank cells are read as NaN. They are dropped here so that no NaN-named team column
# is created, and the remaining values are converted to text before splitting.
unique_teams = df_conso["Tab Data"].dropna().astype(str).str.split(",").explode().str.strip().unique()

# Initialize team columns with N/A
for team in unique_teams:
    unique_psids[team] = "N/A"

# Update team status based on Conso Data
# Rows are applied in order, so the last Conso row for a PSID/team pair decides the value.
for _, row in df_conso.iterrows():
    # A blank "Tab Data" cell is a float NaN, which has no .split(); such rows
    # name no team, so there is nothing to update for them.
    if pd.isna(row["Tab Data"]):
        continue
    psid = row["PSID"]
    status = row["Status"]
    teams = str(row["Tab Data"]).split(",")
    for team in teams:
        team = team.strip()
        unique_psids.loc[unique_psids["PSID"] == psid, team] = status

# Save the final processed data
# Appends to the input workbook itself; an existing "Processed Data" sheet is replaced.
with pd.ExcelWriter(file_path, mode='a', if_sheet_exists='replace') as writer:
    unique_psids.to_excel(writer, sheet_name="Processed Data", index=False)


In [None]:
import pandas as pd

# Load Excel file
# "your_file.xlsx" is a placeholder path.
file_path = "your_file.xlsx"
xls = pd.ExcelFile(file_path)

# Read required sheets
# Sheet names carry a date suffix and an apostrophe, hence the double-quoted literals.
df_conso = pd.read_excel(xls, sheet_name='Conso Data')
df_girs = pd.read_excel(xls, sheet_name="GIRS Extract_20 Mar'25")
df_tier1 = pd.read_excel(xls, sheet_name="TIER1_5 Mar'25")
df_ttrl = pd.read_excel(xls, sheet_name="TTRL_3 Mar'25")
# The PDMR sheet is read without a header row; its single column is named 'PDMR'.
df_pdmr = pd.read_excel(xls, sheet_name="PDMR_1Jan'25", header=None, names=['PDMR'])

# Data Cleaning & Standardization
# IDs are compared as text with leading zeros removed; note that lstrip('0') turns an
# ID consisting only of zeros into an empty string.
df_conso['PSID'] = df_conso['PSID'].astype(str).str.lstrip('0')
df_conso['Resource name'] = df_conso['Resource name'].str.title()
# PDMR names are lower-cased; they are later compared against the lower-cased resource name.
df_pdmr['PDMR'] = df_pdmr['PDMR'].str.lower()

df_girs['Employee ID'] = df_girs['Employee ID'].astype(str).str.lstrip('0')
df_girs['Insider Name'] = df_girs['Insider Name'].str.title()

df_tier1['Employee ID'] = df_tier1['Employee ID'].astype(str).str.lstrip('0')
df_tier1['Insider Name'] = df_tier1['Insider Name'].str.title()

df_ttrl['Employee ID'] = df_ttrl['Employee ID'].astype(str).str.lstrip('0')
df_ttrl['Insider Name'] = df_ttrl['Insider Name'].str.title()

# Merge Conso Data with GIRS Extract
# Left join on the normalised ID: every Conso row is kept, GIRS columns are missing when unmatched.
df_merged = df_conso.merge(df_girs, left_on='PSID', right_on='Employee ID', how='left')

# Identify status updates
# IDs listed on the TIER1 / TTRL sheets. Blank cells become "nan" (or "" after
# lstrip) once converted to text, so those two values are excluded to keep a Conso
# row with a blank PSID from "matching" a blank row of a tier sheet.
_blank_ids = {"", "nan"}
tier1_ids = set(df_tier1['Employee ID']) - _blank_ids
ttrl_ids = set(df_ttrl['Employee ID']) - _blank_ids
pdmr_names = set(df_pdmr['PDMR'].dropna())

final_records = []
for _, row in df_merged.iterrows():
    psid = row['PSID']
    resource_name = row['Resource name']
    email = row['Email']
    original_status = row['Role'] if pd.notna(row['Role']) else 'Not in GIRS'
    updated_status = row['Status']
    comments = []

    # Nothing in this cell creates 'TIER1'/'TTRL' columns on the merged frame, so they
    # are read with .get() (no KeyError when absent) and membership on the tier sheets
    # is checked as well. A missing resource name (NaN) cannot be a PDMR match.
    name_key = resource_name.lower() if isinstance(resource_name, str) else None
    in_tier1 = row.get('TIER1') == 'Yes' or psid in tier1_ids
    in_ttrl = row.get('TTRL') == 'Yes' or psid in ttrl_ids
    if in_tier1 or in_ttrl or name_key in pdmr_names:
        comments.append("Employee in TIER1/TTRL/PDMR")

    # A deletion request for a Non-Core insider is recorded as a move to Inactive Insider.
    if original_status == 'Non-Core' and updated_status == 'DEL':
        updated_status = 'Inactive Insider'
        comments.append("Non-Core user marked for deletion")

    # An addition request for an Inactive Insider is recorded as a move to Non-Core.
    if original_status == 'Inactive Insider' and updated_status == 'ADD':
        updated_status = 'Non-Core'
        comments.append("Inactive Insider reactivated as Non-Core")

    final_records.append([psid, resource_name, email, original_status, updated_status, ', '.join(comments)])

# Convert to DataFrame
final_df = pd.DataFrame(final_records, columns=['PSID', 'Resource name', 'Email', 'Original Status', 'Updated Status', 'Comments'])

# Export to Excel
final_df.to_excel("Processed_GIRS_Data.xlsx", index=False)


In [None]:
import pandas as pd

def process_girs_data(file_path):
    """Build the consolidated GIRS request table from one workbook and save it.

    Reads the 'Conso Data', GIRS extract, TIER1, TTRL and PDMR sheets, matches
    each Conso row to GIRS on PSID and upper-cased insider name, flags
    Tier1/TTRL/PDMR membership, derives the updated status, request type and
    comment, writes the result to 'Final_Consolidated_Data.xlsx' and prints
    the output file name.

    Args:
        file_path: Path of the Excel workbook holding the five sheets.

    Returns:
        The DataFrame that was written, with columns 'PSID', 'Insider Name',
        'Email', 'Original Status in GIRS', 'Updated Status Request',
        'Request Type' and 'Comment'.
    """
    # Load required sheets
    # The dated sheet names contain an apostrophe, so they must be double-quoted;
    # single-quoted they end the literal early and the cell does not even parse.
    conso_data = pd.read_excel(file_path, sheet_name='Conso Data')
    girs_data = pd.read_excel(file_path, sheet_name="GIRS Extract_20 Mar'25")
    tier1_data = pd.read_excel(file_path, sheet_name="TIER1_5 Mar'25")
    ttrl_data = pd.read_excel(file_path, sheet_name="TTRL_3 Mar'25")
    pdmr_data = pd.read_excel(file_path, sheet_name="PDMR_1Jan'25", header=None)  # PDMR has one column

    # Standardizing column names
    conso_data.rename(columns={'Resource name': 'Insider Name'}, inplace=True)
    girs_data.rename(columns={'Employee ID': 'PSID', 'Employee Email': 'Email'}, inplace=True)
    tier1_data.rename(columns={'Employee ID': 'PSID', 'Employee Email': 'Email'}, inplace=True)
    ttrl_data.rename(columns={'Employee ID': 'PSID', 'Employee Email': 'Email'}, inplace=True)

    # Convert 'PDMR' column to lowercase for matching
    pdmr_data[0] = pdmr_data[0].str.lower()

    # Remove leading zeros from PSID (IDs are compared as text)
    conso_data['PSID'] = conso_data['PSID'].astype(str).str.lstrip('0')
    girs_data['PSID'] = girs_data['PSID'].astype(str).str.lstrip('0')
    tier1_data['PSID'] = tier1_data['PSID'].astype(str).str.lstrip('0')
    ttrl_data['PSID'] = ttrl_data['PSID'].astype(str).str.lstrip('0')

    # Capitalize Insider Names so the name part of the merge key is case-insensitive
    conso_data['Insider Name'] = conso_data['Insider Name'].str.upper()
    girs_data['Insider Name'] = girs_data['Insider Name'].str.upper()
    tier1_data['Insider Name'] = tier1_data['Insider Name'].str.upper()
    ttrl_data['Insider Name'] = ttrl_data['Insider Name'].str.upper()

    # Merge GIRS Current Status into Conso Data
    # When both sides carry an 'Email' column, the default suffixes would rename them to
    # 'Email_x'/'Email_y' and the final column selection would fail. With these suffixes
    # the Conso column keeps the name 'Email' and the GIRS one becomes 'Email_girs';
    # when Conso has no 'Email', the GIRS column is simply named 'Email'.
    merged_data = conso_data.merge(girs_data[['PSID', 'Insider Name', 'Email', 'Role']],
                                   on=['PSID', 'Insider Name'], how='left',
                                   suffixes=('', '_girs'))
    if 'Email_girs' in merged_data.columns:
        # Prefer the Conso email; fall back to the GIRS email where Conso has none.
        merged_data['Email'] = merged_data['Email'].fillna(merged_data['Email_girs'])
    merged_data.rename(columns={'Role': 'Original Status in GIRS'}, inplace=True)

    # Identify if PSID is present in Tier1, TTRL
    merged_data['Tier1 Flag'] = merged_data['PSID'].isin(tier1_data['PSID']).map({True: 'Yes', False: 'No'})
    merged_data['TTRL Flag'] = merged_data['PSID'].isin(ttrl_data['PSID']).map({True: 'Yes', False: 'No'})

    # Identify if Insider Name matches with PDMR data (both sides lower-cased)
    merged_data['PDMR Flag'] = merged_data['Insider Name'].str.lower().isin(pdmr_data[0]).map({True: 'Yes', False: 'No'})

    # Determine Updated Status Request based on rules
    def determine_updated_status(row):
        # ADD on an Inactive Insider becomes Non-Core; DEL on a Non-Core becomes
        # Inactive Insider; 'Cur' keeps the GIRS status; anything else is passed through.
        if row['Status'] == 'ADD':
            return 'Non-Core' if row['Original Status in GIRS'] == 'Inactive Insider' else 'ADD'
        elif row['Status'] == 'DEL':
            return 'Inactive Insider' if row['Original Status in GIRS'] == 'Non-Core' else 'DEL'
        elif row['Status'] == 'Cur':
            return row['Original Status in GIRS']  # Keep as is
        return row['Status']

    merged_data['Updated Status Request'] = merged_data.apply(determine_updated_status, axis=1)

    # Identify Request Type (New/Updated Request): no GIRS status means a new request
    merged_data['Request Type'] = merged_data['Original Status in GIRS'].apply(
        lambda x: 'New Request' if pd.isna(x) else 'Updated Request'
    )

    # Generate Comments Column
    def generate_comment(row):
        # Collects every applicable remark and joins them with "; ".
        comments = []
        if row['Status'] == 'ADD' and row['Updated Status Request'] == 'ADD':
            comments.append('Fresh Addition')
        if row['Status'] == 'DEL' and row['Updated Status Request'] == 'Inactive Insider':
            comments.append('Converted to Inactive Insider')
        if row['Status'] == 'ADD' and row['Updated Status Request'] == 'Non-Core':
            comments.append('Reactivated to Non-Core')
        if row['Tier1 Flag'] == 'Yes':
            comments.append('Present in Tier1')
        if row['TTRL Flag'] == 'Yes':
            comments.append('Present in TTRL')
        if row['PDMR Flag'] == 'Yes':
            comments.append('Listed in PDMR')
        return '; '.join(comments)

    merged_data['Comment'] = merged_data.apply(generate_comment, axis=1)

    # Select final required columns
    final_columns = ['PSID', 'Insider Name', 'Email', 'Original Status in GIRS',
                     'Updated Status Request', 'Request Type', 'Comment']
    final_data = merged_data[final_columns]

    # Save output to a new Excel file
    output_file = 'Final_Consolidated_Data.xlsx'
    final_data.to_excel(output_file, index=False)
    print(f'Final consolidated data saved as {output_file}')

    return final_data

# Example usage:
# This call sits at cell level, so running the cell immediately reads the workbook
# and writes the output file.
file_path = 'GIRS_Processing.xlsx'  # Update this with your actual file path
final_output = process_girs_data(file_path)
