In [32]:
import pandas as pd
import os

# Canonical label -> title prefixes that map to it. Prefixes are stored
# upper-cased once and compared against upper-cased titles, so every casing
# of a title is recognised without enumerating case variants by hand.
# Entries are applied in order; if a title ever matched two entries, the
# later one would win.
_POSITION_RULES = (
    ('REPARTOS', ('REPA', 'DISTRIBUTO')),
    ('AYUDANTE DE BODEGA', (
        'AYUDANTE DE BODE', 'AYUDANTE BODE', 'WAREHOUSE C',
        # Misspelled prefixes kept so titles that matched before still match.
        'ATUDANTE DE BODE', 'ATUDANDE DE BODE', 'AYUDANATE BODE', 'AYUDANDE BODE',
    )),
    ('BUSINESS DEVELOPER REPRESENTATIVE', (
        'BUSINESS DEV', 'BUSINES DEV', 'BUSSINES DEV', 'BUSINESS DVE',
        'PREVENTIS', 'SALES BD', 'SALES REPRE', 'SUPERVISOR DE NEG',
    )),
    ('AYUDANTE DE VENDEDOR', (
        'AYUDANTE DE REPAR', 'AYUDANTE DE VENDED', 'DISTRIBUTION AUX', 'DISTRIBUTION TECH',
        # Misspelled prefix kept so titles that matched before still match.
        'AYUDANDE DE VENDED',
    )),
    ('MONTACARGUISTA', ('MONTACARG', 'FORKLIFT DRI')),
    ('SUPERVISOR DE DISTRIBUCION', (
        'SUPERVISOR DE DISTRIBU', 'SUPERVISOR DISTRIBU',
        'SUPERVISOR DE REPAR', 'SUPERVISOR REPAR',
    )),
    ('SUPERVISOR DE ALMACEN', ('SUPERVISOR DE ALMAC', 'SUPERVISOR ALMAC')),
    ('COORDINADOR DE ALMACEN', ('COORDINADOR DE ALMAC', 'COORDINADOR ALMAC')),
    ('COORDINADOR DE DISTRIBUCION', ('COORDINADOR DE DISTRIBU', 'COORDINADOR DISTRIBU')),
    ('DELEGADO SINDICAL', ('DELEGADO SIN',)),
    ('GERENTE DE OPERACIONES', ('GERENTE DE OPER', 'GERENTE OPER')),
    ('JEFE DE ALMACEN', ('JEFE DE ALMAC', 'JEFE ALMAC')),
)


def _clean_path(raw_path):
    # Strip whitespace and surrounding quotes from a pasted path, then normalize it.
    return os.path.normpath(raw_path.strip().strip('"').strip("'"))


def categorize_positions(input_file: str, output_file: str, column_name: str) -> None:
    """Replace free-form position titles in an Excel sheet with canonical labels.

    The workbook at ``input_file`` is read, every cell of ``column_name``
    whose text starts with one of the known prefixes is overwritten with the
    canonical label for that prefix, and the result is written to
    ``output_file``. Cells that match no prefix (including empty and
    non-text cells) are left untouched, as are all other columns.

    Matching is case-insensitive and ignores leading whitespace in the cell.

    Args:
        input_file: Path to the source Excel file; surrounding whitespace and
            quotes are removed before use.
        output_file: Path of the Excel file to write; cleaned the same way.
        column_name: Name of the column holding the position titles.

    Raises:
        KeyError: If ``column_name`` is not a column of the input sheet.
    """
    input_file = _clean_path(input_file)
    output_file = _clean_path(output_file)

    # Read the Excel file
    df = pd.read_excel(input_file)

    if column_name not in df.columns:
        raise KeyError(
            f"Column {column_name!r} not found; available columns: {list(df.columns)}"
        )

    # Normalized copy used only for matching. Casting to the string dtype
    # keeps the .str accessor usable even when the column holds no text at
    # all; missing values become NA and never match.
    titles = df[column_name].astype("string").str.lstrip().str.upper()

    # Every mask is computed from the untouched titles before any cell is
    # overwritten, so one rule's label can never feed another rule's match.
    masks = [
        (label, titles.str.startswith(prefixes, na=False).astype(bool))
        for label, prefixes in _POSITION_RULES
    ]

    for label, mask in masks:
        # Skip empty masks so a column without matches keeps its dtype.
        if mask.any():
            df.loc[mask, column_name] = label

    # Save the updated DataFrame to a new Excel file
    df.to_excel(output_file, index=False)

# Run the interactive workflow only when this file is executed directly
# (script or notebook cell), so importing it never blocks waiting on stdin.
if __name__ == "__main__":
    # Collect user inputs
    input_file = input("Enter the path to the input Excel file: ")
    output_file = input("Enter the path to the output Excel file: ")
    column_name = input("Enter the name of the column to categorize: ")

    # Call the function with user inputs
    categorize_positions(input_file, output_file, column_name)

Enter the path to the input Excel file:  "C:\Users\alexa\OneDrive\Escritorio\GRUPO MODELO\HC.xlsx"
Enter the path to the output Excel file:  "C:\Users\alexa\OneDrive\Escritorio\GRUPO MODELO\HC_cleaned.xlsx"
Enter the name of the column to categorize:  Position Title
