In [1]:
import pandas as pd 
import numpy as np

In [2]:
def process_data(dat):
    """Summarise the machines that appear in one group of system records.

    Parameters
    ----------
    dat : pandas.DataFrame
        Rows of a single group. Must contain a ``Name0`` column whose values
        are strings (they are handed to ``str.join``).

    Returns
    -------
    pandas.Series
        A one-element Series labelled ``Machine_Name`` holding the distinct
        ``Name0`` values, in order of first appearance, joined by ``', '``.
    """
    # unique() reports values in order of appearance, so the joined string
    # is stable for a given row order.
    distinct_names = dat['Name0'].unique()
    joined_names = ', '.join(name for name in distinct_names)

    return pd.Series([joined_names], index=['Machine_Name'])

def get_user_machine_mappings(system_df):
    """Build the two user-to-machine lookup tables from system records.

    Parameters
    ----------
    system_df : pandas.DataFrame
        Must contain the columns ``User_Name0``, ``RWB_EFFECTIVE_DATE`` and
        ``Name0`` (machine name, string-valued). Rows whose ``Name0`` is
        missing are ignored.

    Returns
    -------
    call_dict : dict
        ``{(user, effective_date): {"Machine_Name": "<m1>, <m2>, ..."}}``:
        the distinct machines recorded for that user on that date, in order
        of first appearance, joined by ``', '``.
    max_mach_dict : dict
        ``{user: {"Name0": machine}}``: the machine with the most records
        for the user. On a tie the machine whose name sorts first wins,
        because the counts are ordered by the (sorted) group keys and
        ``idxmax`` returns the first maximum.
    """
    # Rows without a machine name cannot contribute to either lookup. The
    # count step already ignores them (groupby drops NaN keys), so drop them
    # up front; otherwise ', '.join would fail on the non-string value.
    named_df = system_df.dropna(subset=['Name0'])

    # Distinct machines per (user, day). Aggregating only the Name0 column
    # avoids building a Series per group and does not depend on
    # groupby.apply being handed the grouping columns.
    daily_machines = (
        named_df
        .groupby(['User_Name0', 'RWB_EFFECTIVE_DATE'])['Name0']
        .agg(lambda names: ', '.join(names.unique()))
    )

    # Orient to dictionary keyed by the (user, date) tuple
    call_dict = daily_machines.to_frame('Machine_Name').to_dict(orient='index')

    # Number of records per (user, machine) pair
    usage_counts = (
        named_df
        .groupby(['User_Name0', 'Name0'])
        .size()
        .reset_index(name='count')
    )

    # Row label of the highest count for each user -> most used machine
    top_rows = usage_counts.groupby('User_Name0')['count'].idxmax()
    max_machine = usage_counts.loc[top_rows, ['User_Name0', 'Name0']]

    # Orient the output to dict keyed by user
    max_mach_dict = max_machine.set_index('User_Name0').to_dict(orient='index')

    return call_dict, max_mach_dict


In [None]:
def _most_used_machine(user, max_mach_dict):
    """Return the user's most-used machine, or NaN when none is recorded."""
    try:
        return max_mach_dict[user]['Name0']
    except (KeyError, TypeError):
        return np.nan


def find_machine(x, in_dict, max_mach_dict):
    """Resolve the machine to associate with one incident row.

    Parameters
    ----------
    x : mapping-like (e.g. a DataFrame row or a dict)
        Must provide ``x['calling_user_id']``; ``x['opened_at_date']`` is
        used for the per-day lookup.
    in_dict : dict
        ``{(user, date): {"Machine_Name": str}}`` where the string lists the
        machines seen for that user on that date, comma-separated.
    max_mach_dict : dict
        ``{user: {"Name0": machine}}``, the fallback "most used" machine.

    Returns
    -------
    str or float
        * NaN when the user id is missing or cannot be converted to ``int``.
        * The machine recorded for ``(user, date)`` when exactly one is
          listed there.
        * Otherwise (no entry for that day, or several machines that day)
          the user's most-used machine, or NaN if none is known.
    """
    raw_user = x['calling_user_id']

    # Return NaN if there is no user id
    if pd.isna(raw_user):
        return np.nan

    # The lookup tables are keyed by integer user id; an id that cannot be
    # converted can never match, so treat it like every other unresolved case.
    try:
        user = int(raw_user)
    except (TypeError, ValueError, OverflowError):
        return np.nan

    # Try finding the machine for the day the incident was opened
    try:
        machine = in_dict[(user, x['opened_at_date'])]["Machine_Name"]
    except KeyError:
        # No machine recorded at that time -> most used machine
        return _most_used_machine(user, max_mach_dict)

    # A comma means several machines were joined together for that day;
    # disambiguate with the most used machine instead.
    if isinstance(machine, str) and ',' in machine:
        return _most_used_machine(user, max_mach_dict)

    return machine

def find_user_machinename(incident_path='assets/ServiceNow_Incident.csv',
                          system_path='assets/Persist_System_DISC.parquet',
                          output_path='ServiceNow_Processed_Incidents_v2.csv'):
    """Fill missing ``configuration_item`` values on incidents and save them.

    Reads the incident CSV and the system parquet file, derives a machine
    for every incident from its calling user and opening date, uses that
    machine wherever ``configuration_item`` is missing, and writes the
    result to ``output_path``. The helper columns ``opened_at_date`` and
    ``machine_user_test`` are kept in the written file.

    Parameters
    ----------
    incident_path : str
        CSV with at least ``opened_at``, ``calling_user_id`` and
        ``configuration_item`` columns.
    system_path : str
        Parquet file passed to ``get_user_machine_mappings``.
    output_path : str
        Destination CSV (written without the index).

    Returns
    -------
    None
    """
    # Read in incident data
    df = pd.read_csv(incident_path)

    # Read in System_DISC
    system_df = pd.read_parquet(system_path)

    # Get machine user mappings
    call_dict, max_mach_dict = get_user_machine_mappings(system_df)

    # Create a formatted date field using the date time of incident ticket creation
    # NOTE(review): the per-day lookup only matches if RWB_EFFECTIVE_DATE in
    # the system data is stored as 'YYYY-MM-DD' strings - confirm.
    df['opened_at_date'] = pd.to_datetime(df['opened_at']).dt.strftime('%Y-%m-%d')

    # Find the machine names for each user in the INC data.
    # Built as an explicit list rather than df.apply(axis=1): on an empty
    # frame a row-wise apply returns a DataFrame instead of a Series, which
    # cannot be assigned to a single column.
    machines = [
        find_machine({'calling_user_id': user_id, 'opened_at_date': opened_on},
                     call_dict, max_mach_dict)
        for user_id, opened_on in zip(df['calling_user_id'], df['opened_at_date'])
    ]
    df['machine_user_test'] = pd.Series(machines, index=df.index, dtype=object)

    # Add machines to configuration_item field where it is missing
    df['configuration_item'] = df['configuration_item'].fillna(df['machine_user_test'])

    # Save result to CSV
    df.to_csv(output_path, index=False)

# Run the full enrichment: reads the two input files under assets/ and writes
# ServiceNow_Processed_Incidents_v2.csv relative to the working directory.
find_user_machinename()