In [None]:
import pandas as pd

# Sample DataFrame: one row per order; target is 1 for a delay and 0 for no delay
data = {
    'desk': ['A', 'A', 'B', 'B', 'C', 'C'],
    'order_type': ['X', 'X', 'X', 'X', 'X', 'X'],
    'client': ['Client1', 'Client1', 'Client1', 'Client1', 'Client1', 'Client2'],
    'target': [0, 1, 0, 1, 1, 0]
}

df = pd.DataFrame(data)

# Step 1: Keep only the (order_type, client) combinations that show more than one
# distinct outcome. The per-row distinct-outcome count is computed with a
# vectorized transform instead of a Python lambda per group; the selected rows
# and their index are the same as with groupby(...).filter(...).
group_keys = ['order_type', 'client']
outcomes_per_group = df.groupby(group_keys)['target'].transform('nunique')
grouped = df[outcomes_per_group > 1]

# Step 2: The mixed-outcome requirement is already enforced by Step 1. The former
# per-desk filter (`nunique() > 0`) was true for every non-empty group and so
# removed nothing; the name is kept so later cells can still refer to it.
mixed_outcome = grouped.copy()

# Step 3: Identify which desks are responsible for delays
delay_desks = mixed_outcome[mixed_outcome['target'] == 1]

# Output the results (one line per distinct order_type / client / desk)
print("Desks responsible for delays on the same order type and client:")
print(delay_desks[['order_type', 'client', 'desk']].drop_duplicates())


In [None]:
import pandas as pd

# Sample DataFrame: one row per order; target is 1 for a delay and 0 for no delay
data = {
    'desk': ['A', 'A', 'B', 'B', 'C', 'C'],
    'order_type': ['X'] * 6,
    'client': ['Client1'] * 5 + ['Client2'],
    'target': [0, 1, 0, 1, 1, 0],
}

df = pd.DataFrame(data)

# Step 1: Restrict to (order_type, client) combinations with more than one
# distinct target value
pair_keys = ['order_type', 'client']
has_mixed_outcomes = df.groupby(pair_keys)['target'].transform('nunique') > 1
grouped = df.loc[has_mixed_outcomes]

# Step 2: Split the remaining rows by outcome
is_delayed = grouped['target'] == 1
delay_desks = grouped.loc[is_delayed].copy()
no_delay_desks = grouped.loc[grouped['target'] == 0].copy()

# Step 3: Pair every delayed row with every non-delayed row that shares the
# same order_type and client (inner join, so unmatched rows drop out)
comparison = delay_desks.merge(
    no_delay_desks,
    on=pair_keys,
    suffixes=('_delayed', '_not_delayed'),
)

# Step 4: Show which desk delayed and which did not for each pairing
print("Comparison of desks with and without delays for the same order type and client:")
print(comparison[['order_type', 'client', 'desk_delayed', 'desk_not_delayed']])


In [None]:
# Per-desk delay statistics. Relies on `df` (columns 'desk' and 'target')
# created by an earlier cell.

# Count the number of delays caused by each desk
delay_count = df[df['target'] == 1].groupby('desk').size().reset_index(name='delay_count')

# Count the number of no-delays for each desk
no_delay_count = df[df['target'] == 0].groupby('desk').size().reset_index(name='no_delay_count')

# Merge the delay and no-delay counts; the outer join keeps desks that appear
# on only one side, and their missing count becomes 0
desk_stats = pd.merge(delay_count, no_delay_count, on='desk', how='outer').fillna(0)

# Calculate the total number of records for each desk
desk_stats['total_count'] = desk_stats['delay_count'] + desk_stats['no_delay_count']

# Calculate the ratio of delays to no-delays. Dividing by a zero no_delay_count
# yields +/-inf, which is mapped to NaN and then to 0. float('nan') is used
# rather than np.nan because numpy is not imported in this notebook, so the
# previous np reference raised NameError on a fresh kernel.
# NOTE: a desk with delays but no no-delay records therefore gets ratio 0,
# the same value as a desk with no delays at all.
raw_ratio = desk_stats['delay_count'] / desk_stats['no_delay_count']
desk_stats['delay_to_no_delay_ratio'] = (
    raw_ratio
    .replace([float('inf'), -float('inf')], float('nan'))
    .fillna(0)
)

# Sort by the ratio or any other statistic you prefer
desk_stats = desk_stats.sort_values(by='delay_to_no_delay_ratio', ascending=False)

print("Desk Statistics:")
print(desk_stats)


In [None]:

# Pair desks that delayed more than once with desks that never delayed, within
# the same order_type and client. Relies on `df` created by an earlier cell.
group_keys = ['order_type', 'client', 'desk']

# Count the number of delays (target == 1) for each desk, order_type, and client
delay_count = df[df['target'] == 1].groupby(group_keys).size().reset_index(name='delay_count')

# Count the number of no-delays (target == 0) for each desk, order_type, and client
no_delay_count = df[df['target'] == 0].groupby(group_keys).size().reset_index(name='no_delay_count')

# Filter desks that have caused delays more than once
delays_more_than_once = delay_count[delay_count['delay_count'] > 1]

# Filter desks that have never caused delays: they have no-delay records and do
# NOT appear in delay_count for the same order_type and client. The previous
# test `no_delay_count > 0` was true for every row (group sizes are at least 1),
# so desks with delays were kept and could even be paired with themselves.
no_delays = no_delay_count.merge(
    delay_count[group_keys], on=group_keys, how='left', indicator=True
)
no_delays = no_delays[no_delays['_merge'] == 'left_only'].drop(columns=['_merge'])

# Merge on order_type and client so each repeat-delay desk is paired with every
# never-delayed desk serving the same order_type and client
filtered = pd.merge(delays_more_than_once, no_delays, on=['order_type', 'client'], suffixes=('', '_no_delay'))


print("Desks that caused delays more than once and have never caused delays:")
print(filtered[['order_type', 'client', 'desk', 'desk_no_delay']])


In [None]:
import pandas as pd

# Sample DataFrame: same orders as before plus desk D, which has a single
# no-delay order for Client1
data = {
    'desk': ['A', 'A', 'B', 'B', 'C', 'C', 'D'],
    'order_type': ['X'] * 7,
    'client': ['Client1'] * 5 + ['Client2', 'Client1'],
    'target': [0, 1, 0, 1, 1, 0, 0],
}

df = pd.DataFrame(data)

count_keys = ['order_type', 'client', 'desk']

# Delays (target == 1) per order_type / client / desk
delayed_rows = df.loc[df['target'].eq(1)]
delay_count = delayed_rows.groupby(count_keys).size().rename('delay_count').reset_index()

# No-delays (target == 0) per order_type / client / desk
on_time_rows = df.loc[df['target'].eq(0)]
no_delay_count = on_time_rows.groupby(count_keys).size().rename('no_delay_count').reset_index()

# Desks whose delay count for a given order_type / client exceeds one
desks_with_delays = delay_count.loc[delay_count['delay_count'].gt(1)]

# Desks with at least one no-delay record for a given order_type / client
desks_with_no_delays = no_delay_count.loc[no_delay_count['no_delay_count'].gt(0)]

# Remove desks from no_delays if they are present in the delays list for the same order_type and client
def remove_desks_in_no_delay(df, desks_with_delays, desks_with_no_delays):
    """Drop from ``df`` the rows of no-delay desks that are not in the delays list.

    A no-delay desk is "kept" when its (order_type, client, desk) combination
    does not occur in ``desks_with_delays``. Every row of ``df`` matching a
    kept combination is then removed from the result.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame to filter; must contain 'order_type', 'client' and 'desk'.
    desks_with_delays : pandas.DataFrame
        Must contain 'order_type', 'client' and 'desk'; may be empty.
    desks_with_no_delays : pandas.DataFrame
        Must contain 'order_type', 'client' and 'desk'; may be empty.

    Returns
    -------
    pandas.DataFrame
        The rows of ``df`` that match no kept no-delay combination, with the
        columns of ``df``. The index comes from the intermediate merge result,
        not from ``df``'s own index.
    """
    key_cols = ['order_type', 'client', 'desk']

    # Anti-join: keep the no-delay combinations that are absent from the delays
    # list. The delay keys are de-duplicated so the left merge cannot multiply
    # no-delay rows. Unlike building a frame from a list of kept rows, this
    # still has the key columns when nothing is kept, so the merge below does
    # not fail with KeyError in that case.
    delayed_keys = desks_with_delays[key_cols].drop_duplicates()
    no_delay_flagged = desks_with_no_delays[key_cols].merge(
        delayed_keys, on=key_cols, how='left', indicator=True
    )
    filtered_no_delay = no_delay_flagged.loc[
        no_delay_flagged['_merge'] == 'left_only', key_cols
    ]

    # Merge back with original DataFrame to get final result
    final_df = df.merge(filtered_no_delay, on=key_cols, how='left', indicator=True)

    # Keep only rows where the indicator is 'left_only' (i.e., those not present in filtered_no_delay)
    final_df = final_df[final_df['_merge'] == 'left_only'].drop(columns=['_merge'])

    return final_df

# Run the filter on the sample data using the desk lists built above
filtered_df = remove_desks_in_no_delay(
    df=df,
    desks_with_delays=desks_with_delays,
    desks_with_no_delays=desks_with_no_delays,
)

# Show the rows that remain
print("Filtered DataFrame:")
print(filtered_df)
