# In [None]:
import pandas as pd
import itertools

# Location of the groceries CSV to analyse. This is an absolute Windows path
# tied to one user's machine; change it before running anywhere else.
data = r"C:\Users\isaac\Documents\groceries - groceries.csv"

def process_data(data):
    """Load the grocery baskets and build one column per pair of item slots.

    Args:
        data: Path of the CSV file, or any other object accepted by
            ``pandas.read_csv``. The first column is treated as the basket's
            item count and the item columns are expected to be named
            ``Item 1`` through ``Item 32``.

    Returns:
        A ``(processed_df, unique_items)`` tuple.

        ``processed_df`` holds only the baskets whose item count is two or
        more. The individual item columns are replaced by
        ``"<col a>_and_<col b>_pair"`` columns whose values are
        ``"<item a>-<item b>"`` strings; every other column is kept as is.

        ``unique_items`` is a sorted list of the distinct item names found in
        the item columns of the whole file (missing values and ``"0"``
        placeholders excluded).
    """
    # Load the DataFrame
    df = pd.read_csv(data)

    # Clean up column names by stripping any extra spaces
    df.columns = df.columns.str.strip()

    # Expected item column names: "Item 1" .. "Item 32"
    columns_as_list = [f"Item {number}" for number in range(1, 33)]

    # Warn about absent item columns, then work only with the ones that exist
    # so that a narrower file is processed instead of raising KeyError later.
    missing_columns = [col for col in columns_as_list if col not in df.columns]
    if missing_columns:
        print(f"Warning: The following expected columns are missing: {missing_columns}")
    present_columns = [col for col in columns_as_list if col in df.columns]

    # Only include rows where item count, the first column, is two or more.
    # A basket with exactly two items still forms a pair, hence ">=".
    baskets = df.loc[df.iloc[:, 0] >= 2]

    # Convert each item column to text once, not once per combination.
    item_text = {col: baskets[col].astype(str) for col in present_columns}

    # Build every pair column up front and attach them in a single concat.
    # Assigning them one at a time onto a slice of df triggers pandas'
    # SettingWithCopy and fragmentation warnings.
    pair_columns = {
        f'{column_a}_and_{column_b}_pair': item_text[column_a] + '-' + item_text[column_b]
        for column_a, column_b in itertools.combinations(present_columns, 2)
    }

    # Drop individual item columns, leaving only our item combo columns
    processed_df = pd.concat(
        [
            baskets.drop(columns=present_columns),
            pd.DataFrame(pair_columns, index=baskets.index),
        ],
        axis=1,
    )

    # Extract unique items from the original (unfiltered) columns
    unique_items = set()
    for col in present_columns:
        # Compare after converting to text so a numeric 0 placeholder is
        # excluded as well as the string "0"; NaN is dropped beforehand.
        names = df[col].dropna().astype(str)
        unique_items.update(names[names != "0"].unique())

    # Sorted so the result does not depend on set iteration order.
    return processed_df, sorted(unique_items)

def check(processed_df, column_name, item_a, item_b):
    """Report whether two items appear together in one column.

    The pair is looked up in both orders, ``"item_a-item_b"`` and
    ``"item_b-item_a"``, among the values of ``processed_df[column_name]``.

    Returns:
        True if either ordering is present in the column, otherwise False.
    """
    orderings = (f"{item_a}-{item_b}", f"{item_b}-{item_a}")
    return any(
        candidate in processed_df[column_name].values
        for candidate in orderings
    )

# Process the data
processed_df, unique_items = process_data(data)

# Gather every value that occurs anywhere in the processed frame once.
# Each pair lookup below is then a set membership test; rescanning every
# column for every pair of items costs pairs x columns x rows comparisons.
observed_values = set()
for column_name in processed_df.columns:
    observed_values.update(processed_df[column_name].unique())

# Loop through all unique combinations of items
for item_a, item_b in itertools.combinations(unique_items, 2):
    # A pair counts as found in either order: "a-b" or "b-a"
    if f"{item_a}-{item_b}" in observed_values or f"{item_b}-{item_a}" in observed_values:
        continue

    # The pair does not occur in any column, so report it
    print(f"{item_a}, {item_b} not found in data")
