### Cleanup Items

* Get all CSV files in the '/dev/clean_data' folder
* For each file, retain only the 'ASIN' column and the one-hot encoded columns of each category
* Merge all categories into a single itemset

In [27]:
import os
import pandas as pd
# Data directory: <current working dir>/../dev/clean_data.
# Built component-wise so the platform's path separator is used throughout
# instead of a hard-coded '/' appended to an os.path.join result.
folder_path = os.path.join(os.getcwd(), os.pardir, 'dev', 'clean_data')
folder_path

def get_csv_files(directory):
    """
    Recursively collect the paths of all '.csv' files under a directory.

    Walks `directory` with os.walk and returns a list of paths (each one the
    containing folder joined with the file name), in the order os.walk
    yields them. The extension check is case-sensitive.
    """
    return [
        os.path.join(parent, name)
        for parent, _subdirs, names in os.walk(directory)
        for name in names
        if name.endswith('.csv')
    ]

def _one_hot_columns(df):
    """
    Return the names of the non-'ASIN' columns whose non-null values all equal 1.
    """
    # A column with no non-null values passes vacuously (all() of an empty
    # Series is True), so all-NaN columns are retained as well.
    return [
        col for col in df.columns
        if col != 'ASIN' and df[col].dropna().eq(1).all()
    ]


def get_itemset(directory=None):
    """
    Build one itemset DataFrame from every CSV file under a directory.

    For each file found by get_csv_files, only the 'ASIN' column and the
    columns whose non-null values are all 1 are kept; the per-file frames
    are then outer-merged one after another and exact duplicate rows are
    dropped.

    Parameters
    ----------
    directory : str, optional
        Folder to scan for CSV files. Defaults to the module-level
        `folder_path` when omitted.

    Returns
    -------
    pandas.DataFrame
        The merged itemset; an empty DataFrame when no CSV file is found.

    Raises
    ------
    KeyError
        If a CSV file has no 'ASIN' column.
    """
    if directory is None:
        directory = folder_path
    df_itemset = pd.DataFrame()

    for file in get_csv_files(directory):
        print('Merging :',  os.path.basename(file))
        df = pd.read_csv(file, low_memory=False)
        if 'ASIN' not in df.columns:
            # Same exception type the column selection would raise, but
            # naming the offending file.
            raise KeyError("'ASIN' column not found in {}".format(file))
        # Keep only 'ASIN' and one hot encoded columns
        filtered_df = df[['ASIN'] + _one_hot_columns(df)]
        if df_itemset.empty:
            # Nothing accumulated yet: take this frame as the starting itemset
            df_itemset = filtered_df
        else:
            # With no `on` argument, merge joins on every column the two
            # frames share (always 'ASIN', plus any shared category columns).
            # NOTE(review): an ASIN whose shared category flags differ between
            # files would therefore end up on separate rows -- confirm this
            # is intended.
            df_itemset = pd.merge(df_itemset, filtered_df, how='outer')
    return df_itemset.drop_duplicates()
# Build the merged itemset from the CSV files under folder_path; the returned
# DataFrame is left as the value of this last expression.
get_itemset()

Merging : items_Bedroom.csv
Merging : items_Computer Components.csv
Merging : items_Bathroom.csv
Merging : items_Cleaning Material.csv
Merging : items_Car Stuff.csv
Merging : items_Mobile Accessories.csv
Merging : items_Fashion.csv
Merging : items_Books.csv
Merging : items_Peripheral Devices.csv
Merging : items_Electronic Devices.csv
Merging : items_Personal Care.csv
Merging : items_Office Supplies.csv
Merging : items_Travel Essentials.csv
Merging : items_Kitchen.csv
Merging : items_Children.csv
Merging : items_Living Room.csv


Unnamed: 0,ASIN,Home & Kitchen,Bedding,Comforters & Sets,Comforter Sets,Kids' Bedding,Baby Products,Nursery,Toddler Bedding,Bedding Sets,...,Torches,Pest Control,Bug Zappers,Bistro Sets,Outdoor Curtains,Patio Furniture Covers,Furniture Set Covers,Figurine Lights,Storage Benches,Boot & Shoe Boxes
0,B0CMSW6JNM,1.0,1.0,1.0,1.0,,,,,,...,,,,,,,,,,
1,B0CH9Y8CBQ,1.0,1.0,1.0,1.0,,,,,,...,,,,,,,,,,
2,B0CC6N7XR9,1.0,1.0,1.0,1.0,1.0,,,,,...,,,,,,,,,,
3,B0B4ZCYXMX,,1.0,,,,1.0,1.0,1.0,1.0,...,,,,,,,,,,
4,B0B2DG8QRL,1.0,1.0,,,,,,,,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
34427,B07QXK9WG5,1.0,,,,,,,,,...,,,,,,,,,,
34428,B0CHXTR17D,1.0,,,,,,,,,...,,,,,,,,,,
34429,B0C9MZWQ1D,1.0,,,,,,,,,...,,,,,,,,,,
34430,B0CLRHN99R,1.0,,,,,,,,,...,,,,,,,,,,
