### Cleanup Items

* Get all CSV files in the '/dev/clean_data' folder
* For each file retain only one-hot encoded columns in each category
* Merge all categories into a single itemset

In [37]:
import os
import pandas as pd
# Directory holding the cleaned per-category CSV files: '<cwd>/../dev/clean_data'.
# Every component goes through os.path.join so the separators stay consistent
# instead of mixing a joined path with a hard-coded '/'-separated suffix.
folder_path = os.path.join(os.getcwd(), os.pardir, 'dev', 'clean_data')
folder_path

def get_csv_files(directory):
    """
    Recursively collect the paths of every '.csv' file below a directory.

    The tree rooted at `directory` is traversed with os.walk; each file whose
    name ends with '.csv' (case-sensitive) is returned joined to the folder
    it was found in. The result is an empty list when nothing matches.
    """
    return [
        os.path.join(parent, name)
        for parent, _subdirs, names in os.walk(directory)
        for name in names
        if name.endswith('.csv')
    ]

def get_itemset(directory=None):
    """
    Combine the one-hot encoded columns of every category CSV into one itemset.

    Each CSV found under `directory` is read, tagged with a 'category' column
    derived from its file name, reduced to 'ASIN', 'category' and its one-hot
    columns, and outer-merged into the running result. Duplicate rows are
    dropped before returning.

    Parameters:
        directory: folder to scan for CSV files. Defaults to the module-level
            `folder_path` when omitted.

    Returns:
        A DataFrame with 'ASIN', 'category' and the union of all one-hot
        columns; an empty DataFrame when no CSV files are found.

    Raises:
        IndexError: if a CSV file name contains no '_' (see category parsing).
        KeyError: if a CSV file has no 'ASIN' column.
    """
    if directory is None:
        directory = folder_path
    df_itemset = pd.DataFrame()

    for file in get_csv_files(directory):
        # The category is the text between the first and second '_' of the
        # file name, cut at the first '.', lower-cased
        # (e.g. 'x_Bedroom.csv' -> 'bedroom').
        category = os.path.basename(file).split('_')[1].split('.')[0].lower()
        print('Merging :',  category)
        df = pd.read_csv(file, low_memory=False)
        df['category'] = category

        # Keep only 'ASIN', 'category' and one hot encoded columns
        one_hot_columns = []
        for col in df.columns:
            if col in ('ASIN', 'category'):
                continue
            values = df[col].dropna()
            # A one-hot column has at least one value and every non-null value
            # equals 1. Requiring a value stops all-NaN columns from passing
            # the check vacuously.
            if not values.empty and (values == 1).all():
                one_hot_columns.append(col)
        filtered_df = df[['ASIN', 'category'] + one_hot_columns]

        if df_itemset.empty:
            # Direct assignment for the first DataFrame
            df_itemset = filtered_df
        else:
            # With no explicit keys, merge joins on every column the two
            # frames share; 'outer' keeps the rows from both sides.
            df_itemset = pd.merge(df_itemset, filtered_df, how='outer')
    df_itemset = df_itemset.drop_duplicates()
    return df_itemset
# Build the combined itemset; as the last expression in the cell, the resulting DataFrame is displayed.
get_itemset()

Merging : bedroom
Merging : computer components
Merging : bathroom
Merging : cleaning material
Merging : car stuff
Merging : mobile accessories
Merging : fashion
Merging : books
Merging : peripheral devices
Merging : electronic devices
Merging : personal care
Merging : office supplies
Merging : travel essentials
Merging : kitchen
Merging : children
Merging : living room


Unnamed: 0,ASIN,category,Home & Kitchen,Bedding,Comforters & Sets,Comforter Sets,Kids' Bedding,Baby Products,Nursery,Toddler Bedding,...,Torches,Pest Control,Bug Zappers,Bistro Sets,Outdoor Curtains,Patio Furniture Covers,Furniture Set Covers,Figurine Lights,Storage Benches,Boot & Shoe Boxes
0,B0CMSW6JNM,bedroom,1.0,1.0,1.0,1.0,,,,,...,,,,,,,,,,
1,B0CH9Y8CBQ,bedroom,1.0,1.0,1.0,1.0,,,,,...,,,,,,,,,,
2,B0CC6N7XR9,bedroom,1.0,1.0,1.0,1.0,1.0,,,,...,,,,,,,,,,
3,B0B4ZCYXMX,bedroom,,1.0,,,,1.0,1.0,1.0,...,,,,,,,,,,
4,B0B2DG8QRL,bedroom,1.0,1.0,,,,,,,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
34614,B07QXK9WG5,living room,1.0,,,,,,,,...,,,,,,,,,,
34615,B0CHXTR17D,living room,1.0,,,,,,,,...,,,,,,,,,,
34616,B0C9MZWQ1D,living room,1.0,,,,,,,,...,,,,,,,,,,
34617,B0CLRHN99R,living room,1.0,,,,,,,,...,,,,,,,,,,
