 # Cleaning and Filtering Dataframe & Diet Function

In [7]:
# Importing packages
import pandas as pd
import re 

In [3]:
# Creating and cleaning data
def drop_nan_columns(df):
    """Return a new DataFrame without the columns of ``df`` that are entirely NaN.

    Columns holding at least one non-missing value are kept, and ``df``
    itself is left unchanged.
    """
    return df.dropna(axis="columns", how="all")

# Trader Joes Price Data
# The UPC column is renamed while loading (no inplace mutation) so that it
# carries the same name as the key used for the merge below.
df_price = pd.read_csv("trader_joes - trader_joes.csv").rename(columns={"gtin_upc": "GTIN/UPC"})
# USDA Data
df_usda = pd.read_csv("USDA-305tj(Sheet1).csv")
# Merging two datasets: the inner join keeps only UPCs present in both files
df_merged = pd.merge(df_price, df_usda, how="inner", on="GTIN/UPC")

# Columns that aren't necessary
dropped_columns = ['brand_owner', 'brand_name', 'Brand Owner', 'Market Country', 'market_country', 'Brand',
                   'modified_date', 'available_date', 'data_source']

# Final Clean Data, indexed by product name.
# The explicit columns are dropped BEFORE the all-NaN columns are pruned:
# pruning first would remove any listed column that happens to be entirely
# NaN, and the subsequent drop would then raise KeyError for it. The
# resulting frame is otherwise the same in either order.
clean_data = drop_nan_columns(df_merged.drop(columns=dropped_columns)).set_index('Name')


In [4]:
# Function that filters out the rows of a dataframe whose column contains any of the given strings

def remove_rows(df, column_name, search_strings):
    """Return the rows of ``df`` whose ``column_name`` mentions none of ``search_strings``.

    Matching is case-insensitive and on whole words only, so ``'HAM'`` matches
    ``"smoked ham"`` but not ``"Hamburger"``. Search strings are escaped and
    therefore treated as literal text, not as regular expressions.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame to filter. It is never modified.
    column_name : str
        Column whose text is searched. Values are compared through their
        string form; missing values (NaN/None) count as empty text and are
        therefore never matched.
    search_strings : iterable of str
        Words or phrases that disqualify a row. Empty strings are ignored.

    Returns
    -------
    pandas.DataFrame
        The rows of ``df`` that contain none of the search strings. When there
        is nothing to search for, a copy of ``df`` with every row is returned.
    """
    # Empty terms are skipped: r'\b\b' would match at every word boundary and
    # wipe out nearly all rows.
    escaped_terms = [re.escape(term) for term in search_strings if term]
    if not escaped_terms:
        # An empty pattern matches every string, which would remove all rows;
        # with no terms to look for, nothing should be removed.
        return df.copy()

    # Work on a separate Series instead of assigning back into ``df`` so the
    # caller's frame keeps its original values (NaN must not become "nan").
    column = df[column_name]
    text = column.astype(object).where(column.notna(), "").astype(str)

    # One alternation wrapped in word boundaries: \b(?:A|B|C)\b
    search_pattern = r'\b(?:' + '|'.join(escaped_terms) + r')\b'
    has_match = text.str.contains(search_pattern, case=False, na=False, regex=True)

    return df[~has_match]

# Vegetarian Example

meats_and_fish = ['CHICKEN', 'BEEF', 'HAM', 'PORK', 'FISH', 'TURKEY', 'SALMON', 'TUNA']
# Keep the result in a name: a bare call in the middle of a cell is neither
# displayed nor stored, so the vegetarian frame would otherwise be lost.
vegetarian_data = remove_rows(clean_data, 'ingredients', meats_and_fish)

# Vegan Example
# Built on top of the vegetarian list so the two lists cannot drift apart.
# 'GELATIN' is listed next to 'GELATINE' because matching is on whole words,
# so the longer spelling alone never matches the shorter one.
# NOTE(review): whole-word matching on 'MILK' / 'BUTTER' / 'CREAM' also drops
# ingredients such as "COCONUT MILK" or "COCOA BUTTER" - confirm this is intended.
animal_products = meats_and_fish + ['MILK', 'BUTTER', 'EGG', 'EGGS', 'HONEY', 'CHEESE', 'YOGURT', 'CREAM',
                                    'GELATIN', 'GELATINE']

vegan_data = remove_rows(clean_data, 'ingredients', animal_products)
# Last expression of the cell: shows the vegan-filtered frame.
vegan_data

Unnamed: 0_level_0,Unnamed: 0,fdc_id,GTIN/UPC,ingredients,not_a_significant_source_of,serving_size,serving_size_unit,household_serving_fulltext,branded_food_category,package_weight,Branded Food Category,Price
Name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
TURBINADO RAW CANE SUGAR,45331,357010,765152,ORGANIC EVAPORATED CANE JUICE.,,4.00,g,1 tsp,"Granulated, Brown & Powdered Sugar",,"Granulated, Brown & Powdered Sugar",3.99
"TRADER JOE'S, COCONUT OIL SPRAY",45342,357021,505000,"MECHANICALLY-PRESSED, REFINED ORGANIC COCONUT ...",,0.25,g,0.333 SECOND SPRAY,Vegetable & Cooking Oils,,Vegetable & Cooking Oils,4.99
WHITE BEAN HUMMUS,45344,357023,911054,"GREAT NORTHERN WHITE BEANS, WATER, TAHINI (PUR...",,30.00,g,2 Tbsp,Dips & Salsa,,Dips & Salsa,5.99
"TRADER JOE'S, VEGETABLE RADIATORE ORGANIC PASTA",45436,357115,362870,"ORGANIC DURUM WHEAT SEMOLINA, SPINACH, BEET, R...",,56.00,g,0.75 cup,Pasta by Shape & Type,,Pasta by Shape & Type,1.99
"TRADER JOE'S, CRUNCHY UNSALTED ORGANIC PEANUT BUTTER",45777,357456,507318,ORGANIC PEANUTS.,,32.00,g,2 Tbsp,Nut & Seed Butters,,Nut & Seed Butters,2.49
...,...,...,...,...,...,...,...,...,...,...,...,...
"TRADER JOE'S, CORNBREAD MIX",1842820,2563428,328098,"ENRICHED UNBLEACHED FLOUR (WHEAT FLOUR, MALTED...",,47.00,GRM,0.11 PACKAGE,"Cake, Cookie & Cupcake Mixes",15 oz/425 g,"Cake, Cookie & Cupcake Mixes",2.99
"TRADER JOE'S, DRIED PITTED TART MONTMORENCY CHERRIES",1893986,2614763,265058,"MONTMORENCY TART CHERRIES,, SUGAR, SUNFLOWER OIL.",,44.00,GRM,0.333 cup,Wholesome Snacks,8 oz/227 g,Wholesome Snacks,3.99
"TRADER JOE'S, SPECULOOS CRISP CARAMELIZED CINNAMON SPICED BELGIAN COOKIES",1895087,2615864,502368,"UNBLEACHED WHEAT FLOUR, SUGAR, VEGETABLE OILS ...",,32.00,GRM,4 COOKIES,Cookies & Biscuits,7 oz/200 g,Cookies & Biscuits,2.49
"TRADER JOE'S, 12 GRAIN MINI SNACK CRACKERS",1896257,2617034,923064,"ENRICHED FLOUR (WHEAT FLOUR, NIACIN, REDUCED I...",,30.00,GRM,22 CRACKERS,Crackers & Biscotti,10 oz/284 g,Crackers & Biscotti,3.49
