In [1]:
import os
import pandas as pd
from pathlib import Path

def read_file_auto(filename, folder='data'):
    full_path = f"{folder}/{filename}"
    print(f"Reading: {full_path}")  # Print the full path being read
    ext = Path(full_path).suffix.lower()
    
    try:
        if ext == '.csv':
            return pd.read_csv(full_path)
        elif ext == '.tsv':
            return pd.read_csv(full_path, sep='\t')
        elif ext == '.json':
            return pd.read_json(full_path)
        elif ext == '.parquet':
            return pd.read_parquet(full_path)
        elif ext == '.pkl':
            return pd.read_pickle(full_path)
        else:
            print(f"Unsupported file format: {full_path}")
            return None
    except Exception as e:
        print(f"Failed to read {full_path}: {e}")
        return None

# Read each file using the updated function
data = 'data'
files = os.listdir(data)


dataframes = {}
for file in files:
    df = read_file_auto(file, folder='data')
    if df is not None:
        key = Path(file).stem  # filename without extension
        dataframes[key] = df

print("Loaded DataFrames:", list(dataframes.keys()))

Reading: data/epi_r.csv
Reading: data/food_data.csv
Reading: data/full_format_recipes.json
Reading: data/groceries.csv


  return pd.read_csv(full_path)


Reading: data/openfoodfacts_products.pkl
Reading: data/personalized_diet_recommendations.csv
Reading: data/recipes.parquet
Reading: data/repo
Unsupported file format: data/repo
Reading: data/reviews.parquet
Loaded DataFrames: ['epi_r', 'food_data', 'full_format_recipes', 'groceries', 'openfoodfacts_products', 'personalized_diet_recommendations', 'recipes', 'reviews']


# Recipe datasets:
### ['full_format_recipes' 'epi_r']
### 'recipes'


# diet categorization:
### 'personalized_diet_recommendations': 
contains recommended intakes for different people. We can classify this into relatively coarse groups. 

['High-Protein Diet', 'Balanced Diet', 'Low-Fat Diet',
       'Low-Carb Diet']

Where high protein has requirement above x grams of protein
balanced diet has requirement of protein, calory, fat carbs in a certain range.
low fat has requirement fat under x grams
same for carbs

later we can add more detail for preferred cuisine, food aversions.

### 'food_data': 
the Food column contains 184 different foods which are classified into allergy types.


# food information:
### 'food_data'
contains type of food classifies it into different levels and assigns to allergy
### 'groceries'
prices of foods, category, product name, size of packaging
### 'openfoodfacts_products'
contains product names, quantity and very detailed nutrients per100grams


## redundant:
we can probably add the rating to each recipe for a recommendation system if we have time left
### 'reviews'

In [46]:
dataframes['epi_r'].columns

Index(['title', 'rating', 'calories', 'protein', 'fat', 'sodium', '#cakeweek',
       '#wasteless', '22-minute meals', '3-ingredient recipes',
       ...
       'yellow squash', 'yogurt', 'yonkers', 'yuca', 'zucchini', 'cookbooks',
       'leftovers', 'snack', 'snack week', 'turkey'],
      dtype='object', length=680)

In [48]:
dataframes['epi_r'].columns.tolist()

['title',
 'rating',
 'calories',
 'protein',
 'fat',
 'sodium',
 '#cakeweek',
 '#wasteless',
 '22-minute meals',
 '3-ingredient recipes',
 '30 days of groceries',
 'advance prep required',
 'alabama',
 'alaska',
 'alcoholic',
 'almond',
 'amaretto',
 'anchovy',
 'anise',
 'anniversary',
 'anthony bourdain',
 'aperitif',
 'appetizer',
 'apple',
 'apple juice',
 'apricot',
 'arizona',
 'artichoke',
 'arugula',
 'asian pear',
 'asparagus',
 'aspen',
 'atlanta',
 'australia',
 'avocado',
 'back to school',
 'backyard bbq',
 'bacon',
 'bake',
 'banana',
 'barley',
 'basil',
 'bass',
 'bastille day',
 'bean',
 'beef',
 'beef rib',
 'beef shank',
 'beef tenderloin',
 'beer',
 'beet',
 'bell pepper',
 'berry',
 'beverly hills',
 'birthday',
 'biscuit',
 'bitters',
 'blackberry',
 'blender',
 'blue cheese',
 'blueberry',
 'boil',
 'bok choy',
 'bon appétit',
 'bon app��tit',
 'boston',
 'bourbon',
 'braise',
 'bran',
 'brandy',
 'bread',
 'breadcrumbs',
 'breakfast',
 'brie',
 'brine',
 'brisk

In [None]:
dataframes['full_format_recipes'][['fat'. ]]

Unnamed: 0,directions,fat,date,categories,calories,desc,protein,rating,title,ingredients,sodium
0,"[1. Place the stock, lentils, celery, carrot, ...",7.0,2006-09-01 04:00:00+00:00,"[Sandwich, Bean, Fruit, Tomato, turkey, Vegeta...",426.0,,30.0,2.500,"Lentil, Apple, and Turkey Wrap","[4 cups low-sodium vegetable or chicken stock,...",559.0
1,[Combine first 9 ingredients in heavy medium s...,23.0,2004-08-20 04:00:00+00:00,"[Food Processor, Onion, Pork, Bake, Bastille D...",403.0,This uses the same ingredients found in boudin...,18.0,4.375,Boudin Blanc Terrine with Red Onion Confit,"[1 1/2 cups whipping cream, 2 medium onions, c...",1439.0
2,[In a large heavy saucepan cook diced fennel a...,7.0,2004-08-20 04:00:00+00:00,"[Soup/Stew, Dairy, Potato, Vegetable, Fennel, ...",165.0,,6.0,3.750,Potato and Fennel Soup Hodge,"[1 fennel bulb (sometimes called anise), stalk...",165.0
3,[Heat oil in heavy large skillet over medium-h...,,2009-03-27 04:00:00+00:00,"[Fish, Olive, Tomato, Sauté, Low Fat, Low Cal,...",,The Sicilian-style tomato sauce has tons of Me...,,5.000,Mahi-Mahi in Tomato Olive Sauce,"[2 tablespoons extra-virgin olive oil, 1 cup c...",
4,[Preheat oven to 350°F. Lightly grease 8x8x2-i...,32.0,2004-08-20 04:00:00+00:00,"[Cheese, Dairy, Pasta, Vegetable, Side, Bake, ...",547.0,,20.0,3.125,Spinach Noodle Casserole,"[1 12-ounce package frozen spinach soufflé, th...",452.0
...,...,...,...,...,...,...,...,...,...,...,...
20125,[Beat whites in a bowl with an electric mixer ...,2.0,2004-08-20 04:00:00+00:00,"[Mixer, Cheese, Egg, Fry, Cocktail Party, Parm...",28.0,,2.0,3.125,Parmesan Puffs,"[2 large egg whites, 3 oz Parmigiano-Reggiano,...",64.0
20126,[Bring broth to simmer in saucepan.Remove from...,28.0,2008-02-28 22:06:54+00:00,"[Side, Kid-Friendly, High Fiber, Dinner, Parme...",671.0,Cooking the artichokes with the rice infuses t...,22.0,4.375,Artichoke and Parmesan Risotto,"[5 1/2 cups (or more) low-salt chicken broth, ...",583.0
20127,"[Using a sharp knife, cut a shallow X in botto...",38.0,2005-10-21 18:21:20+00:00,"[Onion, Poultry, turkey, Vegetable, Bake, Kid-...",563.0,,31.0,4.375,Turkey Cream Puff Pie,"[1 small tomato, 1 small onion, finely chopped...",652.0
20128,[Heat 2 tablespoons oil in heavy medium skille...,24.0,2004-08-20 04:00:00+00:00,"[Milk/Cream, Citrus, Dairy, Fish, Garlic, Past...",631.0,"Sharon Hooykaas of Los Alamitos, California, w...",45.0,4.375,Snapper on Angel Hair with Citrus Cream,"[4 tablespoons olive oil, 4 shallots, thinly s...",517.0


In [31]:
dataframes['full_format_recipes'].iloc[0].directions

['1. Place the stock, lentils, celery, carrot, thyme, and salt in a medium saucepan and bring to a boil. Reduce heat to low and simmer until the lentils are tender, about 30 minutes, depending on the lentils. (If they begin to dry out, add water as needed.) Remove and discard the thyme. Drain and transfer the mixture to a bowl; let cool.',
 '2. Fold in the tomato, apple, lemon juice, and olive oil. Season with the pepper.',
 '3. To assemble a wrap, place 1 lavash sheet on a clean work surface. Spread some of the lentil mixture on the end nearest you, leaving a 1-inch border. Top with several slices of turkey, then some of the lettuce. Roll up the lavash, slice crosswise, and serve. If using tortillas, spread the lentils in the center, top with the turkey and lettuce, and fold up the bottom, left side, and right side before rolling away from you.']

In [5]:
dataframes['food_data']

Unnamed: 0,Class,Type,Group,Food,Allergy
0,Plant origin,Nut and seed,Oil seed,Almond,Nut Allergy
1,Plant origin,Fruit,Pome fruit,Apple,Oral Allergy Syndrome
2,Plant origin,Fruit,Stone fruit,Apricot,Stone Fruit Allergy
3,Plant origin,Vegetable,Composite vegetable,Artichoke,Insulin Allergy
4,Plant origin,Vegetable,Liliaceous vegetable,Asparagus,Allium Allergy
...,...,...,...,...,...
179,Plant origin,Cereal grain and pulse,Cereal grain,Wheat,Gluten Allergy
180,Animal origin,Dairy,Dairy,Whey,Milk allergy / Lactose intolerance
181,Plant origin,Cereal grain and pulse,Pulse,White bean,Legume Allergy
182,Plant origin,Vegetable,Potato,Yam,Potato Allergy


In [34]:
dataframes['food_data'][dataframes['food_data']['Type'] ==  'Nut and seed']

Unnamed: 0,Class,Type,Group,Food,Allergy
0,Plant origin,Nut and seed,Oil seed,Almond,Nut Allergy
24,Plant origin,Nut and seed,Beverage seed,Cacao bean,
33,Plant origin,Nut and seed,Oil seed,Chestnut,Nut Allergy
38,Plant origin,Nut and seed,Beverage seed,Coffee bean,Ochratoxin Allergy
41,Plant origin,Nut and seed,Oil seed,Cotton seed,Seed Allergy
58,Plant origin,Nut and seed,Oil seed,Ginkgo nut,Nut Allergy
117,Plant origin,Nut and seed,Oil seed,Pecan,Nut Allergy
131,Plant origin,Nut and seed,Oil seed,Rapeseed,Seed Allergy
137,Plant origin,Nut and seed,Oil seed,Safflower seed,Seed Allergy
143,Plant origin,Nut and seed,Oil seed,Sesame seed,Seed Allergy


In [17]:
dataframes['groceries']['PRODUCT_NAME'].iloc[:50]

0           Marketside Roasted Red Pepper Hummus, 10 Oz
1               Marketside Roasted Garlic Hummus, 10 Oz
2                      Marketside Classic Hummus, 10 Oz
3                   Marketside Everything Hummus, 10 oz
4                          Price's Jalapeno Dip, 12 Oz.
5                       Price's Green Chili Dip, 12 Oz.
6                   Dean's, French Onion Dip, 16 oz Tub
7             Marketside Spinach & Artichoke Dip, 16 Oz
8         Fresh Cravings Roasted Red Pepper Hummus 10oz
9           Marketside Buffalo Style Chicken Dip, 11 oz
10                       Marketside Spicy Hummus, 10 oz
11                    Marketside Pine Nut Hummus, 10 oz
12                Marketside Cantina Style Salsa, 16 oz
13          Fresh Cravings Everything Bagel Hummus 10oz
14           Fresh Cravings Roasted Garlic Hummus 10 oz
15            Fresh Cravings Honey Jalapeno Hummus 10oz
16                   Fresh Cravings Classic Hummus 10oz
17           Freshness Guaranteed Guacamole, Mil

In [43]:
dataframes['groceries'][
    dataframes['groceries']['PRODUCT_NAME'].str.contains('blueberr', case=False, na=False)
]


Unnamed: 0,index,SHIPPING_LOCATION,DEPARTMENT,CATEGORY,SUBCATEGORY,BREADCRUMBS,SKU,PRODUCT_URL,PRODUCT_NAME,BRAND,PRICE_RETAIL,PRICE_CURRENT,PRODUCT_SIZE,PROMOTION,RunDate,tid
88,88,48180,Beverages,Energy Drinks,,Beverages/Energy Drinks,133257890,https://www.walmart.com/ip/V8-ENERGY-Pomegrana...,"V8 +ENERGY Pomegranate Blueberry Energy Drink,...",V8,9.38,9.38,8,,2022-09-11 21:20:04,16163892
109,109,48180,Beverages,Energy Drinks,,Beverages/Energy Drinks,19766232,https://www.walmart.com/ip/V8-ENERGY-Pomegrana...,"V8 +ENERGY Pomegranate Blueberry Energy Drink,...",V8,5.14,5.14,8,,2022-09-11 21:20:04,16163913
111,111,48180,Beverages,Energy Drinks,,Beverages/Energy Drinks,158080429,https://www.walmart.com/ip/Red-Bull-Energy-Dri...,"Red Bull Energy Drink, Blueberry, 12 Fl Oz (4 ...",Red Bull,10.48,10.48,12,,2022-09-11 21:20:04,16163915
260,260,48180,Beverages,Energy Drinks,,Beverages/Energy Drinks,45733432,https://www.walmart.com/ip/Red-Bull-Energy-Dri...,"Red Bull Energy Drink, Blueberry, 12 Fl Oz",Red Bull,2.68,2.68,12,,2022-09-11 21:20:04,16164064
383,383,48180,Beverages,Energy Drinks,,Beverages/Energy Drinks,16821302,https://www.walmart.com/ip/vitaminwater-xxx-el...,"vitaminwater xxx, electrolyte enhanced water w...",vitaminwater,1.38,1.38,20,,2022-09-11 21:20:04,16164187
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
567638,567638,77449,Alcohol,Wine,White Wine,Alcohol/Wine,382842125,https://www.walmart.com/ip/Oliver-Winery-Vine-...,"Oliver Winery - Vine Series Blueberry Moscato,...",Oliver,9.48,9.48,750,,2022-09-11 21:20:04,16731442
567682,567682,33647,Alcohol,Wine,White Wine,Alcohol/Wine,382842125,https://www.walmart.com/ip/Oliver-Winery-Vine-...,"Oliver Winery - Vine Series Blueberry Moscato,...",Oliver,9.98,8.98,750,,2022-09-11 21:20:04,16731486
568355,568355,75211,Alcohol,Wine,White Wine,Alcohol/Wine,382842125,https://www.walmart.com/ip/Oliver-Winery-Vine-...,"Oliver Winery - Vine Series Blueberry Moscato,...",Oliver,9.48,9.48,750,,2022-09-11 21:20:04,16732159
568485,568485,70072,Alcohol,Wine,White Wine,Alcohol/Wine,382842125,https://www.walmart.com/ip/Oliver-Winery-Vine-...,"Oliver Winery - Vine Series Blueberry Moscato,...",Oliver,10.98,10.98,750,,2022-09-11 21:20:04,16732289


In [41]:
dataframes['groceries']['PRODUCT_NAME']

0         Marketside Roasted Red Pepper Hummus, 10 Oz
1             Marketside Roasted Garlic Hummus, 10 Oz
2                    Marketside Classic Hummus, 10 Oz
3                 Marketside Everything Hummus, 10 oz
4                        Price's Jalapeno Dip, 12 Oz.
                             ...                     
568529             Farm Fresh Blueberry Moscato 750ml
568530                Farm Fresh Peach Moscato 750 Ml
568531             Farm Fresh Raspberry Moscato 750ml
568532                 Farm Fresh Mango Moscato 750ml
568533            Ole Orleans Heritage Riesling 750ml
Name: PRODUCT_NAME, Length: 568534, dtype: object

In [3]:
dataframes['openfoodfacts_products'].columns

Index(['code', 'url', 'creator', 'created_t', 'created_datetime',
       'last_modified_t', 'last_modified_datetime', 'product_name',
       'generic_name', 'quantity',
       ...
       'fruits-vegetables-nuts_100g', 'fruits-vegetables-nuts-estimate_100g',
       'collagen-meat-protein-ratio_100g', 'cocoa_100g', 'chlorophyl_100g',
       'carbon-footprint_100g', 'nutrition-score-fr_100g',
       'nutrition-score-uk_100g', 'glycemic-index_100g',
       'water-hardness_100g'],
      dtype='object', length=163)

In [35]:
# dataframes['openfoodfacts_products'][
#     dataframes['openfoodfacts_products']['product_name'].str.contains('blueberr', case=False, na=False)
# ]


In [12]:
dataframes['personalized_diet_recommendations'].columns

Index(['Patient_ID', 'Age', 'Gender', 'Height_cm', 'Weight_kg', 'BMI',
       'Chronic_Disease', 'Blood_Pressure_Systolic',
       'Blood_Pressure_Diastolic', 'Cholesterol_Level', 'Blood_Sugar_Level',
       'Genetic_Risk_Factor', 'Allergies', 'Daily_Steps', 'Exercise_Frequency',
       'Sleep_Hours', 'Alcohol_Consumption', 'Smoking_Habit', 'Dietary_Habits',
       'Caloric_Intake', 'Protein_Intake', 'Carbohydrate_Intake', 'Fat_Intake',
       'Preferred_Cuisine', 'Food_Aversions', 'Recommended_Calories',
       'Recommended_Protein', 'Recommended_Carbs', 'Recommended_Fats',
       'Recommended_Meal_Plan'],
      dtype='object')

In [20]:
dataframes['personalized_diet_recommendations'][['Dietary_Habits', 'Allergies']]

Unnamed: 0,Dietary_Habits,Allergies
0,Vegetarian,
1,Vegetarian,
2,Vegetarian,Gluten Intolerance
3,Vegetarian,Nut Allergy
4,Regular,
...,...,...
4995,Vegetarian,Gluten Intolerance
4996,Keto,Gluten Intolerance
4997,Regular,Nut Allergy
4998,Keto,


In [None]:
# secondary categories

dataframes['personalized_diet_recommendations'][['Preferred_Cuisine', 'Food_Aversions', 'Recommended_Calories',
       'Recommended_Protein', 'Recommended_Carbs', 'Recommended_Fats',
       'Recommended_Meal_Plan']]

In [39]:
dataframes['recipes'].columns

Index(['RecipeId', 'Name', 'AuthorId', 'AuthorName', 'CookTime', 'PrepTime',
       'TotalTime', 'DatePublished', 'Description', 'Images', 'RecipeCategory',
       'Keywords', 'RecipeIngredientQuantities', 'RecipeIngredientParts',
       'AggregatedRating', 'ReviewCount', 'Calories', 'FatContent',
       'SaturatedFatContent', 'CholesterolContent', 'SodiumContent',
       'CarbohydrateContent', 'FiberContent', 'SugarContent', 'ProteinContent',
       'RecipeServings', 'RecipeYield', 'RecipeInstructions'],
      dtype='object')

In [30]:
dataframes['recipes'][['RecipeIngredientParts']]

Unnamed: 0,RecipeIngredientParts
0,"[blueberries, granulated sugar, vanilla yogurt..."
1,"[saffron, milk, hot green chili peppers, onion..."
2,"[sugar, lemons, rind of, lemon, zest of, fresh..."
3,"[extra firm tofu, eggplant, zucchini, mushroom..."
4,"[plain tomato juice, cabbage, onion, carrots, ..."
...,...
522512,"[fresh ginger, unsalted butter, dark brown sug..."
522513,"[Dijon mustard, garlic, peppercorns, shallot, ..."
522514,"[half-and-half, heavy cream, brandy, sugar]"
522515,"[wasabi paste, dill, English cucumber, smoked ..."


In [26]:
dataframes['recipes'][['Calories', 'FatContent','SaturatedFatContent', 'CarbohydrateContent', 'ProteinContent']]

Unnamed: 0,Calories,FatContent,SaturatedFatContent,CarbohydrateContent,ProteinContent
0,170.9,2.5,1.3,37.1,3.2
1,1110.7,58.8,16.6,84.4,63.4
2,311.1,0.2,0.0,81.5,0.3
3,536.1,24.0,3.8,64.2,29.3
4,103.6,0.4,0.1,25.1,4.3
...,...,...,...,...,...
522512,316.6,12.5,7.6,48.5,3.9
522513,2063.4,172.4,71.4,3.2,117.0
522514,1271.3,117.2,72.6,33.9,12.8
522515,16.1,0.6,0.1,0.3,2.4


In [40]:
dataframes['reviews']

Unnamed: 0,ReviewId,RecipeId,AuthorId,AuthorName,Rating,Review,DateSubmitted,DateModified
0,2,992,2008,gayg msft,5,better than any you can get at a restaurant!,2000-01-25 21:44:00+00:00,2000-01-25 21:44:00+00:00
1,7,4384,1634,Bill Hilbrich,4,"I cut back on the mayo, and made up the differ...",2001-10-17 16:49:59+00:00,2001-10-17 16:49:59+00:00
2,9,4523,2046,Gay Gilmore ckpt,2,i think i did something wrong because i could ...,2000-02-25 09:00:00+00:00,2000-02-25 09:00:00+00:00
3,13,7435,1773,Malarkey Test,5,easily the best i have ever had. juicy flavor...,2000-03-13 21:15:00+00:00,2000-03-13 21:15:00+00:00
4,14,44,2085,Tony Small,5,An excellent dish.,2000-03-28 12:51:00+00:00,2000-03-28 12:51:00+00:00
...,...,...,...,...,...,...,...,...
1401977,2090339,139499,2002080368,terrylbiggs,2,I was disappointed. I couldn't wait to make th...,2020-12-27 23:57:54+00:00,2020-12-27 23:57:54+00:00
1401978,2090340,148484,41805321,rogerberry,5,Nothing to drain. And I don’t heat up the liqu...,2020-12-28 00:44:42+00:00,2020-12-28 00:44:42+00:00
1401979,2090341,264191,2002901848,Reiketsukan 6.,5,Good base recipe for someone to start with. I ...,2020-12-28 01:04:43+00:00,2020-12-28 01:04:43+00:00
1401980,2090345,411791,2002901938,Sue M.,5,Thank you so much for this amazing recipe! I l...,2020-12-28 03:07:10+00:00,2020-12-28 03:07:10+00:00
