# Exploratory Data Analysis

Here we will explore the raw recipe data that we stored in PostgreSQL. This should be relatively straightforward as we just need to do some basic cleaning and get it ready for our feature engineering step.

In [2]:
import os
from urllib.parse import quote

import pandas as pd
from dotenv import load_dotenv
from sqlalchemy import create_engine, Column, Integer, String, JSON, Float
from sqlalchemy.orm import sessionmaker, declarative_base

# Populate os.environ via python-dotenv (presumably from a local .env file --
# confirm where the file lives) so the database password is never hardcoded
# in the notebook.
load_dotenv()

# Indexing os.environ (instead of .get) raises KeyError when the variable is
# missing, so a misconfigured environment fails here rather than later at
# connection time.
postgresql_password = os.environ["POSTGRESQL_IIFYMATE_PASSWORD"]

In [4]:
# Declarative base that ORM model classes in this notebook inherit from.
Base = declarative_base()

class Raw_Recipe(Base):
    """ORM model for one row of the ``raw_recipes`` table.

    Scalar attributes are declared as Integer/String/Float columns, and the
    remaining attributes are declared as JSON columns. No explicit column
    names are passed to ``Column``, so each attribute name is also the
    database column name.

    The notebook's active code loads this table with ``pd.read_sql`` rather
    than through this model.

    NOTE(review): the attribute names look like they mirror the Edamam
    recipe API response (the ``uri`` values in the sample output point at
    edamam.com) -- confirm against the ingestion code.
    """
    __tablename__ = 'raw_recipes'
    
    # Integer primary key.
    id = Column(Integer, primary_key=True)
    # Declared unique, so no two rows may share a uri.
    uri = Column(String, unique=True)
    label = Column(String)
    url = Column(String)
    # `yield` is a reserved word in Python, hence (presumably) the trailing
    # underscore; the database column is therefore named `yield_` as well.
    yield_ = Column(Integer)
    # JSON columns; in the sample rows displayed in this notebook these hold
    # lists (e.g. dietLabels, ingredients) or dicts (e.g. totalNutrients).
    dietLabels = Column(JSON)
    healthLabels = Column(JSON)
    cautions = Column(JSON)
    ingredientLines = Column(JSON)
    ingredients = Column(JSON)
    # NOTE(review): units are not stated here (calories presumably kcal,
    # totalWeight presumably grams, totalTime presumably minutes) -- confirm.
    calories = Column(Float)
    totalWeight = Column(Float)
    totalTime = Column(Integer)
    cuisineType = Column(JSON)
    mealType = Column(JSON)
    dishType = Column(JSON)
    totalNutrients = Column(JSON)
    totalDaily = Column(JSON)
    digest = Column(JSON)
    tags = Column(JSON)

In [3]:
# Build the SQLAlchemy engine for the local `raw_recipes` PostgreSQL database,
# connecting as user `iifymate`.
#
# The password is percent-encoded before being embedded in the URL: characters
# such as '@', ':', '/', '%' or '#' would otherwise be read as URL delimiters
# and corrupt the credentials or host. safe="" makes quote() escape '/' too.
# Passwords made only of unreserved characters are left unchanged.
encoded_password = quote(postgresql_password, safe="")
engine = create_engine(f'postgresql://iifymate:{encoded_password}@localhost/raw_recipes')

# The ORM route (sessionmaker + session.query(Raw_Recipe).all()) is not used
# here; the table is read straight into pandas from the engine instead.

In [7]:
# Read every row and column of raw_recipes into a DataFrame via the engine.
df = pd.read_sql(
    sql='SELECT * FROM raw_recipes',
    con=engine,
)

In [9]:
# Preview the first two rows to check which columns were loaded and what
# kind of values (scalars, lists, dicts) each one holds.
df.head(2)

Unnamed: 0,id,uri,label,url,yield_,dietLabels,healthLabels,cautions,ingredientLines,ingredients,calories,totalWeight,totalTime,cuisineType,mealType,dishType,totalNutrients,totalDaily,digest,tags
0,1,http://www.edamam.com/ontologies/edamam.owl#re...,Mom’s Swedish Potatoes recipes,https://kitchendivas.com/moms-swedish-potatoes/,4,[],"[Sugar-Conscious, Vegetarian, Pescatarian, Egg...",[Sulfites],"[4 potatoes - 4, 1/2 cup Parmesan cheese grate...","[{'text': '4 potatoes - 4', 'quantity': 4.0, '...",1867.94925,1066.853125,0,[nordic],[lunch/dinner],[condiments and sauces],"{'ENERC_KCAL': {'label': 'Energy', 'quantity':...","{'ENERC_KCAL': {'label': 'Energy', 'quantity':...","[{'label': 'Fat', 'tag': 'FAT', 'schemaOrgTag'...","[potatoes, potato, potato dishes, swedish, swe..."
1,2,http://www.edamam.com/ontologies/edamam.owl#re...,Soft Chocolate Chip Cookies,https://recipes.sparkpeople.com/recipe-detail....,125,[Low-Sodium],"[Low Potassium, Kidney-Friendly, Vegetarian, P...",[Sulfites],"[4.5 c. white flour, 2 tsp. baking soda, 2 c. ...","[{'text': '4.5 c. white flour', 'quantity': 4....",13300.936001,2778.9,36,[american],[teatime],[biscuits and cookies],"{'ENERC_KCAL': {'label': 'Energy', 'quantity':...","{'ENERC_KCAL': {'label': 'Energy', 'quantity':...","[{'label': 'Fat', 'tag': 'FAT', 'schemaOrgTag'...","[Dessert, Other, Desserts Dessert, Other Desse..."
