In [1]:
import google.generativeai as genai
import os
from dotenv import load_dotenv

# Load environment variables via python-dotenv, then pass the Google API key
# read from the environment to the Gemini client configuration.
load_dotenv()
gemini_api_key = os.environ.get("GOOGLE_API_KEY")
genai.configure(api_key=gemini_api_key)

# Create the Gemini model handle.
llm = genai.GenerativeModel("gemini-2.0-flash")

In [1]:
import pdfplumber
import pandas as pd

def extract_tables_from_pdf(pdf_path, normalize_whitespace=False):
    """Extract every table found in a PDF into a single DataFrame.

    The first row of each table is used as its column header and the
    remaining rows as data. Tables are concatenated in page order with a
    fresh integer index.

    Args:
        pdf_path: Value passed straight to ``pdfplumber.open``.
        normalize_whitespace: When True, collapse every run of whitespace
            (including line breaks inside wrapped cells) to a single space
            in both headers and cells. Defaults to False, which keeps the
            text exactly as extracted.

    Returns:
        A DataFrame holding the rows of all extracted tables, or an empty
        DataFrame when no table is found.
    """

    def _clean(cell):
        # Only string cells are touched; None (empty cell) passes through.
        if normalize_whitespace and isinstance(cell, str):
            return " ".join(cell.split())
        return cell

    frames = []
    with pdfplumber.open(pdf_path) as pdf:
        for page in pdf.pages:
            # extract_tables() yields every table on the page, whereas
            # extract_table() returns a single table and drops the others.
            for table in page.extract_tables():
                if not table:
                    continue
                header, *rows = ([_clean(cell) for cell in row] for row in table)
                frames.append(pd.DataFrame(rows, columns=header))
    return pd.concat(frames, ignore_index=True) if frames else pd.DataFrame()

In [2]:
# Relative path of the PDF whose tables are extracted.
pdf_path = "data/pizza_ingredients.pdf"

In [4]:
# Parse the PDF's tables into one DataFrame (empty when no table is found).
df = extract_tables_from_pdf(pdf_path)

In [5]:
# Render each table row as one string, with cell values cast to str and
# joined by " | ".
texts = list(
    df.apply(lambda record: " | ".join(record.astype(str)), axis=1)
)

In [7]:
# Display the extracted table for inspection.
df

Unnamed: 0,Pizza Name,Crust\nType,Sauce,Cheese,Toppings
0,Margherita,Thin,Tomato,Mozzarella,Basil
1,Pepperoni,Classic,Tomato,Mozzarella,Pepperoni
2,BBQ Chicken,Hand-\ntossed,BBQ,"Mozzarella,\nCheddar","Grilled Chicken, Red Onion, Cilantro"
3,Veggie\nSupreme,Whole\nWheat,Tomato,Mozzarella,"Bell Peppers, Olives, Mushrooms, Onions"
4,Hawaiian,Classic,Tomato,Mozzarella,"Ham, Pineapple"
5,Meat Lovers,Thick,Tomato,Mozzarella,"Pepperoni, Sausage, Bacon, Ham"
6,Four Cheese,Thin,Tomato,"Mozzarella,\nParmesan,\nGorgonzola,\nRicotta",—
7,Buffalo\nChicken,Hand-\ntossed,Buffalo\nSauce,Mozzarella,"Grilled Chicken, Red Onion, Ranch Drizzle"
8,Mediterranean,Thin,Olive Oil &\nGarlic,"Feta, Mozzarella","Kalamata Olives, Spinach, Sun-dried Tomatoes"
9,Mushroom\nTruffle,Thin,White\nSauce,Mozzarella,"Mushrooms, Truffle Oil"


In [6]:
# Display the " | "-joined row strings.
texts

['Margherita | Thin | Tomato | Mozzarella | Basil',
 'Pepperoni | Classic | Tomato | Mozzarella | Pepperoni',
 'BBQ Chicken | Hand-\ntossed | BBQ | Mozzarella,\nCheddar | Grilled Chicken, Red Onion, Cilantro',
 'Veggie\nSupreme | Whole\nWheat | Tomato | Mozzarella | Bell Peppers, Olives, Mushrooms, Onions',
 'Hawaiian | Classic | Tomato | Mozzarella | Ham, Pineapple',
 'Meat Lovers | Thick | Tomato | Mozzarella | Pepperoni, Sausage, Bacon, Ham',
 'Four Cheese | Thin | Tomato | Mozzarella,\nParmesan,\nGorgonzola,\nRicotta | —',
 'Buffalo\nChicken | Hand-\ntossed | Buffalo\nSauce | Mozzarella | Grilled Chicken, Red Onion, Ranch Drizzle',
 'Mediterranean | Thin | Olive Oil &\nGarlic | Feta, Mozzarella | Kalamata Olives, Spinach, Sun-dried Tomatoes',
 'Mushroom\nTruffle | Thin | White\nSauce | Mozzarella | Mushrooms, Truffle Oil']

In [13]:
# Inspect the second row (position 1) as a {column name: value} dict.
df.to_dict(orient="records")[1]

{'Pizza Name': 'Pepperoni',
 'Crust\nType': 'Classic',
 'Sauce': 'Tomato',
 'Cheese': 'Mozzarella',
 'Toppings': 'Pepperoni'}