# AI-Recipe-Generator


In [1]:
import os
from dotenv import find_dotenv, load_dotenv
from langchain_groq import ChatGroq
from langchain_core.prompts import PromptTemplate
from groq import Groq
import base64
import streamlit as st
import requests

from IPython.display import Markdown, display

In [8]:
load_dotenv(find_dotenv())
GROQ_API = os.getenv("GROQ_API_KEY")

model_name = 'llama-3.3-70b-versatile'
llm  =  ChatGroq(api_key=GROQ_API,
    model_name=model_name,
    temperature=0.0,
    
)

client = Groq(api_key= GROQ_API)

In [9]:
def encode_image(image_path):
  with open(image_path, "rb") as image_file:
    return base64.b64encode(image_file.read()).decode('utf-8')

In [13]:
def image_to_text(url):
    base64_image = encode_image(url)
    completion = client.chat.completions.create(
        messages=[
            {
                "role": "user",
                "content": [
                    {"type": "text", "text": "What's in this image?"},
                    {
                        "type": "image_url",
                        "image_url": {
                            "url": f"data:image/jpeg;base64,{base64_image}",
                        },
                    },
                ],
            }
        ],
        model="llama-3.2-11b-vision-preview",
    )
    text = completion.choices[0].message
    return text




In [14]:
def generate_recipe(ingredients):
    template = """
    You are a extremely knowledgeable nutritionist, bodybuilder and chef who also knows
                everything one needs to know about the best quick, healthy recipes. 
                You know all there is to know about healthy foods, healthy recipes that keep 
                people lean and help them build muscles, and lose stubborn fat.
                
                You've also trained many top performers athletes in body building, and in extremely 
                amazing physique. 
                
                You understand how to help people who don't have much time and or 
                ingredients to make meals fast depending on what they can find in the kitchen. 
                Your job is to assist users with questions related to finding the best recipes and 
                cooking instructions depending on the following variables:
                0/ {ingredients}
                
                When finding the best recipes and instructions to cook,
                you'll answer with confidence and to the point.
                Keep in mind the time constraint of 5-10 minutes when coming up
                with recipes and instructions as well as the recipe.
                
                If the {ingredients} are less than 3, feel free to add a few more
                as long as they will compliment the healthy meal.
                
            
                Make sure to format your answer as follows:
                - The name of the meal as bold title (new line)
                - Best for recipe category (bold)
                    
                - Preparation Time (header)
                    
                - Difficulty (bold):
                    Easy
                - Ingredients (bold)
                    List all ingredients 
                - Kitchen tools needed (bold)
                    List kitchen tools needed
                - Instructions (bold)
                    List all instructions to put the meal together
                - Macros (bold): 
                    Total calories
                    List each ingredient calories
                    List all macros 
                    
                    Please make sure to be brief and to the point.  
                    Make the instructions easy to follow and step-by-step .
    """
    prompt = PromptTemplate(template=template, input_variables=["ingredients"])
    recipe_chain = prompt | llm
    recipe = recipe_chain.invoke(ingredients)


    return recipe

In [15]:
ingredients =image_to_text("mango_fruits.jpeg")

In [16]:

display(Markdown(ingredients.content))

The image displays a variety of fruits, including frozen strawberries and bananas, and several bowls of chopped fruit and a glass of water is included. In the lower-left corner of the image, there is a glasses filled with ice and a pitcher filled with water, with three lemon halves next to it. Scattered strawberries and an almond are below it. Above the water glasses, there are two bowls of chopped fruit: on the left is a white bowl with dark gray edging filled with diced yellow mango and a white bowl with black edging filled with whole strawberries that have been frozen. To the right is another white bowl with black edging filled with sliced yellow bananas. Above those are one final white bowl with a black edging filled with more yellow banana slices. Next to the bowls are bunches of ripe, yellow bananas; whole mangoes (one above the other); and almonds above a tall, silver measuring cup that that says "1 TBSP" on one side. In front of the measuring cups are a round measuring cup with brown liquid that says "1 TBSP" and a wide, white cup with an indentation in the shape of a bowl. The cups are placed on a wooden table with natural wood grain.

In [31]:
recipe = generate_recipe(ingredients=ingredients)

In [32]:

display(Markdown(recipe.content))

**Mango Strawberry Banana Smoothie**
**Best for:** **Post-Workout Recovery**, **Healthy Snack**, **Quick Breakfast**

## Preparation Time
5 minutes

## Difficulty
**Easy**

## Ingredients
* 1 ripe mango
* 2 bananas
* 1/2 cup sliced frozen strawberries
* 1/2 cup whole strawberries
* 1/2 cup ice
* 1/2 cup water
* 1 lemon slice
* 1 tablespoon almond slivers
* 1 tablespoon brown sugar
* 2 tablespoons coconut cream

## Kitchen tools needed
* Blender
* Cutting board
* Knife
* Measuring cups
* Spoon

## Instructions
1. Add the mango, bananas, frozen strawberries, whole strawberries, ice, and water to a blender.
2. Squeeze the lemon slice into the blender and add the almond slivers.
3. Add the brown sugar and coconut cream to the blender.
4. Blend the mixture on high speed until smooth and creamy.
5. Pour the smoothie into a glass and serve immediately.

## Macros
* Total calories: 350
* Mango: 100 calories
* Bananas: 140 calories
* Strawberries: 50 calories
* Ice: 0 calories
* Water: 0 calories
* Lemon slice: 2 calories
* Almond slivers: 50 calories
* Brown sugar: 60 calories
* Coconut cream: 100 calories
* Protein: 5g
* Fat: 15g
* Carbohydrates: 50g
* Fiber: 5g
* Sugar: 30g
* Sodium: 50mg

# AI-Invoice-Extractor


In [33]:

from pypdf import PdfReader
import pandas as pd
import re
from langchain.prompts import PromptTemplate
from langchain.agents.agent_types import AgentType
from ast import literal_eval
import os
from dotenv import find_dotenv, load_dotenv
from langchain_groq import ChatGroq
from io import BytesIO
from pypdf import PdfReader
import pandas as pd
import re
from ast import literal_eval

In [34]:
load_dotenv()
GROQ_API = os.getenv("GROQ_API_KEY")
model_name = 'llama-3.3-70b-versatile'
llm  =  ChatGroq(api_key=GROQ_API,
    model_name=model_name,
    temperature=0.0,
    
)

In [35]:
# Modified PDF text extraction function
def get_pdf_text(uploaded_file):
    text = ""
    # Create a bytes stream from uploaded file
    pdf_bytes = BytesIO(uploaded_file.getvalue())
    pdf_reader = PdfReader(pdf_bytes)
    for page in pdf_reader.pages:
        text += page.extract_text()
    return text

In [37]:
def extracted_data(pages_data):
	template = """Extract all the following values: Invoice ID, DESCRIPTION, Issue Date, 
		UNIT PRICE, AMOUNT, Bill For, From, and Terms from: {pages}
	
		Expected output format (REMOVE DOLLAR SIGNS AND COMMAS):
		{{
			'Invoice ID': '1001329',
			'DESCRIPTION': 'Professional Services',
			'Issue Date': '5/4/2023',
			'UNIT PRICE': '100.00',
			'AMOUNT': '1100.00',
			'Bill For': 'James',
			'From': 'Excel Company',
			'Terms': 'Due on receipt'
		}}"""
	prompt_template = PromptTemplate(input_variables=["pages"], template=template)
	response = llm.invoke(prompt_template.format(pages=pages_data))
	return response.content

In [38]:
def clean_currency(value):
    if isinstance(value, str):
        return float(value.replace('$', '').replace(',', '').strip())
    return value


In [39]:
def clean_currency(value):
    if isinstance(value, str):
        return float(value.replace('$', '').replace(',', '').strip())
    return value


def create_docs(user_pdf_list):
    # Initialize DataFrame with proper types
    df = pd.DataFrame({
        'Invoice ID': pd.Series(dtype='str'),
        'DESCRIPTION': pd.Series(dtype='str'),
        'Issue Date': pd.Series(dtype='str'),
        'UNIT PRICE': pd.Series(dtype='float'),
        'AMOUNT': pd.Series(dtype='float'),
        'Bill For': pd.Series(dtype='str'),
        'From': pd.Series(dtype='str'),
        'Terms': pd.Series(dtype='str')
    })

    for filename in user_pdf_list:
        try:
            raw_data = get_pdf_text(filename)
            llm_extracted_data = extracted_data(raw_data)
            
            # Extract content between curly braces
            pattern = r'{(.+?)}'
            match = re.search(pattern, llm_extracted_data, re.DOTALL)
            
            if not match:
                print("No valid data found in LLM output")
                continue
                
            extracted_text = match.group(1).strip()
            
            try:
                # Safely evaluate the string to dictionary
                data_dict = literal_eval('{' + extracted_text + '}')
                
                # Clean currency fields
                for money_col in ['UNIT PRICE', 'AMOUNT']:
                    if money_col in data_dict:
                        data_dict[money_col] = clean_currency(data_dict[money_col])
                
                # Handle key mismatches
                if 'Date' in data_dict and 'Issue Date' not in data_dict:
                    data_dict['Issue Date'] = data_dict.pop('Date')
                
                # Add to DataFrame
                df = pd.concat([df, pd.DataFrame([data_dict])], ignore_index=True)
                
            except (SyntaxError, ValueError, KeyError) as e:
                print(f"Error processing extracted data: {e}")
                continue
                
        except Exception as e:
            print(f"Error processing file {filename}: {e}")
            continue

    return df

['data/Phone-bill.pdf', 'data/Rental-bill.pdf', 'data/Water-sew-bill.pdf']

In [71]:
import pandas as pd
import re
import json
from io import BytesIO
from PyPDF2 import PdfReader
from langchain.prompts import PromptTemplate

In [73]:
# Function to extract text from a PDF
def get_pdf_text(uploaded_file):
    """Extracts text from a PDF file."""
    text = ""
    pdf_bytes = BytesIO(uploaded_file.read())  # Read file into a BytesIO object
    pdf_reader = PdfReader(pdf_bytes)
    
    for page in pdf_reader.pages:
        text += page.extract_text() if page.extract_text() else ""
    
    return text

In [75]:
with open("data/Phone-bill.pdf", "rb") as file:
    pdf_text = get_pdf_text(BytesIO(file.read()))

pdf_text

'Phone BillFrom: DR-TeleP\n1583 E. TanneVa Ln\nNekaspo, WE 99010\nTel #590-327-3987\nBill For: Paul Regex Invoice ID 2,389\n1110 112THAVE W, SUITE 89626 Issue Date 11/27/2026\nSATURNEY, WA 99765 Due Date Upon receipt\nTel # 12876494 Terms Due upon receipt\nDESCRIPTION QUANTITY UNIT PRICE AMOUNT\nPhone and data bill $500.00 $500.00\n$0.00\n$0.00\n$0.00\n$0.00\n$0.00\n$0.00\n$0.00\n$0.00\nSUBTOTAL $500.00\nTAX RATE 0.00%\nTAX $0.00\nAMOUNT DUE $500.00'

In [70]:
# Function to extract structured invoice data using LLM
def extracted_data(pages_data, llm):
    """Uses LLM to extract structured invoice data."""
    template = """Extract all the following values: Invoice ID, DESCRIPTION, Issue Date, 
    UNIT PRICE, AMOUNT, Bill For, From, and Terms from the given text:

    {pages}

    Expected output format (REMOVE DOLLAR SIGNS AND COMMAS):
    {{
        "Invoice ID": "1001329",
        "DESCRIPTION": "Professional Services",
        "Issue Date": "5/4/2023",
        "UNIT PRICE": "100.00",
        "AMOUNT": "1100.00",
        "Bill For": "James",
        "From": "Excel Company",
        "Terms": "Due on receipt"
    }}"""

    prompt_template = PromptTemplate(input_variables=["pages"], template=template)

    # Generate response from LLM
    response = llm.invoke(prompt_template.format(pages=pages_data))

    # Handle response content safely
    return response.content if hasattr(response, 'content') else str(response)

In [76]:
data = extracted_data(pdf_text, llm)
print(data)

Here are the extracted values in the expected output format:

```
{
    "Invoice ID": "2389",
    "DESCRIPTION": "Phone and data bill",
    "Issue Date": "11/27/2026",
    "UNIT PRICE": "500.00",
    "AMOUNT": "500.00",
    "Bill For": "Paul Regex",
    "From": "DR-TeleP",
    "Terms": "Due upon receipt"
}
```


In [69]:
# Function to clean currency values
def clean_currency(value):
    """Removes currency symbols and commas, converts to float."""
    if isinstance(value, str):
        return float(value.replace('$', '').replace(',', '').strip())
    return value

In [68]:
# Main function to process PDF invoices
def create_docs(user_pdf_list, llm):
    """Extracts structured invoice data from PDFs and returns a DataFrame."""
    
    # Initialize an empty DataFrame
    df = pd.DataFrame(columns=[
        'Invoice ID', 'DESCRIPTION', 'Issue Date', 'UNIT PRICE', 
        'AMOUNT', 'Bill For', 'From', 'Terms'
    ])

    for file_path in user_pdf_list:
        try:
            # Open the PDF file
            with open(file_path, "rb") as file:
                raw_data = get_pdf_text(file)  # Extract text
            
            # Extract structured data using LLM
            llm_extracted_data = extracted_data(raw_data, llm)

            # Find JSON-like structure in the response
            match = re.search(r'\{.*\}', llm_extracted_data, re.DOTALL)
            if not match:
                print(f"⚠️ No structured data found in LLM response for {file_path}")
                continue
            
            extracted_text = match.group(0).strip()  # Extract full JSON-like text

            try:
                # Convert extracted JSON string into a dictionary
                data_dict = json.loads(extracted_text)

                # Clean currency fields
                for money_col in ['UNIT PRICE', 'AMOUNT']:
                    if money_col in data_dict:
                        data_dict[money_col] = clean_currency(data_dict[money_col])
                
                # Handle key mismatches (e.g., "Date" instead of "Issue Date")
                if 'Date' in data_dict and 'Issue Date' not in data_dict:
                    data_dict['Issue Date'] = data_dict.pop('Date')
                
                # Append extracted data to the DataFrame
                df.loc[len(df)] = data_dict  # Faster than using pd.concat()

            except json.JSONDecodeError as e:
                print(f"❌ JSON parsing error in {file_path}: {e}")
                continue
                
        except Exception as e:
            print(f"❌ Error processing file {file_path}: {e}")
            continue

    return df


In [77]:
llm  =  ChatGroq(api_key=GROQ_API,
    model_name=model_name,
    temperature=0.0,
    
)

files = os.listdir('data')
files_list = [ f"data/{f}"  for f in files]
df = create_docs(files_list, llm)



In [78]:
df

Unnamed: 0,Invoice ID,DESCRIPTION,Issue Date,UNIT PRICE,AMOUNT,Bill For,From,Terms
0,2389,Phone and data bill,11/27/2026,500.0,500.0,Paul Regex,DR-TeleP,Due upon receipt
1,1000,Condo Rental,11/27/2026,2500.0,2500.0,Paul Regex,DR-TeleP,Due upon receipt
2,1000,Water and Sewage,11/27/2026,134.0,134.0,Paul Regex,DR-TeleP,Due upon receipt


# AI-Tech-Newslette

In [3]:
import os
import faiss
import json
import numpy as np
import streamlit as st
from dotenv import find_dotenv, load_dotenv
from langchain import LLMChain, PromptTemplate
from langchain.chat_models import ChatOpenAI
from langchain_community.document_loaders import UnstructuredURLLoader
from langchain.text_splitter import CharacterTextSplitter
from langchain_openai import OpenAIEmbeddings
from langchain_groq import ChatGroq
from langchain_community.tools import TavilySearchResults

In [4]:
# Load environment variables
load_dotenv(find_dotenv())
os.environ["TAVILY_API_KEY"] = os.getenv("TAVILY_API_KEY")
GROQ_API = os.getenv("GROQ_API_KEY")

# Initialize LLM once to avoid re-instantiating in functions
model_name = 'llama-3.3-70b-versatile'
llm = ChatGroq(api_key=GROQ_API, model_name=model_name, temperature=0.0)

# Initialize OpenAI Embeddings
embeddings = OpenAIEmbeddings()

# FAISS Index
dimension = 1536  # OpenAI's embedding dimension
index = faiss.IndexFlatL2(dimension)
documents = []  # Store document texts separately

In [5]:
def search_tavily(query: str, max_results: int = 5) -> dict:
    """
    Searches for relevant articles using TavilySearchResults.
    """
    tool = TavilySearchResults(max_results=max_results)
    response_json = tool.invoke({"query": query})
    print(f"Response JSON from SERP: {response_json}")
    return response_json


In [6]:
query = "What happen last week in syria"
search_results = search_tavily(query=query)
search_results

Response JSON from SERP: [{'title': 'Entire families killed in Syria sectarian violence, UN says | CNN', 'url': 'https://www.cnn.com/2025/03/12/middleeast/syria-sectarian-violence-un-ohcr-intl-hnk/index.html', 'content': 'Armed groups killed entire families, including women and children, during an outbreak of sectarian violence in Syria last week,', 'score': 0.6698534}, {'title': "Syria | Today's latest from Al Jazeera", 'url': 'https://www.aljazeera.com/where/syria/', 'content': 'Thousands of Syrians flee to Lebanon after clashes between government forces and pro-Assad fighters led to mass killings.', 'score': 0.44261298}, {'title': 'Entire families killed during recent violence in Syria, UN says - BBC', 'url': 'https://www.bbc.com/news/articles/cedlx65988qo', 'content': 'The violence escalated on Thursday, after 13 security personnel were killed in an ambush by gunmen in the coastal town of Jableh. Security', 'score': 0.41094592}, {'title': 'Syria news - breaking stories, video, anal

[{'title': 'Entire families killed in Syria sectarian violence, UN says | CNN',
  'url': 'https://www.cnn.com/2025/03/12/middleeast/syria-sectarian-violence-un-ohcr-intl-hnk/index.html',
  'content': 'Armed groups killed entire families, including women and children, during an outbreak of sectarian violence in Syria last week,',
  'score': 0.6698534},
 {'title': "Syria | Today's latest from Al Jazeera",
  'url': 'https://www.aljazeera.com/where/syria/',
  'content': 'Thousands of Syrians flee to Lebanon after clashes between government forces and pro-Assad fighters led to mass killings.',
  'score': 0.44261298},
 {'title': 'Entire families killed during recent violence in Syria, UN says - BBC',
  'url': 'https://www.bbc.com/news/articles/cedlx65988qo',
  'content': 'The violence escalated on Thursday, after 13 security personnel were killed in an ambush by gunmen in the coastal town of Jableh. Security',
  'score': 0.41094592},
 {'title': 'Syria news - breaking stories, video, analysis

In [7]:
def pick_best_articles_urls(response_json: dict, query: str) -> list:
    """
    Uses an LLM to select the best articles from search results and return a list of URLs.
    """
    response_str = json.dumps(response_json)

    prompt_template = PromptTemplate(
        input_variables=["response_str", "query"],
        template="""
          You are a world-class journalist, researcher, and tech expert.
          You excel at selecting the most relevant and high-quality articles.

          SEARCH RESULTS: {response_str}

          QUERY: {query}

          Select the best 3 articles and return ONLY an array of their URLs.
          If a URL is invalid, replace it with 'www.google.com'.

          Return ONLY a JSON array with the URLs.
        """
    )

    article_chooser_chain = LLMChain(llm=llm, prompt=prompt_template, verbose=False)

    try:
        urls_str = article_chooser_chain.run(response_str=response_str, query=query)
        url_list = json.loads(urls_str)
    except json.JSONDecodeError:
        print("Warning: LLM did not return valid JSON. Returning an empty list.")
        url_list = []

    return ["https://www.google.com" if not url.startswith("http") else url for url in url_list]

In [8]:
urls = pick_best_articles_urls(response_json=search_results, query=query)
urls

  article_chooser_chain = LLMChain(llm=llm, prompt=prompt_template, verbose=False)
  urls_str = article_chooser_chain.run(response_str=response_str, query=query)


['https://www.cnn.com/2025/03/12/middleeast/syria-sectarian-violence-un-ohcr-intl-hnk/index.html',
 'https://www.aljazeera.com/where/syria/',
 'https://www.bbc.com/news/articles/cedlx65988qo']

In [9]:
def extract_content_from_urls(urls: list):
    """
    Loads and processes content from URLs, then stores it in a FAISS vector database.
    """
    global index, documents

    loader = UnstructuredURLLoader(urls=urls)
    data = loader.load()

    text_splitter = CharacterTextSplitter(
        separator="\n", chunk_size=1000, chunk_overlap=200, length_function=len
    )
    docs = text_splitter.split_documents(data)

    # Convert text into embeddings and store in FAISS
    text_embeddings = []
    for doc in docs:
        embedding = embeddings.embed_query(doc.page_content)
        text_embeddings.append(embedding)
        documents.append(doc.page_content)  # Store actual text

    text_embeddings = np.array(text_embeddings, dtype='float32')
    
    if len(text_embeddings) > 0:
        index.add(text_embeddings)  # Add to FAISS index

    return index  # Return FAISS index

In [10]:
extract_content_from_urls(urls)



KeyboardInterrupt



In [84]:
def summarizer(query: str, k: int = 4) -> str:
    """
    Retrieves relevant document chunks from FAISS and summarizes them using LLM.
    """
    global index, documents

    query_embedding = np.array([embeddings.embed_query(query)], dtype='float32')

    # Perform similarity search in FAISS
    _, nearest_indices = index.search(query_embedding, k)
    
    retrieved_docs = [documents[i] for i in nearest_indices[0] if i < len(documents)]
    docs_page_content = " ".join(retrieved_docs) if retrieved_docs else "No relevant content found."

    prompt_template = PromptTemplate(
        input_variables=["docs", "query"],
        template="""
           {docs}
           You are a top journalist and researcher. Write a concise and engaging newsletter summary about {query}.
           Ensure that:
             1) The content is informative and engaging.
             2) The length is appropriate for a newsletter.
             3) Insights, practical advice, and links (if necessary) are included.
        """
    )

    summarizer_chain = LLMChain(llm=llm, prompt=prompt_template, verbose=False)
    response = summarizer_chain.run(docs=docs_page_content, query=query)

    return response.replace("\n", " ")


In [None]:
summaries = summarizer(query)
display(Markdown(summaries))

In [None]:
search_results = search_tavily(query=query)
urls = pick_best_articles_urls(response_json=search_results, query=query)
extract_content_from_urls(urls)
summaries = summarizer(query)
newsletter_thread = generate_newsletter(summaries, query)