# BreakoutAI Assessment: AI Agent Project
This notebook implements an AI agent capable of:
- Reading datasets from CSV or Google Sheets.
- Performing web searches for entities in the dataset.
- Extracting information using a language model.
- Displaying results in a Streamlit dashboard, with an option to download them as a CSV file.

## Features
- File Upload and Google Sheets Integration.
- Dynamic Query Input.
- Web Search Automation.
- LLM Integration for Information Extraction.
- Results Display and CSV Export.

In [9]:
# Install required libraries
!pip install streamlit pandas google-api-python-client openai serpapi


Defaulting to user installation because normal site-packages is not writeable


In [10]:
# Install or upgrade the google-search-results distribution
# (presumably the source of the `serpapi.GoogleSearch` import used in this notebook — confirm).
pip install --upgrade google-search-results


Defaulting to user installation because normal site-packages is not writeable
Note: you may need to restart the kernel to use updated packages.


In [1]:
# python-dotenv supplies the `dotenv` module; `load_dotenv` from it is imported
# further down in this notebook to read API keys from a `.env` file.
!pip install python-dotenv


Defaulting to user installation because normal site-packages is not writeable


In [11]:
# Import necessary modules
import streamlit as st
import pandas as pd
import openai
from googleapiclient.discovery import build
from serpapi import GoogleSearch
import os

# Read the API keys from the process environment.
# os.getenv returns None for a variable that is not set, so both
# OPENAI_API_KEY and SERP_API_KEY must already be exported when this runs;
# this cell does not itself load a `.env` file.
openai.api_key = os.getenv("OPENAI_API_KEY")
SERP_API_KEY = os.getenv("SERP_API_KEY")


In [12]:
# Define functions for data processing, search, and LLM interaction

def process_csv_file(file):
    """Parse the uploaded CSV and return its contents as a pandas DataFrame.

    Args:
        file: Path or file-like object accepted by ``pd.read_csv``.

    Returns:
        The DataFrame produced by ``pd.read_csv`` with default options.
    """
    return pd.read_csv(file)

def search_web(entity, query_template):
    """Run a Google search for one entity through SerpAPI.

    Args:
        entity: Value to look up. It is converted with ``str()`` before
            substitution, so non-string cells (e.g. numeric IDs) are accepted.
        query_template: Query text in which every ``{entity}`` placeholder is
            replaced by the entity.

    Returns:
        The value stored under ``"organic_results"`` in the SerpAPI response,
        or an empty list when the response has no such key.
    """
    search_query = query_template.replace("{entity}", str(entity))
    params = {
        "q": search_query,
        "api_key": SERP_API_KEY,
    }
    search = GoogleSearch(params)
    # The client object is not a mapping: get_dict() performs the request and
    # returns the parsed response as a dict, which is what supports .get().
    response = search.get_dict()
    return response.get("organic_results", [])

def extract_information(search_results, prompt_template):
    """Extract information from web search results using an LLM.

    Args:
        search_results: Iterable of result dicts; only entries that have a
            ``"snippet"`` key contribute text to the prompt.
        prompt_template: Instruction text placed before the snippets.

    Returns:
        The model's completion with surrounding whitespace stripped, or an
        empty string when no result carries a snippet (no API call is made).
    """
    snippets = [result['snippet'] for result in search_results if 'snippet' in result]
    if not snippets:
        # Nothing to extract from: skip the request rather than have the model
        # answer an instruction that has no supporting text.
        return ""
    prompt = prompt_template + "\n" + "\n".join(snippets)
    # openai>=1.0 removed openai.Completion and text-davinci-003 has been
    # retired; gpt-3.5-turbo-instruct is its replacement on the same
    # completions endpoint, so the prompt/max_tokens contract is unchanged.
    response = openai.completions.create(
        model="gpt-3.5-turbo-instruct",
        prompt=prompt,
        max_tokens=100,
    )
    return response.choices[0].text.strip()


In [13]:
# Streamlit Dashboard Implementation

st.title("AI Data Extraction Agent")

# File upload
uploaded_file = st.file_uploader("Upload a CSV file", type=["csv"])
if uploaded_file:
    data = process_csv_file(uploaded_file)
    st.write("## Uploaded Data Preview")
    st.dataframe(data.head())

    # Query input
    query_template = st.text_input("Enter query template (use {entity} for placeholders)", "Get the email address of {entity}")
    prompt_template = st.text_area("Enter LLM prompt template", "Extract the email address from the following results:")

    # Process data
    if st.button("Process Data"):
        results = []
        # Entities come from the first column; blank cells are skipped and the
        # rest are converted to text so they can be substituted into the query.
        for entity in data.iloc[:, 0].dropna().astype(str):
            search_results = search_web(entity, query_template)
            extracted_info = extract_information(search_results, prompt_template)
            results.append({"Entity": entity, "Extracted Info": extracted_info})

        # Keep the table in session state: Streamlit reruns the script on every
        # widget interaction and st.button is True only for the rerun caused by
        # its own click, so results rendered inside this branch would disappear
        # as soon as the download button is pressed.
        # Explicit columns keep the headers even when no entity was processed.
        st.session_state["results_df"] = pd.DataFrame(results, columns=["Entity", "Extracted Info"])

    # NOTE: results from the last run stay visible until "Process Data" is
    # clicked again, including after a different file is uploaded.
    results_df = st.session_state.get("results_df")
    if results_df is not None:
        # Display results
        st.write("## Extraction Results")
        st.dataframe(results_df)

        # Download results
        csv = results_df.to_csv(index=False).encode('utf-8')
        st.download_button("Download Results", csv, "results.csv", "text/csv")




In [1]:
# Install required libraries
!pip install streamlit pandas google-api-python-client openai serpapi python-dotenv

# Import necessary modules
import streamlit as st
import pandas as pd
import openai
from googleapiclient.discovery import build
from serpapi import GoogleSearch
import os
from dotenv import load_dotenv  # Import dotenv for loading .env file

# Load .env file so the variables it defines are visible to os.getenv below
load_dotenv()

# Set API keys from the .env file (or from the existing process environment).
# os.getenv returns None for a variable that is not set, so a missing key
# only surfaces later, when the first API request is made.
openai.api_key = os.getenv("OPENAI_API_KEY")
SERP_API_KEY = os.getenv("SERP_API_KEY")

# Define functions for data processing, search, and LLM interaction

def process_csv_file(file):
    """Parse the uploaded CSV and return its contents as a pandas DataFrame.

    Args:
        file: Path or file-like object accepted by ``pd.read_csv``.

    Returns:
        The DataFrame produced by ``pd.read_csv`` with default options.
    """
    return pd.read_csv(file)

def search_web(entity, query_template):
    """Run a Google search for one entity through SerpAPI.

    Args:
        entity: Value to look up. It is converted with ``str()`` before
            substitution, so non-string cells (e.g. numeric IDs) are accepted.
        query_template: Query text in which every ``{entity}`` placeholder is
            replaced by the entity.

    Returns:
        The value stored under ``"organic_results"`` in the SerpAPI response,
        or an empty list when the response has no such key.
    """
    search_query = query_template.replace("{entity}", str(entity))
    params = {
        "q": search_query,
        "api_key": SERP_API_KEY,
    }
    search = GoogleSearch(params)
    # The client object is not a mapping: get_dict() performs the request and
    # returns the parsed response as a dict, which is what supports .get().
    response = search.get_dict()
    return response.get("organic_results", [])

def extract_information(search_results, prompt_template):
    """Extract information from web search results using an LLM.

    Args:
        search_results: Iterable of result dicts; only entries that have a
            ``"snippet"`` key contribute text to the prompt.
        prompt_template: Instruction text placed before the snippets.

    Returns:
        The model's completion with surrounding whitespace stripped, or an
        empty string when no result carries a snippet (no API call is made).
    """
    snippets = [result['snippet'] for result in search_results if 'snippet' in result]
    if not snippets:
        # Nothing to extract from: skip the request rather than have the model
        # answer an instruction that has no supporting text.
        return ""
    prompt = prompt_template + "\n" + "\n".join(snippets)
    # openai>=1.0 removed openai.Completion and text-davinci-003 has been
    # retired; gpt-3.5-turbo-instruct is its replacement on the same
    # completions endpoint, so the prompt/max_tokens contract is unchanged.
    response = openai.completions.create(
        model="gpt-3.5-turbo-instruct",
        prompt=prompt,
        max_tokens=100,
    )
    return response.choices[0].text.strip()

# Streamlit Dashboard Implementation

st.title("AI Data Extraction Agent")

# File upload
uploaded_file = st.file_uploader("Upload a CSV file", type=["csv"])
if uploaded_file:
    data = process_csv_file(uploaded_file)
    st.write("## Uploaded Data Preview")
    st.dataframe(data.head())

    # Query input
    query_template = st.text_input("Enter query template (use {entity} for placeholders)", "Get the email address of {entity}")
    prompt_template = st.text_area("Enter LLM prompt template", "Extract the email address from the following results:")

    # Process data
    if st.button("Process Data"):
        results = []
        # Entities come from the first column; blank cells are skipped and the
        # rest are converted to text so they can be substituted into the query.
        for entity in data.iloc[:, 0].dropna().astype(str):
            search_results = search_web(entity, query_template)
            extracted_info = extract_information(search_results, prompt_template)
            results.append({"Entity": entity, "Extracted Info": extracted_info})

        # Keep the table in session state: Streamlit reruns the script on every
        # widget interaction and st.button is True only for the rerun caused by
        # its own click, so results rendered inside this branch would disappear
        # as soon as the download button is pressed.
        # Explicit columns keep the headers even when no entity was processed.
        st.session_state["results_df"] = pd.DataFrame(results, columns=["Entity", "Extracted Info"])

    # NOTE: results from the last run stay visible until "Process Data" is
    # clicked again, including after a different file is uploaded.
    results_df = st.session_state.get("results_df")
    if results_df is not None:
        # Display results
        st.write("## Extraction Results")
        st.dataframe(results_df)

        # Download results
        csv = results_df.to_csv(index=False).encode('utf-8')
        st.download_button("Download Results", csv, "results.csv", "text/csv")


Defaulting to user installation because normal site-packages is not writeable


2024-11-18 23:27:38.705 
  command:

    streamlit run C:\Users\indra\AppData\Roaming\Python\Python312\site-packages\ipykernel_launcher.py [ARGUMENTS]
