UsageError: %%capture is a cell magic, but the cell body is empty.


In [1]:


import os
from getpass import getpass


def _getpass(env_var: str):
    if not os.environ.get(env_var):
        os.environ[env_var] = getpass(f"{env_var}=")


_getpass("OPENAI_API_KEY")



In [29]:
# Load businesses from csv file with pandas.

import pandas as pd
import json

df = pd.read_csv('dummy_data.csv')

# Get the reviews of the business in first row.
# Parse the reviews json string into a list of reviews.
reviews_string = df.iloc[0]['user_reviews']

parsed = json.loads(reviews_string)

parsed[0]
df.iloc[0]["link"]

'https://www.google.com/maps/place/Blubber+Bar/data=!4m7!3m6!1s0x47a62feb7701f637:0xf37327515dab0963!8m2!3d51.8741938!4d12.6628238!16s%2Fg%2F11fnx7cv6v!19sChIJN_YBd-svpkcRYwmrXVEnc_M?authuser=0&hl=en&rclk=1'

### Create a simple version that devises a plan just based on reviews.


# Notes on prompt engineering
- Keep an eye on if the domain points to a facebook site and the establishment thus might not have its own webpage.
-

In [2]:
from typing import List
from typing_extensions import TypedDict

class Review(TypedDict):
    """
    Represents a google business review

    Attributes:
        author_name : Name of the author
        rating : Rating of the review
        text : Text of the review
        time : Date of the review
    """
    author_name: str
    rating: int
    text: str
    time: str


class GraphState(TypedDict):
    """
    Represents the state of our graph.

    Attributes:
´        messages : With user question, error messages, reasoning
        iterations : Number of tries
    """

    reviews: List[Review]
    messages: List
    iterations: int

In [11]:
import os
from langchain.llms import OpenAI
from langchain.chains import LLMChain
from langchain.prompts import PromptTemplate
from langchain.output_parsers import PydanticOutputParser
from pydantic import BaseModel, Field
from typing import List



class Review(BaseModel):
    reviewer: str = Field(description="Name of the reviewer")
    rating: float = Field(description="Rating given by the reviewer")
    comment: str = Field(description="Comment provided by the reviewer")

class Insight(BaseModel):
    insight: str = Field(description="Insight derived from the review")
    anomaly: bool = Field(description="Indicates if the insight is an anomaly")

class Insights(BaseModel):
    insights: List[Insight] = Field(description="List of insights derived from the reviews")

# Create an output parser for the Insight schema
output_parser = PydanticOutputParser(pydantic_object=Insights)

# Define the prompt template
prompt_template = PromptTemplate(
    template="""
    Given the following list of Google reviews for a business:

    {reviews}

    Generate a list of insights that will be used to determine what type of software services the business might require. Note any anomalies.

    {format_instructions}
    """,
    input_variables=["reviews"],
    partial_variables={"format_instructions": output_parser.get_format_instructions()},
)

# Initialize the OpenAI LLM
llm = OpenAI(model="gpt-4o-mini")

# Create an LLMChain with the output parser
chain = prompt_template | llm | output_parser
#chain = LLMChain(llm=llm, prompt=prompt_template, output_parser=output_parser)


In [None]:
from langchain.llms import OpenAI
from langchain.chains import LLMChain
from langchain.prompts import PromptTemplate

def initial_scan(state: GraphState):
    """
    In the initial scan we get all the info by running the review analysis, webpage analysis, business analysis.
    """

    # Filter reviews. If there are more than 100, only process the 100 newest.
    chronological_sort = state['reviews'].sort(key=lambda x: x['time'], reverse=True)
    
    filtered_reviews = chronological_sort[:100]

    # Example list of reviews
    reviews = [
        Review(reviewer="John Doe", rating=4.5, comment="Great service and friendly staff."),
        Review(reviewer="Jane Smith", rating=3.0, comment="Average experience, could be better."),
        Review(reviewer="Alice Johnson", rating=5.0, comment="Excellent! Highly recommend."),
    ]

    # Convert reviews to string format for the prompt
    reviews_str = "\n".join([f"Reviewer: {review.reviewer}, Rating: {review.rating}, Comment: {review.comment}" for review in reviews])

    # Use the chain to generate insights
    response = chain.run(reviews=reviews_str)

    # Parse the response into structured data
    insights = output_parser.parse(response)

    print(insights)


graph_state = {
    "reviews": [
        {
            "author_name": "John Doe",
            "rating": 4.5,
            "text": "Great service and friendly staff.",
            "time": "2022-01-01"
        },
        {
            "author_name": "Jane Smith",
            "rating": 3.0,
            "text": "Average experience, could be better.",
            "time": "2022-01-02"
        },
        {
            "author_name": "Alice Johnson",
            "rating": 5.0,
            "text": "Excellent! Highly recommend.",
            "time": "2022-01-03"
        }
    ],
    "messages": [],
    "iterations": 0
}
initial_scan(graph_state)


TypeError: 'NoneType' object is not subscriptable

In [None]:
from langgraph.graph import END, StateGraph, START

workflow = StateGraph(GraphState)

# Define the nodes
workflow.add_node("generate", generate)  # generation solution
workflow.add_node("check_code", code_check)  # check code
workflow.add_node("reflect", reflect)  # reflect

# Build graph
workflow.add_edge(START, "generate")
workflow.add_edge("generate", "check_code")
workflow.add_conditional_edges(
    "check_code",
    decide_to_finish,
    {
        "end": END,
        "reflect": "reflect",
        "generate": "generate",
    },
)
workflow.add_edge("reflect", "generate")
app = workflow.compile()