This is a starter notebook for the project. You will need to import the libraries you use; the ones available in this workspace are listed in the requirements.txt file.

In [None]:
import openai
import os

from langchain.prompts import FewShotPromptTemplate, PromptTemplate

from langchain.llms import OpenAI


In [None]:
from langchain.prompts import PromptTemplate

# Layout of a single real-estate listing. It is used to render every few-shot
# example, and the model is asked to answer in this same layout, so each label
# must be paired with the placeholder of the same name. (The labels
# "Bedrooms" and "Bathrooms" were previously wired to each other's values,
# which made the rendered examples report the wrong counts.)
listing_template = PromptTemplate(
    template="""Neighborhood: {neighborhood}
Price: ${price}
Bedrooms: {bedrooms}
Bathrooms: {bathrooms}
House Size: {house_size} sqft

Description: {description}

Neighborhood Description: {neighborhood_description}
""",
    input_variables=[
        "neighborhood",
        "price",
        "bedrooms",
        "bathrooms",
        "house_size",
        "description",
        "neighborhood_description",
    ],
)



In [None]:
# Hand-written sample listings used as few-shot examples. Every entry provides
# the same seven keys, in the same order: neighborhood, price, bedrooms,
# bathrooms, house_size, description, neighborhood_description.
example_inputs = [
    dict(
        neighborhood="Downtown Abbey",
        price=300000,
        bedrooms=3,
        bathrooms=2,
        house_size=2000,
        description="A beautiful appartment with a large terrace overlooking the city's skyscrapers with 3 well-lit bedrooms and 2 bathrooms means you have enough room even for the occasional guest coming over to celebrate new year's with you :)",
        neighborhood_description="A vibrant neighborhood which is very centrally located and specially well connected public transport connections means you are close to everything you need for your everyday life from groceries to cafes and restaurants and shopping.",
    ),
    dict(
        neighborhood="City Heights",
        price=500000,
        bedrooms=4,
        bathrooms=3,
        house_size=3000,
        description="A cozy house with a wonderful garden overlooking the city from its hillside location",
        neighborhood_description="Located in the city outskirts, this neighborhood is known for its great views and quiet streets with a very family-friendly atmosphere. In no time you are in the midst of a beautiful forest where you can calm all your senses and breath in the cool fresh air. Sightings of deer and other wildlife are common.",
    ),
    dict(
        neighborhood="Green Oaks",
        price=800000,
        bedrooms=3,
        bathrooms=2,
        house_size=2000,
        description="A luxurious villa with a large garden and a swimming pool with a winter garden, 2 parking spots, and a garage as well as a large terrace and wonderful finishing.",
        neighborhood_description="Green Oaks is a close-knit, environmentally-conscious community with access to organic grocery stores, community gardens, and bike paths. Take a stroll through the nearby Green Oaks Park or grab a cup of coffee at the cozy Green Bean Cafe. With easy access to public transportation and bike lanes, commuting is a breeze.",
    ),
]

In [None]:
from langchain.prompts import FewShotPromptTemplate

# Marker the model is told to emit before every listing; the parsing cell
# splits the raw completion on this same string.
listing_seperator = "===="

# How many listings to request in a single completion.
num_listings = 10

# Task description appended after the rendered examples. The "{}" is left as a
# literal placeholder here and filled with the listing count further down.
instruction = (
    "Generate {} listings of houses. "
    "Be creative in regards neighborhood_description, and description and keep it confined to three sentences each. "
    "Keep the prices under $1500000, the number of bedrooms under 6 and the number of bathrooms under 3 and the total area under 3500sqft. "
    "VERY IMPORTANT: Output the results in the same format as the examples keeping even the order of properties the same as in the examples. "
    "Add the characters '" + listing_seperator + "' before each new listing"
)

# Few-shot prompt: each entry of example_inputs is rendered with
# listing_template, followed by the suffix carrying the actual request.
few_shot_template = FewShotPromptTemplate(
    examples=example_inputs,
    example_prompt=listing_template,
    suffix="Use the examples above to generate the following: {input}",
    input_variables=["input"],
)

# Final prompt text: examples plus the instruction with the count filled in.
prompt_to_use = few_shot_template.format(
    input=instruction.format(num_listings),
)



In [None]:
# Show the fully rendered prompt so it can be inspected before it is sent to the model.
print(prompt_to_use)

In [None]:
import os
import warnings

# The API key is a secret and must not be committed with the notebook. It is
# expected to be provided through the OPENAI_API_KEY environment variable
# (for example, exported before the notebook server is started). Warn early
# when it is missing so the problem is visible before the first API call.
if not os.environ.get("OPENAI_API_KEY"):
    warnings.warn(
        "OPENAI_API_KEY is not set; set it in the environment before running "
        "the cells that call the model."
    )

# Route OpenAI API traffic through the Vocareum endpoint.
os.environ["OPENAI_API_BASE"] = "https://openai.vocareum.com/v1"

In [None]:
# NOTE(review): ChatOpenAI is imported here but never used in this cell; the
# model below is built with `OpenAI`, which is imported in the first cell.
from langchain.chat_models import ChatOpenAI

# Name of the OpenAI model to request.
model_name = "gpt-3.5-turbo"
# temperature=0.0 and max_tokens=2000 are passed straight to the wrapper;
# presumably 0.0 is meant to make the output as repeatable as possible and
# 2000 to leave room for all requested listings — TODO confirm.
# NOTE(review): "gpt-3.5-turbo" looks like a chat model while `OpenAI` is the
# completion-style wrapper — confirm this pairing is intended before switching
# to ChatOpenAI, since the next cell calls the object with `prompt=...`.
llm = OpenAI(model_name=model_name, temperature=0.0, max_tokens=2000)

In [None]:
# Send the rendered few-shot prompt to the model. `result` is treated as plain
# text by the later cells (it is printed and split on the listing separator).
result = llm(prompt=prompt_to_use)

In [None]:
# Print the raw model output for a visual check before it is parsed.
print(f"LLM result:\n{result}")

In [None]:
import re

import pandas as pd

# Columns of the resulting DataFrame, in the same order as the listing fields.
columns = ["neighborhood", "price", "bedrooms", "bathrooms", "house_size", "description", "neighborhood_description"]

# One line-anchored pattern per column; group 1 captures the field's value.
# Compiled once here instead of on every loop iteration.
_FIELD_PATTERNS = {
    "neighborhood": re.compile(r"^Neighborhood: (.+)$", re.MULTILINE),
    "price": re.compile(r"^Price: \$(.+)$", re.MULTILINE),
    "bedrooms": re.compile(r"^Bedrooms: (.+)$", re.MULTILINE),
    "bathrooms": re.compile(r"^Bathrooms: (.+)$", re.MULTILINE),
    "house_size": re.compile(r"^House Size: (.+) sqft$", re.MULTILINE),
    "description": re.compile(r"^Description: (.+)$", re.MULTILINE),
    "neighborhood_description": re.compile(r"^Neighborhood Description: (.+)$", re.MULTILINE),
}

# Fields whose captured text is converted to int.
_INTEGER_FIELDS = frozenset({"price", "bedrooms", "bathrooms", "house_size"})


def _parse_listing(listing):
    """Extract the fields of one listing from its text.

    Args:
        listing: Text of a single listing, one "Label: value" field per line.

    Returns:
        A dict keyed by column name. Numeric fields are ints; thousands
        separators such as "1,200,000" are accepted.

    Raises:
        ValueError: If a field is missing or a numeric field is not an integer.
    """
    entry = {}
    for field, pattern in _FIELD_PATTERNS.items():
        match = pattern.search(listing)
        if match is None:
            raise ValueError(f"missing field '{field}'")
        value = match.group(1)
        if field in _INTEGER_FIELDS:
            # Strip thousands separators by hand rather than via
            # locale.setlocale/locale.atoi: that changes process-wide state
            # and fails on systems where 'en_US.UTF-8' is not installed.
            value = int(value.replace(",", ""))
        entry[field] = value
    return entry


# Split the result into individual listings. Blank chunks (such as the empty
# text before a leading separator) are dropped; the first chunk is no longer
# discarded unconditionally, so a listing that the model did not prefix with
# the separator is still parsed. Non-listing text ends up in the error report.
listings = [chunk for chunk in result.split(listing_seperator) if chunk.strip()]

# Parse the listings into a list of dictionaries, reporting and skipping any
# chunk that does not match the expected layout.
data = []
for listing in listings:
    print(f"Parsing listing: {listing[1:150]} ...\n")
    try:
        data.append(_parse_listing(listing))
    except ValueError as e:
        print("Error parsing listing")
        print(listing)
        print(f"Because of \n{e}")
        print("\n")

# Create a DataFrame
df = pd.DataFrame(data, columns=columns)

Let's view the DataFrame, check that the generated listings make sense, and finally save them to a CSV file.

In [None]:
# Display up to num_listings rows of the parsed listings.
df.head(n=num_listings)

In [None]:
# Write the parsed listings to listings.csv, leaving out the DataFrame index column.
df.to_csv("listings.csv", index=False)
