# File for generating GPT insights 
Note:
- Run this file before WoofyaCustomDev to generate correct input file for WoofyaCustomDev.
- This file features the OpenAI package and requires an OpenAI API Key.
- This file filters the dataset to only those with 500+ characters of review for robust analysis.
- Gain new insights using the format below labelled 'GPT Insights'

In [1]:
import openai
import pandas as pd
from openai import OpenAI

In [2]:
# Load the CSV file
file_path = 'woofya_db_2.csv'
data = pd.read_csv(file_path)

In [3]:
# In order have sufficient data for analysis, we have set a lower bound of 500 characters worth of review.
woofya_db_r500 = data[data['review_text'].apply(len) > 499]
woofya_db_r500.to_csv('woofya_db_r500.csv', index=False)

## GPT Insights

In [4]:
client = OpenAI(api_key='replace_with_your_api_key')
# Load the CSV file
file_path = 'woofya_db_r500.csv'
feed = pd.read_csv(file_path)

# Function to generate a description for each row
def generate_description(row):
    name = row['name']
    vicinity = row['vicinity']
    review = row['review_text']
    rating = row['rating']
    type = row['type']
    editorial = row['editorial_summary.overview']
    prompt = f"List 5 description tags of this location based on the '{name}' and these reviews{review} and these editorials{editorial}. It MUST be in the format ['1', '2', '3', '4', '5'] with no other text included."

    # Call OpenAI API to generate description using the new method
    completion = client.chat.completions.create(
        model="gpt-4o-mini",
        messages=[
            {"role": "system", "content": "You are a helpful assistant that creates location descriptions."},
            {"role": "user", "content": prompt}
        ],
        max_tokens=150
    )
    
    # Extract and return the response content
    return(completion.choices[0].message.content)

# Apply the function to each row and create a new column 'description'
woofya_db_r500['description'] = woofya_db_r500.apply(generate_description, axis=1)

# Save the DataFrame with descriptions to a new CSV file
woofya_db_r500.to_csv('recommender_input.csv', index=False)


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  woofya_db_r500['description'] = woofya_db_r500.apply(generate_description, axis=1)


In [5]:
# Function to generate a suburb for each row
def generate_suburb(row):
    name = row['name']
    address = row['formatted_address']
    prompt = f"State the name of the suburb for each entry using the name{name} and formatted address{address}. State ONLY the suburb name itself."

    # Call OpenAI API to generate description using the new method
    completion = client.chat.completions.create(
        model="gpt-4o-mini",
        messages=[
            {"role": "system", "content": "You are a helpful assistant that states the suburb of a location."},
            {"role": "user", "content": prompt}
        ],
        max_tokens=150
    )
    
    # Extract and return the response content
    return(completion.choices[0].message.content)

# Apply the function to each row and create a new column 'description'
woofya_db_r500['suburb'] = woofya_db_r500.apply(generate_suburb, axis=1)

# Save the DataFrame with descriptions to a new CSV file
woofya_db_r500.to_csv('recommender_input.csv', index=False)


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  woofya_db_r500['suburb'] = woofya_db_r500.apply(generate_suburb, axis=1)


In [6]:
# Function to generate an off/on leash category for each row
def generate_ooleash(row):
    name = row['name']
    review = row['review_text']
    descrip = row['description']
    type = row['type']
    editorial = row['editorial_summary.overview']
    prompt = f"Return '1' if you can confirm that the location {name} allows dogs to be off-leash based on the name {name}, description {descrip}, these reviews {review}, these editorials {editorial} and location type {type}. Answer only using '1' for yes or '0' for no."

    # Call OpenAI API to generate description using the new method
    completion = client.chat.completions.create(
        model="gpt-4o-mini",
        messages=[
            {"role": "system", "content": "You are a helpful assistant that confirms location features."},
            {"role": "user", "content": prompt}
        ],
        max_tokens=150
    )
    
    # Extract and return the response content
    return(completion.choices[0].message.content)

# Apply the function to each row and create a new column 'description'
woofya_db_r500['off_leash'] = woofya_db_r500.apply(generate_ooleash, axis=1)

# Save the DataFrame with descriptions to a new CSV file
woofya_db_r500.to_csv('recommender_input.csv', index=False)


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  woofya_db_r500['off_leash'] = woofya_db_r500.apply(generate_ooleash, axis=1)


In [7]:
# Function to generate an assessment of the availability of water for dogs
def generate_water(row):
    name = row['name']
    review = row['review_text']
    descrip = row['description']
    type = row['type']
    editorial = row['editorial_summary.overview']
    prompt = f"Return '1' if you can confirm that the location {name} has clean drinking water or bowls for dogs to drink based on the name {name}, description {descrip}, these reviews {review}, these editorials {editorial} and location type {type}. Answer only using '1' for yes or '0' for no."

    # Call OpenAI API to generate description using the new method
    completion = client.chat.completions.create(
        model="gpt-4o-mini",
        messages=[
            {"role": "system", "content": "You are a helpful assistant that confirms location features."},
            {"role": "user", "content": prompt}
        ],
        max_tokens=150
    )
    
    # Extract and return the response content
    return(completion.choices[0].message.content)

# Apply the function to each row and create a new column 'description'
woofya_db_r500['dog_water'] = woofya_db_r500.apply(generate_water, axis=1)

# Save the DataFrame with descriptions to a new CSV file
woofya_db_r500.to_csv('recommender_input.csv', index=False)


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  woofya_db_r500['dog_water'] = woofya_db_r500.apply(generate_water, axis=1)


In [8]:
# Function to generate an assessment of the availability of water for dogs
def generate_fence(row):
    name = row['name']
    review = row['review_text']
    descrip = row['description']
    type = row['type']
    editorial = row['editorial_summary.overview']
    prompt = f"Return '1' if you can confirm that the location {name} has a fenced off area for dogs based on the name {name}, description {descrip},  these reviews {review}, these editorials {editorial} and location type {type}. Answer only using '1' for yes or '0' for no."

    # Call OpenAI API to generate description using the new method
    completion = client.chat.completions.create(
        model="gpt-4o-mini",
        messages=[
            {"role": "system", "content": "You are a helpful assistant that confirms location features."},
            {"role": "user", "content": prompt}
        ],
        max_tokens=150
    )
    
    # Extract and return the response content
    return(completion.choices[0].message.content)

# Apply the function to each row and create a new column 'description'
woofya_db_r500['fenced_off'] = woofya_db_r500.apply(generate_fence, axis=1)

# Save the DataFrame with descriptions to a new CSV file
woofya_db_r500.to_csv('recommender_input.csv', index=False)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  woofya_db_r500['fenced_off'] = woofya_db_r500.apply(generate_fence, axis=1)


In [9]:
# Function to generate an assessment of the availability of available seating. Checks for seating, NOT seating for dogs/owners.
def generate_seating(row):
    name = row['name']
    review = row['review_text']
    type = row['type']
    descrip = row['description']
    editorial = row['editorial_summary.overview']
    prompt = f"Return '1' if you can confirm that the location {name} has seating based on the name {name}, description {descrip},  these reviews {review}, these editorials {editorial} and location type {type}. Answer only using '1' for yes or '0' for no."

    # Call OpenAI API to generate description using the new method
    completion = client.chat.completions.create(
        model="gpt-4o-mini",
        messages=[
            {"role": "system", "content": "You are a helpful assistant that confirms location features."},
            {"role": "user", "content": prompt}
        ],
        max_tokens=150
    )
    
    # Extract and return the response content
    return(completion.choices[0].message.content)

# Apply the function to each row and create a new column 'description'
woofya_db_r500['seating'] = woofya_db_r500.apply(generate_seating, axis=1)

# Save the DataFrame with descriptions to a new CSV file
woofya_db_r500.to_csv('recommender_input.csv', index=False)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  woofya_db_r500['seating'] = woofya_db_r500.apply(generate_seating, axis=1)


In [10]:
# Function to generate an assessment of the availability of car parking
def generate_parking(row):
    name = row['name']
    review = row['review_text']
    type = row['type']
    descrip = row['description']
    editorial = row['editorial_summary.overview']
    prompt = f"Return '1' if you can confirm that the location {name} has car parking available based on the name {name}, description {descrip},  these reviews {review}, these editorials {editorial} and location type {type}. Answer only using '1' for yes or '0' for no."

    # Call OpenAI API to generate description using the new method
    completion = client.chat.completions.create(
        model="gpt-4o-mini",
        messages=[
            {"role": "system", "content": "You are a helpful assistant that confirms location features."},
            {"role": "user", "content": prompt}
        ],
        max_tokens=150
    )
    
    # Extract and return the response content
    return(completion.choices[0].message.content)

# Apply the function to each row and create a new column 'description'
woofya_db_r500['parking'] = woofya_db_r500.apply(generate_parking, axis=1)

# Save the DataFrame with descriptions to a new CSV file
woofya_db_r500.to_csv('recommender_input.csv', index=False)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  woofya_db_r500['parking'] = woofya_db_r500.apply(generate_parking, axis=1)


In [11]:
# Function to generate an assessment of the availability of car parking
def generate_food(row):
    name = row['name']
    review = row['review_text']
    type = row['type']
    descrip = row['description']
    editorial = row['editorial_summary.overview']
    prompt = f"Return '1' if you can confirm that the location {name} has serves food on the name {name}, description {descrip},  these reviews {review}, these editorials {editorial} and location type {type}. Answer only using '1' for yes or '0' for no."

    # Call OpenAI API to generate description using the new method
    completion = client.chat.completions.create(
        model="gpt-4o-mini",
        messages=[
            {"role": "system", "content": "You are a helpful assistant that confirms location features."},
            {"role": "user", "content": prompt}
        ],
        max_tokens=150
    )
    
    # Extract and return the response content
    return(completion.choices[0].message.content)

# Apply the function to each row and create a new column 'description'
woofya_db_r500['food'] = woofya_db_r500.apply(generate_food, axis=1)

# Save the DataFrame with descriptions to a new CSV file
woofya_db_r500.to_csv('recommender_input.csv', index=False)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  woofya_db_r500['food'] = woofya_db_r500.apply(generate_food, axis=1)


In [12]:
# Function to generate an assessment of the availability of car parking
def generate_night_light(row):
    name = row['name']
    review = row['review_text']
    type = row['type']
    descrip = row['description']
    editorial = row['editorial_summary.overview']
    prompt = f"Return '1' if you can confirm that the location {name} has lighting at night time based on the name {name}, description {descrip},  these reviews {review}, these editorials {editorial} and location type {type}. Answer only using '1' for yes or '0' for no."

    # Call OpenAI API to generate description using the new method
    completion = client.chat.completions.create(
        model="gpt-4o-mini",
        messages=[
            {"role": "system", "content": "You are a helpful assistant that confirms location features."},
            {"role": "user", "content": prompt}
        ],
        max_tokens=150
    )
    
    # Extract and return the response content
    return(completion.choices[0].message.content)

# Apply the function to each row and create a new column 'description'
woofya_db_r500['night_light'] = woofya_db_r500.apply(generate_night_light, axis=1)

# Save the DataFrame with descriptions to a new CSV file
woofya_db_r500.to_csv('recommender_input.csv', index=False)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  woofya_db_r500['night_light'] = woofya_db_r500.apply(generate_night_light, axis=1)


In [13]:
# Function to generate an assessment of the availability of car parking
def generate_shade(row):
    name = row['name']
    review = row['review_text']
    type = row['type']
    descrip = row['description']
    editorial = row['editorial_summary.overview']
    prompt = f"Return '1' if you can confirm that the location {name} has shade based on the name {name}, description {descrip},  these reviews {review}, these editorials {editorial} and location type {type}. Answer only using '1' for yes or '0' for no."

    # Call OpenAI API to generate description using the new method
    completion = client.chat.completions.create(
        model="gpt-4o-mini",
        messages=[
            {"role": "system", "content": "You are a helpful assistant that confirms location features."},
            {"role": "user", "content": prompt}
        ],
        max_tokens=150
    )
    
    # Extract and return the response content
    return(completion.choices[0].message.content)

# Apply the function to each row and create a new column 'description'
woofya_db_r500['shade'] = woofya_db_r500.apply(generate_shade, axis=1)

# Save the DataFrame with descriptions to a new CSV file
woofya_db_r500.to_csv('recommender_input.csv', index=False)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  woofya_db_r500['shade'] = woofya_db_r500.apply(generate_shade, axis=1)


In [14]:
# Function to generate an assessment of the availability of car parking
def generate_open_field(row):
    name = row['name']
    review = row['review_text']
    type = row['type']
    descrip = row['description']
    editorial = row['editorial_summary.overview']
    prompt = f"Return '1' if you can confirm that the location {name} features a large, open field based on the name {name}, description {descrip},  these reviews {review}, these editorials {editorial} and location type {type}. Answer only using '1' for yes or '0' for no."

    # Call OpenAI API to generate description using the new method
    completion = client.chat.completions.create(
        model="gpt-4o-mini",
        messages=[
            {"role": "system", "content": "You are a helpful assistant that confirms location features."},
            {"role": "user", "content": prompt}
        ],
        max_tokens=150
    )
    
    # Extract and return the response content
    return(completion.choices[0].message.content)

# Apply the function to each row and create a new column 'description'
woofya_db_r500['open_field'] = woofya_db_r500.apply(generate_open_field, axis=1)

# Save the DataFrame with descriptions to a new CSV file
woofya_db_r500.to_csv('recommender_input.csv', index=False)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  woofya_db_r500['open_field'] = woofya_db_r500.apply(generate_open_field, axis=1)


In [15]:
# Function to generate an assessment of the availability of car parking
def generate_wildlife(row):
    name = row['name']
    review = row['review_text']
    type = row['type']
    descrip = row['description']
    editorial = row['editorial_summary.overview']
    prompt = f"Return '1' if you can confirm that the location {name} has local wildlife based on the name {name}, description {descrip},  these reviews {review}, these editorials {editorial} and location type {type}. Answer only using '1' for yes or '0' for no."

    # Call OpenAI API to generate description using the new method
    completion = client.chat.completions.create(
        model="gpt-4o-mini",
        messages=[
            {"role": "system", "content": "You are a helpful assistant that confirms location features."},
            {"role": "user", "content": prompt}
        ],
        max_tokens=150
    )
    
    # Extract and return the response content
    return(completion.choices[0].message.content)

# Apply the function to each row and create a new column 'description'
woofya_db_r500['wildlife'] = woofya_db_r500.apply(generate_wildlife, axis=1)

# Save the DataFrame with descriptions to a new CSV file
woofya_db_r500.to_csv('recommender_input.csv', index=False)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  woofya_db_r500['wildlife'] = woofya_db_r500.apply(generate_wildlife, axis=1)


In [16]:
# Function to generate an assessment of the availability of car parking
def generate_alcohol(row):
    name = row['name']
    review = row['review_text']
    type = row['type']
    descrip = row['description']
    editorial = row['editorial_summary.overview']
    prompt = f"Return '1' if you can confirm that the location {name} serves alcohol based on the name {name}, description {descrip},  these reviews {review}, these editorials {editorial} and location type {type}. Answer only using '1' for yes or '0' for no."

    # Call OpenAI API to generate description using the new method
    completion = client.chat.completions.create(
        model="gpt-4o-mini",
        messages=[
            {"role": "system", "content": "You are a helpful assistant that confirms location features."},
            {"role": "user", "content": prompt}
        ],
        max_tokens=150
    )
    
    # Extract and return the response content
    return(completion.choices[0].message.content)

# Apply the function to each row and create a new column 'description'
woofya_db_r500['alcohol'] = woofya_db_r500.apply(generate_alcohol, axis=1)

# Save the DataFrame with descriptions to a new CSV file
woofya_db_r500.to_csv('recommender_input.csv', index=False)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  woofya_db_r500['alcohol'] = woofya_db_r500.apply(generate_alcohol, axis=1)
