In [1]:
import pandas as pd
from openai import OpenAI
from langchain.prompts import PromptTemplate
import os

In [2]:
os.environ['OPENAI_API_KEY'] = os.environ.get('OPENAI_API_KEY')

In [23]:
prompt_template = PromptTemplate.from_template(
    '''
    Your task is to extract keywords from {sentence}.
    You are an assistant for labeling negative annotaded keywords in user reviews.
    Return the words that are assumed to be negative words in form of a list.
    Note: If now negative words detected reutrn empty list.
    Note: Words have to stand alone as negative to be included. No fill words.
    Note: Only words that are explicit in the sentence should be included.
    Provide output without further text information. Use the following schema ['keyword 1', 'keyword 2', ...]
    '''
)


In [24]:
client = OpenAI()

In [25]:
def extract_keywords(sentence: str) -> str:
    try:
        # Format the prompt dynamically with the input sentence
        prompt = prompt_template.format(sentence=sentence)
        
        response = client.chat.completions.create(
            model="gpt-4o",
            messages=[
                {
                    "role": "user",
                    "content": prompt,
                }
            ],
        )
        return response.choices[0].message.content.strip()
    except Exception as e:
        return f"Error: {e}"

In [26]:
df = pd.read_csv('../data/raw/reviews_en_goerlitzerpark.csv')

In [27]:
snippet = df.copy()
snippet = snippet[0:10]

In [28]:
############# kill stop words in before

In [29]:
snippet['keywords'] = snippet['review_text_english'].apply(lambda x: extract_keywords(x))

In [32]:
snippet

Unnamed: 0.1,Unnamed: 0,google_id,review_text,review_img_urls,review_rating,review_timestamp,review_likes,review_text_english,keywords
0,0,0x47a84f4c8819b367:0xedafc45032d4521b,I happened to visit the park around midday on ...,['https://lh5.googleusercontent.com/p/AF1QipNH...,1,1730831520,5,I happened to visit the park around midday on ...,"['tipsy', 'drugs', 'homeless', 'rubbish']"
1,1,0x47a84f4c8819b367:0xedafc45032d4521b,"The park is nice, it surprised me that there w...",['https://lh5.googleusercontent.com/p/AF1QipNs...,5,1723813004,0,"The park is nice, it surprised me that there w...",['alone']
2,2,0x47a84f4c8819b367:0xedafc45032d4521b,Legenday park in Berlin! Somehow between Kreuz...,['https://lh5.googleusercontent.com/p/AF1QipNA...,5,1724322017,0,Legenday park in Berlin! Somehow between Kreuz...,['not many']
3,3,0x47a84f4c8819b367:0xedafc45032d4521b,I have to be honest and didn't feel particular...,,4,1733680520,0,I have to be honest and didn't feel particular...,"['unsafe', 'dealers', 'nicked', 'business', 'l..."
4,4,0x47a84f4c8819b367:0xedafc45032d4521b,Watched a local men’s soccer game! Looks a lit...,['https://lh5.googleusercontent.com/p/AF1QipPM...,5,1732366664,0,Watched a local men’s soccer game! Looks a lit...,['sketch']
5,5,0x47a84f4c8819b367:0xedafc45032d4521b,"Its a beautiful park , just be careful from bi...",['https://lh5.googleusercontent.com/p/AF1QipPB...,4,1719173731,0,"Its a beautiful park, just be careful from bic...","['careful', 'bicycles']"
6,6,0x47a84f4c8819b367:0xedafc45032d4521b,I was here during May Day and this spot was fi...,['https://lh5.googleusercontent.com/p/AF1QipO4...,5,1683439337,4,I was here during May Day and this spot was fi...,[]
7,7,0x47a84f4c8819b367:0xedafc45032d4521b,We always come here for the children’s farm bu...,,4,1721196847,0,We always come here for the children’s farm bu...,```[]```
8,8,0x47a84f4c8819b367:0xedafc45032d4521b,This is my neighborhood park. I've never once ...,,5,1729505981,0,This is my neighborhood park. I've never once ...,"['drugs', 'crimes']"
9,9,0x47a84f4c8819b367:0xedafc45032d4521b,I spent 10 days living right in front of the p...,,2,1726002261,2,I spent 10 days living right in front of the p...,"['worst', 'drugs', 'pushy', 'avoid']"
