In [1]:
import openai
import pandas as pd
from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer
from dotenv import load_dotenv
import os

In [2]:
# Load environment variables from .env file
load_dotenv()

True

In [3]:
# Define categories
categories = [
    "Encampment Reporting", "Graffiti", "Pothole", "Animal Complaint", "Weeds & Vegetation", 
    "Neighborhood Issue", "Other", "Snow on Sidewalk", "Abandoned Vehicle", "Illegal Parking", 
    "Damaged/Fallen Tree", "Police: Non-emergency", "Fireworks", "Illegal Dumping", 
    "Shared Micromobility", "No Heat No Water No Electricity", "Missed Trash Pickup"
]

In [4]:
# Define keywords for the initial classification
keywords = {
    "Encampment Reporting": ["encampment", "tent", "homeless"],
    "Graffiti": ["graffiti"],
    "Pothole": ["pothole", "road damage"],
    "Animal Complaint": ["animal", "dog", "cat", "wildlife"],
    "Weeds & Vegetation": ["weeds", "vegetation", "overgrowth", "lawn"],
    "Neighborhood Issue": ["neighborhood", "community", "local issue"],
    "Snow on Sidewalk": ["snow", "sidewalk", "ice"],
    "Abandoned Vehicle": ["abandoned vehicle", "car left", "parked"],
    "Illegal Parking": ["illegal parking", "no parking", "blocked driveway"],
    "Damaged/Fallen Tree": ["fallen tree", "damaged tree", "branch"],
    "Police: Non-emergency": ["police", "non-emergency", "law enforcement"],
    "Fireworks": ["fireworks", "loud noise"],
    "Illegal Dumping": ["illegal dumping", "trash"],
    "Shared Micromobility": ["scooter", "shared bike", "shared micromobility"],
    "No Heat No Water No Electricity": ["no heat", "no water", "no electricity"],
    "Missed Trash Pickup": ["missed trash", "garbage not collected"]
}

In [5]:
# Load the data
file_path = '../Resources/scrubbed_data.csv'
df = pd.read_csv(file_path)
df.head()

Unnamed: 0,name,affected_address,email_address,case_number,date,constituent_email_1,d4_response_1,d4_staff_member,constituent_email_2,d4_response_2
0,Ron,6864 East Bucknell Place,ron@email.com,0,2024-08-05,The lack of police presence and code enforceme...,"Good morning Ron, \n\nThank you for reaching o...",Staff1,,
1,Renee,Dahlia & High Line intersection,renee@email.com,0,2024-08-19,I would like to know if there is a possible to...,Thank you for reaching out to our office. The...,Staff2,,
2,JW,Happy Canyon & Jasmine St,jw@email.com,0,2024-08-19,Accident took place at Happy Canyon & Jasmine....,"Good morning TJ,\n\nThank you for reaching out...",Staff1,,
3,Clara,Hutchinson Hills,clara@email.com,9578014,2024-08-20,Has many concerns about getting a compost cart...,Thank you for reaching out to our office and f...,Staff2,Clara sent another email back with more questi...,Did not respond as they were the same question...
4,Pauline,,pauline@email.com,0,2024-08-21,At any rate my concern is how will HOA condo u...,"Good afternoon Pauline, \n\nThank you for reac...",Staff1,,


In [6]:
# Initialize VADER for sentiment
analyzer = SentimentIntensityAnalyzer()

In [7]:
# Apply VADER sentiment analysis to the 'constituent_email_1' column
df['VADER_sentiment'] = df['constituent_email_1'].fillna('').apply(lambda x: analyzer.polarity_scores(x)['compound'])

In [8]:
df.head()

Unnamed: 0,name,affected_address,email_address,case_number,date,constituent_email_1,d4_response_1,d4_staff_member,constituent_email_2,d4_response_2,VADER_sentiment
0,Ron,6864 East Bucknell Place,ron@email.com,0,2024-08-05,The lack of police presence and code enforceme...,"Good morning Ron, \n\nThank you for reaching o...",Staff1,,,-0.5
1,Renee,Dahlia & High Line intersection,renee@email.com,0,2024-08-19,I would like to know if there is a possible to...,Thank you for reaching out to our office. The...,Staff2,,,0.3612
2,JW,Happy Canyon & Jasmine St,jw@email.com,0,2024-08-19,Accident took place at Happy Canyon & Jasmine....,"Good morning TJ,\n\nThank you for reaching out...",Staff1,,,-0.5095
3,Clara,Hutchinson Hills,clara@email.com,9578014,2024-08-20,Has many concerns about getting a compost cart...,Thank you for reaching out to our office and f...,Staff2,Clara sent another email back with more questi...,Did not respond as they were the same question...,-0.296
4,Pauline,,pauline@email.com,0,2024-08-21,At any rate my concern is how will HOA condo u...,"Good afternoon Pauline, \n\nThank you for reac...",Staff1,,,0.1179


In [9]:
# Define the interpretation function
def interpret_sentiment(compound_score):
    if compound_score > 0.05:
        return "Positive"
    elif compound_score < -0.05:
        return "Negative"
    else:
        return "Neutral"

In [10]:
# Apply the interpretation function to create a new column in the DataFrame
df['Sentiment_Interpretation'] = df['VADER_sentiment'].apply(interpret_sentiment)

# Display the updated DataFrame
df.head()

Unnamed: 0,name,affected_address,email_address,case_number,date,constituent_email_1,d4_response_1,d4_staff_member,constituent_email_2,d4_response_2,VADER_sentiment,Sentiment_Interpretation
0,Ron,6864 East Bucknell Place,ron@email.com,0,2024-08-05,The lack of police presence and code enforceme...,"Good morning Ron, \n\nThank you for reaching o...",Staff1,,,-0.5,Negative
1,Renee,Dahlia & High Line intersection,renee@email.com,0,2024-08-19,I would like to know if there is a possible to...,Thank you for reaching out to our office. The...,Staff2,,,0.3612,Positive
2,JW,Happy Canyon & Jasmine St,jw@email.com,0,2024-08-19,Accident took place at Happy Canyon & Jasmine....,"Good morning TJ,\n\nThank you for reaching out...",Staff1,,,-0.5095,Negative
3,Clara,Hutchinson Hills,clara@email.com,9578014,2024-08-20,Has many concerns about getting a compost cart...,Thank you for reaching out to our office and f...,Staff2,Clara sent another email back with more questi...,Did not respond as they were the same question...,-0.296,Negative
4,Pauline,,pauline@email.com,0,2024-08-21,At any rate my concern is how will HOA condo u...,"Good afternoon Pauline, \n\nThank you for reac...",Staff1,,,0.1179,Positive


In [6]:
# Keyword matching function for classification
def classify_email_initial(email_text):
    email_text_lower = email_text.lower()  # Lowercase for easier matching
    
    # Check for keywords in the email text and assign a category
    for category, words in keywords.items():
        if any(word in email_text_lower for word in words):
            return category
    return "Other"  # Default category if no keywords match

In [7]:
# Apply the initial classification
df['initial_category'] = df['constituent_email_1'].apply(lambda email: classify_email_initial(email) if pd.notnull(email) else "Other")
df.head()

Unnamed: 0,name,affected_address,email_address,case_number,date,constituent_email_1,d4_response_1,d4_staff_member,constituent_email_2,d4_response_2,initial_category
0,Ron,6864 East Bucknell Place,ron@email.com,0,2024-08-05,The lack of police presence and code enforceme...,"Good morning Ron, \n\nThank you for reaching o...",Staff1,,,Snow on Sidewalk
1,Renee,Dahlia & High Line intersection,renee@email.com,0,2024-08-19,I would like to know if there is a possible to...,Thank you for reaching out to our office. The...,Staff2,,,Other
2,JW,Happy Canyon & Jasmine St,jw@email.com,0,2024-08-19,Accident took place at Happy Canyon & Jasmine....,"Good morning TJ,\n\nThank you for reaching out...",Staff1,,,Other
3,Clara,Hutchinson Hills,clara@email.com,9578014,2024-08-20,Has many concerns about getting a compost cart...,Thank you for reaching out to our office and f...,Staff2,Clara sent another email back with more questi...,Did not respond as they were the same question...,Other
4,Pauline,,pauline@email.com,0,2024-08-21,At any rate my concern is how will HOA condo u...,"Good afternoon Pauline, \n\nThank you for reac...",Staff1,,,Animal Complaint


In [12]:
# Load the API key from environment variables
api_key = os.getenv('OPENAI_API_KEY')
openai.api_key = api_key
print(f"API key loaded: {api_key}")

API key loaded: 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


In [19]:
def classify_email_with_gpt(email_text):
    messages = [
        {
            "role": "system",
            "content": (
                "You are an assistant that classifies emails based on two criteria: "
                "1) category and 2) sentiment. "
                "The category options are: Encampment Reporting, Graffiti, Pothole, Animal Complaint, "
                "Weeds & Vegetation, Neighborhood Issue, Other, Snow on Sidewalk, Abandoned Vehicle, "
                "Illegal Parking, Damaged/Fallen Tree, Police: Non-emergency, Fireworks, Illegal Dumping, "
                "Shared Micromobility, No Heat No Water No Electricity, Missed Trash Pickup. "
                "The sentiment options are: Positive, Neutral, and Negative."
            )
        },
        {
            "role": "user",
            "content":  {email_text}
        }
    ]

    # OpenAI API for category and sentiment
    response = openai.ChatCompletion.create(
        model="gpt-4",
        messages=messages,
        max_tokens=50,
        temperature=0.0
    )

    # Extract  category and sentiment
    result = response['choices'][0]['message']['content'].strip()
    
    # Category: [Category]. Sentiment: [Sentiment]
    try:
        category, sentiment = None, None
        if "Category:" in result and "Sentiment:" in result:
            category = result.split("Category:")[1].split("Sentiment:")[0].strip()
            sentiment = result.split("Sentiment:")[1].strip()
        return category, sentiment
    except Exception as e:
        print("Error parsing response:", e)
        return "Unknown", "Unknown"




In [23]:
# Apply the GPT-based classification
df['gpt_sentiment'] = df['constituent_email_1'].apply(lambda email: classify_email_with_gpt(email) if pd.notnull(email) else "Other")


APIRemovedInV1: 

You tried to access openai.ChatCompletion, but this is no longer supported in openai>=1.0.0 - see the README at https://github.com/openai/openai-python for the API.

You can run `openai migrate` to automatically upgrade your codebase to use the 1.0.0 interface. 

Alternatively, you can pin your installation to the old version, e.g. `pip install openai==0.28`

A detailed migration guide is available here: https://github.com/openai/openai-python/discussions/742


In [21]:
df.head()

Unnamed: 0,name,affected_address,email_address,case_number,date,constituent_email_1,d4_response_1,d4_staff_member,constituent_email_2,d4_response_2,VADER_sentiment,Sentiment_Interpretation
0,Ron,6864 East Bucknell Place,ron@email.com,0,2024-08-05,The lack of police presence and code enforceme...,"Good morning Ron, \n\nThank you for reaching o...",Staff1,,,-0.5,Negative
1,Renee,Dahlia & High Line intersection,renee@email.com,0,2024-08-19,I would like to know if there is a possible to...,Thank you for reaching out to our office. The...,Staff2,,,0.3612,Positive
2,JW,Happy Canyon & Jasmine St,jw@email.com,0,2024-08-19,Accident took place at Happy Canyon & Jasmine....,"Good morning TJ,\n\nThank you for reaching out...",Staff1,,,-0.5095,Negative
3,Clara,Hutchinson Hills,clara@email.com,9578014,2024-08-20,Has many concerns about getting a compost cart...,Thank you for reaching out to our office and f...,Staff2,Clara sent another email back with more questi...,Did not respond as they were the same question...,-0.296,Negative
4,Pauline,,pauline@email.com,0,2024-08-21,At any rate my concern is how will HOA condo u...,"Good afternoon Pauline, \n\nThank you for reac...",Staff1,,,0.1179,Positive
