In [None]:
pip install pandas tqdm openpyxl
pip install textblob
pip install nltk
pip install transformers torch

In [None]:
import pandas as pd
import nltk
from nltk.sentiment.vader import SentimentIntensityAnalyzer
from tqdm import tqdm

# Fetch the VADER lexicon through NLTK's downloader, then build the analyzer
nltk.download("vader_lexicon")
vader_analyzer = SentimentIntensityAnalyzer()

# Read the cleaned LAPD workbook into a DataFrame
cleaned_workbook_path = "/Users/prakashreddy/Documents/Grad_Project/LAPD_Police_Data_Cleaned.xlsx"
df = pd.read_excel(cleaned_workbook_path)

def get_vader_sentiment(text):
    """Label a piece of text as POSITIVE, NEGATIVE or NEUTRAL using VADER.

    The input is converted with ``str()`` and scored by the module-level
    ``vader_analyzer``; only the ``compound`` score is used.

    Returns:
        "POSITIVE" when compound >= 0.05, "NEGATIVE" when compound <= -0.05,
        and "NEUTRAL" for everything in between.
    """
    compound_score = vader_analyzer.polarity_scores(str(text))["compound"]

    if compound_score >= 0.05:
        return "POSITIVE"
    if compound_score <= -0.05:
        return "NEGATIVE"
    return "NEUTRAL"

# Score every Cleaned_Text row with VADER, showing a tqdm progress bar
tqdm.pandas(desc="VADER Sentiment")
cleaned_text_as_str = df["Cleaned_Text"].astype(str)
df["vader_sentiment"] = cleaned_text_as_str.progress_apply(get_vader_sentiment)

# Write the labelled rows to a new workbook (row index is not exported)
vader_output_path = "/Users/prakashreddy/Documents/LAPD_with_vader_sentiment.xlsx"
df.to_excel(vader_output_path, index=False)
print("✅ VADER sentiment analysis complete. File saved as 'LAPD_with_vader_sentiment.xlsx'.")

In [None]:
import pandas as pd
from transformers import pipeline
from tqdm import tqdm

# Read the cleaned LAPD workbook
cleaned_workbook_path = "/Users/prakashreddy/Documents/Grad_Project/LAPD_Police_Data_Cleaned.xlsx"
df = pd.read_excel(cleaned_workbook_path)

# Create both result columns (empty) when the workbook has no hf_sentiment column yet
if "hf_sentiment" not in df:
    for result_column in ("hf_sentiment", "hf_confidence"):
        df[result_column] = None

# Select the rows that still have no Hugging Face sentiment and collect their text
missing_mask = df["hf_sentiment"].isna()
pending_text = df.loc[missing_mask, "Cleaned_Text"]
texts_to_analyze = pending_text.astype(str).tolist()

# Build the Hugging Face sentiment-analysis pipeline
hf_model_name = "distilbert-base-uncased-finetuned-sst-2-english"
hf_classifier = pipeline("sentiment-analysis", model=hf_model_name)

# Analyze each tweet using Hugging Face.
# hf_sentiments / hf_confidences receive exactly one entry per input text, in
# order, so they stay aligned with the rows selected for analysis.
hf_sentiments = []
hf_confidences = []
failed_count = 0  # number of texts the classifier could not process

for text in tqdm(texts_to_analyze, desc="Hugging Face Sentiment"):
    try:
        # Cut to 512 characters first; truncation=True additionally caps the
        # tokenized input at the model's maximum length, so token-dense text
        # cannot overflow the model.
        result = hf_classifier(text[:512], truncation=True)[0]
        # Read both fields before appending anything so a malformed result
        # cannot leave the two lists with different lengths.
        label, score = result["label"], result["score"]
    except Exception as exc:
        # Best effort: keep going and leave this row empty, but do not hide
        # the problem - report the first error and count the rest.
        failed_count += 1
        if failed_count == 1:
            tqdm.write(f"WARNING: Hugging Face classification failed for a text: {exc!r}")
        label, score = None, None

    hf_sentiments.append(label)
    hf_confidences.append(score)

if failed_count:
    print(f"WARNING: {failed_count} of {len(texts_to_analyze)} texts could not be classified and were left empty.")

# Write the predictions into the rows that were selected for analysis
for result_column, result_values in (
    ("hf_sentiment", hf_sentiments),
    ("hf_confidence", hf_confidences),
):
    df.loc[missing_mask, result_column] = result_values

# Export the labelled data to a new workbook (row index is not exported)
hf_output_path = "/Users/prakashreddy/Documents/LAPD_with_hf_sentiment.xlsx"
df.to_excel(hf_output_path, index=False)
print("✅ Hugging Face sentiment analysis complete. Saved as 'LAPD_with_hf_sentiment.xlsx'")


In [24]:
import pandas as pd
from transformers import pipeline
from tqdm import tqdm

# Read the cleaned LAPD workbook from the "Preprocessed Data" folder
preprocessed_workbook_path = "/Users/prakashreddy/Documents/Grad_Project/Preprocessed Data/LAPD_Police_Data_Cleaned.xlsx"
df = pd.read_excel(preprocessed_workbook_path)

# Create both result columns (empty) when the workbook has no hf_sentiment column yet
if "hf_sentiment" not in df:
    for result_column in ("hf_sentiment", "hf_confidence"):
        df[result_column] = None

# Select the rows that still have no Hugging Face sentiment and collect their text
missing_mask = df["hf_sentiment"].isna()
pending_text = df.loc[missing_mask, "Cleaned_Text"]
texts_to_analyze = pending_text.astype(str).tolist()

# Build the Hugging Face sentiment-analysis pipeline
hf_model_name = "distilbert-base-uncased-finetuned-sst-2-english"
hf_classifier = pipeline("sentiment-analysis", model=hf_model_name)

# Analyze each tweet using Hugging Face and map the binary model output onto
# three classes: confident POSITIVE -> "positive", confident NEGATIVE ->
# "negative", anything at or below the confidence threshold -> "neutral".
# hf_sentiments / hf_confidences receive exactly one entry per input text.
hf_sentiments = []
hf_confidences = []
failed_count = 0  # number of texts the classifier could not process

# Predictions whose confidence is at or below this value are labelled "neutral".
CONFIDENCE_THRESHOLD = 0.6

for text in tqdm(texts_to_analyze, desc="Hugging Face Sentiment"):
    try:
        # Cut to 512 characters first; truncation=True additionally caps the
        # tokenized input at the model's maximum length, so token-dense text
        # cannot overflow the model.
        result = hf_classifier(text[:512], truncation=True)[0]
        label = result["label"]
        score = result["score"]

        if label == "POSITIVE" and score > CONFIDENCE_THRESHOLD:
            sentiment = "positive"
        elif label == "NEGATIVE" and score > CONFIDENCE_THRESHOLD:
            sentiment = "negative"
        elif score <= CONFIDENCE_THRESHOLD:
            sentiment = "neutral"
        else:
            # Without this branch an unmatched result would silently reuse the
            # sentiment of the previous text (or be unbound on the first one).
            raise ValueError(f"unexpected classifier output: label={label!r}, score={score!r}")
    except Exception as exc:
        # Best effort: keep going and leave this row empty, but do not hide
        # the problem - report the first error and count the rest.
        failed_count += 1
        if failed_count == 1:
            tqdm.write(f"WARNING: Hugging Face classification failed for a text: {exc!r}")
        sentiment, score = None, None

    hf_sentiments.append(sentiment)
    hf_confidences.append(score)

if failed_count:
    print(f"WARNING: {failed_count} of {len(texts_to_analyze)} texts could not be classified and were left empty.")

# Assign results back to the DataFrame (only the rows that were analyzed)
df.loc[missing_mask, "hf_sentiment"] = hf_sentiments
df.loc[missing_mask, "hf_confidence"] = hf_confidences

# Save to a new Excel file.
# The input workbook and the completion message both refer to the LAPD data, so
# the result is written under the LAPD name (it was previously saved under an
# "NYPD_..." file name, which did not match the message and mislabelled the data).
# The file name is defined once and reused in the message so the two cannot drift.
output_dir = "/Users/prakashreddy/Documents"
output_name = "LAPD_with_hf_sentiment_custom_logic.xlsx"
df.to_excel(f"{output_dir}/{output_name}", index=False)
print(f"✅ Sentiment analysis complete with custom logic. File saved as '{output_name}'")


Device set to use mps:0
Hugging Face Sentiment: 100%|████████████| 60894/60894 [09:09<00:00, 110.77it/s]


✅ Sentiment analysis complete with custom logic. File saved as 'LAPD_with_hf_sentiment_custom_logic.xlsx'
