# Sentiment Classifier

In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import plotly.graph_objects as go
import plotly.express as px

### Load Labeled Data

In [None]:
# Read the labeled Tesla geolocation dataset.
# The 'Unnamed: 0' column is used as the row index (presumably the index
# written out by an earlier DataFrame.to_csv -- confirm), and 'created_utc'
# is parsed into datetimes while loading.
labeled_geo_tesla_data = pd.read_csv(
    'labeled_tesla_geo_data.csv',
    index_col='Unnamed: 0',
    parse_dates=['created_utc'],
)

### Visualizations

In [None]:
# Line chart: number of posts per BERT sentiment label for each calendar year.

# Map label index to names
label_map = {0: 'Negative', 1: 'Neutral', 2: 'Positive'}

# Derive the year directly from 'created_utc' rather than relying on a
# pre-existing 'year' column: in this notebook that column is first created
# by the following cell, so (unless the CSV itself already carries one) a
# top-to-bottom run would fail here with KeyError: 'year'.
post_year = pd.to_datetime(labeled_geo_tesla_data['created_utc']).dt.year.rename('year')

# Count posts per (year, label) and pivot labels into columns.
grouped = (
    labeled_geo_tesla_data
    .groupby([post_year, 'bert_label'])
    .size()
    .unstack(fill_value=0)
    # Guarantee one column per known label so a label that never occurs in
    # the data plots as a flat zero line instead of raising KeyError below.
    .reindex(columns=list(label_map), fill_value=0)
)

# Create the plot
plt.figure(figsize=(10, 6))

# Plot a line for each label across years
for label_id, label_name in label_map.items():
    plt.plot(grouped.index, grouped[label_id], label=label_name, marker='o')

# Add labels, title, and legend
plt.xlabel('Year')
plt.ylabel('Count')
plt.title('BERT Label Distribution Over Time')
plt.legend(title='Sentiment')

# Display the plot
plt.grid(True)
plt.show()

In [None]:
# Interactive bar chart: sentiment label counts for one year at a time,
# with a dropdown to switch between years (the most recent year is shown first).

# Store the calendar year of each post in its own 'year' column.
labeled_geo_tesla_data['year'] = pd.to_datetime(labeled_geo_tesla_data['created_utc']).dt.year

# Rows = year, columns = bert_label, values = number of posts.
grouped = (
    labeled_geo_tesla_data
    .groupby(['year', 'bert_label'])
    .size()
    .unstack(fill_value=0)
)

# Dropdown entries (one per year) and the x-axis category names.
years = grouped.index.tolist()
labels = ['Negative', 'Neutral', 'Positive']
label_map = {0: 'Negative', 1: 'Neutral', 2: 'Positive'}

last_idx = len(years) - 1  # position of the year displayed by default

# Build one bar trace and its matching dropdown button per year.
data = []
dropdown_buttons = []
for idx, year in enumerate(years):
    # Counts for this year in fixed label order; absent labels become 0.
    counts = grouped.loc[year].reindex([0, 1, 2], fill_value=0)

    data.append(
        go.Bar(
            x=labels,
            y=counts.values,
            name=str(year),
            visible=(idx == last_idx),
            hovertext=[f"{c}" for c in counts],
            hoverinfo="text",
        )
    )

    # Selecting this button shows only trace `idx` and retitles the figure.
    dropdown_buttons.append(
        dict(
            label=str(year),
            method='update',
            args=[
                {'visible': [i == idx for i in range(len(years))]},
                {'title': f"BERT Label Distribution - {year}"},
            ],
        )
    )

# Assemble the figure; the dropdown starts on the last year.
fig = go.Figure(data=data)
fig.update_layout(
    title=f"BERT Label Distribution - {years[-1]}",
    xaxis_title="BERT Label",
    yaxis_title="Count",
    updatemenus=[dict(active=last_idx, buttons=dropdown_buttons)],
    template='plotly_white',
)

fig.show()

In [None]:
# Interactive US choropleth: number of posts per state for one sentiment
# label at a time, with a dropdown to switch between labels.

# Lower-case, space-free state name -> two-letter USPS code.
state_mapping = {
    'texas': 'TX', 'newyork': 'NY', 'california': 'CA', 'florida': 'FL', 'illinois': 'IL',
    'pennsylvania': 'PA', 'ohio': 'OH', 'georgia': 'GA', 'northcarolina': 'NC', 'michigan': 'MI',
    'newjersey': 'NJ', 'virginia': 'VA', 'washington': 'WA', 'arizona': 'AZ', 'massachusetts': 'MA',
    'tennessee': 'TN', 'indiana': 'IN', 'missouri': 'MO', 'maryland': 'MD', 'wisconsin': 'WI',
    'colorado': 'CO', 'minnesota': 'MN', 'southcarolina': 'SC', 'alabama': 'AL', 'louisiana': 'LA',
    'kentucky': 'KY', 'oregon': 'OR', 'connecticut': 'CT', 'oklahoma': 'OK', 'iowa': 'IA',
    'mississippi': 'MS', 'arkansas': 'AR', 'kansas': 'KS', 'nevada': 'NV', 'utah': 'UT',
    'newmexico': 'NM', 'nebraska': 'NE', 'westvirginia': 'WV', 'idaho': 'ID', 'hawaii': 'HI',
    'maine': 'ME', 'newhampshire': 'NH', 'montana': 'MT', 'rhodeisland': 'RI', 'delaware': 'DE',
    'southdakota': 'SD', 'northdakota': 'ND', 'alaska': 'AK', 'vermont': 'VT', 'wyoming': 'WY'
}

# Normalise the raw state names to the key format used above before mapping.
# Whitespace is stripped as well as case, so both 'newyork' and 'New York'
# resolve to 'NY'; names that still do not match become NaN.
normalized_state = (
    labeled_geo_tesla_data['state']
    .str.lower()
    .str.replace(r'\s+', '', regex=True)
)
labeled_geo_tesla_data['state_code'] = normalized_state.map(state_mapping)

# Define the labels and colors for each sentiment
# (list position == bert_label id: 0 = Negative, 1 = Neutral, 2 = Positive).
labels = ['Negative', 'Neutral', 'Positive']
colors = ['Reds', 'Blues', 'Greens']

# Count posts per (state_code, label); rows with an unmapped state are
# excluded by groupby. Reindexing guarantees a column for every label id so a
# label that never occurs yields zeros instead of a KeyError further down.
grouped = (
    labeled_geo_tesla_data
    .groupby(['state_code', 'bert_label'])
    .size()
    .unstack(fill_value=0)
    .reindex(columns=list(range(len(labels))), fill_value=0)
)

# Prepare the choropleth data for each label
data = []
for label_id, (label_name, colorscale) in enumerate(zip(labels, colors)):
    trace = go.Choropleth(
        locations=grouped.index,  # State codes (e.g., TX, NY)
        locationmode='USA-states',
        z=grouped[label_id],  # Post count for this sentiment label
        colorscale=colorscale,  # One colour scale per sentiment
        colorbar_title='Count',  # Title for the color bar
        visible=(label_id == 0),  # Only show the first label by default
        name=label_name,  # Name for the label in the legend
        hovertemplate=(
            "<b>%{location}</b><br>"  # State code
            "Count: %{z}<br>"  # Count of sentiment
            "<extra></extra>"  # Remove the additional trace info
        ),
        # One text entry per location. The previous expression
        # `[labels[i] * len(grouped)]` produced a single string made of the
        # label repeated len(grouped) times.
        text=[label_name] * len(grouped),
    )
    data.append(trace)

# Create dropdown buttons to toggle between labels
dropdown_buttons = [
    dict(
        label=label,
        method='update',
        args=[{'visible': [j == i for j in range(len(labels))]},  # Toggle visibility
              {'title': f'{label} Sentiment Posts by State'}]
    )
    for i, label in enumerate(labels)
]

# Build the figure
fig = go.Figure(data=data)
fig.update_layout(
    title='Sentiment Posts by State',  # Default title
    geo_scope='usa',  # Show only the USA
    updatemenus=[dict(active=0, buttons=dropdown_buttons)],  # Dropdown to switch labels
    template='plotly_white'  # Use a clean white template
)

# Show the plot
fig.show()

In [None]:
# Stacked bar chart: number of Negative / Neutral / Positive posts per state.

# Reload the dataset so this cell does not depend on columns added by
# earlier cells.
labeled_geo_tesla_data = pd.read_csv('labeled_tesla_geo_data.csv', index_col='Unnamed: 0', parse_dates=['created_utc'])

labeled_geo_tesla_data['created_utc'] = pd.to_datetime(labeled_geo_tesla_data['created_utc'])
# Human-readable sentiment name for each numeric BERT label.
labeled_geo_tesla_data['sentiment'] = labeled_geo_tesla_data['bert_label'].map({0: 'Negative', 1: 'Neutral', 2: 'Positive'})
# Title-case the state names (e.g. 'texas' -> 'Texas') for the axis labels.
labeled_geo_tesla_data['state'] = labeled_geo_tesla_data['state'].str.title()
geo_data = labeled_geo_tesla_data.dropna(subset=['state'])

# Define color mapping for sentiment; the key order is also the stacking order.
color_map = {
    'Negative': 'red',
    'Neutral': 'gray',
    'Positive': 'green'
}

# Group by state and sentiment, then force the columns into color_map order.
# reindex (rather than plain column selection) keeps the cell working when
# one of the sentiments never occurs in the data: the missing column is
# filled with zeros instead of raising KeyError.
state_grouped = (
    geo_data
    .groupby(['state', 'sentiment'])
    .size()
    .unstack(fill_value=0)
    .reindex(columns=list(color_map), fill_value=0)
)

# Sentiment distribution by state graph
state_grouped.plot(kind='bar', stacked=True, figsize=(14, 6), color=[color_map[col] for col in state_grouped.columns])
plt.title('Sentiment Distribution by State 2010 - 2025')
plt.xlabel('State')
plt.ylabel('Number of Posts')
# The legend lists every sentiment in the stack, not a dominant one, so it is
# titled 'Sentiment' (the previous 'Dominate Sentiment' was misleading).
plt.legend(title='Sentiment')
plt.xticks(rotation=90)
plt.tight_layout()
plt.show()

In [None]:
# US choropleth coloured by the most frequent sentiment in each state.

# Reload the dataset and derive the helper columns used below.
labeled_geo_tesla_data = pd.read_csv(
    'labeled_tesla_geo_data.csv',
    index_col='Unnamed: 0',
    parse_dates=['created_utc'],
)
labeled_geo_tesla_data['created_utc'] = pd.to_datetime(labeled_geo_tesla_data['created_utc'])
labeled_geo_tesla_data['sentiment'] = labeled_geo_tesla_data['bert_label'].map(
    {0: 'Negative', 1: 'Neutral', 2: 'Positive'}
)

# Title-case the state names so they line up with the keys of us_state_abbrev.
labeled_geo_tesla_data['state'] = labeled_geo_tesla_data['state'].str.title()

# Keep only rows that have a state.
geo_data = labeled_geo_tesla_data.dropna(subset=['state'])

# Post counts: one row per state, one column per sentiment.
state_grouped = (
    geo_data
    .groupby(['state', 'sentiment'])
    .size()
    .unstack(fill_value=0)
)

# For every state, the sentiment column holding the largest count.
dominant_sentiment = (
    state_grouped
    .idxmax(axis=1)
    .rename('dominant_sentiment')
    .reset_index()
)

# Full state name -> two-letter abbreviation.
us_state_abbrev = {
    'Alabama': 'AL', 'Alaska': 'AK', 'Arizona': 'AZ', 'Arkansas': 'AR',
    'California': 'CA', 'Colorado': 'CO', 'Connecticut': 'CT', 'Delaware': 'DE',
    'Florida': 'FL', 'Georgia': 'GA', 'Hawaii': 'HI', 'Idaho': 'ID',
    'Illinois': 'IL', 'Indiana': 'IN', 'Iowa': 'IA', 'Kansas': 'KS',
    'Kentucky': 'KY', 'Louisiana': 'LA', 'Maine': 'ME', 'Maryland': 'MD',
    'Massachusetts': 'MA', 'Michigan': 'MI', 'Minnesota': 'MN', 'Mississippi': 'MS',
    'Missouri': 'MO', 'Montana': 'MT', 'Nebraska': 'NE', 'Nevada': 'NV',
    'New Hampshire': 'NH', 'New Jersey': 'NJ', 'New Mexico': 'NM', 'New York': 'NY',
    'North Carolina': 'NC', 'North Dakota': 'ND', 'Ohio': 'OH', 'Oklahoma': 'OK',
    'Oregon': 'OR', 'Pennsylvania': 'PA', 'Rhode Island': 'RI', 'South Carolina': 'SC',
    'South Dakota': 'SD', 'Tennessee': 'TN', 'Texas': 'TX', 'Utah': 'UT',
    'Vermont': 'VT', 'Virginia': 'VA', 'Washington': 'WA', 'West Virginia': 'WV',
    'Wisconsin': 'WI', 'Wyoming': 'WY',
}

# Attach the abbreviation used as the map location
# (names missing from the dictionary map to NaN).
dominant_sentiment['state_abbrev'] = dominant_sentiment['state'].map(us_state_abbrev)

# Fixed colour for each sentiment category.
sentiment_colors = {
    'Positive': 'green',
    'Neutral': 'gray',
    'Negative': 'red',
}

# Draw the dominant-sentiment map.
fig = px.choropleth(
    dominant_sentiment,
    title='Dominant Sentiment by U.S. State 2010 - 2025',
    scope='usa',
    locationmode='USA-states',
    locations='state_abbrev',
    color='dominant_sentiment',
    color_discrete_map=sentiment_colors,
    labels={'dominant_sentiment': 'Dominant Sentiment'},
)
fig.show()

In [None]:
# One US choropleth per year, coloured by each state's most frequent sentiment.

# Reload the dataset and derive year, sentiment name and title-cased state.
labeled_geo_tesla_data = pd.read_csv(
    'labeled_tesla_geo_data.csv',
    index_col='Unnamed: 0',
    parse_dates=['created_utc'],
)
labeled_geo_tesla_data['created_utc'] = pd.to_datetime(labeled_geo_tesla_data['created_utc'])
labeled_geo_tesla_data['year'] = labeled_geo_tesla_data['created_utc'].dt.year
labeled_geo_tesla_data['sentiment'] = labeled_geo_tesla_data['bert_label'].map(
    {0: 'Negative', 1: 'Neutral', 2: 'Positive'}
)
labeled_geo_tesla_data['state'] = labeled_geo_tesla_data['state'].str.title()

# Keep only rows that have a state.
geo_data = labeled_geo_tesla_data.dropna(subset=['state'])

# Full state name -> two-letter abbreviation.
us_state_abbrev = {
    'Alabama': 'AL', 'Alaska': 'AK', 'Arizona': 'AZ', 'Arkansas': 'AR',
    'California': 'CA', 'Colorado': 'CO', 'Connecticut': 'CT', 'Delaware': 'DE',
    'Florida': 'FL', 'Georgia': 'GA', 'Hawaii': 'HI', 'Idaho': 'ID',
    'Illinois': 'IL', 'Indiana': 'IN', 'Iowa': 'IA', 'Kansas': 'KS',
    'Kentucky': 'KY', 'Louisiana': 'LA', 'Maine': 'ME', 'Maryland': 'MD',
    'Massachusetts': 'MA', 'Michigan': 'MI', 'Minnesota': 'MN', 'Mississippi': 'MS',
    'Missouri': 'MO', 'Montana': 'MT', 'Nebraska': 'NE', 'Nevada': 'NV',
    'New Hampshire': 'NH', 'New Jersey': 'NJ', 'New Mexico': 'NM', 'New York': 'NY',
    'North Carolina': 'NC', 'North Dakota': 'ND', 'Ohio': 'OH', 'Oklahoma': 'OK',
    'Oregon': 'OR', 'Pennsylvania': 'PA', 'Rhode Island': 'RI', 'South Carolina': 'SC',
    'South Dakota': 'SD', 'Tennessee': 'TN', 'Texas': 'TX', 'Utah': 'UT',
    'Vermont': 'VT', 'Virginia': 'VA', 'Washington': 'WA', 'West Virginia': 'WV',
    'Wisconsin': 'WI', 'Wyoming': 'WY',
}

# Long-format post counts: one row per (year, state, sentiment).
grouped = (
    geo_data
    .groupby(['year', 'state', 'sentiment'])
    .size()
    .rename('count')
    .reset_index()
)

# Wide format: one row per (year, state), one column per sentiment.
dominant_by_year = pd.pivot_table(
    grouped,
    index=['year', 'state'],
    columns='sentiment',
    values='count',
    fill_value=0,
)
# The sentiment column with the largest count is the dominant one; it is
# computed on the count columns only, before the new column is attached.
dominant_by_year = dominant_by_year.assign(
    dominant_sentiment=dominant_by_year.idxmax(axis=1)
).reset_index()

# Attach the abbreviation used as the map location.
dominant_by_year['state_abbrev'] = dominant_by_year['state'].map(us_state_abbrev)

# Fixed colour for each sentiment category.
sentiment_colors = {
    'Positive': 'green',
    'Neutral': 'gray',
    'Negative': 'red',
}

# Render one map per year, in chronological order.
years = dominant_by_year['year'].unique()

for year in sorted(years):
    is_this_year = dominant_by_year['year'] == year
    year_data = dominant_by_year[is_this_year]

    fig = px.choropleth(
        year_data,
        title=f'Dominant Sentiment by State - {year}',
        scope='usa',
        locationmode='USA-states',
        locations='state_abbrev',
        color='dominant_sentiment',
        color_discrete_map=sentiment_colors,
    )
    fig.show()
