<a href="https://colab.research.google.com/github/JamesMartinOU/PublicRedditSentimentAnalysis/blob/main/RedditPostsSentimentAnalysis.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# Install Python libraries
!pip install transformers torch emoji
!pip install mysql-connector-python

In [None]:
# Import Python libraries
import os
import time
import warnings
from getpass import getpass
from urllib.parse import quote_plus

import emoji
import gspread
import mysql.connector
import openpyxl
import pandas as pd
import torch
from google.auth import default
from google.colab import auth
from google.colab import files
from sqlalchemy import create_engine
from transformers import AutoTokenizer, AutoModelForSequenceClassification, pipeline

In [None]:
# RDS MySQL connection details
# The cells below read DB_HOST, DB_USER, DB_PASSWORD and DB_NAME. Each value is
# taken from the environment variable of the same name when it is set, and is
# prompted for otherwise, so credentials never need to be saved in the notebook.
DB_HOST = os.environ.get("DB_HOST") or input("DB_HOST: ")
DB_USER = os.environ.get("DB_USER") or input("DB_USER: ")
# getpass keeps the password out of the cell output.
DB_PASSWORD = os.environ.get("DB_PASSWORD") or getpass("DB_PASSWORD: ")
DB_NAME = os.environ.get("DB_NAME") or input("DB_NAME: ")


In [None]:
# Load Twitter sentiment model
model_name = "cardiffnlp/twitter-roberta-base-sentiment"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForSequenceClassification.from_pretrained(model_name)
# Use the first GPU when the runtime has one; -1 selects the CPU.
sentiment_pipeline = pipeline(
    "sentiment-analysis",
    model=model,
    tokenizer=tokenizer,
    device=0 if torch.cuda.is_available() else -1,
)

# MySQL connection
conn = mysql.connector.connect(
    host=DB_HOST,
    user=DB_USER,
    password=DB_PASSWORD,
    database=DB_NAME
)
# try/finally guarantees the cursor and the connection are released even when
# the query or the fetch raises.
try:
    cursor = conn.cursor()
    try:
        cursor.execute("SELECT * FROM reddit_posts;")
        # cursor.description holds one entry per result column; its first
        # element is the column name.
        columns = [desc[0] for desc in cursor.description]
        data = cursor.fetchall()
    finally:
        cursor.close()
finally:
    conn.close()

df = pd.DataFrame(data, columns=columns)

# Prepare for sentiment analysis
# NOTE(review): `texts` is not read anywhere in the visible cells; kept in case
# another cell depends on it.
texts = df['title'].dropna().tolist()

# Preprocess and analyze
def preprocess(text):
    """Return `text` passed through emoji.demojize, with newlines turned into
    spaces and leading/trailing whitespace removed."""
    demojized = emoji.demojize(text)
    single_line = demojized.replace('\n', ' ')
    return single_line.strip()

# Readable names for the model's raw class labels. Built once: it does not
# change from post to post.
label_map = {
    'LABEL_0': 'Negative',
    'LABEL_1': 'Neutral',
    'LABEL_2': 'Positive'
}

# Only posts that have a title can be scored.
titled_posts = df.loc[df['title'].notna(), ['id', 'title']]
cleaned_titles = [preprocess(title) for title in titled_posts['title']]

# Score every title in a single pipeline call (one prediction dict per input).
# truncation=True clips over-long inputs to the model's maximum sequence length
# instead of letting them raise. The pipeline is skipped when there is nothing
# to score.
predictions = (
    sentiment_pipeline(cleaned_titles, truncation=True) if cleaned_titles else []
)
assert len(predictions) == len(cleaned_titles), "expected one prediction per title"

results = [
    {
        'id': post_id,
        'title': title,
        'sentiment': label_map[prediction['label']],
        'confidence': round(prediction['score'], 4)
    }
    for post_id, title, prediction in zip(
        titled_posts['id'], titled_posts['title'], predictions
    )
]

# Convert to DataFrame
# Explicit columns keep the schema intact even when there are no results.
sentiment_df = pd.DataFrame(results, columns=['id', 'title', 'sentiment', 'confidence'])

# User name and password are URL-encoded so characters such as '@', ':' or '/'
# in the credentials cannot corrupt the connection URL.
db_url = (
    f"mysql+mysqlconnector://{quote_plus(DB_USER)}:{quote_plus(DB_PASSWORD)}"
    f"@{DB_HOST}/{DB_NAME}"
)
engine = create_engine(db_url)

# if_exists='replace' drops and recreates the table on every run.
sentiment_df.to_sql(name='reddit_posts_sentiment', con=engine, if_exists='replace', index=False)

print("Sentiment results written to 'reddit_posts_sentiment' table.")
# Preview
print(sentiment_df.head())

In [None]:
# Query result set
# User name and password are URL-encoded so characters such as '@', ':' or '/'
# in the credentials cannot corrupt the connection URL.
db_url = (
    f"mysql+mysqlconnector://{quote_plus(DB_USER)}:{quote_plus(DB_PASSWORD)}"
    f"@{DB_HOST}/{DB_NAME}"
)
engine = create_engine(db_url)

# Run query and load into DataFrame
query = "SELECT * FROM reddit_posts_sentiment;"
df_sentiment = pd.read_sql(query, engine)

# Preview result
print(df_sentiment.head())