In [1]:
import pandas as pd
from collections import Counter
import re
from textblob import TextBlob

##Path to the "sentimentdataset.csv" file
df = pd.read_csv('Dataset/sentimentdataset.csv')

df = df[['Timestamp', 'Text', 'Retweets', 'Likes', 'Hashtags', 'Sentiment']]
df = df.dropna()

df = df.rename(columns={
    'Timestamp': 'date',
    'Text': 'text',
    'Sentiment': 'Psentiment'
})

def cleanHashtags(hashtag_str):
    if isinstance(hashtag_str, str):
        cleaned = re.sub(r'[\[\]\'\"]', '', hashtag_str)
        tags = []
        for rawTag in cleaned.split(','):
            trimmedTag = rawTag.strip()
            if trimmedTag:
                tags.append(trimmedTag)
        return tags
    return []

df['hashtags'] = df['Hashtags'].apply(cleanHashtags)

allHashtags = []

for sublist in df['hashtags']:
    for tag in sublist:
        allHashtags.append(tag)

topHashtags = Counter(allHashtags).most_common(10)

def analyzeSentiment(text):
    analysis = TextBlob(str(text))
    if analysis.sentiment.polarity > 0.05:
        return 'Positive'
    elif analysis.sentiment.polarity < -0.05:
        return 'Negative'
    else:
        return 'Neutral'

df['calculatedSentiment'] = df['text'].apply(analyzeSentiment)

def getSentimentScore(text):
    text = str(text)
    blob = TextBlob(text)
    return blob.sentiment.polarity

df['sentimentScore'] = df['text'].apply(getSentimentScore)

df['hashtagSTR'] = df['hashtags'].apply(lambda x: ', '.join(x))

##Path to the "Output" folder
df.to_csv('Dataset/Output/processedData.csv', index=False)

topHashtagsDf = pd.DataFrame(topHashtags, columns=['Hashtag', 'Count'])
##Path to the "Output" folder
topHashtagsDf.to_csv('Dataset/Output/topHashtags.csv', index=False)

print("Processing complete! Files saved:")
print(f"Processed data: {len(df)} rows")
print(f"Top hashtag: {topHashtags[0][0]} ({topHashtags[0][1]} mentions)")

Processing complete! Files saved:
Processed data: 732 rows
Top hashtag: #Compassionate #TearsOfEmpathy (3 mentions)


In [2]:
import pandas as pd

##Path to the "Output" folder
df = pd.read_csv('Dataset/Output/processedData.csv')

df['hashtags'] = df['hashtags'].apply(eval)
dfExploded = df.explode('hashtags')

sentimentCounts = dfExploded.groupby(['hashtags', 'calculatedSentiment']).size().unstack(fill_value=0)

sentimentCounts.columns = ['Negative', 'Neutral', 'Positive'] if 'Negative' in sentimentCounts.columns else sentimentCounts.columns

sentimentPercent = sentimentCounts.div(sentimentCounts.sum(axis=1), axis=0) * 100
sentimentPercent = sentimentPercent.fillna(0).round(1)

sentimentPercent = sentimentPercent.reset_index()
sentimentPercent.columns.name = None
sentimentPercent = sentimentPercent.rename(columns={
    'hashtags': 'Hashtag',
    'Positive': 'Positive%',
    'Neutral': 'Neutral%',
    'Negative': 'Negative%'
})

##Path to the "Output" folder
sentimentPercent.to_csv('Dataset/Output/hashtagSentimentSummary.csv', index=False)

print("Hashtag sentiment breakdown saved as 'hashtagSentimentSummary.csv'")

Hashtag sentiment breakdown saved as 'hashtagSentimentSummary.csv'
