In [39]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import string
import re

In [40]:
df = pd.read_csv('sentimentdataset.csv')

# timestamp column to datetime object for easier handling
df['Timestamp'] = pd.to_datetime(df['Timestamp'], errors='coerce')
print(df.head())

   Unnamed: 0.1  Unnamed: 0  \
0             0           0   
1             1           1   
2             2           2   
3             3           3   
4             4           4   

                                                Text    Sentiment  \
0   Enjoying a beautiful day at the park!        ...   Positive     
1   Traffic was terrible this morning.           ...   Negative     
2   Just finished an amazing workout! 💪          ...   Positive     
3   Excited about the upcoming weekend getaway!  ...   Positive     
4   Trying out a new recipe for dinner tonight.  ...   Neutral      

            Timestamp            User     Platform  \
0 2023-01-15 12:30:00   User123          Twitter     
1 2023-01-15 08:45:00   CommuterX        Twitter     
2 2023-01-15 15:45:00   FitnessFan      Instagram    
3 2023-01-15 18:20:00   AdventureX       Facebook    
4 2023-01-15 19:55:00   ChefCook        Instagram    

                                     Hashtags  Retweets  Likes       Coun

In [41]:
df = df.drop(columns=['Unnamed: 0', 'Unnamed: 0.1'], errors='ignore')

print(df.columns)

Index(['Text', 'Sentiment', 'Timestamp', 'User', 'Platform', 'Hashtags',
       'Retweets', 'Likes', 'Country', 'Year', 'Month', 'Day', 'Hour'],
      dtype='object')


In [42]:
df.head()

Unnamed: 0,Text,Sentiment,Timestamp,User,Platform,Hashtags,Retweets,Likes,Country,Year,Month,Day,Hour
0,Enjoying a beautiful day at the park! ...,Positive,2023-01-15 12:30:00,User123,Twitter,#Nature #Park,15.0,30.0,USA,2023,1,15,12
1,Traffic was terrible this morning. ...,Negative,2023-01-15 08:45:00,CommuterX,Twitter,#Traffic #Morning,5.0,10.0,Canada,2023,1,15,8
2,Just finished an amazing workout! 💪 ...,Positive,2023-01-15 15:45:00,FitnessFan,Instagram,#Fitness #Workout,20.0,40.0,USA,2023,1,15,15
3,Excited about the upcoming weekend getaway! ...,Positive,2023-01-15 18:20:00,AdventureX,Facebook,#Travel #Adventure,8.0,15.0,UK,2023,1,15,18
4,Trying out a new recipe for dinner tonight. ...,Neutral,2023-01-15 19:55:00,ChefCook,Instagram,#Cooking #Food,12.0,25.0,Australia,2023,1,15,19


In [43]:
df['Sentiment'].value_counts()

Sentiment
Positive           44
Joy                42
Excitement         32
Neutral            14
Contentment        14
                   ..
Adrenaline          1
Harmony             1
ArtisticBurst       1
Radiance            1
Elegance            1
Name: count, Length: 279, dtype: int64

In [44]:
df['Platform'] = df['Platform'].str.strip() 
df['Platform'].value_counts()

Platform
Instagram    258
Twitter      243
Facebook     231
Name: count, dtype: int64

In [45]:
def clean_text(text):
    if not isinstance(text, str):
        return ""
    text = text.lower()                                     
    text = text.translate(str.maketrans('', '', string.punctuation))  
    text = re.sub(r'\d+', '', text)                         
    text = re.sub(r'\s+', ' ', text).strip()              
    return text

df['Cleaned_Text'] = df['Text'].apply(clean_text)
df[['Text', 'Cleaned_Text']].head()


Unnamed: 0,Text,Cleaned_Text
0,Enjoying a beautiful day at the park! ...,enjoying a beautiful day at the park
1,Traffic was terrible this morning. ...,traffic was terrible this morning
2,Just finished an amazing workout! 💪 ...,just finished an amazing workout 💪
3,Excited about the upcoming weekend getaway! ...,excited about the upcoming weekend getaway
4,Trying out a new recipe for dinner tonight. ...,trying out a new recipe for dinner tonight


In [46]:
def clean_hashtags(hashtags):
    if not isinstance(hashtags, str):
        return []
    tags = hashtags.strip().split(' ')
    tags = [tag.replace('#', '').lower() for tag in tags if tag]
    return tags

# Apply to the Hashtags column
df['Cleaned_Hashtags'] = df['Hashtags'].apply(clean_hashtags)
df[['Hashtags', 'Cleaned_Hashtags']].head()

Unnamed: 0,Hashtags,Cleaned_Hashtags
0,#Nature #Park,"[nature, park]"
1,#Traffic #Morning,"[traffic, morning]"
2,#Fitness #Workout,"[fitness, workout]"
3,#Travel #Adventure,"[travel, adventure]"
4,#Cooking #Food,"[cooking, food]"


In [47]:
df['Engagement_Score'] = df['Likes'] + (2 * df['Retweets'])
df['Engagement_Score'].head(10)

0     60.0
1     20.0
2     80.0
3     31.0
4     49.0
5    100.0
6     40.0
7     60.0
8    120.0
9     71.0
Name: Engagement_Score, dtype: float64

In [48]:
df['Sentiment'] = df['Sentiment'].astype(str).str.strip().str.capitalize()

In [49]:
print(df['Sentiment'].unique())

['Positive' 'Negative' 'Neutral' 'Anger' 'Fear' 'Sadness' 'Disgust'
 'Happiness' 'Joy' 'Love' 'Amusement' 'Enjoyment' 'Admiration' 'Affection'
 'Awe' 'Disappointed' 'Surprise' 'Acceptance' 'Adoration' 'Anticipation'
 'Bitter' 'Calmness' 'Confusion' 'Excitement' 'Kind' 'Pride' 'Shame'
 'Elation' 'Euphoria' 'Contentment' 'Serenity' 'Gratitude' 'Hope'
 'Empowerment' 'Compassion' 'Tenderness' 'Arousal' 'Enthusiasm'
 'Fulfillment' 'Reverence' 'Despair' 'Grief' 'Loneliness' 'Jealousy'
 'Resentment' 'Frustration' 'Boredom' 'Anxiety' 'Intimidation'
 'Helplessness' 'Envy' 'Regret' 'Curiosity' 'Indifference' 'Numbness'
 'Melancholy' 'Nostalgia' 'Ambivalence' 'Determination' 'Zest' 'Hopeful'
 'Proud' 'Grateful' 'Empathetic' 'Compassionate' 'Playful' 'Free-spirited'
 'Inspired' 'Confident' 'Bitterness' 'Yearning' 'Fearful' 'Apprehensive'
 'Overwhelmed' 'Jealous' 'Devastated' 'Frustrated' 'Envious' 'Dismissive'
 'Thrill' 'Bittersweet' 'Overjoyed' 'Inspiration' 'Motivation'
 'Contemplation' 'Joyfulr

In [50]:
sentiment_label_map = {}

for word in positive_words:
    sentiment_label_map[word.lower()] = 'Positive'
for word in neutral_words:
    sentiment_label_map[word.lower()] = 'Neutral'
for word in negative_words:
    sentiment_label_map[word.lower()] = 'Negative'

def map_sentiment_label(s):
    if isinstance(s, str):
        return sentiment_label_map.get(s.lower(), 'Neutral')  # default to Neutral if not found
    else:
        return 'Neutral'

df['Sentiment_Main'] = df['Sentiment'].apply(map_sentiment_label)


In [54]:
columns_needed = ['Text', 'Cleaned_Text', 'Sentiment_Main',
                  'Platform', 'Country', 'User',
                  'Cleaned_Hashtags', 'Likes', 'Retweets', 'Engagement_Score',
                  'Timestamp', 'Year', 'Month', 'Day', 'Hour']

cleaned_df = df[columns_needed]


In [55]:
cleaned_df

Unnamed: 0,Text,Cleaned_Text,Sentiment_Main,Platform,Country,User,Cleaned_Hashtags,Likes,Retweets,Engagement_Score,Timestamp,Year,Month,Day,Hour
0,Enjoying a beautiful day at the park! ...,enjoying a beautiful day at the park,Neutral,Twitter,USA,User123,"[nature, park]",30.0,15.0,60.0,2023-01-15 12:30:00,2023,1,15,12
1,Traffic was terrible this morning. ...,traffic was terrible this morning,Neutral,Twitter,Canada,CommuterX,"[traffic, morning]",10.0,5.0,20.0,2023-01-15 08:45:00,2023,1,15,8
2,Just finished an amazing workout! 💪 ...,just finished an amazing workout 💪,Neutral,Instagram,USA,FitnessFan,"[fitness, workout]",40.0,20.0,80.0,2023-01-15 15:45:00,2023,1,15,15
3,Excited about the upcoming weekend getaway! ...,excited about the upcoming weekend getaway,Neutral,Facebook,UK,AdventureX,"[travel, adventure]",15.0,8.0,31.0,2023-01-15 18:20:00,2023,1,15,18
4,Trying out a new recipe for dinner tonight. ...,trying out a new recipe for dinner tonight,Neutral,Instagram,Australia,ChefCook,"[cooking, food]",25.0,12.0,49.0,2023-01-15 19:55:00,2023,1,15,19
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
727,Collaborating on a science project that receiv...,collaborating on a science project that receiv...,Positive,Facebook,UK,ScienceProjectSuccessHighSchool,"[sciencefairwinner, highschoolscience]",39.0,20.0,79.0,2017-08-18 18:20:00,2017,8,18,18
728,Attending a surprise birthday party organized ...,attending a surprise birthday party organized ...,Positive,Instagram,USA,BirthdayPartyJoyHighSchool,"[surprisecelebration, highschoolfriendship]",48.0,25.0,98.0,2018-06-22 14:15:00,2018,6,22,14
729,Successfully fundraising for a school charity ...,successfully fundraising for a school charity ...,Positive,Twitter,Canada,CharityFundraisingTriumphHighSchool,"[communitygiving, highschoolphilanthropy]",42.0,22.0,86.0,2019-04-05 17:30:00,2019,4,5,17
730,"Participating in a multicultural festival, cel...",participating in a multicultural festival cele...,Positive,Facebook,UK,MulticulturalFestivalJoyHighSchool,"[culturalcelebration, highschoolunity]",43.0,21.0,85.0,2020-02-29 20:45:00,2020,2,29,20


In [57]:
cleaned_df.to_csv('cleanedSentimentDataset.csv', index=False)
print("Cleaned data saved as cleanedSentimentDataset.csv")


Cleaned data saved as cleanedSentimentDataset.csv
