# Sentiment Analysis of Social Media

In [None]:
#Import required libraries
import pandas as pd

#Load the dataset
data = pd.read_csv("sentimentdataset.csv")

#Display basic info
print("Shape of dataset:", data.shape)
print("\nColumn names:", data.columns.tolist())
print("\nFirst 5 rows:")
display(data.head())

#Check for missing values
print("\nMissing values per column:")
print(data.isnull().sum())

Shape of dataset: (732, 15)

Column names: ['Unnamed: 0.1', 'Unnamed: 0', 'Text', 'Sentiment', 'Timestamp', 'User', 'Platform', 'Hashtags', 'Retweets', 'Likes', 'Country', 'Year', 'Month', 'Day', 'Hour']

First 5 rows:


Unnamed: 0.2,Unnamed: 0.1,Unnamed: 0,Text,Sentiment,Timestamp,User,Platform,Hashtags,Retweets,Likes,Country,Year,Month,Day,Hour
0,0,0,Enjoying a beautiful day at the park! ...,Positive,2023-01-15 12:30:00,User123,Twitter,#Nature #Park,15.0,30.0,USA,2023,1,15,12
1,1,1,Traffic was terrible this morning. ...,Negative,2023-01-15 08:45:00,CommuterX,Twitter,#Traffic #Morning,5.0,10.0,Canada,2023,1,15,8
2,2,2,Just finished an amazing workout! ðŸ’ª ...,Positive,2023-01-15 15:45:00,FitnessFan,Instagram,#Fitness #Workout,20.0,40.0,USA,2023,1,15,15
3,3,3,Excited about the upcoming weekend getaway! ...,Positive,2023-01-15 18:20:00,AdventureX,Facebook,#Travel #Adventure,8.0,15.0,UK,2023,1,15,18
4,4,4,Trying out a new recipe for dinner tonight. ...,Neutral,2023-01-15 19:55:00,ChefCook,Instagram,#Cooking #Food,12.0,25.0,Australia,2023,1,15,19



Missing values per column:
Unnamed: 0.1    0
Unnamed: 0      0
Text            0
Sentiment       0
Timestamp       0
User            0
Platform        0
Hashtags        0
Retweets        0
Likes           0
Country         0
Year            0
Month           0
Day             0
Hour            0
dtype: int64


In [None]:
#Keep only relevant columns
data = data[['Text', 'Sentiment']]

#Check unique sentiment labels
print("Unique sentiment labels:", data['Sentiment'].unique())

#Clean the text data
import re

def clean_text(text):
    text = str(text).lower()
    text = re.sub(r"http\S+", "", text)  # remove links
    text = re.sub(r"[^a-z\s]", "", text)  # keep only letters
    text = re.sub(r"\s+", " ", text).strip()  # remove extra spaces
    return text

data['Clean_Text'] = data['Text'].apply(clean_text)

#Display sample cleaned data
data.head(10)

Unique sentiment labels: [' Positive  ' ' Negative  ' ' Neutral   ' ' Anger        '
 ' Fear         ' ' Sadness      ' ' Disgust      ' ' Happiness    '
 ' Joy          ' ' Love         ' ' Amusement    ' ' Enjoyment    '
 ' Admiration   ' ' Affection    ' ' Awe          ' ' Disappointed '
 ' Surprise     ' ' Acceptance   ' ' Adoration    ' ' Anticipation '
 ' Bitter       ' ' Calmness     ' ' Confusion    ' ' Excitement   '
 ' Kind         ' ' Pride        ' ' Shame        ' ' Confusion '
 ' Excitement ' ' Shame ' ' Elation       ' ' Euphoria      '
 ' Contentment   ' ' Serenity      ' ' Gratitude     ' ' Hope          '
 ' Empowerment   ' ' Compassion    ' ' Tenderness    ' ' Arousal       '
 ' Enthusiasm    ' ' Fulfillment  ' ' Reverence     ' ' Compassion'
 ' Fulfillment   ' ' Reverence ' ' Elation   ' ' Despair         '
 ' Grief           ' ' Loneliness      ' ' Jealousy        '
 ' Resentment      ' ' Frustration     ' ' Boredom         '
 ' Anxiety         ' ' Intimidation    

Unnamed: 0,Text,Sentiment,Clean_Text
0,Enjoying a beautiful day at the park! ...,Positive,enjoying a beautiful day at the park
1,Traffic was terrible this morning. ...,Negative,traffic was terrible this morning
2,Just finished an amazing workout! ðŸ’ª ...,Positive,just finished an amazing workout
3,Excited about the upcoming weekend getaway! ...,Positive,excited about the upcoming weekend getaway
4,Trying out a new recipe for dinner tonight. ...,Neutral,trying out a new recipe for dinner tonight
5,Feeling grateful for the little things in lif...,Positive,feeling grateful for the little things in life
6,Rainy days call for cozy blankets and hot coc...,Positive,rainy days call for cozy blankets and hot cocoa
7,The new movie release is a must-watch! ...,Positive,the new movie release is a mustwatch
8,Political discussions heating up on the timel...,Negative,political discussions heating up on the timeline
9,Missing summer vibes and beach days. ...,Neutral,missing summer vibes and beach days


In [3]:
#Clean Sentiment labels (remove spaces and unify similar labels)
data['Sentiment'] = data['Sentiment'].str.strip().str.title()

#Map all emotion labels to 3 main categories
positive_words = [
    'Positive', 'Happiness', 'Joy', 'Love', 'Gratitude', 'Hope', 'Pride',
    'Confidence', 'Excitement', 'Admiration', 'Affection', 'Contentment',
    'Serenity', 'Calmness', 'Amusement', 'Euphoria', 'Success', 'Enthusiasm',
    'Kindness', 'Motivation', 'Satisfaction', 'Optimism', 'Delight'
]

negative_words = [
    'Negative', 'Anger', 'Fear', 'Sadness', 'Disgust', 'Despair', 'Grief',
    'Loneliness', 'Frustration', 'Anxiety', 'Regret', 'Betrayal', 'Suffering',
    'Jealousy', 'Hate', 'Boredom', 'Depression', 'Loss', 'Desolation',
    'Heartbreak', 'Melancholy'
]

neutral_words = [
    'Neutral', 'Curiosity', 'Wonder', 'Reflection', 'Contemplation',
    'Observation', 'Indifference', 'Calm', 'Acceptance', 'Ambivalence'
]

def simplify_sentiment(sentiment):
    if sentiment in positive_words:
        return 'Positive'
    elif sentiment in negative_words:
        return 'Negative'
    else:
        return 'Neutral'

data['Simplified_Sentiment'] = data['Sentiment'].apply(simplify_sentiment)

#Check value distribution
data['Simplified_Sentiment'].value_counts()

Simplified_Sentiment
Neutral     407
Positive    233
Negative     92
Name: count, dtype: int64

In [4]:
#Import libraries for model training
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import classification_report, confusion_matrix, accuracy_score

#Split the data
X = data['Clean_Text']
y = data['Simplified_Sentiment']

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42, stratify=y)

#Convert text into TF-IDF features
tfidf = TfidfVectorizer(max_features=5000, ngram_range=(1,2), stop_words='english')
X_train_tfidf = tfidf.fit_transform(X_train)
X_test_tfidf = tfidf.transform(X_test)

#Train a Logistic Regression model
model = LogisticRegression(max_iter=1000, class_weight='balanced')
model.fit(X_train_tfidf, y_train)

#Predictions and evaluation
y_pred = model.predict(X_test_tfidf)

print("Accuracy:", accuracy_score(y_test, y_pred))
print("\nClassification Report:\n", classification_report(y_test, y_pred))
print("\nConfusion Matrix:\n", confusion_matrix(y_test, y_pred))

Accuracy: 0.7142857142857143

Classification Report:
               precision    recall  f1-score   support

    Negative       0.61      0.61      0.61        18
     Neutral       0.74      0.78      0.76        82
    Positive       0.70      0.64      0.67        47

    accuracy                           0.71       147
   macro avg       0.68      0.68      0.68       147
weighted avg       0.71      0.71      0.71       147


Confusion Matrix:
 [[11  5  2]
 [ 7 64 11]
 [ 0 17 30]]


In [5]:
import gradio as gr

#Define prediction function
def predict_sentiment(text):
    clean_text = clean_text = re.sub(r"http\S+", "", text.lower())
    clean_text = re.sub(r"[^a-z\s]", "", clean_text)
    clean_text = re.sub(r"\s+", " ", clean_text).strip()
    vectorized_text = tfidf.transform([clean_text])
    prediction = model.predict(vectorized_text)[0]
    probabilities = model.predict_proba(vectorized_text)[0]
    confidence = max(probabilities) * 100
    return f"Predicted Sentiment: {prediction} (Confidence: {confidence:.2f}%)"

#Create Gradio Interface
interface = gr.Interface(
    fn=predict_sentiment,
    inputs=gr.Textbox(lines=3, placeholder="Enter a social media post..."),
    outputs="text",
    title="Sentiment Analysis of Social Media Posts",
    description="This model predicts whether a post expresses a Positive, Negative, or Neutral sentiment based on trained data."
)

interface.launch(share=True)

--------


Running on local URL:  http://127.0.0.1:7860

Could not create share link. Please check your internet connection or our status page: https://status.gradio.app.


