In [96]:
import random
import numpy as np
import pandas as pd
from sklearn.utils import shuffle
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.naive_bayes import MultinomialNB, BernoulliNB, GaussianNB, CategoricalNB

In [97]:
df = pd.read_csv('SocialMedia_EmotionalState.csv')

In [98]:
df

Unnamed: 0,User_ID,Age,Gender,Platform,Daily_Usage_Time (minutes),Posts_Per_Day,Likes_Received_Per_Day,Comments_Received_Per_Day,Messages_Sent_Per_Day,Dominant_Emotion
0,1,25,Female,Instagram,120.0,3.0,45.0,10.0,12.0,Happiness
1,2,30,Male,Twitter,90.0,5.0,20.0,25.0,30.0,Anger
2,3,22,Non-binary,Facebook,60.0,2.0,15.0,5.0,20.0,Neutral
3,4,28,Female,Instagram,200.0,8.0,100.0,30.0,50.0,Anxiety
4,5,33,Male,LinkedIn,45.0,1.0,5.0,2.0,10.0,Boredom
...,...,...,...,...,...,...,...,...,...,...
996,996,33,Non-binary,Twitter,85.0,4.0,35.0,18.0,18.0,Boredom
997,997,22,Female,Facebook,70.0,1.0,14.0,6.0,10.0,Neutral
998,998,35,Male,Whatsapp,110.0,3.0,50.0,25.0,25.0,Happiness
999,999,28,Non-binary,Telegram,60.0,2.0,18.0,8.0,18.0,Anger


In [99]:
df.shape

(1001, 10)

In [100]:
df = df.dropna()

In [101]:
df = df.drop(columns=['User_ID'])

In [102]:
df.describe()

Unnamed: 0,Daily_Usage_Time (minutes),Posts_Per_Day,Likes_Received_Per_Day,Comments_Received_Per_Day,Messages_Sent_Per_Day
count,1000.0,1000.0,1000.0,1000.0,1000.0
mean,95.95,3.321,39.898,15.611,22.56
std,38.850442,1.914582,26.393867,8.819493,8.516274
min,40.0,1.0,5.0,2.0,8.0
25%,65.0,2.0,20.0,8.0,17.75
50%,85.0,3.0,33.0,14.0,22.0
75%,120.0,4.0,55.0,22.0,28.0
max,200.0,8.0,110.0,40.0,50.0


In [103]:
df.head()

Unnamed: 0,Age,Gender,Platform,Daily_Usage_Time (minutes),Posts_Per_Day,Likes_Received_Per_Day,Comments_Received_Per_Day,Messages_Sent_Per_Day,Dominant_Emotion
0,25,Female,Instagram,120.0,3.0,45.0,10.0,12.0,Happiness
1,30,Male,Twitter,90.0,5.0,20.0,25.0,30.0,Anger
2,22,Non-binary,Facebook,60.0,2.0,15.0,5.0,20.0,Neutral
3,28,Female,Instagram,200.0,8.0,100.0,30.0,50.0,Anxiety
4,33,Male,LinkedIn,45.0,1.0,5.0,2.0,10.0,Boredom


In [104]:
# convert the gender, platform, and dominant emotion values to float
df['Gender'] = df['Gender'].map({'Male': 0, 'Female': 1, 'Non-binary': 2}) #Male=0, Female=1, Non-binary=2
df['Platform'] = df['Platform'].map({'Instagram': 0, 'Twitter': 1, 'Facebook': 2, 'LinkedIn': 3, 'Snapchat': 4, 'Whatsapp': 5, 'Telegram': 6}) #Instagram=0, Twitter=1, Facebook=2, LinkedIn=3, Snapchat=4, Whatsapp=5, Telegram=6
df['Dominant_Emotion'] = df['Dominant_Emotion'].map({'Happiness': 0, 'Sadness': 1, 'Anger': 2, 'Anxiety': 3, 'Boredom': 4, 'Neutral': 5}) #Happiness=0, Sadness=1, Anger=2, Anxiety=3, Boredom=4, Neutral=5
df.head()

Unnamed: 0,Age,Gender,Platform,Daily_Usage_Time (minutes),Posts_Per_Day,Likes_Received_Per_Day,Comments_Received_Per_Day,Messages_Sent_Per_Day,Dominant_Emotion
0,25,1.0,0,120.0,3.0,45.0,10.0,12.0,0
1,30,0.0,1,90.0,5.0,20.0,25.0,30.0,2
2,22,2.0,2,60.0,2.0,15.0,5.0,20.0,5
3,28,1.0,0,200.0,8.0,100.0,30.0,50.0,3
4,33,0.0,3,45.0,1.0,5.0,2.0,10.0,4


In [105]:
df[['Platform', 'Daily_Usage_Time (minutes)','Dominant_Emotion']].corr(method='pearson')

Unnamed: 0,Platform,Daily_Usage_Time (minutes),Dominant_Emotion
Platform,1.0,-0.538444,0.370654
Daily_Usage_Time (minutes),-0.538444,1.0,-0.568772
Dominant_Emotion,0.370654,-0.568772,1.0
