In [1]:
import pandas as pd
import nltk
from nltk.sentiment.vader import SentimentIntensityAnalyzer
import matplotlib.pyplot as plt
import seaborn as sns

In [2]:
df_suicide_detection = pd.read_csv('../../data/prepared/prepared_2.csv').drop('Unnamed: 0',axis=1)
df_suicide_detection['corpus'] = df_suicide_detection['corpus'].apply(lambda x: x[1:-1].replace("'", "").split(', '))

In [3]:
nltk.download('vader_lexicon')

sid = SentimentIntensityAnalyzer()

def sentiment_analysis(text):
    text = ' '.join(text)
    sentiment = sid.polarity_scores(text)
    return sentiment

df_suicide_detection['sentiment'] = df_suicide_detection['corpus'].apply(sentiment_analysis)

[nltk_data] Downloading package vader_lexicon to
[nltk_data]     C:\Users\user\AppData\Roaming\nltk_data...
[nltk_data]   Package vader_lexicon is already up-to-date!


In [4]:
grouped = df_suicide_detection.groupby(df_suicide_detection['class'])['sentiment'].apply(list).reset_index()

df_sentiment = pd.concat([
    pd.DataFrame(grouped['sentiment'].iloc[0]).mean(),
    pd.DataFrame(grouped['sentiment'].iloc[1]).mean()],
axis=1,
)
df_sentiment.columns=['non_suicide','suicide']
df_sentiment

Unnamed: 0,non_suicide,suicide
neg,0.143912,0.214827
neu,0.679522,0.62098
pos,0.176567,0.16419
compound,0.069729,-0.247926


On average, the 'suicide' category has a higher negative sentiment (0.214) than the 'non_suicide' category (0.143).   
The positive indices are similar.    
Compound is the overall sentiment value. It is lower in 'suicide' (-0.247) than in 'non_suicide' (0.069).  
Overall sentiment context in terms of 'suicide' is more inclined toward negativity compared to 'non_suicide'.

Verification of extreme cases

In [5]:
extremely_negative = df_suicide_detection[df_suicide_detection['sentiment'].apply(lambda x: x['compound'] < -0.999)]

print("Extremely negative:")
print(extremely_negative[['corpus', 'class']])

Extremely negative:
                                                   corpus  class
349     [want, end, myselfsad, pain, sad, pain, sad, p...      1
3457    [meow, cat, unmut, zoom, fuck, fuck, fuck, fuc...      0
4851    [fuck, mess, much, time, nsfw, due, languageco...      1
5130    [fuck, ghod, go, shtit, abnd, cum, nmi, teache...      0
8055    [easier, kill, traumai, wan, fuck, km, bad, ha...      1
10313   [turn, soon, fuckkmg, terrifi, might, attract,...      1
11816   [predict, feel, shit, fuck, shit, fuck, shit, ...      0
25015   [unwant, unwant, unwant, unlov, unlov, cure, c...      1
30226   [hurtshurt, hurt, hurt, hurt, hurt, hurt, hurt...      1
39243   [exist, fuck, nightmareeveri, fuck, time, some...      1
39309   [fuck, brice, maddock, fuck, brice, maddock, f...      0
42334   [fuck, fuck, fuck, fuck, fuckfuck, fuck, fuck,...      1
43714   [need, let, anger, fuck, fuck, fuck, shit, fuc...      0
45389   [kill, pleasejust, kill, kill, kill, kill, kil...      1
48111

In [6]:
extremely_positive = df_suicide_detection[df_suicide_detection['sentiment'].apply(lambda x: x['compound'] > 0.999)]

print("Extremely positive:")
print(extremely_positive[['corpus', 'class']])

Extremely positive:
                                                   corpus  class
4182    [talk, mei, need, need, help, need, help, need...      1
15959   [read, love, love, love, love, love, love, lov...      0
47127   [your, ever, sad, rememb, your, world, glam, p...      0
51224   [remind, kid, eat, rich, eat, rich, eat, rich,...      0
100524  [know, yall, struggl, wow, wow, wow, wow, wow,...      0
102339  [unpopular, opinion, peopl, alway, think, life...      0
128857  [good, day, pleas, thank, good, day, good, day...      0
155407  [school, motto, kinda, cring, care, share, dar...      0
170694  [copypasta, six, pardon, grammar, nasti, cough...      0
172183  [need, helpi, need, help, need, help, need, he...      1
173293  [pleas, help, mei, need, help, need, help, nee...      1
177049  [mf, like, mf, like, mf, like, mf, like, mf, l...      0
178533  [love, love, love, love, love, love, love, lov...      0
214433  [pleas, help, mepleas, help, pleas, help, plea...      1


Extremely positive cases include those belonging to the "suicide" class containing cries for help.  
Sentiment correction for the word "help" nad "please" is necessary.

Adjusted

In [11]:
def adjust_sentiment(text, sentiment):
    if 'help' in text or 'pleas' in text and sentiment>0:
        return sentiment-0.3 
    return sentiment 

def sentiment_analysis(text):
    text = " ".join(text)
    sentiment = sid.polarity_scores(text)
    if 'help' in text or 'pleas' in text and sentiment>0:
        sentiment['compound'] -= 0.3 
    return sentiment

df_suicide_detection['adjusted_sentiment'] = df_suicide_detection.apply(lambda row: adjust_sentiment(row['corpus'], row['sentiment']['compound']), axis=1)

In [12]:
grouped = df_suicide_detection.groupby(df_suicide_detection['class'])['adjusted_sentiment'].apply(list).reset_index()

df_sentiment = pd.concat([
    pd.DataFrame(grouped['adjusted_sentiment'].iloc[0]).mean(),
    pd.DataFrame(grouped['adjusted_sentiment'].iloc[1]).mean()],
axis=1,
)
df_sentiment.columns=['non_suicide','suicide']
df_sentiment

Unnamed: 0,non_suicide,suicide
0,0.042554,-0.327974


In [13]:
extremely_positive = df_suicide_detection[df_suicide_detection['adjusted_sentiment'].apply(lambda x: x > 0.999)]

print("Extremely positive:")
print(extremely_positive[['corpus', 'class']])

Extremely positive:
                                                   corpus  class
15959   [read, love, love, love, love, love, love, lov...      0
47127   [your, ever, sad, rememb, your, world, glam, p...      0
51224   [remind, kid, eat, rich, eat, rich, eat, rich,...      0
100524  [know, yall, struggl, wow, wow, wow, wow, wow,...      0
102339  [unpopular, opinion, peopl, alway, think, life...      0
155407  [school, motto, kinda, cring, care, share, dar...      0
170694  [copypasta, six, pardon, grammar, nasti, cough...      0
177049  [mf, like, mf, like, mf, like, mf, like, mf, l...      0
178533  [love, love, love, love, love, love, love, lov...      0


Length correlation

In [14]:
correlation = pd.Series([len(x) for x in df_suicide_detection['corpus']]).corr(df_suicide_detection['adjusted_sentiment'])
print(f"Correlation between text length and sentiment: {correlation}")

Correlation between text length and sentiment: -0.17658700948395203


The correlation of -0.1765 suggests a weak but negative relationship between text length and sentiment.