In [11]:
import pandas as pd
import nltk
from nltk.sentiment.vader import SentimentIntensityAnalyzer

In [12]:
df_suicide_detection = pd.read_csv('prepared.csv').drop('Unnamed: 0',axis=1)
df_suicide_detection['corpus'] = df_suicide_detection['corpus'].apply(lambda x: x[1:-1].replace("'", "").split(', '))

In [13]:
nltk.download('vader_lexicon')

sid = SentimentIntensityAnalyzer()

def sentiment_analysis(text):
    text = ' '.join(text)
    sentiment = sid.polarity_scores(text)
    return sentiment

# df_suicide_detection['sentiment'] = df_suicide_detection['corpus'].apply(lambda text: ' '.join(text))
df_suicide_detection['sentiment'] = df_suicide_detection['corpus'].apply(sentiment_analysis)

[nltk_data] Downloading package vader_lexicon to
[nltk_data]     C:\Users\user\AppData\Roaming\nltk_data...
[nltk_data]   Package vader_lexicon is already up-to-date!


In [14]:
grouped = df_suicide_detection.groupby(df_suicide_detection['class'])['sentiment'].apply(list).reset_index()

df_sentiment = pd.concat([
    pd.DataFrame(grouped['sentiment'].iloc[0]).mean(),
    pd.DataFrame(grouped['sentiment'].iloc[1]).mean()],
axis=1,
)
df_sentiment.columns=['non_suicide','suicide']
df_sentiment

Unnamed: 0,non_suicide,suicide
neg,0.137618,0.209482
neu,0.685859,0.624937
pos,0.176412,0.165581
compound,0.082155,-0.248371


W kategorii 'suicide' średnio występuje wyższy negatywny sentyment (0.209) niż w 'non_suicide' (0.137). Może to sugerować, że wpisy związane ze słowami samobójstwa mogą zawierać więcej treści o negatywnym nastroju.  
Wskaźniki nutralny i pozytywny zbliżone.  
Compound jest wartością ogólną sentymentu. W 'suicide' jest on niższy (-0.248) niż w 'non_suicide' (0.082). Może to wskazywać, że ogólny nastrojowy kontekst w kategoriach 'suicide' jest bardziej skłonny ku negatywności w porównaniu do 'non_suicide'.

Extreme opinions

In [15]:
extremely_negative = df_suicide_detection[df_suicide_detection['sentiment'].apply(lambda x: x['compound'] < -0.999)]

print("Extremely negative:")
print(extremely_negative[['corpus', 'class']])

Extremely negative:
                                                   corpus  class
349     [want, end, myselfsad, pain, sad, pain, sad, p...      1
3457    [meow, cat, unmut, zoom, fuck, fuck, fuck, fuc...      0
4851    [fuck, mess, much, time, nsfw, due, languageco...      1
5130    [fuck, ghod, go, shtit, abnd, cum, nmi, teache...      0
8055    [easier, kill, go, traumai, wan, na, fuck, km,...      1
10313   [im, turn, soon, im, fuckkmg, terrifi, might, ...      1
11816   [predict, feel, rn, shit, fuck, shit, fuck, sh...      0
25016   [unwant, unwant, unwant, unlov, unlov, cure, c...      1
30228   [hurtshurt, hurt, hurt, hurt, hurt, hurt, hurt...      1
39248   [exist, fuck, nightmareeveri, fuck, time, some...      1
39314   [fuck, brice, maddock, fuck, brice, maddock, f...      0
42340   [fuck, fuck, fuck, fuck, fuckfuck, fuck, fuck,...      1
43720   [need, let, anger, fuck, fuck, fuck, shit, fuc...      0
45395   [kill, pleasejust, kill, kill, kill, kill, kil...      1
48117

In [16]:
extremely_positive = df_suicide_detection[df_suicide_detection['sentiment'].apply(lambda x: x['compound'] > 0.999)]

print("Extremely positive:")
print(extremely_positive[['corpus', 'class']])

Extremely positive:
                                                   corpus  class
4182    [talk, mei, need, lp, need, help, need, help, ...      1
8576    [may, may, get, notic, post, anyway, care, you...      0
15960   [read, love, love, love, love, love, love, lov...      0
47133   [your, ever, sad, rememb, your, world, glam, p...      0
51231   [remind, kid, eat, rich, eat, rich, eat, rich,...      0
100538  [know, yall, struggl, wow, wow, wow, wow, wow,...      0
102353  [unpopular, opinion, peopl, alway, think, life...      0
128874  [good, day, pleas, thank, good, day, good, day...      0
155426  [school, motto, kinda, cring, care, share, dar...      0
170713  [copypasta, im, six, pardon, grammar, nasti, c...      0
172202  [need, helpi, need, help, need, help, need, he...      1
173312  [pleas, help, mei, need, help, need, help, nee...      1
177068  [mf, like, mf, like, mf, like, mf, like, mf, l...      0
178552  [love, love, love, love, love, love, love, lov...      0
20647

Adjusted

In [18]:
def adjust_sentiment(text, sentiment):
    if "help" in text:
        return -0.5  
    return sentiment 

def sentiment_analysis(text):
    text = " ".join(text)
    sentiment = sid.polarity_scores(text)
    if "help" in text:
        sentiment['compound'] -= 0.5 
    return sentiment

df_suicide_detection['adjusted_sentiment'] = df_suicide_detection.apply(lambda row: adjust_sentiment(row['corpus'], row['sentiment']['compound']), axis=1)

In [19]:
grouped = df_suicide_detection.groupby(df_suicide_detection['class'])['adjusted_sentiment'].apply(list).reset_index()

df_sentiment = pd.concat([
    pd.DataFrame(grouped['adjusted_sentiment'].iloc[0]).mean(),
    pd.DataFrame(grouped['adjusted_sentiment'].iloc[1]).mean()],
axis=1,
)
df_sentiment.columns=['non_suicide','suicide']
df_sentiment

Unnamed: 0,non_suicide,suicide
0,0.020971,-0.343747


In [20]:
extremely_positive = df_suicide_detection[df_suicide_detection['adjusted_sentiment'].apply(lambda x: x > 0.999)]

print("Extremely positive:")
print(extremely_positive[['corpus', 'class']])

Extremely positive:
                                                   corpus  class
8576    [may, may, get, notic, post, anyway, care, you...      0
15960   [read, love, love, love, love, love, love, lov...      0
47133   [your, ever, sad, rememb, your, world, glam, p...      0
51231   [remind, kid, eat, rich, eat, rich, eat, rich,...      0
100538  [know, yall, struggl, wow, wow, wow, wow, wow,...      0
102353  [unpopular, opinion, peopl, alway, think, life...      0
128874  [good, day, pleas, thank, good, day, good, day...      0
155426  [school, motto, kinda, cring, care, share, dar...      0
170713  [copypasta, im, six, pardon, grammar, nasti, c...      0
177068  [mf, like, mf, like, mf, like, mf, like, mf, l...      0
178552  [love, love, love, love, love, love, love, lov...      0
206473  [georg, floyd, moment, xd, xd, xd, xd, xd, xd,...      0


Length correlation

In [21]:
correlation = df_suicide_detection['post_length'].corr(df_suicide_detection['adjusted_sentiment'])
print(f"Correlation between text length and sentiment: {correlation}")

Correlation between text length and sentiment: -0.18553369080021795


The correlation of -0.1855 suggests a weak but negative relationship between text length and sentiment.