In [2]:
!python -m spacy download en_core_web_sm

Collecting en-core-web-sm==3.7.1
  Using cached https://github.com/explosion/spacy-models/releases/download/en_core_web_sm-3.7.1/en_core_web_sm-3.7.1-py3-none-any.whl (12.8 MB)
[38;5;2m[+] Download and installation successful[0m
You can now load the package via spacy.load('en_core_web_sm')


In [1]:
import pandas as pd
import spacy
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.preprocessing import LabelEncoder

In [8]:
# Load the stress dataset from a CSV located one directory above the notebook.
# NOTE(review): the rendered frame has an 'Unnamed: 0' column, which suggests the file
# was saved together with its index; consider index_col=0 if that column is not needed — confirm.
df = pd.read_csv('../Stress_Dataset_518rows.csv')
# Bare expression so the notebook renders the frame
df

Unnamed: 0,Response,Stress label
0,"I have a presentation tomorrow, and I can't st...",Stress
1,"My partner is always criticizing me, and it's ...",Stress
2,I feel like I'm drowning in work. No matter ho...,Stress
3,"I keep making mistakes at work, and it's makin...",Stress
4,I'm so overwhelmed with all my responsibilitie...,Stress
...,...,...
512,Just found out my favorite book is being adapt...,Normal
513,Successfully completed a challenging puzzle. T...,Normal
514,Booked tickets for a weekend getaway to the mo...,Normal
515,Finished reading a captivating novel. The imme...,Normal


In [9]:
# Load the 'en_core_web_sm' spaCy pipeline (the model fetched by the download cell)
nlp = spacy.load('en_core_web_sm')
# Bare expression so the notebook shows the loaded pipeline object
nlp

<spacy.lang.en.English at 0x21085e0f650>

In [10]:
# Create a standalone "sentencizer" component (sentence-boundary detection).
# NOTE(review): `sbd` is not referenced by any other visible cell, and the following
# cell calls nlp.add_pipe("sentencizer") by name, which returns its own component;
# this assignment looks redundant — confirm before removing.
sbd = nlp.create_pipe("sentencizer")

In [11]:
# Register the "sentencizer" component on the pipeline.
# add_pipe raises if a component with the same name is already present, so the
# membership check keeps this cell safe to re-run on a live kernel.
if "sentencizer" not in nlp.pipe_names:
    nlp.add_pipe("sentencizer")
# Display the registered component (the same kind of object add_pipe returns)
nlp.get_pipe("sentencizer")

<spacy.pipeline.sentencizer.Sentencizer at 0x21086235490>

In [12]:
# Sample passage used to demonstrate the preprocessing steps.
# Adjacent string literals are joined by the parser, so the value is one single-line string.
text = (
    "I'm going through some things with my feelings and myself. "
    "I barely sleep and I do nothing but think about how I'm worthless "
    "and how I shouldn't be here. "
    "I've never tried or contemplated suicide. "
    "I've always wanted to fix my issues, but I never get around to it. "
    "How can I change my feeling of being worthless to everyone?"
)

In [13]:
# Run the pipeline over the sample text; the result is iterated below for sentences and tokens
doc = nlp(text)

In [14]:
# Sentence segmentation: doc.sents yields one span per detected sentence.
# Stored as `sentences` rather than `tokens`, because these are whole sentences and
# `tokens` is assigned a different structure (text/lemma pairs) in the next cell.
sentences = list(doc.sents)
print(sentences)

[I'm going through some things with my feelings and myself., I barely sleep and I do nothing but think about how I'm worthless and how I shouldn't be here., I've never tried or contemplated suicide., I've always wanted to fix my issues, but I never get around to it., How can I change my feeling of being worthless to everyone?]


In [15]:
# Lemmatization: pair each token's surface text with its lemma
tokens = [
    [tok.text, tok.lemma_]
    for tok in doc
]
print(tokens)

[['I', 'I'], ["'m", 'be'], ['going', 'go'], ['through', 'through'], ['some', 'some'], ['things', 'thing'], ['with', 'with'], ['my', 'my'], ['feelings', 'feeling'], ['and', 'and'], ['myself', 'myself'], ['.', '.'], ['I', 'I'], ['barely', 'barely'], ['sleep', 'sleep'], ['and', 'and'], ['I', 'I'], ['do', 'do'], ['nothing', 'nothing'], ['but', 'but'], ['think', 'think'], ['about', 'about'], ['how', 'how'], ['I', 'I'], ["'m", 'be'], ['worthless', 'worthless'], ['and', 'and'], ['how', 'how'], ['I', 'I'], ['should', 'should'], ["n't", 'not'], ['be', 'be'], ['here', 'here'], ['.', '.'], ['I', 'I'], ["'ve", 'have'], ['never', 'never'], ['tried', 'try'], ['or', 'or'], ['contemplated', 'contemplate'], ['suicide', 'suicide'], ['.', '.'], ['I', 'I'], ["'ve", 'have'], ['always', 'always'], ['wanted', 'want'], ['to', 'to'], ['fix', 'fix'], ['my', 'my'], ['issues', 'issue'], [',', ','], ['but', 'but'], ['I', 'I'], ['never', 'never'], ['get', 'get'], ['around', 'around'], ['to', 'to'], ['it', 'it'], ['

In [16]:
# Inspect spaCy's built-in English stop-word collection (nothing is removed in this cell)
from spacy.lang.en.stop_words import STOP_WORDS
# `stop` is a second name for the same object, not a copy
stop = STOP_WORDS
print(stop)

{'something', 'elsewhere', 'bottom', 'call', 'what', 'whereas', 'regarding', 'we', 'becomes', 'if', 'anyone', 'you', 'hereupon', 'however', 'became', 'seems', 'two', 'not', '’ll', 'same', 'first', 'which', 'them', 'thru', 'from', '‘s', 'noone', 'up', 'than', 'so', 'eleven', 'before', 'everything', 'whenever', 'four', 'herein', 'get', 'three', 'the', 'formerly', 'whole', 'must', 'made', 'indeed', 'one', 'whose', 'though', 'itself', 'via', 'these', 'five', 'else', 'yet', 'has', 'several', 'amongst', 'beforehand', 'ourselves', 'just', 'upon', 'even', 'please', 'once', 'your', "'ll", 'anything', 'whereupon', 'between', 'whether', 'next', 'rather', 'do', 'somewhere', 'take', '‘m', 'perhaps', 'name', 'due', 'are', 'besides', 'being', 'twelve', 'some', 'could', '‘ve', 'there', 'many', 'therefore', 'yourselves', 'where', 'across', 'now', 'others', 'still', 'on', 'thus', 'say', 'quite', 'had', 'hers', 'someone', 'unless', 'below', 'onto', 'fifty', 'former', '’s', "'d", 'enough', 'among', 'towar

In [17]:
# Keep only the text of tokens that are not stop words and consist solely of
# alphabetic characters (this drops punctuation and contraction pieces such as "'m").
filtered = [
    token.text
    for token in doc
    if not token.is_stop and token.text.isalpha()
]

print(filtered)

['going', 'things', 'feelings', 'barely', 'sleep', 'think', 'worthless', 'tried', 'contemplated', 'suicide', 'wanted', 'fix', 'issues', 'change', 'feeling', 'worthless']
