In [39]:
## warnings
import warnings
warnings.filterwarnings("ignore")

## for data
import numpy as np
import pandas as pd

## for plotting
import matplotlib.pyplot as plt
import seaborn as sns

## Bag of Words
from sklearn.feature_extraction.text import CountVectorizer

## TF-IDF 
from sklearn.feature_extraction.text import TfidfVectorizer

## Train-Test Split
from sklearn.model_selection import train_test_split

## for processing
import nltk
import re
import ftfy
from nltk.stem import WordNetLemmatizer 
from nltk.corpus import stopwords
nltk.download('stopwords')
nltk.download('punkt')
nltk.download('wordnet')
nltk.download('averaged_perceptron_tagger')

## Feature selection
from sklearn import feature_selection

## Support vector machine
from sklearn.pipeline import Pipeline
import sklearn.metrics as skm
from sklearn.metrics import confusion_matrix, accuracy_score
from sklearn.svm import SVC

## for saving and loading model
import pickle

## for word embedding with Spacy
import spacy
import en_core_web_lg


[nltk_data] Downloading package stopwords to
[nltk_data]     C:\Users\otaku7\AppData\Roaming\nltk_data...
[nltk_data]   Package stopwords is already up-to-date!
[nltk_data] Downloading package punkt to
[nltk_data]     C:\Users\otaku7\AppData\Roaming\nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package wordnet to
[nltk_data]     C:\Users\otaku7\AppData\Roaming\nltk_data...
[nltk_data]   Package wordnet is already up-to-date!
[nltk_data] Downloading package averaged_perceptron_tagger to
[nltk_data]     C:\Users\otaku7\AppData\Roaming\nltk_data...
[nltk_data]   Package averaged_perceptron_tagger is already up-to-
[nltk_data]       date!


In [40]:
# Expand Contraction
# Maps each supported English contraction to its expanded form.
cList = {
  "ain't": "am not",
  "aren't": "are not",
  "can't": "cannot",
  "can't've": "cannot have",
  "'cause": "because",
  "could've": "could have",
  "couldn't": "could not",
  "couldn't've": "could not have",
  "didn't": "did not",
  "doesn't": "does not",
  "don't": "do not",
  "hadn't": "had not",
  "hadn't've": "had not have",
  "hasn't": "has not",
  "haven't": "have not",
  "he'd": "he would",
  "he'd've": "he would have",
  "he'll": "he will",
  "he'll've": "he will have",
  "he's": "he is",
  "how'd": "how did",
  "how'd'y": "how do you",
  "how'll": "how will",
  "how's": "how is",
  "I'd": "I would",
  "I'd've": "I would have",
  "I'll": "I will",
  "I'll've": "I will have",
  "I'm": "I am",
  "I've": "I have",
  "isn't": "is not",
  "it'd": "it had",
  "it'd've": "it would have",
  "it'll": "it will",
  "it'll've": "it will have",
  "it's": "it is",
  "let's": "let us",
  "ma'am": "madam",
  "mayn't": "may not",
  "might've": "might have",
  "mightn't": "might not",
  "mightn't've": "might not have",
  "must've": "must have",
  "mustn't": "must not",
  "mustn't've": "must not have",
  "needn't": "need not",
  "needn't've": "need not have",
  "o'clock": "of the clock",
  "oughtn't": "ought not",
  "oughtn't've": "ought not have",
  "shan't": "shall not",
  "sha'n't": "shall not",
  "shan't've": "shall not have",
  "she'd": "she would",
  "she'd've": "she would have",
  "she'll": "she will",
  "she'll've": "she will have",
  "she's": "she is",
  "should've": "should have",
  "shouldn't": "should not",
  "shouldn't've": "should not have",
  "so've": "so have",
  "so's": "so is",
  "that'd": "that would",
  "that'd've": "that would have",
  "that's": "that is",
  "there'd": "there had",
  "there'd've": "there would have",
  "there's": "there is",
  "they'd": "they would",
  "they'd've": "they would have",
  "they'll": "they will",
  "they'll've": "they will have",
  "they're": "they are",
  "they've": "they have",
  "to've": "to have",
  "wasn't": "was not",
  "we'd": "we had",
  "we'd've": "we would have",
  "we'll": "we will",
  "we'll've": "we will have",
  "we're": "we are",
  "we've": "we have",
  "weren't": "were not",
  "what'll": "what will",
  "what'll've": "what will have",
  "what're": "what are",
  "what's": "what is",
  "what've": "what have",
  "when's": "when is",
  "when've": "when have",
  "where'd": "where did",
  "where's": "where is",
  "where've": "where have",
  "who'll": "who will",
  "who'll've": "who will have",
  "who's": "who is",
  "who've": "who have",
  "why's": "why is",
  "why've": "why have",
  "will've": "will have",
  "won't": "will not",
  "won't've": "will not have",
  "would've": "would have",
  "wouldn't": "would not",
  "wouldn't've": "would not have",
  "y'all": "you all",
  "y'alls": "you alls",
  "y'all'd": "you all would",
  "y'all'd've": "you all would have",
  "y'all're": "you all are",
  "y'all've": "you all have",
  "you'd": "you had",
  "you'd've": "you would have",
  "you'll": "you will",
  "you'll've": "you will have",
  "you're": "you are",
  "you've": "you have"
}

# Lower-cased view of cList so that a case-insensitive match can be looked up
# regardless of how the contraction was capitalised in the input.
_c_lookup = {contraction.lower(): expansion for contraction, expansion in cList.items()}

# `re` alternation takes the FIRST alternative that matches, not the longest, so
# the keys are ordered longest-first; otherwise "can't" would win over "can't've"
# and leave a stray "'ve" behind. Keys are escaped so they are matched literally.
c_re = re.compile(
    '(%s)' % '|'.join(re.escape(key) for key in sorted(cList, key=len, reverse=True)),
    re.IGNORECASE,
)

def expandContractions(text, c_re=c_re):
    """Replace every contraction found in ``text`` with its expanded form.

    Matching is case-insensitive. The casing of the replacement follows the
    matched text: an all-lowercase match ("i'm") yields a lowercase expansion
    ("i am"), a match starting with an uppercase letter ("Don't") yields an
    expansion with its first letter capitalised ("Do not"), and anything else
    gets the expansion exactly as stored in ``cList``.

    Parameters
    ----------
    text : str
        Text to process.
    c_re : re.Pattern, optional
        Compiled pattern whose matches are contractions present in ``cList``
        (compared case-insensitively). Defaults to the module-level pattern.

    Returns
    -------
    str
        ``text`` with all matched contractions expanded.

    Raises
    ------
    KeyError
        If a custom ``c_re`` matches text that is not a key of ``cList``.
    """
    def replace(match):
        found = match.group(0)
        expansion = _c_lookup[found.lower()]
        if found.islower():
            return expansion.lower()
        if found[0].isupper():
            return expansion[0].upper() + expansion[1:]
        return expansion
    return c_re.sub(replace, text)

In [41]:
## Function to perform stepwise cleaning process
def text_cleaner(tweet):
    """Clean one piece of text and return it wrapped in a list.

    Steps, in order: strip @mentions, #hashtags, ``<.>`` placeholders and
    ``pic.twitter.com`` links; repair text encoding with ftfy; expand
    contractions; lowercase; replace every character that is not an ASCII
    letter, digit, space or tab with a space; drop English stop words and
    lemmatize the remaining tokens.

    Lowercasing is done AFTER the ftfy repair and the contraction expansion:
    lowercasing first can alter mis-encoded character sequences before ftfy
    sees them, and it prevents capitalised contraction keys such as "I'm"
    from matching.

    Parameters
    ----------
    tweet : str
        Raw text to clean.

    Returns
    -------
    list of str
        ``[cleaned_text]``, or ``[]`` when the text starts with a URL or is
        5 characters long or shorter.
    """
    cleaned_tweets = []

    # if url links then don't append to avoid news articles
    # also check tweet length, save those > 5
    # NOTE(review): re.match only looks at the START of the text, so a URL that
    # appears later does not reject it. Confirm whether re.search was intended.
    if re.match(r"(\w+:\/\/\S+)", tweet) is None and len(tweet) > 5:

        #remove hashtag, @mention, emoji and image URLs
        # IGNORECASE keeps "pic.twitter.com" matching now that the text is not
        # lowercased yet at this point.
        tweet = ' '.join(
            re.sub(
                r"(@[A-Za-z0-9]+)|(\#[A-Za-z0-9]+)|(<.>)|(pic\.twitter\.com\/.*)",
                " ",
                tweet,
                flags=re.IGNORECASE,
            ).split()
        )

        #fix weirdly encoded texts
        tweet = ftfy.fix_text(tweet)

        #expand contraction
        tweet = expandContractions(tweet)

        #lowercase
        tweet = tweet.lower()

        #remove punctuation
        tweet = ' '.join(re.sub(r"([^0-9A-Za-z \t])", " ", tweet).split())

        #stop words and lemmatization
        stop_words = set(stopwords.words('english'))
        word_tokens = nltk.word_tokenize(tweet)

        lemmatizer = WordNetLemmatizer()
        filtered_sentence = [
            lemmatizer.lemmatize(word) for word in word_tokens if word not in stop_words
        ]
        # back to string from list
        tweet = ' '.join(filtered_sentence) # join words with a space in between them

        cleaned_tweets.append(tweet)

    return cleaned_tweets
     

In [42]:
# Restore the pickled SVM classifier from SVM_model.pkl.
# NOTE: pickle.load can execute arbitrary code; only load files you trust.
with open("SVM_model.pkl", mode="rb") as svm_file:
    model = pickle.load(svm_file)

model

SVC()

In [56]:
# Restore the pickled logistic-regression classifier from LR_model.pkl.
# NOTE: pickle.load can execute arbitrary code; only load files you trust.
with open("LR_model.pkl", mode="rb") as lr_file:
    lr = pickle.load(lr_file)

lr

LogisticRegression()

In [43]:
# Load the spaCy `en_core_web_lg` pipeline; its token vectors are used below
# to turn each cleaned text into a feature vector for the classifiers.
nlp = en_core_web_lg.load()

In [44]:
# Sample input 1: a first-person account of a stressful work day.
st1="""
Today has been quite challenging. I woke up feeling overwhelmed by the tasks ahead. The workload at the office seems never-ending, and there's a constant pressure to meet deadlines. I find myself struggling to balance work and personal life, and it's taking a toll on my mental well-being. During lunch, I couldn't shake off the feeling of anxiety and kept replaying all the things I need to do in my mind.

In the afternoon, a meeting with a difficult client added to my stress levels. The pressure to perform well and the fear of making mistakes made my heart race. As the day progressed, I felt the weight of responsibility on my shoulders, and I couldn't stop thinking about how I would manage everything.

In the evening, I tried to unwind by going for a walk, but my mind kept racing with thoughts about the challenges I face. Despite my efforts to relax, I still feel a lingering sense of stress and worry. I hope tomorrow will be a better day
"""

In [45]:
# text_cleaner returns a list: one cleaned string, or an empty list if the
# text was rejected (starts with a URL or is too short).
corpus = text_cleaner(st1)

In [46]:
# Embed each cleaned text as the mean of its spaCy token vectors (one row per text).
# `pd.np` was deprecated in pandas 1.0 and removed in pandas 2.0, so NumPy is
# used directly through `np`. The original also multiplied each mean vector by
# a vector of 300 ones, which left the values unchanged and only upcast them to
# float64; the explicit dtype keeps that result type.
test = np.array(
    [np.array([token.vector for token in nlp(s)]).mean(axis=0) for s in corpus],
    dtype=np.float64,
)

In [57]:
# Classify sample 1 with the SVM model.
# NOTE(review): the meaning of the 0/1 labels is not defined in this notebook;
# confirm against the training code.
model.predict(test)

array([1], dtype=int64)

In [58]:
# Classify sample 1 with the logistic-regression model for comparison.
lr.predict(test)

array([1], dtype=int64)

In [48]:
# Sample input 2: a first-person account of exhaustion, anxiety and being triggered.
st2="""I don't have the ability to cope with it anymore. I'm trying, but a lot of things are triggering me, 
and I'm shutting down at work, just finding the place I feel safest, and staying there for an hour or two until 
I feel like I can do something again. I'm tired of watching my back, tired of traveling to places I don't feel safe, 
tired of reliving that moment, tired of being triggered, tired of the stress, tired of anxiety and knots in my stomach, 
tired of irrational thought when triggered, tired of irrational paranoia. I'm exhausted and need a break, but know it won't be 
enough until I journey the long road through therapy. I'm not suicidal at all, just wishing this pain and misery would end, to have my life back again."""


In [49]:
# Clean sample 2; the result is a list holding the cleaned string (or empty if rejected).
c2=text_cleaner(st2)

In [50]:
# Embed each cleaned text as the mean of its spaCy token vectors (one row per text).
# `pd.np` was deprecated in pandas 1.0 and removed in pandas 2.0, so NumPy is
# used directly through `np`. The original also multiplied each mean vector by
# a vector of 300 ones, which left the values unchanged and only upcast them to
# float64; the explicit dtype keeps that result type.
t2 = np.array(
    [np.array([token.vector for token in nlp(s)]).mean(axis=0) for s in c2],
    dtype=np.float64,
)

In [51]:
# Classify sample 2 with the SVM model.
model.predict(t2)

array([1], dtype=int64)

In [60]:
# Classify sample 2 with the logistic-regression model for comparison.
lr.predict(t2)

array([1], dtype=int64)

In [52]:
# Sample input 3: a recruitment notice for an online employment/well-being
# questionnaire (not a first-person account like samples 1 and 2).
nst="""In case this is the first time you're reading this post... We are looking for people who are willing 
to complete some online questionnaires about employment and well-being which we hope will help us to improve services 
for assisting people with mental health difficulties to obtain and retain employment. We are developing an employment 
questionnaire for people with personality disorders; however we are looking for people from all backgrounds to complete it. 
That means you do not need to have a diagnosis of personality disorder – you just need to have an interest in completing the 
online questionnaires. The questionnaires will only take about 10 minutes to complete online. For your participation, we’ll 
donate £1 on your behalf to a mental health charity (Young Minds: Child & Adolescent Mental Health, Mental Health Foundation, or Rethink)"""


In [53]:
# Clean sample 3; the result is a list holding the cleaned string (or empty if rejected).
nc=text_cleaner(nst)

In [54]:
# Embed each cleaned text as the mean of its spaCy token vectors (one row per text).
# `pd.np` was deprecated in pandas 1.0 and removed in pandas 2.0, so NumPy is
# used directly through `np`. The original also multiplied each mean vector by
# a vector of 300 ones, which left the values unchanged and only upcast them to
# float64; the explicit dtype keeps that result type.
t3 = np.array(
    [np.array([token.vector for token in nlp(s)]).mean(axis=0) for s in nc],
    dtype=np.float64,
)

In [55]:
# Classify sample 3 with the SVM model.
model.predict(t3)

array([0], dtype=int64)

In [61]:
# Classify sample 3 with the logistic-regression model for comparison.
lr.predict(t3)

array([0], dtype=int64)