In [None]:
import numpy as np 
import pandas as pd
from sklearn.model_selection import train_test_split
import neattext.functions as nfx
import plotly.express as plx
from sklearn.metrics import classification_report
import keras
from keras.layers import Embedding,Dense,LSTM,GlobalMaxPooling1D,Input
from keras.callbacks import EarlyStopping,ReduceLROnPlateau
from keras.models import Sequential
import tensorflow as tf
from sklearn.preprocessing import LabelEncoder
from tensorflow.keras.preprocessing.text import Tokenizer
from keras.preprocessing.sequence import pad_sequences
from tqdm import tqdm

In [None]:
# Load the dataset from a CSV in the working directory; later cells read its
# `text` and `class` columns.
data=pd.read_csv('Suicide_Detection.csv')
# Preview the first rows.
data.head()

In [None]:
# Number of rows per label in the full dataset.
data['class'].value_counts()

In [None]:
# The distinct label values as an array, in the same order value_counts() lists them.
data['class'].value_counts().index.values

In [None]:
# Hold out 20% of the rows as the test set; the fixed random_state makes the
# split repeatable across runs.
train_data,test_data=train_test_split(data,test_size=0.2,random_state=10)

In [None]:
# Label values present in the training split.
train_data['class'].value_counts().index.values

# **Data Visualisation**

In [None]:
# plx.bar(train_data,x=train_data['class'].value_counts().index.values,
#         y=train_data['class'].value_counts(),color=['Suicide','Not Suicide'])

# **Data Cleaning**

In [None]:
def clean_text(text):
    """Normalise every string in ``text`` and report its word count.

    Each item is lower-cased, passed through ``nfx.remove_special_characters``
    and then through ``nfx.remove_stopwords``. Iteration is wrapped in ``tqdm``
    so a progress bar is shown.

    Args:
        text: an iterable of strings.

    Returns:
        A ``(cleaned_text, text_length)`` pair of lists in input order:
        the cleaned strings, and the number of whitespace-separated tokens
        remaining in each cleaned string.
    """
    cleaned_text = []
    for raw in tqdm(text):
        without_specials = nfx.remove_special_characters(raw.lower())
        cleaned_text.append(nfx.remove_stopwords(without_specials))

    # Word counts are taken after cleaning, so removed stopwords are not counted.
    text_length = [len(cleaned.split()) for cleaned in cleaned_text]
    return cleaned_text, text_length

In [None]:
# Run both splits through the same cleaning pipeline; the per-text word counts
# are kept alongside the cleaned strings.
cleaned_train_text,train_text_length=clean_text(train_data.text)
cleaned_test_text,test_text_length=clean_text(test_data.text)

In [None]:
# Build the word -> index vocabulary from the cleaned *training* text only,
# so nothing from the test split influences the vocabulary.
tokenizer=Tokenizer()
tokenizer.fit_on_texts(cleaned_train_text)

In [None]:
# cleaned_train_text

In [None]:
# Encode each cleaned text as a list of vocabulary indices using the
# tokenizer fitted on the training text.
train_text_seq = tokenizer.texts_to_sequences(cleaned_train_text)
test_text_seq = tokenizer.texts_to_sequences(cleaned_test_text)

# Bring every sequence to exactly 50 indices so both splits form rectangular
# arrays of the same width.
train_text_pad, test_text_pad = (
    pad_sequences(encoded, maxlen=50)
    for encoded in (train_text_seq, test_text_seq)
)

In [None]:
# Inspect the padded training sequences.
train_text_pad

# **GloVe Embeddings**

In [None]:
# Map the string labels to integers. The encoder is fitted on the training
# labels and then reused for the test labels so both share one mapping.
lbl_target=LabelEncoder()
train_output=lbl_target.fit_transform(train_data['class'])
test_output=lbl_target.transform(test_data['class'])

In [None]:
import pickle
# Load the pickled GloVe table. A later cell calls `.get(word)` on it and copies
# the result into a 300-wide row, so it is presumably a dict of word -> 300-d
# vector — TODO confirm.
# NOTE(review): unpickling can execute arbitrary code embedded in the file; only
# load a pickle that comes from a trusted source.
with open('glove.840B.300d.pkl', 'rb') as fp:
    glove_embedding = pickle.load(fp)

In [None]:
# Number of distinct words the tokenizer learned from the training text.
v = len(tokenizer.word_index)

# One 300-d row per tokenizer index, plus one extra row so that index `v` is
# addressable. Rows start as zeros and stay zero for any word that has no
# entry in the GloVe table.
embedding_matrix = np.zeros((v + 1, 300), dtype=float)
for word, idx in tokenizer.word_index.items():
    embedding_vector = glove_embedding.get(word)
    if embedding_vector is None:
        continue
    embedding_matrix[idx] = embedding_vector

In [None]:
# Inspect the assembled embedding matrix.
embedding_matrix

In [None]:
# Training callbacks; only `patience` is set, every other option (including
# the monitored quantity) is left at the Keras default.
# Stop training after 5 epochs without improvement.
early_stop=EarlyStopping(patience=5)
# Lower the learning rate after 3 epochs without improvement.
reducelr=ReduceLROnPlateau(patience=3)

# **Keras Sequential Model Construction**

In [None]:
# Binary text classifier: frozen GloVe embeddings -> LSTM -> max-pool over
# time -> dense head with a single sigmoid output.
model=Sequential()
# The input length must equal the `maxlen` used with pad_sequences for every
# array fed to this model (50). It was previously declared as 40, which does
# not match the padded data.
model.add(Input(shape=(50,)))
# Embedding weights are initialised from the pre-built matrix and kept fixed
# (trainable=False), so only the layers after it are learned.
model.add(Embedding(v+1,
                    output_dim=300,
                    embeddings_initializer=tf.keras.initializers.Constant(embedding_matrix),
                    trainable=False))
# return_sequences=True keeps one output per timestep so the pooling layer
# below can take the maximum across the whole sequence.
model.add(LSTM(20,return_sequences=True))
model.add(GlobalMaxPooling1D())
model.add(Dense(256,activation='relu'))
# Single sigmoid unit: the output is a score in [0, 1] for the label encoded as 1.
model.add(Dense(1,activation='sigmoid'))
# NOTE(review): momentum=0.09 is unusually small and may be a typo for 0.9 —
# left unchanged; confirm the intended value.
model.compile(optimizer=keras.optimizers.SGD(0.1,momentum=0.09),loss='binary_crossentropy',metrics=['accuracy'])

In [None]:
# Print the layer-by-layer architecture and parameter counts.
model.summary()

# **Model Training and Evaluation**

In [None]:
# Train for up to 20 epochs in batches of 256; the callbacks may lower the
# learning rate or stop training early. `r` holds the returned training history.
# NOTE(review): the test split is passed as validation_data, so early stopping
# and LR scheduling are tuned on the same data the final report is computed
# on. Consider a separate validation split so test metrics stay unbiased.
r=model.fit(train_text_pad,train_output,validation_data=(test_text_pad,test_output),
            epochs=20,batch_size=256,callbacks=[early_stop,reducelr])

In [None]:
# Human-readable class names in encoder order (label 0 first, then label 1).
class_names=lbl_target.inverse_transform([0,1])

# The model emits a sigmoid score per sample; scores above 0.5 are taken as
# label 1, everything else as label 0.
print('TESTING DATA CLASSIFICATION REPORT \n \n')
test_pred=(model.predict(test_text_pad) > 0.5).astype('int32')
print(classification_report(test_output,test_pred,target_names=class_names))

# Report on the training split itself. This section previously re-scored the
# test split, so the two reports were identical.
print('TRAINING DATA CLASSIFICATION REPORT \n \n')
train_pred=(model.predict(train_text_pad) > 0.5).astype('int32')
print(classification_report(train_output,train_pred,target_names=class_names))

In [None]:
# Score a single hand-written sample with the in-memory model.
twt = ['i am happy']
# Apply the same cleaning that was applied to the training text before the
# tokenizer was fitted; otherwise the sample is tokenised differently from
# the data the model was trained on. clean_text returns (texts, lengths).
twt_clean = clean_text(twt)[0]
twt = tokenizer.texts_to_sequences(twt_clean)
# Same sequence length as the training arrays.
twt = pad_sequences(twt, maxlen=50)

# predict returns one row per sample with a single sigmoid score in it.
prediction = model.predict(twt)[0][0]
print(prediction)

if prediction > 0.5:
    print("Potential Suicide Post")
else:
    print("Non Suicide Post")

In [None]:
# Persist the fitted tokenizer so inference can reuse the exact vocabulary.
# The context manager guarantees the file is flushed and closed.
with open('tokenizer.pkl', 'wb') as fp:
    pickle.dump(tokenizer, fp)

In [None]:
# Persist the trained model to disk as "model.h5".
model.save("model.h5")

In [None]:
# Reload the tokenizer written by the earlier cell. The context manager closes
# the file handle once loading finishes.
# NOTE(review): only unpickle files you produced yourself or otherwise trust.
with open('tokenizer.pkl', 'rb') as fp:
    token_form = pickle.load(fp)

In [None]:
from keras.models import load_model

In [None]:
# Reload the model from the file written by model.save above.
model_form = load_model("model.h5")

In [None]:

# Score a sample post using the tokenizer and model reloaded from disk.
# NOTE(review): the sample contains a mis-encoded apostrophe ("Iâ€™m"); it is
# left as-is here — confirm whether the training data has the same artefact.
twt = ['Through these past years thoughts of suicide, fear, anxiety Iâ€™m so close to my limit']
# Apply the same cleaning that was applied to the training text before the
# tokenizer was fitted. clean_text returns (texts, lengths).
twt_clean = clean_text(twt)[0]
twt = token_form.texts_to_sequences(twt_clean)
# Same sequence length as the training arrays.
twt = pad_sequences(twt, maxlen=50)


# predict returns one row per sample with a single sigmoid score in it.
prediction = model_form.predict(twt)[0][0]
print(prediction)

# Any score that is not above the threshold is reported as the negative
# class. The previous `elif prediction == 1` could never fire, because a
# score of 1 is already caught by the first branch.
if prediction > 0.5:
    print("Potential Suicide Post")
else:
    print("Non Suicide Post")