In [91]:
from nrclex import NRCLex

import pandas as pd
import numpy as np
from tqdm import tqdm
import seaborn as sns
import matplotlib.pyplot as plt
import joblib
from pandarallel import pandarallel

from sklearn import utils
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.feature_extraction.text import TfidfTransformer
from sklearn.linear_model import LogisticRegression
from sklearn import model_selection, naive_bayes, svm
from sklearn.metrics import accuracy_score
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import classification_report


from nltk.tokenize import word_tokenize
from nltk.corpus import stopwords
stopwords = stopwords.words('english')

from gensim.models.doc2vec import Doc2Vec, TaggedDocument

In [92]:
# Execute the companion notebook in this kernel. get_sentiment_breakdown (used
# below) is neither defined nor imported in this notebook, so it presumably
# comes from COMBINED.ipynb — TODO confirm.
%run COMBINED.ipynb

In [93]:
# Load the input frame (path is relative to this notebook's working directory);
# presumably the output of the 02_Behavioral stage — TODO confirm.
# NOTE(review): unpickling can execute arbitrary code; only load trusted files.
df = pd.read_pickle("../02_Behavioral/02_behavioral_dataset.pkl")

In [94]:
# Preview the first rows of the loaded frame.
df.head()

Unnamed: 0,narrative,label,behavioral
5,\nI (20 F) have been abused by different peopl...,unwell,abused very young
6,I grew up with my dad laying on top of me when...,unwell,grew laying woke started continues continued c...
7,He would call me mommy and ask me to come wipe...,unwell,call ask come wipe
9,I never did anything when he said those things...,unwell,never did said away stayed
10,\n\nWhen I was in seventh grade I became depre...,unwell,became started


In [95]:
# Try the sentiment helper on one hand-written sentence before applying it to
# the whole dataset.
dic = get_sentiment_breakdown("I am not very sad and very angry.")

In [96]:
# Show the returned mapping of emotion name -> score.
dic

{'fear': 0.0,
 'anger': 0.3333333333333333,
 'anticip': 0.0,
 'trust': 0.0,
 'surprise': 0.0,
 'positive': 0.0,
 'negative': 0.3333333333333333,
 'sadness': 0.0,
 'disgust': 0.3333333333333333,
 'joy': 0.0}

In [97]:
# Score every narrative; each cell of "emotional" holds the mapping returned by
# get_sentiment_breakdown. The helper is passed to apply() directly — wrapping
# it in a lambda only added a redundant call layer per row.
df["emotional"] = df["narrative"].apply(get_sentiment_breakdown)

In [98]:
# Confirm the new "emotional" column is present.
df.head()

Unnamed: 0,narrative,label,behavioral,emotional
5,\nI (20 F) have been abused by different peopl...,unwell,abused very young,"{'fear': 0.0, 'anger': 0.0, 'anticip': 0.0, 't..."
6,I grew up with my dad laying on top of me when...,unwell,grew laying woke started continues continued c...,"{'fear': 0.0, 'anger': 0.0, 'anticip': 0.0, 't..."
7,He would call me mommy and ask me to come wipe...,unwell,call ask come wipe,"{'fear': 0.0, 'anger': 0.0, 'anticip': 0.0, 't..."
9,I never did anything when he said those things...,unwell,never did said away stayed,"{'fear': 0.0, 'anger': 0.0, 'anticip': 0.0, 't..."
10,\n\nWhen I was in seventh grade I became depre...,unwell,became started,"{'fear': 0.25, 'anger': 0.25, 'anticip': 0.0, ..."


In [99]:
# Persist the frame, now including the "emotional" column, to disk.
df.to_pickle("03_emotional_dataset.pkl")

In [100]:
# Empty template frame: fixes the emotion columns and their order before any
# rows are added.
emotional_df = pd.DataFrame(
    columns=[
        "negative",
        "positive",
        "fear",
        "anger",
        "trust",
        "sadness",
        "disgust",
        "anticip",
        "surprise",
        "joy",
    ]
)

In [101]:
# Display the (still empty) template frame to check its columns.
emotional_df

Unnamed: 0,negative,positive,fear,anger,trust,sadness,disgust,anticip,surprise,joy


In [102]:
# Expand the per-row emotion dicts into one column per emotion with a single
# constructor call. The previous version grew the frame row by row using
# Series.iteritems() and DataFrame.append(); both were removed in pandas 2.0,
# and append() re-copied the whole frame on every iteration (quadratic).
emotion_scores = pd.DataFrame(df["emotional"].tolist())

# Keep the template's columns first, in their declared order, followed by any
# extra keys found in the dicts. Keys missing from a row become NaN, matching
# what append() produced. Expects emotional_df to be the template frame.
extra_columns = [col for col in emotion_scores.columns if col not in emotional_df.columns]
emotional_df = emotion_scores.reindex(columns=[*emotional_df.columns, *extra_columns])

# NOTE(review): the rendered result shows an "anticipation" column next to an
# "anticip" column that is 0.0 in every displayed row — this looks like one
# emotion reported under two keys by get_sentiment_breakdown. Confirm and
# consider merging them at the source; left unchanged here because it would
# alter the feature vectors.

In [103]:
# Inspect the expanded emotion frame (one row per narrative).
emotional_df

Unnamed: 0,negative,positive,fear,anger,trust,sadness,disgust,anticip,surprise,joy,anticipation
0,0.00,0.250000,0.00,0.00,0.000000,0.00,0.0,0.0,0.25,0.25,0.250000
1,0.00,0.333333,0.00,0.00,0.333333,0.00,0.0,0.0,0.00,0.00,0.333333
2,0.00,0.000000,0.00,0.00,0.000000,0.00,0.0,0.0,0.00,0.00,
3,0.00,0.000000,0.00,0.00,0.000000,0.00,0.0,0.0,0.00,0.00,
4,0.25,0.000000,0.25,0.25,0.000000,0.25,0.0,0.0,0.00,0.00,
...,...,...,...,...,...,...,...,...,...,...,...
4747,0.00,0.000000,0.00,0.00,0.000000,0.00,0.0,0.0,0.00,0.00,
4748,0.00,0.200000,0.20,0.20,0.200000,0.00,0.0,0.0,0.00,0.20,
4749,0.00,0.250000,0.00,0.00,0.250000,0.00,0.0,0.0,0.00,0.25,0.250000
4750,0.00,0.000000,0.00,0.00,0.000000,0.00,0.0,0.0,0.00,0.00,


In [104]:
# Rows whose dict lacked a key (e.g. "anticipation" in the output above) hold
# NaN after expansion; treat a missing score as 0.
emotional_df = emotional_df.fillna(0)

In [105]:
# Pack each row's emotion scores into one list (the feature vector used for
# training below). Derived columns are excluded so that re-running this cell —
# or running it after "label" has been attached — cannot fold a previous
# "array" or the string label into the vectors. On a fresh top-to-bottom run
# neither column exists yet, so the result is unchanged.
emotional_df["array"] = (
    emotional_df.drop(columns=["array", "label"], errors="ignore").values.tolist()
)

In [107]:
# Check the packed "array" column next to the individual score columns.
emotional_df.head()

Unnamed: 0,negative,positive,fear,anger,trust,sadness,disgust,anticip,surprise,joy,anticipation,array
0,0.0,0.25,0.0,0.0,0.0,0.0,0.0,0.0,0.25,0.25,0.25,"[0.0, 0.25, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.25..."
1,0.0,0.333333,0.0,0.0,0.333333,0.0,0.0,0.0,0.0,0.0,0.333333,"[0.0, 0.3333333333333333, 0.0, 0.0, 0.33333333..."
2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,"[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ..."
3,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,"[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ..."
4,0.25,0.0,0.25,0.25,0.0,0.25,0.0,0.0,0.0,0.0,0.0,"[0.25, 0.0, 0.25, 0.25, 0.0, 0.25, 0.0, 0.0, 0..."


In [109]:
# Labels as a one-column frame on a fresh 0..n-1 index, so they line up
# row-for-row with emotional_df when the two are joined column-wise.
df2 = df[["label"]].reset_index(drop=True)
df2.head()

Unnamed: 0,label
0,unwell
1,unwell
2,unwell
3,unwell
4,unwell


In [115]:
# Attach the labels column-wise (both frames carry a 0..n-1 index). Any label
# column left over from an earlier run of this cell is dropped first;
# otherwise re-running would add a second "label" column and
# emotional_df["label"] would return a DataFrame instead of a Series.
emotional_df = pd.concat(
    [emotional_df.drop(columns=list(df2.columns), errors="ignore"), df2],
    axis=1,
)

In [116]:
# Persist the emotion scores, packed vectors and labels to disk.
emotional_df.to_pickle("emotional_training_set.pkl")

In [139]:
# 70/30 hold-out split of the packed vectors and labels; the fixed seed keeps
# the partition reproducible across runs.
X_train, X_test, y_train, y_test = model_selection.train_test_split(
    emotional_df["array"],
    emotional_df["label"],
    test_size=0.3,
    random_state=42,
)

In [141]:
# Hand the estimator plain lists of per-row vectors instead of Series whose
# elements are lists. list() accepts a Series and an already-converted list
# alike, so the cell can be re-run; the previous Series.to_list() call raised
# AttributeError on a second execution because the names were already lists.
X_train = list(X_train)
X_test = list(X_test)

In [145]:
# Fit a linear-kernel support vector classifier on the training vectors.
# NOTE(review): per the scikit-learn SVC documentation, `degree` applies only
# to the 'poly' kernel and `gamma` only to 'rbf'/'poly'/'sigmoid', so both are
# inert with kernel='linear'; they are kept so the stored estimator parameters
# stay unchanged.
SVM = svm.SVC(
    C=1.0,
    kernel='linear',
    degree=3,
    gamma='auto',
).fit(X_train, y_train)

# Predictions on the held-out split, consumed by the report cell below.
y_pred = SVM.predict(X_test)

In [146]:
# classification_report returns preformatted text, so print it rather than
# relying on the cell's repr (which would show escaped newlines).
print(classification_report(y_test, y_pred))

              precision    recall  f1-score   support

      unwell       0.83      0.57      0.67       718
        well       0.67      0.88      0.76       708

    accuracy                           0.72      1426
   macro avg       0.75      0.72      0.72      1426
weighted avg       0.75      0.72      0.72      1426



In [147]:
# Serialize the fitted classifier to disk at compression level 9; the call
# returns the list of written file names, shown as the cell output.
joblib.dump(SVM, '03_svm_model.pkl', compress=9)

['03_svm_model.pkl']

In [151]:
# Attach each row's emotion vector to the original frame. reset_index() moves
# df's old row labels into an "index" column and gives df a fresh 0..n-1 index,
# matching emotional_df, so the column-wise concat pairs rows by position.
# The column is selected with brackets: the name "array" collides with the
# pandas `.array` accessor on Series/Index, which makes attribute-style access
# fragile and easy to misread.
final_dataset = pd.concat([df.reset_index(), emotional_df["array"]], axis=1)

In [152]:
# Persist the combined frame (original columns plus the "array" vectors).
final_dataset.to_pickle("FINAL_DATASET.pkl")