In [1]:
%matplotlib inline
import matplotlib.pyplot as plt
import os
import re
import numpy as np
import pandas as pd
from scipy import stats

import gensim
import json


import nltk
from nltk.stem import WordNetLemmatizer
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize
from nltk.stem import PorterStemmer

import itertools

import keras

from keras.models import Sequential, Model
from keras.layers import Dense, Dropout, Embedding, LSTM, Input, Activation, GlobalAveragePooling1D, Flatten, Concatenate, Conv1D, MaxPooling1D
from keras.layers.normalization import BatchNormalization
from keras.layers.merge import concatenate
from keras.optimizers import SGD, RMSprop, Adagrad, Adam
from keras.preprocessing.text import one_hot, text_to_word_sequence, Tokenizer
from keras.preprocessing.sequence import pad_sequences

from keras.callbacks import EarlyStopping, ModelCheckpoint
from keras.utils.vis_utils import plot_model

import fnmatch

import warnings

import string
from pathlib import Path
from random import shuffle
from ast import literal_eval
import statistics

warnings.filterwarnings('ignore')

Using TensorFlow backend.


In [2]:
wordnet_lemmatizer = WordNetLemmatizer()

In [3]:
WINDOWS_SIZE = 10
labels=['none','mild','moderate','moderately severe', 'severe']
num_classes = len(labels)

In [4]:
def text_to_wordlist(text, remove_stopwords=True, stem_words=False):    
    # Clean the text, with the option to remove stopwords and to stem words.
    
    # Convert words to lower case and split them
    text = text.lower().split()

    # Optionally, remove stop words
    if remove_stopwords:
        stops = set(stopwords.words("english"))
        text = [wordnet_lemmatizer.lemmatize(w) for w in text if not w in stops ]
        text = [w for w in text if w != "nan" ]
    else:
        text = [wordnet_lemmatizer.lemmatize(w) for w in text]
        text = [w for w in text if w != "nan" ]
    
    text = " ".join(text)

    # Clean the text
    text = re.sub(r"[^A-Za-z0-9^,!.\/'+-=]", " ", text)
    text = re.sub(r"what's", "what is ", text)
    text = re.sub(r"\'s", " ", text)
    text = re.sub(r"\'ve", " have ", text)
    text = re.sub(r"can't", "cannot ", text)
    text = re.sub(r"n't", " not ", text)
    text = re.sub(r"i'm", "i am ", text)
    text = re.sub(r"\'re", " are ", text)
    text = re.sub(r"\'d", " would ", text)
    text = re.sub(r"\'ll", " will ", text)
    text = re.sub(r",", " ", text)
    text = re.sub(r"\.", " ", text)
    text = re.sub(r"!", " ! ", text)
    text = re.sub(r"\/", " ", text)
    text = re.sub(r"\^", " ^ ", text)
    text = re.sub(r"\+", " + ", text)
    text = re.sub(r"\-", " - ", text)
    text = re.sub(r"\=", " = ", text)
    
    text = re.sub(r"\<", " ", text)
    text = re.sub(r"\>", " ", text)
    
    text = re.sub(r"'", " ", text)
    text = re.sub(r"(\d+)(k)", r"\g<1>000", text)
    text = re.sub(r":", " : ", text)
    text = re.sub(r" e g ", " eg ", text)
    text = re.sub(r" b g ", " bg ", text)
    text = re.sub(r" u s ", " american ", text)
    text = re.sub(r"\0s", "0", text)
    text = re.sub(r" 9 11 ", "911", text)
    text = re.sub(r"e - mail", "email", text)
    text = re.sub(r"j k", "jk", text)
    text = re.sub(r"\s{2,}", " ", text)
    
    # Optionally, shorten words to their stems
    if stem_words:
        text = text.split()
        stemmer = SnowballStemmer('english')
        stemmed_words = [stemmer.stem(word) for word in text]
        text = " ".join(stemmed_words)
    
    # Return a list of words
    return(text)

In [5]:
import nltk
nltk.download('wordnet')
nltk.download('stopwords')

[nltk_data] Downloading package wordnet to /home/prahlad/nltk_data...
[nltk_data]   Package wordnet is already up-to-date!
[nltk_data] Downloading package stopwords to
[nltk_data]     /home/prahlad/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


True

In [6]:
data_path = "/media/prahlad/New Volume/DIAC-WOZ/transcripts/"
#transcripts_to_dataframe(data_path) 
all_participants = pd.read_csv(data_path + 'all.csv', sep=',')

In [7]:
all_participants.columns =  ['index','personId', 'question', 'answer']
all_participants = all_participants.astype({"index": int, "personId": float, "question": str, "answer": str })

In [8]:
all_participants_mix = all_participants.copy()
all_participants_mix['answer'] = all_participants_mix.apply(lambda row: text_to_wordlist(row.answer).split(), axis=1)

In [9]:
all_participants_mix_stopwords = all_participants.copy()
all_participants_mix_stopwords['answer'] = all_participants_mix_stopwords.apply(lambda row: text_to_wordlist(row.answer, remove_stopwords=False).split(), axis=1)

In [10]:
words = [w for w in all_participants_mix['answer'].tolist()]
words = set(itertools.chain(*words))
vocab_size = len(words)
print(vocab_size)

7373


In [11]:
words_stop = [w for w in all_participants_mix_stopwords['answer'].tolist()]
words_stop = set(itertools.chain(*words_stop))
vocab_size_stop = len(words_stop)

In [12]:
windows_size = WINDOWS_SIZE
tokenizer = Tokenizer(num_words=vocab_size)
tokenizer.fit_on_texts(all_participants_mix['answer'])
tokenizer.fit_on_sequences(all_participants_mix['answer'])

all_participants_mix['t_answer'] = tokenizer.texts_to_sequences(all_participants_mix['answer'])

In [13]:
from keras.models import load_model
model = load_model('/media/prahlad/New Volume/DIAC-WOZ/model_glove_lstm_b.h5')

In [14]:
def test_model(text, model):
    word_list = text_to_wordlist(text)
    list_of_words = word_list.split(" ")
    sequences = tokenizer.texts_to_sequences([word_list])
    word_tokens = sequences[0]
    size = len(word_tokens)
    test_phrases = []
    for i in range(size):
        tokens = word_tokens[i:min(i+windows_size,size)]  
        test_phrases.append(tokens)
    sequences_input = test_phrases
    sequences_input =  pad_sequences(sequences_input, value=0, padding="post", maxlen=windows_size)
    
    predicted_classes = []
    for sequence in sequences_input:
        input_a = np.asarray([sequence])
        pred = model.predict(input_a, batch_size=None, verbose=0, steps=None)
        predicted_classes.append(np.argmax(pred))
        
    predicted_class = statistics.mode(predicted_classes)
    print(labels[predicted_class])

In [15]:
data_path = "/media/prahlad/New Volume/DIAC-WOZ/transcripts/"
#transcripts_to_dataframe(data_path) 
all_participants = pd.read_csv(data_path + 'all.csv', sep=',')

In [16]:
answers_308 = all_participants[all_participants.personId==308].answer.tolist()

In [17]:
import math
sen = ""
for answer in answers_308:
    if type(answer)==str or not math.isnan(answer):
        sen+=answer
print(sen)

 los angeles california yes um the southern california lifestyle the beaches the um active lifestyle the traffic the pretentious people um my teachers um i don't know certain teachers um got uh spiritual teachers that i um find a lot of guidance from <laughter> that's it mm i like to skateboard and and just stay active stay healthy um anything that's um outdoors and active and healthy hiking biking rollerblading skateboarding all the above mm no mm not really in a position to travel financially if i could i'm sure i would enjoy seeing all the different cultures and places and meeting all the different people and all the interesting structures and uh the food so i meditate and i skateboard well i can meditate on my board mm i meditate it helps me relax um sometimes if i don't really feel like skating hard i'll just go for a nice uh sunset skate and meditate a nice two hour skate just relax mm it's not that hard you just gotta get to the skate spots you know i usually like skating um the

In [18]:
test_model(sen, model)

severe


In [19]:
# GETTING THE QUESTIONNAIRE

In [21]:
all_participants.tail()

Unnamed: 0.1,Unnamed: 0,personId,question,answer
14968,14968,492,mm,
14969,14969,492,what are you most proud of in your life,um i am very proud of the fact that uh i don'...
14970,14970,492,okay i think i have asked everything i need to,
14971,14971,492,thanks for sharing your thoughts with me,
14972,14972,492,goodbye,bye and thank you


In [28]:
questions = list(all_participants['question'])
print(questions)

["hi i'm ellie thanks for coming in today", 'i was created to talk to people in a safe and secure environment', "think of me as a friend i don't judge i can't i'm a computer", "i'm here to learn about people and would love to learn about you", "i'll ask a few questions to get us started and please feel free to tell me anything your answers are totally confidential", 'how are you doing today', "that's good", 'where are you from originally', 'really', "why'd you move to l_a", 'how do you like l_a', 'what are some things you really like about l_a', 'how easy was it for you to get used to living in l_a', "what are some things you don't really like about l_a", 'mhm', 'okay', "what'd you study at school", 'cool', 'are you still doing that', "what's your dream job", 'awesome', 'do you travel a lot', 'why', 'okay', 'how often do you go back to your hometown', 'nice', 'do you consider yourself an introvert', 'whatever comes to your mind', 'okay', ' what do you do to relax', 'awesome', 'how are 

In [67]:
def refine_questions(questions):
    results = []
    for question in questions:
        if question in results or type(question)==float:
            continue
        if question.startswith('how'):
            results.append(question)
        elif question.startswith('what'):
            results.append(question)
        elif question.startswith('what'):
            results.append(question)
        elif question.startswith('when'):
            results.append(question)
        elif question.startswith('why'):
            results.append(question)
        elif question.startswith('where'):
            results.append(question)
        elif question.startswith('which'):
            results.append(question)
        elif question.startswith('do'):
            results.append(question)
        elif question.startswith('can'):
            results.append(question)
        elif question.startswith('may'):
            results.append(question)
        elif question.startswith('will'):
            results.append(question)
        elif question.startswith('shall'):
            results.append(question)
        elif question.startswith('have'):
            results.append(question)
        elif question.startswith('were'):
            results.append(question)
        
    return results
        
    

In [68]:
shortlisted_questions = refine_questions(questions)

In [69]:
for question in shortlisted_questions:
    print(question)

how are you doing today
where are you from originally
why'd you move to l_a
how do you like l_a
what are some things you really like about l_a
how easy was it for you to get used to living in l_a
what are some things you don't really like about l_a
what'd you study at school
what's your dream job
do you travel a lot
why
how often do you go back to your hometown
do you consider yourself an introvert
whatever comes to your mind
how are you at controlling your temper
when was the last time you argued with someone and what was it about
how did you feel in that moment
how close are you to them
how do you know them
what are some things you like to do for fun
can you tell me about that
how close are you to your family
what made you decide to do that
what's one of your most memorable experiences
what's it like for you living with them
how do you like your living situation
do you have roommates
how easy is it for you to get a good night's sleep
do you feel that way often
what are you like when 

In [70]:
print(len(shortlisted_questions))

174


In [63]:
all_lens = []
for id in range(300, 493):
    len_id = len(refine_questions(all_participants[all_participants.personId==id].question.tolist()))
    if len_id != 0:
        all_lens.append(len_id)

In [66]:
print(min(all_lens), max(all_lens))

14 47


In [71]:
file = open('/media/prahlad/New Volume2/DIAC-WOZ/Depression_Questionnaire.txt', 'r') 
all_questions = file.readlines() 
for question in all_questions:
    print(question)

How are you doing today? How are you feeling lately?

Where do you live? What are some things you really like about this place? What are some things you don't really like about this place?

What did you study at school?

Are you currently employed? If so, how's work?

What's your dream job? 

Do you travel a lot? If so, why? What do you enjoy about traveling?

Do you consider yourself an introvert? 

How would your best friend describe you?

How do you like your living situation?

Do you have roommates? If so, what's it like for you living with your roommates?

What advice would you give yourself ten or twenty years ago?

What are some things you wish you could change about yourself? 

What's something you feel guilty about?

What do you think of today's kids?

What are some things that make you really mad?

How are you at controlling your temper?

When was the last time you argued with someone and what was it about? How did you feel in that moment? How do you know them and how close a

In [72]:
print(len(all_questions))

39


In [1]:
import tensorflow as tf

model = tf.keras.models.load_model('/home/prahlad/DepDet model/model_glove_lstm_b.h5')

converter = tf.lite.TFLiteConverter.from_keras_model(model)
tflite_model = converter.convert()

open("depression_active_model.tflite", "wb").write(tflite_model)

ConverterError: See console for info.
/home/prahlad/.local/lib/python3.6/site-packages/tensorflow/python/framework/dtypes.py:516: FutureWarning: Passing (type, 1) or '1type' as a synonym of type is deprecated; in a future version of numpy, it will be understood as (type, (1,)) / '(1,)type'.
  _np_qint8 = np.dtype([("qint8", np.int8, 1)])
/home/prahlad/.local/lib/python3.6/site-packages/tensorflow/python/framework/dtypes.py:517: FutureWarning: Passing (type, 1) or '1type' as a synonym of type is deprecated; in a future version of numpy, it will be understood as (type, (1,)) / '(1,)type'.
  _np_quint8 = np.dtype([("quint8", np.uint8, 1)])
/home/prahlad/.local/lib/python3.6/site-packages/tensorflow/python/framework/dtypes.py:518: FutureWarning: Passing (type, 1) or '1type' as a synonym of type is deprecated; in a future version of numpy, it will be understood as (type, (1,)) / '(1,)type'.
  _np_qint16 = np.dtype([("qint16", np.int16, 1)])
/home/prahlad/.local/lib/python3.6/site-packages/tensorflow/python/framework/dtypes.py:519: FutureWarning: Passing (type, 1) or '1type' as a synonym of type is deprecated; in a future version of numpy, it will be understood as (type, (1,)) / '(1,)type'.
  _np_quint16 = np.dtype([("quint16", np.uint16, 1)])
/home/prahlad/.local/lib/python3.6/site-packages/tensorflow/python/framework/dtypes.py:520: FutureWarning: Passing (type, 1) or '1type' as a synonym of type is deprecated; in a future version of numpy, it will be understood as (type, (1,)) / '(1,)type'.
  _np_qint32 = np.dtype([("qint32", np.int32, 1)])
/home/prahlad/.local/lib/python3.6/site-packages/tensorflow/python/framework/dtypes.py:525: FutureWarning: Passing (type, 1) or '1type' as a synonym of type is deprecated; in a future version of numpy, it will be understood as (type, (1,)) / '(1,)type'.
  np_resource = np.dtype([("resource", np.ubyte, 1)])
/home/prahlad/.local/lib/python3.6/site-packages/tensorboard/compat/tensorflow_stub/dtypes.py:541: FutureWarning: Passing (type, 1) or '1type' as a synonym of type is deprecated; in a future version of numpy, it will be understood as (type, (1,)) / '(1,)type'.
  _np_qint8 = np.dtype([("qint8", np.int8, 1)])
/home/prahlad/.local/lib/python3.6/site-packages/tensorboard/compat/tensorflow_stub/dtypes.py:542: FutureWarning: Passing (type, 1) or '1type' as a synonym of type is deprecated; in a future version of numpy, it will be understood as (type, (1,)) / '(1,)type'.
  _np_quint8 = np.dtype([("quint8", np.uint8, 1)])
/home/prahlad/.local/lib/python3.6/site-packages/tensorboard/compat/tensorflow_stub/dtypes.py:543: FutureWarning: Passing (type, 1) or '1type' as a synonym of type is deprecated; in a future version of numpy, it will be understood as (type, (1,)) / '(1,)type'.
  _np_qint16 = np.dtype([("qint16", np.int16, 1)])
/home/prahlad/.local/lib/python3.6/site-packages/tensorboard/compat/tensorflow_stub/dtypes.py:544: FutureWarning: Passing (type, 1) or '1type' as a synonym of type is deprecated; in a future version of numpy, it will be understood as (type, (1,)) / '(1,)type'.
  _np_quint16 = np.dtype([("quint16", np.uint16, 1)])
/home/prahlad/.local/lib/python3.6/site-packages/tensorboard/compat/tensorflow_stub/dtypes.py:545: FutureWarning: Passing (type, 1) or '1type' as a synonym of type is deprecated; in a future version of numpy, it will be understood as (type, (1,)) / '(1,)type'.
  _np_qint32 = np.dtype([("qint32", np.int32, 1)])
/home/prahlad/.local/lib/python3.6/site-packages/tensorboard/compat/tensorflow_stub/dtypes.py:550: FutureWarning: Passing (type, 1) or '1type' as a synonym of type is deprecated; in a future version of numpy, it will be understood as (type, (1,)) / '(1,)type'.
  np_resource = np.dtype([("resource", np.ubyte, 1)])
2020-04-12 12:13:12.757657: I tensorflow/core/platform/cpu_feature_guard.cc:142] Your CPU supports instructions that this TensorFlow binary was not compiled to use: AVX2 FMA
2020-04-12 12:13:13.654267: I tensorflow/core/platform/profile_utils/cpu_utils.cc:94] CPU Frequency: 2400000000 Hz
2020-04-12 12:13:13.655171: I tensorflow/compiler/xla/service/service.cc:168] XLA service 0x4314e60 executing computations on platform Host. Devices:
2020-04-12 12:13:13.655281: I tensorflow/compiler/xla/service/service.cc:175]   StreamExecutor device (0): <undefined>, <undefined>
2020-04-12 12:13:13.842704: I tensorflow/lite/toco/import_tensorflow.cc:1336] Converting unsupported operation: AddV2
2020-04-12 12:13:13.842908: I tensorflow/lite/toco/import_tensorflow.cc:1336] Converting unsupported operation: TensorListFromTensor
2020-04-12 12:13:13.842975: I tensorflow/lite/toco/import_tensorflow.cc:193] Unsupported data type in placeholder op: 21
2020-04-12 12:13:13.843139: I tensorflow/lite/toco/import_tensorflow.cc:1336] Converting unsupported operation: TensorListReserve
2020-04-12 12:13:13.843191: I tensorflow/lite/toco/import_tensorflow.cc:193] Unsupported data type in placeholder op: 21
2020-04-12 12:13:13.843319: I tensorflow/lite/toco/import_tensorflow.cc:1336] Converting unsupported operation: While
2020-04-12 12:13:13.843414: I tensorflow/lite/toco/import_tensorflow.cc:193] Unsupported data type in placeholder op: 21
2020-04-12 12:13:13.843446: I tensorflow/lite/toco/import_tensorflow.cc:193] Unsupported data type in placeholder op: 21
2020-04-12 12:13:13.843500: I tensorflow/lite/toco/import_tensorflow.cc:1336] Converting unsupported operation: TensorListStack
2020-04-12 12:13:13.860259: I tensorflow/lite/toco/graph_transformations/graph_transformations.cc:39] Before Removing unused ops: 56 operators, 122 arrays (0 quantized)
2020-04-12 12:13:13.864838: I tensorflow/lite/toco/graph_transformations/graph_transformations.cc:39] Before general graph transformations: 56 operators, 122 arrays (0 quantized)
2020-04-12 12:13:13.868253: I tensorflow/lite/toco/graph_transformations/graph_transformations.cc:39] After general graph transformations pass 1: 41 operators, 96 arrays (0 quantized)
2020-04-12 12:13:13.873369: I tensorflow/lite/toco/graph_transformations/graph_transformations.cc:39] After general graph transformations pass 2: 38 operators, 93 arrays (0 quantized)
2020-04-12 12:13:13.875723: I tensorflow/lite/toco/graph_transformations/graph_transformations.cc:39] After general graph transformations pass 3: 37 operators, 91 arrays (0 quantized)
2020-04-12 12:13:13.878502: I tensorflow/lite/toco/graph_transformations/graph_transformations.cc:39] Before Group bidirectional sequence lstm/rnn: 37 operators, 91 arrays (0 quantized)
2020-04-12 12:13:13.880078: I tensorflow/lite/toco/graph_transformations/graph_transformations.cc:39] Before dequantization graph transformations: 37 operators, 91 arrays (0 quantized)
2020-04-12 12:13:13.882565: I tensorflow/lite/toco/allocate_transient_arrays.cc:345] Total transient array allocated size: 8128 bytes, theoretical optimal value: 8064 bytes.
2020-04-12 12:13:13.884819: E tensorflow/lite/toco/toco_tooling.cc:456] We are continually in the process of adding support to TensorFlow Lite for more ops. It would be helpful if you could inform us of how this conversion went by opening a github issue at https://github.com/tensorflow/tensorflow/issues/new?template=40-tflite-op-request.md
 and pasting the following:

Some of the operators in the model are not supported by the standard TensorFlow Lite runtime. If those are native TensorFlow operators, you might be able to use the extended runtime by passing --enable_select_tf_ops, or by setting target_ops=TFLITE_BUILTINS,SELECT_TF_OPS when calling tf.lite.TFLiteConverter(). Otherwise, if you have a custom implementation for them you can disable this error with --allow_custom_ops, or by setting allow_custom_ops=True when calling tf.lite.TFLiteConverter(). Here is a list of builtin operators you are using: ADD, CAST, CONCATENATION, FILL, FULLY_CONNECTED, GATHER, MUL, PACK, RESHAPE, SHAPE, SOFTMAX, STRIDED_SLICE, TRANSPOSE. Here is a list of operators for which you will need custom implementations: AddV2, TensorListFromTensor, TensorListReserve, TensorListStack, While.
Traceback (most recent call last):
  File "/home/prahlad/.local/bin/toco_from_protos", line 11, in <module>
    sys.exit(main())
  File "/home/prahlad/.local/lib/python3.6/site-packages/tensorflow/lite/toco/python/toco_from_protos.py", line 59, in main
    app.run(main=execute, argv=[sys.argv[0]] + unparsed)
  File "/home/prahlad/.local/lib/python3.6/site-packages/tensorflow/python/platform/app.py", line 40, in run
    _run(main=main, argv=argv, flags_parser=_parse_flags_tolerate_undef)
  File "/home/prahlad/.local/lib/python3.6/site-packages/absl/app.py", line 299, in run
    _run_main(main, args)
  File "/home/prahlad/.local/lib/python3.6/site-packages/absl/app.py", line 250, in _run_main
    sys.exit(main(argv))
  File "/home/prahlad/.local/lib/python3.6/site-packages/tensorflow/lite/toco/python/toco_from_protos.py", line 33, in execute
    output_str = tensorflow_wrap_toco.TocoConvert(model_str, toco_str, input_str)
Exception: We are continually in the process of adding support to TensorFlow Lite for more ops. It would be helpful if you could inform us of how this conversion went by opening a github issue at https://github.com/tensorflow/tensorflow/issues/new?template=40-tflite-op-request.md
 and pasting the following:

Some of the operators in the model are not supported by the standard TensorFlow Lite runtime. If those are native TensorFlow operators, you might be able to use the extended runtime by passing --enable_select_tf_ops, or by setting target_ops=TFLITE_BUILTINS,SELECT_TF_OPS when calling tf.lite.TFLiteConverter(). Otherwise, if you have a custom implementation for them you can disable this error with --allow_custom_ops, or by setting allow_custom_ops=True when calling tf.lite.TFLiteConverter(). Here is a list of builtin operators you are using: ADD, CAST, CONCATENATION, FILL, FULLY_CONNECTED, GATHER, MUL, PACK, RESHAPE, SHAPE, SOFTMAX, STRIDED_SLICE, TRANSPOSE. Here is a list of operators for which you will need custom implementations: AddV2, TensorListFromTensor, TensorListReserve, TensorListStack, While.


