In [46]:
import os
os.environ['TF_FORCE_GPU_ALLOW_GROWTH'] = 'true'

In [47]:
import tensorflow as tf
import numpy as np
import re
import io

In [48]:
# Define word2index dictionary (replace with your own)
word2index = {
    "<PAD>": 0,
    "<UNK>": 1,
    "hello": 2,
    "world": 3,
    "how": 4,
    "are": 5,
    "you": 6,
    "today": 7,
    "i": 8,
    "am": 9,
    "doing": 10,
    "fine": 11,
    "thank": 12
}
index2word = {index: word for word, index in word2index.items()}

In [49]:
# Load Cornell Movie Dialogs Corpus
path_to_movie_lines = "datasets/cornell_movie_dialogs_corpus/movie_lines.txt"
path_to_movie_conversations = "datasets/cornell_movie_dialogs_corpus/movie_conversations.txt"

In [50]:
# Create a dictionary to map line ids to their corresponding text
id2line = {}
with io.open(path_to_movie_lines, 'r', encoding='iso-8859-1') as f:
    lines = f.readlines()
for line in lines:
    parts = line.strip().split(' +++$+++ ')
    if len(parts) >= 5:
        id2line[parts[0]] = parts[4]

In [51]:
# Create a list of all conversations
conversations = []
with open(path_to_movie_conversations, 'r') as f:
    lines = f.readlines()
for line in lines:
    parts = line.strip().split(' +++$+++ ')
    # Convert string representation of list to actual list
    convo = list(eval(parts[3]))
    convo_text = []
    for utt_id in convo:
        if utt_id in id2line:
            convo_text.append(id2line[utt_id])
    if len(convo_text) > 1:
        conversations.append(convo_text)

In [43]:
# Load saved lstm model
path_to_lstm_model = "models/final_models/lstm_e10_b32.h5"
lstm_model = tf.keras.models.load_model(path_to_lstm_model)



In [44]:
# Define function to preprocess text data
def preprocess_text(text):
    text = text.lower()
    text = re.sub(r"i'm", "i am", text)
    text = re.sub(r"he's", "he is", text)
    text = re.sub(r"she's", "she is", text)
    text = re.sub(r"it's", "it is", text)
    text = re.sub(r"that's", "that is", text)
    text = re.sub(r"what's", "what is", text)
    text = re.sub(r"where's", "where is", text)
    text = re.sub(r"how's", "how is", text)
    text = re.sub(r"\'ll", " will", text)
    text = re.sub(r"\'ve", " have", text)
    text = re.sub(r"\'re", " are", text)
    text = re.sub(r"\'d", " would", text)
    text = re.sub(r"won't", "will not", text)
    text = re.sub(r"can't", "cannot", text)
    text = re.sub(r"n't", " not", text)
    text = re.sub(r"[-()\";:<>{}+=~.,|?!]", "", text)
    text = text.strip()
    return text

In [58]:
# Define function to generate responses
def generate_response(input_text, max_len, user_inputs):
    # Preprocess input text
    input_text = preprocess_text(input_text)
    # Convert input text to sequence of integers
    input_seq = []
    for word in input_text.split():
        if word in word2index:
            input_seq.append(word2index[word])
        else:
            input_seq.append(word2index["<UNK>"])
    input_seq = np.array(input_seq)
    # Use LSTM model to generate response
    response_seq = []
    # Initialize state to None
    state = None
    for i in range(max_len):
        # Generate output for one time step
        output, state = lstm_model.predict([np.expand_dims(input_seq, axis=0), state])
        # Get most likely next word
        next_word = np.argmax(output[0, i, :])
        if index2word[next_word] == "<EOS>":
            break
        response_seq.append(next_word)
    # Convert sequence of integers to text
    response_text = ""
    for index in response_seq:
        if index2word[index] != "<PAD>":
            response_text += index2word[index] + " "
    # Save user input and generated response
    user_inputs.append(input_text)
    user_inputs.append(response_text)
    # Check if user has provided enough input to diagnose depression
    if len(user_inputs) < 20:
        return "not enough input to diagnose"
    # Preprocess user input
    user_input_seq = []
    for user_input in user_inputs:
        user_input = preprocess_text(user_input)
        for word in user_input.split():
            if word in word2index:
                user_input_seq.append(word2index[word])
            else:
                user_input_seq.append(word2index["<UNK>"])
    user_input_seq = np.array(user_input_seq)
    # Use LSTM model to predict depression diagnosis
    predicted_output = lstm_model.predict(np.expand_dims(user_input_seq, axis=0))
    if predicted_output > 0.5:
        return "You might be showing signs of depression. Please seek professional help."
    else:
        return "You seem to be doing well. Keep it up!"

In [59]:
# Define user inputs list
user_inputs = []

# Define max sequence length
max_len = 20

# Start conversation with user
print("Hello! How can I help you today?")

while True:
    # Get user input
    user_input = input("User: ")

    # Check if user wants to end the conversation
    if user_input.lower() == "bye":
        print("Chatbot: Goodbye!")
        break

    # Check if user wants to diagnose depression
    if user_input.lower() == "diagnose":
        # Generate response
        response = generate_response("diagnose", max_len, user_inputs)

        # Print response
        print("Chatbot:", response)
        continue

    # Add user input to list
    user_inputs.append(user_input)

    # Continue conversation
    print("Chatbot: How else can I help you?")

Hello! How can I help you today?


ValueError: Failed to find data adapter that can handle input: (<class 'list'> containing values of types {"<class 'numpy.ndarray'>", "<class 'NoneType'>"}), <class 'NoneType'>