## Imports all in one place

In [18]:
import csv
import nltk
import warnings
import numpy as np
from numpy.random import seed
from gensim.models import Word2Vec
from keras.models import Sequential
from keras.models import model_from_json
from keras.preprocessing import sequence
from keras.layers import Dense, Conv2D, Flatten
from keras.preprocessing.sequence import pad_sequences
from keras.layers import Dense, Dropout, Embedding, LSTM, Bidirectional

In [19]:
warnings.filterwarnings('ignore')

### FUNCTION: Load LSTM Model

In [20]:
def get_lstm_model():
    json_file = open('./models/lstm/lstm.json', 'r')
    loaded_model_json = json_file.read()
    json_file.close()
    model = model_from_json(loaded_model_json)
    model.load_weights('./models/lstm/lstm.h5') 
    model.compile(optimizer = 'adam', loss = 'binary_crossentropy')
    return model

### FUNCTION: Load Word2Vec Model

In [22]:
def get_word2vec_model():
    return Word2Vec.load('./models/word2vec/word2vec.model')

### FUNCTION: Train Word2Vec Model

In [23]:
def train_word2vec_model(wordVecDataset):
    model = Word2Vec(wordVecDataset, min_count = 1, size = 50, workers = 3, window = 3, sg = 1)
    model.save('word2vec.model')
    return model

### FUNCTION: Train Bi-LSTM Model

In [24]:
def train_lstm_model(x_train, y_train):
    model = Sequential()
    model.add(Bidirectional(LSTM(128, input_shape = (128, 3))))
    model.add(Dense(2, activation = 'softmax'))
    model.compile(optimizer = 'adam', loss = 'binary_crossentropy')
    model.fit(x_train, y_train, batch_size = 200, epochs = 60, shuffle = True)
    pred_output = model.predict(x_test, batch_size = 200)
    model_json = model.to_json()
    with open('./models/lstm/lstm.json', 'w') as json_file:
        json_file.write(model_json)
    model.save_weights('./models/lstm/lstm.h5')
    return model

### Arrays for datasets

In [81]:
n_training_tweets = 0
n_testing_tweets = 0
dataset = []
wordVecDataset = []
x_train, y_train, x_test, y_test = [], [], [], []

### Loading Word2Vec Model

In [82]:
wordvecModel = get_word2vec_model()

### Loading and preparing training dataset

In [83]:
training_file = open('./corpora/sample.csv', encoding = 'utf-8', errors = 'ignore')
csv_reader = csv.reader(training_file, delimiter = ',')

line_count = 0
for row in csv_reader:
    if line_count != 0:
        tokens = nltk.word_tokenize(row[1])
        dataset.append([tokens, row[2]])
        wordVecDataset.append(tokens)
    line_count += 1
n_training_tweets = line_count    
    
for tweet in dataset:
    tweet_tokens = tweet[0]
    embeddings = []
    for token in tweet_tokens:
        embeddings.append([round(abs(sum(wordvecModel[token])) * 10, 4)])
    padding = [[0]] * (128 - len(embeddings))
    embeddings = embeddings.copy() + padding
    x_train.append(embeddings)
    label = int(tweet[1])
    if label == 0:
        y_train.append([0, 1])
    else:
        y_train.append([1, 0])

### Loading and preparing testing dataset

In [84]:
dataset = []
testing_file = open('./corpora/testing.csv', encoding = 'utf-8', errors = 'ignore')
csv_reader = csv.reader(testing_file, delimiter = ',')

line_count = 0
for row in csv_reader:
    if line_count != 0:
        tokens = nltk.word_tokenize(row[1])
        dataset.append([tokens, row[2]])
    line_count += 1
n_testing_tweets = line_count   

for tweet in dataset:
    tweet_tokens = tweet[0]
    embeddings = []
    for token in tweet_tokens:
        embeddings.append([round(abs(sum(wordvecModel[token])) * 10, 4)])
    padding = [[0]] * (128 - len(embeddings))
    embeddings = embeddings.copy() + padding
    x_test.append(embeddings)
    label = int(tweet[1])
    if label == 0:
        y_test.append([0, 1])
    else:
        y_test.append([1, 0])

In [85]:
# LSTM dataset
# x_train = np.array(x_train)
# x_test = np.array(x_test)
# x_train = x_train.reshape(len(x_train),128,3)
# x_test = x_test.reshape(len(x_test),128,3)

In [86]:
x_train = np.array([x_train])
y_train = np.array([y_train])
x_test = np.array([x_test])
y_test = np.array([y_test])
# print(x_train)
# print(len(x_train))
print(x_train.shape)

(1, 59, 128, 1)


In [87]:
cnnModel = Sequential()
cnnModel.add(Conv2D(128, kernel_size=3, activation='relu', input_shape=(n_training_tweets,128,3)))
cnnModel.add(Flatten())
cnnModel.add(Dense(2, activation='softmax'))
cnnModel.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])
cnnModel.fit(x_train, y_train, validation_data=(x_test, y_test), epochs=3)

Epoch 1/3


ValueError: in user code:

    G:\Depression Analysis\de-venv\lib\site-packages\tensorflow\python\keras\engine\training.py:805 train_function  *
        return step_function(self, iterator)
    G:\Depression Analysis\de-venv\lib\site-packages\tensorflow\python\keras\engine\training.py:795 step_function  **
        outputs = model.distribute_strategy.run(run_step, args=(data,))
    G:\Depression Analysis\de-venv\lib\site-packages\tensorflow\python\distribute\distribute_lib.py:1259 run
        return self._extended.call_for_each_replica(fn, args=args, kwargs=kwargs)
    G:\Depression Analysis\de-venv\lib\site-packages\tensorflow\python\distribute\distribute_lib.py:2730 call_for_each_replica
        return self._call_for_each_replica(fn, args, kwargs)
    G:\Depression Analysis\de-venv\lib\site-packages\tensorflow\python\distribute\distribute_lib.py:3417 _call_for_each_replica
        return fn(*args, **kwargs)
    G:\Depression Analysis\de-venv\lib\site-packages\tensorflow\python\keras\engine\training.py:788 run_step  **
        outputs = model.train_step(data)
    G:\Depression Analysis\de-venv\lib\site-packages\tensorflow\python\keras\engine\training.py:754 train_step
        y_pred = self(x, training=True)
    G:\Depression Analysis\de-venv\lib\site-packages\tensorflow\python\keras\engine\base_layer.py:998 __call__
        input_spec.assert_input_compatibility(self.input_spec, inputs, self.name)
    G:\Depression Analysis\de-venv\lib\site-packages\tensorflow\python\keras\engine\input_spec.py:259 assert_input_compatibility
        ' but received input with shape ' + display_shape(x.shape))

    ValueError: Input 0 of layer sequential_7 is incompatible with the layer: expected axis -1 of input shape to have value 3 but received input with shape (None, 59, 128, 1)
