In [1]:
import numpy
import tensorflow as tf
from keras.models import Sequential
from keras.layers import Dense, Dropout, LSTM , Bidirectional
from keras.callbacks import ModelCheckpoint , EarlyStopping, ReduceLROnPlateau
from keras.utils import to_categorical

In [2]:
filename = "mental_h.txt"
raw_text = open(filename, "r", encoding="utf-8", errors="ignore").read()
raw_text = raw_text.lower()

In [3]:
import torch
print("Is CUDA available: ", torch.cuda.is_available())
print("Number of GPUs: ", torch.cuda.device_count())
if torch.cuda.is_available():
    print("GPU Name: ", torch.cuda.get_device_name(0))


Is CUDA available:  True
Number of GPUs:  1
GPU Name:  NVIDIA GeForce RTX 3050 Ti Laptop GPU


In [4]:
chars = sorted(list(set(raw_text)))
char_to_int = {c: i for i, c in enumerate(chars)}
int_to_char = {i: c for i, c in enumerate(chars)}

In [5]:
chars

['\t',
 '\n',
 '\x0c',
 ' ',
 '"',
 '#',
 '$',
 '%',
 '&',
 "'",
 '(',
 ')',
 '*',
 '+',
 ',',
 '-',
 '.',
 '/',
 '0',
 '1',
 '2',
 '3',
 '4',
 '5',
 '6',
 '7',
 '8',
 '9',
 ':',
 ';',
 '=',
 '?',
 '[',
 ']',
 '_',
 'a',
 'b',
 'c',
 'd',
 'e',
 'f',
 'g',
 'h',
 'i',
 'j',
 'k',
 'l',
 'm',
 'n',
 'o',
 'p',
 'q',
 'r',
 's',
 't',
 'u',
 'v',
 'w',
 'x',
 'y',
 'z',
 '|',
 '\xa0',
 '©',
 'í',
 '\u200b',
 '–',
 '—',
 '‘',
 '’',
 '“',
 '”']

In [6]:
len(chars)

72

In [7]:
len(raw_text)

298246

In [8]:
n_chars = len(raw_text)
n_vocab = len(chars)

In [9]:
seq_len = 100
dataX = []
dataY = []
for i in range(0,n_chars-seq_len,1):
    seq_in = raw_text[i:i+seq_len] #0-99, train 99 values
    seq_out = raw_text[i+seq_len] #100, for 100th
    dataX.append([char_to_int[char] for char in seq_in])
    dataY.append(char_to_int[seq_out])
n_patterns = len(dataX)
print("Total patterns : ", n_patterns)

Total patterns :  298146


In [10]:
len(dataX[0])

100

In [11]:
dataY[20]

56

In [12]:
X = numpy.reshape(dataX , (n_patterns,seq_len,1)) #(samples, timesans, features)
X = X/float(n_vocab) #0-1
y = to_categorical(dataY, num_classes=n_vocab)

In [13]:
X.shape

(298146, 100, 1)

In [14]:
from sklearn.model_selection import train_test_split
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)

In [15]:
#model = Sequential()
#model.add(LSTM(256, input_shape= (X.shape[1],X.shape[2]))) #100,1
#model.add(Dropout(0.2)) #20%
#model.add(Dense(y.shape[1], activation="softmax")) #y.shape[1] = num of voc size

In [16]:
from tensorflow.keras.layers import BatchNormalization

model = Sequential()
model.add(LSTM(512, return_sequences=True, input_shape=(X.shape[1], X.shape[2])))
model.add(Dropout(0.2))
model.add(BatchNormalization())
model.add(LSTM(256))  # Second LSTM layer
model.add(Dropout(0.2))
model.add(Dense(y.shape[1], activation="softmax"))

In [17]:
model.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 lstm (LSTM)                 (None, 100, 512)          1052672   
                                                                 
 dropout (Dropout)           (None, 100, 512)          0         
                                                                 
 batch_normalization (BatchN  (None, 100, 512)         2048      
 ormalization)                                                   
                                                                 
 lstm_1 (LSTM)               (None, 256)               787456    
                                                                 
 dropout_1 (Dropout)         (None, 256)               0         
                                                                 
 dense (Dense)               (None, 72)                18504     
                                                        

In [18]:
filepath = "weights-improvement-{epoch:01d}-{loss:.4f}.hdf5" #store path and type
checkpoint = ModelCheckpoint(filepath, monitor="loss", verbose=1, save_best_only=True)
#verbose = display the description when it was save
callbacks_list = [checkpoint]

In [19]:
model.compile(loss="categorical_crossentropy", optimizer="adam")

In [20]:
checkpoint = ModelCheckpoint("weights-improvement-{epoch:02d}-{loss:.4f}.hdf5", monitor="loss", verbose=1, save_best_only=True, mode="min")
early_stopping = EarlyStopping(monitor='loss', patience=5, verbose=1, mode='min')
reduce_lr = ReduceLROnPlateau(monitor='loss', factor=0.5, patience=3, min_lr=1e-5, verbose=1)
callbacks_list = [checkpoint, early_stopping, reduce_lr]

In [21]:
model.fit(X_train, y_train, validation_data=(X_val, y_val), epochs=50, batch_size=128, callbacks=callbacks_list)

Epoch 1/50
Epoch 1: loss improved from inf to 2.82854, saving model to weights-improvement-01-2.8285.hdf5
Epoch 2/50
Epoch 2: loss improved from 2.82854 to 2.38011, saving model to weights-improvement-02-2.3801.hdf5
Epoch 3/50
Epoch 3: loss improved from 2.38011 to 2.09093, saving model to weights-improvement-03-2.0909.hdf5
Epoch 4/50
  75/1864 [>.............................] - ETA: 2:41 - loss: 1.9324

KeyboardInterrupt: 

In [None]:
f_name = "weights-improvement-40-1.2065.hdf5"
model.load_weights(f_name)
model.compile(loss="categorical_crossentropy")
model.compile(loss="categorical_crossentropy",optimizer="adam")

In [None]:
int_to_char = dict((i,c) for i,c in enumerate(chars))

In [None]:
int_to_char

In [None]:
import sys

In [None]:
def sample(predictions, temperature=1.0):
    predictions = numpy.log(predictions) / temperature
    exp_predictions = numpy.exp(predictions)
    predictions = exp_predictions / numpy.sum(exp_predictions)
    return numpy.random.choice(len(predictions), p=predictions)


In [None]:
start = numpy.random.randint(0,len(dataX))
pattern = dataX[start]
print("seed")
print("\"", "".join([int_to_char[value] for value in pattern]),"\"")

for i in range(1000):
    x = numpy.reshape(pattern, (1, len(pattern), 1))
    x = x / float(n_vocab)
    prediction = model.predict(x, verbose=0)[0]
    index = sample(prediction, temperature=0.8)
    result = int_to_char[index]
    seq_in = [int_to_char[value] for value in pattern]
    sys.stdout.write(result)
    pattern.append(index)
    pattern = pattern[1:len(pattern)]