In [1]:
#Importing Libraries:
import numpy as np
import tensorflow as tf

In [2]:
#Loading Dataset:
def load_data(file):
  with open(file, 'r') as files:
    names = files.readlines()
  names = [name.strip() for name in names]
  return names

In [3]:
#Create mappings :
def create_mappings(names):
  all_chars = sorted(set("".join(names)))
  char_to_index = {char: i for i, char in enumerate(all_chars)}
  index_to_char = {i: char for i, char in enumerate(all_chars)}
  return all_chars, char_to_index, index_to_char

In [4]:
#Convert names to sequences:
def names_to_indices(names, char_to_index):
  indices = []
  for name in names:
    indices.append([char_to_index[char] for char in name])
  return indices

In [5]:
#Generate Batches:
def generate_batches(indices, batch_size):
  num_batches = len(indices)// batch_size
  indices = indices[:num_batches * batch_size]
  indices = np.array(indices)
  indices = indices.reshape((batch_size, -1))
  for i in range(0, indices.shape[1], max_len):
    x = indices[:, i:i+max_len]
    y = np.zeros_like(x)
    y[:, :-1] = x[:,1:]
    yield x, y

In [6]:
#Define LSTM:
def build_model(vocab_size, embedding_dim, rnn_units, batch_size):
  model = tf.keras.Sequential([
      tf.keras.layers.Embedding(vocab_size, embedding_dim, batch_input_shape=[batch_size, None]),
      tf.keras.layers.LSTM(rnn_units, return_sequences=True, stateful = True, recurrent_initialize = 'glorot_uniform'),
      tf.keras.layers.Dense(vocab_size)
  ])
  return model

In [7]:
#Generate Names:
def generate_name(model, index_to_char, start_char = 'A', length=20, temperature=1.0):
  input_eval = [char_to_index[start_char]]
  input_eval = tf.expand_dims(input_eval, 0)
  text_generated = []
  model.reset_states()
  for i in range(length):
    predictions = model(input_eval)
    predictions = tf.squeeze(predictions, 0)
    predeictions = predictions /temperature
    predicted_id = tf.random.categorical(predictions, num_samples=1)[-1,0].numpy()
    input_eval = tf.expand_dims([predicted_id],0)
    text_generated.append(index_to_char[predicted_id])
  return start_char + ''.join(text_generated)

In [10]:
#Parameters:
file = '/content/dataset.txt'
max_len = 20
batch_size = 64
embedding_dim = 256
rnn_units = 1024
epochs = 50

In [11]:
#Load and Preprocess Data:
names = load_data(file)
all_chars, char_to_index, index_to_char = create_mappings(names)
vocab_size = len(all_chars)
indices = names_to_indices(names, char_to_index)

In [12]:
#Build and Compile Model:
model = build_model(vocab_size, embedding_dim, rnn_units, batch_size)
model.compile(optimizer ='adam', loss= tf.losses.SparseCategoricalCrossentropy(from_logits=True))

TypeError: ('Keyword argument not understood:', 'recurrent_initialize')

In [None]:
import numpy as np
import tensorflow as tf

# Load the dataset
def load_data(file_path):
    with open(file_path, 'r') as file:
        names = file.readlines()
    names = [name.strip() for name in names]
    return names

# Create mappings for characters to indices and vice versa
def create_mappings(names):
    all_chars = sorted(set(''.join(names)))
    char_to_index = {char: i for i, char in enumerate(all_chars)}
    index_to_char = {i: char for i, char in enumerate(all_chars)}
    return all_chars, char_to_index, index_to_char

# Convert names to sequences of indices
def names_to_indices(names, char_to_index):
    indices = []
    for name in names:
        indices.append([char_to_index[char] for char in name])
    return indices

# Generate batches of data
def generate_batches(indices, batch_size):
    num_batches = len(indices) // batch_size
    indices = indices[:num_batches * batch_size]
    max_length = max(len(seq) for seq in indices)
    padded_indices = np.zeros((len(indices), max_length), dtype=np.int32)
    for i, seq in enumerate(indices):
        padded_indices[i, :len(seq)] = seq
    for i in range(0, padded_indices.shape[1], max_len):
        x = padded_indices[:, i:i+max_len]
        y = np.zeros_like(x)
        y[:, :-1] = x[:, 1:]
        yield tf.convert_to_tensor(x, dtype=tf.int32), tf.convert_to_tensor(y, dtype=tf.int32)

# Define the LSTM model
def build_model(vocab_size, embedding_dim, rnn_units, batch_size):
    model = tf.keras.Sequential([
        tf.keras.layers.Embedding(vocab_size, embedding_dim, batch_input_shape=[batch_size, None]),
        tf.keras.layers.LSTM(rnn_units, return_sequences=True, stateful=True, recurrent_initializer='glorot_uniform'),
        tf.keras.layers.Dense(vocab_size)
    ])
    return model

# Generate names
def generate_name(model, index_to_char, start_char='A', length=20, temperature=1.0):
    input_eval = [char_to_index[start_char]]
    input_eval = tf.expand_dims(input_eval, 0)
    text_generated = []
    model.reset_states()
    for i in range(length):
        predictions = model(input_eval)
        predictions = tf.squeeze(predictions, 0)
        predictions = predictions / temperature
        predicted_id = tf.random.categorical(predictions, num_samples=1)[-1,0].numpy()
        input_eval = tf.expand_dims([predicted_id], 0)
        text_generated.append(index_to_char[predicted_id])
    return start_char + ''.join(text_generated)

# Parameters
file_path = '/content/dataset.txt'  # Update with your dataset path
max_len = 20
batch_size = 64
embedding_dim = 256
rnn_units = 1024
epochs = 50

# Load and preprocess data
names = load_data(file_path)
all_chars, char_to_index, index_to_char = create_mappings(names)
vocab_size = len(all_chars)
indices = names_to_indices(names, char_to_index)

# Build and compile the model
model = build_model(vocab_size, embedding_dim, rnn_units, batch_size)
model.compile(optimizer='adam', loss=tf.losses.SparseCategoricalCrossentropy(from_logits=True))

# Train the model
for epoch in range(epochs):
    for batch_x, batch_y in generate_batches(indices, batch_size):
        model.fit(batch_x, batch_y, batch_size=batch_size, epochs=1, verbose=0)
    print(f'Epoch {epoch+1}/{epochs}')

# Generate names
num_names_to_generate = 5
for _ in range(num_names_to_generate):
    generated_name = generate_name(model, index_to_char)
    print("Generated Name:", generated_name)
