In [115]:
import tensorflow as tf
import numpy as np

# Load and preprocess the dataset
import csv

# Load and preprocess the dataset
def preprocess_data(data_path):
  # Load the raw data
  raw_data = []
  with open(data_path, 'r', encoding='utf-8') as file:
    reader = csv.reader(file)
    for row in reader:
      raw_data.append(row[0])  # Assume that the data is in the first column of the CSV
  # Tokenize the data to convert it into numerical form
  tokenizer = tf.keras.preprocessing.text.Tokenizer()
  tokenizer.fit_on_texts(raw_data)
  data = tokenizer.texts_to_sequences(raw_data)
  # Pad the data to ensure that all sequences have the same length
  data = tf.keras.preprocessing.sequence.pad_sequences(data)
  # Shuffle the data to mix up the examples
  np.random.shuffle(data)
#   print(data)
  return data, tokenizer


# Set the input and output dimensions for the GAN
input_dim = 100  # The number of words in the input sentence
output_dim = 100  # The number of words in the output sentence

# Build the generator model
generator = tf.keras.Sequential()
generator.add(tf.keras.layers.InputLayer(input_shape=(input_dim,)))
generator.add(tf.keras.layers.Dense(units=512, activation='relu'))
generator.add(tf.keras.layers.Dense(units=1024, activation='relu'))
generator.add(tf.keras.layers.Dense(units=2048, activation='relu'))
generator.add(tf.keras.layers.Dense(units=4096, activation='relu'))
generator.add(tf.keras.layers.Dense(units=output_dim, activation='softmax'))

# Build the discriminator model
discriminator = tf.keras.Sequential()
discriminator.add(tf.keras.layers.InputLayer(input_shape=(output_dim,)))
discriminator.add(tf.keras.layers.Dense(units=4098, activation='relu'))
discriminator.add(tf.keras.layers.Dense(units=2048, activation='relu'))
discriminator.add(tf.keras.layers.Dense(units=1024, activation='relu'))
discriminator.add(tf.keras.layers.Dense(units=512, activation='relu'))
discriminator.add(tf.keras.layers.Dense(units=1, activation='sigmoid'))

# Compile the models
generator.compile(optimizer='adam', loss='categorical_crossentropy')
discriminator.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

# Build the GAN model by stacking the generator and discriminator
gan = tf.keras.Sequential()
gan.add(generator)
gan.add(discriminator)
gan.compile(optimizer='adam', loss='binary_crossentropy')

# Set the number of epochs and batch size for training
epochs = 5
batch_size = 32

# Load and preprocess the data
data_path = './data.csv'  # Replace with the file path to the dataset
data, tokenizer = preprocess_data(data_path)

# Define a function to generate a batch of synthetic data
def generate_synthetic_data(batch_size):
  synthetic_data = []
  for i in range(batch_size):
    # Generate a random input sentence
    input_sentence = np.reshape(np.random.randint(low=0, high=input_dim, size=(input_dim,)), (1, -1))
#     print(input_sentence, ",,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,")
    # Use the generator to generate a synthetic output sentence
    synthetic_output = generator.predict(input_sentence)
    synthetic_data.append((input_sentence, synthetic_output))
  return synthetic_data

# Train the GAN
for epoch in range(epochs):
  # Generate a batch of synthetic data
  synthetic_data = generate_synthetic_data(batch_size)
  input_sentences, synthetic_outputs = zip(*synthetic_data)
  # Generate a batch of real data
  real_data, _ = preprocess_data(data_path)
  real_data = real_data[epoch*batch_size:(epoch+1)*batch_size]

  real_outputs = real_data[:, -output_dim:]
  # Train the discriminator on the synthetic and real data
  discriminator.trainable = True
  d_loss_real = discriminator.train_on_batch(real_outputs, np.ones((batch_size, 1)))
  d_loss_synthetic = discriminator.train_on_batch(np.reshape(synthetic_outputs, (32,-1)), np.zeros((batch_size, 1)))
  d_loss = 0.5 * (d_loss_real[0] + d_loss_synthetic[0])
  d_acc = 0.5 * (d_loss_real[1] + d_loss_synthetic[1])
  # Train the generator
  discriminator.trainable = False
  g_loss = gan.train_on_batch(np.reshape(input_sentences, (32,-1)), np.ones((batch_size, 1)))
  # Print the losses and accuracies
  print(f'Epoch {epoch+1}/{epochs} - d_loss: {d_loss:.3f}, d_acc: {d_acc:.3f}, g_loss: {g_loss:.3f}')

# Save the trained models
generator.save('generator.h5')
discriminator.save('discriminator.h5')

# Test the GAN
def test_gan(sentence):
  # Preprocess the input sentence
  _, tokenizer = preprocess_data(data_path)
  sentence = tokenizer.texts_to_sequences([sentence])

  sentence = tf.keras.preprocessing.sequence.pad_sequences(sentence, maxlen=input_dim)
  # Use the generator to generate a synthetic output sentence
  output = generator.predict(sentence)
  print(output, "first")
#   print(int(np.argmax(output, axis=-1)), "secnof")
  # Convert the output back to text
  output = tokenizer.sequences_to_texts(int(np.argmax(output, axis=-1)[0]))
#   output = tokenizer.sequences_to_texts(np.reshape(output)
  return output

# Test the GAN on a few example sentences
# sentences = ['This is a test sentence.', 'I am just trying out this GAN.', 'I hope it works well!']
# outputs = [test_gan(sentence) for sentence in sentences]

# # Print the results
# for i, (sentence, output) in enumerate(zip(sentences, outputs)):
#   print(f'Example {i+1}:')
#   print(f'Input: {sentence}')
#   print(f'Output: {output}')

Epoch 1/5 - d_loss: 83.048, d_acc: 0.047, g_loss: 0.248
Epoch 2/5 - d_loss: 4.758, d_acc: 0.500, g_loss: 0.000
Epoch 3/5 - d_loss: 11.291, d_acc: 0.500, g_loss: 0.000
Epoch 4/5 - d_loss: 17.393, d_acc: 0.500, g_loss: 0.000
Epoch 5/5 - d_loss: 21.594, d_acc: 0.500, g_loss: 0.000


In [116]:
sentences = ['This is a test sentence.', 'I am just trying out this GAN.', 'I hope it works well!']
outputs = [test_gan(sentence) for sentence in sentences]

# Print the results
# print(test_gan("Great course"))

for i, (sentence, output) in enumerate(zip(sentences, outputs)):
  print(f'Example {i+1}:')
  print(f'Input: {sentence}')
  print(f'Output: {output}')

[[0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
  0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
  0. 0. 0. 0. 0. 1. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
  0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
  0. 0. 0. 0.]] first


TypeError: 'int' object is not iterable

In [None]:
import pandas as pd
data = pd.read_csv("./data.csv")
test = list(data['reviews'])
print(test_gan(test[:10]))