In [508]:
import os
import time
import numpy as np
import pandas as pd
import tensorflow as tf
import tensorflow_datasets as tfds
import matplotlib.pyplot as plt

from tensorflow.keras import layers

Предобработка

In [509]:
dataset_path = "datasets/item_recommendation"
list_of_files = os.listdir(dataset_path)

column_names = ['movie_id,', 'user_id,', 'movie_rating']

for ds in list_of_files:
    with open(os.path.join(dataset_path, ds), 'r') as f:
        lines = f.readlines()
        new_lines = []
        for line in lines:
            new_lines.append(line.replace('\t', ','))

    with open(os.path.join(dataset_path, ds[:-3] + 'csv'), 'w') as f:
        f.writelines(column_names)
        f.write('\n')
        f.writelines(new_lines)


In [510]:
movielens_train = tf.data.experimental.make_csv_dataset(
    os.path.join(dataset_path, 'movielens-100k-train.csv'),
    shuffle=True,
    label_name='movie_rating',
    batch_size=10
)

movielens_test = tf.data.experimental.make_csv_dataset(
    os.path.join(dataset_path, 'movielens-100k-test.csv'),
    shuffle=True,
    label_name='movie_rating',
    batch_size=10
)

train_total = int(movielens_train.cardinality())
test_total = int(movielens_test.cardinality())

In [511]:
def preprocess(value, label):
    value = tf.transpose(tf.convert_to_tensor(list(value.values()), dtype=tf.float32))
    label = tf.cast(label, tf.float32)
    return value, label

In [512]:
movielens_train = movielens_train.map(preprocess)
movielens_test = movielens_test.map(preprocess)

In [513]:
for value, label in movielens_train.take(1):
    print(value, label)

tf.Tensor(
[[  15.  683.]
 [  55.  226.]
 [ 129. 1078.]
 [ 185.  828.]
 [   0.    3.]
 [  24.    0.]
 [ 143.  587.]
 [  15.  320.]
 [ 144.  226.]
 [  20.  987.]], shape=(10, 2), dtype=float32) tf.Tensor([5. 3. 3. 4. 3. 5. 4. 3. 4. 1.], shape=(10,), dtype=float32)


2024-05-28 19:49:49.286428: W tensorflow/core/framework/local_rendezvous.cc:404] Local rendezvous is aborting with status: OUT_OF_RANGE: End of sequence


In [514]:
def Generator():
    inputs = layers.Input(shape=[2])
    x = layers.Dense(8, kernel_initializer=tf.random_normal_initializer(0., 0.02), activation='relu')(inputs)
    x = layers.Dense(16, activation='relu')(x)
    last = layers.Dense(1)(x)
    return tf.keras.Model(inputs=inputs, outputs=last)

In [515]:
LAMBDA = 100

In [516]:
def generator_loss(discriminator_output, gen_output, target):
    loss_fn = tf.keras.losses.BinaryCrossentropy(from_logits=True)
    gan_loss = loss_fn(tf.ones_like(discriminator_output), discriminator_output)

    l1_loss = tf.reduce_mean(tf.abs(target - gen_output))

    total_gen_loss = gan_loss + LAMBDA*l1_loss

    return total_gen_loss, gan_loss, l1_loss

In [517]:
def Discriminator():
    inp = layers.Input(shape=[2], name='input')
    tar = layers.Input(shape=[1], name='target')
    x = layers.Concatenate()([inp, tar])
    x = layers.Dense(8, activation='relu')(x)
    last = layers.Dense(2)(x)
    return tf.keras.Model(inputs=[inp, tar], outputs=last)


In [518]:
def discriminator_loss(disc_real_output, disc_gen_output):
    loss_fn = tf.keras.losses.BinaryCrossentropy(from_logits=True)
    real_loss = loss_fn(tf.ones_like(disc_real_output), disc_real_output)
    gen_loss = loss_fn(tf.zeros_like(disc_gen_output), disc_gen_output)

    total_disc_loss = real_loss + gen_loss

    return total_disc_loss

In [519]:
generator_optimizer = tf.keras.optimizers.Adam(2e-4, beta_1=0.5)
discriminator_optimizer = tf.keras.optimizers.Adam(2e-4, beta_1=0.5)

In [520]:
generator = Generator()
discriminator = Discriminator()

checkpoint_dir = './training_checkpoints'
checkpoint_prefix = os.path.join(checkpoint_dir, "ckpt")
checkpoint = tf.train.Checkpoint(generator_optimizer=generator_optimizer,
                                 discriminator_optimizer=discriminator_optimizer,
                                 generator=generator,
                                 discriminator=discriminator)


In [521]:
def test_accuracy(model, test_input, ground_truth):
    prediction = model(test_input, training=True)
    print(abs(ground_truth[0]-prediction[0]))


In [522]:
@tf.function
def train_step(input, target, step):
  with tf.GradientTape() as gen_tape, tf.GradientTape() as disc_tape:
    gen_output = generator(input, training=True)

    disc_real_output = discriminator([input, target], training=True)
    disc_generated_output = discriminator([input, gen_output], training=True)

    gen_total_loss, gen_gan_loss, gen_l1_loss = generator_loss(disc_generated_output, gen_output, target)
    disc_loss = discriminator_loss(disc_real_output, disc_generated_output)

  generator_gradients = gen_tape.gradient(gen_total_loss,
                                          generator.trainable_variables)
  discriminator_gradients = disc_tape.gradient(disc_loss,
                                               discriminator.trainable_variables)

  generator_optimizer.apply_gradients(zip(generator_gradients,
                                          generator.trainable_variables))
  discriminator_optimizer.apply_gradients(zip(discriminator_gradients,
                                              discriminator.trainable_variables))


In [523]:
def fit(train_ds, test_ds, steps):
  example_input, example_target = next(iter(test_ds.take(1)))
  start = time.time()

  for step, (input, target) in train_ds.take(steps).enumerate():
    if (step) % 1000 == 0:

      if step != 0:
        print(f'Time taken for 1000 steps: {time.time()-start:.2f} sec\n')

      start = time.time()

      test_accuracy(generator, example_input, example_target)
      print(f"Step: {step//1000}k")

    train_step(input, target, step)

    # Training step
    if (step+1) % 10 == 0:
      print('.', end='', flush=True)


    # Save (checkpoint) the model every 5k steps
    if (step + 1) % 5000 == 0:
      checkpoint.save(file_prefix=checkpoint_prefix)


In [524]:
fit(movielens_train, movielens_test, 40000)

tf.Tensor([10.348309], shape=(1,), dtype=float32)
Step: 0k
....................................................................................................Time taken for 1000 steps: 2.13 sec

tf.Tensor([2.1568756], shape=(1,), dtype=float32)
Step: 1k
....................................................................................................Time taken for 1000 steps: 1.68 sec

tf.Tensor([2.1510177], shape=(1,), dtype=float32)
Step: 2k
....................................................................................................Time taken for 1000 steps: 1.70 sec

tf.Tensor([2.1672354], shape=(1,), dtype=float32)
Step: 3k
....................................................................................................Time taken for 1000 steps: 1.68 sec

tf.Tensor([2.2303405], shape=(1,), dtype=float32)
Step: 4k
....................................................................................................Time taken for 1000 steps: 1.70 sec

tf.Tensor([2.3988914

2024-05-28 19:50:57.022667: W tensorflow/core/framework/local_rendezvous.cc:404] Local rendezvous is aborting with status: OUT_OF_RANGE: End of sequence
