<a href="https://colab.research.google.com/github/Rstam59/TaskDataRepoForStudents/blob/main/Date_conversion.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import numpy as np
import tensorflow as tf
from datetime import date
from tensorflow.keras.layers import Input, LSTM, Dense, Embedding, TextVectorization
from tensorflow.keras.models import Model


MONTHS = ["January", "February", "March", "April", "May", "June",
          "July", "August", "September", "October", "November", "December"]


def random_dates(n_dates):
    """Generate random dates rendered in two textual formats.

    Dates are drawn uniformly, using NumPy's global random state, from
    1000-01-01 through 9999-12-31 with both ends inclusive.

    Args:
        n_dates: Number of dates to generate.

    Returns:
        A tuple ``(x, y)`` of two equal-length lists of strings. ``x``
        holds each date as ``"<Month name> DD, YYYY"`` and ``y`` holds the
        same date as ``"YYYY-MM-DD"``.
    """
    min_date = date(1000, 1, 1).toordinal()
    max_date = date(9999, 12, 31).toordinal()
    # randint's upper bound is exclusive; the +1 keeps max_date reachable.
    ordinals = np.random.randint(min_date, max_date + 1, size=n_dates)
    dates = [date.fromordinal(ordinal) for ordinal in ordinals]
    x = [MONTHS[d.month - 1] + " " + d.strftime("%d, %Y") for d in dates]
    y = [d.strftime("%Y-%m-%d") for d in dates]
    return x, y

# Seed NumPy's global generator so the same dataset is produced on every run.
np.random.seed(42)
x_texts, y_texts = random_dates(10000)

# Both vectorizers below tokenize per character, so the sequence markers must
# be single characters. Multi-character markers would be split into many
# tokens and the decoder input would no longer be the target shifted right by
# exactly one step, which is what teacher forcing requires.
START_CHAR = "\t"
END_CHAR = "\n"
decoder_inputs_texts = [START_CHAR + text for text in y_texts]
decoder_targets_texts = [text + END_CHAR for text in y_texts]

# Sequence lengths are measured in characters, matching split='character'.
max_input_len = max(len(txt) for txt in x_texts)
max_decoder_input_len = max(len(txt) for txt in decoder_inputs_texts)
max_decoder_target_len = max(len(txt) for txt in decoder_targets_texts)

# standardize=None keeps case, punctuation and the marker characters intact.
input_vectorizer = TextVectorization(
    output_mode='int',
    output_sequence_length=max_input_len,
    split='character',
    standardize=None
)
# One shared vectorizer for decoder inputs and targets so that both use the
# same character-to-id mapping and the same padded length.
decoder_vectorizer = TextVectorization(
    output_mode='int',
    output_sequence_length=max(max_decoder_input_len, max_decoder_target_len),
    split='character',
    standardize=None
)

# Adapt vectorizers (the decoder vocabulary must cover both marker characters)
input_vectorizer.adapt(x_texts)
decoder_vectorizer.adapt(decoder_inputs_texts + decoder_targets_texts)

# Vectorize
encoder_input = input_vectorizer(tf.constant(x_texts))
decoder_input = decoder_vectorizer(tf.constant(decoder_inputs_texts))
decoder_target = decoder_vectorizer(tf.constant(decoder_targets_texts))

# Get vocab size
input_vocab_size = len(input_vectorizer.get_vocabulary())
target_vocab_size = len(decoder_vectorizer.get_vocabulary())

# One-hot encode the targets so they can be used with a categorical
# cross-entropy loss over the decoder's per-step softmax output.
decoder_target = tf.one_hot(decoder_target, depth=target_vocab_size)


# Size of the embeddings and of the LSTM hidden/cell states.
latent_dim = 256

# Encoder: embeds the input characters and keeps only the final LSTM states,
# which summarize the whole input sequence.
encoder_inputs = Input(shape=(None,), name="encoder_inputs")
x = Embedding(input_vocab_size, latent_dim, mask_zero=True)(encoder_inputs)
encoder_outputs, state_h, state_c = LSTM(latent_dim, return_state=True)(x)
encoder_states = [state_h, state_c]

# Decoder: starts from the encoder states and must emit one prediction per
# time step, because the targets are one-hot vectors for every step. That
# requires return_sequences=True and a single output tensor; with
# return_state=True alone the layer returns a list [output, h, c] holding only
# the last step, which cannot be fed to Dense.
decoder_inputs = Input(shape=(None,), name="decoder_inputs")
x = Embedding(target_vocab_size, latent_dim, mask_zero=True)(decoder_inputs)
x = LSTM(latent_dim, return_sequences=True)(x, initial_state=encoder_states)
decoder_outputs = Dense(target_vocab_size, activation="softmax")(x)

model = Model([encoder_inputs, decoder_inputs], decoder_outputs)
model.compile(optimizer="adam", loss="categorical_crossentropy", metrics=["accuracy"])
model.summary()



# Train on the vectorized (encoder input, decoder input) pairs against the
# one-hot decoder targets, with a 20% validation split.
model.fit(
    x=[encoder_input, decoder_input],
    y=decoder_target,
    validation_split=0.2,
    epochs=10,
    batch_size=64,
)
