In [None]:
# Imports
import tensorflow as tf
from tensorflow import keras
from keras import layers
from keras.utils import pad_sequences
from keras_preprocessing.text import Tokenizer
import os
import numpy as np
import pickle
# Show which GPUs TensorFlow can see; an empty list means no GPU is visible.
print(tf.config.list_physical_devices('GPU'))

In [None]:
# Load the parallel dataset: line i of prompts.txt is paired with line i of
# code.txt further down the notebook.
# The encoding is pinned so the result does not depend on the platform default
# (e.g. cp1252 on Windows).
with open("Input_Dataset_2_5m/prompts.txt", encoding="utf-8") as f:
    input_texts = f.read().splitlines()

with open("Input_Dataset_2_5m/code.txt", encoding="utf-8") as f:
    output_texts = f.read().splitlines()

# The pairs are built with zip(), which silently drops the unmatched tail of
# the longer list. A length mismatch means the files are out of sync (note that
# splitlines() also splits on characters such as form feed), so fail loudly.
if len(input_texts) != len(output_texts):
    raise ValueError(
        f"prompts.txt has {len(input_texts)} lines but code.txt has "
        f"{len(output_texts)}; the two files must be line-aligned."
    )

In [None]:
# SECURITY: pickle.load can execute arbitrary code stored in the file. Only
# load a tokenizer.pkl that you produced yourself or otherwise trust.
with open("tokenizer.pkl", "rb") as f:
    loaded_tokenizer = pickle.load(f)

# One training string per (prompt, code) pair, wrapped in the special markers.
combined_texts = [
    "<Start> " + prompt + " <SEP> " + code + " <End>"
    for prompt, code in zip(input_texts, output_texts)
]

# Snapshot the vocabulary that came with the pickled tokenizer. These ids are
# the ones the pretrained embedding rows were learned for, so they must not
# change.
# NOTE(review): assumes the pickle holds either the raw Keras index (ids start
# at 1) or the already shifted one containing "<PAD>": 0 -- confirm against the
# notebook that trained the checkpoint.
pretrained_word_index = dict(loaded_tokenizer.word_index)
if "<PAD>" not in pretrained_word_index:
    pretrained_word_index = {
        "<PAD>": 0,
        **{word: idx + 1 for word, idx in pretrained_word_index.items()},
    }

# fit_on_texts() updates the word counts and then rebuilds word_index from
# scratch, ranked by frequency. Using that index directly would renumber words
# the model already knows; it is only used here to discover the new words.
loaded_tokenizer.fit_on_texts(combined_texts)

merged_word_index = dict(pretrained_word_index)
next_index = max(merged_word_index.values()) + 1
for word in loaded_tokenizer.word_index:  # most frequent first
    if word not in merged_word_index:
        merged_word_index[word] = next_index
        next_index += 1

loaded_tokenizer.word_index = merged_word_index
loaded_tokenizer.index_word = {idx: word for word, idx in merged_word_index.items()}

sequences = loaded_tokenizer.texts_to_sequences(combined_texts)
if not sequences:
    raise ValueError("No training texts were loaded; cannot build sequences.")

max_seq_len = max(len(seq) for seq in sequences)

# Pad with 0 (the "<PAD>" id) on the right. maxlen equals the longest
# sequence, so nothing is truncated.
sequences = pad_sequences(sequences, maxlen=max_seq_len, padding='post', truncating="post")
print(max_seq_len)

# Number of embedding rows needed: the largest token id plus one.
vocab_size = max(merged_word_index.values()) + 1

In [None]:
# Load the pretrained decoder checkpoint that the following cells fine-tune.
# NOTE(review): this is an absolute path ("/Models/..." at the filesystem
# root) -- confirm it is not meant to be relative to the notebook.
pretrained_model_path = "/Models/best_decoder_model1.keras"
model = keras.models.load_model(pretrained_model_path)

In [None]:
# Fine-tuning

# Grow the embedding matrix when the tokenizer vocabulary is larger than the
# one stored in the checkpoint: existing rows are kept, new rows are appended.
embedding_layer = model.get_layer("embedding_layer")
old_weights = embedding_layer.get_weights()[0]
old_vocab_size, embedding_dim = old_weights.shape
num_new_tokens = vocab_size - old_vocab_size

if num_new_tokens < 0:
    # A smaller vocabulary means the tokenizer does not belong to this
    # checkpoint; token ids would not line up with the embedding rows.
    raise ValueError(
        f"Tokenizer vocabulary ({vocab_size}) is smaller than the model's "
        f"embedding table ({old_vocab_size}); tokenizer and checkpoint do not match."
    )

new_weights = old_weights
if num_new_tokens > 0:
    # Draw the new rows at the same scale as the trained embeddings instead of
    # N(0, 1), so the new tokens do not dominate the first updates.
    new_rows = np.random.normal(
        scale=float(old_weights.std()),
        size=(num_new_tokens, embedding_dim),
    ).astype(old_weights.dtype)
    new_weights = np.vstack([old_weights, new_rows])

    # NOTE(review): the layer was created with the old vocabulary size, and
    # Keras set_weights() rejects arrays whose shape differs from the existing
    # variable, so this is expected to raise. The model's output layer is not
    # resized here either. Growing the vocabulary most likely requires
    # rebuilding the model with the new size and copying the old weights --
    # confirm against the model definition.
    embedding_layer.build((None,))
    embedding_layer.set_weights([new_weights])

# Small learning rate for fine-tuning. It was previously defined but never
# used: optimizer="adam" trains with Adam's default of 1e-3.
initial_learning_rate = 1e-4

# Cosine schedule kept for reference only. It is deliberately NOT given to the
# optimizer: ReduceLROnPlateau (below) has to overwrite the optimizer's
# learning rate, which Keras does not allow when the learning rate is a
# LearningRateSchedule. To use the schedule instead, pass it as
# `learning_rate` and remove ReduceLROnPlateau from the callbacks.
lr_schedule = keras.optimizers.schedules.CosineDecay(
    initial_learning_rate=initial_learning_rate,
    decay_steps=1000,
    alpha=0.1
)

model.compile(
    optimizer=keras.optimizers.Adam(
        learning_rate=initial_learning_rate,
        # Clip the global gradient norm inside the optimizer. The former
        # LambdaCallback called tf.clip_by_global_norm on the model *weights*
        # and discarded the result, so it never clipped anything.
        global_clipnorm=1.0,
    ),
    # from_logits=False: assumes the model's last layer already applies a
    # softmax -- TODO confirm against the model definition.
    loss=keras.losses.SparseCategoricalCrossentropy(from_logits=False),
    metrics=['accuracy']
)

model.summary()


callbacks = [
    # Stop after 5 epochs without val_loss improvement and roll back to the
    # best weights seen.
    keras.callbacks.EarlyStopping(
        monitor="val_loss",
        patience=5,
        restore_best_weights=True,
        verbose=1
    ),

    # Halve the learning rate after 3 stagnant epochs, down to 1e-7.
    keras.callbacks.ReduceLROnPlateau(
        monitor='val_loss',
        factor=0.5,
        patience=3,
        min_lr=1e-7,
        verbose=1
    ),

    # Keep only the checkpoint with the lowest val_loss.
    keras.callbacks.ModelCheckpoint(
        filepath='best_decoder_model_fine_tuned.keras',
        monitor='val_loss',
        save_best_only=True,
        verbose=1
    ),
]


# Next-token prediction: the inputs are every position except the last, and
# the targets are the same sequences shifted one position to the left.
X, y = sequences[:, :-1], sequences[:, 1:]

batch_size = 256
# Keras takes the validation samples from the end of X/y, before shuffling.
validation_split = 0.2

fit_options = dict(
    batch_size=batch_size,
    epochs=20,
    validation_split=validation_split,
    callbacks=callbacks,
    verbose=1,
    shuffle=True,
)
history = model.fit(X, y, **fit_options)
