In [1]:
import tensorflow as tf
from tensorflow.keras import layers, Model
import os
import cv2
import numpy as np

In [2]:
# === ✅ Step 1: Enable GPU Memory Growth BEFORE TensorFlow Initializes ===
gpus = tf.config.experimental.list_physical_devices('GPU')
if gpus:
    try:
        for gpu in gpus:
            tf.config.experimental.set_memory_growth(gpu, True)
        print("✅ GPU Memory Growth Enabled")
    except RuntimeError as e:
        print(f"❌ Error: {e}")

# === ✅ Step 2: Initialize Multi-GPU Training ===
strategy = tf.distribute.MirroredStrategy(devices=["/GPU:0", "/GPU:1"])
print(f"✅ Using {strategy.num_replicas_in_sync} GPUs")

✅ GPU Memory Growth Enabled
✅ Using 2 GPUs


In [3]:
# === Beta-VAE Model ===
class BetaVAE(Model):
    def __init__(self, latent_dim=128, beta=4.0):
        super(BetaVAE, self).__init__()
        self.latent_dim = latent_dim
        self.beta = beta  # Weight for KL loss

        # Encoder
        self.encoder = tf.keras.Sequential([
            layers.InputLayer(input_shape=(64, 64, 3)),
            layers.Conv2D(64, (3, 3), strides=2, padding="same", activation="relu"),
            layers.Conv2D(128, (3, 3), strides=2, padding="same", activation="relu"),
            layers.Conv2D(256, (3, 3), strides=2, padding="same", activation="relu"),
            layers.Flatten(),
            layers.Dense(128, activation="relu"),
            layers.Dense(2 * latent_dim)  # Output mean and log variance
        ])

        # Decoder
        self.decoder = tf.keras.Sequential([
            layers.InputLayer(input_shape=(latent_dim,)),
            layers.Dense(8 * 8 * 256, activation="relu"),
            layers.Reshape((8, 8, 256)),
            layers.Conv2DTranspose(128, (3, 3), strides=2, padding="same", activation="relu"),
            layers.Conv2DTranspose(64, (3, 3), strides=2, padding="same", activation="relu"),
            layers.Conv2DTranspose(3, (3, 3), strides=2, padding="same", activation="tanh")
        ])

    def reparameterize(self, mean, log_var):
        """Reparameterization trick: Sample from N(mean, std)."""
        eps = tf.random.normal(shape=tf.shape(mean))
        return mean + tf.exp(0.5 * log_var) * eps

    def call(self, x):
        """Forward pass through encoder and decoder."""
        z_params = self.encoder(x)
        mean, log_var = tf.split(z_params, num_or_size_splits=2, axis=1)
        z = self.reparameterize(mean, log_var)
        x_reconstructed = self.decoder(z)

        # KL divergence loss (Beta-VAE)
        kl_loss = -0.5 * tf.reduce_mean(1 + log_var - tf.square(mean) - tf.exp(log_var))
        self.add_loss(self.beta * kl_loss)

        return x_reconstructed

In [4]:
# === Load KITTI Dataset ===
def load_kitti_data(left_images_dir, right_images_dir, image_size=(64, 64)):
    left_images, right_images = [], []

    for root, _, files in os.walk(left_images_dir):
        for file in files:
            if file.endswith(".png") or file.endswith(".jpg"):
                img_path = os.path.join(root, file)
                img = cv2.imread(img_path)
                img = cv2.resize(img, image_size) / 127.5 - 1  # Normalize to [-1, 1]
                left_images.append(img)

    for root, _, files in os.walk(right_images_dir):
        for file in files:
            if file.endswith(".png") or file.endswith(".jpg"):
                img_path = os.path.join(root, file)
                img = cv2.imread(img_path)
                img = cv2.resize(img, image_size) / 127.5 - 1  # Normalize to [-1, 1]
                right_images.append(img)

    return np.array(left_images), np.array(right_images)

In [5]:
# === Paths ===
left_train_dir = "/kaggle/input/kitti-dataset/data_object_image_2/training/image_2"
right_train_dir = "/kaggle/input/kitti-dataset/data_object_image_3/training/image_3"
left_test_dir= "/kaggle/input/kitti-dataset/data_object_image_2/testing/image_2"
right_test_dir="/kaggle/input/kitti-dataset/data_object_image_3/testing/image_3"

In [6]:
# === Load Data ===
train_L, train_R = load_kitti_data(left_train_dir, right_train_dir)
print(f"Train Left Images {train_L.shape}")
print(f"Train Right Images {train_R.shape}")

test_L, test_R = load_kitti_data(left_test_dir, right_test_dir)
print(f"Test Left Images {test_L.shape}")
print(f"Test Right Images {test_R.shape}")

Train Left Images (7481, 64, 64, 3)
Train Right Images (7481, 64, 64, 3)
Test Left Images (7518, 64, 64, 3)
Test Right Images (7518, 64, 64, 3)


In [7]:
# === Train Beta-VAE on Multi-GPU ===
with strategy.scope():
    vae = BetaVAE(latent_dim=128, beta=4.0)
    vae.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=0.0001), metrics=["accuracy"])
    
    vae.fit(train_L, train_R, epochs=1000, batch_size=128,validation_data=(test_L,test_R))



Epoch 1/1000
[1m59/59[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m14s[0m 144ms/step - loss: 0.4474 - val_loss: 0.3807
Epoch 2/1000
[1m59/59[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 50ms/step - loss: 0.3931 - val_loss: 0.3575
Epoch 3/1000
[1m59/59[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 50ms/step - loss: 0.3727 - val_loss: 0.3482
Epoch 4/1000
[1m59/59[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 50ms/step - loss: 0.3650 - val_loss: 0.3442
Epoch 5/1000
[1m59/59[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 50ms/step - loss: 0.3627 - val_loss: 0.3421
Epoch 6/1000
[1m59/59[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 50ms/step - loss: 0.3607 - val_loss: 0.3411
Epoch 7/1000
[1m59/59[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 50ms/step - loss: 0.3608 - val_loss: 0.3401
Epoch 8/1000
[1m59/59[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 50ms/step - loss: 0.3585 - val_loss: 0.3389
Epoch 9/1000
[1m59/59[0m [3

In [8]:

import os
import cv2
import numpy as np

# Create directory for generated images
vae_output_dir = "/kaggle/working/vae_generated/"
os.makedirs(vae_output_dir, exist_ok=True)

# Generate images using the VAE decoder
num_images = 1000  # Adjust as needed
latent_dim = vae.latent_dim  # Get the latent space size

for i in range(num_images):
    z_sample = np.random.normal(size=(1, latent_dim))  # Sample random latent vector
    generated_image = vae.decoder.predict(z_sample)[0]  # Generate image
    generated_image = ((generated_image + 1) * 127.5).astype(np.uint8)  # Convert to [0, 255]
    
    # Save image
    img_path = os.path.join(vae_output_dir, f"vae_{i:04d}.png")
    cv2.imwrite(img_path, generated_image)

print(f"✅ Saved {num_images} VAE-generated images to {vae_output_dir}")

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 293ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 130ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 124ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 135ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 120ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 117ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 130ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 121ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 126ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 123ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 121ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 125ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 125ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m 

In [9]:
%cd /kaggle/input/ns/other/default/1/stylegan3-main

/kaggle/input/ns/other/default/1/stylegan3-main


In [10]:
!zip -r /kaggle/working/vae_generated.zip /kaggle/working/vae_generated/

  adding: kaggle/working/vae_generated/ (stored 0%)
  adding: kaggle/working/vae_generated/vae_0745.png (deflated 1%)
  adding: kaggle/working/vae_generated/vae_0536.png (deflated 1%)
  adding: kaggle/working/vae_generated/vae_0009.png (deflated 1%)
  adding: kaggle/working/vae_generated/vae_0911.png (deflated 1%)
  adding: kaggle/working/vae_generated/vae_0816.png (deflated 1%)
  adding: kaggle/working/vae_generated/vae_0563.png (deflated 1%)
  adding: kaggle/working/vae_generated/vae_0034.png (deflated 1%)
  adding: kaggle/working/vae_generated/vae_0458.png (deflated 1%)
  adding: kaggle/working/vae_generated/vae_0962.png (deflated 1%)
  adding: kaggle/working/vae_generated/vae_0516.png (deflated 0%)
  adding: kaggle/working/vae_generated/vae_0688.png (deflated 1%)
  adding: kaggle/working/vae_generated/vae_0351.png (deflated 1%)
  adding: kaggle/working/vae_generated/vae_0366.png (deflated 1%)
  adding: kaggle/working/vae_generated/vae_0297.png (deflated 1%)
  adding: kaggle/working

In [11]:
!python train.py --outdir=/kaggle/working/training-runs \
  --cfg=stylegan3-r --data=/kaggle/working/vae_generated.zip \
  --gpus=2 --batch=32 --gamma=8.2 \
  --kimg=500 --aug=ada --fp32=no --metrics=fid50k \
  --workers=2


Training options:
{
  "G_kwargs": {
    "class_name": "training.networks_stylegan3.Generator",
    "z_dim": 512,
    "w_dim": 512,
    "mapping_kwargs": {
      "num_layers": 2
    },
    "channel_base": 65536,
    "channel_max": 1024,
    "magnitude_ema_beta": 0.9988915792636801,
    "conv_kernel": 1,
    "use_radial_filters": true
  },
  "D_kwargs": {
    "class_name": "training.networks_stylegan2.Discriminator",
    "block_kwargs": {
      "freeze_layers": 0
    },
    "mapping_kwargs": {},
    "epilogue_kwargs": {
      "mbstd_group_size": 4
    },
    "channel_base": 32768,
    "channel_max": 512
  },
  "G_opt_kwargs": {
    "class_name": "torch.optim.Adam",
    "betas": [
      0,
      0.99
    ],
    "eps": 1e-08,
    "lr": 0.0025
  },
  "D_opt_kwargs": {
    "class_name": "torch.optim.Adam",
    "betas": [
      0,
      0.99
    ],
    "eps": 1e-08,
    "lr": 0.002
  },
  "loss_kwargs": {
    "class_name": "training.loss.StyleGAN2Loss",
    "r1_gamma": 8.2,
    "blur_init_si

In [19]:
import spacy
from transformers import pipeline
from sentence_transformers import SentenceTransformer, util

# Load NLP model
nlp = spacy.load("en_core_web_sm")

# Load similarity model
similarity_model = SentenceTransformer("all-MiniLM-L6-v2")

# Load grammar checker
grammar_checker = pipeline("text2text-generation", model="vennify/t5-base-grammar-correction")

def validate_scenario(prompt, generated_text):
    """
    Validate GPT-2 generated scenario description.
    
    Parameters:
    - prompt (str): The original prompt.
    - generated_text (str): The generated scenario description.

    Returns:
    - dict: Validation results.
    """

    # 1. Check grammatical correctness
    corrected_text = grammar_checker(generated_text, max_length=512)[0]["generated_text"]

    # 2. Compute similarity with the prompt (Semantic validation)
    prompt_embedding = similarity_model.encode(prompt, convert_to_tensor=True)
    generated_embedding = similarity_model.encode(generated_text, convert_to_tensor=True)
    similarity_score = util.pytorch_cos_sim(prompt_embedding, generated_embedding).item()

    # 3. Check coherence (Basic spaCy parsing)
    doc = nlp(generated_text)
    is_coherent = len(list(doc.sents)) > 1  # More than one sentence ensures coherence

    return {
        "original_text": generated_text,
        "corrected_text": corrected_text,
        "semantic_similarity": similarity_score,
        "coherence_check": is_coherent
    }

# Example Usage
prompt = "A self-driving car navigates a busy city street."
generated_text = generate_scenario(prompt)

validation_results = validate_scenario(prompt, generated_text)
print(validation_results)

modules.json:   0%|          | 0.00/349 [00:00<?, ?B/s]

config_sentence_transformers.json:   0%|          | 0.00/116 [00:00<?, ?B/s]

README.md:   0%|          | 0.00/10.5k [00:00<?, ?B/s]

sentence_bert_config.json:   0%|          | 0.00/53.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/612 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/90.9M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/350 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/466k [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/112 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/190 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/1.42k [00:00<?, ?B/s]

pytorch_model.bin:   0%|          | 0.00/892M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/1.92k [00:00<?, ?B/s]

spiece.model:   0%|          | 0.00/792k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.39M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/1.79k [00:00<?, ?B/s]

Device set to use cuda:0


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

{'original_text': "A self-driving car navigates a busy city street.\n\nThe car is equipped with a GPS system that tracks the speed of the car's movements. The car then uses that information to determine the direction of the road ahead.\n\nThe car then uses that information to determine the direction of the road ahead.\n\nThe car then uses that information to determine the direction of the road ahead.\n\nThe car then uses that information to determine the direction of the road ahead.\n\nThe car then uses that information to determine the direction of the road ahead.\n\nThe car then uses that information to determine the direction of the road ahead.\n\nThe car then uses that information to determine the direction of the road ahead.\n\nThe car then uses that information to determine the direction of the road ahead.\n\nThe car then uses that information to determine the direction of the road ahead.\n\nThe car then uses that information to determine the direction of the road ahead.", 'corre