In [None]:
# Standard library
import os
import sys
import time
from datetime import datetime

# Core ML and Deep Learning Libraries
import tensorflow as tf
import keras
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import scipy

# Reinforcement Learning Libraries
import gymnasium as gym
from stable_baselines3 import PPO
from stable_baselines3.common.env_util import make_vec_env

# Utilities and Tools
import yaml
from PIL import Image

# Visualization Setup
plt.style.use('seaborn-v0_8')

# Seed every RNG this notebook draws from. NumPy's seed only covers the
# synthetic data; Keras weight initialisation and dropout do not use NumPy's
# generator, so keras.utils.set_random_seed (Python `random`, NumPy and the
# active backend) is needed as well for repeatable results.
np.random.seed(42)
keras.utils.set_random_seed(42)

# Report the versions of the key libraries so a failing run can be diagnosed
print("All imports successful!")
print("=" * 50)
print(f"Test Date: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}")
print(f"Python version: {sys.version.split()[0]}")
print(f"TensorFlow version: {tf.__version__}")
print(f"Keras version: {keras.__version__}")
print(f"NumPy version: {np.__version__}")
print(f"Gymnasium version: {gym.__version__}")
print("=" * 50)

# Check GPU availability (query the device list once and reuse it)
gpus = tf.config.list_physical_devices('GPU')
if gpus:
    print("GPU acceleration available!")
    print(f"   GPUs detected: {len(gpus)}")
else:
    print("Running on CPU (this is fine for our project)")

print("\nEnvironment validation starting...")


✅ All imports successful!
TensorFlow version: 2.19.0
Keras version: 3.10.0
NumPy version: 2.1.3


In [None]:
# Testing TensorFlow/Keras Neural Network Capabilities
print("Testing TensorFlow/Keras neural network creation and training...")

# Create a small fully-connected binary classifier as a smoke test.
# The input shape is declared with an explicit keras.Input entry: passing
# `input_shape=` to the first Dense layer is deprecated in Keras 3 and emits
# a UserWarning on every run. The parameter count is unchanged.
model = keras.Sequential([
    keras.Input(shape=(10,)),
    keras.layers.Dense(64, activation='relu'),
    keras.layers.Dropout(0.2),  # Add dropout for better generalization
    keras.layers.Dense(32, activation='relu'),
    keras.layers.Dense(1, activation='sigmoid')
], name="EscapeCageTestNetwork")

# Compile with the Adam optimizer; loss and metrics match the sigmoid output
model.compile(
    optimizer=keras.optimizers.Adam(learning_rate=0.001),
    loss='binary_crossentropy',
    metrics=['accuracy', 'precision', 'recall']
)

print(f"Neural network '{model.name}' created successfully")
print(f"   Total parameters: {model.count_params():,}")
print(f"   Layers: {len(model.layers)}")

# Build random features and random 0/1 labels. The data carries no signal;
# the point is only to exercise fit / evaluate / predict end to end.
print("\nTesting training loop with synthetic data...")
X_train = np.random.random(size=(1000, 10))  # stand-in observations
y_train = np.random.randint(low=0, high=2, size=1000)  # stand-in binary decisions

X_test = np.random.random(size=(200, 10))
y_test = np.random.randint(low=0, high=2, size=200)

# Fit quietly for a handful of epochs, validating on the held-out arrays
history = model.fit(
    x=X_train,
    y=y_train,
    validation_data=(X_test, y_test),
    batch_size=32,
    epochs=10,
    verbose=0,
)

# evaluate() returns the loss followed by the metrics in compile() order
test_loss, test_acc, test_prec, test_recall = model.evaluate(
    x=X_test, y=y_test, verbose=0
)

# Single-row inference check
sample_prediction = model.predict(X_test[0:1], verbose=0)

# Report everything in one newline-separated print
print(
    "Training completed successfully!",
    f"   Final accuracy: {test_acc:.3f}",
    f"   Final loss: {test_loss:.3f}",
    f"   Precision: {test_prec:.3f}",
    f"   Recall: {test_recall:.3f}",
    f"   Sample prediction: {sample_prediction[0, 0]:.3f}",
    sep="\n",
)

print("\nTensorFlow/Keras functionality verified!")


🧠 Testing TensorFlow/Keras...
✅ Neural network created with 2817 parameters


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


✅ TensorFlow/Keras training test successful!


In [None]:
# Testing Gymnasium and Stable-Baselines3 RL Framework
print("Testing reinforcement learning environment and training framework...")

# Create and test a simple RL environment
env = gym.make('CartPole-v1', render_mode=None)

# Seed both the environment and the action sampler so the random rollout
# below gives the same steps/reward after every kernel restart.
observation, info = env.reset(seed=42)
env.action_space.seed(42)

# Sanity check: the first observation has the shape the space advertises
assert observation.shape == env.observation_space.shape, (
    f"unexpected observation shape {observation.shape}"
)

print(f"Test Environment: {env.spec.id}")
print(f"Observation space: {env.observation_space}")
print(f"   Shape: {env.observation_space.shape}")
print(f"   Data type: {env.observation_space.dtype}")
print(f"Action space: {env.action_space}")
print(f"   Actions available: {env.action_space.n}")

# Test environment interaction with random actions
print("\nTesting environment interaction...")
total_reward = 0
steps = 0

# Run for at most 50 steps; the loop variable doubles as the step counter
for steps in range(1, 51):
    action = env.action_space.sample()  # Random action
    obs, reward, terminated, truncated, info = env.step(action)
    total_reward += reward

    # Stop as soon as the episode ends (pole fell or time limit reached)
    if terminated or truncated:
        break

print("   Environment interaction successful")
print(f"   Steps taken: {steps}")
print(f"   Total reward: {total_reward}")

# Test Stable-Baselines3 PPO Agent
print("\nTesting PPO agent creation and training...")

# Create vectorized environment for PPO (seeded so rollouts are repeatable)
env_vec = make_vec_env('CartPole-v1', n_envs=1, seed=42)

# Create PPO agent with custom hyperparameters; `seed` fixes the policy
# initialisation and action sampling so the smoke test is reproducible.
ppo_agent = PPO(
    'MlpPolicy',
    env_vec,
    learning_rate=0.0003,
    n_steps=2048,
    batch_size=64,
    seed=42,
    verbose=0
)

print("PPO agent created successfully")
print("   Policy type: MLP (Multi-Layer Perceptron)")
print(f"   Learning rate: {ppo_agent.learning_rate}")
print(f"   Batch size: {ppo_agent.batch_size}")

# Quick training test (minimal timesteps for speed).
# NOTE: PPO collects whole rollouts of `n_steps`, so with n_steps=2048 this
# request for 2000 timesteps amounts to a single rollout and policy update.
print("\nRunning quick training test...")
# perf_counter is monotonic, so the elapsed time cannot be skewed by
# system clock adjustments the way time.time() can.
start_time = time.perf_counter()

ppo_agent.learn(total_timesteps=2000, progress_bar=False)

training_time = time.perf_counter() - start_time
print(f"Training completed in {training_time:.2f} seconds")

# Test trained agent performance on one full evaluation episode
print("\nTesting trained agent performance...")
test_env = gym.make('CartPole-v1', render_mode=None)
obs, _ = test_env.reset(seed=42)  # fixed seed -> same starting state each run

test_reward = 0
test_steps = 0

# Run until the environment itself ends the episode. CartPole-v1 truncates at
# 500 steps (200 is the CartPole-v0 limit), so a hard-coded cap of 200 would
# under-report a well-trained agent; the built-in time limit bounds this loop.
episode_over = False
while not episode_over:
    action, _ = ppo_agent.predict(obs, deterministic=True)
    # predict() returns a NumPy value; hand the env a plain Python int
    obs, reward, terminated, truncated, info = test_env.step(int(action))
    test_reward += reward
    test_steps += 1
    episode_over = terminated or truncated

print(f"   Test episode reward: {test_reward}")
print(f"   Test episode length: {test_steps}")

# Release the three environments opened by the checks above
env.close()
env_vec.close()
test_env.close()

print("\nReinforcement Learning framework fully functional!")

# Closing banner and summary, emitted as one newline-separated print
print(
    "\n" + "=" * 60,
    "ALL TESTS PASSED! Environment is ready for Escape Cage RL project!",
    "=" * 60,
    "Summary:",
    "   TensorFlow/Keras: Neural networks working",
    "   Gymnasium: RL environments functional",
    "   Stable-Baselines3: PPO agent training successful",
    "   Full pipeline: Ready for escape cage training",
    "\nYou can now proceed with training your escape cage AI!",
    "Next steps: Run the main training scripts in ml_training/",
    "=" * 60,
    sep="\n",
)


🎮 Testing RL environment...
Environment: CartPole-v1
Observation space: Box([-4.8               -inf -0.41887903        -inf], [4.8               inf 0.41887903        inf], (4,), float32)
Action space: Discrete(2)
✅ PPO agent created successfully
✅ RL libraries test successful!

🎉 ALL TESTS PASSED! Environment is ready for Escape Cage RL project!
