<a href="https://colab.research.google.com/github/Salahudin77/thesis/blob/main/python/medium_model.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [16]:

import datetime
import gc
import json
import os
import platform
import statistics
import threading
import time
import traceback
from dataclasses import dataclass
from pathlib import Path

import numpy as np
import psutil
import tensorflow as tf
from PIL import Image
from tensorflow.keras import layers, models
from tensorflow.keras.datasets import mnist

In [17]:
class MemoryTracker:
    """Sample this process's memory usage on a background thread.

    Use it as a context manager or call ``start()`` / ``stop()`` directly.
    While running, a daemon thread records the process RSS and VMS about every
    ``interval_ms`` milliseconds and keeps running peaks.  RSS is exposed under
    the "heap" names and VMS under the "virtual" names.

    Args:
        interval_ms: Delay between two background samples, in milliseconds.
    """

    def __init__(self, interval_ms=100):
        self.interval_ms = interval_ms
        self.memory_stats = []
        self.running = False
        self.thread = None
        self.start_time = None
        self.end_time = None
        self.peak_heap_usage = 0
        self.peak_virtual_usage = 0
        # Set by stop() so the sampler wakes up at once instead of finishing its sleep.
        self._stop_event = threading.Event()

    def collect_stats(self):
        """Take one sample, update the peaks and return the sample as a dict."""
        # Read memory_info() once so every field of a sample describes the same instant.
        mem = psutil.Process(os.getpid()).memory_info()
        stats = {
            "timestamp": time.time() * 1000,  # ms since epoch
            "rss_bytes": mem.rss,
            "vms_bytes": mem.vms,
            "heap_used_mb": mem.rss / 1024 / 1024,
            "virtual_used_mb": mem.vms / 1024 / 1024,
        }

        self.peak_heap_usage = max(self.peak_heap_usage, stats["rss_bytes"])
        self.peak_virtual_usage = max(self.peak_virtual_usage, stats["vms_bytes"])

        return stats

    def stats_collector(self):
        """Thread body: append one sample per interval until the tracker is stopped."""
        while self.running:
            self.memory_stats.append(self.collect_stats())
            # Event.wait() is an interruptible sleep: it returns True as soon as stop() fires.
            if self._stop_event.wait(self.interval_ms / 1000):
                break

    def start(self):
        """Start background sampling; does nothing if already running."""
        if self.running:
            return
        self._stop_event.clear()
        self.running = True
        self.start_time = time.time()
        self.thread = threading.Thread(target=self.stats_collector, daemon=True)
        self.thread.start()

    def stop(self):
        """Stop background sampling; does nothing if not running."""
        if not self.running:
            return
        self.running = False
        self._stop_event.set()
        self.end_time = time.time()
        if self.thread:
            self.thread.join(timeout=0.5)
            self.thread = None
        # One last synchronous sample: guarantees at least one data point (and
        # non-zero peaks) even when the sampler thread never got scheduled.
        self.memory_stats.append(self.collect_stats())

    def get_peak_heap_usage_bytes(self):
        """Return the highest RSS seen so far, in bytes."""
        return self.peak_heap_usage

    def get_peak_heap_usage_mb(self):
        """Return the highest RSS seen so far, in MiB."""
        return self.peak_heap_usage / 1024 / 1024

    def get_peak_virtual_usage_bytes(self):
        """Return the highest VMS seen so far, in bytes."""
        return self.peak_virtual_usage

    def get_peak_virtual_usage_mb(self):
        """Return the highest VMS seen so far, in MiB."""
        return self.peak_virtual_usage / 1024 / 1024

    def get_total_peak_memory_mb(self):
        """Return the figure reported as "total" peak memory, in MiB.

        This is the peak RSS only; VMS is not added to it.
        """
        return self.get_peak_heap_usage_mb()

    def get_summary(self):
        """Return sample count, interval, peaks and (once stopped) the duration."""
        summary = {
            "sample_count": len(self.memory_stats),
            "sample_interval_ms": self.interval_ms,
            "peak_heap_mb": self.get_peak_heap_usage_mb(),
            "peak_virtual_mb": self.get_peak_virtual_usage_mb(),
            "peak_total_mb": self.get_total_peak_memory_mb(),
        }

        # Compare against None: a timestamp of 0.0 is valid but falsy.
        if self.start_time is not None and self.end_time is not None:
            summary["duration_ms"] = (self.end_time - self.start_time) * 1000

        return summary

    def get_time_series_data(self):
        """Return the samples with timestamps made relative to the first sample."""
        if not self.memory_stats:
            return []

        base_time = self.memory_stats[0]["timestamp"]

        return [
            {
                "time_ms": stats["timestamp"] - base_time,
                "heap_mb": stats["heap_used_mb"],
                "virtual_mb": stats["virtual_used_mb"],
            }
            for stats in self.memory_stats
        ]

    def __enter__(self):
        self.start()
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        # Returns None, so exceptions raised inside the with-block propagate.
        self.stop()


# eq=False keeps the original identity-based equality and hashing; the
# generated __init__ takes the same parameters in the same order.
@dataclass(eq=False)
class InferenceResult:
    """Metrics collected for a single inference run."""

    prediction: int        # predicted class index, or -1 when inference failed
    time_ms: float         # wall-clock duration of the run
    memory_used_mb: float  # RSS after the run minus RSS before it
    cpu_time_ms: float     # user + system CPU time consumed during the run
    peak_memory_mb: float  # peak memory reported by the tracker
    memory_profile: dict   # {"summary": <tracker summary>}


# eq=False keeps the original identity-based equality and hashing; the
# generated __init__ takes the same parameters in the same order.
@dataclass(eq=False)
class TrainingResult:
    """Metrics collected for one training run."""

    training_time_ms: float  # wall-clock duration of the measured training window
    peak_memory_mb: float    # peak memory reported by the tracker
    accuracy: float          # test-set accuracy
    memory_profile: dict     # {"summary": <tracker summary>}
    evaluation: dict         # accuracy / precision / recall / f1


def create_medium_network():
    """Build and compile the medium-sized classifier for 28x28 inputs.

    The layer sizes and optimizer settings are meant to mirror a Java
    implementation that is not part of this file.

    Returns:
        A compiled Keras ``Sequential`` model.
    """
    architecture = [
        layers.Flatten(input_shape=(28, 28)),    # 28x28 image -> 784 values
        layers.Dense(128, activation='relu'),    # hidden layer 1
        layers.Dense(64, activation='relu'),     # hidden layer 2
        layers.Dense(10, activation='softmax'),  # one probability per class
    ]
    network = models.Sequential(architecture)

    # Adam with a learning rate of 0.001; integer labels, hence the sparse loss.
    adam = tf.keras.optimizers.Adam(0.001)
    network.compile(
        optimizer=adam,
        loss='sparse_categorical_crossentropy',
        metrics=['accuracy'],
    )

    return network


def _macro_precision_recall_f1(y_true, y_pred, num_classes):
    """Return macro-averaged (precision, recall, f1) for integer class labels.

    A class with no predictions (precision) or no true samples (recall)
    contributes 0 to the respective average.
    """
    y_true = np.asarray(y_true, dtype=np.int64).ravel()
    y_pred = np.asarray(y_pred, dtype=np.int64).ravel()

    # confusion[i, j] = number of samples of true class i predicted as class j
    confusion = np.bincount(
        y_true * num_classes + y_pred, minlength=num_classes * num_classes
    ).reshape(num_classes, num_classes)

    true_pos = np.diag(confusion).astype(np.float64)
    predicted = confusion.sum(axis=0).astype(np.float64)
    actual = confusion.sum(axis=1).astype(np.float64)

    precision = np.divide(true_pos, predicted, out=np.zeros_like(true_pos), where=predicted > 0)
    recall = np.divide(true_pos, actual, out=np.zeros_like(true_pos), where=actual > 0)
    pr_sum = precision + recall
    f1 = np.divide(2 * precision * recall, pr_sum, out=np.zeros_like(true_pos), where=pr_sum > 0)

    return float(precision.mean()), float(recall.mean()), float(f1.mean())


def train_network_with_metrics(model):
    """Train ``model`` on MNIST and collect time, memory and quality metrics.

    The measured window covers data loading, ten single-epoch ``fit`` calls
    and the test-set evaluation, exactly as before.  Precision, recall and F1
    are computed from the test-set predictions after the measured window has
    closed, so they affect neither the reported time nor the memory peak.

    Args:
        model: A compiled Keras model accepting ``(n, 28, 28)`` inputs.

    Returns:
        A ``TrainingResult``.
    """
    batch_size = 256
    num_epochs = 10

    class ScoreLogger(tf.keras.callbacks.Callback):
        """Print the running loss every 100 batches."""

        def on_batch_end(self, batch, logs=None):
            if batch % 100 == 0 and logs and "loss" in logs:
                print(f"Batch {batch}, loss: {logs['loss']:.4f}")

    # Force garbage collection before starting, then pause briefly
    gc.collect()
    time.sleep(0.1)

    # Sample memory every 100 ms for the duration of the measured window
    with MemoryTracker(interval_ms=100) as memory_tracker:
        # perf_counter is monotonic, so clock adjustments cannot skew the interval
        start_training_time = time.perf_counter()

        # Load and preprocess MNIST dataset
        print("Loading data...")
        (x_train, y_train), (x_test, y_test) = mnist.load_data()
        x_train, x_test = x_train / 255.0, x_test / 255.0

        print("Starting training...")
        score_logger = ScoreLogger()
        for i in range(num_epochs):
            print(f"Epoch {i+1}/{num_epochs}")
            model.fit(
                x_train, y_train,
                batch_size=batch_size,
                epochs=1,
                verbose=0,
                callbacks=[score_logger]
            )

        print("Training complete!")

        # Evaluate the model
        print("Evaluating model...")
        test_loss, test_accuracy = model.evaluate(x_test, y_test, verbose=0)
        print(f"Test accuracy: {test_accuracy:.4f}")
        print(f"Test loss: {test_loss:.4f}")

        end_training_time = time.perf_counter()

    # The tracker is stopped here, so the summary includes "duration_ms".
    training_time_ms = (end_training_time - start_training_time) * 1000
    peak_memory_mb = memory_tracker.get_total_peak_memory_mb()
    memory_profile = {
        "summary": memory_tracker.get_summary()
    }

    # Real precision / recall / F1 instead of copies of the accuracy value.
    probabilities = model.predict(x_test, batch_size=batch_size, verbose=0)
    predicted_labels = np.argmax(probabilities, axis=1)
    num_classes = max(int(probabilities.shape[1]), int(np.max(y_test)) + 1)
    precision, recall, f1 = _macro_precision_recall_f1(y_test, predicted_labels, num_classes)

    evaluation_data = {
        "accuracy": float(test_accuracy),
        "precision": precision,
        "recall": recall,
        "f1": f1
    }

    return TrainingResult(training_time_ms, peak_memory_mb, float(test_accuracy), memory_profile, evaluation_data)


def test_network(model, image_file):
    """Run one measured inference on ``image_file`` and print its metrics.

    Args:
        model: A trained Keras model accepting ``(1, 28, 28)`` inputs.
        image_file: Path of the image to classify.

    Returns:
        An ``InferenceResult``.  If loading or predicting fails, the error is
        printed and ``prediction`` is -1; the timings then describe the failed
        attempt.
    """
    print("\n--- Running inference test ---")

    # Force garbage collection before starting, then pause briefly
    gc.collect()
    time.sleep(0.1)

    start_mem = get_used_memory()
    process = psutil.Process(os.getpid())
    # Read cpu_times() once so user and system come from the same snapshot.
    cpu_before = process.cpu_times()
    start_cpu_time = cpu_before.user + cpu_before.system

    # Sample memory every 10 ms while the inference runs
    with MemoryTracker(interval_ms=10) as memory_tracker:
        # perf_counter is monotonic and high-resolution, which suits short intervals
        start_time = time.perf_counter()

        try:
            # The with-block closes the file handle as soon as the pixels are converted.
            with Image.open(image_file) as source:
                img = source.convert('L').resize((28, 28))  # grayscale, MNIST size

            # Scale pixel values to [0, 1]
            image = np.array(img, dtype=np.float32) / 255.0

            # Add the batch dimension; the model flattens the 28x28 input itself
            image = image.reshape(1, 28, 28)

            output = model.predict(image, verbose=0)
            prediction = np.argmax(output[0])
        except Exception as e:
            # Best effort: report the failure and keep the benchmark running.
            print(f"Error processing image: {e}")
            traceback.print_exc()
            prediction = -1

        end_time = time.perf_counter()

    # Calculate metrics
    cpu_after = process.cpu_times()
    end_cpu_time = cpu_after.user + cpu_after.system
    end_mem = get_used_memory()

    time_ms = (end_time - start_time) * 1000
    memory_used_mb = (end_mem - start_mem) / 1024 / 1024
    cpu_time_ms = (end_cpu_time - start_cpu_time) * 1000
    peak_memory_mb = memory_tracker.get_total_peak_memory_mb()

    memory_profile = {
        "summary": memory_tracker.get_summary()
    }

    print(f"Prediction: {prediction}")
    print(f"Execution time (wall): {time_ms:.2f} ms")
    print(f"CPU time: {cpu_time_ms:.2f} ms")
    print(f"Memory used (end-start): {memory_used_mb:.2f} MB")
    print(f"Peak memory used: {peak_memory_mb:.2f} MB")
    print(f"Peak heap memory: {memory_tracker.get_peak_heap_usage_mb():.2f} MB")
    print(f"Peak virtual memory: {memory_tracker.get_peak_virtual_usage_mb():.2f} MB")

    return InferenceResult(prediction, time_ms, memory_used_mb, cpu_time_ms, peak_memory_mb, memory_profile)


def warmup_iteration(model, image_file):
    """Run one untimed prediction on ``image_file`` to warm the model up.

    Any failure is printed and swallowed so that a warm-up problem never
    aborts the benchmark.

    Args:
        model: A Keras model accepting ``(1, 28, 28)`` inputs.
        image_file: Path of the image to feed through the model.
    """
    try:
        # The with-block closes the file handle as soon as the pixels are converted.
        with Image.open(image_file) as source:
            img = source.convert('L').resize((28, 28))
        image = np.array(img, dtype=np.float32) / 255.0
        image = image.reshape(1, 28, 28)
        model.predict(image, verbose=0)
    except Exception as e:
        print(f"Warmup error: {e}")


def get_system_info():
    """Describe the host: CPU count, total RAM, OS and Python version.

    Both ``max_memory_mb`` and ``total_memory_mb`` carry the total physical
    memory reported by psutil, in MiB.
    """
    import multiprocessing

    total_mb = psutil.virtual_memory().total / (1024 * 1024)

    info = {
        "available_processors": multiprocessing.cpu_count(),
        "max_memory_mb": total_mb,
        "total_memory_mb": total_mb,
        "os_name": platform.system(),
        "os_version": platform.version(),
        "os_arch": platform.machine(),
        "python_version": platform.python_version(),
    }
    return info


def get_used_memory():
    """Return the resident set size (RSS) of the current process, in bytes."""
    return psutil.Process(os.getpid()).memory_info().rss


def average(values):
    """Return the arithmetic mean of ``values`` (delegates to ``statistics.mean``)."""
    mean_value = statistics.mean(values)
    return mean_value


def std_dev(values):
    """Return the sample standard deviation, or 0 for fewer than two values."""
    if len(values) > 1:
        return statistics.stdev(values)
    return 0


def mode(values):
    """Return the most frequent value.

    When ``statistics.mode`` cannot decide, fall back to the first element,
    or to ``None`` if ``values`` is empty.
    """
    try:
        most_common = statistics.mode(values)
    except statistics.StatisticsError:
        if values:
            return values[0]
        return None
    return most_common


def save_to_json(training_result, inference_results, avg_time, std_dev_time,
                 avg_memory, std_dev_memory, avg_peak_memory, std_dev_peak_memory,
                 common_prediction, output_path="medium_network_results_python256.json"):
    """Write all benchmark metrics to a JSON file.

    Args:
        training_result: ``TrainingResult`` of the training phase.
        inference_results: Iterable of ``InferenceResult``, one per run.
        avg_time, std_dev_time: Mean / standard deviation of wall time (ms).
        avg_memory, std_dev_memory: Mean / standard deviation of memory delta (MB).
        avg_peak_memory, std_dev_peak_memory: Mean / standard deviation of peak memory (MB).
        common_prediction: Most frequent predicted class.
        output_path: Destination file; defaults to the previously hard-coded name.
    """

    def convert_numpy_types(obj):
        """Recursively replace NumPy scalars/arrays with JSON-serialisable values."""
        if isinstance(obj, np.bool_):
            return bool(obj)
        if isinstance(obj, np.floating):
            return float(obj)
        if isinstance(obj, np.integer):
            return int(obj)
        if isinstance(obj, np.ndarray):
            return obj.tolist()
        if isinstance(obj, dict):
            return {k: convert_numpy_types(v) for k, v in obj.items()}
        if isinstance(obj, (list, tuple)):
            return [convert_numpy_types(x) for x in obj]
        return obj

    # One timezone-aware reading: a naive datetime renders %z as an empty
    # string, and two separate now() calls could straddle a clock tick.
    now = datetime.datetime.now().astimezone()
    timestamp = now.strftime("%Y-%m-%dT%H:%M:%S.%f")[:-3] + now.strftime("%z")

    root = {
        "timestamp": timestamp,
        "system_info": convert_numpy_types(get_system_info()),
    }

    # Training metrics (summary only, no time series)
    root["training"] = {
        "training_time_ms": convert_numpy_types(training_result.training_time_ms),
        "training_time_seconds": convert_numpy_types(training_result.training_time_ms / 1000.0),
        "peak_memory_mb": convert_numpy_types(training_result.peak_memory_mb),
        "accuracy": convert_numpy_types(training_result.accuracy),
        "memory_profile": convert_numpy_types(training_result.memory_profile["summary"]),
        "evaluation": convert_numpy_types(training_result.evaluation)
    }

    # Inference runs (summary only, no time series), numbered from 1
    root["inference_runs"] = [
        {
            "run": run_number,
            "prediction": convert_numpy_types(r.prediction),
            "execution_time_ms": convert_numpy_types(r.time_ms),
            "cpu_time_ms": convert_numpy_types(r.cpu_time_ms),
            "memory_used_mb": convert_numpy_types(r.memory_used_mb),
            "peak_memory_mb": convert_numpy_types(r.peak_memory_mb),
            "memory_profile": convert_numpy_types(r.memory_profile["summary"])
        }
        for run_number, r in enumerate(inference_results, start=1)
    ]

    root["inference_summary"] = {
        "average_execution_time_ms": convert_numpy_types(avg_time),
        "std_dev_execution_time_ms": convert_numpy_types(std_dev_time),
        "average_memory_used_mb": convert_numpy_types(avg_memory),
        "std_dev_memory_used_mb": convert_numpy_types(std_dev_memory),
        "average_peak_memory_mb": convert_numpy_types(avg_peak_memory),
        "std_dev_peak_memory_mb": convert_numpy_types(std_dev_peak_memory),
        "most_common_prediction": convert_numpy_types(common_prediction)
    }

    with open(output_path, "w", encoding="utf-8") as f:
        json.dump(root, f, indent=4)

    # Report the file that was actually written.
    print(f"Results exported to {output_path}")


In [18]:
def main():
    """Train the network, benchmark inference and export all metrics.

    Steps: build and train the model, run 10 warm-up predictions, run the
    measured inference tests, aggregate their metrics, write the JSON report
    and save the trained model under ``models/``.
    """
    # Create a directory for models if it doesn't exist
    Path("models").mkdir(exist_ok=True)

    # Create the network
    model = create_medium_network()
    model.summary()

    # Train the network and measure training metrics
    training_result = train_network_with_metrics(model)

    print("\n===== Training Metrics =====")
    print(f"Total training time: {training_result.training_time_ms:.2f} ms ({training_result.training_time_ms / 1000.0:.2f} seconds)")
    print(f"Peak training memory: {training_result.peak_memory_mb:.2f} MB")
    print(f"Final model accuracy: {training_result.accuracy * 100:.2f}%")

    # Path to test image
    image_file = "träningsbilder/testSample/img_1.jpg"

    # Warm-up predictions are not measured
    print("\nPerforming warm-up iterations...")
    for _ in range(10):
        warmup_iteration(model, image_file)
    print("Warm-up complete, starting benchmark...")

    # Force garbage collection before testing
    gc.collect()
    time.sleep(0.5)

    # Measured runs
    runs = 5
    inference_results = []

    for i in range(runs):
        print(f"\n--- Test Run {i + 1} ---")
        inference_results.append(test_network(model, image_file))

    # Aggregate metrics; each series is built once and reused
    times = [r.time_ms for r in inference_results]
    memory_deltas = [r.memory_used_mb for r in inference_results]
    peaks = [r.peak_memory_mb for r in inference_results]

    avg_time = average(times)
    std_dev_time = std_dev(times)

    avg_memory = average(memory_deltas)
    std_dev_memory = std_dev(memory_deltas)

    avg_peak_memory = average(peaks)
    std_dev_peak_memory = std_dev(peaks)

    common_prediction = mode([r.prediction for r in inference_results])

    # The header follows `runs` instead of repeating the literal 5
    print(f"\n===== Average Inference Results After {runs} Runs =====")
    print(f"Most common prediction: {common_prediction}")
    print(f"Average execution time: {avg_time:.2f} ms (±{std_dev_time:.2f})")
    print(f"Average memory used: {avg_memory:.2f} MB (±{std_dev_memory:.2f})")
    print(f"Average peak memory: {avg_peak_memory:.2f} MB (±{std_dev_peak_memory:.2f})")

    # Save metrics to JSON
    save_to_json(training_result, inference_results, avg_time, std_dev_time,
                 avg_memory, std_dev_memory, avg_peak_memory, std_dev_peak_memory,
                 common_prediction)

    # Save the model
    model.save("models/trained-medium-model.h5")
    print("Model saved to models/trained-medium-model.h5")


# Run the full benchmark only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()


Model: "sequential_5"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 flatten_5 (Flatten)         (None, 784)               0         
                                                                 
 dense_15 (Dense)            (None, 128)               100480    
                                                                 
 dense_16 (Dense)            (None, 64)                8256      
                                                                 
 dense_17 (Dense)            (None, 10)                650       
                                                                 
Total params: 109,386
Trainable params: 109,386
Non-trainable params: 0
_________________________________________________________________
Loading data...
Starting training...
Epoch 1/10
Batch 0, loss: 2.3870
Batch 100, loss: 0.6508
Batch 200, loss: 0.4560
Epoch 2/10
Batch 0, loss: 0.1584
Batch 100, loss: 0.1809
Batch