<a href="https://colab.research.google.com/github/Salahudin77/thesis/blob/main/python/easy_model.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [13]:
import tensorflow as tf
from tensorflow.keras import layers, models
from tensorflow.keras.datasets import mnist
import time
import gc
import os
import psutil
import numpy as np
from PIL import Image
import json
import datetime
import platform
import statistics
import threading
import sys
from concurrent.futures import ThreadPoolExecutor
import multiprocessing

In [14]:
class MemoryTracker:
    """Sample this process's resident memory (RSS) on a background thread.

    Samples are taken every ``interval_ms`` milliseconds between ``start()``
    and ``stop()``. The class is also a context manager: entering starts the
    sampler and leaving stops it.

    Attributes:
        interval_ms: Delay between two samples, in milliseconds.
        memory_stats: List of sample dicts with the keys ``timestamp`` (ms),
            ``memory_used_bytes`` and ``memory_used_mb``.
        running: True while the sampler is supposed to be active.
        thread: The background sampling thread, or None before ``start()``.
        start_time / end_time: ``time.time()`` values taken in ``start()`` and
            ``stop()``, or None if not reached yet.
        peak_memory_usage: Largest RSS value seen so far, in bytes.
    """

    def __init__(self, interval_ms=100):
        self.interval_ms = interval_ms
        self.memory_stats = []
        self.running = False
        self.thread = None
        self.start_time = None
        self.end_time = None
        self.peak_memory_usage = 0
        # Set by stop() so the sampler wakes up immediately instead of
        # sleeping out the remainder of its interval.
        self._stop_event = threading.Event()

    def collect_stats(self):
        """Take one RSS sample, record it and update the running peak."""
        # Read RSS once so the byte and MB fields describe the same reading.
        rss_bytes = psutil.Process(os.getpid()).memory_info().rss
        stats = {
            'timestamp': time.time() * 1000,  # convert to ms
            'memory_used_bytes': rss_bytes,
            'memory_used_mb': rss_bytes / 1024 / 1024
        }
        self.peak_memory_usage = max(self.peak_memory_usage, rss_bytes)
        self.memory_stats.append(stats)

    def track_loop(self):
        """Sampling loop executed by the background thread."""
        while self.running:
            self.collect_stats()
            # wait() returns True as soon as stop() sets the event, which
            # ends the loop without waiting for the full interval.
            if self._stop_event.wait(self.interval_ms / 1000):
                break

    def start(self):
        """Start the background sampler; does nothing if already running."""
        if not self.running:
            self._stop_event.clear()
            self.running = True
            self.start_time = time.time()
            self.thread = threading.Thread(target=self.track_loop)
            self.thread.daemon = True
            self.thread.start()

    def stop(self):
        """Stop the background sampler; does nothing if not running."""
        if self.running:
            self.running = False
            self.end_time = time.time()
            self._stop_event.set()
            if self.thread:
                # The event wakes the thread right away; the timeout is only
                # a safety net so stop() can never block indefinitely.
                self.thread.join(timeout=0.5)

    def get_peak_memory_mb(self):
        """Return the largest sampled RSS in megabytes (0.0 if no samples)."""
        return self.peak_memory_usage / 1024 / 1024

    def get_summary(self):
        """Return a dict summarising the tracking session.

        ``duration_ms`` is 0 unless both ``start()`` and ``stop()`` have run.
        """
        if self.start_time is not None and self.end_time is not None:
            duration_ms = int((self.end_time - self.start_time) * 1000)
        else:
            duration_ms = 0
        return {
            'duration_ms': duration_ms,
            'sample_count': len(self.memory_stats),
            'sample_interval_ms': self.interval_ms,
            'peak_total_mb': self.get_peak_memory_mb(),
        }

    def get_timeseries_data(self):
        """Return the samples as points relative to the first sample's time.

        Each point has ``time_ms`` (whole milliseconds since the first
        sample), ``heap_mb`` and ``non_heap_mb``. An empty list is returned
        when no sample was taken.
        """
        if not self.memory_stats:
            return []

        base_time = self.memory_stats[0]['timestamp']
        return [
            {
                'time_ms': int(stats['timestamp'] - base_time),
                'heap_mb': stats['memory_used_mb'],  # Python doesn't separate heap/non-heap like Java
                'non_heap_mb': 0  # Not applicable in Python
            }
            for stats in self.memory_stats
        ]

    def __enter__(self):
        self.start()
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        self.stop()


In [15]:
class InferenceResult:
    """Plain holder for the measurements of a single inference run."""

    def __init__(
        self,
        prediction,
        time_ms,
        memory_used_mb,
        cpu_time_ms,
        peak_memory_mb,
        memory_profile,
    ):
        # Model output
        self.prediction = prediction
        # Wall-clock duration of the run, in milliseconds
        self.time_ms = time_ms
        # Memory difference between end and start of the run, in MB
        self.memory_used_mb = memory_used_mb
        # Process CPU time consumed by the run, in milliseconds
        self.cpu_time_ms = cpu_time_ms
        # Highest sampled memory usage during the run, in MB
        self.peak_memory_mb = peak_memory_mb
        # Memory profile data collected for the run
        self.memory_profile = memory_profile

class TrainingResult:
    """Plain holder for the measurements of one training session."""

    def __init__(
        self,
        time_ms,
        memory_used_mb,
        cpu_time_ms,
        peak_memory_mb,
        accuracy,
        memory_profile,
    ):
        # Wall-clock duration of training, in milliseconds
        self.time_ms = time_ms
        # Memory difference between end and start of training, in MB
        self.memory_used_mb = memory_used_mb
        # Process CPU time consumed by training, in milliseconds
        self.cpu_time_ms = cpu_time_ms
        # Highest sampled memory usage during training, in MB
        self.peak_memory_mb = peak_memory_mb
        # Accuracy reported for the trained model
        self.accuracy = accuracy
        # Memory profile data collected during training
        self.memory_profile = memory_profile

In [16]:
def create_easy_network():
    """Build and compile the small dense classifier used by the benchmark.

    The network flattens a 28x28 input, applies a 64-unit ReLU layer and ends
    in a 10-unit softmax layer. It is compiled with Adam (learning rate
    0.001), sparse categorical cross-entropy and an accuracy metric.
    """
    layer_stack = [
        layers.Flatten(input_shape=(28, 28)),   # 28*28 input, as in the Java version
        layers.Dense(64, activation='relu'),    # hidden layer (Java: nOut=64)
        layers.Dense(10, activation='softmax'),  # output layer (Java: nOut=10)
    ]
    network = models.Sequential(layer_stack)

    # Adam with a 0.001 learning rate, to match the Java version
    adam = tf.keras.optimizers.Adam(learning_rate=0.001)
    network.compile(
        optimizer=adam,
        loss='sparse_categorical_crossentropy',
        metrics=['accuracy'],
    )
    return network

def get_used_memory():
    """Return the resident set size (RSS) of the current process, in bytes."""
    return psutil.Process(os.getpid()).memory_info().rss

def get_system_info():
    """Collect host and interpreter details, mirroring the Java version's report."""
    bytes_per_mb = 1024.0 * 1024.0

    info = {}
    info["available_processors"] = multiprocessing.cpu_count()
    # Total physical memory of the machine
    info["max_memory_mb"] = psutil.virtual_memory().total / bytes_per_mb
    # Resident memory of this process at the time of the call
    info["total_memory_mb"] = psutil.Process().memory_info().rss / bytes_per_mb
    info["os_name"] = platform.system()
    info["os_version"] = platform.version()
    info["os_arch"] = platform.machine()
    info["python_version"] = platform.python_version()
    return info

def train_network(model):
    """Train *model* on MNIST while measuring time, CPU and memory.

    The MNIST data is loaded and scaled to [0, 1] inside the measured region,
    the model is fitted for 10 single-epoch rounds with batch size 256, and it
    is then evaluated on the MNIST test split.

    Returns:
        A ``(result, x_test, y_test)`` tuple where ``result`` is a
        ``TrainingResult`` and the other two items are the scaled test images
        and their labels.
    """
    # Collect garbage and pause briefly before taking the baseline readings
    gc.collect()
    time.sleep(0.1)

    rss_before = get_used_memory()
    cpu_before = time.process_time()

    # Sample memory every 100 ms during training, same as Java
    with MemoryTracker(interval_ms=100) as tracker:
        wall_before = time.time()

        print("Loading data...")
        (x_train, y_train), (x_test, y_test) = mnist.load_data()
        # Scale pixel values into [0, 1]
        x_train = x_train / 255.0
        x_test = x_test / 255.0

        # Training parameters, same as Java
        batch_size = 256
        num_epochs = 10

        print("Starting training...")

        class ScoreLogger(tf.keras.callbacks.Callback):
            """Print the loss on every 100th batch, like Java's score listener."""

            def on_batch_end(self, batch, logs=None):
                if batch % 100 != 0:
                    return
                print(f"Batch {batch}, loss: {logs['loss']:.4f}")

        fit_options = dict(batch_size=batch_size, epochs=1, verbose=0)
        for i in range(num_epochs):
            print(f"Epoch {i+1}/{num_epochs}")
            model.fit(x_train, y_train, callbacks=[ScoreLogger()], **fit_options)

        print("Training complete!")

        print("Evaluating model...")
        test_loss, test_acc = model.evaluate(x_test, y_test, verbose=0)
        print(f"Test accuracy: {test_acc:.4f}")
        print(f"Test loss: {test_loss:.4f}")

        wall_after = time.time()

    # Taken after the tracker has been stopped by leaving the with-block
    cpu_after = time.process_time()
    rss_after = get_used_memory()

    bytes_per_mb = 1024.0 * 1024.0
    outcome = TrainingResult(
        time_ms=(wall_after - wall_before) * 1000,
        memory_used_mb=(rss_after - rss_before) / bytes_per_mb,
        cpu_time_ms=(cpu_after - cpu_before) * 1000,
        peak_memory_mb=tracker.get_peak_memory_mb(),
        accuracy=test_acc,
        memory_profile={
            "summary": tracker.get_summary(),
            "timeseries": tracker.get_timeseries_data(),
        },
    )

    return outcome, x_test, y_test

def warmup_iteration(model, image_file):
    """Run one throw-away prediction so later timed runs start warm.

    The image at *image_file* is converted to grayscale, resized to 28x28 and
    scaled to [0, 1] before being passed to ``model.predict``. If anything in
    that path raises, a warning is printed and a random 28x28 input is
    predicted instead.
    """
    try:
        # Grayscale, MNIST-sized version of the image
        grayscale = Image.open(image_file).convert('L').resize((28, 28))

        # Normalise and add the batch dimension expected by the model
        batch = (np.array(grayscale) / 255.0).reshape(1, 28, 28)

        model.predict(batch, verbose=0)
    except Exception as e:
        print(f"Warning: Warmup iteration failed: {e}")
        # Fall back to a random image so the warm-up still exercises the model
        model.predict(np.random.random((1, 28, 28)), verbose=0)

In [17]:
def test_network(model, image_file):
    """Run one measured inference on *image_file* and return its metrics.

    The image is converted to grayscale, resized to 28x28, scaled to [0, 1]
    and fed to ``model.predict``; the predicted class is the argmax of the
    first output row. If that path raises, a warning is printed and a random
    28x28 input is predicted instead. Wall time, CPU time, the RSS difference
    and the sampled peak memory are printed and returned.

    Returns:
        An ``InferenceResult`` describing the run.
    """
    # Collect garbage and pause briefly before taking the baseline readings
    gc.collect()
    time.sleep(0.1)

    rss_before = get_used_memory()
    cpu_before = time.process_time()

    # Sample memory every 10 ms during inference, same as Java
    with MemoryTracker(interval_ms=10) as tracker:
        wall_before = time.time()

        try:
            # Grayscale, MNIST-sized version of the image
            grayscale = Image.open(image_file).convert('L').resize((28, 28))

            # Normalise and add the batch dimension expected by the model
            batch = (np.array(grayscale) / 255.0).reshape(1, 28, 28)

            scores = model.predict(batch, verbose=0)
            prediction = np.argmax(scores[0])
        except Exception as e:
            print(f"Warning: Image processing failed: {e}")
            # Fall back to random data if the image path failed
            batch = np.random.random((1, 28, 28))
            scores = model.predict(batch, verbose=0)
            prediction = np.argmax(scores[0])

        wall_after = time.time()

    # Taken after the tracker has been stopped by leaving the with-block
    cpu_after = time.process_time()
    rss_after = get_used_memory()

    time_ms = (wall_after - wall_before) * 1000
    memory_used_mb = (rss_after - rss_before) / (1024.0 * 1024.0)
    cpu_time_ms = (cpu_after - cpu_before) * 1000
    peak_memory_mb = tracker.get_peak_memory_mb()

    profile = {
        "summary": tracker.get_summary(),
        "timeseries": tracker.get_timeseries_data(),
    }

    print(f"Prediction: {prediction}")
    print(f"Execution time (wall): {time_ms:.2f} ms")
    print(f"CPU time: {cpu_time_ms:.2f} ms")
    print(f"Memory used (end-start): {memory_used_mb:.2f} MB")
    print(f"Peak memory used: {peak_memory_mb:.2f} MB")

    return InferenceResult(
        prediction=prediction,
        time_ms=time_ms,
        memory_used_mb=memory_used_mb,
        cpu_time_ms=cpu_time_ms,
        peak_memory_mb=peak_memory_mb,
        memory_profile=profile,
    )

def average(values):
    """Return the arithmetic mean of *values*, or 0 when it is empty."""
    if not values:
        return 0
    return sum(values) / len(values)

def std_dev(values):
    """Return the sample standard deviation, or 0 for fewer than two values."""
    has_enough_samples = bool(values) and len(values) >= 2
    return statistics.stdev(values) if has_enough_samples else 0

def mode(values):
    """Return the most frequent value in *values*.

    When several values share the highest count, the one that occurs first in
    *values* is returned, so the result does not depend on set ordering.

    Args:
        values: Any iterable of hashable items.

    Returns:
        The item with the highest number of occurrences.

    Raises:
        ValueError: If *values* is empty.
    """
    # Imported here so the function does not rely on a file-level import.
    from collections import Counter

    counts = Counter(values)  # single pass instead of one count() per distinct value
    if not counts:
        raise ValueError("mode() requires at least one value")
    # most_common() lists equal counts in first-encountered order.
    return counts.most_common(1)[0][0]

def save_to_json(
        training_result,
        inference_results,
        create_time_ms,
        avg_inference_time,
        std_dev_inference_time,
        avg_memory,
        std_dev_memory,
        avg_peak_memory,
        std_dev_peak_memory,
        common_prediction,
        output_path="easy_memory_results_python256.json"):
    """Write the benchmark results to a JSON file, matching the Java version.

    Only the ``summary`` part of each memory profile is written; the
    timeseries is left out.

    Args:
        training_result: Object exposing ``time_ms``, ``cpu_time_ms``,
            ``memory_used_mb``, ``peak_memory_mb``, ``accuracy`` and a
            ``memory_profile`` dict with a ``"summary"`` entry.
        inference_results: Iterable of objects exposing ``prediction``,
            ``time_ms``, ``cpu_time_ms``, ``memory_used_mb``,
            ``peak_memory_mb`` and a ``memory_profile`` dict with a
            ``"summary"`` entry.
        create_time_ms: Model creation time in milliseconds.
        avg_inference_time: Mean inference time in milliseconds.
        std_dev_inference_time: Standard deviation of the inference time.
        avg_memory: Mean inference memory delta in MB.
        std_dev_memory: Standard deviation of the inference memory delta.
        avg_peak_memory: Mean inference peak memory in MB.
        std_dev_peak_memory: Standard deviation of the inference peak memory.
        common_prediction: Most frequent prediction across the runs.
        output_path: File to write; defaults to the name used so far.
    """
    # Take the time once and make it timezone-aware: on a naive datetime
    # "%z" renders as an empty string, so the UTC offset was never written.
    now = datetime.datetime.now().astimezone()
    timestamp = now.strftime("%Y-%m-%dT%H:%M:%S.%f")[:-3] + now.strftime("%z")

    root = {
        "timestamp": timestamp,
        "system_info": get_system_info(),
        "model_creation": {
            "time_ms": create_time_ms
        }
    }

    # Training info - timeseries is excluded from memory_profile
    root["training"] = {
        "time_ms": training_result.time_ms,
        "cpu_time_ms": training_result.cpu_time_ms,
        "memory_used_mb": training_result.memory_used_mb,
        "peak_memory_mb": training_result.peak_memory_mb,
        # Coerce to a built-in float so a numpy scalar cannot break json.dump
        "accuracy": float(training_result.accuracy),
        "memory_profile": {
            "summary": training_result.memory_profile["summary"]
        }
    }

    # Inference runs - timeseries is excluded from memory_profile
    root["inference_runs"] = [
        {
            "run": run_number,
            "prediction": int(result.prediction),  # Convert numpy.int64 to regular int
            "execution_time_ms": result.time_ms,
            "cpu_time_ms": result.cpu_time_ms,
            "memory_used_mb": result.memory_used_mb,
            "peak_memory_mb": result.peak_memory_mb,
            "memory_profile": {
                "summary": result.memory_profile["summary"]
            }
        }
        for run_number, result in enumerate(inference_results, start=1)
    ]

    # Summary with all metrics
    root["summary"] = {
        "model_creation_time_ms": create_time_ms,
        "training_time_ms": training_result.time_ms,
        "training_peak_memory_mb": training_result.peak_memory_mb,
        "average_inference_time_ms": avg_inference_time,
        "std_dev_inference_time_ms": std_dev_inference_time,
        "average_inference_memory_mb": avg_memory,
        "std_dev_inference_memory_mb": std_dev_memory,
        "average_inference_peak_memory_mb": avg_peak_memory,
        "std_dev_inference_peak_memory_mb": std_dev_peak_memory,
        "most_common_prediction": int(common_prediction),  # Convert numpy.int64 to regular int
        "total_time_ms": create_time_ms + training_result.time_ms + avg_inference_time
    }

    with open(output_path, "w", encoding="utf-8") as f:
        json.dump(root, f, indent=4)
    # Report the file that was actually written (the old message named a
    # different file than the one opened above).
    print(f"Results exported to {output_path}")



In [18]:
def main():
    """Run the full benchmark: create, train, warm up, infer, report, save.

    Steps:
      1. Time the creation of the model.
      2. Train it and print the training metrics.
      3. Make sure a test image exists (a random MNIST test sample is written
         to disk if the expected file is missing).
      4. Perform 10 warm-up predictions.
      5. Run the measured inference several times and aggregate the metrics.
      6. Print the results, export them to JSON and save the model.
    """
    # First, measure model creation time
    print("==== Creating network model ====")
    start_create_time = time.time()
    model = create_easy_network()
    end_create_time = time.time()
    create_time_ms = (end_create_time - start_create_time) * 1000
    print(f"Model creation time: {create_time_ms:.2f} ms")

    # Next, measure training time (the test labels are not needed here)
    print("\n==== Training network model ====")
    training_result, x_test, _ = train_network(model)
    print(f"Training time: {training_result.time_ms:.2f} ms")
    print(f"Training memory used: {training_result.memory_used_mb:.2f} MB")
    print(f"Training peak memory: {training_result.peak_memory_mb:.2f} MB")

    # Determine test image path
    image_file = "./träningsbilder/testSample/img_1.jpg"
    if not os.path.exists(image_file):
        print(f"Warning: Test image not found at {image_file}")
        print("Will try to use a random test sample from MNIST instead for inference")
        # Select random test image from MNIST
        test_idx = np.random.randint(0, len(x_test))
        test_img = x_test[test_idx]
        # Derive the directory from the path so the two cannot drift apart
        os.makedirs(os.path.dirname(image_file), exist_ok=True)
        # Round before casting: plain truncation can turn e.g. 2.9999999 into
        # 2 and lose one grey level after the earlier division by 255.
        pixels = np.rint(test_img * 255).astype(np.uint8)
        Image.fromarray(pixels).save(image_file)
        print(f"Created test image at {image_file}")

    # Add explicit warm-up phase
    print("\n==== Performing warm-up iterations ====")
    for _ in range(10):
        warmup_iteration(model, image_file)
    print("Warm-up complete, starting inference benchmark...")

    # Force garbage collection and wait
    gc.collect()
    time.sleep(0.5)

    runs = 5
    inference_results = []

    print("\n==== Running inference benchmark ====")
    for i in range(runs):
        print(f"\n--- Inference Test Run {i + 1} ---")
        inference_results.append(test_network(model, image_file))

    # Aggregate inference metrics
    times_ms = [r.time_ms for r in inference_results]
    memory_mb = [r.memory_used_mb for r in inference_results]
    peak_mb = [r.peak_memory_mb for r in inference_results]

    avg_inference_time = average(times_ms)
    std_dev_inference_time = std_dev(times_ms)

    avg_inference_memory = average(memory_mb)
    std_dev_inference_memory = std_dev(memory_mb)

    avg_inference_peak_memory = average(peak_mb)
    std_dev_inference_peak_memory = std_dev(peak_mb)

    common_prediction = mode([r.prediction for r in inference_results])

    # The header reports the actual number of runs instead of a literal 5
    print(f"\n===== Average Results After {runs} Inference Runs =====")
    print(f"Most common prediction: {common_prediction}")
    print(f"Average inference time: {avg_inference_time:.2f} ms (±{std_dev_inference_time:.2f})")
    print(f"Average inference memory used: {avg_inference_memory:.2f} MB (±{std_dev_inference_memory:.2f})")
    print(f"Average inference peak memory: {avg_inference_peak_memory:.2f} MB (±{std_dev_inference_peak_memory:.2f})")

    print("\n===== Combined Results =====")
    print(f"Model creation time: {create_time_ms:.2f} ms")
    print(f"Training time: {training_result.time_ms:.2f} ms")
    print(f"Training memory used: {training_result.memory_used_mb:.2f} MB")
    print(f"Training peak memory: {training_result.peak_memory_mb:.2f} MB")
    print(f"Average inference time: {avg_inference_time:.2f} ms (±{std_dev_inference_time:.2f})")
    print(f"Total time (creation + training + avg inference): {create_time_ms + training_result.time_ms + avg_inference_time:.2f} ms")

    # Save comprehensive results to JSON
    save_to_json(
        training_result,
        inference_results,
        create_time_ms,
        avg_inference_time,
        std_dev_inference_time,
        avg_inference_memory,
        std_dev_inference_memory,
        avg_inference_peak_memory,
        std_dev_inference_peak_memory,
        common_prediction
    )

    # Save the model
    model.save("trained-easy-model.h5")
    print("Model saved to trained-easy-model.h5")

# Run the whole benchmark when this file is executed directly; importing it
# as a module only defines the classes and functions above.
if __name__ == "__main__":
    main()


==== Creating network model ====
Model creation time: 24.43 ms

==== Training network model ====
Loading data...
Starting training...
Epoch 1/10
Batch 0, loss: 2.4024
Batch 100, loss: 0.8233
Batch 200, loss: 0.5836
Epoch 2/10
Batch 0, loss: 0.2616
Batch 100, loss: 0.2623
Batch 200, loss: 0.2505
Epoch 3/10
Batch 0, loss: 0.2053
Batch 100, loss: 0.2046
Batch 200, loss: 0.1983
Epoch 4/10
Batch 0, loss: 0.1631
Batch 100, loss: 0.1704
Batch 200, loss: 0.1629
Epoch 5/10
Batch 0, loss: 0.1517
Batch 100, loss: 0.1459
Batch 200, loss: 0.1389
Epoch 6/10
Batch 0, loss: 0.1207
Batch 100, loss: 0.1207
Batch 200, loss: 0.1217
Epoch 7/10
Batch 0, loss: 0.1497
Batch 100, loss: 0.1076
Batch 200, loss: 0.1086
Epoch 8/10
Batch 0, loss: 0.1378
Batch 100, loss: 0.0966
Batch 200, loss: 0.0986
Epoch 9/10
Batch 0, loss: 0.1280
Batch 100, loss: 0.0859
Batch 200, loss: 0.0891
Epoch 10/10
Batch 0, loss: 0.0910
Batch 100, loss: 0.0783
Batch 200, loss: 0.0792
Training complete!
Evaluating model...
Test accuracy: 0