<a href="https://colab.research.google.com/github/Salahudin77/thesis/blob/main/python/hard_model.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [13]:
import tensorflow as tf
from tensorflow.keras import layers, models
from tensorflow.keras.datasets import mnist
import time
import gc
import os
import psutil
import numpy as np
from PIL import Image
import json
from datetime import datetime
import threading
from collections import defaultdict
import platform
import multiprocessing

In [14]:
class MemoryTracker:
    """Sample this process's resident set size (RSS) on a background thread.

    Call ``start()`` to begin sampling and ``stop()`` to end it; the collected
    samples, the peak value and a summary are then available through the
    ``get_*`` methods.
    """

    def __init__(self, interval_ms=10):
        """Create a tracker that samples every ``interval_ms`` milliseconds."""
        # NOTE: despite its name, this attribute stores the interval in
        # SECONDS (it is what the sampler waits on). The name is kept so
        # existing readers of the attribute keep working.
        self.interval_ms = interval_ms / 1000.0
        self.process = psutil.Process(os.getpid())
        self.peak_memory = 0  # highest RSS seen so far, in bytes
        self.memory_samples = []
        self.is_running = False
        self.thread = None
        self.start_time = None
        self.end_time = None
        # Set by stop() to wake the sampler immediately.
        self._stop_event = threading.Event()

    def _track_memory(self):
        """Sampler loop: record RSS, then wait one interval or until stopped."""
        stop_event = self._stop_event
        # Sample first, check afterwards: even a start() that is immediately
        # followed by stop() yields at least one sample (and a non-zero peak).
        while True:
            mem = self.process.memory_info().rss
            self.memory_samples.append({
                'timestamp': time.time(),
                'memory_bytes': mem,
                'memory_mb': mem / (1024 * 1024)
            })
            self.peak_memory = max(self.peak_memory, mem)
            # Event.wait returns True as soon as stop() sets the event, so the
            # thread never sleeps through a stop request.
            if stop_event.wait(self.interval_ms):
                break

    def start(self):
        """Start the background sampler; does nothing if already running."""
        if not self.is_running:
            # A fresh event per run, so a tracker can be started again after
            # having been stopped.
            self._stop_event = threading.Event()
            self.is_running = True
            self.start_time = time.time()
            self.thread = threading.Thread(target=self._track_memory)
            self.thread.daemon = True
            self.thread.start()

    def stop(self):
        """Stop the sampler and wait for its thread to finish.

        Returns promptly: the sampler is woken instead of being left to finish
        its current wait, so callers that time a region around ``stop()`` are
        not penalised by up to one sampling interval.
        """
        if self.is_running:
            self.is_running = False
            self.end_time = time.time()
            self._stop_event.set()
            if self.thread is not None:
                self.thread.join()

    def get_peak_memory_mb(self):
        """Return the highest sampled RSS in MB (0 if nothing was sampled)."""
        return self.peak_memory / (1024 * 1024)

    def get_summary(self):
        """Return a dict summarising the tracking run.

        ``duration_ms`` is 0 until the tracker has been both started and
        stopped.
        """
        if self.start_time is not None and self.end_time is not None:
            duration = (self.end_time - self.start_time) * 1000
        else:
            duration = 0
        return {
            'duration_ms': duration,
            'sample_count': len(self.memory_samples),
            # interval_ms holds seconds (see __init__), hence the * 1000.
            'sample_interval_ms': self.interval_ms * 1000,
            'peak_memory_mb': self.get_peak_memory_mb(),
            'start_time': self.start_time,
            'end_time': self.end_time
        }

    def get_time_series_data(self):
        """Return the samples as ``{'time_ms', 'memory_mb'}`` dicts.

        ``time_ms`` is measured relative to the first sample. Returns an empty
        list when no samples were collected.
        """
        if not self.memory_samples:
            return []

        base_time = self.memory_samples[0]['timestamp']
        return [{
            'time_ms': (sample['timestamp'] - base_time) * 1000,
            'memory_mb': sample['memory_mb']
        } for sample in self.memory_samples]

class TrainingResult:
    """Plain record holding the metrics collected for one training run."""

    def __init__(self, time_ms, memory_used_mb, cpu_time_ms, peak_memory_mb, final_accuracy, memory_profile):
        # Store every constructor argument unchanged under the same name:
        #   time_ms         - elapsed time in milliseconds
        #   memory_used_mb  - memory delta in MB
        #   cpu_time_ms     - CPU time in milliseconds
        #   peak_memory_mb  - peak memory in MB
        #   final_accuracy  - accuracy of the trained model
        #   memory_profile  - memory profile data for the run
        (
            self.time_ms,
            self.memory_used_mb,
            self.cpu_time_ms,
            self.peak_memory_mb,
            self.final_accuracy,
            self.memory_profile,
        ) = (
            time_ms,
            memory_used_mb,
            cpu_time_ms,
            peak_memory_mb,
            final_accuracy,
            memory_profile,
        )

class InferenceResult:
    """Plain record holding the metrics collected for one inference run."""

    def __init__(self, prediction, time_ms, memory_used_mb, cpu_time_ms, peak_memory_mb, memory_profile):
        # Store every constructor argument unchanged under the same name:
        #   prediction      - predicted class for the input
        #   time_ms         - elapsed time in milliseconds
        #   memory_used_mb  - memory delta in MB
        #   cpu_time_ms     - CPU time in milliseconds
        #   peak_memory_mb  - peak memory in MB
        #   memory_profile  - memory profile data for the run
        (
            self.prediction,
            self.time_ms,
            self.memory_used_mb,
            self.cpu_time_ms,
            self.peak_memory_mb,
            self.memory_profile,
        ) = (
            prediction,
            time_ms,
            memory_used_mb,
            cpu_time_ms,
            peak_memory_mb,
            memory_profile,
        )

def create_hard_network(dense_units=64, learning_rate=0.001):
    """Build and compile the convolutional network used by the benchmark.

    Layers, in order, for a 28x28x1 input: Conv2D(20, 5x5, relu) ->
    MaxPooling2D(2x2, stride 2) -> Conv2D(50, 5x5, relu) ->
    MaxPooling2D(2x2, stride 2) -> Flatten -> Dense(dense_units, relu) ->
    Dense(10, softmax).

    Args:
        dense_units: Width of the hidden dense layer (default 64).
        learning_rate: Learning rate passed to the Adam optimizer.

    Returns:
        The compiled ``Sequential`` model (sparse categorical cross-entropy
        loss, accuracy metric).
    """
    # The original comments describe this network as matching a Java version.
    # NOTE(review): the hidden dense layer used to be commented as
    # "256 neurons" while the code builds 64 - confirm which width the Java
    # counterpart uses; 64 is kept as the default so behaviour is unchanged.
    model = models.Sequential([
        # First conv layer: 20 filters of size 5x5
        layers.Conv2D(20, (5, 5), strides=(1, 1), activation='relu', input_shape=(28, 28, 1)),

        # First pooling layer: 2x2 max pooling with stride 2x2
        layers.MaxPooling2D((2, 2), strides=(2, 2)),

        # Second conv layer: 50 filters of size 5x5
        layers.Conv2D(50, (5, 5), strides=(1, 1), activation='relu'),

        # Second pooling layer: 2x2 max pooling with stride 2x2
        layers.MaxPooling2D((2, 2), strides=(2, 2)),

        # Flatten the feature maps into a vector
        layers.Flatten(),

        # Hidden dense layer with `dense_units` neurons (64 by default)
        layers.Dense(dense_units, activation='relu'),

        # Output layer with 10 neurons (one per digit)
        layers.Dense(10, activation='softmax')
    ])

    model.compile(
        optimizer=tf.keras.optimizers.Adam(learning_rate),
        loss='sparse_categorical_crossentropy',
        metrics=['accuracy']
    )

    return model

def measure_training_performance(model, batch_size=64, num_epochs=10):
    """Train ``model`` on MNIST and measure wall-clock time and RSS memory.

    The timed window starts before the dataset is loaded and ends right after
    the test-set evaluation, so it covers data loading, preprocessing,
    training and evaluation.

    Args:
        model: Compiled Keras model to train (trained in place).
        batch_size: Mini-batch size passed to ``model.fit``.
        num_epochs: Number of single-epoch ``fit`` calls to run.

    Returns:
        A ``TrainingResult``. Its ``cpu_time_ms`` is the placeholder ``-1``
        because CPU time is not measured here.
    """
    gc.collect()
    time.sleep(0.1)

    # Get initial memory usage
    process = psutil.Process(os.getpid())
    start_mem = process.memory_info().rss
    start_time = time.time()

    # Load training data and scale pixel values to [0, 1]
    (x_train, y_train), (x_test, y_test) = mnist.load_data()
    x_train, x_test = x_train / 255.0, x_test / 255.0
    x_train = x_train.reshape(-1, 28, 28, 1)
    x_test = x_test.reshape(-1, 28, 28, 1)

    # Start memory tracking
    memory_tracker = MemoryTracker(500)  # 500ms interval like Java version
    memory_tracker.start()
    try:
        # Training loop: one fit() call per epoch
        print("Starting training...")
        for epoch in range(num_epochs):
            print(f"Epoch {epoch+1}/{num_epochs}")
            model.fit(
                x_train, y_train,
                batch_size=batch_size,
                epochs=1,
                verbose=0
            )

        print("Training complete!")

        # Evaluate the model
        _, test_acc = model.evaluate(x_test, y_test, verbose=0)
        print(f"Test accuracy: {test_acc:.4f}")

        # Take the end timestamp BEFORE stopping the tracker: stop() joins the
        # sampler thread, and that wait must not count as training time.
        end_time = time.time()
    finally:
        # Always stop the sampler, even if training or evaluation raised.
        memory_tracker.stop()
    end_mem = process.memory_info().rss

    # Calculate metrics
    time_ms = (end_time - start_time) * 1000
    memory_used_mb = (end_mem - start_mem) / (1024 * 1024)
    peak_memory_mb = memory_tracker.get_peak_memory_mb()

    # Create memory profile
    memory_profile = {
        'summary': memory_tracker.get_summary(),
        'time_series': memory_tracker.get_time_series_data()
    }

    print("\n===== Training Performance =====")
    print(f"Training time: {time_ms:.2f} ms ({time_ms/1000:.2f} seconds)")
    print(f"Memory used (end-start): {memory_used_mb:.2f} MB")
    print(f"Peak memory used: {peak_memory_mb:.2f} MB")
    print(f"Final model accuracy: {test_acc*100:.2f}%")

    return TrainingResult(time_ms, memory_used_mb, -1, peak_memory_mb, test_acc, memory_profile)

def test_network(model, image_path, runs=5, warmup_runs=10):
    """Benchmark inference on one image and aggregate the per-run metrics.

    Runs ``warmup_runs`` untimed warm-up predictions, then ``runs`` measured
    inferences via ``run_inference``.

    Args:
        model: Trained Keras model.
        image_path: Path of the image to classify.
        runs: Number of measured inference runs (must be at least 1).
        warmup_runs: Number of warm-up predictions before measuring.

    Returns:
        A tuple ``(inference_results, avg_time, std_dev_time, avg_memory,
        std_dev_memory, avg_peak_memory, std_dev_peak_memory,
        common_prediction)``.

    Raises:
        ValueError: If ``runs`` is smaller than 1 (there would be nothing to
            aggregate).
    """
    if runs < 1:
        raise ValueError("runs must be at least 1")

    # Warm-up phase
    print("Performing warm-up iterations...")
    for _ in range(warmup_runs):
        warmup_iteration(model, image_path)
    print("Warm-up complete, starting benchmark...")

    gc.collect()
    time.sleep(0.5)

    inference_results = []

    for i in range(runs):
        print(f"\n--- Test Run {i+1} ---")
        result = run_inference(model, image_path)
        inference_results.append(result)

    # Aggregate results (each metric list is built once and reused)
    times = [r.time_ms for r in inference_results]
    memories = [r.memory_used_mb for r in inference_results]
    peaks = [r.peak_memory_mb for r in inference_results]
    avg_time = np.mean(times)
    std_dev_time = np.std(times)
    avg_memory = np.mean(memories)
    std_dev_memory = np.std(memories)
    avg_peak_memory = np.mean(peaks)
    std_dev_peak_memory = np.std(peaks)

    # Find most common prediction
    predictions = [r.prediction for r in inference_results]
    common_prediction = max(set(predictions), key=predictions.count)

    # Report the actual number of runs instead of a hard-coded "5".
    print(f"\n===== Average Inference Results After {runs} Runs =====")
    print(f"Most common prediction: {common_prediction}")
    print(f"Average execution time: {avg_time:.2f} ms (±{std_dev_time:.2f})")
    print(f"Average memory used: {avg_memory:.2f} MB (±{std_dev_memory:.2f})")
    print(f"Average peak memory: {avg_peak_memory:.2f} MB (±{std_dev_peak_memory:.2f})")

    return inference_results, avg_time, std_dev_time, avg_memory, std_dev_memory, avg_peak_memory, std_dev_peak_memory, common_prediction

def run_inference(model, image_path):
    """Classify one image and measure wall-clock time and RSS memory.

    The timed window covers loading and preprocessing the image as well as
    the prediction itself.

    Args:
        model: Trained Keras model.
        image_path: Path of the image to classify.

    Returns:
        An ``InferenceResult``. ``prediction`` is ``-1`` when processing the
        image failed for a reason other than a missing file, and
        ``cpu_time_ms`` is the placeholder ``-1`` because CPU time is not
        measured here.

    Raises:
        FileNotFoundError: If ``image_path`` does not exist. It is propagated
            rather than reported as prediction ``-1`` so that a caller's
            ``except FileNotFoundError`` fallback can actually run.
    """
    gc.collect()
    time.sleep(0.1)

    process = psutil.Process(os.getpid())
    start_mem = process.memory_info().rss
    start_time = time.time()

    # Start memory tracking with 10ms intervals
    memory_tracker = MemoryTracker(10)
    memory_tracker.start()

    try:
        # Load and preprocess image; the `with` block closes the file handle
        # once the pixel data has been converted into an in-memory image.
        with Image.open(image_path) as source_img:
            img = source_img.convert('L').resize((28, 28))
        img_array = np.array(img) / 255.0
        img_array = img_array.reshape(1, 28, 28, 1)

        # Run prediction
        predictions = model.predict(img_array, verbose=0)
        prediction = np.argmax(predictions[0])

    except FileNotFoundError:
        # A missing image is the caller's problem; do not benchmark it.
        raise
    except Exception as e:
        print(f"Error processing image: {e}")
        prediction = -1
    finally:
        # Timestamp first so the tracker shutdown is not counted, then always
        # stop the sampler thread - also when the exception propagates.
        end_time = time.time()
        memory_tracker.stop()

    end_mem = process.memory_info().rss

    # Calculate metrics
    time_ms = (end_time - start_time) * 1000
    memory_used_mb = (end_mem - start_mem) / (1024 * 1024)
    peak_memory_mb = memory_tracker.get_peak_memory_mb()

    # Create memory profile
    memory_profile = {
        'summary': memory_tracker.get_summary(),
        'time_series': memory_tracker.get_time_series_data()
    }

    print(f"Prediction: {prediction}")
    print(f"Execution time: {time_ms:.2f} ms")
    print(f"Memory used (end-start): {memory_used_mb:.2f} MB")
    print(f"Peak memory used: {peak_memory_mb:.2f} MB")

    return InferenceResult(prediction, time_ms, memory_used_mb, -1, peak_memory_mb, memory_profile)

def warmup_iteration(model, image_path):
    """Run one untimed prediction on ``image_path`` to warm the model up.

    Warm-up is best-effort: a failure is reported on stdout and otherwise
    ignored, so it never aborts the benchmark. Returns ``None``.
    """
    try:
        # The `with` block closes the file handle once the pixel data has
        # been converted into an in-memory image.
        with Image.open(image_path) as source_img:
            img = source_img.convert('L').resize((28, 28))
        img_array = np.array(img) / 255.0
        img_array = img_array.reshape(1, 28, 28, 1)
        model.predict(img_array, verbose=0)
    except Exception as e:
        # `except Exception` (not a bare `except:`) lets KeyboardInterrupt and
        # SystemExit through, and the failure is reported instead of hidden.
        print(f"Warm-up iteration failed: {e}")

def save_to_json(inference_results, training_result, avg_time, std_dev_time,
                avg_memory, std_dev_memory, avg_peak_memory, std_dev_peak_memory,
                common_prediction, filename="hard_memory_results64.json"):
    """Write the training and inference metrics to ``filename`` as JSON.

    The document contains a timestamp, the system information, the training
    metrics, one entry per inference run and the aggregated inference
    statistics. Only the ``'summary'`` part of each memory profile is
    exported. NumPy scalars are converted to native Python values first.
    """

    def to_native(value):
        # Recursively replace NumPy scalars so the json module can encode them.
        if isinstance(value, np.generic):
            return value.item()
        if isinstance(value, dict):
            return {key: to_native(item) for key, item in value.items()}
        if isinstance(value, (list, tuple)):
            return [to_native(item) for item in value]
        return value

    timestamp = datetime.now().isoformat()
    system_info = get_system_info()

    training_section = {
        'execution_time_ms': training_result.time_ms,
        'execution_time_sec': training_result.time_ms / 1000,
        'cpu_time_ms': training_result.cpu_time_ms,
        'memory_used_mb': training_result.memory_used_mb,
        'peak_memory_mb': training_result.peak_memory_mb,
        'final_accuracy': training_result.final_accuracy,
        'memory_profile': training_result.memory_profile['summary'],
    }

    run_sections = []
    for run_number, run in enumerate(inference_results, start=1):
        run_sections.append({
            'run': run_number,
            'prediction': run.prediction,
            'execution_time_ms': run.time_ms,
            'cpu_time_ms': run.cpu_time_ms,
            'memory_used_mb': run.memory_used_mb,
            'peak_memory_mb': run.peak_memory_mb,
            'memory_profile': run.memory_profile['summary'],
        })

    summary_section = {
        'average_execution_time_ms': avg_time,
        'std_dev_execution_time_ms': std_dev_time,
        'average_memory_used_mb': avg_memory,
        'std_dev_memory_used_mb': std_dev_memory,
        'average_peak_memory_mb': avg_peak_memory,
        'std_dev_peak_memory_mb': std_dev_peak_memory,
        'most_common_prediction': common_prediction,
    }

    # Assemble the document; the measured sections go through to_native once.
    root = {
        'timestamp': timestamp,
        'system_info': system_info,
        'training': to_native(training_section),
        'inference_runs': to_native(run_sections),
        'inference_summary': to_native(summary_section),
    }

    with open(filename, 'w') as f:
        json.dump(root, f, indent=4)
    print(f"Results exported to {filename}")

def get_system_info():
    """Return a dict describing the host and the software versions in use.

    Keys: CPU count, total system memory in MB, OS name/release/architecture,
    and the Python and TensorFlow versions.
    """
    bytes_per_mb = 1024 * 1024

    info = {}
    info['available_processors'] = multiprocessing.cpu_count()
    # Total physical memory of the machine, not the amount currently free.
    info['max_memory_mb'] = psutil.virtual_memory().total / bytes_per_mb
    info['os_name'] = platform.system()
    info['os_version'] = platform.release()
    info['os_arch'] = platform.machine()
    info['python_version'] = platform.python_version()
    info['tensorflow_version'] = tf.__version__
    return info


In [15]:
def main():
    """Run the full benchmark: build, train, test inference, export, save.

    Trains the network, benchmarks inference on a fixed test image and writes
    the combined metrics to JSON. If the test image is missing, a single
    prediction on a random MNIST test sample is made instead (no JSON is
    written in that case). The trained model is saved either way.
    """
    # Create the model
    model = create_hard_network()
    model.summary()

    # Measure training performance
    print("Starting training performance measurement...")
    training_result = measure_training_performance(model)

    # Test the network
    image_path = "träningsbilder/testSample/img_1.jpg"
    try:
        # Fail fast here: the fallback below must run even if the inference
        # helpers handle image-loading errors internally instead of
        # propagating them to this function.
        if not os.path.isfile(image_path):
            raise FileNotFoundError(image_path)

        (inference_results, avg_time, std_dev_time, avg_memory, std_dev_memory,
         avg_peak_memory, std_dev_peak_memory, common_prediction) = test_network(model, image_path)

        # Save combined results to JSON
        save_to_json(inference_results, training_result, avg_time, std_dev_time,
                    avg_memory, std_dev_memory, avg_peak_memory, std_dev_peak_memory,
                    common_prediction)

    except FileNotFoundError:
        print(f"Test image not found at {image_path}. Testing on a random test sample instead.")
        (_, _), (x_test, y_test) = mnist.load_data()
        test_idx = np.random.randint(0, len(x_test))
        test_img = x_test[test_idx] / 255.0
        test_img = test_img.reshape(1, 28, 28, 1)

        # Run a single inference
        predictions = model.predict(test_img, verbose=0)
        prediction = np.argmax(predictions[0])
        print(f"Test sample true label: {y_test[test_idx]}, prediction: {prediction}")

    # Save the trained model
    model.save("trained-hard-model.h5")
    print("Model saved to trained-hard-model.h5")

# Run the benchmark only when this file is executed directly, not on import.
if __name__ == "__main__":
    main()


Model: "sequential_4"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv2d_8 (Conv2D)           (None, 24, 24, 20)        520       
                                                                 
 max_pooling2d_8 (MaxPooling  (None, 12, 12, 20)       0         
 2D)                                                             
                                                                 
 conv2d_9 (Conv2D)           (None, 8, 8, 50)          25050     
                                                                 
 max_pooling2d_9 (MaxPooling  (None, 4, 4, 50)         0         
 2D)                                                             
                                                                 
 flatten_4 (Flatten)         (None, 800)               0         
                                                                 
 dense_8 (Dense)             (None, 64)               