In [6]:
# Benchmark dataset identifiers. The order is significant: later cells pair
# each name with its position in this list (e.g. via enumerate(datasets)).
# NOTE(review): recorded outputs of some later cells show names carrying a
# "data_" prefix, so this list apparently differed between runs — confirm
# which variant each cell expects.
datasets = """
    acuteinflammation
    balancescale
    breastcancerwisc
    cardiotocography3clases
    energyy1
    energyy2
    iris
    mammographic
    pendigits
    seeds
    tictactoe
    vertebralcolumn2clases
    vertebralcolumn3clases
""".split()

In [7]:
# Sanity check: display how many dataset names are configured.
len(datasets)

13

In [12]:
import re
import os
import glob

def calculate_total_training_time(log_file_path):
    """
    Estimate the total training time (in seconds) recorded in one log file.

    The file is scanned for lines shaped like
    ``| Epoch:    50 | Train loss: ... | Epoch time: 0.2 |``. Every such
    line is treated as closing a block of epochs, and the reported
    'Epoch time' is *assumed* to be the average time per epoch inside that
    block, so the line contributes ``epoch_time * block_size``.

    Block size rules:
      * first matching line: ``current_epoch + 1`` (epochs 0..current_epoch);
      * epoch counter increased or unchanged: ``current_epoch - prev_epoch``;
      * epoch counter moved backwards: the log is assumed to contain a new
        training session that restarted from epoch 0, so the line is handled
        like a first matching line. Without this rule the block size would
        be negative and time would be silently subtracted from the total.

    Args:
        log_file_path: Path of the UTF-8 encoded log file to read.

    Returns:
        float: Estimated total training time; 0.0 when no line matches.

    Raises:
        OSError: If the file cannot be opened.
        ValueError: If the captured 'Epoch time' text is not a valid float
            (the pattern accepts any run of digits and dots).
    """
    # Matches e.g.: | Epoch:    50 | Train loss: ... | Epoch time: 0.2 |
    pattern = re.compile(r'\|\s*Epoch:\s*(\d+)\s*\|.*Epoch time:\s+([\d.]+)')

    total_time = 0.0
    prev_epoch = None
    with open(log_file_path, 'r', encoding='utf-8') as f:
        for line in f:
            match = pattern.search(line)
            if match is None:
                continue

            current_epoch = int(match.group(1))
            epoch_time_avg = float(match.group(2))  # average time per epoch (assumed)

            if prev_epoch is None or current_epoch < prev_epoch:
                # Start of a session: the block covers epochs 0..current_epoch.
                block_size = current_epoch + 1
            else:
                # The block covers (prev_epoch + 1)..current_epoch.
                block_size = current_epoch - prev_epoch

            total_time += epoch_time_avg * block_size
            prev_epoch = current_epoch

    return total_time

if __name__ == "__main__":
    # Activation functions and training variants whose run directories are scanned.
    acts = ['tanh', 'relu', 'sigmoid', 'cr', 'none']
    types = ['Normal', 'Robust', 'Mixed']

    results = []               # one record per (activation, variant, dataset)
    baseline_total_array = []  # the same totals as a flat list, in loop order

    for a in acts:
        for t in types:
            for j, ds in enumerate(datasets):
                # Accumulate the estimated training time over seeds 0..9.
                total = 0
                for i in range(10):
                    log_path = f'./{a}_10_FaultAnalysis{t}/log/data_{j:02}_{ds}_seed_{i:02d}_*.log'
                    matching_files = glob.glob(log_path)
                    if matching_files:
                        # Only the first file matching the wildcard is used.
                        log_path = matching_files[0]
                        total_training_time = calculate_total_training_time(log_path)
                        total += total_training_time
                    else:
                        # A missing log contributes nothing to the total.
                        print(f"Log file not found: {log_path}")

                baseline_total_array.append(total)

                # Break the total (seconds) down for the human-readable report.
                days = int(total // 86400)
                hours = int((total % 86400) // 3600)
                minutes = int((total % 3600) // 60)
                seconds = total % 60

                # The value is stored twice: under a condition-specific key
                # and under the fixed 'total_seconds' key.
                record = {f'{t}_{a}': total, 'dataset': ds, 'total_seconds': total}
                results.append(record)
                print(f"Total training time for {ds} [{a}, {t}]: {days} days, {hours} hours, {minutes} minutes and {seconds:.2f} seconds")


Total training time for acuteinflammation [tanh, Normal]: 0 days, 8 hours, 54 minutes and 25.70 seconds
Total training time for balancescale [tanh, Normal]: 0 days, 8 hours, 25 minutes and 30.30 seconds
Total training time for breastcancerwisc [tanh, Normal]: 0 days, 9 hours, 54 minutes and 36.80 seconds
Total training time for cardiotocography3clases [tanh, Normal]: 0 days, 14 hours, 19 minutes and 36.30 seconds
Total training time for energyy1 [tanh, Normal]: 0 days, 10 hours, 44 minutes and 39.20 seconds
Total training time for energyy2 [tanh, Normal]: 0 days, 10 hours, 39 minutes and 55.10 seconds
Total training time for iris [tanh, Normal]: 0 days, 8 hours, 23 minutes and 29.40 seconds
Total training time for mammographic [tanh, Normal]: 0 days, 9 hours, 5 minutes and 1.00 seconds
Total training time for pendigits [tanh, Normal]: 0 days, 16 hours, 9 minutes and 20.80 seconds
Total training time for seeds [tanh, Normal]: 0 days, 9 hours, 29 minutes and 44.60 seconds
Total training 

In [13]:
# Display the collected records (one dict per activation/variant/dataset).
results

[{'Normal_tanh': 32065.699999999997,
  'dataset': 'acuteinflammation',
  'total_seconds': 32065.699999999997},
 {'Normal_tanh': 30330.300000000007,
  'dataset': 'balancescale',
  'total_seconds': 30330.300000000007},
 {'Normal_tanh': 35676.8,
  'dataset': 'breastcancerwisc',
  'total_seconds': 35676.8},
 {'Normal_tanh': 51576.3,
  'dataset': 'cardiotocography3clases',
  'total_seconds': 51576.3},
 {'Normal_tanh': 38679.2, 'dataset': 'energyy1', 'total_seconds': 38679.2},
 {'Normal_tanh': 38395.100000000006,
  'dataset': 'energyy2',
  'total_seconds': 38395.100000000006},
 {'Normal_tanh': 30209.399999999998,
  'dataset': 'iris',
  'total_seconds': 30209.399999999998},
 {'Normal_tanh': 32700.999999999996,
  'dataset': 'mammographic',
  'total_seconds': 32700.999999999996},
 {'Normal_tanh': 58160.799999999996,
  'dataset': 'pendigits',
  'total_seconds': 58160.799999999996},
 {'Normal_tanh': 34184.6, 'dataset': 'seeds', 'total_seconds': 34184.6},
 {'Normal_tanh': 39151.3, 'dataset': 'tict

In [18]:
import pandas as pd

# Reshape `results` into long format, one row per record:
# {'dataset': <dataset>, 'condition': <condition>, 'total_seconds': <value>}
reserved_keys = ('dataset', 'total_seconds')
new_results = []
for d in results:
    # The condition is the first key that is not one of the fixed fields.
    condition = next((k for k in d if k not in reserved_keys), None)
    if condition is None:
        continue
    new_results.append({
        'dataset': d['dataset'],
        'condition': condition,
        'total_seconds': d[condition],
    })

df_long = pd.DataFrame(new_results)

# Datasets become rows, conditions become (alphabetically sorted) columns.
pivot_df = (
    df_long
    .pivot(index='dataset', columns='condition', values='total_seconds')
    .sort_index(axis=1)
)

# Persist the table as an Excel sheet, keeping the dataset index.
pivot_df.to_excel("training_times_pivot.xlsx", index=True)
print("Pivoted results saved to training_times_pivot.xlsx")


Pivoted results saved to training_times_pivot.xlsx


In [17]:
import pandas as pd
# Convert results to a DataFrame. Each record stores its value under its own
# condition key (e.g. 'Normal_tanh'), so the frame is wide and sparse: one
# column per condition, filled only on the rows belonging to that condition.
df = pd.DataFrame(results)

# Condition names in the same order the records were produced.
columns = [f'{t}_{a}' for a in acts for t in types]

# Build the table: rows = dataset, columns = condition, values = seconds.
# Passing the condition columns to DataFrame.pivot(columns=...) would pivot on
# their *values* (the times) and yield a MultiIndex of numbers instead of one
# column per condition. Taking the first non-null entry per dataset collapses
# the sparse rows into the intended table.
present_columns = [c for c in columns if c in df.columns]
pivot_df = df.groupby('dataset')[present_columns].first()

# Save the pivot table to an Excel file.
pivot_df.to_excel("training_times_pivot.xlsx")
print("Pivoted results saved to training_times_pivot.xlsx")

Pivoted results saved to training_times_pivot.xlsx


In [5]:
import re
import os

def calculate_total_training_time(log_file_path):
    """
    Estimate the total training time (in seconds) recorded in one log file.

    The file is scanned for lines shaped like
    ``| Epoch:    50 | Train loss: ... | Epoch time: 0.2 |``. Every such
    line is treated as closing a block of epochs, and the reported
    'Epoch time' is *assumed* to be the average time per epoch inside that
    block, so the line contributes ``epoch_time * block_size``.

    Block size rules:
      * first matching line: ``current_epoch + 1`` (epochs 0..current_epoch);
      * epoch counter increased or unchanged: ``current_epoch - prev_epoch``;
      * epoch counter moved backwards: the log is assumed to contain a new
        training session that restarted from epoch 0, so the line is handled
        like a first matching line. Without this rule the block size would
        be negative and time would be silently subtracted from the total.

    Args:
        log_file_path: Path of the UTF-8 encoded log file to read.

    Returns:
        float: Estimated total training time; 0.0 when no line matches.

    Raises:
        OSError: If the file cannot be opened.
        ValueError: If the captured 'Epoch time' text is not a valid float
            (the pattern accepts any run of digits and dots).
    """
    # Matches e.g.: | Epoch:    50 | Train loss: ... | Epoch time: 0.2 |
    pattern = re.compile(r'\|\s*Epoch:\s*(\d+)\s*\|.*Epoch time:\s+([\d.]+)')

    total_time = 0.0
    prev_epoch = None
    with open(log_file_path, 'r', encoding='utf-8') as f:
        for line in f:
            match = pattern.search(line)
            if match is None:
                continue

            current_epoch = int(match.group(1))
            epoch_time_avg = float(match.group(2))  # average time per epoch (assumed)

            if prev_epoch is None or current_epoch < prev_epoch:
                # Start of a session: the block covers epochs 0..current_epoch.
                block_size = current_epoch + 1
            else:
                # The block covers (prev_epoch + 1)..current_epoch.
                block_size = current_epoch - prev_epoch

            total_time += epoch_time_avg * block_size
            prev_epoch = current_epoch

    return total_time

if __name__ == "__main__":
    # Initial placeholders; `i` and `ds` are rebound by the loop below.
    ds, i, j = datasets[0], 0, 0

    proposed_total_array = []  # one total (seconds) per dataset, in list order
    total = 0
    # NOTE(review): `target` is not defined in this cell; it is indexed as
    # target[0][i] to fill the 4-digit factor in the file name — confirm
    # where it is created.
    for i, ds in enumerate(datasets):
        total = 0
        for seed in range(1):  # only seed 0 is evaluated
            log_path = f'/home/kit/itec/qc0876/projects/PowerAwarePNN/PowerAwarePNN/maincode/PowerAwareAugmentedLagrangian/log/{ds}_seed_{seed}_Penalty_AL_Factor_{int(target[0][i]):04d}.log'
            # A missing log silently contributes zero seconds.
            if os.path.exists(log_path):
                total_training_time = calculate_total_training_time(log_path)
            else:
                total_training_time = 0
            total += total_training_time
        proposed_total_array.append(total)

        # Break the total (seconds) down for the human-readable report.
        days = int(total // 86400)
        hours = int((total % 86400) // 3600)
        minutes = int((total % 3600) // 60)
        seconds = total % 60
        print(f"Total training time for {ds}: {days} days, {hours} hours, {minutes} minutes and {seconds:.2f} seconds")


Total training time for data_acuteinflammation: 0 days, 0 hours, 20 minutes and 37.20 seconds
Total training time for data_balancescale: 0 days, 9 hours, 13 minutes and 25.50 seconds
Total training time for data_breastcancerwisc: 0 days, 0 hours, 33 minutes and 20.40 seconds
Total training time for data_cardiotocography3clases: 0 days, 8 hours, 40 minutes and 16.20 seconds
Total training time for data_energyy1: 0 days, 2 hours, 18 minutes and 30.60 seconds
Total training time for data_energyy2: 0 days, 0 hours, 30 minutes and 15.40 seconds
Total training time for data_iris: 0 days, 1 hours, 29 minutes and 0.40 seconds
Total training time for data_mammographic: 0 days, 0 hours, 29 minutes and 0.40 seconds
Total training time for data_pendigits: 0 days, 0 hours, 0 minutes and 0.00 seconds
Total training time for data_seeds: 0 days, 2 hours, 59 minutes and 20.40 seconds
Total training time for data_tictactoe: 0 days, 3 hours, 8 minutes and 10.50 seconds
Total training time for data_verteb

In [6]:
# Compare the average training time of the baseline and proposed runs.

# Keep only strictly positive totals (a zero marks a dataset without logs).
filtered_baseline = list(filter(lambda secs: secs > 0, baseline_total_array))
filtered_proposed = list(filter(lambda secs: secs > 0, proposed_total_array))

# Mean training time in seconds for each approach.
avg_baseline = sum(filtered_baseline) / len(filtered_baseline)
avg_proposed = sum(filtered_proposed) / len(filtered_proposed)

# Relative saving in percent, and how many times longer the baseline takes.
time_reduction = 100 * (1 - avg_proposed / avg_baseline)
relative_time_ratio = avg_baseline / avg_proposed

avg_baseline, avg_proposed, time_reduction, relative_time_ratio


(498123.09166666795, 11244.816666666666, 97.74256266075867, 44.298018049797875)

In [7]:
# Convert the two averages from seconds to minutes. The literals are pasted
# from the output of the preceding cell (avg_baseline, avg_proposed).
498123.09166666795/60, 11244.816666666666/60

(8302.0515277778, 187.4136111111111)

In [14]:
# Sum of two minute values pasted from other cells' outputs
# (135.858... s / 60 and 11244.816... s / 60).
2.264305555555555 + 187.4136111111111

189.67791666666665

In [2]:
# Seconds-to-minutes conversion of two hard-coded values.
# NOTE(review): the cell that produced these numbers is not shown in this
# notebook — confirm their origin.
18576.175/60, 1089.141666666647/60

(309.60291666666666, 18.152361111110785)

In [11]:
import re
import os

def calculate_total_training_time(log_file_path):
    """
    Estimate the total training time (in seconds) recorded in one log file.

    The file is scanned for lines shaped like
    ``| Epoch:    50 | Train loss: ... | Epoch time: 0.2 |``. Every such
    line is treated as closing a block of epochs, and the reported
    'Epoch time' is *assumed* to be the average time per epoch inside that
    block, so the line contributes ``epoch_time * block_size``.

    Block size rules:
      * first matching line: ``current_epoch + 1`` (epochs 0..current_epoch);
      * epoch counter increased or unchanged: ``current_epoch - prev_epoch``;
      * epoch counter moved backwards: the log is assumed to contain a new
        training session that restarted from epoch 0, so the line is handled
        like a first matching line. Without this rule the block size would
        be negative and time would be silently subtracted from the total.

    Args:
        log_file_path: Path of the UTF-8 encoded log file to read.

    Returns:
        float: Estimated total training time; 0.0 when no line matches.

    Raises:
        OSError: If the file cannot be opened.
        ValueError: If the captured 'Epoch time' text is not a valid float
            (the pattern accepts any run of digits and dots).
    """
    # Matches e.g.: | Epoch:    50 | Train loss: ... | Epoch time: 0.2 |
    pattern = re.compile(r'\|\s*Epoch:\s*(\d+)\s*\|.*Epoch time:\s+([\d.]+)')

    total_time = 0.0
    prev_epoch = None
    with open(log_file_path, 'r', encoding='utf-8') as f:
        for line in f:
            match = pattern.search(line)
            if match is None:
                continue

            current_epoch = int(match.group(1))
            epoch_time_avg = float(match.group(2))  # average time per epoch (assumed)

            if prev_epoch is None or current_epoch < prev_epoch:
                # Start of a session: the block covers epochs 0..current_epoch.
                block_size = current_epoch + 1
            else:
                # The block covers (prev_epoch + 1)..current_epoch.
                block_size = current_epoch - prev_epoch

            total_time += epoch_time_avg * block_size
            prev_epoch = current_epoch

    return total_time

if __name__ == "__main__":
    # Initial placeholders; `ds` is rebound by the loop below.
    ds, i, j = datasets[0], 0, 0

    mlflow_total_array = []  # one total (seconds) per dataset, in list order
    for ds in datasets:
        total = 0
        # Only seed 0 with factor 0000 is evaluated for this variant.
        log_path = f'/home/kit/itec/qc0876/projects/PowerAwarePNN/PowerAwarePNN/maincode/v3_MlflowPowerAwareAugmentedLagrangian/log/{ds}_seed_{0}_Penalty_AL_Factor_0000.log'
        if not os.path.exists(log_path):
            # A missing log is reported and counted as zero seconds.
            total_training_time = 0
            print(f"Log file not found: {log_path}")
        else:
            total_training_time = calculate_total_training_time(log_path)
        total += total_training_time
        mlflow_total_array.append(total)

        # Break the total (seconds) down for the human-readable report.
        days = int(total // 86400)
        hours = int((total % 86400) // 3600)
        minutes = int((total % 3600) // 60)
        seconds = total % 60
        print(f"Total training time for {ds}: {days} days, {hours} hours, {minutes} minutes and {seconds:.2f} seconds")


Total training time for data_acuteinflammation: 0 days, 0 hours, 3 minutes and 10.30 seconds
Total training time for data_balancescale: 0 days, 0 hours, 0 minutes and 30.30 seconds
Total training time for data_breastcancerwisc: 0 days, 0 hours, 3 minutes and 10.40 seconds
Total training time for data_cardiotocography3clases: 0 days, 0 hours, 0 minutes and 46.10 seconds
Total training time for data_energyy1: 0 days, 0 hours, 3 minutes and 10.40 seconds
Total training time for data_energyy2: 0 days, 0 hours, 3 minutes and 15.30 seconds
Total training time for data_iris: 0 days, 0 hours, 2 minutes and 30.40 seconds
Total training time for data_mammographic: 0 days, 0 hours, 3 minutes and 15.40 seconds
Log file not found: /home/kit/itec/qc0876/projects/PowerAwarePNN/PowerAwarePNN/maincode/v3_MlflowPowerAwareAugmentedLagrangian/log/data_pendigits_seed_0_Penalty_AL_Factor_0000.log
Total training time for data_pendigits: 0 days, 0 hours, 0 minutes and 0.00 seconds
Total training time for data

In [12]:
# Average training time (seconds) of the mlflow runs.

# Keep only strictly positive totals (a zero marks a dataset without a log).
filtered_mlflow = list(filter(lambda secs: secs > 0, mlflow_total_array))

avg_mlflow = sum(filtered_mlflow) / len(filtered_mlflow)

avg_mlflow

135.85833333333332

In [13]:
# avg_mlflow converted from seconds to minutes; the literal is pasted from
# the output of the preceding cell.
135.85833333333332/60

2.264305555555555

In [16]:
import numpy as np
# Report the proposed method's average training time in readable units.
mean_total = avg_proposed
remainder_of_day = mean_total % 86400  # seconds left after whole days
days = int(mean_total // 86400)
hours = int(remainder_of_day // 3600)
minutes = int((mean_total % 3600) // 60)
seconds = mean_total % 60
print(f"Total training time: {days} days, {hours} hours, {minutes} minutes and {seconds:.2f} seconds")


Total training time: 0 days, 0 hours, 18 minutes and 9.14 seconds


In [11]:
# Display both per-dataset total lists (seconds) side by side.
baseline_total_array, proposed_total_array

([27681.300000000003,
  11748.9,
  15644.69999999998,
  42854.00000000001,
  22705.099999999988,
  23772.300000000036,
  13790.399999999996,
  13965.899999999996,
  0,
  12467.299999999994,
  15468.499999999993,
  11545.899999999992,
  11269.8],
 [148.09999999999826,
  1870.4999999998458,
  85.09999999999948,
  2162.6000000000236,
  423.4000000000096,
  133.89999999999986,
  442.19999999999794,
  1395.599999999886,
  0,
  2193.3000000001725,
  1140.9999999999288,
  1828.6999999999882,
  1245.2999999999147])

In [93]:
# Display both per-dataset total lists (seconds) side by side.
# NOTE(review): same expression as an earlier cell — one of the two can
# probably be removed.
baseline_total_array, proposed_total_array

([27681.300000000003,
  11748.9,
  15644.69999999998,
  42854.00000000001,
  22705.099999999988,
  23772.300000000036,
  13790.399999999996,
  13965.899999999996,
  0,
  12467.299999999994,
  15468.499999999993,
  11545.899999999992,
  11269.8],
 [148.09999999999826,
  1870.4999999998458,
  85.09999999999948,
  2162.6000000000236,
  423.4000000000096,
  133.89999999999986,
  442.19999999999794,
  1395.599999999886,
  0,
  2193.3000000001725,
  1140.9999999999288,
  1828.6999999999882,
  1245.2999999999147])

In [None]:
import re
from datetime import datetime

# Wall-clock training time of the baseline runs, derived from log timestamps
# and summed per dataset over seeds 0..9 and the even penalty factors 0..98
# (factor 58 is skipped).
baseline_total_array = []
for ds in datasets:
    total = 0
    for i in range(10):
        for j in range(0, 100, 2):
            log_file_path = f'/home/kit/itec/qc0876/projects/PowerAwarePNN/PowerAwarePNN/maincode/PowerAwareBaselineHS/log/{ds}_seed_{i}_Penalty_power_Factor_{j}.log'
            if j == 58:
                continue

            # Load the whole log; a missing file is reported and skipped.
            try:
                with open(log_file_path, 'r') as file:
                    log_lines = file.readlines()
            except FileNotFoundError:
                print(f"Log file not found: {log_file_path}")
                continue

            # Add up the gaps between consecutive timestamps. A session
            # marker clears the reference point, so the pause before a new
            # session is not counted.
            total_training_time = 0
            start_time = None
            for line in log_lines:
                if "Training network on device: cpu." in line:
                    start_time = None  # a new session begins on this line
                if "Training was already finished." in line:
                    break  # nothing after this marker is counted

                match = re.search(r'\d{4}-\d{2}-\d{2} \d{2}:\d{2}:\d{2},\d{3}', line)
                if not match:
                    continue
                current_time = datetime.strptime(match.group(), '%Y-%m-%d %H:%M:%S,%f')
                if start_time is not None:
                    total_training_time += (current_time - start_time).total_seconds()
                start_time = current_time

            total += total_training_time

    # Break the dataset total (seconds) down for the human-readable report.
    days = int(total // 86400)
    hours = int((total % 86400) // 3600)
    minutes = int((total % 3600) // 60)
    seconds = total % 60
    print(f"Dataset: {ds}")
    print(f"Total training time: {days} days, {hours} hours, {minutes} minutes and {seconds:.2f} seconds")
    baseline_total_array.append(total)

In [67]:
# Display the per-dataset baseline totals (seconds).
baseline_total_array

[301452833.7129997,
 305486729.6469996,
 304775707.1890001,
 297958326.01500016,
 302933716.55099976,
 290528358.09800017,
 304824297.86800003,
 305147568.0889999,
 0,
 305334140.53900003,
 304810356.84200007,
 305606236.00200003,
 305684637.69299984]

In [68]:
import numpy as np
# NOTE(review): the mean computed on the next line is overwritten right away
# by the entry at index 3, so only that single entry is displayed and reused.
# Confirm which of the two values was intended.
total = np.mean(baseline_total_array)
total = baseline_total_array[3]
total

297958326.01500016

In [52]:
# Display the per-dataset baseline totals (seconds).
# NOTE(review): the recorded output differs from the other display of this
# list, so it stems from a different run — confirm which one is current.
baseline_total_array

[989629.0160000001,
 747224.6640000003,
 842461.8100000002,
 2910353.376000002,
 302405801.02400017,
 302047625.7630001,
 304408486.0039997,
 303469710.964,
 0,
 304245809.47299993,
 303908692.1930001,
 304897891.93299997,
 303762392.625]

In [69]:
# Split `total` (seconds) into readable units, largest unit first.
remainder_of_day = total % 86400  # seconds left after whole days
days = int(total // 86400)
hours = int(remainder_of_day // 3600)
minutes = int((total % 3600) // 60)
seconds = total % 60
print(f"Total training time: {days} days, {hours} hours, {minutes} minutes and {seconds:.2f} seconds")

Total training time: 82766 hours 12 minutes 6.02 seconds
Total training time: 3448 days, 14 hours, 12 minutes and 6.02 seconds


In [37]:
import re
from datetime import datetime

# Wall-clock time covered by the timestamps of each dataset's log (seed 0).
# Exactly one value is appended per dataset so the list stays aligned with
# `datasets`; 0 marks a dataset whose log is missing or has no timestamps.
# NOTE(review): `target` is not defined in this cell; it is indexed as
# target[0][i] to fill the 4-digit factor in the file name — confirm where it
# is created.
proposed_total_array = []
timestamp_pattern = re.compile(r'\d{4}-\d{2}-\d{2} \d{2}:\d{2}:\d{2},\d{3}')
for i, ds in enumerate(datasets):
    log_file_path = f'/home/kit/itec/qc0876/projects/PowerAwarePNN/PowerAwarePNN/maincode/PowerAwareAugmentedLagrangianHS/log/{ds}_seed_0_Penalty_AL_Factor_{int(target[0][i]):04d}.log'
    # Read the log file
    try:
        with open(log_file_path, 'r') as file:
            log_lines = file.readlines()
    except FileNotFoundError:
        print(f"Log file not found: {log_file_path}")
        # Keep the position of this dataset instead of shifting later entries.
        proposed_total_array.append(0)
        continue

    # Extract every timestamp found in the log, in file order.
    timestamps = []
    for line in log_lines:
        match = timestamp_pattern.search(line)
        if match:
            timestamps.append(datetime.strptime(match.group(), '%Y-%m-%d %H:%M:%S,%f'))

    if timestamps:
        # Sum of the gaps between consecutive timestamps. Pauses between
        # separate sessions in the same log are included in this sum.
        total_training_time = sum(
            (later - earlier).total_seconds()
            for earlier, later in zip(timestamps, timestamps[1:])
        )
        print(f"Total training time: {total_training_time} seconds")
    else:
        # Reset explicitly; otherwise the previous dataset's value would be
        # appended again (or the name would be undefined on the first pass).
        total_training_time = 0
        print("No timestamps found in the log file.")
    proposed_total_array.append(total_training_time)

Total training time: 421906.49700000003 seconds
Total training time: 752232.4269999999 seconds
Total training time: 752193.716 seconds
Total training time: 747732.25 seconds
Total training time: 702051.645 seconds
Total training time: 1031941.4550000001 seconds
Total training time: 1032028.729 seconds
Total training time: 1031941.9330000001 seconds
Log file not found: /home/kit/itec/qc0876/projects/PowerAwarePNN/PowerAwarePNN/maincode/PowerAwareAugmentedLagrangianHS/log/data_pendigits_seed_0_Penalty_AL_Factor_0600.log
Total training time: 1031907.553 seconds
Total training time: 1031826.9 seconds
Total training time: 1031823.306 seconds
Total training time: 1031789.056 seconds


In [12]:
# Report `total_training_time` (seconds) twice: first with hours uncapped,
# then split into days plus the remaining hours.
minutes = int((total_training_time % 3600) // 60)
seconds = total_training_time % 60

hours = int(total_training_time // 3600)  # total hours, may exceed 24
print(f"Total training time: {hours} hours {minutes} minutes {seconds:.2f} seconds")

days = int(total_training_time // 86400)
hours = int((total_training_time % 86400) // 3600)  # hours within the last day
print(f"Total training time: {days} days, {hours} hours, {minutes} minutes and {seconds:.2f} seconds")

Total training time: 117 hours 11 minutes 46.50 seconds
Total training time: 4 days, 21 hours, 11 minutes and 46.50 seconds


In [7]:
# Project setup: extend the import path, load the project modules and build
# the default argument object.
import os
import sys
from pathlib import Path
# Make the working directory, its parent and ./utils importable.
sys.path.append(os.getcwd())
sys.path.append(str(Path(os.getcwd()).parent))
sys.path.append(os.path.join(os.getcwd(), 'utils'))
# NOTE(review): the recorded output of this cell is a ModuleNotFoundError for
# 'configuration', so the notebook was run from a directory where these
# paths do not reach the project modules.
from configuration import *
import torch
import pprint
import pNN_Power_Aware as pNN
from utils import *

# NOTE(review): `parser` and `FormulateArgs` are not defined in this cell;
# presumably they come from one of the star imports above — confirm.
# parse_args([]) parses an empty argument list, i.e. no command-line values.
args = parser.parse_args([])
args = FormulateArgs(args)

ModuleNotFoundError: No module named 'configuration'

In [None]:

# Path (relative to the working directory) of one saved baseline model.
file_path = "./maincode/PowerAwareBaselineHS/models/pNN_data_tictactoe_seed_4_Penalty_power_Factor_68_FT.model"

# NOTE(review): torch.load deserializes with pickle — only load files from a
# trusted source. The recorded output of this cell is a ModuleNotFoundError
# for 'pNN_Power_Aware', so that module must be importable before loading.
model_PT = torch.load(file_path)

ModuleNotFoundError: No module named 'pNN_Power_Aware'