# Instalamos las dependencias
Necesitamos `numpy` y `pandas`; ambos se instalan a partir de `requirements.txt`.

In [1]:
# Conda alternative, left disabled: !conda install -y numpy pandas
# Install the packages listed in requirements.txt using the %pip magic.
%pip install -r requirements.txt

Note: you may need to restart the kernel to use updated packages.


In [2]:
import numpy as np
import pandas as pd

# NOTE(review): the meaning and units of these bounds are not shown in this
# notebook. They look like the lower bound and the per-class (low/mid/high)
# upper bounds used when generating profiles; confirm against ProfileGenerator.
MIN_VALUE = 1e7

# Presumably the upper bounds of the "cpu" field for each profile class.
LOW_CPU_MAX = 1e9
MID_CPU_MAX = 5e9
HIGH_CPU_MAX = 1e11

# Presumably the upper bounds of the "com" field for each profile class.
LOW_COM_MAX = 1e6
MID_COM_MAX = 1e8
HIGH_COM_MAX = 1e9


def compute_walltime(cpu, com, np_val, scale_factor=2200):
    """Estimate a walltime from a profile's cpu, com and np values.

    The estimate is ``(cpu + com) / (HIGH_CPU_MAX + HIGH_COM_MAX)`` multiplied
    by ``scale_factor`` and divided by ``np_val``.

    Scalars and array-likes (NumPy arrays, pandas Series) are both accepted,
    so the function can be applied to whole columns at once; array inputs
    follow the usual NumPy broadcasting rules.

    Args:
        cpu: "cpu" value(s) of the profile.
        com: "com" value(s) of the profile.
        np_val: "np" value(s) of the profile; every value must be > 0.
        scale_factor: Multiplier applied to the normalised load
            (default 2200).

    Returns:
        The estimated walltime, a scalar for scalar inputs and an array-like
        for array-like inputs.

    Raises:
        ValueError: If ``np_val`` (or any of its elements) is <= 0.
    """
    # np.any(...) makes the check valid for arrays/Series as well; a plain
    # `if np_val <= 0` raises "truth value is ambiguous" for those inputs.
    if np.any(np.asarray(np_val) <= 0):
        raise ValueError("np_val debe ser un número positivo.")

    # Base time factor: cpu + com normalised by the largest possible sum.
    base_time = (cpu + com) / (HIGH_CPU_MAX + HIGH_COM_MAX)

    # Scale the base factor and divide it by np_val.
    walltime = base_time * scale_factor / np_val

    return walltime


# Load the profiles and the per-job results of the easy-backfill run.
# NOTE(review): profiles are looked up as columns of df_profiles below, so
# profiles.json is expected to be keyed by profile name at the top level.
df_profiles = pd.read_json('profiles.json')
df_jobs = pd.read_csv('../../output/easy-backfill/easy-backfill_jobs.csv')

# Relative errors (in percent) of the recalculated walltime vs. execution_time
relative_errors = []
# Exactly one entry per row of df_jobs, so the list can be assigned as a column
recalculated_walltimes = []

# Walk every job in df_jobs
for index, row in df_jobs.iterrows():
    print(row[['profile', 'execution_time']])

    profile_name = row['profile']

    # The profile name is the key (column label) into df_profiles
    if profile_name not in df_profiles.columns:
        # Unknown profile: record NaN so the list stays aligned with df_jobs.
        # Skipping the row would make the column assignment at the end fail
        # with a length mismatch.
        recalculated_walltimes.append(np.nan)
        continue

    profile_info = df_profiles[profile_name]
    print(profile_info)

    # Values needed to recompute the walltime
    cpu = profile_info.loc['cpu']
    com = profile_info.loc['com']
    np_val = profile_info.loc['np']

    # Recompute the walltime with compute_walltime
    recalculated_walltime = compute_walltime(cpu, com, np_val)
    recalculated_walltimes.append(recalculated_walltime)

    execution_time = row['execution_time']

    # Relative error in percent; jobs with execution_time == 0 are reported
    # and left out of the statistics to avoid dividing by zero.
    if execution_time != 0:
        relative_error = ((recalculated_walltime - execution_time) / execution_time) * 100
        relative_errors.append(relative_error)

        print(f"Walltime recalculado para {profile_name}: {recalculated_walltime} - {execution_time} = {relative_error}%")
    else:
        print(f"{profile_name} tiene un execution_time de 0.")

print('------------------')
# Summary statistics over the relative errors (skipped when none were computed)
if relative_errors:
    mean_relative_error = np.mean(relative_errors)
    std_relative_error = np.std(relative_errors)

    print(f"Media del error relativo entre walltime recalculado y execution_time: {mean_relative_error}%")
    print(f"Desviación estándar del error relativo entre walltime recalculado y execution_time: {std_relative_error}%")

# Add the recalculated walltimes to df_jobs as a new column (NaN where the
# job's profile was not found in df_profiles)
df_jobs['recalculated_walltime'] = recalculated_walltimes


profile            low_23
execution_time    7.08488
Name: 0, dtype: object
type        parallel_homogeneous
cpu                   3.4356e+08
com                       928580
np                             1
walltime                 12.2788
Name: low_23, dtype: object
Walltime recalculado para low_23: 7.503706343165772 - 7.084875 = 5.911626431881597%
profile            low_60
execution_time    3.58278
Name: 1, dtype: object
type        parallel_homogeneous
cpu                  1.73736e+08
com                       223750
np                             1
walltime                 6.20056
Name: low_60, dtype: object
Walltime recalculado para low_60: 3.7892297802419637 - 3.5827839999999997 = 5.762160940820433%
profile            med_76
execution_time    56.2442
Name: 2, dtype: object
type        parallel_homogeneous
cpu                  2.72739e+09
com                  6.07562e+07
np                             1
walltime                 99.3796
Name: med_76, dtype: object
Walltime recalcul

In [3]:
from src.ProfileGenerator import ProfileGenerator

In [4]:
# Instantiate the profile generator
generator = ProfileGenerator(max_resources=4)

# Generate the profiles; the call is the last expression of the cell, so its
# return value is what the cell displays
number_of_profiles = 50
generator.generate(
    number_of_profiles,
    low_percent=0.4,
    med_percent=0.6,
    high_percent=0.0,
)

{'low_1': {'type': 'parallel_homogeneous',
  'cpu': 904220766.9358256,
  'com': 414025.99862903706,
  'np': 1,
  'walltime': 19.704916281740598},
 'low_2': {'type': 'parallel_homogeneous',
  'cpu': 615282404.9142865,
  'com': 726299.1140083517,
  'np': 1,
  'walltime': 13.418011374873748},
 'low_3': {'type': 'parallel_homogeneous',
  'cpu': 616237626.9734203,
  'com': 642160.094532098,
  'np': 1,
  'walltime': 13.436985460886092},
 'low_4': {'type': 'parallel_homogeneous',
  'cpu': 249822431.99324274,
  'com': 958361.027464606,
  'np': 1,
  'walltime': 5.462551927183725},
 'low_5': {'type': 'parallel_homogeneous',
  'cpu': 219066790.40798578,
  'com': 721943.1670575931,
  'np': 1,
  'walltime': 4.787477365000945},
 'low_6': {'type': 'parallel_homogeneous',
  'cpu': 62279189.43804058,
  'com': 451125.1088800771,
  'np': 1,
  'walltime': 1.366402891121044},
 'low_7': {'type': 'parallel_homogeneous',
  'cpu': 192099714.4879804,
  'com': 743649.3923592097,
  'np': 1,
  'walltime': 4.200548

In [5]:
# Convert the generated profiles to a pandas DataFrame and display its first rows
df = generator.to_dataframe()
df.head()

Unnamed: 0,type,cpu,com,np,walltime
low_1,parallel_homogeneous,904220800.0,414025.998629,1,19.704916
low_2,parallel_homogeneous,615282400.0,726299.114008,1,13.418011
low_3,parallel_homogeneous,616237600.0,642160.094532,1,13.436985
low_4,parallel_homogeneous,249822400.0,958361.027465,1,5.462552
low_5,parallel_homogeneous,219066800.0,721943.167058,1,4.787477


In [6]:
# Convert the generated profiles to their JSON representation (to be displayed)
json_representation = generator.to_json()