In [None]:
import os
from collections import defaultdict

import numpy as np
import plotly.express as px
from molmass import Formula

import script_maker2000 as scr_mkr

# Location of the job-backup JSON that the rest of the notebook analyses.
# Set the CPU_BENCHMARK_JOB_BACKUP environment variable to point the notebook
# at a copy on another machine; without it the original local path is used.
file_path = os.environ.get(
    "CPU_BENCHMARK_JOB_BACKUP",
    r"C:\Users\AK\Documents\Daskalakis_Gwydion\AC-Forschi\cpu_benchmark\cpu_benchmark_job_backup.json",
)

In [None]:
# Method labels handed to the extractor as `filter_list`.
filter_list = ["PBEh_3c_opt", "r2SCAN3c", "PBEh3c_freq", "B3LYP_D4", "PBEh3c"]

# Job-statistics columns handed to the extractor as `columns_with_units`.
columns_with_units = [
    "CPUTimeRAW",
    "ElapsedRaw",
    "TimelimitRaw",
    "ConsumedEnergyRaw",
    "MaxDiskRead",
    "MaxDiskWrite",
    "MaxVMSize",
    "ReqMem",
]

# Load the saved job backup and build the efficiency dataframe from it.
jb = scr_mkr.analysis.load_job_backup(file_path)
df = scr_mkr.analysis.extract_efficency_dataframe(
    jb,
    filter_list=filter_list,
    columns_with_units=columns_with_units,
)

In [None]:
# Display the extracted efficiency dataframe for a quick visual check.
df

In [None]:
# Restrict the analysis to the "B3LYP_D4" entries and preview the first rows.
# NOTE(review): the exact matching rule lives in filter_dataframe — confirm there.
df_filtered = scr_mkr.analysis.filter_dataframe(df, "B3LYP_D4")
df_filtered.head()

In [None]:
# Dimensions of the dataframe returned by the "B3LYP_D4" filter.
df_filtered.shape

In [None]:
# Hard-coded histogram: 16 counts (summing to 13320) and the 17 edges that
# delimit them. The edges are evenly spaced, 125.9396875 apart, from 30.102
# to 2045.137.
# NOTE(review): presumably a molecular-mass distribution of a reference data
# set, since the edges are later used to bin Formula(...).mass values — the
# origin of these numbers is not recorded here; confirm and cite the source.
# `hist` itself is not read by any cell visible in this notebook.
hist = [513, 3208, 4243, 2810, 1402, 601, 304, 134, 54, 20, 14, 9, 4, 0, 1, 3]
bin_edges = [
    30.102,
    156.0416875,
    281.981375,
    407.9210625,
    533.86075,
    659.8004375,
    785.740125,
    911.6798125,
    1037.6195,
    1163.5591875,
    1289.498875,
    1415.4385625,
    1541.37825,
    1667.3179375,
    1793.257625,
    1919.1973125,
    2045.137,
]

In [None]:
# Map every unique "Mol_Id" in the filtered frame to the index of the
# `bin_edges` interval its molecular mass falls into.
bin_list_dict = {}

for mol in df_filtered.index.get_level_values("Mol_Id").unique():
    # Build the Formula once and reuse its mass for both the log line and
    # the bin lookup instead of parsing the same identifier twice.
    mol_mass = Formula(mol).mass
    print(mol_mass)
    # np.digitize returns 0 below the first edge and len(bin_edges) above the
    # last one, so in-range masses get indices 1..len(bin_edges) - 1.
    bin_list_dict[mol] = np.digitize(mol_mass, bin_edges)
print(bin_list_dict)

# Occupied bin indices (`n`) and how many molecules landed in each (`count`).
n, count = np.unique(list(bin_list_dict.values()), return_counts=True)

In [None]:
# Plot the filtered jobs using the "mol_weight" and "ElapsedRaw" columns.
# NOTE(review): presumably x = mol_weight, y = ElapsedRaw — confirm against
# plot_efficiency's signature.
scr_mkr.analysis.plot_efficiency(df_filtered, "mol_weight", "ElapsedRaw")

In [None]:
# Plot the filtered jobs using the "NCPUS" and "ElapsedRaw" columns.
# NOTE(review): presumably x = NCPUS, y = ElapsedRaw — confirm against
# plot_efficiency's signature.
scr_mkr.analysis.plot_efficiency(df_filtered, "NCPUS", "ElapsedRaw")

In [None]:
# Estimate the total runtime of a fixed batch of jobs for each method, then
# draw the result as grouped bars with one bar per listed CPU-count column.
n_jobs = 60
methos_filters = ["B3LYP_D4", "PBEh3c", "PBEh3c_freq", "r2SCAN3c"]
single_scan_results = scr_mkr.analysis.estimate_runtime_all_methods(
    df,
    filter_methods=methos_filters,
    n_jobs=n_jobs,
    available_cpus=48,
)

px.bar(
    single_scan_results.reset_index(),
    x="index",
    y=[1, 2, 4, 8, 12, 16],
    title=f"Estimated total run time for {n_jobs} jobs for different number of CPUs",
    labels=dict(
        value="Total run time [h]",
        index="Method",
        variable="Number of CPUs",
    ),
    barmode="group",
    text_auto=".2f",
)

In [None]:
# Scale every estimate by the single largest value in the table, so the
# largest entry becomes 1.0 and all others are expressed relative to it.
normalized_df = single_scan_results / single_scan_results.values.max()

# Same grouped-bar layout as the absolute-runtime chart, but the title now
# states that the values are normalized rather than hours.
px.bar(
    normalized_df.reset_index(),
    x="index",
    y=[1, 2, 4, 8, 12, 16],
    barmode="group",
    title=f"Normalized estimated run time for {n_jobs} jobs for different number of CPUs",
    labels={
        "value": "Normalized runtime",
        "index": "Method",
        "variable": "Number of CPUs",
    },
    text_auto=".2f",
)

In [None]:
# Scan a range of run counts (1 to 120, step 1) to find the most efficient
# CPU count for each method, then preview the first ten rows of the result.
# NOTE(review): `result_dict` is not read by any cell visible here — confirm
# whether it is still needed.
result_dict = defaultdict(dict)
methos_filters = ["B3LYP_D4", "PBEh3c", "PBEh3c_freq", "r2SCAN3c"]

scan_df = scr_mkr.analysis.scan_optimal_cpu_count(
    df,
    filter_methods=methos_filters,
    min_runs=1,
    max_runs=120,
    step_size=1,
)
scan_df.head(10)

In [None]:
# Scatter plot of the scan result: the frame's index on the x-axis and one
# series per column of `scan_df`.
px.scatter(
    scan_df,
    x=scan_df.index,
    y=scan_df.columns,
    labels=dict(
        index="Number of calculations",
        value="Optimal number of CPUs",
    ),
    title="Optimal number of CPUs for different methods",
)