# StarExec Data Extraction

#### Imports

In [None]:
import pandas as pd
import os
import cProfile
import re
import sys
from pathlib import Path
import numpy as np
from tqdm import tqdm
import matplotlib.pyplot as plt
import seaborn as sns

# Id of the job; it appears in the `Job<id>_output` folder name and in the
# names of the files read from that folder.
job_index = 5151
# Base directory (relative path) under which the job output folders live.
starexec_outputs = "analysis_data/starexec_outputs"
# Folder of the selected job, with a trailing slash so file names can be appended.
problem_folder = f"{starexec_outputs}/Job{job_index}_output/"

/Users/jannisgehring/VSCode/coursework-pyres/src


### StarExec CSV output

In [2]:
# Load the job-level info table (`Job<id>_info.csv`) from the job's output folder.
df_se_csv = pd.read_csv(f"{problem_folder}Job{job_index}_info.csv")
df_se_csv
# Optional quick look at cpu time vs. wallclock time, sorted by wallclock time:
# df_se_csv.sort_values("wallclock time").reset_index(drop=True)[["cpu time","wallclock time"]].plot(style=".")
# plt.legend()

Unnamed: 0,pair id,benchmark,benchmark id,solver,solver id,configuration,configuration id,status,cpu time,wallclock time,memory usage,result,expected,SZSResult,SZSOutput,SZSStatus
0,129713277,jannis_gehring/FakeProblem.p,4249265,0.0.0,3749,PyRes_default,400622,complete,0.283572,0.271146,119284.0,Inappropriate,,IAP-Non,Non,IAP
1,129713276,jannis_gehring/PUZ001+1.p,4249264,0.0.0,3749,PyRes_default,400622,complete,1.805580,1.959170,280876.0,Theorem,Theorem,THM-CRf,CRf,THM
2,129713278,jannis_gehring/PUZ001-1.p,4249297,0.0.0,3749,PyRes_default,400622,complete,0.652491,0.811316,272700.0,Unsatisfiable,Unsatisfiable,UNS-CRf,CRf,UNS
3,129696380,jannis_gehring/TPTP v9.0.0/Problems/KRS/KRS001...,4176249,0.0.0,3749,PyRes_default,400622,complete,0.686994,0.823224,272700.0,Unsatisfiable,Unsatisfiable,UNS-CRf,CRf,UNS
4,129696381,jannis_gehring/TPTP v9.0.0/Problems/KRS/KRS002...,4176250,0.0.0,3749,PyRes_default,400622,complete,0.481014,0.618230,255424.0,Unsatisfiable,Unsatisfiable,UNS-CRf,CRf,UNS
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
28194,129724574,jannis_gehring/TPTP v9.0.0/Problems/KLE/KLE178...,4176244,0.0.0,3749,PyRes_default,400622,timeout (cpu),5.080000,5.252670,317756.0,ResourceOut,Satisfiable,UNK-Non,Non,UNK
28195,129724575,jannis_gehring/TPTP v9.0.0/Problems/KLE/KLE179...,4176245,0.0.0,3749,PyRes_default,400622,timeout (cpu),5.030000,5.236910,316732.0,ResourceOut,Satisfiable,UNK-Non,Non,UNK
28196,129724576,jannis_gehring/TPTP v9.0.0/Problems/KLE/KLE180...,4176246,0.0.0,3749,PyRes_default,400622,timeout (cpu),5.060000,5.235130,324908.0,ResourceOut,Satisfiable,UNK-Non,Non,UNK
28197,129724577,jannis_gehring/TPTP v9.0.0/Problems/KLE/KLE181...,4176247,0.0.0,3749,PyRes_default,400622,timeout (cpu),5.020000,5.192470,320828.0,ResourceOut,Satisfiable,UNK-Non,Non,UNK


### StarExec individual job files

In [3]:
# Switches for this cell:
#   load_from_file -- read the previously stored table instead of re-parsing
#                     every individual output file.
#   store          -- write the resulting table back to the cache CSV.
load_from_file = True
store = False

# Cache file for the per-run table of this job (read when `load_from_file`
# is set, written when `store` is set).
df_se_indi_path = f"{problem_folder}df_se_indi_{job_index}.csv"


def _extract_field(text, compiled_pattern, col_type):
    """Return the first match of *compiled_pattern* in *text*, cast with *col_type*.

    Returns ``None`` when *text* is ``None`` (the output file could not be
    read) or when the pattern does not occur in *text*.
    """
    if text is None:
        return None
    match = compiled_pattern.search(text)
    if match is None:
        return None
    return col_type(match[0])


if load_from_file:
    df_se_indi = pd.read_csv(df_se_indi_path, index_col=0)
else:
    print(f"Searching in {problem_folder}")

    # Get file texts: take the first file of every leaf directory (a directory
    # without sub-directories). Empty leaf directories are skipped, otherwise
    # `files[0]` would raise an IndexError.
    file_dir_names = [
        subdir + "/" + files[0]
        for subdir, dirs, files in os.walk(problem_folder + "jannis_gehring")
        if not dirs and files
    ]
    texts = []
    for full_file_name in tqdm(file_dir_names):
        if not os.path.isfile(full_file_name):
            # Keep the row so that `texts` stays aligned with `file_dir_names`.
            texts.append(None)
            continue
        # Use a separate name for the handle so the path is not overwritten.
        with open(full_file_name) as output_file:
            texts.append(output_file.read())
    df_se_indi = pd.DataFrame({
        "file_dir_name": file_dir_names,
        "text": texts,
    })

    # Search through problems with regex.
    # Each spec is (column name, regex whose whole match is the value, type of the value).
    # TODO: fix: these regex patterns do not allow for lines starting with
    # a prefix such as 79.84/10.64.
    new_column_specs = [
        ("problem",                         r"(?<=Problem    : )(\S+)(?= : )",               str),
        ("python_version",                  r"(?<=Python )(\S+)",                            str),
        ("cpu_limit",                       r"(?<=% CPULimit   : )\S+",                      int),
        ("pyres_version",                   r"(?<=% Version:  )(\S+)",                       str),
        ("rel_distance",                    r"(?<=# rel_distance: )\S+",                     int),
        ("graph_construction_time",         r"(?<=# graph_construction_time: )(\S+)",        float),
        ("neighbourhood_computation_time",  r"(?<=# neighbourhood_computation_time: )(\S+)", float),
        ("szs_status",                      r"(?<=% SZS status )(\S+)",                      str),
        ("initial_clauses",                 r"(?<=% Initial clauses    : )(\S+)",            int),
        ("processed_clauses",               r"(?<=% Processed clauses  : )(\S+)",            int),
        ("factors_computed",                r"(?<=% Factors computed   : )(\S+)",            int),
        ("resolvents_computed",             r"(?<=% Resolvents computed: )(\S+)",            int),
        ("tautologies_deleted",             r"(?<=% Tautologies deleted: )(\S+)",            int),
        ("forward_subsumed",                r"(?<=% Forward subsumed   : )(\S+)",            int),
        ("backward_subsumed",               r"(?<=% Backward subsumed  : )(\S+)",            int),
        ("user_time",                       r"(?<=% User time          : )(\S+)",            float),
        ("system_time",                     r"(?<=% System time        : )(\S+)",            float),
        ("total_time",                      r"(?<=% Total time         : )(\S+)",            float),
    ]

    for new_column_name, pattern, col_type in tqdm(new_column_specs, desc="Searching for regex patterns"):
        # Compile once per column instead of once per row; pattern and type are
        # passed explicitly so no closure over the loop variables is needed.
        compiled_pattern = re.compile(pattern)
        df_se_indi[new_column_name] = df_se_indi["text"].apply(
            _extract_field, args=(compiled_pattern, col_type)
        )
    # The raw file contents are only needed for the extraction above.
    df_se_indi = df_se_indi.drop(["text"], axis="columns")

if store:
    df_se_indi.to_csv(df_se_indi_path)

df_se_indi

Unnamed: 0,file_dir_name,problem,python_version,cpu_limit,pyres_version,rel_distance,graph_construction_time,neighbourhood_computation_time,szs_status,initial_clauses,processed_clauses,factors_computed,resolvents_computed,tautologies_deleted,forward_subsumed,backward_subsumed,user_time,system_time,total_time
0,/Users/jannisgehring/VSCode/coursework-pyres/s...,ITP088^7,3.13.0,,,,,,Inappropriate,,,,,,,,,,
1,/Users/jannisgehring/VSCode/coursework-pyres/s...,ITP069_5,3.13.0,,,,,,Inappropriate,,,,,,,,,,
2,/Users/jannisgehring/VSCode/coursework-pyres/s...,ITP087^4,3.13.0,,,,,,Inappropriate,,,,,,,,,,
3,/Users/jannisgehring/VSCode/coursework-pyres/s...,ITP122^7,3.13.0,,,,,,Inappropriate,,,,,,,,,,
4,/Users/jannisgehring/VSCode/coursework-pyres/s...,ITP068^4,3.13.0,,,,,,Inappropriate,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
28194,/Users/jannisgehring/VSCode/coursework-pyres/s...,MED008+1,3.13.0,5.0,1.5,,,,ResourceOut,,,,,,,,,,
28195,/Users/jannisgehring/VSCode/coursework-pyres/s...,MED009+1,3.13.0,5.0,,,,,,,,,,,,,,,
28196,/Users/jannisgehring/VSCode/coursework-pyres/s...,,3.13.0,,,,,,Inappropriate,,,,,,,,,,
28197,/Users/jannisgehring/VSCode/coursework-pyres/s...,PUZ001+1,3.13.0,5.0,1.5,,,,Theorem,22.0,304.0,33.0,4844.0,4.0,689.0,12.0,1.410,0.031,1.441
