In [3]:
import os
import re
import csv

In [16]:
# Show the version string exposed by the csv module in this kernel.
print(csv.__version__)

1.0


In [17]:
def extract_grh_filename(filename):
    """Return the graph name embedded in an FMPCF log filename.

    A name shaped like ``log_fmpcf_<graph>_l_<int>_<number>.txt`` yields
    ``<graph>``. Any string that does not follow that shape is returned
    unchanged.
    """
    parsed = re.match(r'^log_fmpcf_(.+?)_l_\d+_\d+(?:\.\d+)?\.txt$', filename)
    # Fall back to the input itself when the naming scheme is not recognised.
    return parsed.group(1) if parsed else filename

In [18]:
# Sample filenames to check that extract_grh_filename returns the expected graph name.

for sample_log_name in (
    'log_fmpcf_small_world_graph_m_40_l_3_0.9.txt',
    'log_fmpcf_scale_free_graph_m_45_1_n_279611_3rd_time_l_7_0.9.txt',
    'log_fmpcf_scale_free_graph_m_26_2nd_time_l_6_0.9.txt',
    'log_fmpcf_small_world_graph_m_31_2nd_time_l_1_0.9.txt',
    'log_fmpcf_176bit_LCC_remapped_l_3_0.9.txt',
):
    print(f"{extract_grh_filename(sample_log_name)}")

small_world_graph_m_40
scale_free_graph_m_45_1_n_279611_3rd_time
scale_free_graph_m_26_2nd_time
small_world_graph_m_31_2nd_time
176bit_LCC_remapped


In [19]:
def get_m_p_n_from_filename(log_file_path):
    """Extract the ``m``, ``p`` and ``n`` parameters encoded in a log filename.

    Only the basename of ``log_file_path`` is inspected. Each parameter is
    looked up as a ``<letter>_<value>`` token, e.g. ``m_40``, ``p_0.5`` or
    ``n_279611``; the first occurrence of each token wins.

    Args:
        log_file_path: Path (or bare filename) of the log file.

    Returns:
        A ``(m, p, n)`` tuple of strings. A parameter that does not occur in
        the filename is reported as ``"N/A"``.
    """
    filename = os.path.basename(log_file_path)

    def find_token(letter, value_pattern):
        # The look-behind keeps the key letter from matching the tail of a
        # longer word, e.g. the "p_5" inside "temp_5".
        found = re.search(rf'(?<![A-Za-z0-9]){letter}_({value_pattern})', filename)
        return found.group(1) if found else "N/A"

    m = find_token("m", r'\d+')
    # Digits with an optional fraction: a bare [\d.]+ would also swallow the
    # dot of a following extension ("p_0.5.txt" -> "0.5.").
    p = find_token("p", r'\d+(?:\.\d+)?')
    n = find_token("n", r'\d+')

    return m, p, n

In [20]:
def parse_log_file(log_file_path):
    """Collect run statistics from one log file.

    The file is scanned line by line and the following values are picked up
    (a later matching line overwrites an earlier one):

    * ``R``          - text after the first ``=`` on a line starting with ``R``
    * ``min_core``   - text after the first ``=`` on a line containing ``minimum core``
    * ``degeneracy`` - text after the last ``=`` on a line containing ``degeneracy``
    * ``upper_bound``- text after the last ``=`` on a line containing ``Upper-bound``
    * ``edge_count`` / ``sub_edge_count`` - the two integers of a
      ``#edges = <int>, #edges(*sub) = <int>`` line

    ``m``, ``p`` and ``n`` come from the filename via
    ``get_m_p_n_from_filename``.

    Args:
        log_file_path: Path of the log file to read.

    Returns:
        ``(m, p, n, R, min_core, degeneracy, upper_bound, edge_count,
        sub_edge_count)``. The six values read from the file are strings, or
        ``None`` when the file has no matching line.
    """
    R = min_core = degeneracy = upper_bound = edge_count = sub_edge_count = None

    with open(log_file_path, "r") as file:
        for line in file:
            # Every branch below reads a value from the right-hand side of an
            # "=". Skipping lines without one avoids an IndexError on lines
            # that merely start with "R" or mention "minimum core", and keeps
            # label-only lines from being stored as values.
            if "=" not in line:
                continue

            parts = line.strip().split("=")
            if line.startswith("R"):
                R = parts[1].strip()
            elif "minimum core" in line:
                min_core = parts[1].strip()
            elif "degeneracy" in line:
                degeneracy = parts[-1].strip()
            elif "Upper-bound" in line:
                upper_bound = parts[-1].strip()
            elif "#edges" in line:
                match = re.search(r'#edges\s*=\s*(\d+),\s*#edges\(\*sub\)\s*=\s*(\d+)', line)
                if match:
                    edge_count = match.group(1)
                    sub_edge_count = match.group(2)

    m, p, n = get_m_p_n_from_filename(log_file_path)

    return m, p, n, R, min_core, degeneracy, upper_bound, edge_count, sub_edge_count

In [21]:
def parse_time_file(time_file_path):
    """Return the total time recorded in a timing file, or ``None``.

    The first line is checked for the marker
    ``Command exited with non-zero status 143``; when it is present the run
    is reported as unusable. Otherwise the first line containing
    ``TIME=<a>+<b>`` (two decimal numbers) supplies the result ``a + b``.

    Args:
        time_file_path: Path of the timing file to read.

    Returns:
        The summed time as a ``float``, or ``None`` when the file is empty,
        carries the status-143 marker on its first line, or has no
        ``TIME=`` entry.
    """
    with open(time_file_path, "r") as file:
        lines = file.readlines()

    # An empty file has no first line to inspect; report "no time" instead of
    # failing with an IndexError on lines[0].
    if not lines:
        return None

    if "Command exited with non-zero status 143" in lines[0]:
        return None

    for line in lines:
        match = re.search(r'TIME=(\d+\.\d+)\+(\d+\.\d+)', line)
        if match:
            return float(match.group(1)) + float(match.group(2))
    return None

In [22]:
def process_grh(folder_path, runs_per_graph=8):
    """Build one result record per graph from a folder of log and timing files.

    The regular files in ``folder_path`` are sorted by name and split in the
    middle: the first half is treated as log files, the second half as timing
    files. Both halves are then walked in steps of ``runs_per_graph`` so that
    each step covers the repeated runs of one graph.

    NOTE(review): the pairing relies on the folder's naming scheme making all
    log files sort before all timing files, in matching order. That is not
    verified here.

    For each graph every timing file of the group is parsed; if any of them
    yields no time (``parse_time_file`` returns ``None``) the whole graph is
    skipped. The remaining statistics are read from the first log file of the
    group only.

    Args:
        folder_path: Directory that holds the log and timing files.
        runs_per_graph: Number of consecutive files that belong to one graph.
            Defaults to 8, the group size this notebook was written for.

    Returns:
        A list of dicts with the keys ``grh_name``, ``m``, ``p``, ``n``,
        ``R``, ``min_core``, ``degeneracy``, ``upper_bound``, ``edge``,
        ``sub_edge`` and ``times`` (list of floats, one per timing file).

    Raises:
        ValueError: If ``runs_per_graph`` is smaller than 1.
    """
    if runs_per_graph < 1:
        raise ValueError("runs_per_graph must be at least 1")

    # Keep regular files only: a sub-directory (e.g. a notebook checkpoint
    # folder) would shift the log/time split or be opened as if it were a file.
    files = sorted(
        entry for entry in os.listdir(folder_path)
        if os.path.isfile(os.path.join(folder_path, entry))
    )

    half = len(files) // 2
    log_files = files[:half]
    time_files = files[half:]

    results = []

    for start in range(0, half, runs_per_graph):
        log_group = log_files[start:start + runs_per_graph]
        time_group = time_files[start:start + runs_per_graph]

        grh_name = extract_grh_filename(log_group[0])

        times = []
        for t_file in time_group:
            exec_time = parse_time_file(os.path.join(folder_path, t_file))
            if exec_time is None:
                # One unusable run invalidates the whole graph.
                break
            times.append(exec_time)
        else:
            # Reached only when no timing file was rejected.
            log_file_path = os.path.join(folder_path, log_group[0])
            (m, p, n, R, min_core, degeneracy,
             upper_bound, edge_count, sub_edge_count) = parse_log_file(log_file_path)

            results.append({
                "grh_name": grh_name,
                "m": m,
                "p": p,
                "n": n,
                "R": R,
                "min_core": min_core,
                "degeneracy": degeneracy,
                "upper_bound": upper_bound,
                "edge": edge_count,
                "sub_edge": sub_edge_count,
                "times": times,
            })

    return results

In [23]:
def save_to_csv(results, output_path):
    """Append result records to a CSV file, writing the header row only once.

    The file is opened in append mode so several calls can accumulate rows in
    one file. The header is written only when the file does not exist yet or
    is empty; otherwise each call would insert another header row in the
    middle of the data.

    Args:
        results: Iterable of dicts as produced by ``process_grh``; each needs
            the keys ``grh_name``, ``m``, ``p``, ``n``, ``R``, ``min_core``,
            ``degeneracy``, ``upper_bound``, ``edge``, ``sub_edge`` and
            ``times`` (a list whose items become the trailing columns).
        output_path: Path of the CSV file to create or extend.
    """
    fixed_columns = ["grh_name", "m", "p", "n", "R", "min_core", "degeneracy", "upper_bound", "edge", "sub_edge"]
    header = fixed_columns + [f"time_{i+1}" for i in range(8)]

    # Decide before opening: opening in append mode creates the file.
    needs_header = not os.path.exists(output_path) or os.path.getsize(output_path) == 0

    with open(output_path, "a", newline="") as file:
        writer = csv.writer(file)

        if needs_header:
            writer.writerow(header)

        for result in results:
            writer.writerow([result[column] for column in fixed_columns] + result["times"])

In [29]:
synth_graphs_2_folder_path = "/home/ara2/PCE-Test/FMPCF-expt/real-synth-graph-analysis/log/fmpcf/synth-graphs_2"
results_of_synth_graphs_2 = process_grh(synth_graphs_2_folder_path)

# Print every parsed record with its 1-based position for a quick visual check.
for position, record in enumerate(results_of_synth_graphs_2, start=1):
    print(f"{position}:\n{record}")

1:
{'grh_name': 'scale_free_graph_m_42_1_n_309635_5th_time', 'm': '42', 'p': 'N/A', 'n': '309635', 'R': '3', 'min_core': '0', 'degeneracy': '30', 'upper_bound': '66', 'edge': '6658020', 'sub_edge': '6653085', 'times': [8986.92, 8996.599999999999, 7968.25, 8031.429999999999, 9000.78, 8019.04, 9010.32, 8192.67]}
2:
{'grh_name': 'scale_free_graph_m_43_1_n_393417_5th_time', 'm': '43', 'p': 'N/A', 'n': '393417', 'R': '3', 'min_core': '0', 'degeneracy': '30', 'upper_bound': '66', 'edge': '8652736', 'sub_edge': '8646650', 'times': [3289.59, 3268.09, 3390.0699999999997, 3385.84, 3462.4300000000003, 3467.1200000000003, 3419.9300000000003, 3385.12]}
3:
{'grh_name': 'scale_free_graph_m_44_1_n_343785_5th_time', 'm': '44', 'p': 'N/A', 'n': '343785', 'R': '3', 'min_core': '0', 'degeneracy': '31', 'upper_bound': '68', 'edge': '7737198', 'sub_edge': '7731973', 'times': [32017.09, 31863.5, 30964.75, 30859.66, 31981.64, 30892.89, 31584.629999999997, 30324.68]}
4:
{'grh_name': 'scale_free_graph_m_45_1_n_

In [81]:
real_graphs_2_folder_path = "/home/ara2/PCE-Test/FMPCF-expt/real-synth-graph-analysis/log/fmpcf/real-graphs_2"
results_of_real_graphs_2 = process_grh(real_graphs_2_folder_path)

# Print every parsed record with its 1-based position for a quick visual check.
for position, record in enumerate(results_of_real_graphs_2, start=1):
    print(f"{position}:\n{record}")

1:
{'grh_name': '176bit_LCC_remapped', 'm': 'N/A', 'p': 'N/A', 'n': 'N/A', 'R': '3', 'min_core': '0', 'degeneracy': '12', 'upper_bound': '26', 'edge': '82204', 'sub_edge': '82204', 'times': [7.04, 7.44, 6.89, 6.96, 7.62, 7.07, 7.3, 7.05]}
2:
{'grh_name': '3D_28984_Tetra_LCC_remapped', 'm': 'N/A', 'p': 'N/A', 'n': 'N/A', 'R': '3', 'min_core': '0', 'degeneracy': '14', 'upper_bound': '31', 'edge': '288684', 'sub_edge': '288684', 'times': [21.36, 21.28, 22.66, 22.44, 20.94, 22.94, 21.42, 23.14]}
3:
{'grh_name': '598a_LCC_remapped', 'm': 'N/A', 'p': 'N/A', 'n': 'N/A', 'R': '3', 'min_core': '0', 'degeneracy': '8', 'upper_bound': '17', 'edge': '741934', 'sub_edge': '741934', 'times': [1.53, 1.55, 1.56, 1.59, 1.55, 1.55, 1.53, 1.62]}
4:
{'grh_name': 'AIDS_LCC_remapped', 'm': 'N/A', 'p': 'N/A', 'n': 'N/A', 'R': '3', 'min_core': '0', 'degeneracy': '2', 'upper_bound': '3', 'edge': '103', 'sub_edge': '74', 'times': [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]}
5:
{'grh_name': 'CA-CondMat_cleaned_LCC_r

In [1]:
# NOTE: save_to_csv and results_of_synth_graphs_2 must already be defined in the
# running kernel; run the cells that define them before this one.
output_csv_path = "/home/ara2/Desktop/Najifa_Arif_CSE491/feature_matrix.csv"

save_to_csv(results_of_synth_graphs_2, output_csv_path)
# Disabled: would also write the real-graph records to the same CSV file.
# save_to_csv(results_of_real_graphs_2,output_csv_path)

NameError: name 'save_to_csv' is not defined

In [13]:
# 20/10/25

# NOTE: the variable names are reused from the real-graphs_2 cell, but the folder
# read here is temp_log_to_generate_feature_csv.
real_graphs_2_folder_path = "/home/ara2/PCE-Test/FMPCF-expt/real-synth-graph-analysis/log/fmpcf/temp_log_to_generate_feature_csv"
results_of_real_graphs_2 = process_grh(real_graphs_2_folder_path)

# Print every parsed record with its 1-based position for a quick visual check.
for position, record in enumerate(results_of_real_graphs_2, start=1):
    print(f"{position}:\n{record}")

1:
{'grh_name': 'BZR_LCC_remapped', 'm': 'N/A', 'p': 'N/A', 'n': 'N/A', 'R': '3', 'min_core': '0', 'degeneracy': '2', 'upper_bound': '3', 'edge': '58', 'sub_edge': '14', 'times': [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]}
2:
{'grh_name': 'COIL-RAG_LCC_remapped', 'm': 'N/A', 'p': 'N/A', 'n': 'N/A', 'R': '3', 'min_core': '0', 'degeneracy': '3', 'upper_bound': '5', 'edge': '12', 'sub_edge': '11', 'times': [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]}
3:
{'grh_name': 'COX2_LCC_remapped', 'm': 'N/A', 'p': 'N/A', 'n': 'N/A', 'R': '3', 'min_core': '0', 'degeneracy': '2', 'upper_bound': '3', 'edge': '59', 'sub_edge': '28', 'times': [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]}
4:
{'grh_name': 'DBLP-v1_LCC_remapped', 'm': 'N/A', 'p': 'N/A', 'n': 'N/A', 'R': '3', 'min_core': '0', 'degeneracy': '8', 'upper_bound': '17', 'edge': '100', 'sub_edge': '94', 'times': [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]}
5:
{'grh_name': 'DD_LCC_remapped', 'm': 'N/A', 'p': 'N/A', 'n': 'N/A', 'R': '3', 'min_core': '2', 'degen

In [15]:
# Write the records held in results_of_real_graphs_2 to a dated CSV file.
# save_to_csv opens the file in append mode, so re-running this cell adds rows
# to an existing file instead of replacing it.
output_csv_path = "/home/ara2/Desktop/Najifa_Arif_CSE491/feature_matrix_20-10-25.csv"

save_to_csv(results_of_real_graphs_2, output_csv_path)

In [27]:
# 21/10/25

real_graphs_2_folder_path = "/home/ara2/PCE-Test/FMPCF-expt/real-synth-graph-analysis/log/fmpcf/real-graphs_2"
results_of_real_graphs_2 = process_grh(real_graphs_2_folder_path)

# Print every parsed record with its 1-based position for a quick visual check.
for position, record in enumerate(results_of_real_graphs_2, start=1):
    print(f"{position}:\n{record}")

1:
{'grh_name': 'as19971108_remapped', 'm': 'N/A', 'p': 'N/A', 'n': 'N/A', 'R': '3', 'min_core': '2', 'degeneracy': '3', 'upper_bound': '5', 'edge': '10312', 'sub_edge': '10306', 'times': [0.03, 0.03, 0.03, 0.03, 0.02, 0.02, 0.03, 0.02]}
2:
{'grh_name': 'as19991006_remapped', 'm': 'N/A', 'p': 'N/A', 'n': 'N/A', 'R': '3', 'min_core': '2', 'degeneracy': '3', 'upper_bound': '5', 'edge': '22790', 'sub_edge': '22786', 'times': [0.15, 0.1, 0.13, 0.09, 0.12, 0.11, 0.13, 0.11]}
3:
{'grh_name': 'as19991007_remapped', 'm': 'N/A', 'p': 'N/A', 'n': 'N/A', 'R': '3', 'min_core': '2', 'degeneracy': '3', 'upper_bound': '5', 'edge': '22772', 'sub_edge': '22768', 'times': [0.15, 0.14, 0.15, 0.13, 0.13, 0.13, 0.12, 0.1]}
4:
{'grh_name': 'as19991008_remapped', 'm': 'N/A', 'p': 'N/A', 'n': 'N/A', 'R': '3', 'min_core': '2', 'degeneracy': '3', 'upper_bound': '5', 'edge': '22850', 'sub_edge': '22848', 'times': [0.14, 0.14, 0.14, 0.13, 0.13, 0.15, 0.12, 0.14]}
5:
{'grh_name': 'as19991009_remapped', 'm': 'N/A',

In [28]:
# Write the records held in results_of_real_graphs_2 to a dated CSV file.
# save_to_csv opens the file in append mode, so re-running this cell adds rows
# to an existing file instead of replacing it.
output_csv_path = "/home/ara2/Desktop/Najifa_Arif_CSE491/feature_matrix_21-10-25.csv"

save_to_csv(results_of_real_graphs_2, output_csv_path)