In [None]:
import os
import json
import time
import subprocess
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from matplotlib_venn import venn2, venn3
from itertools import product
from pathlib import Path
from tqdm import tqdm

In [None]:
from FLResultAnalyst import FLResultAnalyst
from Utils.STEnvConfig import get_pathConfig
from Utils.DatasetConfig import get_D4Jprojects, get_D4Jversions, get_SrcPath4D4J, get_TestCases4D4J
from Utils.FileStatistic import get_files_with_suffix, count_lines
from Utils.PandasHelper import move_column_to_pos, move_rows_with_value_to_end
from Utils.ColorPalette import GenshinImpactColorPalette as GIColorPalette

In [None]:
# Resolve the root directories used by the rest of the notebook from the
# environment configuration returned by get_pathConfig().
pathConfig = get_pathConfig()
if not pathConfig:
    # Fail here with a clear message: D4J and MBFL_Metric are used
    # unconditionally below, so continuing without a config would only
    # surface later as an opaque NameError (or silently reuse stale kernel state).
    raise RuntimeError(
        "get_pathConfig() returned no configuration; "
        "cannot resolve the 'D4J' and 'MBFL_Metric' paths"
    )

D4J = Path(pathConfig["D4J"])
MBFL_Metric = Path(pathConfig["MBFL_Metric"])

# Echo the resolved locations so the reader can verify them before running
# the (potentially slow) cells below.
print(D4J.as_posix())
print(MBFL_Metric.as_posix())

### **Count Lines of Code**

In [None]:
# Build a per-project summary and write it to a CSV file.
# Each row appended to `results` is:
#   [project, number of versions, average test-case count, average KLOC]
results = []

# Projects are whatever get_D4Jprojects returns for DatasetVersion "v2.0".
projects = get_D4Jprojects(DatasetVersion="v2.0")
for project in projects:
    # Wall-clock timer for this project; reported in the print below.
    start_time = time.time()  
    
    # Root of all checked-out repositories (does not depend on `project`).
    project_repo_path = D4J / "project_repository"
    # One entry per version: line count and test-case count respectively.
    total_lines = []
    total_testcases = []
    
    versions = get_D4Jversions(project)
    for version in versions:
        # Checkout directory is "<project>/<version>b".
        # NOTE(review): the "b" suffix presumably denotes the buggy revision — confirm.
        project_version_repo_path = project_repo_path / f"{project}/{version}b"
        project_version_src_path = project_version_repo_path / get_SrcPath4D4J(project, version)
        # Collect every .java file under the source directory, recursively.
        java_files_auto = get_files_with_suffix(project_version_src_path, method='auto', suffix='.java', recursive=True)
        
        # Total line count for this version; include_empty_lines=True asks
        # count_lines to count blank lines as well.
        version_lines = sum(count_lines(file, include_empty_lines=True) for file in java_files_auto)
        version_testcases = len(get_TestCases4D4J(project, version))
        
        total_lines.append(version_lines)
        total_testcases.append(version_testcases)
    
    # Mean over versions; lines are converted to thousands of lines (KLOC).
    # NOTE(review): both divisions raise ZeroDivisionError if `versions` is empty.
    avg_kloc = sum(total_lines) / len(total_lines) / 1000
    avg_testcases = sum(total_testcases) / len(total_testcases)
    
    results.append([project, len(versions), avg_testcases, avg_kloc])
    
    end_time = time.time()  
    # Progress line: KLOC is shown rounded to a whole number (width 4);
    # the elapsed time is printed unformatted.
    print(f"[{project}] {avg_kloc:4.0f} | Execution time: {end_time - start_time} seconds")
    
    # NOTE(review): the statement below is cut off in this view (unterminated
    # string literal), so the remaining column names are not visible here.
    # It is also indented inside the `for project` loop, so the frame is rebuilt
    # from the accumulated `results` on every iteration — confirm whether it was
    # meant to run once after the loop.
    df = pd.DataFrame(results, columns=['Project', '
    
# Persist the summary without the index column. The path is relative to the
# current working directory.
# NOTE(review): `df` is only assigned inside the loop, so an empty `projects`
# would make this line raise NameError.
df.to_csv('./Results/DataSetDescription.csv', index=False)