In [1]:
import subprocess
from pathlib import Path
import logging
from datetime import datetime

# Hard-coded locations: cloned repositories to analyse, where the per-repo
# reports go, and the DesigniteJava jar that produces them.
CLONED_REPOS_PATH = Path("E:\\Nagad_API\\KSL Codebase")
OUTPUT_BASE_PATH = Path("E:\\Nagad_API\\Summary_of_code_smell")
DESIGNITE_JAR_PATH = Path("E:\\DesigniteJava\\DesigniteJava.jar")

# Worker count; main() only uses the parallel code path when this exceeds 1.
PARALLEL_PROCESSES = 1

# One log file per run, named after the start time, in the working directory.
_run_stamp = datetime.now().strftime('%Y%m%d_%H%M%S')
LOG_FILE = Path.cwd() / f"process_repositories_{_run_stamp}.log"

# Every record goes to the log file first and then to the console stream.
_file_handler = logging.FileHandler(LOG_FILE)
_console_handler = logging.StreamHandler()
logging.basicConfig(
    format="%(asctime)s - %(levelname)s - %(message)s",
    level=logging.INFO,
    handlers=[_file_handler, _console_handler],
)


def verify_paths():
    """
    Check the configured input locations and ensure the output root exists.

    Returns True when the cloned-repositories directory and the DesigniteJava
    jar are both present and the output base directory exists or could be
    created. Otherwise the reason is logged and False is returned.
    """
    repos_dir_ok = CLONED_REPOS_PATH.exists() and CLONED_REPOS_PATH.is_dir()
    if not repos_dir_ok:
        logging.error(f"Cloned repositories path does not exist or is not a directory: {CLONED_REPOS_PATH}")
        return False

    jar_ok = DESIGNITE_JAR_PATH.exists() and DESIGNITE_JAR_PATH.is_file()
    if not jar_ok:
        logging.error(f"DesigniteJava.jar not found at path: {DESIGNITE_JAR_PATH}")
        return False

    # Nothing to create when the output root is already there.
    if OUTPUT_BASE_PATH.exists():
        return True

    try:
        OUTPUT_BASE_PATH.mkdir(parents=True, exist_ok=True)
        logging.info(f"Created output base directory: {OUTPUT_BASE_PATH}")
    except Exception as err:
        logging.error(f"Failed to create output base directory: {err}")
        return False

    return True

def get_repositories():
    """
    List the immediate sub-directories of the cloned repositories path.

    Each sub-directory is treated as one repository; plain files are ignored.
    The number found is logged before the list is returned.
    """
    found = []
    for entry in CLONED_REPOS_PATH.iterdir():
        if entry.is_dir():
            found.append(entry)

    logging.info(f"Found {len(found)} repositories in {CLONED_REPOS_PATH}")
    return found

def create_output_directory(repo_name):
    """
    Ensure the per-repository output directory exists.

    The directory is OUTPUT_BASE_PATH / repo_name. Returns its path, or None
    when it had to be created and the creation failed (the error is logged).
    """
    target = OUTPUT_BASE_PATH / repo_name

    if target.exists():
        logging.info(f"Output directory already exists: {target}")
        return target

    try:
        target.mkdir(parents=True, exist_ok=True)
        logging.info(f"Created output directory: {target}")
    except Exception as err:
        logging.error(f"Failed to create output directory for {repo_name}: {err}")
        return None

    return target

def run_designite(repo_path, output_path):
    """
    Invoke DesigniteJava on one repository and log the outcome.

    Runs ``java -jar <DESIGNITE_JAR_PATH> -i <repo_path> -o <output_path>`` and
    waits for it to finish. A non-zero exit code or a failure to launch the
    process is logged rather than raised.
    """
    command = ["java", "-jar", str(DESIGNITE_JAR_PATH), "-i", str(repo_path), "-o", str(output_path)]
    repo_name = repo_path.name

    logging.info(f"Processing repository: {repo_name}")
    logging.info(f"Executing command: {' '.join(command)}")

    try:
        # Capture both streams as text so stderr can be reported on failure;
        # check=True turns a non-zero exit code into CalledProcessError.
        subprocess.run(command, capture_output=True, text=True, check=True)
        logging.info(f"Successfully processed: {repo_name}")
    except subprocess.CalledProcessError as failure:
        logging.error(f"DesigniteJava returned a non-zero exit code for {repo_name}: {failure.returncode}")
        logging.error(f"Error output: {failure.stderr}")
    except Exception as err:
        # Anything else, e.g. the java executable could not be started.
        logging.error(f"Failed to process {repo_name}: {err}")

def process_repositories_sequentially(repos):
    """
    Analyse the given repositories one after another.

    A repository whose output directory could not be prepared is skipped.
    """
    for repo_dir in repos:
        destination = create_output_directory(repo_dir.name)
        if not destination:
            continue
        run_designite(repo_dir, destination)

def process_repositories_in_parallel(repos, max_workers=4):
    """
    Analyse the given repositories concurrently on a pool of worker threads.

    Up to ``max_workers`` repositories are handled at the same time. An
    exception escaping a worker is logged against its repository and does not
    stop the remaining work.
    """
    from concurrent.futures import ThreadPoolExecutor, as_completed

    def analyse(repo_dir):
        # Same per-repository steps as the sequential path.
        destination = create_output_directory(repo_dir.name)
        if not destination:
            return
        run_designite(repo_dir, destination)

    with ThreadPoolExecutor(max_workers=max_workers) as pool:
        # Remember which repository each future belongs to for error reporting.
        pending = {}
        for repo_dir in repos:
            pending[pool.submit(analyse, repo_dir)] = repo_dir

        for finished in as_completed(pending):
            try:
                finished.result()
            except Exception as exc:
                logging.error(f"Repository {pending[finished].name} generated an exception: {exc}")


def main():
    """
    Run DesigniteJava over every repository under the cloned repositories path.

    Stops early when path verification fails or no repositories are found.
    PARALLEL_PROCESSES decides between the sequential and the parallel path.
    """
    logging.info("=== Starting Processing of Java Repositories ===")

    paths_ok = verify_paths()
    if not paths_ok:
        logging.error("Verification of paths failed. Exiting.")
        return

    repositories = get_repositories()
    if not repositories:
        logging.info("No repositories found to process. Exiting.")
        return

    use_pool = PARALLEL_PROCESSES > 1
    if not use_pool:
        logging.info("Processing repositories sequentially.")
        process_repositories_sequentially(repositories)
    else:
        logging.info(f"Processing repositories in parallel with {PARALLEL_PROCESSES} workers.")
        process_repositories_in_parallel(repositories, max_workers=PARALLEL_PROCESSES)

    logging.info("=== All repositories have been processed. ===")


main()

2024-09-22 22:19:52,390 - INFO - === Starting Processing of Java Repositories ===
2024-09-22 22:19:52,394 - INFO - Created output base directory: E:\Nagad_API\Summary_of_code_smell
2024-09-22 22:19:52,401 - INFO - Found 5 repositories in E:\Nagad_API\KSL Codebase
2024-09-22 22:19:52,401 - INFO - Processing repositories sequentially.
2024-09-22 22:19:52,406 - INFO - Created output directory: E:\Nagad_API\Summary_of_code_smell\auto-lifting
2024-09-22 22:19:52,408 - INFO - Processing repository: auto-lifting
2024-09-22 22:19:52,409 - INFO - Executing command: java -jar E:\DesigniteJava\DesigniteJava.jar -i E:\Nagad_API\KSL Codebase\auto-lifting -o E:\Nagad_API\Summary_of_code_smell\auto-lifting
2024-09-22 22:20:05,450 - INFO - Successfully processed: auto-lifting
2024-09-22 22:20:05,450 - INFO - Created output directory: E:\Nagad_API\Summary_of_code_smell\bankintegration
2024-09-22 22:20:05,450 - INFO - Processing repository: bankintegration
2024-09-22 22:20:05,458 - INFO - Executing comm

In [2]:
import pandas as pd
from pathlib import Path
import logging
from datetime import datetime
import warnings

# Input: one sub-directory per repository containing the DesigniteJava CSVs.
SUMMARY_ANALYSIS_PATH = Path("E:\\Nagad_API\\Summary_of_code_smell")
# Output: directory that receives one merged CSV per smell category.
OUTPUT_BASE_PATH = Path("E:\\Nagad_API\\Merged_Summary_Analysis")

# Output Files
ARCHITECTURE_SMELLS_OUTPUT = OUTPUT_BASE_PATH / "Architecture_smells.csv"
DESIGN_SMELLS_OUTPUT = OUTPUT_BASE_PATH / "Design_smells.csv"
IMPLEMENTATION_SMELLS_OUTPUT = OUTPUT_BASE_PATH / "Implementation_smells.csv"
# Spelling fixed ("Testabilty" -> "Testability") so the file on disk follows
# the same naming pattern as the other merged outputs.
TESTABILITY_SMELLS_OUTPUT = OUTPUT_BASE_PATH / "Testability_smells.csv"
TEST_SMELLS_OUTPUT = OUTPUT_BASE_PATH / "Test_smells.csv"


# NOTE(review): LOG_FILE is not attached to any logging handler in the visible
# code of this cell - confirm whether a logging.basicConfig call is missing.
LOG_FILE = Path.cwd() / f"merge_smells_{datetime.now().strftime('%Y%m%d_%H%M%S')}.log"

# Path of the CSV file currently being parsed, or None when no read is in
# progress. warning_handler uses it to prefix warnings with the file name.
current_file_being_read = None


def warning_handler(message, category=None, filename=None, lineno=None, file=None, line=None):
    """
    Route Python warnings to the logging system.

    Installed as ``warnings.showwarning``, which is invoked as
    ``showwarning(message, category, filename, lineno, file=None, line=None)``.
    The extra arguments must therefore be accepted even though only the
    message is logged; a one-argument handler raises TypeError as soon as any
    warning is emitted. They default to None so a direct single-argument call
    keeps working.

    The warning is logged at WARNING level, prefixed with the file currently
    being read when one is recorded in ``current_file_being_read``.
    """
    if current_file_being_read:
        logging.warning(f"{current_file_being_read}: {message}")
    else:
        logging.warning(f"{message}")


warnings.showwarning = warning_handler

def verify_paths():
    """
    Check the summary input directory and ensure the merge output root exists.

    Returns True when SUMMARY_ANALYSIS_PATH is an existing directory and
    OUTPUT_BASE_PATH exists or could be created; otherwise logs the reason
    and returns False.
    """
    summary_dir_ok = SUMMARY_ANALYSIS_PATH.exists() and SUMMARY_ANALYSIS_PATH.is_dir()
    if not summary_dir_ok:
        logging.error(f"Summary analysis path does not exist or is not a directory: {SUMMARY_ANALYSIS_PATH}")
        return False

    # Nothing to create when the output root is already there.
    if OUTPUT_BASE_PATH.exists():
        return True

    try:
        OUTPUT_BASE_PATH.mkdir(parents=True, exist_ok=True)
        logging.info(f"Created output base directory: {OUTPUT_BASE_PATH}")
    except Exception as err:
        logging.error(f"Failed to create output base directory: {err}")
        return False

    return True

def get_csv_files():
    """
    Collect the per-repository smell CSVs found under SUMMARY_ANALYSIS_PATH.

    Every sub-directory is checked for the five report files. Each hit is
    logged at INFO level, each miss at WARNING level, and per-category totals
    are logged at the end.

    Returns a 5-tuple of lists of paths, in the order: architecture, design,
    implementation, testability, test.
    """
    # Order matters: it fixes both the logging order and the returned tuple.
    report_names = (
        "ArchitectureSmells.csv",
        "DesignSmells.csv",
        "ImplementationSmells.csv",
        "TestabilitySmells.csv",
        "TestSmells.csv",
    )
    found = {report_name: [] for report_name in report_names}

    for repo_dir in SUMMARY_ANALYSIS_PATH.iterdir():
        if not repo_dir.is_dir():
            continue

        for report_name in report_names:
            candidate = repo_dir / report_name
            if candidate.exists() and candidate.is_file():
                found[report_name].append(candidate)
                logging.info(f"Found {report_name}: {candidate}")
            else:
                logging.warning(f"{report_name} not found in: {repo_dir}")

    for report_name in report_names:
        logging.info(f"Total {report_name} files found: {len(found[report_name])}")

    return tuple(found[report_name] for report_name in report_names)

def merge_csv_files(csv_files, smell_type="Architecture"):
    """
    Read the given CSV files and stack them into a single DataFrame.

    Each file is read with the python parser engine; malformed lines produce
    a warning instead of aborting the read. A ``Repository`` column holding
    the name of the file's parent directory is added to every frame. Files
    that cannot be read are logged and skipped.

    Args:
        csv_files: Path objects of the CSV files to merge.
        smell_type: Category name, used only in log messages.

    Returns:
        The concatenated DataFrame with a fresh 0..n-1 index, or an empty
        DataFrame when there were no files or none could be read.
    """
    # Published so that warnings raised while parsing can be attributed to
    # the file being read; declared once at function scope, not per iteration.
    global current_file_being_read

    if not csv_files:
        logging.warning(f"No {smell_type}Smells.csv files to merge.")
        return pd.DataFrame()

    # Collect the frames and concatenate once at the end: concatenating inside
    # the loop recopies everything accumulated so far on every iteration.
    frames = []
    for file in csv_files:
        current_file_being_read = file

        try:
            df = pd.read_csv(
                file,
                on_bad_lines='warn',
                engine='python'
            )

            # Add a column for repository name
            df['Repository'] = file.parent.name
            frames.append(df)
            logging.info(f"Successfully merged file: {file}")

        except pd.errors.ParserError as pe:
            logging.error(f"ParserError while reading {file}: {pe}")
        except Exception as e:
            logging.error(f"Failed to read {file}: {e}")
        finally:
            current_file_being_read = None

    # pd.concat rejects an empty list, so fall back to an empty frame.
    merged_df = pd.concat(frames, ignore_index=True) if frames else pd.DataFrame()

    # Report the number of files actually merged, not the number attempted.
    logging.info(f"Merged {len(frames)} {smell_type}Smells.csv files into one DataFrame.")
    return merged_df

def save_merged_csv(df, output_path, smell_type="Architecture"):
    """
    Write a merged smells DataFrame to ``output_path`` as CSV, without the index.

    An empty DataFrame is not written; a warning is logged instead. A failure
    while writing is logged as an error and not raised.
    """
    if not df.empty:
        try:
            df.to_csv(output_path, index=False)
            logging.info(f"Saved merged {smell_type} smells to: {output_path}")
        except Exception as err:
            logging.error(f"Failed to save merged {smell_type} smells: {err}")
    else:
        logging.warning(f"No data to save for {smell_type} smells.")

def main():
    """
    Merge the per-repository smell CSVs into one CSV per smell category.

    After path verification, the files of each category (architecture,
    design, implementation, testability, test) are merged and saved in that
    order, and a one-line summary per category is printed at the end.
    """
    logging.info("=== Starting Merge of All Smells ===")

    if not verify_paths():
        logging.error("Path verification failed. Exiting.")
        return

    architecture_files, design_files, implementation_files, testability_files, test_files = get_csv_files()

    # (smell type, input files, merged output path) - processed in this order.
    merge_jobs = [
        ("Architecture", architecture_files, ARCHITECTURE_SMELLS_OUTPUT),
        ("Design", design_files, DESIGN_SMELLS_OUTPUT),
        ("Implementation", implementation_files, IMPLEMENTATION_SMELLS_OUTPUT),
        ("Testability", testability_files, TESTABILITY_SMELLS_OUTPUT),
        ("Test", test_files, TEST_SMELLS_OUTPUT),
    ]

    summary_rows = []
    for smell_type, csv_files, output_path in merge_jobs:
        merged_df = merge_csv_files(csv_files, smell_type=smell_type)
        save_merged_csv(merged_df, output_path, smell_type=smell_type)
        summary_rows.append((output_path.name, len(merged_df), smell_type.lower()))

    # Take the label from the smell type and the file name from the real
    # output path, so every line names its own category (previously three of
    # them said "design smells") and the file that was actually targeted.
    for file_name, smell_count, label in summary_rows:
        print(f"{file_name} contains {smell_count} {label} smells.")

    logging.info("=== Completed Merging of Smells ===")


main()

2024-09-22 22:30:16,699 - INFO - === Starting Merge of All Smells ===
2024-09-22 22:30:16,705 - INFO - Found ArchitectureSmells.csv: E:\Nagad_API\Summary_of_code_smell\auto-lifting\ArchitectureSmells.csv
2024-09-22 22:30:16,709 - INFO - Found DesignSmells.csv: E:\Nagad_API\Summary_of_code_smell\auto-lifting\DesignSmells.csv
2024-09-22 22:30:16,709 - INFO - Found ImplementationSmells.csv: E:\Nagad_API\Summary_of_code_smell\auto-lifting\ImplementationSmells.csv
2024-09-22 22:30:16,713 - INFO - Found TestabilitySmells.csv: E:\Nagad_API\Summary_of_code_smell\auto-lifting\TestabilitySmells.csv
2024-09-22 22:30:16,716 - INFO - Found TestSmells.csv: E:\Nagad_API\Summary_of_code_smell\auto-lifting\TestSmells.csv
2024-09-22 22:30:16,719 - INFO - Found ArchitectureSmells.csv: E:\Nagad_API\Summary_of_code_smell\bankintegration\ArchitectureSmells.csv
2024-09-22 22:30:16,721 - INFO - Found DesignSmells.csv: E:\Nagad_API\Summary_of_code_smell\bankintegration\DesignSmells.csv
2024-09-22 22:30:16,723 

Architecture_smells.csv contains 3 architecture smells.
Design_smells.csv contains 58 design smells.
Implementation_smells.csv contains 573 design smells.
Testability_smells.csv contains 23 design smells.
Test_smells.csv contains 10 design smells.
