Imports

In [None]:
import subprocess
import csv
import os

Variables

In [None]:
# --- Compilation matrix -------------------------------------------------
# Every source file is built once per (compiler, optimization level) pair.
compilers = [
    "gcc",
    "clang",
]
# Reduced set for quick runs; the full list would be
# ["O0", "O1", "O2", "O3", "Os", "Ofast"].
optimization_levels = [
    "O0",
    "O1",
]
# Obfuscation techniques are not used yet, e.g. ["obfuscation1", "obfuscation2"].

# --- Locations ----------------------------------------------------------
# Directory containing the C source files to compile.
source_directory = "/home/mishutin/Desktop/Research/Projects/Example/files"
# Directory that receives the compiled files (one sub-directory per source file).
output_directory = "/home/mishutin/Desktop/Research/Projects/Example/compiled_files"

Compile Files

In [None]:
def createDir(directory_name):
    """Create ``directory_name`` underneath ``output_directory``.

    Missing intermediate directories are created as well.  A target that
    already exists is not treated as an error; one message is printed for
    either outcome.
    """
    target = f"{output_directory}/{directory_name}"
    try:
        os.makedirs(target)
    except FileExistsError:
        print(f"Directory '{target}' already exists.")
    else:
        print(f"Directory '{target}' created successfully.")

def compileFile(file, compiler, optimization):
    """Compile one source file with a given compiler and optimization level.

    The result is written to
    ``<output_directory>/<file>/<stem>_<compiler>_<optimization>.o``, where
    ``<stem>`` is ``file`` without its extension.  The per-file output
    directory is created first if necessary.

    Args:
        file: Name of the source file inside ``source_directory``.
        compiler: Compiler executable to invoke (e.g. ``"gcc"``).
        optimization: Optimization level without the leading dash
            (e.g. ``"O1"``).

    Returns:
        The ``subprocess.CompletedProcess`` of the compiler run.  Its
        ``stderr`` attribute holds the compiler diagnostics as text.

    Raises:
        FileNotFoundError: If the compiler executable cannot be found.
    """
    createDir(file)
    stem = os.path.splitext(file)[0]
    output_path = f"{output_directory}/{file}/{stem}_{compiler}_{optimization}.o"
    input_path = f"{source_directory}/{file}"
    # Capture stderr as text: without this ``CompletedProcess.stderr`` is
    # always None, so callers could never report why a compilation failed.
    return subprocess.run(
        [compiler, "-g", input_path, f"-{optimization}", "-o", output_path],
        stderr=subprocess.PIPE,
        universal_newlines=True,
    )

def compileAllFiles():
    """Compile every entry of ``source_directory`` with every configuration.

    Each directory entry is passed, unfiltered, to ``compileFile`` once per
    combination of ``compilers`` and ``optimization_levels``.  The outcome
    of every single compiler run is printed.
    """
    for source_name in os.listdir(source_directory):
        for compiler_name in compilers:
            for level in optimization_levels:
                result = compileFile(source_name, compiler_name, level)

                # A non-zero exit status means the compiler reported an error.
                if result.returncode != 0:
                    print(f"Compilation failed with the following error:\n{result.stderr}")
                    continue
                print("Compilation successful!")


Create Dataset

In [None]:
# Registry of file pairs that were already written to the CSV.  Keys are
# (name1, compiler1, optimization1, name2, compiler2, optimization2) tuples.
dataset = dict()

def clear_csv_file():
    """Empty ``dataset.csv`` in the current working directory.

    The file is created if it does not exist yet.
    """
    # Opening in "w" mode truncates the file; nothing has to be written.
    with open("dataset.csv", "w", newline=""):
        pass

def write_to_CSV(file1, path1, compiler1, optimization1, file2, path2, compiler2, optimization2, sameSourceFile):
    """Append one record describing a pair of compiled files to ``dataset.csv``.

    The row holds the four fields of the first file, the four fields of the
    second file and finally the ``sameSourceFile`` flag, in that order.
    """
    first_fields = [file1, path1, compiler1, optimization1]
    second_fields = [file2, path2, compiler2, optimization2]
    record = first_fields + second_fields + [sameSourceFile]

    with open("dataset.csv", "a", newline="") as handle:
        csv.writer(handle).writerow(record)

def extract_info_from_filename(filename):
    """Split a compiled file name into source name, compiler and optimization.

    The expected format is ``<name>_<compiler>_<optimization>.<ext>``.  The
    name is split from the right, so the source name itself may contain
    underscores (``my_file_gcc_O0.o`` -> ``("my_file", "gcc", "O0")``).

    Args:
        filename: File name (not a path) of a compiled file.

    Returns:
        A ``(name, compiler, optimization)`` tuple, or
        ``(None, None, None)`` when the name does not have the expected
        format.  The result always has three elements so that callers can
        unpack it unconditionally.
    """
    stem = os.path.splitext(filename)[0]
    # The last two underscore-separated fields are compiler and optimization;
    # everything before them is the source name.
    parts = stem.rsplit("_", 2)
    if len(parts) == 3:
        return parts[0], parts[1], parts[2]
    return None, None, None
    
def updateDataset(filename1, file_path1, compiler1, optimization1, filename2, file_path2, compiler2, optimization2):
    """Record a pair of compiled files unless it has been recorded before.

    A pair is identified by name, compiler and optimization of both files.
    When the pair is new it is registered in ``dataset`` in both orders and
    one row is appended to the CSV; the row's last column states whether
    both names are equal.
    """
    first_id = (filename1, compiler1, optimization1)
    second_id = (filename2, compiler2, optimization2)

    if first_id + second_id in dataset:
        return

    # Register both orders so that the mirrored pair is skipped later on.
    dataset[first_id + second_id] = True
    dataset[second_id + first_id] = True

    write_to_CSV(
        filename1, file_path1, compiler1, optimization1,
        filename2, file_path2, compiler2, optimization2,
        filename1 == filename2,
    )
    
def process_directory(directory_path):
    """Record every pair of distinct compiled files found in one directory.

    Each file name is parsed once with ``extract_info_from_filename``.
    Names that cannot be parsed are reported with a message and skipped.
    Every unordered pair of the remaining files is handed to
    ``updateDataset`` together with the files' full paths.

    Args:
        directory_path: Directory whose entries are paired with each other.
    """
    entries = []
    for filename in os.listdir(directory_path):
        info = extract_info_from_filename(filename)
        # A failed parse is signalled by None values; the length of the
        # returned tuple is deliberately not relied upon.
        if len(info) != 3 or None in info:
            print("Bad arguments")
            continue
        source_name, compiler, optimization = info
        # The file lives in ``directory_path`` itself, so the recorded path
        # must be built from it.
        file_path = os.path.join(directory_path, filename)
        entries.append((source_name, file_path, compiler, optimization))

    # updateDataset registers a pair in both orders, so visiting each
    # unordered pair once yields the same rows as visiting both orders.
    for index, first in enumerate(entries):
        for second in entries[index + 1:]:
            updateDataset(*first, *second)
    
def process_two_directories(subdirectory1_path, subdirectory2_path):
    """Record every pair made of one file from each of two directories.

    File names are parsed with ``extract_info_from_filename``; names that
    cannot be parsed are reported with a message and skipped.  Each
    remaining combination is handed to ``updateDataset`` with the parsed
    source name and the full path of both files.

    Args:
        subdirectory1_path: Directory providing the first file of each pair.
        subdirectory2_path: Directory providing the second file of each pair.
    """

    def parsed_entries(directory_path):
        """Return (name, path, compiler, optimization) for each parseable file."""
        entries = []
        for filename in os.listdir(directory_path):
            info = extract_info_from_filename(filename)
            # A failed parse is signalled by None values; the length of the
            # returned tuple is deliberately not relied upon.
            if len(info) != 3 or None in info:
                print("Bad arguments")
                continue
            source_name, compiler, optimization = info
            file_path = os.path.join(directory_path, filename)
            entries.append((source_name, file_path, compiler, optimization))
        return entries

    # Parse the second directory once instead of once per file of the first.
    second_entries = parsed_entries(subdirectory2_path)
    for first in parsed_entries(subdirectory1_path):
        for second in second_entries:
            updateDataset(*first, *second)

def createDataset():
    """Rebuild ``dataset.csv`` from the sub-directories of ``output_directory``.

    The CSV is emptied and a header row is written.  Then all file pairs
    inside each sub-directory are recorded, followed by all file pairs
    across every two different sub-directories.  Entries of
    ``output_directory`` that are not directories are ignored.
    """
    clear_csv_file()
    # Forget pairs registered by an earlier run; otherwise they would be
    # treated as duplicates and the fresh CSV would only contain the header.
    dataset.clear()
    write_to_CSV("File1", "path1", "Compiler1", "Optimization1", "File2", "path2", "Compiler2", "Optimization2", "SameSourceFile")

    subdirectory_paths = []
    for entry in os.listdir(output_directory):
        entry_path = os.path.join(output_directory, entry)
        if os.path.isdir(entry_path):
            subdirectory_paths.append(entry_path)

    # Pairs within one sub-directory.
    for subdirectory_path in subdirectory_paths:
        process_directory(subdirectory_path)

    # Pairs across two different sub-directories.  One visit per unordered
    # pair suffices because updateDataset registers both orders.
    for index, first_path in enumerate(subdirectory_paths):
        for second_path in subdirectory_paths[index + 1:]:
            process_two_directories(first_path, second_path)

Main Run

In [None]:
# Run the pipeline in order: the dataset step reads the directory that the
# compilation step writes into.
for pipeline_step in (compileAllFiles, createDataset):
    pipeline_step()