In [None]:
#Refactoring Example
!sourcery review --fix APPS_3.5_W

In [ ]:
#Reordering Example1
import os
import ast
import hashlib
import astor

def get_sorted_hash(value):
    """Return the SHA-256 hex digest of ``value`` with its characters sorted.

    The characters are sorted before hashing, so two strings made of the
    same characters in a different order produce the same digest.
    """
    return hashlib.sha256(''.join(sorted(value)).encode()).hexdigest()

def transform_comparison(node, lines, file_path):
    """Put the operands of a simple ordering comparison into hash order, in place.

    Only comparisons with exactly one operator out of ``>``, ``<``, ``>=``,
    ``<=`` and a plain name on both sides are considered. When the sorted-hash
    of the left name is greater than that of the right name, the operands are
    swapped and the operator is replaced by its mirror image so the
    expression keeps its meaning (``a > b`` becomes ``b < a``).

    Args:
        node: Any AST node; everything except a matching ``ast.Compare`` is
            ignored.
        lines: Source lines of the file, used only to report the original line.
        file_path: Path of the file being processed, used only for reporting.

    Returns:
        None. ``node`` is mutated in place and progress is printed to stdout.
    """
    # Operator that preserves the meaning once both operands are exchanged.
    mirrored_ops = {
        ast.Gt: ast.Lt,
        ast.Lt: ast.Gt,
        ast.GtE: ast.LtE,
        ast.LtE: ast.GtE,
    }

    if not isinstance(node, ast.Compare):
        return

    # A chained comparison (``a < b < c``) shares its middle operand between
    # two operators; swapping the first pair would change what is compared,
    # so chains are left untouched.
    if len(node.ops) != 1:
        return

    mirrored = mirrored_ops.get(type(node.ops[0]))
    if mirrored is None:
        return

    left, right = node.left, node.comparators[0]
    if not (isinstance(left, ast.Name) and isinstance(right, ast.Name)):
        return

    left_var, right_var = left.id, right.id
    hash_left = get_sorted_hash(left_var)
    hash_right = get_sorted_hash(right_var)

    original_code = astor.to_source(node).strip()

    print(f"Comparing hashes: {left_var}: {hash_left}, {right_var}: {hash_right}")

    # Already in canonical order (or both names hash identically).
    if hash_left <= hash_right:
        return

    node.left, node.comparators[0] = right, left
    node.ops[0] = mirrored()

    transformed_code = astor.to_source(node).strip()

    if original_code != transformed_code:
        original_line = lines[node.lineno - 1]
        print(f"Transformed in file: {file_path}")
        print(f"Original line: {original_line.strip()}")
        print(f"Transformed line: {transformed_code.strip()}\n")

def process_file(file_path):
    """Reorder comparisons in one Python file and rewrite it in place.

    The file is parsed, every AST node is offered to
    ``transform_comparison``, and the module is then regenerated from the
    tree with ``astor`` and written back with blank lines removed. The file
    is rewritten even when no comparison was changed.

    Args:
        file_path: Path of the Python source file to process.

    Raises:
        OSError: If the file cannot be read or written.
        SyntaxError: If the file is not valid Python.
        UnicodeDecodeError: If the file is not valid UTF-8.
    """
    # Python source is UTF-8 by default; being explicit keeps the result
    # independent of the platform's locale encoding.
    with open(file_path, 'r', encoding='utf-8') as file:
        content = file.read()
    lines = content.split('\n')

    tree = ast.parse(content)

    for node in ast.walk(tree):
        transform_comparison(node, lines, file_path)

    regenerated = astor.to_source(tree)
    # Keep only lines that contain something other than whitespace.
    new_content = '\n'.join(line for line in regenerated.split('\n') if line.strip())

    with open(file_path, 'w', encoding='utf-8') as file:
        file.write(new_content)

def process_files(folder_path):
    """Run ``process_file`` on every ``.py`` entry directly inside ``folder_path``.

    A failure on one file is reported on stdout and does not stop the
    remaining files from being processed.
    """
    python_names = (name for name in os.listdir(folder_path) if name.endswith('.py'))
    for name in python_names:
        file_path = os.path.join(folder_path, name)
        try:
            process_file(file_path)
        except Exception as e:
            print(f"Error processing file {file_path}: {e}")

if __name__ == "__main__":
    # Dataset folders whose Python files get their comparisons reordered.
    folder_paths = ["APPS_3.5_W", "APPS_4_W"]

    for folder_path in folder_paths:
        process_files(folder_path)


In [ ]:
#Reordering Example2
import os
import ast
import hashlib
import astor

def get_sorted_hash(value):
    """Hash ``value`` independently of the order of its characters.

    Returns the SHA-256 hex digest of the string obtained by sorting the
    characters of ``value``.
    """
    canonical = ''.join(sorted(value))
    digest = hashlib.sha256(canonical.encode())
    return digest.hexdigest()

def transform_operations(node, lines, file_path):
    """Put the operands of ``name + name`` / ``name * name`` into hash order.

    When ``node`` is an addition or multiplication whose two operands are
    plain names, and the sorted-hash of the left name is greater than that
    of the right name, the operands are swapped in place and the change is
    reported on stdout. Any other node is left untouched.

    Args:
        node: Any AST node.
        lines: Source lines of the file, used only for reporting.
        file_path: Path of the file being processed, used only for reporting.

    Returns:
        None.
    """
    # NOTE(review): swapping is only meaning-preserving when the operation is
    # commutative for the runtime types (e.g. ``+`` on strings or lists is
    # not) - confirm the inputs are numeric.
    if not (isinstance(node, ast.BinOp) and isinstance(node.op, (ast.Add, ast.Mult))):
        return

    left_var = node.left.id if isinstance(node.left, ast.Name) else None
    right_var = node.right.id if isinstance(node.right, ast.Name) else None
    if not (left_var and right_var):
        return

    hash_left = get_sorted_hash(left_var)
    hash_right = get_sorted_hash(right_var)

    original_code = astor.to_source(node)

    print(f"Comparing hashes: {left_var}: {hash_left}, {right_var}: {hash_right}")

    if not hash_left > hash_right:
        return

    node.left, node.right = node.right, node.left
    transformed_code = astor.to_source(node).strip()

    if original_code.strip() == transformed_code:
        return

    source_line = lines[node.lineno - 1]
    print(f"Transformed in file: {file_path}")
    print(f"Original line: {source_line.strip()}")
    print(f"Transformed line: {transformed_code.strip()}\n")

def process_file(file_path):
    """Reorder ``+`` / ``*`` operands in one Python file and rewrite it in place.

    The file is parsed, every AST node is offered to
    ``transform_operations``, and the module is then regenerated from the
    tree with ``astor`` and written back with blank lines removed. The file
    is rewritten even when no operation was changed.

    Any error (unreadable file, invalid syntax, ...) is reported on stdout
    and swallowed so that a batch run can continue with the next file.

    Args:
        file_path: Path of the Python source file to process.
    """
    try:
        # Python source is UTF-8 by default; being explicit keeps the result
        # independent of the platform's locale encoding.
        with open(file_path, 'r', encoding='utf-8') as file:
            content = file.read()
        lines = content.split('\n')

        tree = ast.parse(content)

        for node in ast.walk(tree):
            transform_operations(node, lines, file_path)

        regenerated = astor.to_source(tree)
        # Keep only lines that contain something other than whitespace.
        new_content = '\n'.join(line for line in regenerated.split('\n') if line.strip())

        with open(file_path, 'w', encoding='utf-8') as file:
            file.write(new_content)

    except Exception as e:
        print(f"Error processing file {file_path}: {e}")

def process_files(folder_paths):
    """Run ``process_file`` on every ``.py`` entry of each folder in ``folder_paths``.

    Only entries directly inside each folder are considered; sub-folders are
    not descended into.
    """
    for folder_path in folder_paths:
        python_names = [name for name in os.listdir(folder_path) if name.endswith('.py')]
        for name in python_names:
            process_file(os.path.join(folder_path, name))

if __name__ == "__main__":
    # Dataset folders whose Python files get their + / * operands reordered.
    folder_paths = ["APPS_3.5_W", "APPS_4_W"]

    process_files(folder_paths)


In [ ]:
#Formatting Examples
!autopep8 --in-place --select=E101,E11,E121,E122,E125 --verbose MBPP_3.5_W/*.py
!autopep8 --in-place --select=E126 --verbose MBPP_3.5_W/*.py
!autopep8 --in-place --select=E225,E226,E227,E228,E241,E242,E251,E252 --verbose MBPP_3.5_W/*.py
!autopep8 --in-place --select=E27 --verbose MBPP_3.5_W/*.py
!autopep8 --in-place --select=E26,E265 --verbose MBPP_3.5_W/*.py
!autopep8 --in-place --select=E266 --verbose MBPP_3.5_W/*.py
!autopep8 --in-place --select=E123 --verbose MBPP_3.5_W/*.py

In [ ]:
#Distortion Attack Example
!yapf -r MBPP_3.5_W
!sourcery review --fix APPS_3.5_W

In [ ]:
#Forgery Attack Example
!yapf -r MBPP_3.5_H
!sourcery review --fix APPS_3.5_H

In [ ]:
#Create Baseline
import shutil
import os

# Root folder that contains the dataset folders ('' means the working directory).
rq1_folder = ''
new_suffix = '_Baseline'
sub_folders = ['APPS_3.5', 'APPS_4', 'MBPP_3.5', 'MBPP_4', 'MBPP_Starcoder']

# For every dataset, copy its "<dataset>_H" and "<dataset>_W" folders to
# sibling folders carrying the "_Baseline" suffix.
for sub_folder in sub_folders:
    sub_folder_path = os.path.join(rq1_folder, sub_folder)
    sub_sub_folders = ['H', 'W']

    for sub_sub_folder in sub_sub_folders:
        source_name = f'{sub_folder}_{sub_sub_folder}'
        sub_sub_folder_path = os.path.join(sub_folder_path, source_name)
        new_folder_name = source_name + new_suffix
        new_folder_path = os.path.join(sub_folder_path, new_folder_name)
        # copytree raises if the destination already exists.
        shutil.copytree(sub_sub_folder_path, new_folder_path)


In [ ]:
#Calculate Results
# (After reimplementing watermarking)
import os
import filecmp

def calculate_accuracy(testing_folder, baseline_folder):
    """Return the percentage of test files that are identical to their baseline.

    A ``.py`` file in ``testing_folder`` counts as correct when a file of the
    same name exists in ``baseline_folder`` and both have exactly the same
    content.

    Args:
        testing_folder: Folder holding the files under test.
        baseline_folder: Folder holding the reference copies.

    Returns:
        The share of correct files as a float between 0 and 100, or ``0.0``
        when ``testing_folder`` contains no ``.py`` files.

    Raises:
        OSError: If either folder cannot be listed.
    """
    test_files = [f for f in os.listdir(testing_folder) if f.endswith('.py')]
    baseline_files = {f for f in os.listdir(baseline_folder) if f.endswith('.py')}

    total_files = len(test_files)
    if total_files == 0:
        return 0.0

    correct_count = 0
    for test_file in test_files:
        if test_file not in baseline_files:
            continue
        test_file_path = os.path.join(testing_folder, test_file)
        baseline_file = os.path.join(baseline_folder, test_file)
        # shallow=False forces a content comparison; the default only looks
        # at size and modification time when those happen to match.
        if filecmp.cmp(test_file_path, baseline_file, shallow=False):
            correct_count += 1

    return (correct_count / total_files) * 100

def calculate_accuracy_real(testing_folder, baseline_folder):
    """Return the percentage of test files that differ from their baseline.

    A ``.py`` file in ``testing_folder`` counts as correct when a file of the
    same name exists in ``baseline_folder`` and the two contents are not
    identical. Files without a baseline counterpart are counted in the total
    but never as correct.

    Args:
        testing_folder: Folder holding the files under test.
        baseline_folder: Folder holding the reference copies.

    Returns:
        The share of correct files as a float between 0 and 100, or ``0.0``
        when ``testing_folder`` contains no ``.py`` files.

    Raises:
        OSError: If either folder cannot be listed.
    """
    test_files = [f for f in os.listdir(testing_folder) if f.endswith('.py')]
    baseline_files = {f for f in os.listdir(baseline_folder) if f.endswith('.py')}

    total_files = len(test_files)
    if total_files == 0:
        return 0.0

    correct_count = 0
    for test_file in test_files:
        if test_file not in baseline_files:
            continue
        test_file_path = os.path.join(testing_folder, test_file)
        baseline_file = os.path.join(baseline_folder, test_file)
        # shallow=False forces a content comparison; the default only looks
        # at size and modification time when those happen to match.
        if not filecmp.cmp(test_file_path, baseline_file, shallow=False):
            correct_count += 1

    return (correct_count / total_files) * 100

# Define paths for testing and baseline folders for watermarked and real cases
watermarked_testing_folder = 'MBPP_3.5/MBPP_3.5_W'
watermarked_baseline_folder = 'MBPP_3.5/MBPP_3.5_W_Baseline'

real_testing_folder = 'MBPP_3.5/MBPP_3.5_H'
real_baseline_folder = 'MBPP_3.5/MBPP_3.5_H_Baseline'

# Calculate accuracy for watermarked and real cases
watermarked_accuracy = calculate_accuracy(watermarked_testing_folder, watermarked_baseline_folder)
real_accuracy = calculate_accuracy_real(real_testing_folder, real_baseline_folder)

# Sample counts cover .py files only: that is the population both accuracy
# percentages were computed over, so other folder entries must not add weight.
total_watermarked_samples = sum(
    1 for f in os.listdir(watermarked_testing_folder) if f.endswith('.py'))
total_real_samples = sum(
    1 for f in os.listdir(real_testing_folder) if f.endswith('.py'))

# Calculate True Positive Rate (TPR)
true_positive = (watermarked_accuracy / 100) * total_watermarked_samples
false_negative = total_watermarked_samples - true_positive
# An empty folder yields a rate of 0 instead of a division by zero.
tpr = true_positive / (true_positive + false_negative) if total_watermarked_samples else 0.0

# Calculate False Positive Rate (FPR)
false_positive = total_real_samples - (real_accuracy / 100) * total_real_samples
true_negative = total_real_samples - false_positive
fpr = false_positive / (false_positive + true_negative) if total_real_samples else 0.0

# Calculate Overall Accuracy
total_testing_samples = total_watermarked_samples + total_real_samples
overall_correct_samples = (watermarked_accuracy / 100 * total_watermarked_samples
                           + real_accuracy / 100 * total_real_samples)
overall_accuracy = (overall_correct_samples / total_testing_samples) * 100 if total_testing_samples else 0.0

print('Overall Accuracy: {:.2f}%'.format(overall_accuracy))
print('True Positive Rate (TPR): {:.2f}%'.format(tpr * 100))
print('False Positive Rate (FPR): {:.2f}%'.format(fpr * 100))


In [ ]:
# Applicable Transformation Example
import os
import re

# Read a report file line by line and tag the files it mentions by appending
# "-35" to their names.
# NOTE(review): 'path' looks like a placeholder for the report file - confirm.
with open('path', 'r', encoding='UTF-8') as file:
    lines = file.readlines()

# File names (as captured from the report) that were already handled.
seen_filenames = set()

for line in lines:
    # Accepts "<prefix>\35\<file>.py:<n>-<m> <text>" or "<prefix>\35\<file>.py:<n> <text>".
    match = re.match(r'(.+\\35\\)(.+\.py):\d+-\d+ (.+)', line) or re.match(r'(.+\\35\\)(.+\.py):\d+ (.+)', line)
    if match:
        directory, file_name, modification = match.groups()

        # Each reported file is renamed at most once, however many lines mention it.
        if file_name in seen_filenames:
            continue
        else:
            seen_filenames.add(file_name)

            filename, file_extension = os.path.splitext(file_name)

            # The captured name is split on backslashes: first part is a folder,
            # second part the file stem.
            # NOTE(review): parts[1] raises IndexError when the captured name
            # contains no backslash.
            parts = filename.split("\\")
            front_address = parts[0]
            # Same location with the "\35\" path component removed.
            new_directory = directory.replace('\\35\\', '\\')

            d = new_directory + front_address
            all_filenames = os.listdir(d)
            filename0 = parts[1]
            # First directory entry whose name starts with the reported stem.
            # NOTE(review): raises IndexError when no entry matches.
            new_filename = [filename for filename in all_filenames if filename.startswith(filename0)]
            new_filename = new_filename[0].strip("'")
            new_filename = front_address + "/" + new_filename
            # Drops the last three characters and appends "-35.py"
            # (assumes the matched entry ends in ".py" - TODO confirm).
            new_file_path = os.path.join(new_directory, new_filename[:-3] + '-35.py')
            try:
                os.rename(os.path.join(new_directory, new_filename), new_file_path)
                print(f"replace {os.path.join(new_directory, new_filename)} as {new_file_path}")
            except FileNotFoundError:
                print(f" {os.path.join(new_directory, new_filename)} not found")
                continue


In [ ]:
# Applicable Transformations Count
import os

def _bucket_dir(folder_path, suffix):
    """Return the bucket sub-folder of ``folder_path`` for ``suffix``.

    The sub-folder is named after ``folder_path`` without its first two
    characters (the leading ``G`` / ``H`` folder and its separator),
    followed by ``suffix``.
    """
    return os.path.join(folder_path, folder_path[2:] + suffix)


def _sort_into_buckets(folder_path, counts):
    """Move each ``.py`` file of ``folder_path`` into its dash-count bucket.

    A file whose name contains one to six ``-`` characters goes to the
    ``_<count>`` bucket, one with more than six goes to ``_over6``, and a
    file without any stays where it is. ``counts`` (seven integers, one per
    bucket) is incremented in place for every file moved.
    """
    python_files = [f for f in os.listdir(folder_path) if f.endswith('.py')]
    for file in python_files:
        index = file.count('-')
        if index == 0:
            continue
        if index <= 6:
            suffix, slot = '_' + str(index), index - 1
        else:
            suffix, slot = '_over6', 6

        bucket = _bucket_dir(folder_path, suffix)
        # os.rename does not create the destination folder.
        os.makedirs(bucket, exist_ok=True)
        os.rename(os.path.join(folder_path, file), os.path.join(bucket, file))
        counts[slot] += 1


def _trim_buckets(folder_path, suffixes, limits):
    """Delete surplus ``.py`` files so bucket ``i`` keeps at most ``limits[i]``.

    Files are kept in directory-listing order; everything after the first
    ``limits[i]`` entries is removed. Buckets that do not exist are skipped.
    """
    for suffix, limit in zip(suffixes, limits):
        bucket = _bucket_dir(folder_path, suffix)
        if not os.path.isdir(bucket):
            continue
        python_files = [f for f in os.listdir(bucket) if f.endswith('.py')]
        for file in python_files[limit:]:
            os.remove(os.path.join(bucket, file))


if __name__ == "__main__":
    # Raw strings: "\A" and "\M" are not valid escape sequences.
    # NOTE(review): the backslash is only a path separator on Windows.
    folder_paths_G = [
        r"G\APPS_3.5_G",
        r"G\APPS_4_G",
        r"G\MBPP_3.5_G",
        r"G\MBPP_4_G",
        r"G\MBPP_Starcoder_G",
    ]
    folder_paths_H = [
        r"H\APPS_3.5_H",
        r"H\APPS_4_H",
        r"H\MBPP_3.5_H",
        r"H\MBPP_4_H",
        r"H\MBPP_Starcoder_H",
    ]
    # Bucket suffixes, in the same order as the columns of the count tables.
    C = [
        "_1",
        "_2",
        "_3",
        "_4",
        "_5",
        "_6",
        "_over6",
    ]

    # One row per dataset, one column per bucket.
    num_G = [[0] * len(C) for _ in folder_paths_G]
    num_H = [[0] * len(C) for _ in folder_paths_H]

    for folder_path, counts in zip(folder_paths_G, num_G):
        _sort_into_buckets(folder_path, counts)
    for folder_path, counts in zip(folder_paths_H, num_H):
        _sort_into_buckets(folder_path, counts)
    print(num_G)
    print(num_H)

    # Per dataset and bucket, the size both sides can be reduced to.
    num_final = [
        [min(g, h) for g, h in zip(row_G, row_H)]
        for row_G, row_H in zip(num_G, num_H)
    ]
    print(num_final)

    # Balance G and H: cut every bucket down to the common size.
    for folder_paths in (folder_paths_G, folder_paths_H):
        for folder_path, limits in zip(folder_paths, num_final):
            _trim_buckets(folder_path, C, limits)


In [ ]:
#Bit Detection Example
import os
import ast
import hashlib
import astor

def get_sorted_hash(value):
    """Return an order-insensitive SHA-256 hex digest of ``value``.

    The characters of ``value`` are sorted and joined before hashing.
    """
    hasher = hashlib.sha256()
    hasher.update(''.join(sorted(value)).encode())
    return hasher.hexdigest()

def transform_operations(node, lines, file_path):
    """Put the operands of ``name + name`` into hash order and report the change.

    When ``node`` is an addition whose two operands are plain names, and the
    sorted-hash of the left name is greater than that of the right name, the
    operands are swapped in place. The matching entry of ``lines`` is updated
    with the new expression text and the change is printed to stdout.

    Args:
        node: Any AST node; everything except a matching ``ast.BinOp`` is
            ignored.
        lines: Source lines of the file; the affected line is rewritten in
            place.
        file_path: Path of the file being processed, used only for reporting.

    Returns:
        True if the node was swapped and its source text changed, else False.
    """
    # NOTE(review): swapping is only meaning-preserving when ``+`` is
    # commutative for the runtime types (it is not for strings or lists).
    if not (isinstance(node, ast.BinOp) and isinstance(node.op, ast.Add)):
        return False

    left_var = node.left.id if isinstance(node.left, ast.Name) else None
    right_var = node.right.id if isinstance(node.right, ast.Name) else None
    if not (left_var and right_var):
        return False

    hash_left = get_sorted_hash(left_var)
    hash_right = get_sorted_hash(right_var)

    original_code = astor.to_source(node).strip()

    print(f"Comparing hashes: {left_var}: {hash_left}, {right_var}: {hash_right}")

    if hash_left <= hash_right:
        return False

    node.left, node.right = node.right, node.left
    transformed_code = astor.to_source(node).strip()

    if original_code == transformed_code:
        return False

    # Remember the untouched line before overwriting it, so the report
    # shows the text as it was.
    original_line = lines[node.lineno - 1]
    lines[node.lineno - 1] = original_line.replace(original_code, transformed_code)
    print(f"Transformed in file: {file_path}")
    print(f"Original line: {original_line.strip()}")
    print(f"Transformed line: {transformed_code.strip()}\n")
    return True

def process_file(file_path):
    """Reorder ``+`` operands in one Python file, rewriting it only if needed.

    The file is parsed and every AST node is offered to
    ``transform_operations``. If at least one node was changed, the module
    is regenerated from the tree with ``astor``, stripped of blank lines and
    written back to ``file_path``; otherwise the file is left untouched.

    Args:
        file_path: Path of the Python source file to process.

    Raises:
        OSError: If the file cannot be read or written.
        SyntaxError: If the file is not valid Python.
    """
    with open(file_path, 'r', encoding='UTF-8') as file:
        content = file.read()
    lines = content.split('\n')

    tree = ast.parse(content)
    modified = False

    # Every node must be visited, so this is a plain loop rather than any().
    for node in ast.walk(tree):
        if transform_operations(node, lines, file_path):
            modified = True

    if not modified:
        return

    regenerated = astor.to_source(tree)
    new_content = '\n'.join(line for line in regenerated.split('\n') if line.strip())

    # The result replaces the original in place; the file must not be
    # removed afterwards or the transformed output is lost.
    with open(file_path, 'w', encoding='UTF-8') as file:
        file.write(new_content)
    print(f"File transformed and saved as {file_path}")


def process_files(file_path):
    """Run ``process_file`` on ``file_path``, reporting any error on stdout.

    Exceptions are caught and printed instead of being propagated.
    """
    try:
        process_file(file_path)
    except Exception as error:
        print(f"Error processing file {file_path}: {error}")

if __name__ == "__main__":
    # NOTE(review): "path" looks like a placeholder for the file to process - confirm.
    file_path = "path"
    process_files(file_path)

import os
import autopep8

def process_file(file_path):
    """Reformat one file in place with autopep8 and report whether it changed.

    Prints ``changed`` when autopep8 altered the source and ``non-changed``
    otherwise, then writes the formatted source back to ``file_path``.

    Args:
        file_path: Path of the Python source file to format.

    Raises:
        OSError: If the file cannot be read or written.
    """
    with open(file_path, 'r', encoding='UTF-8') as file:
        content = file.read()

    # autopep8 expects one list entry per code, not a comma-joined string.
    # NOTE(review): the shell examples in this notebook pass the same codes
    # via --select; confirm that 'ignore' is what is wanted here.
    ignored_codes = ['E101', 'E11', 'E121', 'E122', 'E125']
    formatted_code = autopep8.fix_code(content, options={'ignore': ignored_codes})

    print('changed' if content != formatted_code else 'non-changed')

    with open(file_path, 'w', encoding='UTF-8') as file:
        file.write(formatted_code)

    print(f"File transformed and saved as {file_path}")

def process_files(file_path):
    """Format ``file_path`` via ``process_file`` and print any error raised.

    Exceptions are caught and printed instead of being propagated.
    """
    try:
        process_file(file_path)
    except Exception as error:
        print(f"Error processing file {file_path}: {error}")

if __name__ == "__main__":
    # NOTE(review): "path" looks like a placeholder for the file to format - confirm.
    file_path = "path"

    process_files(file_path)
