In [1]:
# directory management and files
import os
import shutil
# sced algorithm (dynamic programming)
from algorithms.sced.sced_algorithm import Sced_algorithm
# rkrgst algorithm greedy
from algorithms.rkrgst.Codesight import Codesight
# pycode algorithm 
from algorithms.pycode.pycode import Pycode
# Union Find Disjoint Sets
from algorithms.ufds.ufds_algorithm import UFDS

### Definir el nombre del log y porcentaje

In [2]:
# Name of the submissions log file; it is looked up inside the "logs" directory.
name_log = "logs-2290.txt"
# Extension appended to every solution file extracted from the log.
extension = ".py"
# Minimum similarity percentage (inclusive) for a pair of solutions to be
# written to the results file as a copy.
percentage = 90
# algorithm options: "sced", "rkrgst", "pycode"
# Any other value falls through to the "pycode" branch of the comparison cell.
algorithm = "pycode"

### Crear directorios y soluciones

In [3]:
# Working locations, all relative to the directory the notebook runs from.
current_path = os.path.abspath(os.getcwd())
log_path = os.path.join(current_path, "logs", name_log)
solutions_path = os.path.join(current_path, "solutions")
results_path = os.path.join(current_path, "results")
vimdiff_path = os.path.join(current_path, "vimdiff")
# Line that closes each submission record in the log.
separator = "------------------------------------------------------\n"


def _clear_directory(path):
    """Empty the directory ``path``, creating it first if it does not exist.

    Only the direct children need handling: sub-directories are removed
    recursively with ``shutil.rmtree``, everything else is unlinked.
    """
    os.makedirs(path, exist_ok=True)
    with os.scandir(path) as entries:
        # Materialise the listing so we do not delete while iterating.
        children = list(entries)
    for child in children:
        if child.is_dir(follow_symlinks=False):
            shutil.rmtree(child.path)
        else:
            os.unlink(child.path)


def _is_separator(line):
    """Return True when ``line`` closes a record.

    The log is stripped before parsing, so its final separator has no
    trailing newline; compare without the newline so that last record is
    not silently dropped.
    """
    return line.rstrip("\n").endswith(separator.rstrip("\n"))


# clean log: rewrite it without leading/trailing whitespace (best effort)
try:
    with open(log_path, "r") as log_file:
        txt_log = log_file.read().strip()
    with open(log_path, "w") as log_file:
        log_file.write(txt_log)
except Exception as e:
    print(e)

# clean dirs solutions, results and vimdiff
for output_dir in (solutions_path, results_path, vimdiff_path):
    _clear_directory(output_dir)

# make dirs and solutions files
# Each record is: a header line ("user:ProblemNNNN:verdict"), the source code,
# then a separator line. Only records whose header contains "Accepted" are
# written, grouped in one directory per problem number.
try:
    with open(log_path, "r") as log_file:
        set_direc = set()
        solution_name = ""
        solution_lines = []
        cnt_line = 0
        for line in log_file:
            if _is_separator(line):
                if "Accepted" in solution_name:
                    direc = solution_name.split(":")[1].replace("Problem", "")
                    direc_path = os.path.join(solutions_path, direc)
                    if direc not in set_direc:
                        os.makedirs(direc_path, exist_ok=True)
                        set_direc.add(direc)
                    file_name = solution_name.replace(":", "_") + extension
                    solution_path = os.path.join(direc_path, file_name)
                    with open(solution_path, "w") as solution_file:
                        solution_file.write("".join(solution_lines))
                # Reset the whole record, including the name, so that two
                # consecutive separators cannot overwrite the solution just
                # written with an empty file.
                cnt_line = 0
                solution_name = ""
                solution_lines = []
            else:
                if cnt_line == 0:
                    # First line of a record is its header.
                    solution_name = line.strip()
                else:
                    solution_lines.append(line)
                cnt_line += 1
except Exception as e:
    print(e)

### Crear resultados y calcular porcentajes

In [4]:
# Compare every pair of solutions of each problem and record the pairs whose
# similarity reaches the configured `percentage`.
# Output: results/<problem>.txt with 4-line records
#   ____________COPIA____________ / path A / path B / similarity
solutions_path = os.path.join(current_path, "solutions")
results_dir = os.path.join(current_path, "results")
os.makedirs(results_dir, exist_ok=True)
problem_set = os.listdir(solutions_path)
for problem in problem_set:
    problem_path = os.path.join(solutions_path, problem)
    solution_set = os.listdir(problem_path)
    results_path = os.path.join(results_dir, problem + ".txt")
    # `with` guarantees the results file is closed even if a comparison
    # raises something the handler below does not catch.
    with open(results_path, "w") as results_file:
        for i in range(len(solution_set) - 1):
            solution_a = os.path.join(problem_path, solution_set[i])
            for j in range(i + 1, len(solution_set)):
                solution_b = os.path.join(problem_path, solution_set[j])
                try:
                    # NOTE(review): the meaning of the third argument (100 / 10)
                    # is defined by the algorithm classes, not visible here.
                    if algorithm == "sced":
                        program = Sced_algorithm(solution_a, solution_b, 100)
                    elif algorithm == "rkrgst":
                        program = Codesight(solution_a, solution_b, 10)
                    else:
                        program = Pycode(solution_a, solution_b)
                    percentage_program = program.get_per_similarity()
                    if percentage_program >= percentage:
                        results_file.write('____________COPIA____________\n')
                        results_file.write(
                            solution_a + "\n" + solution_b + "\n" + str(percentage_program) + "\n"
                        )
                except Exception as e:
                    # Best effort: a failing pair must not stop the run, but
                    # report the actual error (and let KeyboardInterrupt through)
                    # instead of hiding it behind a fixed message.
                    print('____________NO_EXTENSION____________\n')
                    print(repr(e))
                    print(solution_a + "\n" + solution_b + "\n\n")

### Agrupamiento

In [5]:
# Group the solutions of each problem by the copy relations found in
# results/<problem>.txt, print the solutions that belong to no group and write
# one "vimdiff -R" command per grouped solution to vimdiff/<problem>.txt.
results_path = os.path.join(current_path, "results")
results_set = os.listdir(results_path)
vimdiff_dir = os.path.join(current_path, "vimdiff")
os.makedirs(vimdiff_dir, exist_ok=True)
for result in results_set:
    problem = result.replace(".txt", "")
    problem_path = os.path.join(current_path, "solutions", problem)
    solution_set = os.listdir(problem_path)
    # Index <-> full path of every solution of this problem.
    arr_names = [os.path.join(problem_path, solution) for solution in solution_set]
    map_names = {solution_path: idx for idx, solution_path in enumerate(arr_names)}

    result_path = os.path.join(results_path, result)
    # Read everything up front so the file handle is always closed.
    with open(result_path, "r") as result_file:
        result_lines = [line.strip() for line in result_file]
    # Records are 4 lines long: header, path A, path B, similarity.
    # A trailing incomplete record is ignored.
    relations = [
        [result_lines[start + 1], result_lines[start + 2]]
        for start in range(0, len(result_lines) - 3, 4)
    ]

    program = UFDS(len(map_names))
    for source, target in relations:
        program.union(map_names[source], map_names[target])

    vimdiff_path = os.path.join(vimdiff_dir, problem + ".txt")
    with open(vimdiff_path, "w") as vimdiff_file:
        print("No copiaron")
        # NOTE(review): this reads program.parents directly and presumably
        # relies on parents[idx] being the set representative of idx —
        # confirm against the UFDS implementation.
        for idx, i in enumerate(program.parents):
            if program.size(i) == 1:
                # Singleton set: this solution matched nobody.
                print(os.path.basename(arr_names[i]))
            elif idx != i:
                # Non-representative member: diff it against its parent.
                vimdiff_file.write(
                    "vimdiff -R " + arr_names[i] + " " + arr_names[idx] + "\n"
                )
        print()

No copiaron
Alemax_Problem1593_Accepted.py
jaimico313_Problem1593_Accepted.py
JBTicona_Problem1593_Accepted.py
Marco_Cusicanqui_Problem1593_Accepted.py

No copiaron
11542541_Problem2175_Accepted.py
Aizaljp_Problem2175_Accepted.py
Ale412_Problem2175_Accepted.py
andy147_Problem2175_Accepted.py
Anthony784_Problem2175_Accepted.py
CastellonS_Problem2175_Accepted.py
David973_Problem2175_Accepted.py
jaimico313_Problem2175_Accepted.py
JLenin_Problem2175_Accepted.py
JOSECUETO_Problem2175_Accepted.py
kjolvea_Problem2175_Accepted.py
lkchavez_Problem2175_Accepted.py
maquiroz4_Problem2175_Accepted.py
Marco_Cusicanqui_Problem2175_Accepted.py
mecapriles_Problem2175_Accepted.py

No copiaron
13373050_Problem2177_Accepted.py
Ale412_Problem2177_Accepted.py
Alemax_Problem2177_Accepted.py
Brherrera3114_Problem2177_Accepted.py
Carolina3114_Problem2177_Accepted.py
DANILOMAGNE_Problem2177_Accepted.py
David973_Problem2177_Accepted.py
jaimico313_Problem2177_Accepted.py
JBTicona_Problem2177_Accepted.py
JLenin_Pr