In [19]:

from rouge import Rouge

# Import the class directly so that the name `rouge` only ever refers to the
# scorer instance (a later cell calls `rouge.get_scores`), never to the module.
rouge = Rouge()

# Toy example: the reference is the original text, and the "correct" edit is
# assumed to be black -> red.
hyp_list = [
    'A black fox',  # identical
    'A  black\t fox',  # identical apart from whitespace differences
    'A red fox',  # correct update
    'A cute black fox jumps over the lazy dog',  # extends the text, no update
    'A cute red fox jumps over the lazy dog',  # correct update plus extension
    'A crimson vulpine',  # correct meaning, but a rewrite rather than an update
]
ref = ['A black fox'] * len(hyp_list)

# One score dict per (hypothesis, reference) pair, in input order.
scores = rouge.get_scores(refs=ref, hyps=hyp_list)

print(f"Original: {ref[0]}")
print(f"Expected: {hyp_list[2]}")
for hyp, score in zip(hyp_list, scores):
    rouge_l = score['rouge-l']
    recall = rouge_l['r']
    precision = rouge_l['p']
    f = rouge_l['f']
    print(f'p: {precision:.2f} r: {recall:.2f} f: {f:.2f} - {hyp}')

Original: A black fox
Expected: A red fox
p: 1.00 r: 1.00 f: 1.00 - A black fox
p: 1.00 r: 1.00 f: 1.00 - A  black	 fox
p: 0.67 r: 0.67 f: 0.67 - A red fox
p: 0.33 r: 1.00 f: 0.50 - A cute black fox jumps over the lazy dog
p: 0.22 r: 0.67 f: 0.33 - A cute red fox jumps over the lazy dog
p: 0.33 r: 0.33 f: 0.33 - A crimson vulpine


In [20]:
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

def cal_cosine_similarity(text1, text2):
    """Return the TF-IDF cosine similarity between two texts.

    A fresh ``TfidfVectorizer`` is fitted on just the two inputs, so the
    vocabulary and IDF weights come from this pair alone.

    Args:
        text1: First text.
        text2: Second text.

    Returns:
        The single entry of the 1x1 cosine-similarity matrix computed between
        the TF-IDF row of ``text1`` and the TF-IDF row of ``text2``.
    """
    vectors = TfidfVectorizer().fit_transform([text1, text2])

    # Slice (rather than index) so each operand stays a 2-D, single-row matrix.
    first_row, second_row = vectors[0:1], vectors[1:2]

    return cosine_similarity(first_row, second_row)[0][0]


In [44]:
import difflib


def calculate_diff(original: str, updated: str):
    """List the character-level edits that turn ``original`` into ``updated``.

    The two strings are aligned with ``difflib.SequenceMatcher``, using a junk
    predicate that marks the single space character as junk.

    Args:
        original: Text before the change.
        updated: Text after the change.

    Returns:
        A list of ``(old, new)`` string pairs, one per non-``'equal'`` opcode,
        in opcode order. ``old`` is the affected slice of ``original`` and
        ``new`` is the corresponding slice of ``updated``; either may be the
        empty string (pure insertion or pure deletion). The list is empty when
        the matcher reports no differences.
    """
    matcher = difflib.SequenceMatcher(lambda x: x == " ", original, updated)
    return [
        (original[i1:i2], updated[j1:j2])
        for tag, i1, i2, j1, j2 in matcher.get_opcodes()
        if tag != 'equal'
    ]


# Fixture: two versions of the same Java method, stored as raw source text.
# `src_method` is the version before the change; `dst_method` is the version
# after it, where the `InputSanityCheck.checkSameShape` call is moved into the
# `else` branch of a new padded-contour shape check.
src_method ="public void process(T gray, GrayU8 binary) {\n\t\tif( verbose ) System.out.println(\"ENTER  DetectPolygonFromContour.process()\");\n\t\tInputSanityCheck.checkSameShape(binary, gray);\n\n\t\tif( imageWidth != gray.width || imageHeight != gray.height )\n\t\t\tconfigure(gray.width,gray.height);\n\n\t\t// reset storage for output. Call reset individually here to ensure that all references\n\t\t// are nulled from last time\n\t\tfor (int i = 0; i < foundInfo.size; i++) {\n\t\t\tfoundInfo.get(i).reset();\n\t\t}\n\t\tfoundInfo.reset();\n\n\t\tif( contourEdgeIntensity != null )\n\t\t\tcontourEdgeIntensity.setImage(gray);\n\n\t\tlong time0 = System.nanoTime();\n\n\t\t// find all the contours\n\t\tcontourFinder.process(binary);\n\n\t\tlong time1 = System.nanoTime();\n\n\t\t// Using the contours find the polygons\n\t\tfindCandidateShapes();\n\n\t\tlong time2 = System.nanoTime();\n\n\t\tdouble a = (time1-time0)*1e-6;\n\t\tdouble b = (time2-time1)*1e-6;\n\n\t\tmilliContour.update(a);\n\t\tmilliShapes.update(b);\n\n\t\tif( verbose ) System.out.println(\"EXIT  DetectPolygonFromContour.process()\");\n\t}"
dst_method =  "public void process(T gray, GrayU8 binary) {\n\t\tif( verbose ) System.out.println(\"ENTER  DetectPolygonFromContour.process()\");\n\n\t\tif( contourPadded != null && !contourPadded.isCreatePaddedCopy() ) {\n\t\t\tint padding = 2;\n\t\t\tif( gray.width+padding != binary.width || gray.height+padding != binary.height ) {\n\t\t\t\tthrow new IllegalArgumentException(\"Including padding, expected a binary image with shape \"\n\t\t\t\t+ (gray.width+padding)+\"x\"+(gray.height+padding));\n\t\t\t}\n\t\t} else {\n\t\t\tInputSanityCheck.checkSameShape(binary, gray);\n\t\t}\n\t\tif( imageWidth != gray.width || imageHeight != gray.height )\n\t\t\tconfigure(gray.width,gray.height);\n\n\t\t// reset storage for output. Call reset individually here to ensure that all references\n\t\t// are nulled from last time\n\t\tfor (int i = 0; i < foundInfo.size; i++) {\n\t\t\tfoundInfo.get(i).reset();\n\t\t}\n\t\tfoundInfo.reset();\n\n\t\tif( contourEdgeIntensity != null )\n\t\t\tcontourEdgeIntensity.setImage(gray);\n\n\t\tlong time0 = System.nanoTime();\n\n\t\t// find all the contours\n\t\tcontourFinder.process(binary);\n\n\t\tlong time1 = System.nanoTime();\n\n\t\t// Using the contours find the polygons\n\t\tfindCandidateShapes();\n\n\t\tlong time2 = System.nanoTime();\n\n\t\tdouble a = (time1-time0)*1e-6;\n\t\tdouble b = (time2-time1)*1e-6;\n\n\t\tmilliContour.update(a);\n\t\tmilliShapes.update(b);\n\n\t\tif( verbose ) System.out.println(\"EXIT  DetectPolygonFromContour.process()\");\n\t}"
# Javadoc text paired with the method; used further down as the ROUGE
# reference. It is fixture data, so its spelling is kept exactly as-is.
src_javadoc = 'Examines the undistorted gray scake input image for squares.'

# TF-IDF cosine similarity between the before and after method sources.
print(cal_cosine_similarity(src_method, dst_method))

# Candidate Javadoc rewrites to rank. The first entry is identical to
# `src_javadoc` and is filtered out below.
candidates = [
    'Examines the undistorted gray scake input image for squares.',
    'Examines the undistorted gray scale input image for squares.',
    'Examines the scake input image for squares.',
    'Examines the undistorted gray for squares.',
    'Examines the input image for squares.',
    'Examines the undistorted gray scale input image.',
    'Examines the undistorted gray scale input.',
    'Examines the squares.',
    'Examines the undistorted squares.',
    'Analyzes the unaltered grayscale input image to identify square shapes.',
]

# ROUGE scores of every candidate against the original Javadoc.
scores = rouge.get_scores(hyps=candidates,
                          refs=[src_javadoc] * len(candidates))

# One tuple per candidate:
#   (text, ROUGE-L recall vs. src_javadoc, TF-IDF cosine similarity vs. dst_method)
cand_tuples = [
    (hyp, score['rouge-l']['r'], cal_cosine_similarity(dst_method, hyp))
    for hyp, score in zip(candidates, scores)
]

m = 5  # how many candidates survive the cosine-similarity cut

# 1. Drop the candidate that merely repeats the original Javadoc.
# 2. Keep the `m` candidates most similar to the updated method.
# 3. Order the survivors by recall against the original Javadoc.
# Both sorts are stable, so ties keep their earlier relative order.
cand_tuples = [t for t in cand_tuples if t[0] != src_javadoc]
cand_tuples = sorted(cand_tuples, key=lambda t: t[2], reverse=True)[:m]
cand_tuples = sorted(cand_tuples, key=lambda t: t[1], reverse=True)

for hyp, recall, cs in cand_tuples:
    print(f'recall: {recall:.2f} cs: {cs:.2f} - {hyp}')


0.8661142549896184
recall: 0.89 cs: 0.16 - Examines the undistorted gray scale input image for squares.
recall: 0.67 cs: 0.18 - Examines the undistorted gray for squares.
recall: 0.67 cs: 0.16 - Examines the undistorted gray scale input image.
recall: 0.67 cs: 0.06 - Examines the input image for squares.
recall: 0.56 cs: 0.15 - Examines the undistorted gray scale input.
