In [1]:
from syntactic_similarity import syntactic_similarity
from structural_similarity import structual_similarity_driver

import matplotlib.pyplot as plt
import numpy as np

In [2]:
# Three variants of `find_divisors` to compare. Each snippet is written one
# source line per literal so it can be read as code; adjacent literals are
# concatenated by the parser, so every string is unchanged.
#
# NOTE(review): the first snippet's body uses `n` and the third uses `weird`,
# while the declared parameter is `num`. The snippets are only passed around
# as text in this notebook, but confirm whether the unbound names are intended.
generated_codes = [
    (
        '\n'
        'def find_divisors(num):\n'
        '    divisors = []\n'
        '    for i in range(1, n + 1):\n'
        '        if n % i == 0:\n'
        '            divisors.append(i)\n'
        '    return divisors\n'
    ),
    (
        '\n'
        'def find_divisors(num):\n'
        '    divisors = []\n'
        '    for j in range(1, num + 1, 1):\n'
        '        if num % j == 0:\n'
        '            divisors.append(j)\n'
        '    return divisors\n'
    ),
    (
        '\n'
        'def find_divisors(num):\n'
        '    something = set()\n'
        '    for index in range(1, int(weird**0.5) + 1):\n'
        '        if not (weird % index != 0):\n'
        '            something.add(index)\n'
        '            something.add(weird // index)\n'
        '    return sorted(something)\n'
    ),
]

# The first snippet is the reference; the remaining ones are the candidates.
ref_code, *candidate_codes = generated_codes

In [3]:
# Syntactic scoring takes the reference snippet and the list of candidates.
syntactic_similarity_scores = syntactic_similarity(
    ref_code,
    candidate_codes,
)

# The structural driver is handed the full snippet list (reference included).
structural_similarity_scores = structual_similarity_driver(
    generated_codes,
)

In [4]:
# Display the syntactic result: per candidate, an 'aggregate_score' plus a
# 'metrics' dict with the individual text-similarity scores.
syntactic_similarity_scores

{'res_code_1': {'aggregate_score': 0.8705180815784913,
  'metrics': {'sequence_similarity': 0.978405315614618,
   'edit_distance_score': 0.935064935064935,
   'jaccard_similarity': 0.88,
   'cosine_similarity_score': 0.9135002783911399,
   'sorensen_dice_coefficient': 0.9901639344262295,
   'hamming_distance_score': 0.525974025974026}},
 'res_code_2': {'aggregate_score': 0.5289404940230574,
  'metrics': {'sequence_similarity': 0.8237597911227155,
   'edit_distance_score': 0.5169491525423728,
   'jaccard_similarity': 0.4722222222222222,
   'cosine_similarity_score': 0.15303770302009742,
   'sorensen_dice_coefficient': 0.9534368070953437,
   'hamming_distance_score': 0.2542372881355932}}}

In [5]:
# Display the structural result: per candidate, an 'aggregate_score' plus a
# 'metrics' dict holding the 'UnifiedDiff' and 'TreeDiff' scores.
structural_similarity_scores

{'res_code_1': {'aggregate_score': 1.0,
  'metrics': {'UnifiedDiff': 1.0, 'TreeDiff': 1.0}},
 'res_code_2': {'aggregate_score': 0.8379765395894427,
  'metrics': {'UnifiedDiff': 0.7727272727272727,
   'TreeDiff': 0.9032258064516129}}}

In [None]:
# Candidate labels (the result keys) drive both the x-axis and the lookups.
labels = list(syntactic_similarity_scores)

# Aggregate score per candidate, one list per similarity family.
syntactic_totals = [syntactic_similarity_scores[name]['aggregate_score'] for name in labels]
structural_totals = [structural_similarity_scores[name]['aggregate_score'] for name in labels]

# Grouped bars: each candidate gets a syntactic bar and a structural bar,
# shifted half a bar-width to either side of the tick position.
x = np.arange(len(labels))  # tick positions, one per candidate
width = 0.35  # bar width

fig, ax = plt.subplots()
rects1 = ax.bar(x - width / 2, syntactic_totals, width, label='Syntactic')
rects2 = ax.bar(x + width / 2, structural_totals, width, label='Structural')

ax.set(
    xlabel='Code',
    ylabel='Score',
    title='Syntactic and Structural Similarity Scores',
)
ax.set_xticks(x)
ax.set_xticklabels(labels)
ax.legend()

fig.tight_layout()
plt.show()

In [None]:
# Metric names in first-seen order across *all* candidates. Reading them only
# from the first candidate would silently skip any metric that appears just
# for a later one (the `.get(metric, 0)` below already allows for candidates
# that lack a metric), and would raise IndexError when `labels` is empty.
syntactic_metrics = list(dict.fromkeys(
    metric_name
    for label in labels
    for metric_name in syntactic_similarity_scores[label]['metrics']
))

# One bar chart per syntactic metric, with one bar per candidate.
for metric in syntactic_metrics:
    # A candidate without this metric is plotted as 0.
    metric_scores = [
        syntactic_similarity_scores[label]['metrics'].get(metric, 0)
        for label in labels
    ]

    # Explicit figure/axes (as in the aggregate chart) so each metric is drawn
    # on its own fresh figure rather than on whatever figure is current.
    metric_fig, metric_ax = plt.subplots()
    metric_ax.bar(labels, metric_scores)
    metric_ax.set_xlabel('Code')
    metric_ax.set_ylabel('Score')
    metric_ax.set_title(f'Syntactic {metric} Scores')
    plt.show()

In [None]:
# Metric names in first-seen order across *all* candidates. Reading them only
# from the first candidate would silently skip any metric that appears just
# for a later one (the `.get(metric, 0)` below already allows for candidates
# that lack a metric), and would raise IndexError when `labels` is empty.
structural_metrics = list(dict.fromkeys(
    metric_name
    for label in labels
    for metric_name in structural_similarity_scores[label]['metrics']
))

# One bar chart per structural metric, with one bar per candidate.
for metric in structural_metrics:
    # A candidate without this metric is plotted as 0.
    metric_scores = [
        structural_similarity_scores[label]['metrics'].get(metric, 0)
        for label in labels
    ]

    # Explicit figure/axes (as in the aggregate chart) so each metric is drawn
    # on its own fresh figure rather than on whatever figure is current.
    metric_fig, metric_ax = plt.subplots()
    metric_ax.bar(labels, metric_scores)
    metric_ax.set_xlabel('Code')
    metric_ax.set_ylabel('Score')
    metric_ax.set_title(f'Structural {metric} Scores')
    plt.show()