In [1]:
%load_ext autoreload
%autoreload 2
from pathlib import Path
import sys
from typing import List, Tuple

sys.path.append("../../../")
from src.cluster.cluster import (
    generate_full_code_clusters, 
    generate_full_code_clustersv2,
    generate_summarized_clusters,
    generate_graph_clusters,
    generate_random_clusters
)


from src.cluster.types import (
    CodeChunk,
    SummaryChunk,
    ClusterInput,
    ClusteredTopic,
    ClusterInputType,
    LMClusteredTopicList
)

from src.cluster.chunk_repo import ChunkStrat

# repo_name = "ell"
repo_name = "ell"
repo_path = Path("../../src/cluster/repos") / repo_name


# TODO:
# AutoSearching for n, k:
# Goal: want to automatically find the values of n and k given that
# the cohere_score evaluation function is probably gonna be changed in the
# future
#
# Add a function that automatically recalculates n and k in:
# n * cohere_score
# num_files ** k / num_clusters (for cross_file)
# This should be a simple search for increasing values of k over a range 
# R (of length ... 7?) such that the following condition holds:
# For the first half (1, R/2]
# The ranking of the score is: (Fullcode, Cgraph, ..., Random)
# For the second half (R/2 + 1, R)
# The ranking of the score is: (Cgraph, Fullcode, ..., Random)
# 
# Then once the lowest value of n is found
# R is found that satisfies above, take k to be R / 2 and call it a day




In [2]:
import json
from src.config import GRAPH_ROOT, REPOS_ROOT
from rtfs.chunk_resolution.chunk_graph import ChunkGraph
from rtfs.transforms.cluster import cluster

def generate_cgraph_clusters() -> List[ClusteredTopic]:
    ell_json = json.loads(open(GRAPH_ROOT / "MadcowD_ell_standard.json", "r").read())
    cg = ChunkGraph.from_json(REPOS_ROOT / "MadcowD_ell", ell_json)

    cluster(cg)

    return [
        ClusteredTopic(
            name="Graph Cluster",
            chunks=[
                CodeChunk(
                    id=chunk.og_id,
                    content=chunk.content,
                    filepath=chunk.file_path,
                    input_type=ClusterInputType.CHUNK,
                ).dict() for chunk in cluster.chunks
            ],
        ) 
        for cluster in cg.get_clusters()
    ]

# Generate clusters
full_code_clusters = generate_full_code_clusters(repo_path)
summary_clusters = generate_summarized_clusters(repo_path)
graph_clusters = generate_graph_clusters(repo_path)
full_code_clustersv2 = generate_full_code_clustersv2(repo_path)
summary_clustersv2 = generate_full_code_clustersv2(repo_path, summarize=True)

# cgraph_clusters = generate_cgraph_clusters()
# random_clusters = generate_random_clusters(repo_path, num_clusters = 10)
# rsummarized_clusters = generate_summarized_clusters(repo_path, chunk_strat=ChunkStrat.RANDOM)


Generating full code clusters...
Saving chunks to file:  C:\Users\jpeng\AppData\Local\Temp\index\ell
[Chunker]: 212 chunks used
[ELL] use_cache for generate_clusters: False
Unclassified chunks, iter:[1]:  109
[ELL] use_cache for generate_clusters: False
Unclassified chunks, iter:[2]:  51
[ELL] use_cache for generate_clusters: False
Unclassified chunks, iter:[3]:  4
Saving chunks to file:  C:\Users\jpeng\AppData\Local\Temp\index\ell
[Chunker]: 212 chunks used
[ELL] use_cache for summarize_chunk: False
[ELL] use_cache for summarize_chunk: False
[ELL] use_cache for summarize_chunk: False
[ELL] use_cache for summarize_chunk: False
[ELL] use_cache for summarize_chunk: False
[ELL] use_cache for summarize_chunk: False
[ELL] use_cache for summarize_chunk: False
[ELL] use_cache for summarize_chunk: False
[ELL] use_cache for summarize_chunk: False
[ELL] use_cache for summarize_chunk: False
[ELL] use_cache for summarize_chunk: False
[ELL] use_cache for summarize_chunk: False
[ELL] use_cache for s

In [6]:
print(f"Full code clusters: {len(full_code_clusters)}")
print(f"Summary clusters: {len(summary_clusters)}")
print(f"Graph clusters: {len(graph_clusters)}")
print(f"Full code clusters v2: {len(full_code_clustersv2)}")
print(f"Summary clusters v2: {len(summary_clustersv2)}")


Full code clusters: 43
Summary clusters: 21
Graph clusters: 10
Full code clusters v2: 35
Summary clusters v2: 33


In [None]:
################### EVALS ###################
from src.llm.evals.eval_cluster import eval_clusters

In [5]:
from src.llm.evals.eval_cluster import eval_coherence_clusters

iters = 3
full_code_coherence, full_code_var = eval_coherence_clusters(full_code_clusters, iters, "full_code", subdir="full_code")
summary_coherence, sum_var = eval_coherence_clusters(summary_clusters, iters, "summary", subdir="summary")
graph_coherence, graph_var = eval_coherence_clusters(graph_clusters, iters, "graph", subdir="graph")
full_code_coherence_v2, full_code_var2 = eval_coherence_clusters(full_code_clustersv2, iters, "full_code_v2", subdir="full_code_v2")
summary_coherence_v2, sum_var2 = eval_coherence_clusters(summary_clustersv2, iters, "summary_v2", subdir="summary_v2")


[autoreload of src.llm.evals.eval_cluster failed: Traceback (most recent call last):
  File "c:\Users\jpeng\Documents\projects\codesearch-backend\.venv\lib\site-packages\IPython\extensions\autoreload.py", line 276, in check
    superreload(m, reload, self.old_objects)
  File "c:\Users\jpeng\Documents\projects\codesearch-backend\.venv\lib\site-packages\IPython\extensions\autoreload.py", line 500, in superreload
    update_generic(old_obj, new_obj)
  File "c:\Users\jpeng\Documents\projects\codesearch-backend\.venv\lib\site-packages\IPython\extensions\autoreload.py", line 397, in update_generic
    update(a, b)
  File "c:\Users\jpeng\Documents\projects\codesearch-backend\.venv\lib\site-packages\IPython\extensions\autoreload.py", line 309, in update_function
    setattr(old, name, getattr(new, name))
ValueError: eval_coherence_single() requires a code object with 0 free vars, not 4
]


[ELL] use_cache for eval_coherence_single: False
[ELL] use_cache for eval_coherence_single: False
[ELL] use_cache for eval_coherence_single: False
[ELL] use_cache for eval_coherence_single: False
[ELL] use_cache for eval_coherence_single: False
[ELL] use_cache for eval_coherence_single: False
[ELL] use_cache for eval_coherence_single: False
[ELL] use_cache for eval_coherence_single: False
[ELL] use_cache for eval_coherence_single: False
[ELL] use_cache for eval_coherence_single: False
[ELL] use_cache for eval_coherence_single: False
[ELL] use_cache for eval_coherence_single: False
[ELL] use_cache for eval_coherence_single: False
[ELL] use_cache for eval_coherence_single: False
[ELL] use_cache for eval_coherence_single: False
CLuster scores:  [13.0, 15.0, 14.0, 4.0, 9.0, 15.0, 14.0, 11.0, 15.0, 11.0, 16.0, 11.0, 13.0, 15.0, 11.0, 11.0, 11.0, 14.0, 14.0, 14.0, 13.0]
[ELL] use_cache for eval_coherence_single: False[ELL] use_cache for eval_coherence_single: False[ELL] use_cache for eval_co

In [10]:
cgraph_clusters = generate_cgraph_clusters()
random_clusters = generate_random_clusters(repo_path, num_clusters = 10)
full_code_clusters = generate_full_code_clusters(repo_path)
summary_clusters = generate_summarized_clusters(repo_path)
graph_clusters = generate_graph_clusters(repo_path)


Saving chunks to file:  C:\Users\jpeng\AppData\Local\Temp\index\ell
[Chunker]: 212 chunks used
Generating full code clusters...
Saving chunks to file:  C:\Users\jpeng\AppData\Local\Temp\index\ell
[Chunker]: 212 chunks used
[ELL] use_cache for generate_clusters: False
Unclassified chunks, iter:[1]:  180
[ELL] use_cache for generate_clusters: False
Unclassified chunks, iter:[2]:  65
[ELL] use_cache for generate_clusters: False
Unclassified chunks, iter:[3]:  4
Saving chunks to file:  C:\Users\jpeng\AppData\Local\Temp\index\ell
[Chunker]: 212 chunks used
[ELL] use_cache for summarize_chunk: False
[ELL] use_cache for summarize_chunk: False
[ELL] use_cache for summarize_chunk: False
[ELL] use_cache for summarize_chunk: False
[ELL] use_cache for summarize_chunk: False
[ELL] use_cache for summarize_chunk: False
[ELL] use_cache for summarize_chunk: False
[ELL] use_cache for summarize_chunk: False
[ELL] use_cache for summarize_chunk: False
[ELL] use_cache for summarize_chunk: False
[ELL] use_ca

In [None]:
# Measuring range differences
full_code_coherence, full_code_var = eval_coherence_clusters(full_code_clusters, iters, "full_code", subdir="full_code")
summary_coherence, sum_var = eval_coherence_clusters(summary_clusters, iters, "summary", subdir="summary")
graph_coherence, graph_var = eval_coherence_clusters(graph_clusters, iters, "graph", subdir="graph")


In [12]:
full_code_coherence, full_code_std = eval_coherence_clusters(full_code_clusters, iters, "full_code", subdir="full_code")
summary_coherence, sum_std = eval_coherence_clusters(summary_clusters, iters, "summary", subdir="summary")
cgraph_coherence, cgraph_std = eval_coherence_clusters(cgraph_clusters, 1, "cgraph", subdir="cgraph")



[ELL] use_cache for eval_coherence_single: False[ELL] use_cache for eval_coherence_single: False
[ELL] use_cache for eval_coherence_single: False[ELL] use_cache for eval_coherence_single: False
[ELL] use_cache for eval_coherence_single: False


[ELL] use_cache for eval_coherence_single: False
[ELL] use_cache for eval_coherence_single: False
[ELL] use_cache for eval_coherence_single: False
[ELL] use_cache for eval_coherence_single: False
[ELL] use_cache for eval_coherence_single: False
[ELL] use_cache for eval_coherence_single: False
[ELL] use_cache for eval_coherence_single: False
[ELL] use_cache for eval_coherence_single: False
[ELL] use_cache for eval_coherence_single: False
[ELL] use_cache for eval_coherence_single: False
[ELL] use_cache for eval_coherence_single: False
[ELL] use_cache for eval_coherence_single: False
[ELL] use_cache for eval_coherence_single: False
[ELL] use_cache for eval_coherence_single: False
[ELL] use_cache for eval_coherence_single: False
[ELL] use_cache for 

In [13]:
print("Full code coherence: ", full_code_coherence, "Variance: ", full_code_std)
print("Summary coherence: ", summary_coherence, "Variance: ", sum_std)
print("Graph coherence: ", cgraph_coherence, "Variance: ", cgraph_std)

Full code coherence:  12.794871794871796 Variance:  0.1188925448140479
Summary coherence:  13.277777777777777 Variance:  0.15947444549341497
Graph coherence:  12.363636363636363 Variance:  0.0


In [20]:
graph_coherence, variance = eval_coherence_clusters(graph_clusters, iters, "graph", subdir="graph")


In [22]:
print("Graph coherence: ", graph_coherence, "Variance: ", variance)

Graph coherence:  4.3999999999999995 Variance:  0.0799999999999999


In [11]:
print("Full code coherence: ", full_code_coherence, "Variance: ", full_code_var)
print("Graph coherence: ", graph_coherence, "Variance: ", sum_var)
print("Summary coherence: ", summary_coherence, "Variance: ", graph_var)
print("Full code coherence v2: ", full_code_coherence_v2, "Variance: ", full_code_var2)
print("Summary coherence v2: ", summary_coherence_v2, "Variance: ", sum_var2)

Full code coherence:  12.682539682539684 Variance:  0.08093681767607624
Graph coherence:  12.800000000000002 Variance:  0.18217108826052958
Summary coherence:  12.840579710144928 Variance:  0.48989794855663604
Full code coherence v2:  13.111111111111109 Variance:  0.13425606637327336
Summary coherence v2:  11.82758620689655 Variance:  0.2198980053989536


In [7]:
def eval_cross_file_single(cluster: ClusteredTopic, f_const: float = 2.0) -> float:
    # Calculate the number of unique files in the cluster
    unique_files = set(chunk.filepath for chunk in cluster.chunks 
                       if chunk.filepath is not None)
    num_files = len(unique_files)
    num_chunks = len(cluster.chunks)

    # Avoid division by zero
    if num_chunks == 0:
        return 0.0

    # Calculate the ratio of files to chunks
    score = num_files ** f_const / num_chunks

    return num_files, num_chunks, score
    

def eval_cross_file_cluster(clusters: List[ClusteredTopic], f_const: float = 2.0, min_chunks: int = 3) -> float:
    cross_file_scores = [eval_cross_file_single(cluster, f_const = f_const)[2] for cluster in clusters 
                         if len(cluster.chunks) >= min_chunks]

    # Calculate the average cross-file score
    if len(cross_file_scores) > 0:
        avg_cross_file_score = sum(cross_file_scores) / len(cross_file_scores)
    else:
        avg_cross_file_score = 0.0

    return avg_cross_file_score


In [8]:
f_const_vals = [1.1, 1.2, 1.3, 1.4]
cohere_scores = [full_code_coherence, graph_coherence, summary_coherence, full_code_coherence_v2, summary_coherence_v2]
clusters = [full_code_clusters, graph_clusters, summary_clusters, full_code_clustersv2, summary_clustersv2]
labels = ["Full Code", "Graph", "Summary", "Full Code v2", "Summary v2"]

for f_const in f_const_vals:
    cross_file_scores = [
        eval_cross_file_cluster(cluster, f_const=f_const) for cluster in clusters
    ] 
    final_eval = [( 1.6 * cohere_score + cross_score, label) for cohere_score, cross_score, label
                   in zip(cohere_scores, cross_file_scores, labels)]
    final_eval = sorted(final_eval, key=lambda x: x[0], reverse=True)
    
    print("Results for f_const: ", f_const)

    for i, (score, label) in enumerate(final_eval, 1):
        print(f"{i}.{label}: {score}")



Results for f_const:  1.1
1.Full Code v2: 21.548820429112983
2.Summary: 21.00977809006482
3.Graph: 21.001129182189327
4.Full Code: 20.746782709948008
5.Summary v2: 19.71247438816813
Results for f_const:  1.2
1.Full Code v2: 21.604519240704107
2.Graph: 21.062710162788452
3.Summary: 21.054679832333846
4.Full Code: 20.79364647167846
5.Summary v2: 19.808426960228143
Results for f_const:  1.3
1.Full Code v2: 21.666825806588232
2.Graph: 21.132867229768564
3.Summary: 21.105471721714512
4.Full Code: 20.847562275133033
5.Summary v2: 19.91743357965807
Results for f_const:  1.4
1.Full Code v2: 21.736578215466086
2.Graph: 21.212980013709863
3.Summary: 21.162991662826375
4.Full Code: 20.909741937475875
5.Summary v2: 20.0413641793772


In [11]:
f_const_vals = [1.1, 1.2, 1.3, 1.4]
cohere_scores = [full_code_coherence, graph_coherence, cgraph_coherence, random_coherence, summary_coherence]
clusters = [full_code_clusters, graph_clusters, cgraph_clusters, random_clusters, summary_clusters]
labels = ["Full Code", "Graph", "Cgraph", "Random", "Summary"]

for f_const in f_const_vals:
    cross_file_scores = [
        eval_cross_file_cluster(cluster, f_const=f_const) for cluster in clusters
    ]
    final_eval = [(cohere_sore * cross_score, label) for cohere_sore, cross_score, label
                   in zip(cohere_scores, cross_file_scores, labels)]
    final_eval = sorted(final_eval, key=lambda x: x[0], reverse=True)
    
    print("Results for f_const: ", f_const)

    print(f"1.{final_eval[0][1]}: ", final_eval[0][0])
    print(f"2.{final_eval[1][1]}: ", final_eval[1][0])
    print(f"3.{final_eval[2][1]}: ", final_eval[2][0])
    print(f"4.{final_eval[3][1]}: ", final_eval[3][0])
    print(f"5.{final_eval[4][1]}: ", final_eval[4][0])



Results for f_const:  1.1
1.Cgraph:  3.66908428888863
2.Random:  3.1063030944654573
3.Graph:  2.1887425651951635
4.Summary:  1.153433756963121
5.Full Code:  1.0975553433087866
Results for f_const:  1.2
1.Cgraph:  4.277417041378886
2.Random:  3.5351195526567887
3.Graph:  2.4473826837114787
4.Summary:  1.204088543733182
5.Full Code:  1.203226037791274
Results for f_const:  1.3
1.Cgraph:  4.990188003405217
2.Random:  4.023858296036403
3.Graph:  2.742042365027961
4.Full Code:  1.3273836158934431
5.Summary:  1.2612605737913545
Results for f_const:  1.4
1.Cgraph:  5.8259049576920106
2.Random:  4.580983522660869
3.Graph:  3.0785160575814134
4.Full Code:  1.4732854973306773
5.Summary:  1.325887593155681


In [62]:
# full_code_ids contain the superset of all code chunks
id_map = {chunk_id: i for i, chunk_id in enumerate(full_code_ids)}

# match clusters to find ones with the most shared chunks
def compare_clusters(cluster_a: List[ClusteredTopic], 
                 cluster_b: List[ClusteredTopic]) -> List[Tuple[ClusteredTopic, ClusteredTopic, int]]:
    """
    Loops through all clusters to find the best match for each cluster in the other set.
    """
    seen = []
    matched_clusters = []
    for i, a in enumerate(cluster_a):
        best_match = None
        best_score = -1
        for b in cluster_b:
            # if b.name in seen:
            #     continue
            
            a_chunk_ids = [id_map[chunk.id] for chunk in a.chunks]
            b_chunk_ids = [id_map[chunk.id] for chunk in b.chunks]
            score = len(set(a_chunk_ids) & set(b_chunk_ids))

            # if i == 12:
            #     print("a chunks: ", [id_map[chunk.id] for chunk in a.chunks])
            #     print("b chunks: ", [id_map[chunk.id] for chunk in b.chunks])
            #     print(score)
            
            if score > best_score:
                best_score = score
                best_match = b
        
        if best_match: 
            matched_clusters.append((a, best_match, best_score))
            seen.append(best_match.name)

    return matched_clusters

matched_clusters = compare_clusters(summary_clusters, full_code_clusters)
for c1, c2, score in sorted(matched_clusters, key=lambda x: x[2], reverse=True):
    print(f"{c1.name} | {c2.name} | {score}")

Real-Time API and Client Management | Conversation APIs and Real-time Communication | 7
Web Application and Server Setup | Interactive CLI with Visual Representation | 6
Factorial Calculation | Interactive CLI with Visual Representation | 5
Store and Database Management | SQL Store and Query Operations | 5
Store Management and Configuration | Configuration and Initialization | 4
User Input Validation and Formatting | OpenAI and LLM Capabilities | 3
Reinforcement Learning Environment Setup and Evaluation | RL Training Using Gym | 3
Data Collection and Processing in Reinforcement Learning | CBPO Reinforcement Learning Algorithm | 3
Model Registration and Handling | Ell Language Modeling | 3
Language Model and Prompt Handling | Language Model Decorator Utilities | 3
Main Application and Serving | Studio Command-Line Interface | 3
String and Data Manipulation | Basic Classes and Methods | 3
Real-time Audio Handling | Real-time Client and Event Handling | 3
Content and Message Processing | 