In [None]:
!pip install -U langchain-ollama
!pip install faiss-cpu
!pip install -U langchain-community

In [None]:
# Install the project dependencies listed in requirements.txt
%pip install -r requirements.txt

## Clone repositories

* Fetches a list of forks for the given base repository
* Checks if the repositories are accessible
* Clones the repositories to the local machine

### Initialize functions and variables

In [None]:
import os
import requests
import subprocess
from dotenv import load_dotenv

# Base URL that every GitHub REST API request below is built from
GITHUB_API_URL = "https://api.github.com"
# Output directory for cloned repositories
OUTPUT_DIR = "repos"

# Populate the environment (python-dotenv) before the settings are read
load_dotenv()
BASE_REPO = os.environ.get("BASE_REPO")
API_TOKEN = os.environ.get("GITHUB_API_TOKEN")

# Create the clone target up front if it is not there yet
if not os.path.exists(OUTPUT_DIR):
    os.makedirs(OUTPUT_DIR)


def clone_repo(repo: str, output_dir: str=OUTPUT_DIR) -> None:
    """
    Clones one GitHub repository into a sub-folder of output_dir.

    The sub-folder is named after the repository with "/" replaced by "_".
    When that folder already exists nothing is cloned. A failing `git clone`
    is reported on stdout and does not raise.

    Args:
        repo (str): Name of the repository given as "user/repository_name"
        output_dir (str): Directory that receives the clone. Created if missing.
            Defaults to OUTPUT_DIR.
    Returns:
        None
    """
    if not os.path.exists(output_dir):
        os.makedirs(output_dir)

    target_name = repo.replace("/", "_")
    target_path = f"{output_dir}/{target_name}"

    # An existing folder means a previous run already fetched this repository
    if os.path.exists(target_path):
        print(f"Already cloned: {target_path}")
        return

    remote_url = f"https://github.com/{repo}"
    try:
        print(f"Cloning: {remote_url}")
        # Run git inside output_dir so the clone lands in output_dir/target_name
        subprocess.run(["git", "clone", remote_url, target_name], cwd=output_dir, check=True)
    except subprocess.CalledProcessError as e:
        print(f"Failed to clone: {repo}\n{e}")

def clone_repos(repos: list[str]) -> None:
    """
    Clones every repository in the given list, one after another, by calling
    clone_repo with its default output directory.

    Args:
        repos (list[str]): Repository names given as "user/repository_name"
    Returns:
        None
    """

    for repo_name in repos:
        clone_repo(repo_name)

    # Printed once after the whole list has been processed
    print("Finished!")

def repo_exists(repo: str) -> bool:
    """
    Checks if a repository exists on Github

    The check is a single GET on the repository endpoint of the GitHub API.
    Any status other than 200 is reported as "does not exist".

    Args:
        repo (str): Name of the repository given as "user/repository_name"
    Returns:
        bool: True if the API answered with status 200, else False
    """

    # Only send the Authorization header when a token is configured;
    # "token " + None would raise TypeError before any request is made.
    headers = {"Authorization": "token " + API_TOKEN} if API_TOKEN else {}
    url = f"{GITHUB_API_URL}/repos/{repo}"
    # Without a timeout a stalled connection would block the notebook indefinitely
    r = requests.get(url, headers=headers, timeout=30)
    return r.status_code == 200

def get_forks(repo: str, count: int, page: int = 1) -> list[str]:
    """
    Returns a list of forks as "user/repository_name" from given base repository.

    Forks are fetched page by page and each one is verified with repo_exists
    before it is accepted. Fetching stops when `count` valid forks have been
    collected, when a page comes back empty, or when a request fails.

    Args:
        repo (str): Name of the base repository given as "user/repository_name"
        count (int): Number of forks to retrieve. Values below 1 are raised to 1.
            The page size sent to the API is capped at 100, so larger counts
            are collected over several pages.
        page (int): Page from which to retrieve the forks. Defaults to the first page.
    Returns:
        list[str]: List of repository names given as "user/repository_name".
            May contain fewer than `count` entries.
    """

    if count <= 0:
        count = 1
    per_page = min(count, 100)
    url = f"{GITHUB_API_URL}/repos/{repo}/forks"
    # Only send the Authorization header when a token is configured;
    # "token " + None would raise TypeError.
    headers = {"Authorization": "token " + API_TOKEN} if API_TOKEN else {}
    fork_urls = []

    # Fetch forks until count is filled or no more is found
    while len(fork_urls) < count:
        params = {"per_page": per_page, "page": page}
        r = requests.get(url, params=params, headers=headers, timeout=30)

        if r.status_code != 200:
            print(f"Failed getting forks: {r.status_code}")
            break

        forks = r.json()

        # If no more forks are found, stop
        if not forks:
            break

        for fork in forks:
            # If count is reached, stop
            if len(fork_urls) >= count:
                break
            # full_name = e.g. vanna-ai/vanna
            full_name = fork["full_name"]
            # Some forks may be unavailable so check them first
            if repo_exists(full_name):
                print("Valid: ", full_name)
                fork_urls.append(full_name)
            else:
                print("Unavailable: ", full_name)

        page += 1

    # The raw API payload is intentionally not printed: it was unbound when the
    # first request failed and very noisy otherwise.
    print("Forks found: ", str(len(fork_urls)))
    return fork_urls

def get_repos(count: int) -> None:
    """
    Clones BASE_REPO and then up to `count` of its forks.

    Args:
        count (int): Number of forks requested from get_forks
    Returns:
        None
    """
    # The base repository is cloned first
    clone_repo(BASE_REPO)

    # Look up the forks of the base repository, then clone each of them
    fork_names = get_forks(BASE_REPO, count)
    clone_repos(fork_names)


### Run

In [None]:
# Clone the base repository and request 10 of its forks
get_repos(10)

## LLM analysis

Performs comparisons using an LLM by taking two repositories, reading their source code files, fetching relevant snippets and prompting the LLM.

### Initialize functions and variables

In [None]:
from langchain_ollama.llms import OllamaLLM
from langchain_ollama import OllamaEmbeddings
from langchain.vectorstores import FAISS
from langchain.schema import Document
import time
import json
import os
from dotenv import load_dotenv

# Folder for the generated analysis, and folder holding the cloned repositories
RESULT_DIR = "results"
OUTPUT_DIR = "repos"

# Read the settings from the environment (same variable names as the cloning section)
load_dotenv()
BASE_REPO = os.environ.get("BASE_REPO")
API_TOKEN = os.environ.get("GITHUB_API_TOKEN")

if not os.path.exists(RESULT_DIR):
    os.makedirs(RESULT_DIR)

# One Ollama model name is used both for generation and for embeddings
model_name = "qwen2.5"

# Load the LLM model
model = OllamaLLM(model=model_name, temperature=0.1)

# Load embedding model
embeddings = OllamaEmbeddings(model=model_name)

def load_code_files(dir: str) -> list[Document]:
    """
    Searches a given directory and its subdirectories for source code files.
    Reads found files and wraps them in Document objects.

    Includes files whose name ends with one of the entries in the
    file_extensions variable (case-sensitive). Each matching file is loaded
    exactly once, even when several entries match its name.
    Subdirectories whose name is listed in ignore_dirs are not descended into.
    Files that cannot be read as UTF-8 text are reported and skipped.

    Args:
        dir (str): Path of the directory to collect code files from
    Returns:
        list[Document]: List of document objects containing the source code files.
    """

    file_extensions = ['.abap', '.asc', '.ash', '.ampl', '.mod', '.g4', '.apib', '.apl', '.dyalog', '.asp', '.asax', '.ascx', '.ashx', '.asmx', '.aspx', '.axd', '.dats', '.hats', '.sats', '.as', '.adb', '.ada', '.ads', '.agda', '.als', '.apacheconf', '.vhost', '.cls', '.applescript', '.scpt', '.arc', '.ino', '.asciidoc', '.adoc', '.asc', '.aj', '.asm', '.a51', '.inc', '.nasm', '.aug', '.ahk', '.ahkl', '.au3', '.awk', '.auk', '.gawk', '.mawk', '.nawk', '.bat', '.cmd', '.befunge', '.bison', '.bb', '.bb', '.decls', '.bmx', '.bsv', '.boo', '.b', '.bf', '.brs', '.bro', '.c', '.cats', '.h', '.idc', '.w', '.cs', '.cake', '.cshtml', '.csx', '.cpp', '.c++', '.cc', '.cp', '.cxx', '.h', '.h++', '.hh', '.hpp', '.hxx', '.inc', '.inl', '.ipp', '.tcc', '.tpp', '.c-objdump', '.chs', '.clp', '.cmake', '.cmake.in', '.cob', '.cbl', '.ccp', '.cobol', '.cpy', '.css', '.capnp', '.mss', '.ceylon', '.chpl', '.ch', '.ck', '.cirru', '.clw', '.icl', '.dcl', '.click', '.clj', '.boot', '.cl2', '.cljc', '.cljs', '.cljs.hl', '.cljscm', '.cljx', '.hic', '.coffee', '._coffee', '.cake', '.cjsx', '.cson', '.iced', '.cfm', '.cfml', '.cfc', '.lisp', '.asd', '.cl', '.l', '.lsp', '.ny', '.podsl', '.sexp', '.cp', '.cps', '.cl', '.coq', '.v', '.cppobjdump', '.c++-objdump', '.c++objdump', '.cpp-objdump', '.cxx-objdump', '.creole', '.cr', '.feature', '.cu', '.cuh', '.cy', '.pyx', '.pxd', '.pxi', '.d', '.di', '.d-objdump', '.com', '.dm', '.zone', '.arpa', '.d', '.darcspatch', '.dpatch', '.dart', '.diff', '.patch', '.dockerfile', '.djs', '.dylan', '.dyl', '.intr', '.lid', '.E', '.ecl', '.eclxml', '.ecl', '.sch', '.brd', '.epj', '.e', '.ex', '.exs', '.elm', '.el', '.emacs', '.emacs.desktop', '.em', '.emberscript', '.erl', '.es', '.escript', '.hrl', '.xrl', '.yrl', '.fs', '.fsi', '.fsx', '.fx', '.flux', '.f90', '.f', '.f03', '.f08', '.f77', '.f95', '.for', '.fpp', '.factor', '.fy', '.fancypack', '.fan', '.fs', '.for', '.eam.fs', '.fth', '.4th', '.f', '.for', '.forth', '.fr', '.frt', '.fs', '.ftl', '.fr', '.g', 
'.gco', '.gcode', '.gms', '.g', '.gap', '.gd', '.gi', '.tst', '.s', '.ms', '.gd', '.glsl', '.fp', '.frag', '.frg', '.fs', '.fsh', '.fshader', '.geo', '.geom', '.glslv', '.gshader', '.shader', '.vert', '.vrx', '.vsh', '.vshader', '.gml', '.kid', '.ebuild', '.eclass', '.po', '.pot', '.glf', '.gp', '.gnu', '.gnuplot', '.plot', '.plt', '.go', '.golo', '.gs', '.gst', '.gsx', '.vark', '.grace', '.gradle', '.gf', '.gml', '.graphql', '.dot', '.gv', '.man', '.1', '.1in', '.1m', '.1x', '.2', '.3', '.3in', '.3m', '.3qt', '.3x', '.4', '.5', '.6', '.7', '.8', '.9', '.l', '.me', '.ms', '.n', '.rno', '.roff', '.groovy', '.grt', '.gtpl', '.gvy', '.gsp', '.hcl', '.tf', '.hlsl', '.fx', '.fxh', '.hlsli', '.html', '.htm', '.html.hl', '.inc', '.st', '.xht', '.xhtml', '.mustache', '.jinja', '.eex', '.erb', '.erb.deface', '.phtml', '.http', '.hh', '.php', '.haml', '.haml.deface', '.handlebars', '.hbs', '.hb', '.hs', '.hsc', '.hx', '.hxsl', '.hy', '.bf', '.pro', '.dlm', '.ipf',  '.prefs', '.pro', '.properties', '.irclog', '.weechatlog', '.idr', '.lidr', '.ni', '.i7x', '.iss', '.io', '.ik', '.thy', '.ijs', '.flex', '.jflex', '.lock', '.topojson', '.jq', '.jsx', '.jade', '.j', '.java', '.jsp', '.js', '._js', '.bones', '.es', '.es6', '.frag', '.gs', '.jake', '.jsb', '.jscad', '.jsfl', '.jsm', '.jss', '.njs', '.pac', '.sjs', '.ssjs', '.sublime-build', '.sublime-commands', '.sublime-completions', '.sublime-keymap', '.sublime-macro', '.sublime-menu', '.sublime-mousemap', '.sublime-project', '.sublime-settings', '.sublime-theme', '.sublime-workspace', '.sublime_metrics', '.sublime_session', '.xsjs', '.xsjslib', '.jl', '.ipynb', '.krl', '.sch', '.brd', '.kicad_pcb', '.kit', '.kt', '.ktm', '.kts', '.lfe', '.ll', '.lol', '.lsl', '.lslp', '.lvproj', '.lasso', '.las', '.lasso8', '.lasso9', '.ldml', '.latte', '.lean', '.hlean', '.less', '.l', '.lex', '.ly', '.ily', '.b', '.m', '.ld', '.lds', '.mod', '.liquid', '.lagda', '.litcoffee', '.lhs', '.ls', '._ls', '.xm', '.x', '.xi', '.lgt', '.logtalk', 
'.lookml', '.ls', '.lua', '.fcgi', '.nse', '.pd_lua', '.rbxs', '.wlua', '.mumps', '.m', '.m4', '.m4', '.ms', '.mcr', '.mtml', '.muf', '.m', '.mak', '.d', '.mk', '.mkfile', '.mako', '.mao', '.ron', '.mask', '.mathematica', '.cdf', '.m', '.ma', '.mt', '.nb', '.nbp', '.wl', '.wlt', '.matlab', '.m', '.maxpat', '.maxhelp', '.maxproj', '.mxt', '.pat', '.mediawiki', '.wiki', '.m', '.moo', '.metal', '.minid', '.druby', '.duby', '.mir', '.mirah', '.mo', '.mod', '.mms', '.mmk', '.monkey', '.moo', '.moon', '.myt', '.ncl', '.nl', '.nsi', '.nsh', '.n', '.axs', '.axi', '.axs.erb', '.axi.erb', '.nlogo', '.nl', '.lisp', '.lsp', '.nginxconf', '.vhost', '.nim', '.nimrod', '.ninja', '.nit', '.nix', '.nu', '.numpy', '.numpyw', '.numsc', '.ml', '.eliom', '.eliomi', '.ml4', '.mli', '.mll', '.mly', '.objdump', '.m', '.h', '.mm', '.j', '.sj', '.omgrofl', '.opa', '.opal', '.cl', '.opencl', '.p', '.cls', '.scad', '.org', '.ox', '.oxh', '.oxo', '.oxygene', '.oz', '.pwn', '.inc', '.php', '.aw', '.ctp', '.fcgi', '.inc', '.php3', '.php4', '.php5', '.phps', '.phpt', '.pls', '.pck', '.pkb', '.pks', '.plb', '.plsql', '.sql', '.sql', '.pov', '.inc', '.pan', '.psc', '.parrot', '.pasm', '.pir', '.pas', '.dfm', '.dpr', '.inc', '.lpr', '.pp', '.pl', '.al', '.cgi', '.fcgi', '.perl', '.ph', '.plx', '.pm', '.pod', '.psgi', '.t', '.6pl', '.6pm', '.nqp', '.p6', '.p6l', '.p6m', '.pl', '.pl6', '.pm', '.pm6', '.t', '.pkl', '.l', '.pig', '.pike', '.pmod', '.pod', '.pogo', '.pony', '.ps', '.eps', '.ps1', '.psd1', '.psm1', '.pde', '.pl', '.pro', '.prolog', '.yap', '.spin', '.proto', '.asc', '.pub', '.pp', '.pd', '.pb', '.pbi', '.purs', '.py', '.bzl', '.cgi', '.fcgi', '.gyp', '.lmi', '.pyde', '.pyp', '.pyt', '.pyw', '.rpy', '.tac', '.wsgi', '.xpy', '.pytb', '.qml', '.qbs', '.pro', '.pri', '.r', '.rd', '.rsx', '.raml', '.rdoc', '.rbbas', '.rbfrm', '.rbmnu', '.rbres', '.rbtbar', '.rbuistate', '.rhtml', '.rmd', '.rkt', '.rktd', '.rktl', '.scrbl', '.rl', '.raw', '.reb', '.r', '.r2', '.r3', '.rebol', '.red', '.reds', 
'.cw', '.rpy', '.rs', '.rsh', '.robot', '.rg', '.rb', '.builder', '.fcgi', '.gemspec', '.god', '.irbrc', '.jbuilder', '.mspec', '.pluginspec', '.podspec', '.rabl', '.rake', '.rbuild', '.rbw', '.rbx', '.ru', '.ruby', '.thor', '.watchr', '.rs', '.rs.in', '.sas', '.scss', '.smt2', '.smt', '.sparql', '.rq', '.sqf', '.hqf', '.sql', '.cql', '.ddl', '.inc', '.prc', '.tab', '.udf', '.viw', '.sql', '.db2', '.ston', '.svg', '.sage', '.sagews', '.sls', '.sass', '.scala', '.sbt', '.sc', '.scaml', '.scm', '.sld', '.sls', '.sps', '.ss', '.sci', '.sce', '.tst', '.self', '.sh', '.bash', '.bats', '.cgi', '.command', '.fcgi', '.ksh', '.sh.in', '.tmux', '.tool', '.zsh', '.sh-session', '.shen', '.sl', '.slim', '.smali', '.st', '.cs', '.tpl', '.sp', '.inc', '.sma', '.nut', '.stan', '.ML', '.fun', '.sig', '.sml', '.do', '.ado', '.doh', '.ihlp', '.mata', '.matah', '.sthlp', '.styl', '.sc', '.scd', '.swift', '.sv', '.svh', '.vh', '.txl', '.tcl', '.adp', '.tm', '.tcsh', '.csh', '.tex', '.aux', '.bbx', '.bib', '.cbx', '.cls', '.dtx', '.ins', '.lbx', '.ltx', '.mkii', '.mkiv', '.mkvi', '.sty', '.toc', '.tea', '.t', '.fr', '.nb', '.ncl', '.no', '.textile', '.thrift', '.t', '.tu', '.ttl', '.twig', '.ts', '.tsx', '.upc', '.anim', '.asset', '.mat', '.meta', '.prefab', '.unity', '.uno', '.uc', '.ur', '.urs', '.vcl', '.vhdl', '.vhd', '.vhf', '.vhi', '.vho', '.vhs', '.vht', '.vhw', '.vala', '.vapi', '.v', '.veo', '.vim', '.vb', '.bas', '.cls', '.frm', '.frx', '.vba', '.vbhtml', '.vbs', '.volt', '.vue', '.owl', '.webidl', '.x10', '.xc', '.ant', '.axml', '.ccxml', '.clixml', '.cproject', '.csl', '.csproj', '.ct', '.dita', '.ditamap', '.ditaval', '.dll.config', '.dotsettings', '.filters', '.fsproj', '.fxml', '.glade', '.gml', '.grxml', '.iml', '.ivy', '.jelly', '.jsproj', '.kml', '.launch', '.mdpolicy', '.mm', '.mod', '.mxml', '.nproj', '.nuspec', '.odd', '.osm', '.plist', '.pluginspec', '.props', '.ps1xml', '.psc1', '.pt', '.rdf', '.rss', '.scxml', '.srdf', '.storyboard', '.stTheme', 
'.sublime-snippet', '.targets', '.tmCommand', '.tml', '.tmLanguage', '.tmPreferences', '.tmSnippet', '.tmTheme', '.ts', '.tsx', '.ui', '.urdf', '.ux', '.vbproj', '.vcxproj', '.vssettings', '.vxml', '.wsdl', '.wsf', '.wxi', '.wxl', '.wxs', '.x3d', '.xacro', '.xaml', '.xib', '.xlf', '.xliff', '.xmi', '.xml.dist', '.xproj', '.xsd', '.xul', '.zcml', '.xsp-config', '.xsp.metadata', '.xpl', '.xproc', '.xquery', '.xq', '.xql', '.xqm', '.xqy', '.xs', '.xslt', '.xsl', '.xojo_code', '.xojo_menu', '.xojo_report', '.xojo_script', '.xojo_toolbar', '.xojo_window', '.xtend', '.reek', '.rviz', '.sublime-syntax', '.syntax', '.yang', '.y', '.yacc', '.yy', '.zep', '.zimpl', '.zmpl', '.zpl', '.desktop', '.desktop.in', '.ec', '.eh', '.edn', '.fish', '.mu', '.nc', '.ooc', '.rst', '.rest', '.rest.txt', '.rst.txt', '.wisp', '.prg', '.ch', '.prw']
    # str.endswith accepts a tuple, so every file name is tested once against
    # all suffixes. The list above contains repeated and overlapping entries
    # (e.g. '.m', '.h', '.cmake' / '.cmake.in'); testing them one by one would
    # add the same file several times.
    extension_suffixes = tuple(file_extensions)
    ignore_dirs = ["tests", "docs"]
    source_code_files = []

    # List files
    for root, subdirs, files in os.walk(dir):
        # Prune ignored directories in place so os.walk never descends into
        # them. Matching on the directory name (not on the full path) keeps a
        # repository whose own path merely contains "docs" or "tests".
        subdirs[:] = [name for name in subdirs if name not in ignore_dirs]

        for f in files:
            # Read only source code files
            if not f.endswith(extension_suffixes):
                continue

            path = os.path.join(root, f)
            try:
                with open(path, 'r', encoding='utf-8') as codefile:
                    content = codefile.read()
            except (UnicodeDecodeError, OSError) as e:
                # One undecodable or unreadable file must not abort the whole load
                print(f"Skipping unreadable file: {path} ({e})")
                continue

            # Wrap source code into Document objects
            source_code_files.append(Document(page_content=content))

    return source_code_files

def initialize_retriever(embeddings: OllamaEmbeddings, source_dir: str):
    """
    Builds a retriever over the source code found under a directory.

    The files are collected with load_code_files, indexed in a FAISS vector
    store using the given embedding model, and the store's retriever is returned.

    Args:
        embeddings: An embedding model used to convert text into vector representations.
        source_dir (str): The directory containing the source code files to be indexed.

    Returns:
        The retriever obtained from the FAISS vector store via as_retriever().
    """

    # Collect documents, embed them into a FAISS store, expose it as a retriever
    return FAISS.from_documents(load_code_files(source_dir), embeddings).as_retriever()

def compare_code(query: str, base_retriever, compare_retriever) -> str:
    """
    Generates a comparison using code snippets from a base repository and a forked repository. 

    Both retrievers are queried with the same query, the returned documents are
    joined into one text block per repository, and the module-level `model` is
    prompted with both blocks.

    Args:
        query: Query to be used for searching relevant code snippets
        base_retriever: Retriever for the base repository
        compare_retriever: Retriever for the forked repository
    Returns:
        str: Generated analysis as a string
    """

    # Retrievers are Runnables: invoke() replaces the deprecated
    # get_relevant_documents() and returns the same list of documents.
    base_results = base_retriever.invoke(query)
    compare_results = compare_retriever.invoke(query)

    base_snippets = "\n\n".join(doc.page_content for doc in base_results)
    compare_snippets = "\n\n".join(doc.page_content for doc in compare_results)

    prompt = f"""
    ### **Instructions:**
    You are given source code snippets from a base repository and a fork of that repository. 

    **Base Version:**
    {base_snippets}

    **Forked Version:**
    {compare_snippets}

    
    Assess the degree of similarity between the two repositories:
    - **Perform semantic comparisons between code segments**
    - **Focus on identifying refactoring patterns and significant alterations**
    - **Give a percentage of similarity between the repositories**

    ### **Additional Instructions:**
    - You must never hallucinate
    - You have to always answer in English
    - Make your response clear and structured
    """

    analysis = model.invoke(prompt)
    return analysis

def generate_comparisions(base_retriever, base_dir: str, clear_json: bool = False) -> None:
    """
    Generates comparisons between the given base directory and other repositories in the OUTPUT_DIR.
    Saves generated analysis into a json object:
    {
        "BASE": "user_repository_name",
        "FORK": "user_repository_name",
        "generation_time": time it took to generate the analysis in seconds,
        "analysis": analysis_result,
    }

    Generated json is saved into RESULT_DIR/results.json. The file is rewritten
    after every fork, so an interrupted run keeps the results produced so far.
    Entries of OUTPUT_DIR that are not directories are ignored, and forks that
    already have an entry in the results are skipped.

    Args:
        base_retriever: Retriever for the base repository
        base_dir (str): Name of the base repository's folder inside OUTPUT_DIR
        clear_json (bool): If true, starts from an empty result list and overwrites the existing results.json on the first write. If false, appends generated content to the existing file. Default to False.
    Returns:
        None
    """

    query = "Analyze the structure and patterns of this code."
    results_path = f"{RESULT_DIR}/results.json"

    # Load existing results data if the file exists and is wanted
    data = []
    if not clear_json and os.path.exists(results_path):
        with open(results_path, "r") as results_file:
            try:
                data = json.load(results_file)
            except json.JSONDecodeError:
                # An unreadable results file is treated as "no results yet"
                data = []

    for fork_dir in os.listdir(OUTPUT_DIR):
        # Skip comparing the base directory with itself
        if fork_dir == base_dir:
            continue

        fork_path = f"{OUTPUT_DIR}/{fork_dir}"

        # Stray files in OUTPUT_DIR are not repositories and cannot be indexed
        if not os.path.isdir(fork_path):
            continue

        # Skip if analysis has been previously generated
        if any(obj.get("FORK") == fork_dir for obj in data):
            print("Already generated for: ", fork_dir)
            continue

        print(f"Initializing retriver for: {fork_path}...")
        start_time = time.perf_counter()
        compare_retriever = initialize_retriever(embeddings, fork_path)

        print(f"Generating comparisons for: {fork_path}...")
        analysis_result = compare_code(query, base_retriever, compare_retriever)
        end_time = time.perf_counter()
        elapsed = end_time - start_time

        # Generation_time contains setting up retrievers and generating analysis
        data.append({
            "BASE": base_dir,
            "FORK": fork_dir,
            "generation_time": elapsed,
            "analysis": analysis_result,
        })

        # A distinct name for the file handle keeps the loop variable intact
        with open(results_path, 'w') as results_file:
            json.dump(data, results_file, indent=4)

def run_llm_analysis():
    """
    Runs the whole LLM comparison: builds a retriever for the base repository
    and generates a comparison for the other repositories in OUTPUT_DIR.
    """
    # Cloned folders are named after the repository with "/" replaced by "_"
    base_folder = BASE_REPO.replace("/", "_")
    base_index = initialize_retriever(embeddings, f"{OUTPUT_DIR}/{base_folder}")

    generate_comparisions(base_index, base_folder)

### Run

In [None]:
# Build the base retriever and generate the comparisons (results land in RESULT_DIR)
run_llm_analysis()

## Other useful functions

Miscellaneous functions that can be used for quickly doing some common tasks.

In [None]:
import json
import csv

def json_to_csv(json_path: str, output_path: str = "results/results_csv.csv") -> None:
    """
    Transforms the json data into a csv file

    The json file is expected to hold a list of objects. The csv columns are
    the keys of all objects in first-seen order; an object that lacks a key
    gets an empty cell. An empty list produces an empty csv file.

    Args:
        json_path: Path to the json results file
        output_path: (Optional) output path for the csv
    Returns:
        None
    """
    with open(json_path, "r", encoding="utf-8") as f:
        data = json.load(f)

    # Take the columns from every row, not only the first one: data[0] fails
    # on an empty list, and DictWriter rejects rows with keys it was not told
    # about. dict.fromkeys de-duplicates while keeping first-seen order.
    headers = list(dict.fromkeys(key for row in data for key in row))

    with open(output_path, "w", newline="", encoding="utf-8") as f:
        if not headers:
            # Nothing to write; leave the (now empty) output file in place
            return
        writer = csv.DictWriter(f, fieldnames=headers)
        writer.writeheader()
        writer.writerows(data)

# Convert the generated results; the csv goes to the function's default output path
json_to_csv("results/results.json")

In [None]:
import json

def calculate_total_generation_time(json_path: str) -> float:
    """
    Sums the generation times from the results file

    Entries without a "generation_time" value (missing or null) count as 0.
    An empty results list yields 0.0.

    Args:
        json_path: Path to the results file
    Returns:
        float: Total generation time in seconds
    """

    with open(json_path, "r") as f:
        data = json.load(f)

    # `or 0.0` guards against entries where the key is absent or null, which
    # would otherwise make sum() fail on None; the 0.0 start value keeps the
    # result a float even for an empty list.
    return sum(((result.get("generation_time") or 0.0) for result in data), 0.0)

# Show the summed generation time (in seconds) of all stored results
print(calculate_total_generation_time("results/results.json"))

In [None]:
import json

def get_analysis_for_fork(json_path: str, fork_name: str):
    """
    Looks up the stored analysis of a single fork.

    The first entry whose "FORK" value equals fork_name decides the result.

    Args:
        json_path: Path to the results file
        fork_name: Name of the fork given as "user_repository"
    Returns:
        The "analysis" value of the first matching entry OR
        None if the name is not found in the results
    """

    with open(json_path, "r") as results_file:
        entries = json.load(results_file)

    # Lazily scan the entries and stop at the first one that matches
    matching_analyses = (
        entry.get("analysis")
        for entry in entries
        if entry.get("FORK") == fork_name
    )
    return next(matching_analyses, None)

# Example lookup; nothing is printed when the fork has no (non-empty) analysis in the results
example = get_analysis_for_fork("results/results.json", "ZebinLiu_vanna")
if example: print(example)

## Tests

In [None]:
import csv

# ---TESTS---
# Results fixture read by the json helper tests (csv export, total time, fork lookup)
TEST_JSON = "test_data/test_results.json"

def test_repo_exists():
    """Checks repo_exists against one repository expected to exist and one name expected not to."""
    found_known = repo_exists("vanna-ai/vanna")
    found_bogus = repo_exists("aaaaaaaa")

    assert found_known, "Checking valid repository failed"
    assert not found_bogus, "Checking invalid repository failed"

    print("Test passed: test_repo_exists")

def test_clone_repo():
    """Clones one valid and one invalid repository into test_output and checks which folders exist afterwards."""
    target_dir = "test_output"
    existing_repo = "Ferkku/Advanced-Software-Quality"
    bogus_repo = "asdf"

    # Both clone attempts are made before anything is asserted
    for candidate in (existing_repo, bogus_repo):
        clone_repo(candidate, target_dir)

    existing_folder = existing_repo.replace("/", "_")
    bogus_folder = bogus_repo.replace("/", "_")

    assert os.path.exists(f"test_output/{existing_folder}"), "Cloning valid repository failed"
    assert not os.path.exists(f"test_output/{bogus_folder}"), "Cloning invalid repository didn't cause an error"

    print("Test passed: test_clone_repo")

def test_get_forks():
    """Requests ten forks of vanna-ai/vanna and checks that exactly ten come back."""
    expected_count = 10
    found = get_forks("vanna-ai/vanna", expected_count)
    found_count = len(found)

    assert found_count == expected_count, f"Forks fetched was {found_count}, should've been {expected_count}"

    print("Test passed: test_get_forks")


def test_json_to_csv():
    """
    Converts the TEST_JSON fixture to csv and compares the file content with
    the expected rows. The generated csv is removed afterwards, also when an
    assertion fails.
    """
    output_dir = "test_output"
    output_path = "test_output/test_results_csv.csv"

    expected_data = [
        ["BASE", "FORK", "generation_time", "analysis"],
        ["test_base", "test_fork1", "10.5", "test json output 1"],
        ["test_base", "test_fork2", "20.0", "test json output 2"],
        ["test_base", "test_fork3", "5.8", "test json output 3"],
    ]

    # Do not rely on an earlier test having created the output directory
    os.makedirs(output_dir, exist_ok=True)

    try:
        json_to_csv(TEST_JSON, output_path)

        assert os.path.exists(output_path), "Creating csv failed: path not found"

        # Read back with the same encoding json_to_csv writes with
        with open(output_path, newline='', encoding="utf-8") as f:
            read_data = list(csv.reader(f))

        assert expected_data == read_data, "Created csv has different data than expected"
    finally:
        # Clean up
        if os.path.exists(output_path):
            os.remove(output_path)

    print("Test passed: test_json_to_csv")

def test_calculate_total_generation_time():
    """Checks that the generation times in the TEST_JSON fixture add up to 36.3 seconds."""
    import math

    expected = 36.3
    result = calculate_total_generation_time(TEST_JSON)

    # The total is a sum of floats, so compare with a tolerance instead of ==
    assert math.isclose(result, expected), f"Total generation time was {result}, expected {expected}"

    print("Test passed: test_calculate_total_generation_time")

def test_get_analysis_for_fork():
    """Checks that looking up "test_fork2" in the TEST_JSON fixture returns its stored analysis."""
    expected = "test json output 2"
    result = get_analysis_for_fork(TEST_JSON, "test_fork2")

    assert result == expected, f"Fetched result: {result}, differs from expected: {expected}"

    print("Test passed: test_get_analysis_for_fork")

def run_tests():
    """Runs every test of this notebook in a fixed order; the first failing assertion stops the run."""
    ordered_tests = (
        test_repo_exists,
        test_clone_repo,
        test_get_forks,
        test_json_to_csv,
        test_calculate_total_generation_time,
        test_get_analysis_for_fork,
    )
    for test_case in ordered_tests:
        test_case()

# Execute all tests; the repository tests call the GitHub API and run git clone
run_tests()