In [None]:
from IPython.display import clear_output

In [None]:
!sudo apt install python3.8
clear_output()

In [None]:
# Install radon; its command-line tool is invoked through subprocess further
# down (`radon hal` and `radon mi`).
# NOTE(review): the version is not pinned, so results may differ between runs
# made at different times — consider pinning once the version used is known.
!pip install radon
# Wipe the installer log from the cell output.
clear_output()

In [None]:
# Text file listing one repository per line; each entry is later handed to
# `git clone` as-is.
TXT_FILENAME = 'em_repo_stars_part3.txt'
with open(TXT_FILENAME) as f:
    # Iterate the handle directly (no intermediate readlines() list) and drop
    # blank / whitespace-only lines so they never become empty repo entries —
    # the processing loop below picks entries by position (repos[-1]).
    repos = [entry for entry in (line.strip() for line in f) if entry]

In [None]:
import subprocess
from tqdm.notebook import tqdm
import os
from fnmatch import fnmatch

In [None]:
def get_python_filenames(root_dir):
    """Return the paths of all ``*.py`` files found under ``root_dir``.

    The tree is traversed with ``os.walk``; every returned path is the walked
    directory joined with the file name, in traversal order. A missing
    ``root_dir`` yields an empty list.
    """
    return [
        os.path.join(current_dir, entry)
        for current_dir, _subdirs, entries in os.walk(root_dir)
        for entry in entries
        if fnmatch(entry, "*.py")
    ]

In [None]:
# Per repository: clone -> run Radon (Halstead + Maintainability Index) on every
# Python file -> zip the accumulated Results/ tree to Google Drive -> delete the clone.
# NOTE(review): only the last entry of `repos` is processed (`[repos[-1]]`) —
# confirm this is intentional and not a leftover from a partial re-run.
pbar = tqdm([repos[-1]])
for repo in pbar:
    # `git clone` names the checkout after the last URL component, ignoring a
    # trailing slash and an optional ".git" suffix; mirror that here so the
    # folder we scan (and later delete) is the one git actually created.
    repo_parts = repo.rstrip('/').removesuffix('.git').split('/')
    folder_name = repo_parts[-1]
    # "<owner>_<name>", used as the per-repository results folder.
    repo_name = "_".join(repo_parts[-2:])
    pbar.set_description_str(f"Processing {repo_name}")

    # clone repo
    pbar.set_postfix_str("Cloning Repository...")
    clone = subprocess.run(["git", "clone", repo])
    if clone.returncode != 0:
        # Without a checkout there is nothing to measure; skip instead of
        # creating and archiving empty result folders for this repository.
        print(f"git clone failed for {repo} (exit code {clone.returncode}); skipping")
        continue

    # calculate metrics
    pbar.set_postfix_str("Running Radon...")
    full_filenames = get_python_filenames(folder_name)
    halstead_dir = f"Results/{repo_name}/Halstead/"
    mi_dir = f"Results/{repo_name}/MI/"
    os.makedirs(halstead_dir, exist_ok=True)
    os.makedirs(mi_dir, exist_ok=True)
    # Open each results file once in append mode; every radon invocation
    # writes its JSON output for one source file to the shared handle.
    with open(f"{halstead_dir}results.jsonl", 'a') as halstead_out, \
            open(f"{mi_dir}results.jsonl", 'a') as mi_out:
        for filename in tqdm(full_filenames):
            subprocess.run(
                ["radon", "hal", "-f", filename, '-j'],
                stdout=halstead_out
            )
            subprocess.run(
                ["radon", "mi", filename, '-j'],
                stdout=mi_out
            )

    # save to gdrive
    # NOTE(review): the archive is labelled "Part1" although the input list is
    # 'em_repo_stars_part3.txt' — confirm the intended name before changing it.
    pbar.set_postfix_str("Saving Results to GDrive...")
    subprocess.run(['zip', '-r', 'RadonResultsPart1.zip', 'Results'])
    subprocess.run(['cp', '-r', 'RadonResultsPart1.zip', 'drive/MyDrive'])
    subprocess.run(['rm', '-rf', 'RadonResultsPart1.zip'])

    # delete repository
    pbar.set_postfix_str("Deleting repository...")
    subprocess.run(['rm', '-rf', folder_name])

  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/1690 [00:00<?, ?it/s]

In [None]:
# Rename the Results directory to RadonResultsPart1_new.
# NOTE(review): the "Part1" label does not match the part3 input list used
# above — confirm the intended name.
! mv ./Results ./RadonResultsPart1_new

In [None]:
# Recursively zip the entire current directory into the RadonResultsPart0_new archive.
# NOTE(review): this archives everything under ./ rather than only the results
# folder, and uses a "Part0" label — confirm both are intended.
! zip -r "./RadonResultsPart0_new" "./"

In [None]:
# Extract the four partial archives into the same target directory so their
# contents end up merged under ./RadonResultsPart3 (the first archive unpacks
# a Results/<repo>/{Halstead,MI}/results.jsonl tree; presumably the others do too).
! unzip "./RadonResultsPart3_1.zip" -d "./RadonResultsPart3"
! unzip "./RadonResultsPart3_2.zip" -d "./RadonResultsPart3"
! unzip "./RadonResultsPart3_3.zip" -d "./RadonResultsPart3"
! unzip "./RadonResultsPart3_4.zip" -d "./RadonResultsPart3"

Archive:  ./RadonResultsPart3_1.zip
   creating: ./RadonResultsPart3/Results/
   creating: ./RadonResultsPart3/Results/pyecharts_pyecharts/
   creating: ./RadonResultsPart3/Results/pyecharts_pyecharts/Halstead/
  inflating: ./RadonResultsPart3/Results/pyecharts_pyecharts/Halstead/results.jsonl  
   creating: ./RadonResultsPart3/Results/pyecharts_pyecharts/MI/
  inflating: ./RadonResultsPart3/Results/pyecharts_pyecharts/MI/results.jsonl  
   creating: ./RadonResultsPart3/Results/aleju_imgaug/
   creating: ./RadonResultsPart3/Results/aleju_imgaug/Halstead/
  inflating: ./RadonResultsPart3/Results/aleju_imgaug/Halstead/results.jsonl  
   creating: ./RadonResultsPart3/Results/aleju_imgaug/MI/
  inflating: ./RadonResultsPart3/Results/aleju_imgaug/MI/results.jsonl  
   creating: ./RadonResultsPart3/Results/horovod_horovod/
   creating: ./RadonResultsPart3/Results/horovod_horovod/Halstead/
  inflating: ./RadonResultsPart3/Results/horovod_horovod/Halstead/results.jsonl  
   creating: ./RadonRe

In [None]:
# Remove the existing pyodide_pyodide results first; ResultNew/ carries its own
# pyodide_pyodide folder, which presumably is meant to replace it.
! rm -rf /content/RadonResultsPart3/Results/pyodide_pyodide
# Move every entry of ResultNew/ into Results/ (-v prints each rename).
! mv -v /content/RadonResultsPart3/ResultNew/* /content/RadonResultsPart3/Results/

renamed '/content/RadonResultsPart3/ResultNew/apache_tvm' -> '/content/RadonResultsPart3/Results/apache_tvm'
renamed '/content/RadonResultsPart3/ResultNew/borgbackup_borg' -> '/content/RadonResultsPart3/Results/borgbackup_borg'
renamed '/content/RadonResultsPart3/ResultNew/facebookresearch_ParlAI' -> '/content/RadonResultsPart3/Results/facebookresearch_ParlAI'
renamed '/content/RadonResultsPart3/ResultNew/ludwig-ai_ludwig' -> '/content/RadonResultsPart3/Results/ludwig-ai_ludwig'
renamed '/content/RadonResultsPart3/ResultNew/Nuitka_Nuitka' -> '/content/RadonResultsPart3/Results/Nuitka_Nuitka'
renamed '/content/RadonResultsPart3/ResultNew/PaddlePaddle_PaddleNLP' -> '/content/RadonResultsPart3/Results/PaddlePaddle_PaddleNLP'
renamed '/content/RadonResultsPart3/ResultNew/pyodide_pyodide' -> '/content/RadonResultsPart3/Results/pyodide_pyodide'
renamed '/content/RadonResultsPart3/ResultNew/smicallef_spiderfoot' -> '/content/RadonResultsPart3/Results/smicallef_spiderfoot'
renamed '/content/Ra

In [None]:
import json
import os

def convert_jsonl_to_json(input_jsonl_file, output_json_file):
    """Aggregate a JSON-lines file into a single pretty-printed JSON array.

    Args:
        input_jsonl_file: Path of the ``.jsonl`` file to read (one JSON
            document per line).
        output_json_file: Destination path *without* extension; ``.json`` is
            appended and the parent directory is created when missing.

    Blank lines are skipped. Lines that are not valid JSON are reported on
    stdout (with their 1-based line number) and skipped. Returns ``None``.
    """
    # Ensure the output folder exists. Use the real parent directory of the
    # target instead of a fixed number of path components, so the function
    # works for targets at any depth (and for relative paths).
    output_json_folder = os.path.dirname(output_json_file)
    print(output_json_folder)
    if output_json_folder:  # empty when the target is a bare file name
        os.makedirs(output_json_folder, exist_ok=True)

    # Determine the output JSON filename
    output_json_file = output_json_file + '.json'

    # Read the JSONL file and aggregate the data
    data = []
    with open(input_jsonl_file, 'r') as jsonl_file:
        for line_number, line in enumerate(jsonl_file, start=1):
            line = line.strip()
            if not line:  # Skip empty lines
                continue
            try:
                data.append(json.loads(line))
            except json.JSONDecodeError as e:
                print(f"Error decoding JSON on line {line_number}: {e}")

    # Write to the JSON file
    print(output_json_file)
    with open(output_json_file, 'w') as json_file:
        json.dump(data, json_file, indent=4)

    print(f"Converted {input_jsonl_file} to {output_json_file}")

In [None]:
# Create the directory that receives the converted JSON files. exist_ok keeps
# the cell re-runnable: a plain os.mkdir raises FileExistsError on a second run.
os.makedirs("/content/Radon_Part3_json/", mode=0o777, exist_ok=True)

In [None]:
from pathlib import Path, PurePath
import glob
import shutil

# Convert every Results/<repo>/<metric>/results.jsonl into
# /content/Radon_Part3_json/<repo>/<metric>.json.
RESULTS_ROOT = "/content/RadonResultsPart3/Results/"
JSON_ROOT = "/content/Radon_Part3_json/"

dirs = os.listdir(RESULTS_ROOT)

for jsonl_path in glob.glob(r'/content/RadonResultsPart3/Results/*/*/results.jsonl'):
    print(jsonl_path)
    if os.path.isfile(jsonl_path):
        # "<repo>/<metric>" relative part becomes the extension-less target path.
        new_name = JSON_ROOT + jsonl_path.removeprefix(RESULTS_ROOT).removesuffix("/results.jsonl")
        print(new_name)
        convert_jsonl_to_json(jsonl_path, new_name)
        # Keep the raw JSONL next to its converted JSON. Copying every file to
        # JSON_ROOT itself made them all land on the same "results.jsonl",
        # each overwriting the previous one, and left a stray non-directory
        # entry in the folder that is later globbed for repository folders.
        shutil.copy(jsonl_path, new_name + ".jsonl")
    else:
        print("fail")

/content/RadonResultsPart3/Results/gunthercox_ChatterBot/Halstead/results.jsonl
/content/Radon_Part3_json/gunthercox_ChatterBot/Halstead
/content/Radon_Part3_json/gunthercox_ChatterBot
/content/Radon_Part3_json/gunthercox_ChatterBot/Halstead.json
Converted /content/RadonResultsPart3/Results/gunthercox_ChatterBot/Halstead/results.jsonl to /content/Radon_Part3_json/gunthercox_ChatterBot/Halstead.json
/content/RadonResultsPart3/Results/gunthercox_ChatterBot/MI/results.jsonl
/content/Radon_Part3_json/gunthercox_ChatterBot/MI
/content/Radon_Part3_json/gunthercox_ChatterBot
/content/Radon_Part3_json/gunthercox_ChatterBot/MI.json
Converted /content/RadonResultsPart3/Results/gunthercox_ChatterBot/MI/results.jsonl to /content/Radon_Part3_json/gunthercox_ChatterBot/MI.json
/content/RadonResultsPart3/Results/Mikubill_sd-webui-controlnet/Halstead/results.jsonl
/content/Radon_Part3_json/Mikubill_sd-webui-controlnet/Halstead
/content/Radon_Part3_json/Mikubill_sd-webui-controlnet
/content/Radon_Part3

In [None]:
# ! find . -name '*_MI.json' -delete

In [None]:
import pandas as pd
import json
import csv
import os
import seaborn as sns
from tqdm import tqdm

In [None]:
def get_metric_column_name(metric='all'):
  """Translate a metric abbreviation into its field name in the metrics JSON.

  With the default ``metric='all'`` the list of all known abbreviations is
  returned (in declaration order, starting with ``name`` and ``path``).
  For a single known abbreviation the matching field name is returned;
  unknown abbreviations give ``None``.
  """
  field_by_abbreviation = dict(
    name='name',
    path='path',
    HCPL='calculated_length',
    HDIF='difficulty',
    HEFF='effort',
    HNDB='bugs',
    HPL='length',
    HPV='vocabulary',
    HTRP='time',
    HVOL='volume',
  )
  if metric == 'all':
    return [*field_by_abbreviation]
  # Every mapped value is a non-empty string, so a plain lookup with a None
  # fallback covers both the "known" and the "unknown" case.
  return field_by_abbreviation.get(metric)

In [None]:
# Delete the methods output folder of a previous run.
# NOTE(review): this removes Radon_Part2_Methods while the following cell
# creates Radon_Part3_Methods — looks like the part number was not updated; confirm.
! rm -rf /content/Radon_Part2_Methods

In [None]:
# Create the directory that receives the per-repository method CSVs.
# exist_ok keeps the cell re-runnable; os.mkdir raised FileExistsError whenever
# the directory was already present from an earlier run.
os.makedirs("/content/Radon_Part3_Methods/", mode=0o777, exist_ok=True)

FileExistsError: [Errno 17] File exists: '/content/Radon_Part3_Methods/'

In [None]:
def json_to_csv(repo_metrics_path, output_dir="/content/Radon_Part3_Methods/"):
    """Flatten a repository's Halstead.json into one CSV row per function.

    Args:
        repo_metrics_path: Folder containing ``Halstead.json``; its last path
            component is used as the repository name in the CSV file name.
        output_dir: Folder that receives ``<repo>-Methods.csv``. Created when
            missing. Defaults to the notebook's methods folder.

    ``Halstead.json`` is expected to be a list of ``{file_path: report}``
    objects. Reports without a non-empty ``"functions"`` mapping (for example
    error reports) contribute no rows. The CSV header is the list returned by
    ``get_metric_column_name()``. Returns ``None``.

    Raises:
        KeyError: if a function entry lacks one of the expected metric fields.
    """
    with open(f'{repo_metrics_path}/Halstead.json') as f:
        data_halstead = json.load(f)

    # Last path component == repository folder name (tolerates a trailing slash).
    repo_name = os.path.basename(os.path.normpath(repo_metrics_path))
    metrics_columns = get_metric_column_name()
    # Resolve CSV column -> JSON field once instead of once per cell.
    column_fields = [(column, get_metric_column_name(column)) for column in metrics_columns]

    os.makedirs(output_dir, exist_ok=True)
    csv_path = os.path.join(output_dir, f'{repo_name}-Methods.csv')
    # newline='' is what the csv module requires for files it writes to; the
    # context manager closes the file even if a row raises.
    with open(csv_path, 'w', newline='') as data_file:
        csv_writer = csv.writer(data_file)
        csv_writer.writerow(metrics_columns)

        for file_entry in data_halstead:
            for file_path, report in file_entry.items():
                methods = report.get("functions")
                if not methods:
                    continue
                for method_name, method_metrics in methods.items():
                    row = []
                    for column, field in column_fields:
                        if column == "name":
                            row.append(method_name)
                        elif column == "path":
                            row.append(file_path)
                        else:
                            row.append(method_metrics[field])
                    csv_writer.writerow(row)

In [None]:
# Build one methods CSV per repository folder under the JSON output directory.
json_root_entries = sorted(glob.glob("/content/Radon_Part3_json/*"))
# Only folders that actually hold a Halstead.json can be converted; anything
# else (e.g. a stray file in the root) would make json_to_csv fail on open().
repo_metrics_paths = [
    entry for entry in json_root_entries
    if os.path.isfile(os.path.join(entry, "Halstead.json"))
]
skipped_entries = [entry for entry in json_root_entries if entry not in repo_metrics_paths]
if skipped_entries:
    print(f"Skipping entries without Halstead.json: {skipped_entries}")

for path in tqdm(repo_metrics_paths):
  print(path)
  json_to_csv(path)

  7%|▋         | 2/28 [00:00<00:02, 11.45it/s]

/content/Radon_Part3_json/gunthercox_ChatterBot
/content/Radon_Part3_json/Mikubill_sd-webui-controlnet
/content/Radon_Part3_json/mlc-ai_mlc-llm
/content/Radon_Part3_json/pyecharts_pyecharts
/content/Radon_Part3_json/unifyai_ivy


 25%|██▌       | 7/28 [00:00<00:02,  8.78it/s]

/content/Radon_Part3_json/Rapptz_discord.py
/content/Radon_Part3_json/borgbackup_borg
/content/Radon_Part3_json/comfyanonymous_ComfyUI
/content/Radon_Part3_json/ludwig-ai_ludwig


 39%|███▉      | 11/28 [00:01<00:01, 11.26it/s]

/content/Radon_Part3_json/lllyasviel_Fooocus
/content/Radon_Part3_json/facebookresearch_ParlAI
/content/Radon_Part3_json/aleju_imgaug


 46%|████▋     | 13/28 [00:01<00:01, 12.24it/s]

/content/Radon_Part3_json/ultralytics_ultralytics
/content/Radon_Part3_json/pyodide_pyodide
/content/Radon_Part3_json/Nuitka_Nuitka


 54%|█████▎    | 15/28 [00:01<00:01,  8.86it/s]

/content/Radon_Part3_json/twitter_the-algorithm-ml
/content/Radon_Part3_json/apache_tvm


 61%|██████    | 17/28 [00:01<00:01,  7.07it/s]

/content/Radon_Part3_json/saltstack_salt


 71%|███████▏  | 20/28 [00:02<00:01,  5.03it/s]

/content/Radon_Part3_json/flairNLP_flair
/content/Radon_Part3_json/microsoft_nni
/content/Radon_Part3_json/smicallef_spiderfoot
/content/Radon_Part3_json/reflex-dev_reflex
/content/Radon_Part3_json/spotDL_spotify-downloader
/content/Radon_Part3_json/PaddlePaddle_PaddleNLP


 89%|████████▉ | 25/28 [00:03<00:00,  6.65it/s]

/content/Radon_Part3_json/PrefectHQ_prefect
/content/Radon_Part3_json/horovod_horovod


100%|██████████| 28/28 [00:03<00:00,  7.57it/s]

/content/Radon_Part3_json/OpenBMB_ChatDev
/content/Radon_Part3_json/networkx_networkx





In [None]:
# Archive the per-repository method CSVs for download.
# NOTE(review): the archive name reads "Radon_Par3_Methods.zip" (missing "t"
# in "Part3") — confirm nothing downstream expects this exact name before fixing.
! zip -r "./Radon_Par3_Methods.zip" "./Radon_Part3_Methods"

  adding: Radon_Part3_Methods/ (stored 0%)
  adding: Radon_Part3_Methods/saltstack_salt-Methods.csv (deflated 87%)
  adding: Radon_Part3_Methods/horovod_horovod-Methods.csv (deflated 84%)
  adding: Radon_Part3_Methods/twitter_the-algorithm-ml-Methods.csv (deflated 81%)
  adding: Radon_Part3_Methods/Nuitka_Nuitka-Methods.csv (deflated 87%)
  adding: Radon_Part3_Methods/borgbackup_borg-Methods.csv (deflated 83%)
  adding: Radon_Part3_Methods/ludwig-ai_ludwig-Methods.csv (deflated 85%)
  adding: Radon_Part3_Methods/pyodide_pyodide-Methods.csv (deflated 83%)
  adding: Radon_Part3_Methods/networkx_networkx-Methods.csv (deflated 86%)
  adding: Radon_Part3_Methods/pyecharts_pyecharts-Methods.csv (deflated 87%)
  adding: Radon_Part3_Methods/reflex-dev_reflex-Methods.csv (deflated 86%)
  adding: Radon_Part3_Methods/flairNLP_flair-Methods.csv (deflated 82%)
  adding: Radon_Part3_Methods/microsoft_nni-Methods.csv (deflated 84%)
  adding: Radon_Part3_Methods/unifyai_ivy-Methods.csv (deflated 87%)


In [None]:
# Remove the intermediate folders: the extracted raw Radon results and the
# converted JSON files. The Radon_Part3_Methods folder is not touched here.
! rm -rf "/content/RadonResultsPart3"
! rm -rf "/content/Radon_Part3_json"