In [1]:
from utils.linter import LingtingReport
from utils.radon_generator import RadonAnalyzer
from utils.results import QualityReport
from utils.bandit_eval import Bandit
from utils.memory_profiler_generator import MemoryProfilerScriptGenerator
import glob
import os

# Directory names that are skipped when collecting folders to analyse
folders_not_to_visit = ["archived", "utils", "app_domain_template", "data"]

# Every other directory found in the current working directory is one algorithm
algorithms = [
    entry
    for entry in glob.glob("*")
    if os.path.isdir(entry) and entry not in folders_not_to_visit
]

In [5]:
import yaml

# Parse the evaluation settings stored in .evaluate_config.yml
with open('.evaluate_config.yml', 'r') as config_file:
    config = yaml.safe_load(config_file)

# Show what was parsed
print(config)


{'PCA': '# Execute script data to get memory usage\nif __name__ == "__main__":\n    # For reproducibility\n    np.random.seed(42)\n\n    # Generate random data\n    data = np.random.rand(10000, 100)\n    \n    # Create PCA object\n    pca = PrincipalComponentAnalysis()\n\n    # Fit and transform data\n    pca.fit(data)\n    _ = pca.transform(data)\n    \n    # Get the CPU percentage usage of the process\n    cpu_usage = process.cpu_percent(interval=1)/ num_cores\n    print(f"CPU Usage: {cpu_usage}%")'}


In [9]:
# Analyse every algorithm folder: one QualityReport per algorithm, fed with the
# radon and linter results of each LLM sub-folder plus one bandit run per algorithm.
# NOTE: `report` is rebound on every iteration, so after the loop only the report
# of the last algorithm is still in scope.
for algorithm_number, algorithm in enumerate(algorithms, start=1):
    # enumerate() gives the progress position directly; list.index() would rescan
    # the list on every iteration and report the wrong position for duplicates
    print(f"Analyzing: {algorithm} ({algorithm_number}/{len(algorithms)})...\n")
    
    # Initializing QualityReport() Instance
    report = QualityReport()
    
    # Initializing Bandit() Instance
    bandit = Bandit(algorithm)
    
    # Keep only the sub-directories of the algorithm (one per LLM) that are not
    # excluded; the trailing "/" is kept because the model name is derived from it below
    filtered_list = [
        path + "/"
        for path in glob.glob(f"{algorithm}/*")
        if os.path.isdir(path) and os.path.basename(path) not in folders_not_to_visit
    ]
    print(filtered_list)

    # Looping through each LLM
    for llm_number, llm in enumerate(filtered_list, start=1):
        # "<algorithm>/<model>/" -> "<model>"
        model = llm.split("/")[-2]
        print(f"Analyzing: {model} ({llm_number}/{len(filtered_list)})...\n")
        
        # Initializing LingtingReport() Instance
        linter = LingtingReport(llm)
        
        # Initializing RadonAnalyzer() Instance
        radon = RadonAnalyzer(llm)
        
        # Memory profiling is currently disabled
        #profiler = MemoryProfilerScriptGenerator(llm,
                                           #      execute_statement=PCA_execute_statement)
        
        # Adding Radon and linter results to QualityReport
        report([radon.df, linter.df])
    
    # Adding the bandit results of the whole algorithm folder to QualityReport
    report(bandit.df)
    # Saving the results is currently disabled
    #report.save_results(sheet_name=algorithm)
    
    

Analyzing: dimension_reduction (1/1)...

['dimension_reduction/Gemini/', 'dimension_reduction/codellama/', 'dimension_reduction/Codeium/', 'dimension_reduction/WizardCoder/', 'dimension_reduction/GPT_35/', 'dimension_reduction/codex/', 'dimension_reduction/baseline/', 'dimension_reduction/Tabnine/', 'dimension_reduction/DeepSeeker-Coder/', 'dimension_reduction/GPT4/']
Analyzing: Gemini (1/10)...

Linting file: Gemini/medium.py...
************* Module utils/.pylintrc
utils/.pylintrc:1:0: F0011: error while parsing the configuration: While reading from 'utils/.pylintrc' [line  6]: option 'disable' in section 'MESSAGES CONTROL' already exists (config-parse-error)
************* Module medium
dimension_reduction/Gemini/medium.py:8:0: C0301: Line too long (142/100) (line-too-long)
dimension_reduction/Gemini/medium.py:9:0: C0301: Line too long (127/100) (line-too-long)
dimension_reduction/Gemini/medium.py:15:0: C0303: Trailing whitespace (trailing-whitespace)
dimension_reduction/Gemini/medium

In [4]:
# `report` is re-created for every algorithm in the analysis loop, so this is the
# result table of the last algorithm that was analysed
df = report.df

In [5]:
# Normalise dtypes: the identifier columns become strings, the measurement a float
df = df.astype(
    {
        "metric": str,
        "framework": str,
        "model": str,
        "prompt": str,
        "value": float,
    }
)

In [6]:
# Reshape from long to wide: one row per (model, prompt), one column per metric
df_transformed = (
    df
    .pivot_table(index=['model', 'prompt'], columns='metric', values='value')
    .reset_index()
)

In [7]:
# Distinct models that have at least one row reported by the pylint framework
df.loc[df["framework"] == "pylint", "model"].unique()

array(['Gemini', 'codellama', 'Codeium', 'WizardCoder', 'GPT_35', 'codex',
       'baseline', 'Tabnine', 'DeepSeeker-Coder', 'GPT4'], dtype=object)

In [8]:
# Display the pivoted table (rows: model/prompt, columns: metrics)
df_transformed

metric,model,prompt,blank,comments,comments_to_loc,comments_to_sloc,convention,errors,halstead_bugs,halstead_difficulty,...,multi,multi_and_comments_to_loc,number_of_methods,pylint_score,refactor,security_high,security_low,security_medium,sloc,warnings
0,Codeium,long,15.0,4.0,0.048193,0.117647,9.0,9.0,0.01673,2.25,...,30.0,0.409639,4.0,0.0,0.0,0.0,0.0,0.0,34.0,0.0
1,Codeium,medium,14.0,7.0,0.098592,0.28,13.0,0.0,0.016144,1.6875,...,25.0,0.450704,4.0,4.4,0.0,0.0,0.0,0.0,25.0,1.0
2,Codeium,small,4.0,3.0,0.1,0.130435,13.0,0.0,0.016144,1.6875,...,0.0,0.1,3.0,3.181818,0.0,0.0,0.0,0.0,23.0,2.0
3,DeepSeeker-Coder,long,13.0,9.0,0.111111,0.195652,9.0,0.0,0.032567,2.678571,...,13.0,0.271605,4.0,7.727273,1.0,0.0,0.0,0.0,46.0,0.0
4,DeepSeeker-Coder,medium,16.0,5.0,0.0625,0.16129,8.0,0.0,0.021575,2.2,...,28.0,0.4125,4.0,7.0,0.0,0.0,0.0,0.0,31.0,1.0
5,DeepSeeker-Coder,small,10.0,7.0,0.175,0.304348,7.0,0.0,0.016144,1.6875,...,0.0,0.175,3.0,5.454545,0.0,0.0,0.0,0.0,23.0,3.0
6,GPT4,long,22.0,5.0,0.038462,0.098039,12.0,0.0,0.050138,3.868421,...,52.0,0.438462,4.0,7.291667,0.0,0.0,0.0,0.0,51.0,1.0
7,GPT4,medium,18.0,2.0,0.018868,0.057143,6.0,0.0,0.038067,3.4,...,51.0,0.5,4.0,7.647059,0.0,0.0,0.0,0.0,35.0,2.0
8,GPT4,small,3.0,8.0,0.285714,0.470588,14.0,0.0,0.008,1.5,...,0.0,0.285714,3.0,0.588235,0.0,0.0,0.0,0.0,17.0,2.0
9,GPT_35,long,24.0,11.0,0.092437,0.25,15.0,0.0,0.043728,5.066667,...,40.0,0.428571,4.0,5.348837,1.0,0.0,0.0,0.0,44.0,4.0


### Bandit

In [None]:
import subprocess
import os

In [None]:
import subprocess
import pandas as pd
import json

class Bandit:
    """Runs the ``bandit`` command line tool on a folder and tabulates its severity counts.

    The analysis is executed from the constructor. The per-file severity counts end up
    in ``self.out`` (also reachable as ``self.df``), a long-format DataFrame with the
    columns ``metric``, ``framework``, ``model``, ``value`` and ``prompt``.
    """

    # Column layout of the result frame
    _COLUMNS = ['metric', 'framework', 'model', 'value', 'prompt']

    # Metric name written to the result frame -> key in bandit's per-file metrics
    _SEVERITIES = {
        'security_high': 'SEVERITY.HIGH',
        'security_medium': 'SEVERITY.MEDIUM',
        'security_low': 'SEVERITY.LOW',
    }
    
    def __init__(self, algorithm_path:str):
        """Initializes the Bandit class, which is used to run bandit on the algorithms. Methods generate a temp json file while executing, which is deleted after the execution is complete.

        Args:
            algorithm_path (str): Takes the path of the algorithm to be analyzed.
        """
        # Variables
        self.path = algorithm_path
        self.tmp = os.path.join(self.path, 'tmp')
        self.file_name = "_tmp_bandit_results.json"
        self.output_json = os.path.join(self.tmp, self.file_name)
        self.framework = "bandit"
        
        # File names and substrings to skip while analyzing
        self.skip = ['archived', 'flake8_summarizer', 'memory_usage', 'utils', 'write_to_file', '_total']
        
        self._run()

    @property
    def df(self):
        """Alias for ``self.out``, so the result can be read as ``bandit.df``."""
        return self.out
        
    def _run(self):
        """Runs bandit on the algorithm and stores the wrangled results in ``self.out``.

        Raises:
            FileNotFoundError: If bandit cannot be started or did not write its JSON report.
        """
        # Argument list instead of a shell string: paths containing spaces or shell
        # metacharacters are passed through unchanged and nothing is interpreted by a shell.
        # bandit documents --exclude as ONE comma-separated list, so both entries go
        # into a single value instead of repeating the flag.
        command = [
            "bandit",
            "-f", "json",
            "-o", self.output_json,
            "-ll",
            "-r", self.path,
            "--exclude", "archived,memory_usage",
        ]
        
        # Create temporary folder
        self._create_tmp()
        
        try:
            # Drop a stale report (e.g. from an interrupted run) so it can never be
            # mistaken for the result of this run
            self._del_tmp_json()
            
            # Runs bandit; the exit status is deliberately not checked, the JSON
            # report is the source of truth
            completed = subprocess.run(command, capture_output=True, text=True)
            
            if not os.path.exists(self.output_json):
                raise FileNotFoundError(
                    f"bandit did not write {self.output_json}: {completed.stderr.strip()}"
                )
            
            # Load the results
            results = self._load_results()
            
            # Wrangle the results
            self._wrangle_results(results)
        finally:
            # Delete temporary folder, also when loading or wrangling failed
            self._del_tmp()
        
        
    def _create_tmp(self):
        """Creates a temporary folder to store json file which is used for the results of the bandit analysis.
        """
        # Create temporary folder
        if not os.path.exists(self.tmp):
            os.mkdir(self.tmp)
            
    def _load_results(self):
        """Loads the results of the bandit analysis.

        Returns:
            dict: The ``metrics`` section of the JSON report.
        """
        # Load the results
        with open(self.output_json, "r") as file:
            # json file
            results = json.load(file)
        return results['metrics']
    
    
    def _wrangle_results(self, results):
        """Wrangles the results of the bandit analysis to a pandas DataFrame.

        Every key of ``results`` is treated as a file path of the form
        ``.../<model>/<prompt>.py``. Keys containing one of the ``self.skip``
        substrings, keys without a parent folder and entries lacking one of the
        three severity counts are ignored.

        Args:
            results (dict): Mapping of file path to bandit's metrics for that file.
        """
        rows = []
        
        # Loop through the results
        for result, metrics in results.items():
            if any(x in result for x in self.skip):
                continue
            
            # Extract the model and prompt (accept the platform separator as well)
            parts = result.replace(os.sep, '/').split('/')
            if len(parts) < 2:
                # No parent folder, so there is no model to attribute the file to
                continue
            model = parts[-2]
            file_name = parts[-1]
            # Strip only a trailing ".py"; a ".py" inside the name is left untouched
            prompt = file_name[:-3] if file_name.endswith('.py') else file_name
            
            # Extract the severity
            try:
                values = [metrics[key] for key in self._SEVERITIES.values()]
            except (KeyError, TypeError):
                # Entry without the expected severity counts: nothing to report
                continue
            
            # Create the data
            rows.extend(
                [metric, self.framework, model, value, prompt]
                for metric, value in zip(self._SEVERITIES, values)
            )
        
        # Build the frame once instead of concatenating inside the loop
        self.out = pd.DataFrame(rows, columns=self._COLUMNS)
        
    def _del_tmp_json(self):
        """Deletes the json file.
        """
        # Delete temporary json file
        if os.path.exists(self.output_json):
            os.remove(self.output_json)            

    def _del_tmp(self):
        """Deletes the temporary folder and the json file.
        """
        # Delete temporary json file
        self._del_tmp_json()
        
        # Delete temporary folder
        if os.path.exists(self.tmp):
            try:
                os.rmdir(self.tmp)
            except OSError:
                # The folder holds files that were not created here; leave it in
                # place rather than hiding the real error of a failed run
                pass

In [None]:
# The constructor runs the bandit analysis immediately; the results are in `dim.out`
dim = Bandit("dimension_reduction")