### Notebot to study the vulnerabilities on LLMYSMAPR

In [49]:
import json
import os

def collect_vulnerability_types(json_file_path, category='all'):
    try:
        # Load the JSON file
        with open(json_file_path, 'r') as f:
            data = json.load(f)

        # Initialize a list to store all vulnerability types
        vulnerabilities = []
        
        if category == "HighRiskFindings":
            vulnerabilities = list(data.get("HighRiskFindings", {}).keys())
        elif category == "MediumRiskFindings":
            vulnerabilities = list(data.get("MediumRiskFindings", {}).keys())

        else: 
            # Loop through all the keys in the JSON
            for category, findings in data.items():
                # Ensure the value is a dictionary (like "HighRiskFindings" etc.)
                if isinstance(findings, dict):
                    # Add the keys (vulnerability types) from each category to the list
                    vulnerabilities.extend(findings.keys())

        return vulnerabilities

    except (FileNotFoundError, json.JSONDecodeError) as e:
        print(f"Error reading JSON file: {e}")
        return []

# Example usage
#json_file_path = 'open_source_dataset/10/vulnerabilityDetails.json'
#vulnerabilities = collect_vulnerability_types(json_file_path)
#print(vulnerabilities)


In [50]:
def get_all_vulnerabilities(main_folder, category="all"):
    vulnerabilities = []

    # Walk through the directory
    for root, dirs, files in os.walk(main_folder):
        for file in files:
            if file == 'vulnerabilityDetails.json':
                print("collecting from (%s)", os.path.join(root, file))
                # Get the full path to the file and add it to the list
                vulnerabilities += collect_vulnerability_types(os.path.join(root, file), category)
                #vulnerabilities .append( collect_vulnerability_types(os.path.join(root, file)))
    

    return vulnerabilities



In [51]:
all_vul = get_all_vulnerabilities("open_source_dataset")
print(len(all_vul))

collecting from (%s) open_source_dataset/13/vulnerabilityDetails.json
collecting from (%s) open_source_dataset/12/vulnerabilityDetails.json
collecting from (%s) open_source_dataset/5/vulnerabilityDetails.json
collecting from (%s) open_source_dataset/7/vulnerabilityDetails.json
collecting from (%s) open_source_dataset/18/vulnerabilityDetails.json
collecting from (%s) open_source_dataset/8/vulnerabilityDetails.json
collecting from (%s) open_source_dataset/20/vulnerabilityDetails.json
collecting from (%s) open_source_dataset/19/vulnerabilityDetails.json
collecting from (%s) open_source_dataset/29/vulnerabilityDetails.json
collecting from (%s) open_source_dataset/17/vulnerabilityDetails.json
collecting from (%s) open_source_dataset/6/vulnerabilityDetails.json
collecting from (%s) open_source_dataset/23/vulnerabilityDetails.json
collecting from (%s) open_source_dataset/3/vulnerabilityDetails.json
collecting from (%s) open_source_dataset/26/vulnerabilityDetails.json
collecting from (%s) open

In [52]:
high_risk_vul = get_all_vulnerabilities("open_source_dataset","HighRiskFindings" )
print(len(high_risk_vul))

collecting from (%s) open_source_dataset/13/vulnerabilityDetails.json
collecting from (%s) open_source_dataset/12/vulnerabilityDetails.json
collecting from (%s) open_source_dataset/5/vulnerabilityDetails.json
collecting from (%s) open_source_dataset/7/vulnerabilityDetails.json
collecting from (%s) open_source_dataset/18/vulnerabilityDetails.json
collecting from (%s) open_source_dataset/8/vulnerabilityDetails.json
collecting from (%s) open_source_dataset/20/vulnerabilityDetails.json
collecting from (%s) open_source_dataset/19/vulnerabilityDetails.json
collecting from (%s) open_source_dataset/29/vulnerabilityDetails.json
collecting from (%s) open_source_dataset/17/vulnerabilityDetails.json
collecting from (%s) open_source_dataset/6/vulnerabilityDetails.json
collecting from (%s) open_source_dataset/23/vulnerabilityDetails.json
collecting from (%s) open_source_dataset/3/vulnerabilityDetails.json
collecting from (%s) open_source_dataset/26/vulnerabilityDetails.json
collecting from (%s) open

In [53]:
medium_risk_vul = get_all_vulnerabilities("open_source_dataset","MediumRiskFindings" )
print(len(medium_risk_vul))

collecting from (%s) open_source_dataset/13/vulnerabilityDetails.json
collecting from (%s) open_source_dataset/12/vulnerabilityDetails.json
collecting from (%s) open_source_dataset/5/vulnerabilityDetails.json
collecting from (%s) open_source_dataset/7/vulnerabilityDetails.json
collecting from (%s) open_source_dataset/18/vulnerabilityDetails.json
collecting from (%s) open_source_dataset/8/vulnerabilityDetails.json
collecting from (%s) open_source_dataset/20/vulnerabilityDetails.json
collecting from (%s) open_source_dataset/19/vulnerabilityDetails.json
collecting from (%s) open_source_dataset/29/vulnerabilityDetails.json
collecting from (%s) open_source_dataset/17/vulnerabilityDetails.json
collecting from (%s) open_source_dataset/6/vulnerabilityDetails.json
collecting from (%s) open_source_dataset/23/vulnerabilityDetails.json
collecting from (%s) open_source_dataset/3/vulnerabilityDetails.json
collecting from (%s) open_source_dataset/26/vulnerabilityDetails.json
collecting from (%s) open

In [54]:
import csv
 
def save_to_csv(file_list, output_csv):
    try:
        with open(output_csv, mode='w', newline='') as csvfile:
            writer = csv.writer(csvfile)
            # Write header
            writer.writerow(['File Path'])
            # Write file paths
            for file in file_list:
                writer.writerow([file])
        print(f"File paths saved to {output_csv}")
    except Exception as e:
        print(f"Error saving to CSV: {e}")

save_to_csv(all_vul, "all_vulnerabilities.csv")
save_to_csv(high_risk_vul, "high_risk_vul_vulnerabilities.csv")
save_to_csv(medium_risk_vul, "medium_risk_vul_vulnerabilities.csv")


File paths saved to all_vulnerabilities.csv
File paths saved to high_risk_vul_vulnerabilities.csv
File paths saved to medium_risk_vul_vulnerabilities.csv


In [2]:
import os

def get_current_folder():
    try:
        # Get the current working directory
        current_folder = os.getcwd()
        return current_folder
    except FileNotFoundError as e:
        print(f"Error getting current folder: {e}")
        return None

# Example usage
current_folder = get_current_folder()
if current_folder:
    print(f"You're running the script in: {current_folder}")
else:
    print("Could not determine the current folder.")


You're running the script in: /home/sofia/Desktop/experiments/LLM4SMAPR


In [4]:
os.listdir()

['Repair_workflow.jpg',
 'vulnerabilityCollector.ipynb',
 'ContractTinker',
 '.ipynb_checkpoints',
 'open_source_dataset',
 '.git',
 'README.md']