In [2]:
!pip install pandas semgrep



In [3]:
import os

# All scanner reports are written under ./results; make sure the folder is
# there (a no-op when it already exists).
os.makedirs(name="results", exist_ok=True)

In [4]:
# Run Semgrep on GSON
# Scans ./Repos/gson with the "p/cwe-top-25" rule config and writes the
# findings as JSON to results/gson-semgrep.json (--quiet suppresses console output).
!semgrep scan --config "p/cwe-top-25" --json --quiet -o "results/gson-semgrep.json" ./Repos/gson 

# NOTE: printed unconditionally; it does not check whether the scan succeeded.
print("DONE")

DONE




In [5]:

!semgrep scan --config "p/cwe-top-25" --json --quiet -o "results/gson-semgrep.json" ./Repos/logging-log4j2

!semgrep scan --config "p/cwe-top-25" --json --quiet -o "results/gson-semgrep.json" ./Repos/zxing

print("Semgrep Done")



Semgrep Done




In [6]:
# Run PMD on GSON
# Uses the Windows launcher (pmd.bat) from the bundled PMD 7.17.0 distribution.
# Only the Java security ruleset is applied here; the report is written as XML
# to results/gson-pmd.xml.
!.\Tools\pmd-bin-7.17.0\bin\pmd.bat check -d ./Repos/gson -R category/java/security.xml -f xml -r "results/gson-pmd.xml"

# NOTE(review): this message is printed regardless of the PMD result. The
# recorded output shows "No java executable found in PATH", so presumably no
# report was produced on that run — confirm Java is installed and on PATH.
print("PMD scans complete!")

No java executable found in PATH
PMD scans complete!


In [7]:
# Run PMD on Log4j (with security AND errorprone rules)
# The XML report is written to results/log4j-pmd.xml.
!.\Tools\pmd-bin-7.17.0\bin\pmd.bat check -d ./Repos/logging-log4j2 -R category/java/security.xml,category/java/errorprone.xml -f xml -r "results/log4j-pmd.xml"

# Run PMD on ZXing (with security AND errorprone rules)
# The XML report is written to results/zxing-pmd.xml.
!.\Tools\pmd-bin-7.17.0\bin\pmd.bat check -d ./Repos/zxing -R category/java/security.xml,category/java/errorprone.xml -f xml -r "results/zxing-pmd.xml"

# NOTE(review): this message is printed regardless of the PMD results. The
# recorded output shows "No java executable found in PATH" for both runs, so
# presumably no reports were produced — confirm Java is installed and on PATH.
print("PMD scans complete!")

No java executable found in PATH
No java executable found in PATH
PMD scans complete!


In [1]:
# --- CodeQL Setup ---
import subprocess
import os
import shutil

In [2]:
# Absolute location of the lab's top-level 'Lab6' folder.
# NOTE(review): machine-specific path; it must be edited to run elsewhere.
project_root = r"C:\Sidd all in one\Unsafe\Lab6"

# Make the project root the working directory so the relative tool and
# repository paths used by the other cells resolve against it.
os.chdir(project_root)

# Echo the resulting working directory as a sanity check.
print("Successfully changed working directory to:", os.getcwd(), sep="\n")

Successfully changed working directory to:
C:\Sidd all in one\Unsafe\Lab6


In [3]:
codeql_cli_path = r"Tools\codeql\codeql.exe"  # Or just "codeql" if it's in your PATH
codeql_queries_path = r"Tools\codeql-queries\codeql-queries" # adjust to your clone

# Resolve the CLI once: prefer the configured path, then fall back to PATH lookup.
# The resolved location is stored back in `codeql_cli_path` so that the commands
# built in later cells invoke the executable that was actually found here
# (previously a PATH-only install passed this check but later commands still
# pointed at the missing configured path).
if os.path.exists(codeql_cli_path):
    resolved_cli = codeql_cli_path
else:
    resolved_cli = shutil.which(codeql_cli_path) or shutil.which("codeql")

if resolved_cli:
    codeql_cli_path = resolved_cli
    print("CodeQL CLI found.")
else:
    print(f"ERROR: CodeQL CLI not found at '{codeql_cli_path}'. Please correct the path or install CodeQL.")

# The query pack is a local clone; only its presence as a directory is verified.
if not os.path.isdir(codeql_queries_path):
    print(f"ERROR: CodeQL queries folder not found at '{codeql_queries_path}'. Please correct the path.")
else:
    print("CodeQL queries folder found.")

CodeQL CLI found.
CodeQL queries folder found.


In [None]:
# --- Create CodeQL Databases (with explicit build commands) ---
# For every enabled project this builds the sources under CodeQL's tracer and
# writes the database to <project_root>/CodeQL-Databases/<name>-codeql-db.
print("Creating CodeQL databases...")

# --- !!! DEFINE BUILD COMMANDS PER PROJECT !!! ---
# You MUST find the correct build command for each project.
# All entries are currently commented out, so the loop below is a no-op until
# a project is enabled both here and in `repos`.
build_commands = {
    #"gson": "mvn clean verify", # Example for Maven
    #"log4j": "mvn clean install -DskipTests", # Example for Maven (Log4j2 uses Maven)
    #"zxing": "mvn clean compile -DskipTests" # Example for Maven (ZXing uses Maven)
}

# Short project name -> absolute path of its checkout.
repos = {
    #"gson": os.path.join(project_root, "Repos", "gson"),
    #"log4j": os.path.join(project_root, "Repos", "logging-log4j2"),
    #"zxing": os.path.join(project_root, "Repos", "zxing")
}
output_dir = os.path.join(project_root, "CodeQL-Databases")

# `codeql database create` creates the database folder itself but expects its
# parent directory to exist already.
os.makedirs(output_dir, exist_ok=True)

for name, repo_path in repos.items():
    db_name = os.path.join(output_dir, f"{name}-codeql-db")
    print(f"\nAttempting to create database for '{name}' at '{db_name}'...")

    if not os.path.isdir(repo_path):
        print(f"  ERROR: Repository path '{repo_path}' does not exist. Skipping.")
        continue

    if name not in build_commands:
        print(f"  ERROR: Build command not defined for '{name}'. Skipping.")
        continue

    # Passed as an argument list (no shell), so paths containing spaces are safe.
    command = [
        codeql_cli_path,
        "database", "create", db_name,
        # Plain literal: reusing the enclosing quote type inside an f-string is
        # a SyntaxError before Python 3.12.
        "--language=java",
        f"--source-root={repo_path}",
        f"--command={build_commands[name]}", # build command run under the CodeQL tracer
        "--overwrite"
    ]

    # Display only: arguments are joined with spaces and not quoted.
    print(f"  Running command: {' '.join(command)}")
    try:
        # check=True turns a non-zero exit status into CalledProcessError.
        result = subprocess.run(command, capture_output=True, text=True, check=True, encoding='utf-8', errors='ignore')
        print(f"  Successfully created database for '{name}'.")
    except subprocess.CalledProcessError as e:
        print(f"  ERROR creating database for '{name}':")
        print(f"  Return Code: {e.returncode}")
        print(f"  Stderr:\n{e.stderr}")
    except Exception as e:
        # e.g. the CodeQL executable could not be launched at all.
        print(f"  An unexpected error occurred: {e}")

print("\nDatabase creation process finished.")

Creating CodeQL databases...
\nAttempting to create database for 'log4j' at 'C:\Sidd all in one\Unsafe\Lab6\CodeQL-Databases\log4j-codeql-db'...
  Running command: Tools\codeql\codeql.exe database create C:\Sidd all in one\Unsafe\Lab6\CodeQL-Databases\log4j-codeql-db --language=java --source-root=C:\Sidd all in one\Unsafe\Lab6\Repos\logging-log4j2 --command=mvn clean install -DskipTests --overwrite
  Successfully created database for 'log4j'.
\nDatabase creation process finished.


In [6]:
# Short project name -> absolute path of its checkout under <project_root>/Repos.
repos = {
    short_name: os.path.join(project_root, "Repos", checkout_dir)
    for short_name, checkout_dir in [
        ("gson", "gson"),
        ("log4j", "logging-log4j2"),
        ("zxing", "zxing"),
    ]
}

In [None]:
# --- Run CodeQL Analysis ---
# Analyzes every project's database with the Java CWE queries and writes one
# SARIF report per project next to the databases.
print("\nRunning CodeQL analysis...")

# Folder of Java CWE security queries inside the cloned query repository.
# Swap the 'java' segment when targeting a different language.
query_suite = os.path.join(codeql_queries_path, "java", "ql", "src", "Security", "CWE")

if os.path.exists(query_suite):
    for name in repos:
        db_name = os.path.join(output_dir, f"{name}-codeql-db")
        # SARIF is a JSON-based results format.
        output_file = os.path.join(output_dir, f"{name}-codeql.sarif")
        print(f"\nAnalyzing '{name}'...")

        # Nothing to analyze unless the database was created beforehand.
        if not os.path.isdir(db_name):
            print(f"  ERROR: Database '{db_name}' not found. Skipping analysis.")
            continue

        # Passed as an argument list (no shell), so paths containing spaces are safe.
        command = [codeql_cli_path, "database", "analyze", db_name, query_suite]
        command += [
            "--format=sarif-latest",      # SARIF output, as required by the lab
            f"--output={output_file}",
            "--ram=8000",                 # memory budget passed to the CodeQL evaluator
        ]

        # Display only: arguments are joined with spaces and not quoted.
        print(f"  Running command: {' '.join(command)}")
        try:
            # check=True turns a non-zero exit status into CalledProcessError.
            result = subprocess.run(
                command,
                capture_output=True,
                text=True,
                check=True,
                encoding='utf-8',
                errors='ignore',
            )
            print(f"  Successfully analyzed '{name}'. Results saved to '{output_file}'.")
            # result.stdout holds the tool's regular output if more detail is needed.
        except subprocess.CalledProcessError as err:
            print(
                f"  ERROR analyzing '{name}':",
                f"  Return Code: {err.returncode}",
                f"  Stderr:\n{err.stderr}",
                sep="\n",
            )
            # err.stdout may carry further details about the failure.
        except Exception as err:
            print(f"  An unexpected error occurred: {err}")
else:
    print(f"ERROR: Query suite path does not exist: {query_suite}")


print("\nCodeQL analysis finished.")


Running CodeQL analysis...

Analyzing 'gson'...
  Running command: Tools\codeql\codeql.exe database analyze C:\Sidd all in one\Unsafe\Lab6\CodeQL-Databases\gson-codeql-db Tools\codeql-queries\codeql-queries\java\ql\src\Security\CWE --format=sarif-latest --output=C:\Sidd all in one\Unsafe\Lab6\CodeQL-Databases\gson-codeql.sarif --ram=8000
  ERROR analyzing 'gson':
  Return Code: 1
  Stderr:
Running queries.
Compiling query plan for C:\Sidd all in one\Unsafe\Lab6\Tools\codeql-queries\codeql-queries\java\ql\src\Security\CWE\CWE-020\ExternalAPIsUsedWithUntrustedData.ql.
[1/122] Found in cache: C:\Sidd all in one\Unsafe\Lab6\Tools\codeql-queries\codeql-queries\java\ql\src\Security\CWE\CWE-020\ExternalAPIsUsedWithUntrustedData.ql.
Compiling query plan for C:\Sidd all in one\Unsafe\Lab6\Tools\codeql-queries\codeql-queries\java\ql\src\Security\CWE\CWE-020\OverlyLargeRange.ql.
[2/122] Found in cache: C:\Sidd all in one\Unsafe\Lab6\Tools\codeql-queries\codeql-queries\java\ql\src\Security\CWE\CW