# GoSec History
This notebook graphs out the change history of the integreatly operator over time based on the release of RHOAM.

TODO
* configuration
* cloning of the repo
* checking out of the version from the branch
* scanning of each version, think about how this handles master
* storing the data
* using the stored data to generate a graph
* printing of the graph

In [111]:
# Imports
import os
import shutil
import subprocess
import tempfile
import json
import tarfile

from pathlib import Path

import git
import semver
import matplotlib.pyplot as plt

In [112]:
# Settings
rhoam = "git@github.com:integr8ly/integreatly-operator.git" # git repo to be working with
initial = "0.1.0" # The earliest version to scan
fresh = False # make sure to get a fresh copy of the repo when = True
working_branch = "master" # Working branch for the last scan to happen on
tag_prefix = "rhoam-v" # What prefix has the tags
results_dir = Path(tempfile.gettempdir(), "results") # Folder to where the scan results will be saved
reuse_results = True # reuse result from past runs.
rhoam_repo = Path(tempfile.gettempdir(), "integreatly-operator")
graph_title = "RHOAM gosec history"
check_enabled = "1.20.0" # First version to have gosec prow check
existing_data = "/home/jimfitz/code/github.com/Boomatang/notebooks/data/results.tar.xz"
# check_enabled = None

In [113]:
# Clone repo
print("Repo dir is: ", rhoam_repo)

if fresh and rhoam_repo.exists():
    shutil.rmtree(rhoam_repo)

if rhoam_repo.exists():
    print("using existing repo")
    r = git.Repo(rhoam_repo)
else:
    print("Cloning repo")
    r = git.Repo.clone_from(rhoam, rhoam_repo)  # downland repo

os.chdir(rhoam_repo)
print(f"Current working dir is: {os.getcwd()}")

Repo dir is:  /tmp/integreatly-operator
using existing repo
Current working dir is: /tmp/integreatly-operator


In [114]:
# Setting up the results folder
if not reuse_results and results_dir.exists():
    shutil.rmtree(results_dir)

results_dir.mkdir(parents=True, exist_ok=True)

In [115]:
# Ensure reference to working branch
if r.head.is_detached:
    default = working_branch
    r.git.checkout(default)
else:
    default = r.active_branch.name

print(f"Current Branch: {default}")

Current Branch: master


In [116]:
def get_semver(tag, remove=None):
    if remove is None:
        name = tag.name
    else:
        name = tag.name.strip(remove)

    return semver.VersionInfo.parse(name)


In [117]:
# Getting the semver version of the repo tags
tags = r.tags
releases = (t for t in tags if t.name.lower().startswith(tag_prefix))
version_release = {}
for release in releases:
    version = get_semver(release, tag_prefix)
    version_release[version] = release

# sort the versions
keys = list(version_release.keys())
keys = sorted(keys, reverse=True)
versions = []
prerelease = []
prerelease_rcs = {}
for key in keys:
    if not key.prerelease:
        versions.append(key)
    else:
        prerelease.append(key)

for release in prerelease:
    a = semver.VersionInfo(major=release.major, minor=release.minor, patch=release.patch)
    if a not in versions:
        if a in prerelease_rcs.keys():
            prerelease_rcs[a].append(release)
        else:
            prerelease_rcs.setdefault(a, [release, ])

for rcs in prerelease_rcs:
    if len(prerelease_rcs[rcs]) == 1:
        versions.append(prerelease_rcs[rcs][0])
    else:
        print("More than 1 RC was found logic needs to be added to deal with this")
        exit(1)

versions.sort(reverse=True)

# remove on wanted versions
base = semver.VersionInfo.parse(initial)
index = versions.index(base)
versions = versions[:index+1]
versions = list(reversed(versions))
for version in versions:
    print(version)

0.1.0
0.2.0
0.3.0-rc1
0.4.0-rc1
0.5.0-rc1
0.7.0-rc1
0.8.0-rc1
0.9.0-rc1
1.0.0
1.0.1
1.1.0
1.2.0
1.3.0
1.4.0
1.5.0
1.6.0
1.6.1
1.7.0
1.8.0
1.8.1
1.8.2-rc1
1.9.0
1.10.0
1.11.0
1.12.0
1.13.0
1.14.0
1.15.0
1.15.1
1.15.2
1.16.0
1.17.0
1.18.2
1.19.0
1.20.0
1.20.1
1.21.0
1.22.0
1.23.0
1.24.0
1.24.1
1.25.0
1.26.0
1.27.0
1.28.0


In [118]:
def checkout(v):
    if type(v) is str:
        name = v
    else :
        name = v.name
    r.git.checkout(name)

In [119]:
def scan(file_name: str):
    subprocess.run(["gosec", "-fmt", "json", "-out", file_name, "./..."], check=False)


In [120]:

# load existing data

existing = Path(existing_data)
if tarfile.is_tarfile(existing):
    print("loading existing data")
    existing_tar = tarfile.open(existing)
    existing_tar.extractall(results_dir.parent)
    existing_tar.close()



loading existing data


In [122]:
# do scans
for i in versions:
    tag = version_release[i]
    checkout(tag)

    result = results_dir.joinpath(f"{str(i)}.json")

    if result.exists():
        print(f"Skipping scan of {tag}, results file exists.")
    else:
        print(f"Starting scan on {tag}")
        scan(result)

        existing = Path(existing_data)
        if tarfile.is_tarfile(existing):
            print("Saving new data")
            existing_tar = tarfile.open(existing, "w")
            existing_tar.addfile(tarfile.TarInfo(result.name), open(result))


Skipping scan of rhoam-v0.1.0, results file exists.
Skipping scan of rhoam-v0.2.0, results file exists.
Skipping scan of rhoam-v0.3.0-rc1, results file exists.
Skipping scan of rhoam-v0.4.0-rc1, results file exists.
Skipping scan of rhoam-v0.5.0-rc1, results file exists.
Skipping scan of rhoam-v0.7.0-rc1, results file exists.
Skipping scan of rhoam-v0.8.0-rc1, results file exists.
Skipping scan of rhoam-v0.9.0-rc1, results file exists.
Skipping scan of rhoam-v1.0.0, results file exists.
Skipping scan of rhoam-v1.0.1, results file exists.
Skipping scan of rhoam-v1.1.0, results file exists.
Skipping scan of rhoam-v1.2.0, results file exists.
Skipping scan of rhoam-v1.3.0, results file exists.
Skipping scan of rhoam-v1.4.0, results file exists.
Skipping scan of rhoam-v1.5.0, results file exists.
Skipping scan of rhoam-v1.6.0, results file exists.
Skipping scan of rhoam-v1.6.1, results file exists.
Skipping scan of rhoam-v1.7.0, results file exists.
Skipping scan of rhoam-v1.8.0, results f

[gosec] 2022/11/01 20:18:11 Including rules: default
[gosec] 2022/11/01 20:18:11 Excluding rules: default
[gosec] 2022/11/01 20:18:11 Import directory: /tmp/integreatly-operator/pkg/products/threescale
[gosec] 2022/11/01 20:18:11 Import directory: /tmp/integreatly-operator/test/utils
[gosec] 2022/11/01 20:18:11 Import directory: /tmp/integreatly-operator/controllers/namespacelabel
[gosec] 2022/11/01 20:18:11 Import directory: /tmp/integreatly-operator/apis-products/enmasse/v1beta1
[gosec] 2022/11/01 20:18:11 Import directory: /tmp/integreatly-operator/pkg/metrics
[gosec] 2022/11/01 20:18:11 Import directory: /tmp/integreatly-operator/pkg/resources/poddistribution
[gosec] 2022/11/01 20:18:11 Import directory: /tmp/integreatly-operator/pkg/products/amqstreams
[gosec] 2022/11/01 20:18:11 Import directory: /tmp/integreatly-operator/pkg/resources/quota
[gosec] 2022/11/01 20:18:11 Import directory: /tmp/integreatly-operator/test/common
[gosec] 2022/11/01 20:18:11 Checking package: utils
[gos

Saving new data
Starting scan on rhoam-v1.28.0


[gosec] 2022/11/01 20:18:45 Import directory: /tmp/integreatly-operator/version
[gosec] 2022/11/01 20:18:45 Import directory: /tmp/integreatly-operator/controllers/tenant
[gosec] 2022/11/01 20:18:45 Import directory: /tmp/integreatly-operator/pkg/addon
[gosec] 2022/11/01 20:18:45 Import directory: /tmp/integreatly-operator/pkg/products/observability
[gosec] 2022/11/01 20:18:45 Import directory: /tmp/integreatly-operator/pkg/resources/custom-smtp
[gosec] 2022/11/01 20:18:45 Import directory: /tmp/integreatly-operator/pkg/resources/events
[gosec] 2022/11/01 20:18:45 Import directory: /tmp/integreatly-operator/test/functional
[gosec] 2022/11/01 20:18:45 Import directory: /tmp/integreatly-operator/test/scripts/products/h22-validate-that-rate-limit-service-is-working-as-expected
[gosec] 2022/11/01 20:18:48 Import directory: /tmp/integreatly-operator/pkg/resources/ratelimit
[gosec] 2022/11/01 20:18:48 Checking package: controllers
[gosec] 2022/11/01 20:18:48 Checking file: /tmp/integreatly-o

# Plotting the results

In [None]:
labels=["1.20.0", "1.21.0", "1.22.0", "1.23.0", "1.24.0", "1.25.0", "1.26.0", "1.27.0"]
issues=[2, 4, 6, 8, 10, None, 14, 16]
plt.plot(labels, issues)
plt.axvline(x=0, color="red", linestyle="--", label="PR check")
plt.legend()
plt.show()

In [None]:
result = {}

for dirpath, _, filenames in os.walk(results_dir):
    for filename in filenames:
        if filename.lower().endswith(".json"):
            v = filename.lower().strip(".json")
            result[v] = Path(dirpath, filename)


In [None]:
paths = result.copy()
nosec = {}
for path in paths:
    with open(paths[path]) as f:
        data = json.loads(f.read())
        result[path] = data['Stats']['found']
        nosec[path] = data['Stats']['nosec']


In [None]:
# sort the version to the correct order and mark xy lists
labels, issues, nosec_issues = [], [], []
for i in versions:
    labels.append(str(i))
    issues.append(result[str(i)])
    nosec_issues.append(nosec[str(i)])


plt.figure(figsize=(20,5))
plt.plot(labels, issues, label="sec issues")
plt.plot(labels, nosec_issues, label="nosec issues")
plt.xlabel("Tagged Releases")
plt.tick_params(axis="x", rotation=60)
plt.title(graph_title)

# marker to when prow check was enabled
if check_enabled:
    check = semver.VersionInfo.parse(check_enabled)
    marker_index = versions.index(check)
    plt.axvline(x=marker_index, color="red", linestyle="--", label="PROW check enabled")

plt.legend()
plt.savefig("/tmp/RHOAM_gosec_history.png", dpi=300, bbox_inches = "tight")