In [27]:
import os
from pathlib import Path
import subprocess
from io import StringIO
import pandas as pd
import plotly.express as px

In [87]:
dir = "/Users/eliasberger/Documents/Uni/4_Semester/Hyperautomation/Generating-Terraform-configuration-files/data/aws-easy/code"
provider = "AWS"
model = "GPT-2"

In [88]:
csv = StringIO()
files_affected = 0
total_files = 0
for i,p in enumerate(Path(dir).rglob("*")):
    if str(p).endswith(".tf"):
        total_files += 1
        result = subprocess.run(["tfsec", f"{str(os.path.dirname(p))}", "-f", "csv"], capture_output=True)
        csv.write(result.stdout.decode("utf-8"))
        if len(result.stdout.decode("utf-8")) > 66:
            files_affected += 1

Number of files affected by at least one security smell

In [89]:
files_affected

245

Percentage of files affected by at least one security smell

In [90]:
f"{(files_affected / total_files)*100}%"

'100.0%'

In [91]:
csv.seek(0)

0

In [92]:
df = pd.read_csv(csv)

In [93]:
df

Unnamed: 0,file,start_line,end_line,rule_id,severity,description,link,passed
0,main.tf,16,19,aws-ec2-enable-at-rest-encryption,HIGH,Root block device is not encrypted.,https://aquasecurity.github.io/tfsec/v1.28.1/c...,false
1,main.tf,10,13,aws-ec2-enable-at-rest-encryption,HIGH,Root block device is not encrypted.,https://aquasecurity.github.io/tfsec/v1.28.1/c...,false
2,main.tf,16,19,aws-ec2-enforce-http-token-imds,HIGH,Instance does not require IMDS access to requi...,https://aquasecurity.github.io/tfsec/v1.28.1/c...,false
3,main.tf,10,13,aws-ec2-enforce-http-token-imds,HIGH,Instance does not require IMDS access to requi...,https://aquasecurity.github.io/tfsec/v1.28.1/c...,false
4,Error: scan failed: Users/eliasberger/Document...,1-9: Argument or block definition required; An...,"use the equals sign ""="" to introduce the argu...",,,,,
...,...,...,...,...,...,...,...,...
998,Error: scan failed: Users/eliasberger/Document...,1-8: Argument or block definition required; An...,"use the equals sign ""="" to introduce the argu...",,,,,
999,Error: scan failed: Users/eliasberger/Document...,1-8: Argument or block definition required; An...,"use the equals sign ""="" to introduce the argu...",,,,,
1000,Error: scan failed: Users/eliasberger/Document...,1-8: Argument or block definition required; An...,"use the equals sign ""="" to introduce the argu...",,,,,
1001,Error: scan failed: Users/eliasberger/Document...,1-8: Argument or block definition required; An...,"use the equals sign ""="" to introduce the argu...",,,,,


In [94]:
df_filter = df[~df["file"].str.contains("file")]

In [95]:
df_filter

Unnamed: 0,file,start_line,end_line,rule_id,severity,description,link,passed
0,main.tf,16,19,aws-ec2-enable-at-rest-encryption,HIGH,Root block device is not encrypted.,https://aquasecurity.github.io/tfsec/v1.28.1/c...,false
1,main.tf,10,13,aws-ec2-enable-at-rest-encryption,HIGH,Root block device is not encrypted.,https://aquasecurity.github.io/tfsec/v1.28.1/c...,false
2,main.tf,16,19,aws-ec2-enforce-http-token-imds,HIGH,Instance does not require IMDS access to requi...,https://aquasecurity.github.io/tfsec/v1.28.1/c...,false
3,main.tf,10,13,aws-ec2-enforce-http-token-imds,HIGH,Instance does not require IMDS access to requi...,https://aquasecurity.github.io/tfsec/v1.28.1/c...,false
37,main.tf,10,13,aws-ec2-enable-at-rest-encryption,HIGH,Root block device is not encrypted.,https://aquasecurity.github.io/tfsec/v1.28.1/c...,false
...,...,...,...,...,...,...,...,...
992,main.tf,10,13,aws-ec2-enable-at-rest-encryption,HIGH,Root block device is not encrypted.,https://aquasecurity.github.io/tfsec/v1.28.1/c...,false
993,main.tf,28,31,aws-ec2-enforce-http-token-imds,HIGH,Instance does not require IMDS access to requi...,https://aquasecurity.github.io/tfsec/v1.28.1/c...,false
994,main.tf,22,25,aws-ec2-enforce-http-token-imds,HIGH,Instance does not require IMDS access to requi...,https://aquasecurity.github.io/tfsec/v1.28.1/c...,false
995,main.tf,16,19,aws-ec2-enforce-http-token-imds,HIGH,Instance does not require IMDS access to requi...,https://aquasecurity.github.io/tfsec/v1.28.1/c...,false


In [96]:
fig = px.histogram(
    df_filter,
    x="severity",
    category_orders=dict(severity=["LOW", "MEDIUM", "HIGH", "CRITICAL"]),
    title=f"Security Severity Distribution for {provider} using {model}"
)
fig.show()

In [98]:
fig.write_image(f"{provider}_{model}_severity.svg")
fig.write_image(f"{provider}_{model}_severity.png")

In [99]:
df_filter["occurred"] = 1 # pd.Series([1 for x in range(len(df))])



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



In [100]:
df_filter

Unnamed: 0,file,start_line,end_line,rule_id,severity,description,link,passed,occurred
0,main.tf,16,19,aws-ec2-enable-at-rest-encryption,HIGH,Root block device is not encrypted.,https://aquasecurity.github.io/tfsec/v1.28.1/c...,false,1
1,main.tf,10,13,aws-ec2-enable-at-rest-encryption,HIGH,Root block device is not encrypted.,https://aquasecurity.github.io/tfsec/v1.28.1/c...,false,1
2,main.tf,16,19,aws-ec2-enforce-http-token-imds,HIGH,Instance does not require IMDS access to requi...,https://aquasecurity.github.io/tfsec/v1.28.1/c...,false,1
3,main.tf,10,13,aws-ec2-enforce-http-token-imds,HIGH,Instance does not require IMDS access to requi...,https://aquasecurity.github.io/tfsec/v1.28.1/c...,false,1
37,main.tf,10,13,aws-ec2-enable-at-rest-encryption,HIGH,Root block device is not encrypted.,https://aquasecurity.github.io/tfsec/v1.28.1/c...,false,1
...,...,...,...,...,...,...,...,...,...
992,main.tf,10,13,aws-ec2-enable-at-rest-encryption,HIGH,Root block device is not encrypted.,https://aquasecurity.github.io/tfsec/v1.28.1/c...,false,1
993,main.tf,28,31,aws-ec2-enforce-http-token-imds,HIGH,Instance does not require IMDS access to requi...,https://aquasecurity.github.io/tfsec/v1.28.1/c...,false,1
994,main.tf,22,25,aws-ec2-enforce-http-token-imds,HIGH,Instance does not require IMDS access to requi...,https://aquasecurity.github.io/tfsec/v1.28.1/c...,false,1
995,main.tf,16,19,aws-ec2-enforce-http-token-imds,HIGH,Instance does not require IMDS access to requi...,https://aquasecurity.github.io/tfsec/v1.28.1/c...,false,1


In [101]:
top_10 = df_filter.groupby("rule_id").sum().reset_index().sort_values("occurred", ascending=False).head(n=10)


The default value of numeric_only in DataFrameGroupBy.sum is deprecated. In a future version, numeric_only will default to False. Either specify numeric_only or select only columns which should be valid for the function.



In [102]:
fig = px.bar(top_10, x="rule_id", y="occurred", title="AWS Codex Security Smells")
fig.show()

In [103]:
fig.write_image(f"{provider}_{model}_issues.svg")
fig.write_image(f"{provider}_{model}_issues.png")