In [25]:
import csv
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np
import matplotlib
matplotlib.rcParams['text.usetex'] = True
matplotlib.use('Agg')
import matplotlib.pyplot as plt
import seaborn
seaborn.set(font_scale=2.3)
seaborn.set_style("whitegrid")

width = 4  # not referenced by any cell visible here

# Base font applied to every figure. With text.usetex enabled Matplotlib only
# accepts the generic families (serif, sans-serif, cursive, monospace). The
# former value 'normal' is a font *weight*, not a family, so it was rejected
# and serif was used as the fallback; naming 'serif' keeps that rendering
# without the unknown-family message.
font = {
    'family': 'serif',
    'weight': 'normal',
    'size': 22,
}

plt.rc('font', **font)
import re

In [2]:
# Read the raw developer survey export with the csv module: `header` receives
# the first row and `data` every following row as a list of strings.
# newline='' is what the csv module requires so that line breaks inside quoted
# fields are parsed correctly; UTF-8 is the same decoding pd.read_csv applies
# by default to this file in the later cells.
with open('../responses/devs.csv', 'r', newline='', encoding='utf-8') as file:
    csv_reader = csv.reader(file)
    header = next(csv_reader)
    data = list(csv_reader)

In [3]:
# Show the column titles at positions 28 through 38 of the raw CSV header row.
header[28:39]

['Absence of coding logic or sanity check',
 'Logic errors',
 'Oracle manipulation',
 'Token standard incompatibility',
 'Function/State Visibility Error',
 'Improper asset locks or frozen asset',
 'Unhandled or mishandled exception',
 'Timestamp Dependency',
 'Integer Overflow and Underflow',
 'Other',
 "Do you want to answer two more questions regarding tools' usage (approx. 3 minutes)?"]

In [4]:
# Clean up the tools names for better labeling on the chart
def clean_tool_name(name):
    """Return a shortened chart label for a survey column title.

    The value is converted to ``str``; ``Other.<digit>`` is collapsed to
    ``Other``, the fragments ``Model checking/`` and ``/Bytecode hardening``
    are removed, and everything from the first ``(`` to the last ``)`` is
    dropped before surrounding whitespace is stripped.
    """
    label = re.sub(r'Other\.[1-9]', 'Other', str(name))
    for fragment in ('Model checking/', '/Bytecode hardening'):
        label = label.replace(fragment, '')
    without_parens = re.sub(r'\(.*\)', '', label)
    return without_parens.strip()

# Developer survey responses, one row per response.
df_devs = pd.read_csv('../responses/devs.csv')

# Tool columns (positions 1-10) as booleans: True wherever the cell is non-empty.
tools_df_devs = df_devs.iloc[:, 1:11].notna()

# Selections per tool, largest first, indexed by the shortened label.
counts_df_devs = tools_df_devs.sum(axis=0).sort_values(ascending=False)
counts_df_devs.index = counts_df_devs.index.map(clean_tool_name)

# The number of responses is the number of rows in the file.
num_responses_devs = len(df_devs)
print('Number of Devs Responses:', num_responses_devs)

# Selections per response, then their mean / max / min across all rows.
num_tools_per_reply_devs = tools_df_devs.sum(axis=1)
avg_tools_per_reply_devs = num_tools_per_reply_devs.mean()
max_tools_per_reply_devs = num_tools_per_reply_devs.max()
min_tools_per_reply_devs = num_tools_per_reply_devs.min()

# Report the three summary statistics.
print('Average Number of Tools Mentioned in a Devs Response:', avg_tools_per_reply_devs)
print('Maximum Number of Tools Mentioned in a Devs Response:', max_tools_per_reply_devs)
print('Minimum Number of Tools Mentioned in a Devs Response:', min_tools_per_reply_devs)


Number of Devs Responses: 27
Average Number of Tools Mentioned in a Devs Response: 4.555555555555555
Maximum Number of Tools Mentioned in a Devs Response: 9
Minimum Number of Tools Mentioned in a Devs Response: 1


In [5]:
# Load the auditors CSV file into a pandas dataframe
df_auditors = pd.read_csv('../responses/auditors.csv')

# Tool columns (positions 1-10) as booleans: True wherever the cell is non-empty
tools_df_auditors = df_auditors.iloc[:, 1:11].notna()

# Selections per tool, largest first, indexed by the shortened label
counts_df_auditors = tools_df_auditors.sum(axis=0).sort_values(ascending=False)
counts_df_auditors.index = counts_df_auditors.index.map(clean_tool_name)

# Number of auditor responses (one row per response)
num_responses_auditors = len(df_auditors)

# Selections per response and their mean / max / min across all rows
num_tools_per_reply_auditors = tools_df_auditors.sum(axis=1)
avg_tools_per_reply_auditors = num_tools_per_reply_auditors.mean()
max_tools_per_reply_auditors = num_tools_per_reply_auditors.max()
min_tools_per_reply_auditors = num_tools_per_reply_auditors.min()

# Grouped horizontal bar chart: developers vs. auditors
fig, ax = plt.subplots()

# Share of responses selecting each tool. counts_df_devs and num_responses_devs
# are the developer-side values left behind by an earlier cell.
counts_df_devs_pct = (counts_df_devs / num_responses_devs) * 100
counts_df_auditors_pct = (counts_df_auditors / num_responses_auditors) * 100

bar_width = 0.35
tools = [clean_tool_name(t) for t in counts_df_devs_pct.index.tolist()]

# One slot per tool; the auditor bar sits directly above the developer bar
x_devs = np.arange(len(tools))
x_auditors = x_devs + bar_width

# Put the auditor series in the developers' order so paired bars line up
counts_df_auditors_pct = counts_df_auditors_pct.reindex(counts_df_devs_pct.index)

devs_bars = ax.barh(x_devs, counts_df_devs_pct, height=bar_width, color='lightblue', label='Devs')
auditors_bars = ax.barh(x_auditors, counts_df_auditors_pct, height=bar_width, color='lightcoral', label='Auditors')

ax.set_xlabel('Percentage of Responses')

# Percent ticks. The raw f-string keeps the LaTeX escape "\%" byte-for-byte
# without relying on Python tolerating the invalid escape sequence "\%".
pct_ticks = np.arange(0, 110, 10)
ax.set_xticks(pct_ticks)
ax.set_xticklabels([rf"{x:.0f}\%" for x in pct_ticks])

# Centre each tool label between its two bars
ax.set_yticks(x_devs + bar_width / 2)
ax.set_yticklabels(tools)

# One legend call: a bare ax.legend() followed by plt.legend(...) only builds
# a legend that is replaced immediately.
ax.legend(loc='upper right', prop={'size': 22}, frameon=True)

fig.set_size_inches(11, 6)
axis = ax  # name kept in case another cell still reads it
fig.tight_layout()
fig.savefig('../figures/tools_experience.pdf', bbox_inches='tight')

In [6]:
# Hard-coded answer counts per option and the total used as the denominator
total_responses = 27
responses = {
    "OSS": 25,
    "Internally developed": 14,
    "Extended existing OSS": 9,
    "Third-party service": 7,
    "Not using security tools": 2,
}

# Share of total_responses for each option, in dictionary order
percentages = [count / total_responses * 100 for count in responses.values()]

# Create a figure and axis
fig, ax = plt.subplots()

# Horizontal bars show the raw counts
bar_width = 0.25
bars = ax.barh(list(responses.keys()), list(responses.values()), height=bar_width, color='lightblue')

# Percentage label just right of each bar. The raw f-string keeps the LaTeX
# escape "\%" without depending on the invalid Python escape sequence "\%".
for bar, pct in zip(bars, percentages):
    ax.text(bar.get_width() + 1, bar.get_y() + bar.get_height() / 2, rf"{pct:.1f}\%", va='center')

ax.set_xlabel('Number of Responses')

fig.set_size_inches(10, 4)
axis = ax  # name kept in case another cell still reads it
fig.tight_layout()
fig.savefig('../figures/tools_type.pdf', bbox_inches='tight')

In [7]:
# Hard-coded answer counts per option; every value is written as a sum of two tallies.
specs = {
    "Developers of the dapp": 11 + 3,
    "Auditors and developers": 6 + 4,
    "Auditors": 4 + 3,
    "I don't know": 1 + 3,
}

sizes = list(specs.values())
labels = list(specs.keys())

# Seaborn look for this figure
sns.set_style("whitegrid")
sns.set_palette("pastel")

fig, ax = plt.subplots(figsize=(8, 6))

# Pie without wedge labels; the percentages are drawn inside the wedges
wedges, _, autotexts = ax.pie(sizes, labels=None, startangle=90, autopct='%1.1f%%')

# Legend to the right of the pie maps each wedge to its option
ax.legend(wedges, labels, loc="center left", bbox_to_anchor=(1, 0, 0.5, 1))

# Equal aspect ratio so the pie is drawn as a circle
ax.axis('equal')

# Attach the option text to each wedge artist as its label
for wedge, label in zip(wedges, labels):
    wedge.set_label(label)

# Grey, smaller percentage texts
for autotext in autotexts:
    autotext.set_color('grey')
    autotext.set_fontsize(14)

# Show the plot
plt.show()

  plt.show()


In [8]:
# Clean up the tools names for better labeling on the chart
def clean_tool_name(name):
    """Return a shortened chart label for a tool column title.

    ``Other.<digit>`` collapses to ``Other``; a fixed list of long titles and
    fragments is then replaced in order; finally everything from the first
    ``(`` to the last ``)`` is removed and the result is stripped.
    """
    label = re.sub(r'Other\.[1-9]', 'Other', str(name))
    # Applied in this order, one after another.
    substitutions = (
        ('Model checking/', ''),
        ('/Bytecode hardening', ''),
        ('Other formal verification / model checking tool', 'Other FV'),
        ("Foundry's propert-based fuzzer", "Foundry's fuzzer"),
        ('Other static analyzer', 'Other SA'),
        ('Other symbolic execution tool', 'Other SE'),
        ('Other runtime monitoring', 'Runtime monitoring'),
    )
    for old, new in substitutions:
        label = label.replace(old, new)
    return re.sub(r'\(.*\)', '', label).strip()

# Load the devs CSV file into a pandas dataframe
df_devs = pd.read_csv('../responses/devs.csv')

# Individual-tool columns (positions 51-70) as booleans: True where non-empty
tools_df_devs = df_devs.iloc[:, 51:71].notna()

# Selections per tool, largest first, indexed by the shortened label
counts_df_devs = tools_df_devs.sum(axis=0).sort_values(ascending=False)
counts_df_devs.index = counts_df_devs.index.map(clean_tool_name)

# Hard-coded number of developer responses used as the percentage denominator
num_responses_devs = 22
print('Number of Devs Responses:', num_responses_devs)

# Selections per response and their mean / max / min.
# NOTE(review): these statistics run over every row of the frame, not only the
# num_responses_devs responses counted above - confirm that is intended.
num_tools_per_reply_devs = tools_df_devs.sum(axis=1)
avg_tools_per_reply_devs = num_tools_per_reply_devs.mean()
max_tools_per_reply_devs = num_tools_per_reply_devs.max()
min_tools_per_reply_devs = num_tools_per_reply_devs.min()

# Print the results for Devs
print('Average Number of Tools Mentioned in a Devs Response:', avg_tools_per_reply_devs)
print('Maximum Number of Tools Mentioned in a Devs Response:', max_tools_per_reply_devs)
print('Minimum Number of Tools Mentioned in a Devs Response:', min_tools_per_reply_devs)

# Create a bar chart using matplotlib
fig, ax = plt.subplots()

# Share of responses selecting each tool.
# NOTE(review): counts_df_auditors and num_responses_auditors are not computed
# in this cell; whatever an earlier cell left in those names is used. Confirm
# they describe the same question as the developer columns above.
counts_df_devs_pct = (counts_df_devs / num_responses_devs) * 100
counts_df_auditors_pct = (counts_df_auditors / num_responses_auditors) * 100

bar_width = 0.35

# Put the auditor series in the developers' order so paired bars line up
counts_df_auditors_pct = counts_df_auditors_pct.reindex(counts_df_devs_pct.index)

# Reverse both series so the most selected tool is drawn at the top
counts_df_devs_pct = counts_df_devs_pct.iloc[::-1]
counts_df_auditors_pct = counts_df_auditors_pct.iloc[::-1]

# Take the labels AFTER reversing: each y tick must name the bar drawn at that
# position. Building the list before the reversal pairs every label with the
# wrong bar.
tools = [clean_tool_name(t) for t in counts_df_devs_pct.index.tolist()]

# One slot per tool; the auditor bar sits directly above the developer bar
x_devs = np.arange(len(tools))
x_auditors = x_devs + bar_width

# Create the horizontal bar plots
devs_bars = ax.barh(x_devs, counts_df_devs_pct, height=bar_width, color='lightblue', label='Devs')
auditors_bars = ax.barh(x_auditors, counts_df_auditors_pct, height=bar_width, color='lightcoral', label='Auditors')

# Axis labels
ax.set_xlabel('Percentage of Responses')
ax.set_ylabel('Tools')

# Centre each tool label between its two bars
ax.set_yticks(x_devs + bar_width / 2)
ax.set_yticklabels(tools, ha='right')

# One legend call: a bare ax.legend() followed by plt.legend(...) only builds
# a legend that is replaced immediately.
ax.legend(loc='lower right', prop={'size': 20}, frameon=True)

fig.set_size_inches(10, 15)
axis = ax  # name kept in case another cell still reads it
fig.tight_layout()
fig.savefig('../figures/tools_horizontal.pdf', bbox_inches='tight')

Number of Devs Responses: 22
Average Number of Tools Mentioned in a Devs Response: 3.2962962962962963
Maximum Number of Tools Mentioned in a Devs Response: 9
Minimum Number of Tools Mentioned in a Devs Response: 0


In [24]:
# Clean up the tools names for better labeling on the chart
def clean_tool_name(name):
    """Return a shortened chart label for a tool column title.

    ``Other.<digit>`` collapses to ``Other``; the long "Other ..." titles and
    the Foundry fuzzer title are abbreviated in order; finally everything from
    the first ``(`` to the last ``)`` is removed and the result is stripped.
    """
    label = re.sub(r'Other\.[1-9]', 'Other', str(name))
    # Applied in this order, one after another.
    abbreviations = (
        ('Other formal verification / model checking tool', 'Other FV'),
        ("Foundry's propert-based fuzzer", "Foundry's fuzzer"),
        ('Other static analyzer', 'Other SA'),
        ('Other symbolic execution tool', 'Other SE'),
        ('Other runtime monitoring', 'Runtime monitoring'),
    )
    for long_form, short_form in abbreviations:
        label = label.replace(long_form, short_form)
    return re.sub(r'\(.*\)', '', label).strip()

# Replace function for auditors dataframe
def replace_tools(row):
    """Reclassify known free-text ``Other.4`` answers into the fixed tool columns.

    When ``row['Other.4']`` equals one of the recognised answers it is set to
    NaN and each mapped tool column receives its own column title as the value,
    which a later ``notna()`` counts as selected. Any other row comes back
    with unchanged content.

    The work is done on a copy: ``DataFrame.apply`` does not support functions
    that mutate the object passed to them, so the caller's row is never touched.

    Parameters
    ----------
    row : mapping or pandas.Series
        One response; must contain the key ``'Other.4'``.

    Returns
    -------
    Same type as ``row``
        A copy of the row with the reclassification applied.
    """
    static_col = "Other static analyzer (similar to Slither)"
    symbolic_col = "Other symbolic execution tool (Similar to Mythril)"
    # Free-text answer -> tool columns it should count towards.
    reclassified = {
        # Dropped without a replacement column (original note: "It already has MythX").
        "We (ConsenSys Diligence) use our fuzzer Harvey": (),
        "contract library etc": (static_col,),
        "our static analyzer + symbolic execution tool": (static_col, symbolic_col),
        "Analyses running on top of the gigahorse framework": (static_col,),
    }

    row = row.copy()
    free_text = row['Other.4']
    # Missing answers are NaN (a float) and never match a known string.
    if isinstance(free_text, str) and free_text in reclassified:
        row['Other.4'] = np.nan
        for column in reclassified[free_text]:
            row[column] = column
    return row

# Load the devs CSV file into a pandas dataframe
df_devs = pd.read_csv('../responses/devs.csv')

# Individual-tool columns (positions 51-70) as booleans: True where non-empty
tools_df_devs = df_devs.iloc[:, 51:71].notna()

# Drop the Maian / Securify2 columns when no response selected them
unused_devs = tools_df_devs.columns.isin(['Maian', 'Securify2']) & ~tools_df_devs.any().to_numpy()
tools_df_devs = tools_df_devs.loc[:, ~unused_devs]

# Selections per tool, largest first, indexed by the shortened label
counts_df_devs = tools_df_devs.sum(axis=0).sort_values(ascending=False)
counts_df_devs.index = counts_df_devs.index.map(clean_tool_name)

# Hard-coded number of developer responses used as the percentage denominator
num_responses_devs = 22
print('Number of Devs Responses:', num_responses_devs)

# Selections per response and their mean / max / min.
# NOTE(review): these statistics run over every row of the frame, not only the
# num_responses_devs responses counted above - confirm that is intended.
num_tools_per_reply_devs = tools_df_devs.sum(axis=1)
avg_tools_per_reply_devs = num_tools_per_reply_devs.mean()
max_tools_per_reply_devs = num_tools_per_reply_devs.max()
min_tools_per_reply_devs = num_tools_per_reply_devs.min()

# Print the results for Devs
print('Average Number of Tools Mentioned in a Devs Response:', avg_tools_per_reply_devs)
print('Maximum Number of Tools Mentioned in a Devs Response:', max_tools_per_reply_devs)
print('Minimum Number of Tools Mentioned in a Devs Response:', min_tools_per_reply_devs)

# Load the auditors CSV file into a pandas dataframe
df_auditors = pd.read_csv('../responses/auditors.csv')

# Individual-tool columns of the auditor file (positions 50-69)
tools_df_auditors = df_auditors.iloc[:, 50:70]

# Fold known free-text answers into the fixed tool columns, then booleanise
tools_df_auditors = tools_df_auditors.apply(replace_tools, axis=1)
tools_df_auditors = tools_df_auditors.notna()

# Drop the Maian / Securify2 columns when no response selected them
unused_auditors = tools_df_auditors.columns.isin(['Maian', 'Securify2']) & ~tools_df_auditors.any().to_numpy()
tools_df_auditors = tools_df_auditors.loc[:, ~unused_auditors]

# Selections per tool, largest first, indexed by the shortened label
counts_df_auditors = tools_df_auditors.sum(axis=0).sort_values(ascending=False)
counts_df_auditors.index = counts_df_auditors.index.map(clean_tool_name)

# Hard-coded number of auditor responses used as the percentage denominator
num_responses_auditors = 21
print('Number of Auditors Responses:', num_responses_auditors)

# Selections per response and their mean / max / min (over every row, as above)
num_tools_per_reply_auditors = tools_df_auditors.sum(axis=1)
avg_tools_per_reply_auditors = num_tools_per_reply_auditors.mean()
max_tools_per_reply_auditors = num_tools_per_reply_auditors.max()
min_tools_per_reply_auditors = num_tools_per_reply_auditors.min()

# Print the results for Auditors
print('Average Number of Tools Mentioned in an Auditors Response:', avg_tools_per_reply_auditors)
print('Maximum Number of Tools Mentioned in an Auditors Response:', max_tools_per_reply_auditors)
print('Minimum Number of Tools Mentioned in an Auditors Response:', min_tools_per_reply_auditors)


# Create a bar chart using matplotlib
fig, ax = plt.subplots()

# Share of responses selecting each tool. The last developer entry (the least
# selected after the descending sort) is left off the chart.
counts_df_devs_pct = (counts_df_devs / num_responses_devs) * 100
counts_df_devs_pct = counts_df_devs_pct.iloc[:-1]
counts_df_auditors_pct = (counts_df_auditors / num_responses_auditors) * 100

bar_width = 0.35
tools = [clean_tool_name(t) for t in counts_df_devs_pct.index.tolist()]

# One slot per tool; the auditor bar sits directly above the developer bar
x_devs = np.arange(len(tools))
x_auditors = x_devs + bar_width

# The developer index alone decides which tools are charted. Truncating the
# auditor series separately (it is sorted by its own counts) could remove a
# different tool and blank its auditor bar, so it is only reindexed here;
# tools absent from the auditor counts are drawn as zero.
counts_df_auditors_pct = counts_df_auditors_pct.reindex(counts_df_devs_pct.index, fill_value=0)

# Create the bar plots
devs_bars = ax.barh(x_devs, counts_df_devs_pct, height=bar_width, color='lightblue', label='Devs')
auditors_bars = ax.barh(x_auditors, counts_df_auditors_pct, height=bar_width, color='lightcoral', label='Auditors')

ax.set_xlabel('Percentage of Responses')

# Percent ticks. The raw f-string keeps the LaTeX escape "\%" byte-for-byte
# without relying on Python tolerating the invalid escape sequence "\%".
pct_ticks = np.arange(0, 90, 10)
ax.set_xticks(pct_ticks)
ax.set_xticklabels([rf"{x:.0f}\%" for x in pct_ticks])

# Centre each tool label between its two bars
ax.set_yticks(x_devs + bar_width / 2)
ax.set_yticklabels(tools, ha='right')

# One legend call: a bare ax.legend() followed by plt.legend(...) only builds
# a legend that is replaced immediately.
ax.legend(loc='upper right', prop={'size': 22}, frameon=True)

fig.set_size_inches(9, 8)
axis = ax  # name kept in case another cell still reads it
fig.tight_layout()
fig.savefig('../figures/tools.pdf', bbox_inches='tight')

# Responses that selected at least one of these two tools (raw column titles)
tools_logic = ["Echidna", "Foundry's propert-based fuzzer",]

# Auditors: True for a row when any of the tools_logic columns is True
logic_tool_responses = tools_df_auditors[tools_logic].any(axis=1)
num_responses_logic = logic_tool_responses.sum()
print(f"Auditors: Number of responses with at least one of the logic tools: {num_responses_logic} ({num_responses_logic/num_responses_auditors})")

# Developers: same check, divided by the developer response count
logic_tool_responses = tools_df_devs[tools_logic].any(axis=1)
num_responses_logic = logic_tool_responses.sum()
print(f"Devs: Number of responses with at least one of the logic tools: {num_responses_logic} ({num_responses_logic/num_responses_devs})")

Number of Devs Responses: 22
Average Number of Tools Mentioned in a Devs Response: 3.2962962962962963
Maximum Number of Tools Mentioned in a Devs Response: 9
Minimum Number of Tools Mentioned in a Devs Response: 0
Number of Auditors Responses: 21
Average Number of Tools Mentioned in an Auditors Response: 3.409090909090909
Maximum Number of Tools Mentioned in an Auditors Response: 10
Minimum Number of Tools Mentioned in an Auditors Response: 0
Auditors: Number of responses with at least one of the logic tools: 9 (0.42857142857142855)
Devs: Number of responses with at least one of the logic tools: 11 (0.5)


In [32]:
def plot_likert_chart(data_frame, name):
    """Draw and save a stacked horizontal bar chart of five Likert items.

    Columns 11-15 (by position) of ``data_frame`` are read as the five items.
    For each item the share of every answer category is computed among the
    answers that belong to one of the five categories, and drawn as one
    stacked bar with the percentage printed inside each segment.

    Parameters
    ----------
    data_frame : pandas.DataFrame
        Survey responses. It is not modified.
    name : str
        Suffix of the output file ``../figures/likert_chart_{name}.pdf``.
    """
    # Answer categories from most negative to most positive
    categories = [
        'Strongly disagree',
        'Disagree',
        'Neither agree nor disagree',
        'Agree',
        'Strongly agree'
    ]

    # .copy() gives an independent frame, so renaming and converting columns
    # neither writes into the caller's data nor triggers pandas'
    # assignment-on-a-slice warning.
    likert_columns = data_frame.iloc[:, 11:16].copy()

    # Rename columns for better readability
    likert_columns.columns = [
        'Low false positives',
        'Low false negatives',
        'Ease of use',
        'Documentation',
        'Report quality'
    ]

    # Ordered categoricals; answers outside `categories` become missing
    for col in likert_columns.columns:
        likert_columns[col] = pd.Categorical(likert_columns[col], categories=categories, ordered=True)

    # One row per item, one column per category
    proportions = likert_columns.apply(lambda x: x.value_counts(normalize=True)).T.fillna(0)
    # The legend below labels the stacked segments positionally from
    # `categories`, so pin the column order to that list explicitly.
    proportions.columns = proportions.columns.astype(str)
    proportions = proportions.reindex(columns=categories, fill_value=0)

    # Plot bar chart
    sns.set(style="whitegrid")
    ax = proportions.plot(kind='barh', stacked=True, colormap='coolwarm_r', edgecolor='black')

    ax.set_xticklabels([])  # Remove x-tick labels
    ax.set_xlim(0, 1)  # Shares sum to 1; no empty space on the right

    # Under text.usetex a bare "%" starts a LaTeX comment, so escape it there
    escape_percent = plt.rcParams['text.usetex']

    # Add percentages inside bars (one container per category)
    for idx, container in enumerate(ax.containers):
        for bar, prop in zip(container, proportions.iloc[:, idx]):
            if prop > 0:
                label = '{:.1%}'.format(prop)
                if escape_percent:
                    label = label.replace('%', r'\%')
                ax.text(
                    bar.get_x() + bar.get_width() / 2,
                    bar.get_y() + bar.get_height() / 2,
                    label,
                    ha='center',
                    va='center',
                    fontsize=17,
                    color='black'
                )
    ax.tick_params(axis='y', labelsize=20)

    # Legend centred just below the axes, laid out in three columns
    ax.legend(categories, loc='upper center', bbox_to_anchor=(0.5, -0.01), ncol=len(categories)-2, frameon=True, prop={'size':15})

    # Save the figure
    fig = ax.figure
    fig.set_size_inches(10, 6)
    fig.tight_layout()
    fig.savefig(f'../figures/likert_chart_{name}.pdf', bbox_inches='tight')
    plt.show()


In [35]:
# Likert chart for the auditor survey
data_aud = pd.read_csv('../responses/auditors.csv')
plot_likert_chart(data_frame=data_aud, name="auditors")
# Likert chart for the developer survey. It must be drawn from data_dev:
# passing data_aud here saves the auditor chart under the developers' file name.
data_dev = pd.read_csv('../responses/devs.csv')
plot_likert_chart(data_frame=data_dev, name="developers")


  plt.show()
  plt.show()


In [11]:
# Clean up the tools names for better labeling on the chart
def clean_tool_name(name):
    """Return a shortened chart label for a vulnerability or tool column title.

    Four long vulnerability titles are shortened, ``Other.<digit>`` collapses
    to ``Other``, the long "Other ..." tool titles and the Foundry fuzzer title
    are abbreviated, and finally everything from the first ``(`` to the last
    ``)`` is removed before the result is stripped. Titles not listed here
    pass through unchanged.
    """
    label = str(name)

    # Vulnerability titles that actually change, in application order.
    vulnerability_labels = (
        ('Integer Overflow and Underflow', 'Integer Overflow/Underflow'),
        ('Unhandled or mishandled exception', 'Unhandled/mishandled exception'),
        ('Absence of coding logic or sanity check', 'Absence of coding logic'),
        ('Improper asset locks or frozen asset', 'Improper asset locks/frozen asset'),
    )
    for long_form, short_form in vulnerability_labels:
        label = label.replace(long_form, short_form)

    label = re.sub(r'Other\.[1-9]', 'Other', label)

    # Tool titles, in application order.
    tool_labels = (
        ('Other formal verification / model checking tool', 'Other FV'),
        ("Foundry's propert-based fuzzer", "Foundry's fuzzer"),
        ('Other static analyzer', 'Other SA'),
        ('Other symbolic execution tool', 'Other SE'),
        ('Other runtime monitoring', 'Runtime monitoring'),
    )
    for long_form, short_form in tool_labels:
        label = label.replace(long_form, short_form)

    return re.sub(r'\(.*\)', '', label).strip()

# Replace function for auditors dataframe
def replace_tools(row):
    """Identity placeholder: return ``row`` unchanged, so applying it row-wise alters nothing."""
    return row

# Load the devs CSV file into a pandas dataframe
df_devs = pd.read_csv('../responses/devs.csv')

# Columns at positions 16-26 as booleans: True wherever the cell is non-empty
tools_df_devs = df_devs.iloc[:, 16:27].notna()

# Selections per column, largest first, indexed by the shortened label
counts_df_devs = tools_df_devs.sum(axis=0).sort_values(ascending=False)
counts_df_devs.index = counts_df_devs.index.map(clean_tool_name)

# Hard-coded number of developer responses used as the percentage denominator
num_responses_devs = 27
print('Number of Devs Responses:', num_responses_devs)

# Selections per response and their mean / max / min across all rows
num_tools_per_reply_devs = tools_df_devs.sum(axis=1)
avg_tools_per_reply_devs = num_tools_per_reply_devs.mean()
max_tools_per_reply_devs = num_tools_per_reply_devs.max()
min_tools_per_reply_devs = num_tools_per_reply_devs.min()

# Print the results for Devs
print('Average Number of Tools Mentioned in a Devs Response:', avg_tools_per_reply_devs)
print('Maximum Number of Tools Mentioned in a Devs Response:', max_tools_per_reply_devs)
print('Minimum Number of Tools Mentioned in a Devs Response:', min_tools_per_reply_devs)

# Load the auditors CSV file into a pandas dataframe
df_auditors = pd.read_csv('../responses/auditors.csv')

# Same column positions in the auditor file
tools_df_auditors = df_auditors.iloc[:, 16:27]

# Row-wise hook (replace_tools as defined just above), then booleanise
tools_df_auditors = tools_df_auditors.apply(replace_tools, axis=1)
tools_df_auditors = tools_df_auditors.notna()

# Selections per column, largest first, indexed by the shortened label
counts_df_auditors = tools_df_auditors.sum(axis=0).sort_values(ascending=False)
counts_df_auditors.index = counts_df_auditors.index.map(clean_tool_name)

# Hard-coded number of auditor responses used as the percentage denominator
num_responses_auditors = 21
print('Number of Auditors Responses:', num_responses_auditors)

# Selections per response and their mean / max / min across all rows
num_tools_per_reply_auditors = tools_df_auditors.sum(axis=1)
avg_tools_per_reply_auditors = num_tools_per_reply_auditors.mean()
max_tools_per_reply_auditors = num_tools_per_reply_auditors.max()
min_tools_per_reply_auditors = num_tools_per_reply_auditors.min()

# Print the results for Auditors
print('Average Number of Tools Mentioned in an Auditors Response:', avg_tools_per_reply_auditors)
print('Maximum Number of Tools Mentioned in an Auditors Response:', max_tools_per_reply_auditors)
print('Minimum Number of Tools Mentioned in an Auditors Response:', min_tools_per_reply_auditors)


# Create a bar chart using matplotlib
fig, ax = plt.subplots()

# Share of responses selecting each entry
counts_df_devs_pct = (counts_df_devs / num_responses_devs) * 100
counts_df_auditors_pct = (counts_df_auditors / num_responses_auditors) * 100

bar_width = 0.35
tools = [clean_tool_name(t) for t in counts_df_devs_pct.index.tolist()]

# One slot per entry; the auditor bar sits directly above the developer bar
x_devs = np.arange(len(tools))
x_auditors = x_devs + bar_width

# Put the auditor series in the developers' order so paired bars line up
counts_df_auditors_pct = counts_df_auditors_pct.reindex(counts_df_devs_pct.index)

# Create the bar plots
devs_bars = ax.barh(x_devs, counts_df_devs_pct, height=bar_width, color='lightblue', label='Devs')
auditors_bars = ax.barh(x_auditors, counts_df_auditors_pct, height=bar_width, color='lightcoral', label='Auditors')

ax.set_xlabel('Percentage of Responses')

# Percent ticks. The raw f-string keeps the LaTeX escape "\%" byte-for-byte
# without relying on Python tolerating the invalid escape sequence "\%".
pct_ticks = np.arange(0, 70, 10)
ax.set_xticks(pct_ticks)
ax.set_xticklabels([rf"{x:.0f}\%" for x in pct_ticks])

# Centre each label between its two bars
ax.set_yticks(x_devs + bar_width / 2)
ax.set_yticklabels(tools, ha='right')

# One legend call: a bare ax.legend() followed by plt.legend(...) only builds
# a legend that is replaced immediately.
ax.legend(loc='upper right', prop={'size': 22}, frameon=True)

fig.set_size_inches(10, 6)
axis = ax  # name kept in case another cell still reads it
fig.tight_layout()
fig.savefig('../figures/vulns.pdf', bbox_inches='tight')

Number of Devs Responses: 27
Average Number of Tools Mentioned in a Devs Response: 2.814814814814815
Maximum Number of Tools Mentioned in a Devs Response: 6
Minimum Number of Tools Mentioned in a Devs Response: 1
Number of Auditors Responses: 21
Average Number of Tools Mentioned in an Auditors Response: 2.6818181818181817
Maximum Number of Tools Mentioned in an Auditors Response: 7
Minimum Number of Tools Mentioned in an Auditors Response: 0


In [36]:
# Clean up the tools names for better labeling on the chart
def clean_tool_name(name):
    """Return a shortened chart label for a ``.1``-suffixed column title.

    The listed vulnerability titles carry a ``.1`` suffix; each is mapped to
    its display label without the suffix. ``Timestamp Dependency.1`` is part of
    that list: the column title list shown earlier in this notebook contains
    ``Timestamp Dependency`` in the same group as the other titles, and mapping
    only the un-suffixed title to itself would leave ``.1`` in its chart label.
    Afterwards ``Other.<digit>`` collapses to ``Other``, the long "Other ..."
    tool titles and the Foundry fuzzer title are abbreviated, and everything
    from the first ``(`` to the last ``)`` is removed before stripping.

    Parameters
    ----------
    name : object
        Column title; converted with ``str``.

    Returns
    -------
    str
        The display label.
    """
    label = str(name)

    # Suffixed vulnerability titles -> display labels, applied in order.
    vulnerability_labels = (
        ('Integer Overflow and Underflow.1', 'Integer Overflow/Underflow'),
        ('Function/State Visibility Error.1', 'Function/State Visibility Error'),
        ('Timestamp Dependency.1', 'Timestamp Dependency'),
        ('Token standard incompatibility.1', 'Token standard incompatibility'),
        ('Reentrancy.1', 'Reentrancy'),
        ('Unhandled or mishandled exception.1', 'Unhandled/mishandled exception'),
        ('Absence of coding logic or sanity check.1', 'Absence of coding logic'),
        ('Improper asset locks or frozen asset.1', 'Improper asset locks/frozen asset'),
        ('Logic errors.1', 'Logic errors'),
        ('Oracle manipulation.1', 'Oracle manipulation'),
    )
    for suffixed, display in vulnerability_labels:
        label = label.replace(suffixed, display)

    label = re.sub(r'Other\.[1-9]', 'Other', label)

    # Tool titles, applied in order.
    tool_labels = (
        ('Other formal verification / model checking tool', 'Other FV'),
        ("Foundry's propert-based fuzzer", "Foundry's fuzzer"),
        ('Other static analyzer', 'Other SA'),
        ('Other symbolic execution tool', 'Other SE'),
        ('Other runtime monitoring', 'Runtime monitoring'),
    )
    for long_form, short_form in tool_labels:
        label = label.replace(long_form, short_form)

    return re.sub(r'\(.*\)', '', label).strip()

# Replace function for auditors dataframe
def replace_tools(row):
    """Identity placeholder: return ``row`` unchanged, so applying it row-wise alters nothing."""
    return row

# Load the devs CSV file into a pandas dataframe
df_devs = pd.read_csv('../responses/devs.csv')

# Columns at positions 27-37 as booleans: True wherever the cell is non-empty
tools_df_devs = df_devs.iloc[:, 27:38].notna()

# Selections per column, largest first, indexed by the shortened label
counts_df_devs = tools_df_devs.sum(axis=0).sort_values(ascending=False)
counts_df_devs.index = counts_df_devs.index.map(clean_tool_name)

# Hard-coded number of developer responses used as the percentage denominator
num_responses_devs = 24
print('Number of Devs Responses:', num_responses_devs)

# Selections per response and their mean / max / min.
# NOTE(review): these statistics run over every row of the frame, not only the
# num_responses_devs responses counted above - confirm that is intended.
num_tools_per_reply_devs = tools_df_devs.sum(axis=1)
avg_tools_per_reply_devs = num_tools_per_reply_devs.mean()
max_tools_per_reply_devs = num_tools_per_reply_devs.max()
min_tools_per_reply_devs = num_tools_per_reply_devs.min()

# Print the results for Devs
print('Average Number of Tools Mentioned in a Devs Response:', avg_tools_per_reply_devs)
print('Maximum Number of Tools Mentioned in a Devs Response:', max_tools_per_reply_devs)
print('Minimum Number of Tools Mentioned in a Devs Response:', min_tools_per_reply_devs)

# Load the auditors CSV file into a pandas dataframe
df_auditors = pd.read_csv('../responses/auditors.csv')

# Same column positions in the auditor file
tools_df_auditors = df_auditors.iloc[:, 27:38]

# Row-wise hook (replace_tools as defined just above), then booleanise
tools_df_auditors = tools_df_auditors.apply(replace_tools, axis=1)
tools_df_auditors = tools_df_auditors.notna()

# Selections per column, largest first, indexed by the shortened label
counts_df_auditors = tools_df_auditors.sum(axis=0).sort_values(ascending=False)
counts_df_auditors.index = counts_df_auditors.index.map(clean_tool_name)

# Hard-coded number of auditor responses used as the percentage denominator
num_responses_auditors = 22
print('Number of Auditors Responses:', num_responses_auditors)

# Selections per response and their mean / max / min across all rows
num_tools_per_reply_auditors = tools_df_auditors.sum(axis=1)
avg_tools_per_reply_auditors = num_tools_per_reply_auditors.mean()
max_tools_per_reply_auditors = num_tools_per_reply_auditors.max()
min_tools_per_reply_auditors = num_tools_per_reply_auditors.min()

# Print the results for Auditors
print('Average Number of Tools Mentioned in an Auditors Response:', avg_tools_per_reply_auditors)
print('Maximum Number of Tools Mentioned in an Auditors Response:', max_tools_per_reply_auditors)
print('Minimum Number of Tools Mentioned in an Auditors Response:', min_tools_per_reply_auditors)


# Create a bar chart using matplotlib
fig, ax = plt.subplots()

# Share of responses selecting each entry
counts_df_devs_pct = (counts_df_devs / num_responses_devs) * 100
counts_df_auditors_pct = (counts_df_auditors / num_responses_auditors) * 100

bar_width = 0.35
tools = [clean_tool_name(t) for t in counts_df_devs_pct.index.tolist()]

# One slot per entry; the auditor bar sits directly above the developer bar
x_devs = np.arange(len(tools))
x_auditors = x_devs + bar_width

# Put the auditor series in the developers' order so paired bars line up
counts_df_auditors_pct = counts_df_auditors_pct.reindex(counts_df_devs_pct.index)

# Create the bar plots
devs_bars = ax.barh(x_devs, counts_df_devs_pct, height=bar_width, color='lightblue', label='Devs')
auditors_bars = ax.barh(x_auditors, counts_df_auditors_pct, height=bar_width, color='lightcoral', label='Auditors')

ax.set_xlabel('Percentage of Responses')

# Percent ticks. The raw f-string keeps the LaTeX escape "\%" byte-for-byte
# without relying on Python tolerating the invalid escape sequence "\%".
pct_ticks = np.arange(0, 80, 10)
ax.set_xticks(pct_ticks)
ax.set_xticklabels([rf"{x:.0f}\%" for x in pct_ticks])

# Centre each label between its two bars
ax.set_yticks(x_devs + bar_width / 2)
ax.set_yticklabels(tools, ha='right')

# One legend call: a bare ax.legend() followed by plt.legend(...) only builds
# a legend that is replaced immediately.
ax.legend(loc='upper right', prop={'size': 22}, frameon=True)

fig.set_size_inches(10, 6)
axis = ax  # name kept in case another cell still reads it
fig.tight_layout()
fig.savefig('../figures/vulns_tools.pdf', bbox_inches='tight')

Number of Devs Responses: 24
Average Number of Tools Mentioned in a Devs Response: 1.8888888888888888
Maximum Number of Tools Mentioned in a Devs Response: 4
Minimum Number of Tools Mentioned in a Devs Response: 0
Number of Auditors Responses: 22
Average Number of Tools Mentioned in an Auditors Response: 2.272727272727273
Maximum Number of Tools Mentioned in an Auditors Response: 5
Minimum Number of Tools Mentioned in an Auditors Response: 1
