In [1]:
#Table 2
import pandas as pd
import glob
import os
# Path to the folder containing your CSV files
csv_folder = 'road/preprocessed'  # Update this


def has_m_suffix(path):
    """Return True when the file's stem (base name without extension) ends with 'm'.

    `os.path.splitext` is used instead of `str.rstrip(".csv")`: `rstrip` removes any
    trailing run of the characters '.', 'c', 's', 'v' rather than the literal suffix,
    so a stem such as 'msc' would be reduced to 'm' and misclassified.
    """
    stem, _ext = os.path.splitext(os.path.basename(path))
    return stem.endswith("m")


# Reading all CSV files in the folder (sorted so the row order of the table is deterministic)
csv_files = sorted(glob.glob(os.path.join(csv_folder, "*.csv")))
# Filter: exclude files ending with 'm.csv'
csv_files = [f for f in csv_files if not has_m_suffix(f)]


In [2]:
# Define a helper function to count benign and malicious
def count_flags(csv_files):
    """Count rows labelled 0 (benign) and 1 (malicious) in each CSV file.

    The label is read from the last column of each file. Numeric (and boolean)
    columns are compared against 0 / 1 directly; any non-numeric column is
    converted to text, stripped of surrounding whitespace and compared against
    '0' / '1'.

    Parameters
    ----------
    csv_files : iterable of str
        Paths of CSV files readable by ``pd.read_csv``.

    Returns
    -------
    list of tuple
        One ``(file name, benign, malicious, total)`` tuple per path, in input
        order. ``total`` is ``benign + malicious``, so rows carrying any other
        label are not included in it.
    """
    results = []
    for file_path in csv_files:
        df = pd.read_csv(file_path)
        flags = df[df.columns[-1]]  # use last column

        # Check "is numeric" rather than "dtype == object": text columns may also
        # be stored with a dedicated pandas string dtype, which would otherwise
        # fall into the numeric branch and silently count zero matches.
        if pd.api.types.is_numeric_dtype(flags):
            benign_label, malicious_label = 0, 1
        else:
            flags = flags.astype(str).str.strip()
            benign_label, malicious_label = '0', '1'

        # Plain Python ints keep the returned tuples free of NumPy scalar types.
        benign = int((flags == benign_label).sum())
        malicious = int((flags == malicious_label).sum())

        total = benign + malicious
        results.append((os.path.basename(file_path), benign, malicious, total))
    return results

In [3]:
# Gather the per-file counts, then pivot them into column-oriented lists.
results = count_flags(csv_files)

summary_data = {
    # Attack type is the file name without its extension
    'Attack Type': [os.path.splitext(row[0])[0] for row in results],
    'Benign': [row[1] for row in results],
    'Malicious': [row[2] for row in results],
    'Total': [row[3] for row in results],
}


In [4]:
# Build the per-attack summary table from the collected columns
summary_df = pd.DataFrame(summary_data)

# Append a grand-total row: label in the first column, column sums in the rest
count_cols = ['Benign', 'Malicious', 'Total']
total_row = {'Attack Type': 'Total'}
for count_col in count_cols:
    total_row[count_col] = summary_df[count_col].sum()

total_df = pd.DataFrame([total_row])
summary_df = pd.concat([summary_df, total_df], ignore_index=True)

In [7]:
# Ensure counts are numeric before formatting.
# `pd.to_numeric(..., errors='ignore')` is deprecated and emits a FutureWarning, so the
# same "convert when possible, otherwise keep the column unchanged" behaviour is written
# out explicitly by catching the conversion error.
def _to_numeric_or_keep(column):
    """Return `column` converted with pd.to_numeric, or `column` itself if conversion fails."""
    try:
        return pd.to_numeric(column)
    except (ValueError, TypeError):
        return column


count_columns = ['Benign', 'Malicious', 'Total']
summary_df[count_columns] = summary_df[count_columns].apply(_to_numeric_or_keep)

# Apply comma formatting only to numeric values (already-formatted strings pass through,
# so re-running this cell leaves the table unchanged)
for col in count_columns:
    summary_df[col] = summary_df[col].apply(lambda x: f"{x:,}" if isinstance(x, (int, float)) else x)

# Display
print(summary_df.to_string(index=False))
#summary_df.to_csv('attack_summary.csv', index=False)
#with open('attack_summary.txt', 'w') as f:
#    f.write(summary_df.to_string(index=False))


Attack Type    Benign Malicious     Total
       csa1    74,151     2,087    76,238
       csa2    63,259     2,141    65,400
       csa3    38,002     1,265    39,267
        fa1    45,656       592    46,248
        fa2    29,964       353    30,317
        fa3    12,287       116    12,403
      mecta    57,932        88    58,020
       msa1   194,553     5,555   200,108
       msa2   133,104     3,776   136,880
       msa3   192,559     7,890   200,449
    rloffa1    60,119     3,535    63,654
    rloffa2    60,119     3,535    63,654
    rloffa3    60,119     3,535    63,654
     rlona1   156,318     5,065   161,383
     rlona2   156,318     5,065   161,383
     rlona3   156,318     5,065   161,383
      Total 1,490,778    49,663 1,540,441


  summary_df[['Benign', 'Malicious', 'Total']] = summary_df[['Benign', 'Malicious', 'Total']].apply(pd.to_numeric, errors='ignore')
