In [1]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

In [2]:
# Load each capture, discard rows containing any missing value, and tag every
# row with the traffic class it came from. Each frame is built in one chain so
# re-running the cell always starts again from the CSV on disk.
benign_df = (
    pd.read_csv('benign.csv')
    .dropna()
    .assign(classification='benign')
)

# Fuzzing rows additionally get label = 1 (set after 'classification', which
# keeps the same column order as assigning the two columns one after another).
fuzzing_df = (
    pd.read_csv('fuzzing.csv')
    .dropna()
    .assign(classification='fuzzing', label=1)
)

flooding_df = (
    pd.read_csv('flooding.csv')
    .dropna()
    .assign(classification='flooding')
)

In [None]:
# Display the benign frame (NaN rows already dropped, 'classification' column added) for a visual check.
benign_df

In [None]:
# Display the fuzzing frame (NaN rows already dropped, 'classification' added, 'label' set to 1) for a visual check.
fuzzing_df

In [None]:
# Display the flooding frame (NaN rows already dropped, 'classification' column added) for a visual check.
flooding_df

In [None]:
# One horizontal box plot per traffic class, drawn side by side on a shared,
# log-scaled x-axis so the three panels can be compared directly.
panels = [
    ('Benign', benign_df),
    ('Fuzzing', fuzzing_df),
    ('Flooding', flooding_df),
]

fig, axes = plt.subplots(nrows=1, ncols=len(panels), figsize=(30, 8), sharex=True)

for ax, (panel_title, frame) in zip(axes, panels):
    # 'label' and 'classification' are removed so only the remaining columns are plotted.
    feature_frame = frame.drop(columns=['label', 'classification'])
    sns.boxplot(data=feature_frame, orient='h', ax=ax)
    ax.set_title(panel_title)
    ax.set_xscale('log')

plt.suptitle('Comparison of Feature Distributions Across Datasets')
plt.show()

In [None]:
# Stack the three per-class frames into one frame.
combined_df = pd.concat([benign_df, fuzzing_df, flooding_df])

# Melt to long form: one row per (classification, feature, value).
# 'label' is dropped first so the class label is not drawn as if it were a
# feature; the per-class box plots drop the same column before plotting.
long_df = combined_df.drop(columns=['label']).melt(
    id_vars=['classification'], var_name='feature', value_name='value'
)

# One row of boxes per feature, with the three classes overlaid via hue.
fig, ax = plt.subplots(figsize=(20, 8))
sns.boxplot(data=long_df, x='value', y='feature', hue='classification', ax=ax)

ax.set_xscale('log')
ax.set_title('Overlapping Boxplot of Feature Distributions')
plt.show()

In [3]:
def _interleave_windows(benign, flooding, fuzzing, window_size, halfway_point):
    """Build an alternating sequence of benign and attack windows.

    Even slots take the next ``window_size`` rows of ``benign``. Odd slots take
    the next ``window_size`` rows of ``flooding`` while ``slot // 2`` is below
    ``halfway_point`` and of ``fuzzing`` afterwards. If the frame selected for
    an odd slot has fewer than ``window_size`` rows left, that slot contributes
    nothing and the sequence moves on. Building stops when ``benign`` has fewer
    than ``window_size`` rows left, or when an odd slot finds both attack
    frames short of a full window.

    The input frames are only sliced by position; they are never modified or
    rebound, so the caller can run this repeatedly with the same result.

    Parameters:
        benign, flooding, fuzzing: DataFrames to draw windows from, in row order.
        window_size: number of rows per window; must be positive.
        halfway_point: number of attack slots served from ``flooding`` before
            switching to ``fuzzing``.

    Returns:
        list of DataFrame slices, in output order.

    Raises:
        ValueError: if ``window_size`` is not positive (the loop could never
            make progress).
    """
    if window_size <= 0:
        raise ValueError(f"window_size must be positive, got {window_size}")

    windows = []
    benign_pos = flooding_pos = fuzzing_pos = 0
    slot = 0

    while len(benign) - benign_pos >= window_size:
        if slot % 2 == 0:
            # Benign slot
            windows.append(benign.iloc[benign_pos:benign_pos + window_size])
            benign_pos += window_size
        else:
            flooding_left = len(flooding) - flooding_pos
            fuzzing_left = len(fuzzing) - fuzzing_pos
            if flooding_left < window_size and fuzzing_left < window_size:
                break
            # Before halfway, use flooding; after halfway, switch to fuzzing
            if slot // 2 < halfway_point:
                if flooding_left >= window_size:
                    windows.append(flooding.iloc[flooding_pos:flooding_pos + window_size])
                    flooding_pos += window_size
            elif fuzzing_left >= window_size:
                windows.append(fuzzing.iloc[fuzzing_pos:fuzzing_pos + window_size])
                fuzzing_pos += window_size
        slot += 1

    return windows


# Combine fuzzing and flooding into a single attack dataset.
# NOTE: the windowing below does not read attack_df; the name is kept defined
# in case other cells rely on it.
attack_df = pd.concat([fuzzing_df, flooding_df])

# Set window size
window_size = 10000
halfway_point = 10

# Alternate windows of benign and attack without consuming the source frames,
# so this cell (and the plotting cells that read those frames) can be re-run.
windows = _interleave_windows(benign_df, flooding_df, fuzzing_df, window_size, halfway_point)
if not windows:
    raise ValueError(
        f"No windows were produced: benign_df has {len(benign_df)} rows, "
        f"fewer than window_size={window_size}"
    )

# Concatenate all windows
combined_df = pd.concat(windows).reset_index(drop=True)

In [None]:
# Count how many rows of each classification ended up in combined_df.
combined_df['classification'].value_counts()

In [None]:
import matplotlib.patches as mpatches

# Colour used for each classification value (also drives the legend below).
color_map = {
    'benign': 'blue',
    'fuzzing': 'red',
    'flooding': 'orange'
}

# One point per row: x is the row position in combined_df, y is its label,
# coloured by the row's classification.
plt.figure(figsize=(12, 6))
plt.scatter(combined_df.index, combined_df['label'],
            c=combined_df['classification'].map(color_map),
            alpha=0.6, s=10)

# The scatter call receives a per-point colour array, so there are no
# per-class artists to build a legend from; create one patch per entry of
# color_map so the legend cannot drift from the colours actually plotted.
legend_handles = [
    mpatches.Patch(color=color, label=name)
    for name, color in color_map.items()
]
plt.legend(handles=legend_handles, title="Classification")

plt.title('QUIC Traffic Classification Over Sample Index')
plt.xlabel('Sample Index')
# Fuzzing rows are assigned label 1 when loaded and the benign rows carry
# label 0, so 1 marks attack traffic (the previous axis text had this inverted).
# NOTE(review): flooding labels come straight from flooding.csv - confirm they are 1.
plt.ylabel('Label (0 = Benign, 1 = Attack)')
plt.show()

In [4]:
# Remove the helper 'classification' column before export.
# Reassigning (rather than inplace=True) with errors='ignore' keeps this cell
# re-runnable: a second execution is a no-op instead of raising KeyError.
combined_df = combined_df.drop(columns=['classification'], errors='ignore')
combined_df

Unnamed: 0,dst_port,dst_asn,quic_ver,dur,ratio,flow_pkt_rate,flow_byte_rate,total_pkts,total_bytes,max_bytes,...,ave_fwd_iat,std_fwd_iat,var_fwd_iat,rev_dur,max_rev_iat,min_rev_iat,ave_rev_iat,std_rev_iat,var_rev_iat,label
0,443,13335.0,1,1.616348,0,12.373573,3.154024e+03,20,5098.0,1250.0,...,0.244896,0.468212,2.192224e-01,0.358289,0.358289,3.452879e-02,0.096112,0.094490,0.008928,0
1,443,13335.0,1,0.127682,1,234.958082,1.819672e+05,30,23234.0,1250.0,...,0.072812,0.034301,1.176543e-03,0.127682,0.127682,4.894717e-02,0.096772,0.023456,0.000550,0
2,443,13335.0,1,0.725707,1,41.339020,2.833101e+04,30,20560.0,1250.0,...,0.256128,0.294598,8.678807e-02,0.725707,0.725707,5.600000e-08,0.474463,0.311842,0.097245,0
3,443,13335.0,1,0.226025,0,132.728759,8.810535e+04,30,19914.0,1250.0,...,0.199391,0.011408,1.301348e-04,0.226025,0.226025,5.700000e-08,0.052990,0.077739,0.006043,0
4,443,13335.0,1,0.172525,1,173.887621,1.572118e+05,30,27123.0,1200.0,...,0.077448,0.045418,2.062815e-03,0.172525,0.172525,2.566661e-02,0.153548,0.045018,0.002027,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
309995,443,9821.0,1,0.005424,1,5530.641042,6.464398e+06,30,35065.0,1250.0,...,0.003651,0.000618,3.819815e-07,0.005424,0.005424,1.696050e-04,0.001512,0.001930,0.000004,0
309996,443,9821.0,1,0.005768,1,5201.078912,5.445703e+06,30,31411.0,1250.0,...,0.002881,0.001329,1.766493e-06,0.005768,0.005768,1.001060e-04,0.002819,0.001902,0.000004,0
309997,443,9821.0,1,0.004888,1,6137.696772,6.925368e+06,30,33850.0,1250.0,...,0.002451,0.001596,2.547167e-06,0.004888,0.004888,6.500001e-08,0.002884,0.001486,0.000002,0
309998,443,9821.0,1,0.004692,1,6394.357960,7.124807e+06,30,33427.0,1250.0,...,0.002590,0.001209,1.462033e-06,0.004692,0.004692,6.600000e-08,0.001944,0.001614,0.000003,0


In [5]:
# Discard every row that still contains a missing value, then display the result.
combined_df = combined_df.dropna()
combined_df

Unnamed: 0,dst_port,dst_asn,quic_ver,dur,ratio,flow_pkt_rate,flow_byte_rate,total_pkts,total_bytes,max_bytes,...,ave_fwd_iat,std_fwd_iat,var_fwd_iat,rev_dur,max_rev_iat,min_rev_iat,ave_rev_iat,std_rev_iat,var_rev_iat,label
0,443,13335.0,1,1.616348,0,12.373573,3.154024e+03,20,5098.0,1250.0,...,0.244896,0.468212,2.192224e-01,0.358289,0.358289,3.452879e-02,0.096112,0.094490,0.008928,0
1,443,13335.0,1,0.127682,1,234.958082,1.819672e+05,30,23234.0,1250.0,...,0.072812,0.034301,1.176543e-03,0.127682,0.127682,4.894717e-02,0.096772,0.023456,0.000550,0
2,443,13335.0,1,0.725707,1,41.339020,2.833101e+04,30,20560.0,1250.0,...,0.256128,0.294598,8.678807e-02,0.725707,0.725707,5.600000e-08,0.474463,0.311842,0.097245,0
3,443,13335.0,1,0.226025,0,132.728759,8.810535e+04,30,19914.0,1250.0,...,0.199391,0.011408,1.301348e-04,0.226025,0.226025,5.700000e-08,0.052990,0.077739,0.006043,0
4,443,13335.0,1,0.172525,1,173.887621,1.572118e+05,30,27123.0,1200.0,...,0.077448,0.045418,2.062815e-03,0.172525,0.172525,2.566661e-02,0.153548,0.045018,0.002027,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
309995,443,9821.0,1,0.005424,1,5530.641042,6.464398e+06,30,35065.0,1250.0,...,0.003651,0.000618,3.819815e-07,0.005424,0.005424,1.696050e-04,0.001512,0.001930,0.000004,0
309996,443,9821.0,1,0.005768,1,5201.078912,5.445703e+06,30,31411.0,1250.0,...,0.002881,0.001329,1.766493e-06,0.005768,0.005768,1.001060e-04,0.002819,0.001902,0.000004,0
309997,443,9821.0,1,0.004888,1,6137.696772,6.925368e+06,30,33850.0,1250.0,...,0.002451,0.001596,2.547167e-06,0.004888,0.004888,6.500001e-08,0.002884,0.001486,0.000002,0
309998,443,9821.0,1,0.004692,1,6394.357960,7.124807e+06,30,33427.0,1250.0,...,0.002590,0.001209,1.462033e-06,0.004692,0.004692,6.600000e-08,0.001944,0.001614,0.000003,0


In [6]:
# Write the final frame to merged.csv; index=False leaves the row index out of the file.
combined_df.to_csv('merged.csv', index=False)