In [7]:
import pandas as pd
import plotly.express as px

In [8]:
# Read the dataset from CSV, skipping the first 8 lines of the file
data = pd.read_csv(
    'dataset.csv',
    skiprows=8,
)

In [9]:
# Normalise the tags column: missing values become '' and every entry is cast to str
data['tags'] = (
    data['tags']
    .fillna('')
    .astype(str)
)

In [10]:
# Build one canonical (sorted) tag combination per row, then rank the most
# frequent combinations.
TOP_N_COMBINATIONS = 10  # how many combinations to keep for the table and chart


def _parse_tags(raw_tags):
    """Return the comma-separated tags of one row as a sorted tuple.

    Whitespace around each tag is stripped and empty fragments (produced by an
    empty string, a trailing comma, or ",,") are dropped, so a row without
    tags yields ``()`` instead of ``('',)``.
    """
    fragments = (fragment.strip() for fragment in raw_tags.split(','))
    return tuple(sorted(fragment for fragment in fragments if fragment))


data['tag_list'] = data['tags'].apply(_parse_tags)

# Rows with no tags at all are not a "combination"; keep them out of the
# ranking so they cannot appear as a bar with a blank label.
tagged_rows = data.loc[data['tag_list'].map(len) > 0, 'tag_list']

# Name the index and the count column explicitly instead of assigning column
# names by position after reset_index().
tag_combinations = (
    tagged_rows
    .value_counts()
    .head(TOP_N_COMBINATIONS)
    .rename_axis('tag_combination')
    .reset_index(name='frequency')
)

# Human-readable label for each combination, used on the chart's x axis
tag_combinations['tag_combination_str'] = tag_combinations['tag_combination'].apply(', '.join)

print(tag_combinations)

                                    tag_combination  frequency  \
0     (DEU, StrelaStealer, dll, geofenced, opendir)      87875   
1                         (32-bit, Mozi, elf, mips)      83121   
2                                       (Mozi, elf)      18229   
3                               (botnetdomain, elf)       6560   
4                                      (elf, mirai)       5127   
5                                           (Mozi,)       4785   
6                   (32-bit, Mozi, arm, elf, mirai)       3409   
7  (45.9.74.36, DEU, StrelaStealer, dll, geofenced)       2975   
8                                            (elf,)       2234   
9                                (botnetdomain, sh)       1938   

                              tag_combination_str  
0     DEU, StrelaStealer, dll, geofenced, opendir  
1                         32-bit, Mozi, elf, mips  
2                                       Mozi, elf  
3                               botnetdomain, elf  
4            

In [11]:
# Bar chart with one bar per tag combination; bar height is its frequency
axis_labels = {'tag_combination_str': 'Tag Combination', 'frequency': 'Frequency'}
fig = px.bar(
    tag_combinations,
    x='tag_combination_str',
    y='frequency',
    text='frequency',
    labels=axis_labels,
    title='Frequency of Tag Combinations in Malicious URLs',
)

# Show each bar's value outside the bar, formatted with the '.2s' specifier
fig.update_traces(texttemplate='%{text:.2s}', textposition='outside')

# Axis titles, slanted x tick labels, and a fixed figure height
fig.update_layout(
    height=700,
    xaxis_tickangle=-45,
    xaxis_title='Tag Combination',
    yaxis_title='Frequency',
)

fig.show()

In [12]:
# Export the figure as static images (SVG, then PNG) and as an HTML file
for image_path in ('tag_combinations.svg', 'tag_combinations.png'):
    fig.write_image(image_path)
fig.write_html('tag_combinations.html')