In [None]:
import pandas as pd
import numpy as np
import time
import os
import matplotlib.pyplot as plt
from datetime import datetime

In [None]:
prefix = '[LABELS]'
# Change this path to point at the directory holding the tagged graphs.
# os.listdir returns entries in arbitrary order, so the names are sorted to
# make every per-file sequence derived from `files` (e.g. the graph index used
# when plotting) reproducible across runs and machines.
files = sorted(
    f for f in os.listdir('data_split/new_dst_labels')
    if f.startswith(prefix) and f.endswith(".csv")
)


In [None]:
# Accumulators filled by the loop below (one iteration per label file).
s_dataset_last_timestamps = []  # date of exp['lastTx'] for each seed address
s_graph_timestamps = []         # date of the last CSV row whose 'seed' is the address
txs = {}                        # txid -> 1, used as a "seen" set across all files
global_timestamps = {}          # year (int) -> number of first-seen transactions
clusters = {}                   # cluster id (str) -> number of seed addresses

# NOTE(review): `exp` is not defined in the visible cells; presumably it is a
# DataFrame with 'address', 'lastTx', 'cluster' columns loaded elsewhere —
# confirm it is available on a fresh kernel.
for file in files:
    # Label files are named '<prefix><address>.csv'; the graph CSV in
    # data_split/ has the same name without the prefix.
    graph_file = file[len(prefix):]
    address = graph_file[:-4]

    # Look the seed up once and reuse the matching row for both fields.
    seed_info = exp[exp['address'] == address].iloc[0]
    s_dataset_last_timestamps.append(pd.to_datetime(seed_info['lastTx'], unit='s').date())

    rans = pd.read_csv(f"data_split/{graph_file}")
    record = rans[rans['seed'] == address]
    s_graph_timestamps.append(pd.to_datetime(record.iloc[-1]['timestamp']).date())

    cluster = str(int(seed_info['cluster']))
    clusters[cluster] = clusters.get(cluster, 0) + 1

    # Iterate over the two needed columns directly instead of iterrows() plus
    # repeated rans.loc[index] lookups, which rebuild a row Series every time.
    for txid, timestamp in zip(rans['txid'], rans['timestamp']):
        if txid not in txs:
            txs[txid] = 1
            # The timestamp is sliced as a string; its first four characters
            # are taken as the year.
            year = int(timestamp[0:4])
            global_timestamps[year] = global_timestamps.get(year, 0) + 1


In [None]:
# Horizontal bar chart of the per-year counts collected in global_timestamps.
years = [*global_timestamps]
instances = [global_timestamps[year] for year in years]

fig, ax = plt.subplots(figsize=(10, 5))
ax.barh(years, instances, color='purple', alpha=0.8)

# Axis labels and title
ax.set_xlabel('No. of Transactions')
ax.set_ylabel('Year')
ax.set_title('Transactions\' Distribution Per Year')
plt.xticks(rotation=45)
ax.grid(axis='y', linestyle='--', alpha=0.7)

# Write the figure to disk, then display it.
fig.savefig('total_years.pdf', format='pdf', bbox_inches='tight', dpi=1000)
plt.show()

In [None]:
import matplotlib.dates as mdates
# Line plot comparing, for each seed graph, the last-transaction date stored in
# the dataset with the last-transaction date found in the scraped graph.
plt.figure(figsize=(10, 8))
# Derive the number of graphs from the collected data rather than hard-coding
# it (previously 107): plt.plot requires x and y to have the same length, so a
# fixed count breaks as soon as the number of label files changes.
num_graphs = len(s_dataset_last_timestamps)
graph_indices = range(1, num_graphs + 1)
plt.plot(graph_indices, s_dataset_last_timestamps, label='Dataset\'s Last Tx', marker='s', color = 'orange')
plt.plot(graph_indices, s_graph_timestamps, label='Scraping\' Last Tx ', marker='o', linestyle='dashed', color = 'purple')

plt.xlabel('Graph Index')
plt.ylabel('Last Transaction')
plt.title('Transaction Activity')
plt.legend()
plt.grid(True)
plt.yticks(rotation=45)
# Optional date formatting for the y axis (currently disabled):
#plt.gca().yaxis.set_major_locator(mdates.DayLocator(interval=32))  
#plt.gca().yaxis.set_major_formatter(mdates.DateFormatter("%Y-%m")) 

plt.savefig('seed_activity.png', format='png', dpi=1000)

plt.show()

In [None]:
from collections import defaultdict

# families: family -> {cluster id (str) -> number of label files}.
# NOTE(review): `exp` is not defined in the visible cells; presumably a
# DataFrame with 'address', 'family', 'cluster' columns — confirm.
families = defaultdict(dict)
for label_file in files:
    # Strip the 8-character prefix and the '.csv' suffix to get the address.
    seed_row = exp[exp['address'] == label_file[8:-4]].iloc[0]
    family = seed_row['family']
    cluster = str(int(seed_row['cluster']))
    # The defaultdict creates the inner dict the first time a family is seen.
    per_cluster = families[family]
    per_cluster[cluster] = per_cluster.get(cluster, 0) + 1

In [None]:
# Vertical bar chart: one bar per cluster id, height = count in `clusters`.
clst = [*clusters]
instances = [clusters[cluster_id] for cluster_id in clst]

fig, ax = plt.subplots(figsize=(9, 5))
ax.bar(clst, instances, color='blue', alpha=0.8)

# Axis labels and title
ax.set_xlabel('Cluster ID')
ax.set_ylabel('Number of Instances')
ax.set_title('Address Cluster Distribution')
plt.xticks(rotation=45)
ax.grid(axis='y', linestyle='--', alpha=0.7)

# Write the figure to disk, then display it.
fig.savefig('clusters.pdf', format='pdf', bbox_inches='tight', dpi=1000)
plt.show()

In [None]:
# Vertical bar chart: one bar per ransomware family, height = number of
# instances of that family.
ransomware = list(families.keys())
# `families` maps family -> {cluster id: count}, so its values are dicts and
# cannot be used as bar heights directly. Sum the per-cluster counts to get
# one number per family.
instances = [sum(per_cluster.values()) for per_cluster in families.values()]

plt.figure(figsize=(9, 5))
plt.bar(ransomware, instances, color='red', alpha=0.7)

# Labels and Title
plt.xlabel('Ransomware Family')
plt.ylabel('Number of Instances')
plt.title('Ransomware Family Distribution')
plt.xticks(rotation=45)
plt.grid(axis='y', linestyle='--', alpha=0.7)
# Saving is currently disabled; uncomment to write the figure to disk.
#plt.savefig('families.png', format='png', bbox_inches='tight', dpi=1000)
# Show Plot
plt.show()